From 855b7b87a549e255c34bff2a70e6457e1c4ce430 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 6 Sep 2023 11:29:14 -0400 Subject: [PATCH 01/18] chore: update requirements to current versions --- requirements/dev.txt | 8 +++- requirements/main.in | 1 + requirements/main.txt | 87 ++++++++++++++++++++++++++----------------- 3 files changed, 60 insertions(+), 36 deletions(-) diff --git a/requirements/dev.txt b/requirements/dev.txt index e14a84c..da4bbc0 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.11 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # # pip-compile requirements/dev.in # @@ -14,6 +14,10 @@ pep517==0.13.0 # via build pip-tools==6.12.1 # via -r requirements/dev.in +tomli==2.0.1 + # via + # build + # pep517 wheel==0.38.4 # via pip-tools diff --git a/requirements/main.in b/requirements/main.in index 6df35e6..bb56960 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -12,3 +12,4 @@ pydantic pymupdf pypdfium2 lxml +pdfrw diff --git a/requirements/main.txt b/requirements/main.txt index df79ee6..c740d56 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1,81 +1,100 @@ # -# This file is autogenerated by pip-compile with python 3.11 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # # pip-compile requirements/main.in # -borb==2.1.7 +annotated-types==0.5.0 + # via pydantic +borb==2.1.17 # via -r requirements/main.in -certifi==2022.12.7 +certifi==2023.7.22 # via requests cffi==1.15.1 # via cryptography -charset-normalizer==2.1.1 +charset-normalizer==3.2.0 # via # pdfminer-six # requests -commonmark==0.9.1 - # via rich -cryptography==38.0.4 - # via pdfminer-six -fonttools==4.38.0 +cryptography==41.0.3 + # via + # borb + # pdfminer-six +fonttools==4.42.1 # via borb idna==3.4 # via requests -levenshtein==0.20.9 
+levenshtein==0.21.1 # via python-levenshtein -lxml==4.9.2 - # via -r requirements/main.in -numpy==1.24.1 +lxml==4.9.3 + # via + # -r requirements/main.in + # borb +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +numpy==1.25.2 # via -r requirements/main.in pdfminer-six==20221105 # via # -r requirements/main.in # pdfplumber -pdfplumber==0.7.6 +pdfplumber==0.10.2 + # via -r requirements/main.in +pdfrw==0.4 # via -r requirements/main.in pdftotext==2.2.2 # via -r requirements/main.in -pillow==9.3.0 +pillow==10.0.0 # via # borb # pdfplumber # qrcode pycparser==2.21 # via cffi -pydantic==1.10.4 +pydantic==2.3.0 # via -r requirements/main.in -pygments==2.14.0 +pydantic-core==2.6.3 + # via pydantic +pygments==2.16.1 # via rich -pymupdf==1.21.1 - # via -r requirements/main.in -pypdf==3.2.0 +pymupdf==1.23.3 # via -r requirements/main.in -pypdfium2==3.15.0 +pymupdfb==1.23.3 + # via pymupdf +pypdf==3.15.5 # via -r requirements/main.in -python-barcode==0.14.0 +pypdfium2==4.19.0 + # via + # -r requirements/main.in + # pdfplumber +pypng==0.20220715.0 + # via qrcode +python-barcode==0.15.1 # via borb -python-levenshtein==0.20.9 +python-levenshtein==0.21.1 # via -r requirements/main.in -qrcode[pil]==7.3.1 +qrcode[pil]==7.4.2 # via borb -rapidfuzz==2.13.7 +rapidfuzz==3.2.0 # via levenshtein -requests==2.28.1 +requests==2.31.0 # via # -r requirements/main.in # borb # tika -rich==13.0.0 +rich==13.5.2 # via -r requirements/main.in -tika==1.25 +tika==2.6.0 # via -r requirements/main.in -typing-extensions==4.4.0 - # via pydantic -urllib3==1.26.13 +typing-extensions==4.7.1 + # via + # pydantic + # pydantic-core + # qrcode +urllib3==2.0.4 # via requests -wand==0.6.10 - # via pdfplumber # The following packages are considered to be unsafe in a requirements file: # setuptools From 5e33cfd5df072148c64a39ae5479a2062cc103ca Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 6 Sep 2023 12:50:56 -0400 Subject: [PATCH 02/18] fix: remove unsupported argument in 
pydantic2 --- pdf_benchmark/data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf_benchmark/data_structures.py b/pdf_benchmark/data_structures.py index c3db7e2..fbdaad2 100644 --- a/pdf_benchmark/data_structures.py +++ b/pdf_benchmark/data_structures.py @@ -88,4 +88,4 @@ def has_doc(self, library: Library, document: Document) -> bool: def write(self, path: Path): with open(path, "w") as f: - f.write(self.json(indent=4, sort_keys=True)) + f.write(self.model_dump_json(indent=4)) From 1f1596fd0665574f508688504c4f8bcc96344772 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 6 Sep 2023 16:18:39 -0400 Subject: [PATCH 03/18] fix: use `use_text_flow` for pdfplumber --- benchmark.py | 2 +- pdf_benchmark/library_code.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark.py b/benchmark.py index 2cbee6b..ade825b 100644 --- a/benchmark.py +++ b/benchmark.py @@ -188,7 +188,7 @@ def write_single_result( "pdfplumber", "https://pypi.org/project/pdfplumber/", text_extraction_function=pdfplubmer_get_text, - version=pdfplumber.__version__, + version="git+https://github.com/dhdaines/pdfplumber", license="MIT", last_release_date="2023-07-29", dependencies="pdfminer.six", diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py index 4fc20f9..da688c7 100644 --- a/pdf_benchmark/library_code.py +++ b/pdf_benchmark/library_code.py @@ -161,7 +161,7 @@ def pdfplubmer_get_text(data: bytes) -> str: text = "" with pdfplumber.open(BytesIO(data)) as pdf: for page in pdf.pages: - text += page.extract_text() + text += page.extract_text(use_text_flow=True) text += "\n" return text From dc1cb6bbc4a94923a2d5fd8f7e1d3cb94020964d Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 6 Sep 2023 16:19:48 -0400 Subject: [PATCH 04/18] chore: rebuild --- README.md | 76 +- cache.json | 882 +- read/results/borb/2201.00022.txt | Bin 44826 -> 58807 bytes read/results/pdfium/2201.00022.txt | 563 +- 
read/results/pdfminer/1602.06541.txt | 4 +- read/results/pdfminer/1707.09725.txt | 26 +- read/results/pdfminer/2201.00022.txt | 1221 ++- read/results/pdfminer/GeoTopo-book.txt | 66 +- read/results/pdfplumber/1601.03642.txt | 582 +- read/results/pdfplumber/1602.06541.txt | 2113 ++-- read/results/pdfplumber/1707.09725.txt | 1507 ++- read/results/pdfplumber/2201.00021.txt | 1163 ++- read/results/pdfplumber/2201.00022.txt | 1699 ++- read/results/pdfplumber/2201.00029.txt | 79 +- read/results/pdfplumber/2201.00037.txt | 2736 +++-- read/results/pdfplumber/2201.00069.txt | 1012 +- read/results/pdfplumber/2201.00151.txt | 1993 ++-- read/results/pdfplumber/2201.00178.txt | 1053 +- read/results/pdfplumber/2201.00200.txt | 665 +- read/results/pdfplumber/2201.00201.txt | 1107 +- read/results/pdfplumber/2201.00214.txt | 607 +- read/results/pdfplumber/GeoTopo-book.txt | 11369 ++++++++++++++------- read/results/pdftotext/1601.03642.txt | 49 +- read/results/pdftotext/1602.06541.txt | 19 +- read/results/pdftotext/1707.09725.txt | 900 +- read/results/pdftotext/2201.00021.txt | 15 +- read/results/pdftotext/2201.00022.txt | 775 +- read/results/pdftotext/2201.00037.txt | 91 +- read/results/pdftotext/2201.00069.txt | 29 +- read/results/pdftotext/2201.00151.txt | 51 +- read/results/pdftotext/2201.00178.txt | 38 +- read/results/pdftotext/2201.00200.txt | 6 +- read/results/pdftotext/2201.00214.txt | 4 +- read/results/pdftotext/GeoTopo-book.txt | 1163 +-- read/results/pymupdf/2201.00022.txt | 651 +- read/results/pypdf/2201.00022.txt | 635 +- read/results/tika/1601.03642.txt | 23 +- read/results/tika/1602.06541.txt | 38 +- read/results/tika/1707.09725.txt | 421 +- read/results/tika/2201.00021.txt | 28 +- read/results/tika/2201.00022.txt | 1323 ++- read/results/tika/2201.00029.txt | 6 + read/results/tika/2201.00037.txt | 527 +- read/results/tika/2201.00069.txt | 23 +- read/results/tika/2201.00151.txt | 124 +- read/results/tika/2201.00178.txt | 189 +- read/results/tika/2201.00200.txt | 61 +- 
read/results/tika/2201.00201.txt | 21 + read/results/tika/2201.00214.txt | 197 +- read/results/tika/GeoTopo-book.txt | 1236 +-- 50 files changed, 24056 insertions(+), 15110 deletions(-) diff --git a/README.md b/README.md index f7d1f5f..9f95f4e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not documents that applied OCR. ## Benchmarking machine - Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz + Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz ## Input Documents | # | Name | File Size | Pages | @@ -18,73 +18,73 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 9 | [2201.00201](https://arxiv.org/pdf/2201.00201.pdf) | 1.3MiB | 9 | | 10 | [1602.06541](https://arxiv.org/pdf/1602.06541.pdf) | 2.9MiB | 16 | | 11 | [2201.00200](https://arxiv.org/pdf/2201.00200.pdf) | 284.8KiB | 7 | -| 12 | [2201.00022](https://arxiv.org/pdf/2201.00022.pdf) | 1.1MiB | 11 | +| 12 | [2201.00022](https://arxiv.org/pdf/2201.00022.pdf) | 1.2MiB | 14 | | 13 | [2201.00029](https://arxiv.org/pdf/2201.00029.pdf) | 797.6KiB | 12 | | 14 | [1601.03642](https://arxiv.org/pdf/1601.03642.pdf) | 1004.9KiB | 8 | ## Libraries -| Name | Last PyPI Release | License | Version | Dependencies | -| -----------: | :---------------- | ------------------------------: | -------: | :-------------------------------------------------------- | -| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.18.0 | PDFium (Foxit/Google) | -| pdfminer.six | 2022-11-05 | MIT/X | 20221105 | | -| pdfplumber | 2023-07-29 | MIT | 0.10.2 | pdfminer.six | -| pdfrw | 2017-09-18 | MIT | 0.4 | | -| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.23.1 | MuPDF | -| pypdf | 2023-08-26 | BSD 3-Clause | 3.15.4 | | -| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | +| Name | Last PyPI 
Release | License | Version | Dependencies | +| -----------: | :---------------- | ------------------------------: | -----------------------------------------: | :-------------------------------------------------------- | +| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | +| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.19.0 | PDFium (Foxit/Google) | +| pdfminer.six | 2022-11-05 | MIT/X | 20221105 | | +| pdfplumber | 2023-07-29 | MIT | git+https://github.com/dhdaines/pdfplumber | pdfminer.six | +| pdfrw | 2017-09-18 | MIT | 0.4 | | +| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | +| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.23.3 | MuPDF | +| pypdf | 2023-08-26 | BSD 3-Clause | 3.15.5 | | +| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | ## Text Extraction Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.1s | 0.4s | 0.2s | 0.2s | 0.2s | 0.0s | 0.1s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | -| 2 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.2s | 1.9s | 0.2s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | 0.0s | 0.0s | -| 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 0.8s | 1.0s | 0.3s | 0.8s | 0.1s | 0.2s | 0.2s | 0.1s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | -| 4 | [Tika ](https://pypi.org/project/tika/) | 1.1s | 12.9s | 0.9s | 0.6s | 0.4s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | -| 5 | [pypdf ](https://pypi.org/project/pypdf/) | 2.6s | 18.7s | 4.8s | 5.3s | 2.3s | 0.7s | 0.9s | 0.4s | 0.5s | 0.3s | 0.6s | 0.5s | 0.4s | 0.4s | 0.2s | -| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 4.5s | 26.0s | 12.9s | 8.0s | 4.6s | 1.3s | 2.1s | 1.0s | 1.2s | 0.8s | 1.5s | 0.9s | 0.9s | 0.6s | 0.6s | -| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 6.7s | 41.7s | 10.9s | 11.5s | 8.4s | 2.4s | 4.3s | 2.0s | 1.9s | 1.9s | 2.7s | 1.8s | 1.7s | 1.0s | 1.2s | -| 8 | [Borb ](https://pypi.org/project/borb/) | 34.7s | 111.2s | 105.0s | 1.4s | 87.2s | 21.1s | 7.4s | 83.5s | 16.4s | 20.3s | 5.4s | 3.4s | 18.8s | 3.2s | 2.1s | +| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.1s | 0.6s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | +| 2 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.2s | 0.7s | 0.4s | 0.2s | 0.3s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 
0.0s | 0.1s | 0.0s | 0.0s | +| 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 0.9s | 1.0s | 0.3s | 0.7s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | +| 4 | [Tika ](https://pypi.org/project/tika/) | 0.3s | 1.5s | 0.8s | 0.6s | 0.5s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | +| 5 | [pypdf ](https://pypi.org/project/pypdf/) | 3.6s | 26.0s | 6.6s | 7.5s | 3.2s | 0.9s | 1.5s | 0.7s | 0.7s | 0.4s | 0.7s | 0.7s | 0.6s | 0.6s | 0.4s | +| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 8.1s | 51.6s | 20.9s | 14.0s | 7.8s | 2.1s | 3.6s | 1.6s | 2.0s | 1.3s | 2.5s | 1.7s | 1.8s | 1.0s | 0.9s | +| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 10.7s | 70.3s | 17.7s | 19.3s | 12.1s | 3.5s | 6.1s | 3.0s | 2.8s | 2.6s | 3.9s | 2.8s | 3.2s | 1.6s | 1.5s | +| 8 | [Borb ](https://pypi.org/project/borb/) | 38.6s | 170.2s | 130.2s | 1.8s | 77.9s | 17.4s | 10.5s | 66.3s | 14.2s | 15.5s | 7.3s | 5.1s | 16.7s | 4.4s | 2.5s | ## Image Extraction Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.5s | 0.3s | 0.5s | 0.0s | 1.7s | 0.4s | 0.0s | 3.2s | 0.4s | 0.4s | 0.1s | 0.0s | 0.3s | 0.2s | 0.0s | -| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 2.8s | 16.4s | 2.1s | 0.8s | 9.2s | 1.1s | 0.0s | 6.7s | 0.9s | 0.9s | 0.4s | 0.0s | 0.7s | 0.2s | 0.1s | -| 3 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 6.5s | 31.8s | 13.7s | 9.2s | 24.0s | 1.5s | 2.3s | 1.5s | 1.4s | 0.9s | 1.5s | 0.9s | 1.0s | 0.6s | 0.5s | +| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.5s | 0.3s | 0.6s | 0.0s | 1.9s | 0.5s | 0.0s | 3.0s | 0.4s | 0.4s | 0.1s | 0.0s | 0.3s | 0.2s | 0.0s | +| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 3.8s | 24.0s | 2.3s | 1.2s | 14.3s | 1.3s | 0.0s | 6.7s | 1.0s | 1.1s | 0.3s | 0.0s | 0.8s | 0.2s | 0.1s | +| 3 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 10.5s | 59.1s | 21.9s | 16.1s | 30.9s | 2.3s | 3.8s | 2.3s | 2.0s | 1.4s | 2.4s | 1.7s | 1.9s | 1.0s | 0.8s | ## Watermarking Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 
8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.0s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | -| 2 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.0s | 0.4s | 0.0s | 0.3s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | -| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.4s | 0.6s | 1.7s | 0.4s | 0.9s | 0.2s | 0.3s | 0.4s | 0.3s | 0.2s | 0.3s | 0.1s | 0.2s | 0.0s | 0.2s | +| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.0s | 0.0s | 0.2s | 0.0s | 0.1s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | +| 2 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | +| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.5s | 0.7s | 2.0s 
| 0.4s | 1.2s | 0.3s | 0.3s | 0.3s | 0.3s | 0.3s | 0.5s | 0.1s | 0.5s | 0.1s | 0.1s | ## Watermarking File Size | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 3.4MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.1MB | 0.8MB | 1.0MB | -| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 3.5MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.1MB | 0.8MB | 1.0MB | -| 3 | 
[PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 3.7MB | 2.7MB | 6.8MB | 1.7MB | 8.5MB | 2.8MB | 3.4MB | 15.5MB | 2.5MB | 1.4MB | 3.2MB | 0.3MB | 1.2MB | 0.9MB | 1.1MB | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 3.5MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.2MB | 0.8MB | 1.0MB | +| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 3.5MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.2MB | 0.8MB | 1.0MB | +| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 3.7MB | 2.7MB | 6.8MB | 1.7MB | 8.5MB | 2.8MB | 3.4MB | 15.5MB | 2.5MB | 1.4MB | 3.2MB | 0.3MB | 1.3MB | 0.9MB | 1.1MB | ## Text Extraction Quality | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 98% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 98% | 99% | 99% | 98% | 98% | 99% | 99% | -| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 97% | 98% | 93% | 94% | 98% | 98% | 96% | 97% | 98% | 99% | 99% | 98% | 98% | 98% | 99% | -| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 97% | 98% | 96% | 93% | 97% | 98% | 96% | 98% | 98% | 98% | 98% | 97% | 97% | 98% | 99% | -| 4 | [Tika ](https://pypi.org/project/tika/) | 96% | 99% | 98% | 92% | 97% | 98% | 96% | 93% | 97% | 98% | 93% | 98% | 93% | 98% | 96% | -| 5 | [pdftotext ](https://poppler.freedesktop.org/) | 93% | 96% | 93% | 91% | 94% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 96% | 96% | 79% | -| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 90% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 86% | 98% | 86% | -| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 75% | 94% | 84% | 61% | 97% | 61% | 93% | 61% | 89% | 57% | 59% | 67% | 59% | 98% | 67% | -| 8 | [Borb ](https://pypi.org/project/borb/) | 45% | 70% | 79% | 0% | 40% | 48% | 92% | 0% | 64% | 51% | 41% | 55% | 43% | 0% | 53% | +| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 97% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 98% | 99% | 99% | 98% | 78% | 99% | 99% | +| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 96% | 98% | 93% | 94% | 98% | 98% | 96% | 97% | 98% | 99% | 99% | 98% | 78% | 98% | 99% | +| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 97% | 98% | 96% | 98% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | +| 4 | [Tika ](https://pypi.org/project/tika/) | 95% | 99% | 99% | 92% | 97% | 98% | 96% | 93% | 97% | 
98% | 93% | 98% | 73% | 98% | 96% | +| 5 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 88% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | +| 6 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | +| 7 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | +| 8 | [Borb ](https://pypi.org/project/borb/) | 45% | 70% | 79% | 0% | 40% | 48% | 92% | 0% | 64% | 51% | 41% | 55% | 41% | 0% | 53% | diff --git a/cache.json b/cache.json index 521253c..7164f81 100644 --- a/cache.json +++ b/cache.json @@ -1,657 +1,657 @@ { "benchmark_times": { - "borb": { - "1601.03642": { - "read": 2.053179979324341 + "pdfplumber": { + "2201.00214": { + "read": 70.30025053024292 }, - "1602.06541": { - "read": 5.41968560218811 + "GeoTopo-book": { + "read": 17.71614122390747 + }, + "2201.00151": { + "read": 19.277418613433838 }, "1707.09725": { - "read": 87.24084305763245 + "read": 12.056224346160889 }, "2201.00021": { - "read": 21.095511198043823 - }, - "2201.00022": { - "read": 18.75901699066162 - }, - "2201.00029": { - "read": 3.2207601070404053 + "read": 3.5353875160217285 }, "2201.00037": { - "read": 7.43989634513855 + "read": 6.122551918029785 }, "2201.00069": { - "read": 83.48481893539429 - }, - "2201.00151": { - "read": 1.3735122680664062 + "read": 2.9618067741394043 }, "2201.00178": { - "read": 16.361470699310303 + "read": 2.80582332611084 + }, + "2201.00201": { + "read": 2.5980660915374756 + }, + "1602.06541": { + "read": 3.878690004348755 }, "2201.00200": { - "read": 3.4077320098876953 + "read": 2.7724621295928955 }, - "2201.00201": { - "read": 20.287503480911255 + "2201.00022": { + "read": 3.221090793609619 }, - "2201.00214": { - "read": 111.19087290763855 + "2201.00029": { + "read": 1.5970120429992676 }, - "GeoTopo-book": { - "read": 105.00666165351868 + 
"1601.03642": { + "read": 1.4764699935913086 } }, - "pdfium": { - "1601.03642": { - "read": 0.026182889938354492 + "pypdf": { + "2201.00214": { + "read": 26.020102977752686, + "watermark": 0.6621012687683105, + "image_extraction": 23.955000400543213 }, - "1602.06541": { - "read": 0.05526089668273926 + "GeoTopo-book": { + "read": 6.64851450920105, + "watermark": 2.0183393955230713, + "image_extraction": 2.250887632369995 + }, + "2201.00151": { + "read": 7.473206520080566, + "watermark": 0.428668737411499, + "image_extraction": 1.1657159328460693 }, "1707.09725": { - "read": 0.19156575202941895 + "read": 3.212459087371826, + "watermark": 1.1811449527740479, + "image_extraction": 14.264930486679077 }, "2201.00021": { - "read": 0.04799509048461914 - }, - "2201.00022": { - "read": 0.0395662784576416 - }, - "2201.00029": { - "read": 0.02202439308166504 + "read": 0.9077854156494141, + "watermark": 0.3055300712585449, + "image_extraction": 1.2899971008300781 }, "2201.00037": { - "read": 0.10238933563232422 + "read": 1.5470695495605469, + "watermark": 0.270383358001709, + "image_extraction": 0.022456884384155273 }, "2201.00069": { - "read": 0.05537557601928711 - }, - "2201.00151": { - "read": 0.16690778732299805 + "read": 0.692096471786499, + "watermark": 0.2626011371612549, + "image_extraction": 6.7051780223846436 }, "2201.00178": { - "read": 0.0502932071685791 + "read": 0.722989559173584, + "watermark": 0.2680470943450928, + "image_extraction": 1.019047498703003 + }, + "2201.00201": { + "read": 0.4151496887207031, + "watermark": 0.3354034423828125, + "image_extraction": 1.0872318744659424 + }, + "1602.06541": { + "read": 0.6642146110534668, + "watermark": 0.5023202896118164, + "image_extraction": 0.3495500087738037 }, "2201.00200": { - "read": 0.030122041702270508 + "read": 0.743006706237793, + "watermark": 0.12680268287658691, + "image_extraction": 0.007048606872558594 }, - "2201.00201": { - "read": 0.03543281555175781 + "2201.00022": { + "read": 0.5628504753112793, + 
"watermark": 0.5410447120666504, + "image_extraction": 0.7783563137054443 }, - "2201.00214": { - "read": 1.9107882976531982 + "2201.00029": { + "read": 0.5693433284759521, + "watermark": 0.0550692081451416, + "image_extraction": 0.23301029205322266 }, - "GeoTopo-book": { - "read": 0.24455952644348145 + "1601.03642": { + "read": 0.36649465560913086, + "watermark": 0.10128593444824219, + "image_extraction": 0.0654292106628418 } }, - "pdfminer": { - "1601.03642": { - "image_extraction": 0.5113904476165771, - "read": 0.5691556930541992 + "borb": { + "2201.00214": { + "read": 170.16125798225403 }, - "1602.06541": { - "image_extraction": 1.5237865447998047, - "read": 1.5191314220428467 + "GeoTopo-book": { + "read": 130.1867437362671 + }, + "2201.00151": { + "read": 1.7567353248596191 }, "1707.09725": { - "image_extraction": 23.978344202041626, - "read": 4.634711742401123 + "read": 77.86134815216064 }, "2201.00021": { - "image_extraction": 1.4553284645080566, - "read": 1.3015577793121338 - }, - "2201.00022": { - "image_extraction": 0.9866993427276611, - "read": 0.8724193572998047 - }, - "2201.00029": { - "image_extraction": 0.6153700351715088, - "read": 0.5640342235565186 + "read": 17.365882873535156 }, "2201.00037": { - "image_extraction": 2.2505035400390625, - "read": 2.127037763595581 + "read": 10.51969861984253 }, "2201.00069": { - "image_extraction": 1.5143694877624512, - "read": 0.9530880451202393 - }, - "2201.00151": { - "image_extraction": 9.24244236946106, - "read": 7.973270893096924 + "read": 66.33814978599548 }, "2201.00178": { - "image_extraction": 1.3969669342041016, - "read": 1.2367455959320068 + "read": 14.184422016143799 + }, + "2201.00201": { + "read": 15.534729957580566 + }, + "1602.06541": { + "read": 7.256805181503296 }, "2201.00200": { - "image_extraction": 0.9476666450500488, - "read": 0.9224035739898682 + "read": 5.089946269989014 }, - "2201.00201": { - "image_extraction": 0.8980197906494141, - "read": 0.7907443046569824 + "2201.00022": { + "read": 
16.713526487350464 }, - "2201.00214": { - "image_extraction": 31.77288317680359, - "read": 25.975944757461548 + "2201.00029": { + "read": 4.427474021911621 }, - "GeoTopo-book": { - "image_extraction": 13.708941459655762, - "read": 12.885846138000488 + "1601.03642": { + "read": 2.4990572929382324 } }, - "pdfplumber": { - "1601.03642": { - "read": 1.150937557220459 + "pdfium": { + "2201.00214": { + "read": 0.7194085121154785 }, - "1602.06541": { - "read": 2.7273688316345215 + "GeoTopo-book": { + "read": 0.35127973556518555 + }, + "2201.00151": { + "read": 0.2381303310394287 }, "1707.09725": { - "read": 8.424808502197266 + "read": 0.2519388198852539 }, "2201.00021": { - "read": 2.4490716457366943 - }, - "2201.00022": { - "read": 1.7107877731323242 - }, - "2201.00029": { - "read": 1.0425853729248047 + "read": 0.05813455581665039 }, "2201.00037": { - "read": 4.317584991455078 + "read": 0.13362622261047363 }, "2201.00069": { - "read": 2.015364170074463 - }, - "2201.00151": { - "read": 11.465008974075317 + "read": 0.08023524284362793 }, "2201.00178": { - "read": 1.9202370643615723 + "read": 0.06390500068664551 + }, + "2201.00201": { + "read": 0.05460667610168457 + }, + "1602.06541": { + "read": 0.0795130729675293 }, "2201.00200": { - "read": 1.7538721561431885 + "read": 0.043161869049072266 }, - "2201.00201": { - "read": 1.8756508827209473 + "2201.00022": { + "read": 0.06084847450256348 }, - "2201.00214": { - "read": 41.712098360061646 + "2201.00029": { + "read": 0.028944969177246094 }, - "GeoTopo-book": { - "read": 10.938571691513062 + "1601.03642": { + "read": 0.03023362159729004 } }, - "pdfrw": { - "1601.03642": { - "watermark": 0.036574363708496094 + "pdfminer": { + "2201.00214": { + "read": 51.59387469291687, + "image_extraction": 59.07294750213623 }, - "1602.06541": { - "watermark": 0.09327936172485352 + "GeoTopo-book": { + "read": 20.944347858428955, + "image_extraction": 21.889408588409424 + }, + "2201.00151": { + "read": 13.961804151535034, + "image_extraction": 
16.127032041549683 }, "1707.09725": { - "watermark": 0.27477574348449707 + "read": 7.790618896484375, + "image_extraction": 30.880011081695557 }, "2201.00021": { - "watermark": 0.0827476978302002 - }, - "2201.00022": { - "watermark": 0.08185839653015137 - }, - "2201.00029": { - "watermark": 0.013228416442871094 + "read": 2.0765562057495117, + "image_extraction": 2.265204906463623 }, "2201.00037": { - "watermark": 0.053519248962402344 + "read": 3.551035165786743, + "image_extraction": 3.7663705348968506 }, "2201.00069": { - "watermark": 0.08611893653869629 - }, - "2201.00151": { - "watermark": 0.0400242805480957 + "read": 1.6111178398132324, + "image_extraction": 2.334484338760376 }, "2201.00178": { - "watermark": 0.0774388313293457 + "read": 1.9711239337921143, + "image_extraction": 2.026402711868286 + }, + "2201.00201": { + "read": 1.3471250534057617, + "image_extraction": 1.4003777503967285 + }, + "1602.06541": { + "read": 2.4590721130371094, + "image_extraction": 2.4072012901306152 }, "2201.00200": { - "watermark": 0.038611650466918945 + "read": 1.65625, + "image_extraction": 1.7444770336151123 }, - "2201.00201": { - "watermark": 0.05524253845214844 + "2201.00022": { + "read": 1.8396813869476318, + "image_extraction": 1.8976826667785645 }, - "2201.00214": { - "watermark": 0.0414888858795166 + "2201.00029": { + "read": 1.0377953052520752, + "image_extraction": 1.0113511085510254 }, - "GeoTopo-book": { - "watermark": 0.35054945945739746 + "1601.03642": { + "read": 0.859734058380127, + "image_extraction": 0.8416221141815186 } }, - "pdftotext": { - "1601.03642": { - "read": 0.04276108741760254 + "pdfrw": { + "2201.00214": { + "watermark": 0.06189870834350586 }, - "1602.06541": { - "read": 0.12382841110229492 + "GeoTopo-book": { + "watermark": 0.4900834560394287 + }, + "2201.00151": { + "watermark": 0.05237531661987305 }, "1707.09725": { - "read": 0.8157875537872314 + "watermark": 0.37189579010009766 }, "2201.00021": { - "read": 0.07499313354492188 - }, - 
"2201.00022": { - "read": 0.07213330268859863 - }, - "2201.00029": { - "read": 0.030113697052001953 + "watermark": 0.0998694896697998 }, "2201.00037": { - "read": 0.21310901641845703 + "watermark": 0.07250571250915527 }, "2201.00069": { - "read": 0.24145245552062988 - }, - "2201.00151": { - "read": 0.2548847198486328 + "watermark": 0.1360166072845459 }, "2201.00178": { - "read": 0.12105298042297363 + "watermark": 0.10211992263793945 + }, + "2201.00201": { + "watermark": 0.07320046424865723 + }, + "1602.06541": { + "watermark": 0.12589788436889648 }, "2201.00200": { - "read": 0.052548885345458984 + "watermark": 0.05352663993835449 }, - "2201.00201": { - "read": 0.04883241653442383 + "2201.00022": { + "watermark": 0.1426396369934082 }, - "2201.00214": { - "read": 0.76462721824646 + "2201.00029": { + "watermark": 0.016480207443237305 }, - "GeoTopo-book": { - "read": 1.0103209018707275 + "1601.03642": { + "watermark": 0.04465961456298828 } }, - "pymupdf": { - "1601.03642": { - "image_extraction": 0.0022954940795898438, - "read": 0.02459883689880371, - "watermark": 0.012035369873046875 + "pdftotext": { + "2201.00214": { + "read": 0.9134228229522705 }, - "1602.06541": { - "image_extraction": 0.08666229248046875, - "read": 0.04839634895324707, - "watermark": 0.027688026428222656 + "GeoTopo-book": { + "read": 0.9935319423675537 + }, + "2201.00151": { + "read": 0.3033919334411621 }, "1707.09725": { - "image_extraction": 1.6604242324829102, - "read": 0.1567060947418213, - "watermark": 0.11259746551513672 + "read": 0.7321336269378662 }, "2201.00021": { - "image_extraction": 0.4480102062225342, - "read": 0.03892374038696289, - "watermark": 0.019346952438354492 - }, - "2201.00022": { - "image_extraction": 0.2813708782196045, - "read": 0.036031246185302734, - "watermark": 0.021375656127929688 - }, - "2201.00029": { - "image_extraction": 0.20908689498901367, - "read": 0.023555278778076172, - "watermark": 0.008962631225585938 + "read": 0.08934545516967773 }, "2201.00037": { - 
"image_extraction": 0.0018870830535888672, - "read": 0.0897064208984375, - "watermark": 0.028612375259399414 + "read": 0.2617814540863037 }, "2201.00069": { - "image_extraction": 3.1560046672821045, - "read": 0.03975987434387207, - "watermark": 0.02903914451599121 - }, - "2201.00151": { - "image_extraction": 0.0021486282348632812, - "read": 0.16691279411315918, - "watermark": 0.013991832733154297 + "read": 0.20723533630371094 }, "2201.00178": { - "image_extraction": 0.3780343532562256, - "read": 0.04298090934753418, - "watermark": 0.02316737174987793 + "read": 0.13864588737487793 + }, + "2201.00201": { + "read": 0.06463861465454102 + }, + "1602.06541": { + "read": 0.11054682731628418 }, "2201.00200": { - "image_extraction": 0.001901388168334961, - "read": 0.028005599975585938, - "watermark": 0.01051187515258789 + "read": 0.07274961471557617 }, - "2201.00201": { - "image_extraction": 0.3551938533782959, - "read": 0.031032323837280273, - "watermark": 0.015187978744506836 + "2201.00022": { + "read": 0.11013388633728027 }, - "2201.00214": { - "image_extraction": 0.2810511589050293, - "read": 0.41155457496643066, - "watermark": 0.01828455924987793 + "2201.00029": { + "read": 0.046718597412109375 }, - "GeoTopo-book": { - "image_extraction": 0.5082950592041016, - "read": 0.24191975593566895, - "watermark": 0.12025952339172363 + "1601.03642": { + "read": 0.0490870475769043 } }, - "pypdf": { - "1601.03642": { - "image_extraction": 0.05196261405944824, - "read": 0.1702582836151123, - "watermark": 0.20092368125915527 + "pymupdf": { + "2201.00214": { + "read": 0.5514135360717773, + "watermark": 0.026504039764404297, + "image_extraction": 0.27562856674194336 }, - "1602.06541": { - "image_extraction": 0.35231685638427734, - "read": 0.6377561092376709, - "watermark": 0.3079540729522705 + "GeoTopo-book": { + "read": 0.33759236335754395, + "watermark": 0.15517616271972656, + "image_extraction": 0.587421178817749 + }, + "2201.00151": { + "read": 0.20269060134887695, + "watermark": 
0.016669273376464844, + "image_extraction": 0.0031175613403320312 }, "1707.09725": { - "image_extraction": 9.18139910697937, - "read": 2.2707440853118896, - "watermark": 0.9489884376525879 + "read": 0.19739532470703125, + "watermark": 0.14671063423156738, + "image_extraction": 1.8962862491607666 }, "2201.00021": { - "image_extraction": 1.0856575965881348, - "read": 0.6681113243103027, - "watermark": 0.20781850814819336 - }, - "2201.00022": { - "image_extraction": 0.6631152629852295, - "read": 0.42851996421813965, - "watermark": 0.2411966323852539 - }, - "2201.00029": { - "image_extraction": 0.20132708549499512, - "read": 0.4068417549133301, - "watermark": 0.04504132270812988 + "read": 0.04858541488647461, + "watermark": 0.0232088565826416, + "image_extraction": 0.5244166851043701 }, "2201.00037": { - "image_extraction": 0.02179741859436035, - "read": 0.8668158054351807, - "watermark": 0.28296732902526855 + "read": 0.10872936248779297, + "watermark": 0.03429841995239258, + "image_extraction": 0.002572774887084961 }, "2201.00069": { - "image_extraction": 6.685812473297119, - "read": 0.36688804626464844, - "watermark": 0.37302637100219727 - }, - "2201.00151": { - "image_extraction": 0.8311829566955566, - "read": 5.328065633773804, - "watermark": 0.43613553047180176 + "read": 0.048812150955200195, + "watermark": 0.041498422622680664, + "image_extraction": 2.9812166690826416 }, "2201.00178": { - "image_extraction": 0.9407749176025391, - "read": 0.5250120162963867, - "watermark": 0.2806210517883301 + "read": 0.05117154121398926, + "watermark": 0.027867794036865234, + "image_extraction": 0.3875887393951416 + }, + "2201.00201": { + "read": 0.0379939079284668, + "watermark": 0.02063298225402832, + "image_extraction": 0.392880916595459 + }, + "1602.06541": { + "read": 0.0582125186920166, + "watermark": 0.03419327735900879, + "image_extraction": 0.08664584159851074 }, "2201.00200": { - "image_extraction": 0.005396366119384766, - "read": 0.5119338035583496, - "watermark": 
0.14427518844604492 + "read": 0.04317927360534668, + "watermark": 0.0125732421875, + "image_extraction": 0.002869129180908203 }, - "2201.00201": { - "image_extraction": 0.9274122714996338, - "read": 0.27799558639526367, - "watermark": 0.1510756015777588 + "2201.00022": { + "read": 0.053061723709106445, + "watermark": 0.029779672622680664, + "image_extraction": 0.3130757808685303 }, - "2201.00214": { - "image_extraction": 16.357728481292725, - "read": 18.680219888687134, - "watermark": 0.6249253749847412 + "2201.00029": { + "read": 0.030493736267089844, + "watermark": 0.010860204696655273, + "image_extraction": 0.22451543807983398 }, - "GeoTopo-book": { - "image_extraction": 2.1184744834899902, - "read": 4.759061336517334, - "watermark": 1.6904757022857666 + "1601.03642": { + "read": 0.026759624481201172, + "watermark": 0.013607501983642578, + "image_extraction": 0.003281831741333008 } }, "tika": { - "1601.03642": { - "read": 0.04623913764953613 + "2201.00214": { + "read": 1.4826128482818604 }, - "1602.06541": { - "read": 0.1097116470336914 + "GeoTopo-book": { + "read": 0.8197121620178223 + }, + "2201.00151": { + "read": 0.5797288417816162 }, "1707.09725": { - "read": 0.44649672508239746 + "read": 0.510779619216919 }, "2201.00021": { - "read": 0.130723237991333 - }, - "2201.00022": { - "read": 0.0659487247467041 - }, - "2201.00029": { - "read": 0.04696822166442871 + "read": 0.11818695068359375 }, "2201.00037": { - "read": 0.31470751762390137 + "read": 0.2835381031036377 }, "2201.00069": { - "read": 0.1581587791442871 - }, - "2201.00151": { - "read": 0.5808892250061035 + "read": 0.15885210037231445 }, "2201.00178": { - "read": 0.11357355117797852 + "read": 0.11153674125671387 + }, + "2201.00201": { + "read": 0.10024166107177734 + }, + "1602.06541": { + "read": 0.15640830993652344 }, "2201.00200": { - "read": 0.09151124954223633 + "read": 0.07609415054321289 }, - "2201.00201": { - "read": 0.07536077499389648 + "2201.00022": { + "read": 0.13745427131652832 }, - 
"2201.00214": { - "read": 12.93626356124878 + "2201.00029": { + "read": 0.058022499084472656 }, - "GeoTopo-book": { - "read": 0.947016716003418 + "1601.03642": { + "read": 0.08746814727783203 } } }, "read_quality": { + "pdfplumber": { + "2201.00214": 0.9599753755721704, + "GeoTopo-book": 0.8932105947573745, + "2201.00151": 0.8827261192677179, + "1707.09725": 0.9778392493054645, + "2201.00021": 0.9170581778265642, + "2201.00037": 0.9422577069826292, + "2201.00069": 0.9317511947117907, + "2201.00178": 0.9527037762830358, + "2201.00201": 0.9313577012811591, + "1602.06541": 0.9739415077617999, + "2201.00200": 0.937365010799136, + "2201.00022": 0.7643161565284773, + "2201.00029": 0.9927616243405717, + "1601.03642": 0.981982138212087 + }, + "pypdf": { + "2201.00214": 0.984773043075498, + "GeoTopo-book": 0.9267843483814432, + "2201.00151": 0.9366784193042167, + "1707.09725": 0.9799128437947946, + "2201.00021": 0.9806264058057124, + "2201.00037": 0.959331208757123, + "2201.00069": 0.9668886543437042, + "2201.00178": 0.9825861828182239, + "2201.00201": 0.9865737079024715, + "1602.06541": 0.9879715846375909, + "2201.00200": 0.9839537609635827, + "2201.00022": 0.7783073130649137, + "2201.00029": 0.9798789064888997, + "1601.03642": 0.9927797833935018 + }, "borb": { - "1601.03642": 0.5295431890832847, - "1602.06541": 0.405852417302799, + "2201.00214": 0.7037028842821007, + "GeoTopo-book": 0.7910254212656228, + "2201.00151": 0.0, "1707.09725": 0.40052709687324084, "2201.00021": 0.4769067796610169, - "2201.00022": 0.4301739518287243, - "2201.00029": 0.0, "2201.00037": 0.9182362504460923, "2201.00069": 0.0, - "2201.00151": 0.0, "2201.00178": 0.643753339745645, - "2201.00200": 0.5542067356599346, "2201.00201": 0.5095382561142038, - "2201.00214": 0.7037028842821007, - "GeoTopo-book": 0.7910254212656228 + "1602.06541": 0.405852417302799, + "2201.00200": 0.5542067356599346, + "2201.00022": 0.41112858259133134, + "2201.00029": 0.0, + "1601.03642": 0.5295431890832847 }, "pdfium": { - 
"1601.03642": 0.9935736623251659, - "1602.06541": 0.9919005142642908, + "2201.00214": 0.9932975353472919, + "GeoTopo-book": 0.9656593310168123, + "2201.00151": 0.9371048049607478, "1707.09725": 0.9869033794742829, "2201.00021": 0.9825806792373105, - "2201.00022": 0.9782950402996555, - "2201.00029": 0.988813497157528, "2201.00037": 0.9617606084193095, "2201.00069": 0.9894269749096088, - "2201.00151": 0.9371048049607478, "2201.00178": 0.9849444987879046, - "2201.00200": 0.9836863694438841, "2201.00201": 0.9860127582372564, - "2201.00214": 0.9932975353472919, - "GeoTopo-book": 0.9656593310168123 + "1602.06541": 0.9919005142642908, + "2201.00200": 0.9836863694438841, + "2201.00022": 0.7771305119401257, + "2201.00029": 0.988813497157528, + "1601.03642": 0.9935736623251659 }, "pdfminer": { - "1601.03642": 0.8623963054819123, - "1602.06541": 0.9230412447205493, - "1707.09725": 0.9189976553065722, + "2201.00214": 0.9487280293804596, + "GeoTopo-book": 0.7883238686104862, + "2201.00151": 0.8602045202371076, + "1707.09725": 0.9189741613844499, "2201.00021": 0.8588197275011207, - "2201.00022": 0.856704693395706, - "2201.00029": 0.975523516322736, "2201.00037": 0.9301479087658201, "2201.00069": 0.9540472289854548, - "2201.00151": 0.8602045202371076, "2201.00178": 0.9286101949651401, - "2201.00200": 0.9338492127465206, "2201.00201": 0.9153569694026227, - "2201.00214": 0.9487280293804596, - "GeoTopo-book": 0.7882974400650142 - }, - "pdfplumber": { - "1601.03642": 0.6740574957927792, - "1602.06541": 0.5866773388981397, - "1707.09725": 0.9664367136584459, - "2201.00021": 0.6088302790069504, - "2201.00022": 0.5887380987010662, - "2201.00029": 0.984571971904426, - "2201.00037": 0.9338000304367676, - "2201.00069": 0.6146320998483967, - "2201.00151": 0.6123003519582357, - "2201.00178": 0.8864967748757534, - "2201.00200": 0.6680035900600213, - "2201.00201": 0.5674785100286532, - "2201.00214": 0.9386228438413943, - "GeoTopo-book": 0.8423270156489054 + "1602.06541": 0.9230412447205493, + 
"2201.00200": 0.9338492127465206, + "2201.00022": 0.7090353973857456, + "2201.00029": 0.975523516322736, + "1601.03642": 0.8623963054819123 }, "pdfrw": {}, "pdftotext": { - "1601.03642": 0.7876688841643235, - "1602.06541": 0.832311127441282, - "1707.09725": 0.9445130343025355, - "2201.00021": 0.9194266776433834, - "2201.00022": 0.9633196241682549, - "2201.00029": 0.9649219467401285, - "2201.00037": 0.9555825890870249, - "2201.00069": 0.9580918006489482, - "2201.00151": 0.9134287661895024, - "2201.00178": 0.962171435833351, - "2201.00200": 0.9386013327051221, + "2201.00214": 0.9600762653108389, + "GeoTopo-book": 0.9411707401930939, + "2201.00151": 0.9134194729880964, + "1707.09725": 0.9452373645570218, + "2201.00021": 0.919541928333949, + "2201.00037": 0.9555313782701153, + "2201.00069": 0.9586758781603748, + "2201.00178": 0.9634509272301712, "2201.00201": 0.9652466695944957, - "2201.00214": 0.9600477616539399, - "GeoTopo-book": 0.9329012382167732 + "1602.06541": 0.8323735364569717, + "2201.00200": 0.9386551793767248, + "2201.00022": 0.7741910594589381, + "2201.00029": 0.9649219467401285, + "1601.03642": 0.7867700010287713 }, "pymupdf": { - "1601.03642": 0.988502191286414, - "1602.06541": 0.9798295776242781, + "2201.00214": 0.9780968228783716, + "GeoTopo-book": 0.9644376202326115, + "2201.00151": 0.9262640520751881, "1707.09725": 0.9705185650275407, "2201.00021": 0.9773729808638253, - "2201.00022": 0.9744584545748465, - "2201.00029": 0.9771271181366386, "2201.00037": 0.9550639423053028, "2201.00069": 0.9811348240949814, - "2201.00151": 0.9262640520751881, "2201.00178": 0.9792454038818782, - "2201.00200": 0.9749010314275711, "2201.00201": 0.9810750465567505, - "2201.00214": 0.9780968228783716, - "GeoTopo-book": 0.9644376202326115 - }, - "pypdf": { - "1601.03642": 0.9927797833935018, - "1602.06541": 0.9879715846375909, - "1707.09725": 0.9799128437947946, - "2201.00021": 0.9806264058057124, - "2201.00022": 0.979681702355939, - "2201.00029": 0.9798789064888997, - 
"2201.00037": 0.959331208757123, - "2201.00069": 0.9668886543437042, - "2201.00151": 0.9366784193042167, - "2201.00178": 0.9825861828182239, - "2201.00200": 0.9839537609635827, - "2201.00201": 0.9865737079024715, - "2201.00214": 0.984773043075498, - "GeoTopo-book": 0.9267843483814432 + "1602.06541": 0.9798295776242781, + "2201.00200": 0.9749010314275711, + "2201.00022": 0.7742949731877629, + "2201.00029": 0.9771271181366386, + "1601.03642": 0.988502191286414 }, "tika": { - "1601.03642": 0.9551993153165015, - "1602.06541": 0.92860998828161, - "1707.09725": 0.9691487650775417, - "2201.00021": 0.9807331664030006, - "2201.00022": 0.9299007574327018, - "2201.00029": 0.9828859664925239, - "2201.00037": 0.9633315975122081, - "2201.00069": 0.9327143795813528, - "2201.00151": 0.9216462958343726, - "2201.00178": 0.9663575232885726, - "2201.00200": 0.9774490203918432, - "2201.00201": 0.981721720946443, - "2201.00214": 0.9905843784546182, - "GeoTopo-book": 0.9826887048907266 + "2201.00214": 0.9909526851172147, + "GeoTopo-book": 0.9853733000277547, + "2201.00151": 0.9216634911767934, + "1707.09725": 0.9695899981614268, + "2201.00021": 0.9805897449326979, + "2201.00037": 0.9643852585939919, + "2201.00069": 0.9325631793594185, + "2201.00178": 0.9675184507534486, + "2201.00201": 0.9815221704916128, + "1602.06541": 0.9286278537009243, + "2201.00200": 0.9774111438357077, + "2201.00022": 0.7298005166475316, + "2201.00029": 0.9827089337175793, + "1601.03642": 0.9550548853743231 } }, "watermarking_result_file_size": { + "pdfplumber": {}, + "pypdf": { + "2201.00214": 2511916.0, + "GeoTopo-book": 5732063.0, + "2201.00151": 1575105.0, + "1707.09725": 7273085.0, + "2201.00021": 2727836.0, + "2201.00037": 3113158.0, + "2201.00069": 15399764.0, + "2201.00178": 2398354.0, + "2201.00201": 1327004.0, + "1602.06541": 3024013.0, + "2201.00200": 285365.0, + "2201.00022": 1211522.0, + "2201.00029": 830633.0, + "1601.03642": 1021439.0 + }, "borb": {}, "pdfium": {}, "pdfminer": {}, - "pdfplumber": 
{}, "pdfrw": { - "1601.03642": 1026759, - "1602.06541": 3029173, - "1707.09725": 7251530, - "2201.00021": 2725055, - "2201.00022": 1093465, - "2201.00029": 828767, - "2201.00037": 3086248, - "2201.00069": 15393345, - "2201.00151": 1582521, - "2201.00178": 2379988, - "2201.00200": 288194, - "2201.00201": 1329452, - "2201.00214": 2515466, - "GeoTopo-book": 5738184 + "2201.00214": 2515466.0, + "GeoTopo-book": 5738184.0, + "2201.00151": 1582521.0, + "1707.09725": 7251530.0, + "2201.00021": 2725055.0, + "2201.00037": 3086248.0, + "2201.00069": 15393345.0, + "2201.00178": 2379988.0, + "2201.00201": 1329452.0, + "1602.06541": 3029173.0, + "2201.00200": 288194.0, + "2201.00022": 1187087.0, + "2201.00029": 828767.0, + "1601.03642": 1026759.0 }, "pdftotext": {}, "pymupdf": { - "1601.03642": 1091306, - "1602.06541": 3163793, - "1707.09725": 8524289, - "2201.00021": 2802599, - "2201.00022": 1180780, - "2201.00029": 935908, - "2201.00037": 3395981, - "2201.00069": 15520607, - "2201.00151": 1682101, - "2201.00178": 2518436, - "2201.00200": 341709, - "2201.00201": 1400680, - "2201.00214": 2716298, - "GeoTopo-book": 6838694 - }, - "pypdf": { - "1601.03642": 1021439, - "1602.06541": 3024013, - "1707.09725": 7273085, - "2201.00021": 2727836, - "2201.00022": 1113365, - "2201.00029": 830633, - "2201.00037": 3113158, - "2201.00069": 15399764, - "2201.00151": 1575105, - "2201.00178": 2398354, - "2201.00200": 285365, - "2201.00201": 1327004, - "2201.00214": 2511916, - "GeoTopo-book": 5732063 + "2201.00214": 2716298.0, + "GeoTopo-book": 6838694.0, + "2201.00151": 1682101.0, + "1707.09725": 8524289.0, + "2201.00021": 2802599.0, + "2201.00037": 3395981.0, + "2201.00069": 15520607.0, + "2201.00178": 2518436.0, + "2201.00201": 1400680.0, + "1602.06541": 3163793.0, + "2201.00200": 341709.0, + "2201.00022": 1299852.0, + "2201.00029": 935908.0, + "1601.03642": 1091306.0 }, "tika": {} } -} +} \ No newline at end of file diff --git a/read/results/borb/2201.00022.txt 
b/read/results/borb/2201.00022.txt index 2a5902789c5726bce071964f50c03f4858307474..d8229b20168ddff6b13c983d3a698546581ca4bb 100644 GIT binary patch delta 21700 zcmaic3wT`RdFB{jun!mCB;RHKzR8k3b2R6EW=4{&i!EOy*;oSOU@m7yN7C4H5p$6& z2T&j(5D293kq{sdLnxSVNz=63-R-7Hy1ndfn(XGuCTY?(1h!AoH0d@p*=^d=bl>kk zXGSCAb{}|TY39uTfB$!Vzso;QfB&lA`Rd}P{*pId5z~IDoGli_(Q0m1IG~>oydFTvY7CuRB^aS>8o~K6;bHJS7{wyx;R~#%$9{O zDw8F@JXy?*sp3?*Ix*n~FM1VGksr@a`wIQ&Dr=FTmc^TzDH!+%N4Is$$B|4Uh*;(K6Y_jK3T5#xtv!D4w&(#D%FxdCdR6z zY+*w9UZ#P(IpGz?M4QfA@Tss*$>R+wVrDX%nN$k?jKD@E>|OFjwooY*$Eq1DhhK}q z^Zo?yUGT9Ze|}`B_prc`CbQs!Uy$s;qs2(suZS^!D(6dD)3aFDpJPImv23|io#L0V z4d~$wWBj9mkC1>b@L;A2sg3#5Suc2gyj0BdzGYvW^Jj%*UAR=XAPK7Y=PRl>UM(pQ zW;ItSi$Iio+^ca=W-R^hFD=>8 zs*1k4q($1V%=mtx#xEQMODh?#Ku0(1T#5xmlsc9so0Xoiu`E;X<#MyUF4!5RdzKzE zl$mU0QW9ORLLFE`v(xFEmw_&x6Hr7c8RoJi4)QM1uakEvR?d zalvPv@}VBner~p1RWwx`86G`(y#L0&(Idx)1@&K>f>v(6Iu0HF$+aoE@j88JMR+-t zj$N(O&#t!d`v193$IrIwopp*gHB~B}&w>?LPiUqthD3x?%f(W~wZss8W4TGcvCLX5 zB7%On%%r6&tkp`s2;G5#6-1}7;&|%G6}##9+SD>U6%iVJd9Axrk84i*2n-YoftBg& zD|XTCN}OU%E7(jarw|Hf!WNd0u9q74L2%Ak`=y;Pl zu5l>2Vvr`U(de3MUHZEf1^Pq9rvH0=E4{0jP32Ot>>rRHSF4$vr}|@6dg_`4J+-oz zzKMUERwd~J*Cy$qRh{%Z*BXznw&;=ToRwN!oWv&uv>9Ad=(c8)zS?YdPk4}1C7Y3= z!Fk(!bwZ7bp=zNRJ`1|y7XL_ys5tCpbI{k660Aedt=l_S_IyEeyb$D8Q;RqMV(GDFu%Wz6`ej%ji9PxgoVfpV?Y$m2KD$eMdQ} zWT|C+T)~T?k{$DM3O%xZ<6N&-%};oxu}X~^kZ-3?h?MZ*iOM1MIFx3OREhrIDT*xaF3$yuRwJb{hIF|4w8>?l19BPs&=JT*|{*0F^ zLrUQXk7Da=#uJ0ps5r!~^P~q?4#CXA{y=?x&M!rkle5Kg^_(vbXG>l-Dvso4Juw9D z<>#W(i5)EY7sL>JsK=JV(!;hfg*Oh}7=)-QBPWAh+1vY~ZxY%M*&S5H>0%X}I|Nr= zoGDLcrvlClXA7P}AKPfqFE*xOxc_0(S^WIi=B_#ErDlqyb7ICT3wV#vvw&4oUI{V@ z+8DPL9YQl$oY;WE6Ci-qusxE7^R?H@a7FV^nYj
1di9Si{X zK$>?hTY&i-WiG*atO}`>wBiYyR(5Y=GIzPIrJh&j#Ts`y%8;36#;{*39@mVROF!D; zP-n}wt877!Y#l- zetVL>rf<1M(^MSX(bV&Tl6zyY*SQwM75!c)+>y*=&WEFbR}uXuRq%BHlIZoy@IS1j zt)jQ+pF{MA)JIkN%@%hX4m!+U6NIZZMYZA9ZVx!<4;l;xAZ3f4!}(l@BecvYGR)1HEjJo!w#T4p=~rFzLm;?wa5vaXd8; z#%p?3Y@&bK8--YYB~oso?Q@1Tk&}a9>=9FGGOL*2Xb{e54c&`d3=2g&O zcJxF2NBL;JAIf}S(!UUx_I$X+he8({6^4y-*)|Nh(UHt)antTNt7>>V=EC2pS|X89 zLH>2^ms@Ybs5O}m=mT#@qk6}+=Pc#ES9!f;?Ib#2nvj7$T>A53S<)eA7< zsOT6~Yu3yH!J*3460{Vmz$#lm3S6`_DEdKn^B%0E8Y-3&mJPGOMwf}mFkCoy8r;60 znxe@~3Pg6<6Bf)zcLd!TjUP~Yi-j@J0>7&zEG!u6oyInYS3vL>5Q{t%%nlA{*q`Y+ zX+q@C57S_p6g&(sfTsmD#e`4}mNQTX5Y4*GT#zc2U_G%w41H@yjNa;QqT@Riy3g6T zWGrE1sJb&jzj<&C-4ko4w|DNKMe%j?tAkDS_p$4Gl-`nGD6m&OsrF44K`-b6i$;n7 zD$`JTC@{Fr=3VC+`&79v!eyKe^sm3Js_AZAacyX}X3|?*H!brrdd3t~P!qI$SLfnE zL!(t)o3}YmCZWJ7z%hdhNBeqCz!k{f;}$qT*LAq`wOzYto4a1o;*JV6G7>6XvwJ^% zp=bSy`b%{G?kH`DC+NTJ-cGaeeWbKT>8o*9#*Rl@ms5|ngI;M((0v*k%*BokFlJA) z>0W2!QqPNzIdpoDN$t8tf4nC~kLueQoqV{NI@-3=n(i$tOf~K#G!xP2(Y761fvzTe z4+@0XFa%Z{EduGl63h*8K?dkJ0(7V=hK=9h7EF3=r$b-Yx2&+#cp_mqpzB^| zQ&+-FXOsoYEm#bpa=z*|Cu?6O)JCnEEj;N9rVuH-ssUV{%vz>EMrYl2KqqnvfFg*&{t*m)eBXJuG9_Q(xCW; z!v-|sovTVvdm0O$W|@q^Hiq%Xq3YQz1O%1_fr-H{$MzAadJb%B{W_>9r-C7Rl7Pf^ z9MW^a0; zX9Q8m0-=NU0$-9aI;Onx*$&H6Z58j4n&?OzpG3M3@{eKNp(3L_*0f>o9M-Rdez_Gu zy?{zjb^3;@iCh@#9-81Ewf<@||atk3Ib3GyQS z#li%~*voxVYfn_^mlBRkenf@Cs$-k{gY~>ecW9NpL4klY$B3Jj&JLqf*8!IBlybr= zfuJEZ#!;eGEIbEpo z^u_(V>G>{;wj9__-^RZa2U1i<82HYC9rQrAM!S0UP;YZ5%;#edo6S7fr{5y3;ZfYE`p6^pv)lAUF3#R7EkTQ8bo^TW-w3FwG zu(!b2hP*L$!VK#XEW+UDxEys{e0Cubuob^@0l79@2qPelGxMI+WV3XwL^RAcmR;pK z(#K8fx~tLe^mo!f9Ej4YgROL|Ct;qN-y+wA+g*So!;NyP5;wQ(ckrNrSEUux+G&+@)S*8+BJ1e zUswS;x;y1COs!7A%GfMNs%dbCP4Ot|D`{{VZ}6E~#VN0VAOU&9`^Ev(Kfin9>bkE? 
zFx;?VJUG|Yo~P@qR6p6h%@KphunO?Sa*bX;S4v6W|{M$H=OK6%r8KAds6v zcI*Dj(qzN2ZO2mRc3pkXJC3HqH@)4OqPMp;E4J0)d6~2h=5_9Ap`XL455j+B&LP4zzgHP&zY&>pa5V4zMfa;L|ZqdjShOd zyLrilxM9=h+S=)#+PJX(w?>NYFjY#oTlCvz2ff;E(8bgS0Dm|a!`2qsH5`O%y6^!L z#ZoDY(rdR^-@E{h!@roO1LjQbP3>v;@RfnJYZ*-6U+it6`>jrD+h@|dmff_#mmm_@ z34k&j2AcGz`%<9#lD!wdACGRw@2lWwUWl^Z^*M=6${^$H1q&Ds5_DbApF0eyteBm| z0l2Q2t`-Z$ST7=bw#?o=2=tdhj|I>~p-Zt{>shM9Lu7#JuBmjM3W0J6J)DdHu5@3sz$B0uuQjX=FlZ|r*ruNtndR~KA(j@#d3$F zo|LN}60{9M3t!$BZRFiw)CCxF>YiS|zAC@cwX>$Dm>q=wNz>IfsL z0B9rJB9NhS5U^OD(McdK(9aU7Wr)E1>&4W@J$45`LEIKLBZxsw1D;zzfvG1P9R;#C zJJ?`ePr9&(lkryk{*TEx$|)^rTSfNfpi+d|dI@X*H$cV>Rj=Zq-Vh*}Ruq$q`KdCO z>mWl})2oS5;Sw$jml5G$F`ovCfX*ECxaG!y_9$JaZO~eU%?X*c5Mlz1p&SsmWenw_ zFqB%Zj*df%yKw-=hgSnuM1iIb!6Unn&!Vgc6o=$2@GerufU`h@ z5H8D|{-t9BGT^mz`=M>5A5sqlt8k%!Q3G_U0{KYDz3tG1rt zL~eGk(HRI0eN=@tvmzFpyG zA1E_k7_Rc~zbreelmV1e8+vl|N+n6_s}8+(7hEWgjn3&qRTYPBrtZ7fE{c4?`q=Y} zXwo-Gxx=7W{g!nDr$i@ObvbXUjOGnGIeEXL3hKIJAAR4q=&3v0MJ62?Z=>a&jlC~< z_G(>q&=cmS{Y0raQ<+rg`yPZfVbe(3rbj0ndMNGCKj7c*rA->2bg3<~bp`j3#?YWE z(7TVc&@U&~u2aVR^ARCMD*W@@b7sAzGg+}%N407^vyHxxwdv`MMmx_Y=`S;h)#w*+ z(`#UO5BClVeu-W_yN6oFm@Ti|!ky=jj%oOP?HOkSdYgrboF9`GQl7=^Kpy8yDs=yu z#9}srUz|y9fTSuVoc=saES@w>HZm;^q4~G!tHppGnH@4C% zg<8g=ON6ev^zN-HmB%d_xNRR@8fU-r%55h7bR0szO{bf|ijUpCZ-tGI7Lap#^am3b z{lV=fsbJ1?lh#$>O3(W?(w^)#^3UzJ{0Ns~4`F7cX7I#z52c0&I5r~Y37NTTXv6PmREa)_fV)`sI8_e|BF;wV-x&3ZQ&Upkg z7#SWGC}lI4Iq=JnbucAT`xr(pMDO9KK*b2+pkL(r50s}+*2%Fd1|uGFqyvbEyM=k) z^5D?PBeGq{0||mBH5Nn<`gyetOglYYps!7{FgMR6>E@Zw;I~`PPvPhOS%ao$*=jal zuqpO|6rH?~qKh9$(z_RuwDR60IqyeJ@!tLPH2(ejd%G!jUl)DxzWr+;9}^i51|!5k zsIJ-v574LYHt5a|?x%0wZP5+SHql!j+)d3NO3;dXbh_h1aZ26uBDCpKA4<_R_nY+Q zJq7$VdcRE{y?^T((bZ-~P8wZCyX@OmymK@-w{0%MkRm6?DW=&^!X(;ecZ zIE164bJ9RQg~EU3ySIqXd&ncsuMigCF6{64Wf0t4%2J z_z1GIT<{?CRZjchLkq`~R0UcdvXXv$_qJ7X3aN@Fbg6=vkHCKJBc`KFV!8{XUqRm; zpTL{|hH*rmV<)=l7}}B+C>;{)1%Z@D5NP#78|iO9vf0Ei6;B0a^HaImz<<@|i~`zu z*aFry0xRQRb=3mj`W~|B-TN$h?ID9c_rYz{^sq^HJ)CSYBLjM#Lk5n{hf?z5b_f=Z 
z$Z8k#(}&$Ph(N;V(nVWFDT{p(ftPRl=%x+qNx)qc?Xr2$gGMXH7IL#u`lFAE=4b(f zU%}g$-jL)Qf`0t|F47)xY2RIQbn%e{{ncGP^utGzbi>{8B?{an-F{{ZRUX~Qh-8hW zRJk>a15~nkA0aeK-+gpe z#uYS^Qx&{|n)Up%m}gOy@gk2laR1o96W|?`CD9Mr|WcZe&boKcv z_E*wWJ<%`qh6iRs8NvzTe>o<{M};5(Sy_AeH-wpKOpx(72=jn@KE4=L=*#!-nX4%c zW+y-uL?On!Sx6&bH?Qs$a#hp@i)Cn7Ma)7k!Ac(VQ>uk@7Tz&mEc0}s+%8mFycnuF zQz2*J#(b!5t_=Bv^A@8J=M;^97&J(?#=r$t2d>jShlt#lNU~EV8GoAij zutSEQWm{9_adCVZUPH%tB{i(;2J548@J>}YwENKxw;pnhFt32!%OSl$F7pzr%?k#B zU!hfZHPH(QZcusvubRE^SRhCOJL?;A%x6o z2Ti3)V9ETE;9P7#EY!5meWMAs@RB!{jZx{Aj{SYbLJasRh`QbkP)TiCxdv+H<3r7H z*9S~QgxpHqn4U@1=V#`PF>a;k=$TYUJs#Jpu&kM3lx91f z<-hHqP>G35`n@xDchHFCH{6EJIDmN3bbY}Fh=4euDv>-?O&b#A-WppI0YbuXDN@lR zg&>olCvV-oA<`v7PFr3YDbJQqzz>y^Gl$FS>;pA=WPrt03SN%%@Qgj$ z0b6fR0{X8>8`rTnUM}J0W)euMRYNhse_VWl@Mdk@zgMeCjv*!%FK#gDIq(LA2N+JY%L8H;1QRyN zOz{r3yz>HFfOE>jzV56dZ7PcbJQK2486L~U98Vl7ywik_iUBgT3kYrlCq<;19jhZr zpaq_!&lE8`#+BbyvrZKxy{>fO1&6HCxKB8ABV>+1I-Wk>syeV-J-$uXzghd-v5!8-|mSJ?wg}K>zrpa;=DVE!0BPO93NuTBk>j^ zLdot3djXL|4@7Dw{BwZ4_~|S8{jvbu@-jf^W0+pRSyZL}815@%LzvKT$pz2|hm|)^ z>TVwm=)=+`SsNs{!)=}r4r0r^N?-k~<<5j>kRcRJoy+l74Oi8d1*tqFLC&kT@vl(b zkO~G6hl4x^XHY^fzS?QB8w*H6WCU1BS0$AF*BODhM}K=y3z@HNat~!EV2a{MeoCO$ zfq`7$*c>O?5ol)UNBHE-^C0|$SmrAaZ=@GrTkl?h7oxzU2hxVd{xuFO^f&jk-y#j8 z3gI6K{nC5*j*dsVSgi481Cs4oT z%0jh-50^Mbqx>{3AgOGa=VDd(1a_1hs^LHsQr@s#>9}|kU=!FSN9J*4(_vMFi({;U z03~q?QHCiCRFw9=k#eL2pxmrQT=X&kjo=pzY$D5_EFAd}fE^GL#2jqQWrF|mM#~%& zD7dx*-W&&tueAy@$bCQuM2hwn@%q4prd#!g(=Q8FBCB!58>a(9Fvk{rm-+{`6&&ywAAQ zcgdx%f5xFVE+wh+MVDG$ap=_-ZTj#lTyNg+>lXdZD+&70uRCGq}_8a)fg3n1G5Za9`qky zbLcDgt=%Q9p%yMVZI_j~AhVZt3ck`iuh}M2=`z>1STc2X@qv+F&)?3IUa<~D>B-kS z9o})?eL!(NdHW4MYd$Xz#S=ys_zM)EzWF-(i0n7y9$}!VMVH`UVS+J_3d7Js=ihK_ zq_Hx-@O2yn7~~+e$*O-NxTNSW->~e$g1&MCcKkQ1Ks2}7avSda)M`QVpV!PG5EIT3 zG`^fd@oTkxxHkMZ{}$JR#3)pfc^{NMr2mj9J4-ll#|vj}O!|ZC;>!gW#Skm#b0a&i z0WYF~oD{m_`h+GIVX@=RA@?eZ1&E229f>%E5y|<*alC>7W=!rB#$pORb$u)bi&C9TpQUC$k>iBYuqR&u-I*-_T=$(1b|YdIHZBrs;+1zDv>G505< 
zj)OX>j1G)G@GJWR#KPdhSTIP`z@Iqy!ww3cz2Qb~hyU41ZjJxpjV68{UWF;go4BWR zZPP}2bJY&Ih=!TDx(iRNIc3qW4RlfUl(WdDWrve!#JKd^r&PM(a9pX6bqo5{DXn%7 zO)3uS)53coP_3a1dMB8E=O^?9Pf+*GZnq+@rJ%Nzu6>OquEE`#D-i- zA93i|utQ%v;?i#oTZ;|cM(953F4f`)K(zaKj9x!#t>&5}`&-mJb5$h!CE9=;zjZuz zb$E-99zNz$&k2YA`ItkWJK@sAprhg{s)S?kVnG8>MsCPyIF>FVVF>&PmYs!~Ptex~ zdD8Prg*rwT;XU95w+5+es8dH^3qlU`7C{3cFF-XMGlM1(Bd#FR#a`@xUmkLe0GQ+-w=B8|lF1#WXJGa9J;K47ixW z@}e7#yNQNkBF@Yq5*~n45Lsc!NX7)PnOzEmD{p9n9rVocq{S;vV8&(&%$y3#^mT{8 zbLg_LMG7#5)!Oq}7ZR?1RJuaBJZn^thqklz%+ zEkl-M{eG<+z>U$jDP`~&u6(bj;eH-I$FA6bYlWVUH zi=?Bre!OSu_PrgxrPOWMb`I7+>(0#}W%O~4wc z9Tp}KYDGl0))+W(P=?%ky~!9V=1?8_*^|xm!pFDLJ!jW#M_}eAYnO1kFe8+~BpUJ+ zKyN^UqPXCfiuBIMll0EnwTN&_&k4Hy6ZXnlaLpEQH*!p+Kly|eZ_IhqBIJ#a3dhW_oicX{gV#8bZ*ybkcI>U&jig~e~f0HOwy^Gx~P?Y^rUk==F7Q&j{=;F z3O#)zw`&zh1Z2S18)PG>>ZAQ{=UsaClL`9vujzFCQx+X~%%<6=Yk_aT-y4$ zPNPp-t?(wC__79uxE&Yt!8I{nB}G2>F~1_wSB^&?&p*A#3yz& z$$RCzSMdE3xR{(1^v3hfT0m0S$Ewwnk?+tu&&R3%Nt;GrFz7d)bm@0raH!>z2JQWf zP2MNF=;_ZG^er5C*^3tS;-FRh`vwmFHvZl8bdrXF^dEfMr2m9}zxK2N*|oj2V>P=4 z4>KyL#IQw+$m7o>=!uv1(k;(e^yOz1@Sm>`VG5o1M#sF;e6`t1k;bYEHVMH(t_mx+o`1{1;Jf_q7+1;x#ricR{!T3U1 zcKa22`f=@QInRJN@cA9P=$19k99upj6WYO?2V^GE^@f@Ph7^!IpqkB?6Fz|{X8(hU z6Mq^xx?-~&8>Z-BhdQITpuYn)_B7Po(MRC)OqRZ6)`-TA^ex4?v>XqJ@m2PEqQTj5 zSli)=3*Q@?<(vIX37Ckfm#|sE&Jo}e_yHv1RUm+pAK?KJT<$0`x(zcCBs#cP$$HD( zD*m_Zm7(@!f37rL%;L*JI0p|qONXR7^6l~&fIv`tS=tadZTb|@z6B<&r zDqc#YC)Ac2(NpzEk6RX8 z5Q&U(%>yl?xmv3%)Qzz5gH3Vo6b>H?@i#tXV@eTCPrw>@uz^{`;95@`MDRUB@Mpdc zh}53Hzed8M>R8x?Vt*bM?J#eg!GHndGW5b4GHuopE;@m?D7&)8vT;`0#b6Apy%~7J4~&_OK@fsWCb8e2NN;-k^?{)Ga0ak^^5#7#R+a0)pVDU7ph8Z zZc_Dwx)qKAc*f_g307_sBBz=Xh7&0sS6f4VfhlDa8iR5}z&IQais{b*@<5+u4Lu@o zl@78U$M^6|!Xg+c4&kB}qtgK7u}Ptt&SVXo?78X4=xLt3y891Z}<;wzM(Ub#7Z0}_f@jI!qUfFna9i)R)TLxT7nR9|ILJ_6lmWyI_N zdz6|nsu;%$%#r;E8&BO+;6Rs|ZeV})5x0?eOlRM~7ANbDDE@&PJWdf)q&|Xb^fI}J zu&_%SNCjT%yJSg2vYA`2X3oiZxv+rCbt2edLjg#hGg}x(1d#5uZpX4> z5geQ0L)quXX$TlGND9uOyeYL^s)%H{{H~4?ZuiJxA8A_^l^KVMF~^2NaYo@PB`ZPQ zW>BY_r~vI6cLrU;b?| 
zuppk4KrBQD2Ej(wWns>-K)#YB=>J>|}#l9(L&t-%Nc zyR$Ii3Y=~XS3K~2F9q7PR`0d|S{a2Fu?<*?DqN7bp~79x!ZU$Oby^X@E3&-Ik%ik2 zf*xMmasxgYjAw~lxc~e}8qT8E77jz!~uny zZdtx!7`lVNowAMvT?V;#y^>KAJnw~)MDJnh!Myk9nl~pf)dF5N>H_zw7`&9Yp@-+C zsmZwu2ykAr>0g@7l+uu`LxpN!o8irT&uboe@p3`-hnj76ofT>f)&=i9OacWq-#Z)HhYVv~_%${)7mX-j zV8%<%BzrZ&pfL5c6*Em=TAvtJc(5q2{E%)u6xPI1rR3U0g-2T%`$`BF_~D?8rUVX{ z55d_l@`Az!ZN1V{s0COgPHb5o15QkaZ@vnT{O$$^cgA{v8G>m5j6GF&pn_M3W-Cxa zO#8q!q(i>Q4e6gYbfoZrBGE0%w9rimR@x`bHHexV=vmJuXR!jl;H-!d=TI7S$HtC9 zrgLDpHl$Pkik*3u;j43SrxC@@;8r8@?S_X6)A*v10)Q+Sggoy%8(UnSdoJ@mDi%j4pQ*9u)H&y_U-|XVu~ymvfTk#XUD-V4hpi>1crkF{?y}-Hf@dx z?8E-LmV)vrIsSo;=QnLg;b3_MiE{8VkH=jx_C@4Z0tig)O`-lQZDWkuHn;VjlAr}( z4-4@iqGVL~eJCNu4f-R2X0tZq5!n?96goy{HgCLz3u>5(4p2GfDyKrt0^zdii0>3D zqvp<9C3}7gFsf*4;B^3H7-4x}EB2yqZ{F*|t*P9Nsv(vDpD?j?$6r%ri8~k;FoWp} ztGCOD)KI^YAr~b``OS&42*uz07nV$Al&gc+G%$4qSw{xxQYI^%D@=0+B|FAtL4;#a z42k*Y0nXV?$%(>xQLIqS?8jI7z`K=$uk@?C0nUqRG)v|(2n!YtiDgV)oGH*_Yj$i= z0zu)#z!S1;5ezQor~C>%w|L_L-^Z{x>qZUwz(@*EJFYyUyoE7BDRv$oWgiYq1A7W! 
ze0kEF^7;10E}%ekEraF@G>j8d9|%!S;YvXws?hM--H1p-7+pmO0%WU8%p8CL;bMB} z9CW!=$nQ#H0By~GhZTl}t%dckL?5eD3g7Oxbwi9H3ujMZ$F_z-;Ov_>L6s(ClP8RM z0TM#_u|RLF-$2haZM+5Bqns7YF+erKKI;6?T6qesP2f=sL$U#JcQ}Be?E1raDo5cM zzeQNPQXFkqI!uDGuttM#ongb|$I9F&t}{Z91AgP9jKJ~5gIjhf3$r!kV%R_YWWyPo H7ODRaNCOYo delta 8460 zcmZ`;33ycHxgH=S=;^xBSR&ROR(Cb zeKt#U2jlb+v*X>-P>0A?*essx>aybrdTS+E*ow!4)DOAC)FA>|h7}A6f^EvihG7bQ ze_+_OAq?*gGl$j)$WE(4Q(C2>uvWGx8KdP~0!^F@CF1}0r0pRq(8Fk9JC3aM$_f^+ zBMBNIvSoZ*ozYm05VjKhEP-w-60t+f3dN&XkzTs6DH;j|`7z_PPP?7xx9tdvC#+bU zS&)=GXFNZaBbOiZo zRwy*+7U*@@?m%wF0+sa#6Wt^bE}UN5*;Ps`Kb(M(uy_Z41Utr}oebA!)aKAh?6}Rk zVpd-;;T**Z33w-C5s9sGL)JSfkUF@0n`wv05fI@6mYSiN+N zfgp*(4h=F%kx9VF5*Q|pGKaX(HcCr3RC?!vA}IKcprUki%eZKatCwU@$5fB=Yr)8= z<6VO6k@TvzU^p1EVthKB7+Hq5M@R7G$XZ-JO2?crD)!L-m!mX^zES${#wC`tqk`0WSUs?cs#P)Co#6#ij%x+db`EuhBb_NB{s_r zFxewZ_-cflZiPgaV0d_}j>3I>qV1V7eLkD1lMpW5XX49!HJEd^2OGweuF=J=o zxA*$7Wvqg^_tlLt*ixF0*$IM_C_wtPitj&SdCp*Fk?2Cf9xm-flx8My@A|c7uwa~^ z9E^}vL@CXL z1B6_{xXo(t(Hr?@!j7ac#$q->DB4bYOR!kqX~;O3CU}ufX+8kjqWV39TD9X8-1?rPot0?H4OH4H` zs+&Gc|6D5=vIm`)OcgNleHDAhO&eEr+SWumCz&AWEcw1EFs>t#wVPqyc!?nP#rtX< z*I^>qL2~Q}#$(A|$HnlIk)%?~vc)!un)Kt~gSIYdZC1F3`30>0omoKsBhr=nk$y7C z1ucl#Bn9TjKgY8?vN2hzBK0Ri!HC_#NFz3ZW)Qp6r_dHY{+(3Fw_u(2HyjYnE6zxf zB-H$Yo+*&c2kdy<#=#G2wWW?J_i$frv!-scTszKYvsKX~?W2=W7wwOC2YcPl?+b$w zOK2lAK>n#(v}Su{KB{>kvm<>$(p%V$aIa4gB;Oe9;}GW^xWh{+jC-%m+3o3UcI(o4 zaeQ%V+Av{WD;9s4T}W=u4$ocBI%D?5NjnlC5W4ATJzO66efCAe`3}N(;={@rHieUJ zQkB3bC-WpY5+KM$TA@HPlv*j+PBt+}8*w%s4@7%uZQ2d(4-0eZ1&;GUKHiO;A59xx zBhWe+mpcnLPplumxO-_|iW-`34>> zt{5XYBV`YbZ0Mbjqs7zElV6)9G}^HQS(BSJi*7at<4a_?n?`5qhNo7pl@tTZN~&>Q zt`AR@ln*5bd7+puNFG{)c4>+bn1Ziz%cpvb<})tG&1MZotZ*<8_pvscE0WDl#V*uL z^b*9Zi4(}OPBQ<5j%d)wB#$V046W9y$eOC+&!sbllI3Jg((@Hrce8W)s2-0l>kQ#Z zb$DP(?np_J-Hgs}s&2e6X*J}j6Ef+YD2wz8i{2>1qOuxnt)7J^ONFT(MN~yu_cBc( zp_wG(pG>YvrWZ%cOiXpFX|hL=Vb$iPNwg>qHRKO(HE{nkEi=_Q-wRPHQHgvBBb2GI7!3J4BD)h1c&b`O7VR~;V|BwTj|Ev 
zGiKnK{QOCpOhPx+bGPNybxGFoRi%k-GyNpL#l?A3taeM%m?nGNOjnej9E9X1=_+igribY%YPLoWZJ|idjwSxq6dSO*2&NYf`YY(1*!Qd4ldyYo%JRCJFeiu!fX9 zdlujLbjDrkzF9@ozMZ*;uLbF-z~lsqJeg?4|#Qa4k`kPOA^W-ab&W+`zgcFi* zCuMbbyquplZyrxf)`=4#RtOwL>Fw>BW(j09`PfmBL0wk17}Vvt3O>=xE#=fMs;tJz zh9XRF&7^OMNQ3+&VOgFIohe-5 zlQ?Y2ppI9m)Md_0k-C&mn~Z0Rb09VdC~YmwR78(%N{TpeU4v0HCuVoqR?^Mpi>#IO zK0iv{+szitr?!YwRRvHQYr(2jNKx6Uygw$54p0k z>~rkRE18C3Ff6O%D)@VyjN`NFu)5w4WwwTmv&}-sbVRn)?i1N8GII}s*hAppegPlW zGaB}vvw4&KJxH57Sj&xNyB(haYUtsW8G3%p#&_=_hUZ!hq}EtxP|im^kg#nvyQy zuPq+PvN>|06Y=#-Z% znwEF&NU}FomV_3s>Oev1q(UG~F~*!3xPsCt8u)092q+`TKLaYYmG55qCDi2=04DZjv7(}%(7 zMX-G3PK>bH4GJoY-Y6X^N%+U22n88QL2bPoU5iTy!M|NBa{I-}#p#%}#E09L$RyT{ zOSpF_TRI&pi?i_Ml1xHnPOhfbS{7McV)AQ8_ycqMaj+{d(1|FV?wnJ`w{;&@>9je87M|6C5$iOp46kPP6j>@AF z-g!{N_M>Wsd*yd%f#R8#ZoDyyk(MNpv@5412ybQ0Q?DK<1 z3|z7%A6p(O#==K+y!DWQCm!{Ac_B~9<_{$ZTS10m^J2Z17iA9fkgCLZ4bSsZR6ph` zbHK%m^E34oPYj)6fin5tfU_GkJeZB!9@FJiAuh7EUONzUQO-#h;Z?A*ulwc^%_eNjw$>WM3oLvJ?JzkN{YqErmKdjJ(lNX^tvuxj#tPZ=E zCOsP{B1lLqm!i8b+$;wH5EM-RIH}TKrZdhBB!Z z{<;5d4WHbtRks)_|zb*Fr~ z+EH77tLDKvZ#5L zfb#pckhu)NtsahIwmsOnyMaBWroiTRKF=i&xivnlE4a{Tflv8ZVo3$sdBcru^As@~O5%|+Hw zWwT173X6KGb><>Ii$(_{#BCAz)X$6229mwJl+`zLVxftz;%bgCdf{r|d>R=Y4)iEl z97_)7Qy<6+$tb&){K&N$>3K`2T2N`a2qD#C_+v;JCs1-L@>+IYB;Fg0Ss@$^C@Aiz z#y_`9*w|s9dIwKLPId4JuH2#E*bd5IYz=E{1#7SMjuxo$<7bSp_CX(Bu=RAZzDU5v zHGA@~q_bcm2Zr+}aU4p-g}GRmoXwt2EkDt`!X^7m@>6UuMyZTOw3qXV(Oo5s4--xg z=3|McbP>D=Qx>se1eIM~CE3(PrtztfE`@8ug|EBhJeleBGIMouyj4mr`Xr}ZX;xuP zw+|<~O*{wD5cyN)S&P zvIa;G9-mj?^Gcp7hOGf_mdu)6-L0-*DB@zV&;b4zD9&Yd37UlXf`lV$8rmBt_cgCW zO=*|$&UoS?H%Yo+Q$bb=PE@%!&K2p@DCkkKzDFKUDAGiSFFQ>I3HYc-$F-!61tAUp zqUVv2f}TztEUe+JP6NxsDr&lnVdUP(H1hH5a7nJvVGp=YyKsAe+^yrkv7w9Uwu=aY zNP^9YSt&xPoV8i8ATP-cf2vvj!;hRwsiF-<@MM<;`>C>=upKAQZF91ui}-2BFf2^z z_Ro)5kuJyoxl*ws5+!EAS(qD6qt(qlK4lIoqM=R_r>SdI+-Y(G!7;7og%v?!lnLIr zySqXsXX@tGrq@$_6Ntu$81AL2+TrTf+p#E7UJ2$(4xAz4Vm1kt3XuIzvl(YNwqS%p 
zeTOLgm_{aQI$$L!ra7@^&|wzCoS97#>pV)}czEVes}BdBDI3XYYfe5A6aTil2H%`F9TP9`LRucf#TSsXU0#Q$==r8yLRzZ%X zi-uTk0ZIe4UtqB^y;VC1>JDat0pTSd8jEBTJAeR4T6KFmv&+g+QMt(#&=NZK+12U_2$yEX+>v#0~@yMl|X853A$srHJxm2FOQO@UZls*Al2DG81 zm7@d2xvu(3#kJH`PnzOvd&26m*)oRve_lZ|TrfDPloligjr9=E?~-_ECkb|Qcb#B# zB}KE-UB{q1gJ8vjE9U%yo13=Y#2$v4v9q+!bBI7wfnojB%e9nvgCl!47TrMWQAtB?qiry!5 z)F1ca?I(P=o}ROw^x?mb>v-@<3G1HMQ1FyIl=b4J=M`M`l$OqVofxdWpyI2i4D5bE z#@eS%WS&s)>eCtqPE5(B@X|{Kg%hbdlN5sDb^_y`QAd+UIfE!VVc#=4roE`*Q+nb> z9WCU*hrT3_qqswYNYK56E6Hy+MO^e!J-$C?;Jud&bUmwL-peZ9q@z9bvVz9v3>3d& zV*hh0u71Tk?ni09L!68F_7yKKc-0&x6FrkSJ{iYe_2Px&625&^hxojV`LFr$D|#Mz z&4XbtP^$KNK6>c+CLNKdxs=o2@Usk78M8Ym3%~w`NKR)g=p@%73(;+{K>A9CZRFi( z4(Guuf#N^`qBm~JJtqo^%n5pK0mE+!XQuMo{-}TrH?7C5dxTY@u#BR12NnHx2VKZo z9TfEV)k&PtLgJ(h@bvBK*~rW-^mIzg=(Zpcr7PAj;gBw{Q(GcON=Y}0Z!8;2w+Q@R z`0P|D2o$G@f=fo-3BSe>#B*-C9dG)=skxp)s2a1Eka?$qsM6~br z;SMsN-|Qv~oRrb9M;p$(E}C-FNjDDcG4avKsbl$dD_s$#oZ;-&8Wio-(&&YV%lGPd z=2r^7rKkI54T0MY+<3DeZ{DusgPlbv-B|$R4g=AhDsI0+L)M)dhO8&ferI035Tfhp sM2eK++e(F=RJ16@`aAvj@=h+uBe!UndaDns?^1E&tr||;r3^{@FM4j~umAu6 diff --git a/read/results/pdfium/2201.00022.txt b/read/results/pdfium/2201.00022.txt index 7fbd35c..835736a 100644 --- a/read/results/pdfium/2201.00022.txt +++ b/read/results/pdfium/2201.00022.txt @@ -1,4 +1,4 @@ -Draft version January 4, 2022 +Draft version July 7, 2022 Typeset using LATEX twocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 @@ -7,14 +7,22 @@ Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70 M . However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and -above this threshold. 
We suggest that massive BHs, including intermediate mass black holes (IMBHs), -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax￾ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M . Our -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for￾mation channel also has implications for observations. Collisions between stars and BHs can produce -electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally, -formed through this channel, both black holes in the mass gap and IMBHs can merge with the super￾massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). +50 − 70 M , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M . This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. 
Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). 1. INTRODUCTION The recently detected gravitational wave source GW190521 (The LIGO Scientific Collaboration et al. @@ -30,16 +38,16 @@ more than ∼ GW170104, and GW170814 fall within the mass gap (e.g., Abbott et al. 2016, 2017a,b). BH mergers that form second generation BHs and, in some cases, inter￾mediate mass BHs (IMBHs), these gravitational wave -(GW) events can occur in globular clusters, young stel￾lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro￾driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. -2021; Arca Sedda et al. 2021). However, IMBHs are -not limited to these locations and may reside in galac￾Corresponding author: Sanaea C. Rose +(GW) events can occur in globular clusters, young stel￾Corresponding author: Sanaea C. Rose srose@astro.ucla.edu 1 Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski et al. 2020a; Renzo et al. 2020; Vink et al. 2021). -tic nuclei as well. Several studies propose that our +lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro￾driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). 
However, IMBHs are +not limited to these locations and may reside in galac￾tic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc (e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen @@ -53,22 +61,24 @@ as a result of the very first stars (e.g., Madau & Rees Valiante et al. 2016) or from direct collapse of accumu￾lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to sur￾vive galaxy evolution and mergers to present day (e.g., +arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 +2 Rose et al. Rashkov & Madau 2014), with significant effects on their stellar and even dark matter surroundings (e.g., Bertone et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another popular formation channel relies on the coalescence of -many stellar-mass black holes. For example, IMBHs +many stellar-mass black holes, which may seed objects +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs may form in the centers of globular clusters, where few￾body interactions lead to the merger of stellar-mass BHs (e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha -et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro￾arXiv:2201.00022v1 [astro-ph.GA] 31 Dec 2021 -2 Rose et al. -driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. +et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro￾driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; -Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al. -2021; Di Carlo et al. 2021). +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 
2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). The main obstacle to sequential BH mergers in clus￾ters is that the merger recoil velocity kick often exceeds the escape velocity from the cluster (e.g., Schnittman & Buonanno 2007; Centrella et al. 2010; O’Leary et al. @@ -79,24 +89,31 @@ clusters without a SMBH. They considered BH binary merger recoil kicks. The post-kick merger product sinks back towards the cluster center over a dynamical fric￾tion timescale. Using this approach, they showed that 103 − 104 M IMBHs can form efficiently over the life￾time of a cluster. -However, as discussed in Section 2.2, direct star-BH +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision -in galactic nuclei, making the former a promising chan￾nel for BH growth. We propose that IMBHs can form -naturally within the central pc of a SMBH in a galactic -center. Specifically, these IMBHs form through repeated -collisions with main sequence stars, accreting some or -all of the star’s mass depending on the details of the -collision. We demonstrate that this channel can create -IMBHs with masses as large as 104 M , depending on -the density profile of the surrounding stars. +in galactic nuclei, making the former a promising chan￾nel for BH growth. In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star colli￾sions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). 
We propose that IMBHs can form naturally +within the central pc of a galactic center through re￾peated collisions between BHs and main sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow ap￾preciably in size. We demonstrate that this channel can +create IMBHs with masses as large as 104 M , an upper +limit that depends on the density profile of the surround￾ing stars and the efficiency of the accretion. The paper is structured as follows: we describe rele￾vant physical processes and our approach in Section 2. In particular, we provide an overview of collisions in Section 2.2 and present our statistical approach in Sec￾tion 2.3. Section 2.4 discusses our treatment of the mass growth with each collision and presents analytic solutions to our equations in two different regimes, ef￾ficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections 2.5 -and 2.7 discuss implications for GW merger events be￾tween IMBHs and the SMBH. We then incorporate re￾laxation processes and discuss the subsequent results in -Section 2.8. Finally, we discuss and summarize our find￾ings in Section 3. +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events be￾tween IMBHs and the SMBH. We then incorporate re￾laxation processes and discuss the subsequent results in +Section 2.9. Finally, we discuss and summarize our find￾ings in Section 3. 2. METHODOLOGY We consider a population of stellar mass BHs embed￾ded in a cluster of 1 M stars. When stars and BHs collide, the BHs can accrete mass. The growth rate de￾pends on the physical processes outlined below. We use @@ -123,8 +140,17 @@ constant. While this distribution is not necessarily rep build a comprehensive physical picture of BH growth at all distances from the SMBH, including within 0.01 pc. 
Otherwise, the innermost region of the GN would be -poorly represented in our sample. We consider other -observationally motivated distributions in Section 2.8, +poorly represented in our sample. We consider other +IMBH Formation in Galactic Nuclei 3 +Figure 1. We plot the relevant timescales, including col￾lision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark +blue line represents the time for a 105 M BH to merge with +the SMBH through GW emission. +observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribu￾tion’s impact for future work. 2.2. Direct Collisions BHs in the GN can undergo direct collisions with other @@ -152,16 +178,7 @@ mass mBH and a star with mass m?. Detailed in Rose et al. (2020), f1(e•) and f2(e•) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and σ are simply evaluated -at the semimajor axis of the orbit (see below). Note -IMBH Formation in Galactic Nuclei 3 -Figure 1. We plot the relevant timescales, including col￾lision (green), relaxation (gold), and BH-BH GW capture -(purple), for a single BH in the GN as a function of distance -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. The timescales depend on the -density, so we adopt a range of density profiles, bounded by -α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark -blue line represents the time for a 105 M BH to merge with -the SMBH through GW emission. +at the semimajor axis of the orbit (see below). Note that this timescale equation includes the effects of grav￾itational focusing, which enhances the cross-section of interaction. 
Assuming a circular orbit for simplicity, we plot the @@ -187,8 +204,6 @@ in the GN, making the stellar number density: n(r•) = ρ(r•) 1 M . (3) -2 We note that the eccentricity has a very minor effect on the -collision timescale (Rose et al. 2020). The collision timescale also depends on the velocity dis￾persion, which we express as: σ(r•) = s GM• @@ -210,6 +225,9 @@ of density profiles is many orders of magnitude shorter than the BH-BH GW collision timescale (for the rele￾vant equations, see O’Leary et al. 2009; Gond´an et al. 2018, for example). Thus, we expect that star-BH col￾lisions will be the main driver of IMBH growth in the GN. +2 We note that the eccentricity has a very minor effect on the +collision timescale (Rose et al. 2020). +4 Rose et al. 2.3. Statistical Approach to Collisions We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an @@ -227,11 +245,6 @@ for details). We recalculate the collision timescale using the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3 . -3 Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. We include a safe￾guard in our code which takes the ratio tcoll/∆t and rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. -4 Rose et al. 2.4. Mass Growth When a BH collides with a star, it may accrete ma￾terial and grow in mass. The details of the accretion depend on the relative velocity between the BH and @@ -240,15 +253,18 @@ two objects experience a head on collision, with the BH passing through the star’s center. We begin by con￾sidering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R . 
Qualitatively, one -might expect that the BH could accrete the entire star +might expect that the BH could capture the entire star (i.e., ∆m ∼ 1 M ) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH -accretes a “tunnel” of material through the star. This +captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1 R . +approximately 1 R . For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our re￾sults imply. We discuss accretion in Section 2.5. To estimate ∆m, we begin with the Bondi-Hoyle ac￾cretion rate, ˙m, given by: m˙ = 4πG2m2 @@ -259,6 +275,22 @@ s + σ 2) 3/2 , (5) +3 Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe￾guard in our code which takes the ratio tcoll/∆t and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the inital mass equal to 10 M for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. 
The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. where cs is the speed of sound in the star and ρstar is its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). We @@ -282,19 +314,9 @@ start with identical populations of 10 M BHs (grey) and simulate growth through collisions using a statisti￾cal approach. As the BHs grow, the collision timescale, which depends on mBH, decreases. Simultaneously, ∆m, which also depends on mBH, increases. The re￾sult is exponential growth (see discussion and details -surrounding Eq. (8)). In Figure 2, however, the simula￾tions assume α = 1 for the stellar density profile, ensur￾ing the collision timescale is long compared to the sim￾ulation time, 10 Gyr. Therefore, the BHs grow slowly, -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. +surrounding Eq. (8)). In Figure 2, however, the simula￾tions assume α = 1 for the stellar density profile, ensur￾ing the collision timescale is long compared to the sim- +IMBH Formation in Galactic Nuclei 5 +ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: mfinal(tcoll → const.) 
= minitial + ∆m @@ -319,8 +341,7 @@ consumes the star’s entire mass: the accretion-limited star’s mass. Eq. 7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger -index α in the density profile (see Figure 1), the growth -IMBH Formation in Galactic Nuclei 5 +index α in the density profile (see Figure 1), the growth is very efficient and ∆m quickly approaches 1 M . Con￾sequently, while we can now assume ∆m = 1 M , we can no longer assume the collision timescale is constant. The final mass grows exponentially as a result. For @@ -333,7 +354,40 @@ where A = σ 2Rstar/G and C = 2πGnstarRstar/σ. As an example, we plot this curve in purple for the α = 2 case, in Figure 3, which agrees with the simulated masses. -2.5. GW Inspiral +2.5. Uncertainties in Accretion +We note that the ∆M calculated in this proof-of￾concept study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challeng￾ing; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heat￾ing, and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star re￾sults in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity re￾main uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). 
The +question becomes whether or not a BH can still accu￾mulate significant amounts of mass over many collisions +even if it accretes very little in a single one. We ex￾plore the viability of our channel using a physically mo￾tivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vesc/(cη), where vesc is +the escape velocity from the BH at 1 R and η is the +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fraction accreted is consistent with Kremer et al. (2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radi￾ation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumen￾shchev 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral When a BH is close to the SMBH, GW emission can circularize and shrink its orbit. We implement the ef￾fects of GW emission on the BH’s semimajor axis and eccentricity following Peters & Mathews (1963a). The @@ -352,7 +406,7 @@ M• + mBH 2 × 106 M −1  a• -10−4 pc4 +10−2 pc4 ×f(e•)(1 − e 2 • @@ -362,12 +416,17 @@ a• where f(e•) is a function of e•. For all values of e•, f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 × 105 M BH in Figure 1 in -blue. +blue. +6 Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). 
For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. In our simulations, we assume a BH has merged with the SMBH when the condition tGW < telapsed is met. When this condition is satisfied, we terminate mass growth through collisions for that BH.4 -2.6. IMBH growth +2.7. IMBH growth As detailed above, BH-stellar collisions can increase the BH masses as a function of time. Here, we examine the sensitivity of the BH growth to the density power @@ -375,22 +434,22 @@ law. From Eq. (1), it is clear that the growth rate de profiles, will result in more efficient mass growth. In Figure 1, larger values of α lead to collision timescales in the GN’s inner region, inwards of 0.25 pc, that are -4 For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking. much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of a uniform distribution of BHs with initial conditions de￾tailed in Section 2.1 for five α values, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards of 0.25 pc for the α = 2 case. -2.7. Gravitational Wave Mergers and Intermediate +2.8. Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. 
-Following the method detailed in Section 2.5, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion tGW < telapsed, we mark +4 For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. it as merged with the SMBH. We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit un￾til it undergoes an extreme or intermediate mass ratio @@ -399,7 +458,7 @@ plot in Figure 3 shows the BH masses versus time of merger. It is interesting to note that even in the ab￾sence of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. -2.8. Two Body Relaxation Processes +2.9. Two Body Relaxation Processes A BH orbiting the SMBH experiences weak gravita￾tional interactions with other objects in the GN. Over a relaxation time, these interactions alter its orbit about the SMBH. The two-body relaxation timescale for a @@ -417,19 +476,23 @@ its orbital energy and angular momentum by order of themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). In -Figure 1, we plot the relaxation timescale in gold for a -range of α. We note that the Bahcall & Wolf (1976) pro￾file, α = 7/4, corresponds to zero net flux and therefore -does not preferentially migrate objects inward. 
-Additionally, because they are more massive on -average than the surrounding objects, BHs are ex￾pected to segregate inwards in the GN (e.g., Shapiro -& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; -Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). -6 Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and -merger times of these BHs. +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star +IMBH Formation in Galactic Nuclei 7 +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Met￾zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. We note that the Bah￾call & Wolf (1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on av￾erage than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, tseg ≈ hM∗i/mBH × trelax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an @@ -447,8 +510,8 @@ p P•/trlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). 
The new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. in prep for full set -of equations. +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. We account for the effects of relaxation processes, including mass-segregation, using a multi-faceted ap￾proach. We begin by migrating each BH towards the center over its mass-segregation timescale, shifting it in￾crementally inward such that its orbital energy changes @@ -482,22 +545,12 @@ between the BHs. As mentioned above, as the BHs sink towards the SMBH, their concentration in the inner re￾gion of the GN increases, allowing them to dominate the scattering. We reserve the inclusion of these interactions for future study. -2.9. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure 4, two-body relaxation processes result in more EMRIs and IMRIs events. These pro￾cesses allow BHs that begin further from the SMBH to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are -initially closer to the SMBH by allowing them to dif- -IMBH Formation in Galactic Nuclei 7 -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) -for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more -BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. -The dashed, faded lines represent the corresponding initial histograms. 
We assume α = 1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. -fuse out of the inner region where collisions are efficient. +initially closer to the SMBH by allowing them to dif￾fuse out of the inner region where collisions are efficient. As can be seen in Figure 4, the net result is that more BHs grow, but the maximum mass is lower compared to the scenario that ignores two-body relaxation. The @@ -514,14 +567,24 @@ We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass BH and main-sequence stars. Taking both a statisti￾cal and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as -103−4 M and may result in many IMBH-SMBH merg￾ers (intermediate-mass ratio inspiral, IMRIs) and EM￾RIs. +103−4 M and may result in many IMBH-SMBH merg￾ers (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. +8 Rose et al. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH will grow in mass. 
The increase may equal star’s en￾tire mass if the relative velocity is smaller than the es￾cape velocity from the BH at 1 R . However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this -limit, the BH accretes a “tunnel” of material through +limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accre￾tion. In our statistical analysis, we account for Bondi￾Hoyle-Lyttleton accretion and find that BHs outside of -10−2 pc from the SMBH can accrete the entire star (see +10−2 pc from the SMBH can capture the entire star (see Figure 2). The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to @@ -534,6 +597,23 @@ profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller (∼ 104 M ). Additionally, the final masses have no apparent depen￾dence on distance from the SMBH (see Figure 4). +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and repre￾sents an active field of study (e.g., Blandford & Begel￾man 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). To assess the limits of our model, +we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Figure 4. Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M . 
Furthermore, if we increase this ∆M esti￾mate by a factor of 2 (i.e., use η = 0.05), the simula￾tion produces a 3.5×103 M IMBH for the same initial +conditions. Our proof-of-concept demonstrates that col￾lisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. Mass growth through BH-main-sequence star colli￾sions may act in concert with other IMBH formation channels, such as compact object binary mergers (e.g., Hoang et al. 2018; Stephan et al. 2019; Fragione et al. @@ -547,14 +627,74 @@ as highlighted in previous studies, a substantial frac Kozai Lidov mechanism, leaving behind a single star or a single compact object (e.g., Stephan et al. 2016, 2019; Hoang et al. 2018). Additionally, to be susceptible to -evaporation, BH binaries must have a wider configura￾tion. Otherwise, they will be more tightly bound that -8 Rose et al. -the average kinetic energy of the surrounding objects, -and will only harden through weak gravitational inter￾actions with neighboring stars (see for example Figure +evaporation, BH binaries must have a wider configura￾tion. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects +and will only harden through weak gravitational inter- +IMBH Formation in Galactic Nuclei 9 +actions with neighboring stars (see for example Figure 6 in Rose et al. 2020). -Not included in this study, collisions between the BH -and other compact objects will increase the BH growth -rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra￾gione et al. 2021) and even neutron star BH mergers +We note that we assume a steady-state and treat the +stars as a reservoir in this model. Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. 
Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star +formation episodes can occur as often as every ∼ 5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within . 0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5−10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this pic￾ture within ∼ 0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar popula￾tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, . 0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disrup￾tion of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g., Balberg et al. 2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. +An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt +2008). L¨ockmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density pro￾file to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replen￾ished on 100 Myr timescales (Baumgardt et al. 2006a). 
+Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. +While there are many competing dynamical processes +that shape the stellar density profile, we stress that α +5 +In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observa￾tions must already reflect ongoing processes like star￾star collisions and replenishment. Sch¨odel et al. (2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M . This estimate is consistent to order of magni￾tude with our α = 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α = 1.25 +leads to a maximum IMBH mass of 140 M . Further￾more, while the stellar mass within 0.01 pc may be a +few hundred M , Do et al. (2019) and GRAVITY Col￾laboration et al. (2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thou￾sand M , or 0.1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. In that case, the 180 M is what remains of the +stars, while BHs and IMBHs make up the ∼ 1000 M +in the innermost region. +Also not included in this study, collisions between the +BH and other compact objects will increase the BH +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers (e.g., Hoang et al. 2020) become more likely as the BHs increase in mass through stellar collisions. 
As a result, the BH-BH collision timescale, discussed in Section 2.2, @@ -562,39 +702,45 @@ will become relevant to our simulations, allowing the BHs to grow through this channel in addition to stel￾lar collisions. Additionally, this compact object mergers result in GW recoil, which may have a large impact on the dynamics (e.g., Baibhav et al. 2020; Fragione et al. -2021) +2021). The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation -processes, GW emission causes BHs to sink towards the -SMBH and eventually undergo a merger. As a result, -the GN environment is conducive to the formation of -EMRIs and IMRIs. The GW emission from EMRIs and -IMRIs is expected to be at mHz frequencies, making -them promising candidates for LISA to observe. While -the exact rate calculation is beyond the scope of this -study, the mechanism outlined here seems very promis￾ing. -Our results also suggest that IMBHs are likely to ex￾ists in many galactic nuclei, as well as within our own -galactic center. This implication seems to be consis￾tent with recent observational and theoretical studies -(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; -G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen -& Liu 2013; Generozov & Madigan 2020; Fragione et al. -2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -Collaboration et al. 2020). +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. The GW emission from EMRIs and IM￾RIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. 
+Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljevi´c 2003; +Maillard et al. 2004; G¨urkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madi￾gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). +10 Rose et al. Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. -2018; Zhu et al. 2018; Cheng et al. 2018) -5 -. These inter￾actions, in particular grazing collisions, may also result -in tidal disruption events (e.g., Perets et al. 2016; Sam￾sing et al. 2019; Kremer et al. 2021). Thus, the process -outlined here may produce electromagnetic signatures -in addition to GW mergers. -SR thanks the Charles E Young fellowship, the Nina +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre￾mer et al. (2022) for a discussion of electromagnetic sig￾natures from BH-star collisions)6 +. These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre￾mer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use￾ful discussion. +SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. Jura Memorial Graduate Award for support. SR and SN acknowledge the partial support from NASA ATP 80NSSC20K0505. 
SN thanks Howard and Astrid Preston for their gener￾ous support. IL thanks support from the Adams Fellow￾ship. SN and RS thank the Bhaumik Institute visitor -program. +program. This work was performed in part at the As￾pen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. REFERENCES Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, PhRvL, 116, 241102, @@ -603,21 +749,39 @@ doi: 10.1103/PhysRevLett.116.241102 doi: 10.1103/PhysRevLett.118.221101 —. 2017b, PhRvL, 119, 141101, doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi: 10.3847/2041-8205/830/1/L1 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, doi: 10.1088/0004-637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi: 10.1007/s41114-018-0013-8 +6 The connection between the observed X-ray sources at the Galac￾tic Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. https://arxiv.org/abs/2109.12119 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, doi: 10.1086/154711 -5 The connection between the observed X-ray sources at the Galac￾tic Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels. Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, 043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, +doi: 10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. +2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x Baumgardt, H., Makino, J., & Ebisuzaki, T. 
2004, ApJ, 613, 1143, doi: 10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, +doi: 10.1093/mnras/187.2.237 +—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 +IMBH Formation in Galactic Nuclei 11 +—. 2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, @@ -630,15 +794,20 @@ Binney, J., & Tremaine, S. 1987, Galactic dynamics —. 2008, Galactic Dynamics: Second Edition Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi: 10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, +doi: 10.1093/mnras/179.3.433 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, 427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, -doi: 10.1093/mnras/112.2.195 -IMBH Formation in Galactic Nuclei 9 +doi: 10.1093/mnras/112.2.195 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, doi: 10.1093/mnras/104.5.273 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, 2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi: 10.3847/1538-4357/aac2c4 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, C. 2012, JCAP, 2012, 054, doi: 10.1088/1475-7516/2012/07/054 @@ -656,16 +825,35 @@ et al. 1996, Science, 272, 1286, doi: 10.1126/science.272.5266.1286 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, doi: 10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi: 10.1111/j.1365-2966.2005.09937.x +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. +2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.x Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 
2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi: 10.1086/427142 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi: 10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi: 10.1086/338118 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 110, 221101, doi: 10.1103/PhysRevLett.110.221101 Edgar, R. 2004, NewAR, 48, 843, doi: 10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, +doi: 10.3847/1538-4357/abd93c Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Monthly Notices of the Royal Astronomical Society, 443, 2410, doi: 10.1093/mnras/stu1280 @@ -691,7 +879,8 @@ Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, doi: 10.3847/1538-4357/ab94bc Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, J. P. 2018, MNRAS, 478, 4030, -doi: 10.1093/mnras/sty1262 +doi: 10.1093/mnras/sty1262 +12 Rose et al. Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, doi: 10.1103/RevModPhys.82.3121 @@ -723,23 +912,41 @@ Dosopoulou, F. 2018, ApJ, 856, 140, doi: 10.3847/1538-4357/aaafce Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi: 10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi: 10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. +2003, ApJ, 592, 1042, doi: 10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi: 10.1088/0004-637X/796/2/106 Johnson, J. L., & Bromm, V. 
2007, Monthly Notices of the Royal Astronomical Society, 374, 1557, doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, doi: 10.3847/1538-4357/abeb14 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, 45, doi: 10.3847/1538-4357/abb945 +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, +doi: 10.1086/376675 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi: 10.1111/j.1365-2966.2007.12699.x Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, -doi: 10.1093/mnras/stz036 -10 Rose et al. +doi: 10.1093/mnras/stz036 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, 690, 1463, doi: 10.1088/0004-637X/690/2/1463 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi: 10.1046/j.1365-8711.1999.02853.x Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, @@ -747,6 +954,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints, arXiv:2109.06222. https://arxiv.org/abs/2109.06222 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, +doi: 10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 
2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, doi: 10.1088/0034-4885/69/9/R01 Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, @@ -756,17 +972,36 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, 622, L113, doi: 10.1086/429721 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi: 10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi: 10.3847/2041-8213/ac574b Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1088/0004-637X/795/2/102 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, -L35, doi: 10.3847/2041-8213/ab4fed +L35, doi: 10.3847/2041-8213/ab4fed +IMBH Formation in Galactic Nuclei 13 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, 888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, +ApJ, 628, 368, doi: 10.1086/430728 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, 2127, doi: 10.1111/j.1365-2966.2009.14653.x O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., & O’Shaughnessy, R. 2006, ApJ, 637, 937, doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi: 10.1086/319042 +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, +1011, doi: 10.1086/503273 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Stephen R., J. 2016, ApJ, 823, 113, doi: 10.3847/0004-637X/823/2/113 @@ -784,6 +1019,8 @@ Rashkov, V., & Madau, P. 
2014, ApJ, 780, 187, doi: 10.1088/0004-637X/780/2/187 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, A56, doi: 10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi: 10.1093/mnras/stac231 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & Rasio, F. A. 2018, PhRvL, 120, 151101, doi: 10.1103/PhysRevLett.120.151101 @@ -797,6 +1034,8 @@ Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, 100, 043009, doi: 10.1103/PhysRevD.100.043009 Sari, R., & Fragione, G. 2019, ApJ, 885, 24, @@ -806,6 +1045,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. doi: 10.1086/339917 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, doi: 10.1086/519309 +Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi: 10.1051/0004-6361/201730452 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, doi: 10.1086/156521 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, @@ -821,6 +1062,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv e-prints. https://arxiv.org/abs/1603.02709 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d +Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi: 10.1093/mnras/stv2281 The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, arXiv:2009.01075. https://arxiv.org/abs/2009.01075 @@ -834,12 +1079,18 @@ Society, 457, 3356, doi: 10.1093/mnras/stw225 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, G. N. 
2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 -IMBH Formation in Galactic Nuclei 11 +14 Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi: 10.1086/379767 Woosley, S. E. 2017, ApJ, 836, 244, doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi: 10.1046/j.1365-8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi: 10.1088/0004-637X/761/2/129 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 2014, Monthly Notices of the Royal Astronomical Society, 440, 1263, doi: 10.1093/mnras/stu351 diff --git a/read/results/pdfminer/1602.06541.txt b/read/results/pdfminer/1602.06541.txt index 8ca229f..175bca7 100644 --- a/read/results/pdfminer/1602.06541.txt +++ b/read/results/pdfminer/1602.06541.txt @@ -1969,12 +1969,12 @@ J. 13 +no. + 30, 10, -no. - Analysis 1699–1712, diff --git a/read/results/pdfminer/1707.09725.txt b/read/results/pdfminer/1707.09725.txt index 5d9257e..55f0dc1 100644 --- a/read/results/pdfminer/1707.09725.txt +++ b/read/results/pdfminer/1707.09725.txt @@ -6213,16 +6213,16 @@ Softmax 81.46 % σ = 5.08 -88.41 % σ = 0.36 - 88.19 % σ = 0.31 -87.92 % σ = 0.40 +88.41 % σ = 0.36 -79.67 % σ = 4.85 +87.92 % σ = 0.40 84.70 % σ = 0.15 +79.67 % σ = 4.85 + 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 @@ -6240,14 +6240,14 @@ ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 -88.42 % σ = 0.29 85.16 % σ = 0.15 - 88.93 % σ = 0.46 85.35 % σ = 0.21 -84.46 % σ = 0.27 +88.42 % σ = 0.29 85.16 % σ = 0.15 84.46 % σ = 0.23 +84.46 % σ = 0.27 + 88.61 % σ = 0.41 88.00 % σ = 0.47 @@ -6324,16 +6324,16 @@ ELU 75.5 -83.2 +80.1 78.8 -80.1 - -67.2 +83.2 68.9 +67.2 + Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on STL-10. For LReLU, α = 0.3 was chosen. @@ -8596,11 +8596,11 @@ Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3- S. E. 
Fahlman, “An empirical study of learning speed in back-propagation http://repository.cmu.edu/cgi/ +[Online]. Available: + networks,” viewcontent.cgi?article=2799&context=compsci -[Online]. Available: - 1988. L. Fei-Fei, R. Fergus, and P. Perona, diff --git a/read/results/pdfminer/2201.00022.txt b/read/results/pdfminer/2201.00022.txt index e4ebdb5..54b836f 100644 --- a/read/results/pdfminer/2201.00022.txt +++ b/read/results/pdfminer/2201.00022.txt @@ -1,15 +1,16 @@ -Draft version January 4, 2022 +Draft version July 7, 2022 Typeset using LATEX twocolumn style in AASTeX631 -1 +2 2 0 2 -c -e -D -1 -3 + +l +u +J + +6 ] @@ -26,7 +27,7 @@ s a [ -1 +2 v 2 2 @@ -55,17 +56,22 @@ Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50 − 70 M(cid:12). However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and -above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax- -ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M(cid:12). Our -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for- -mation channel also has implications for observations. Collisions between stars and BHs can produce -electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally, -formed through this channel, both black holes in the mass gap and IMBHs can merge with the super- -massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). 
+50 − 70 M(cid:12), the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M(cid:12). This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). 1. INTRODUCTION @@ -82,11 +88,6 @@ GW170104, and GW170814 fall within the mass gap form second generation BHs and, in some cases, inter- mediate mass BHs (IMBHs), these gravitational wave (GW) events can occur in globular clusters, young stel- -lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- -driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. -2021; Arca Sedda et al. 2021). 
However, IMBHs are -not limited to these locations and may reside in galac- Corresponding author: Sanaea C. Rose srose@astro.ucla.edu @@ -96,6 +97,11 @@ metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski et al. 2020a; Renzo et al. 2020; Vink et al. 2021). +lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- +driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). However, IMBHs are +not limited to these locations and may reside in galac- tic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc (e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; @@ -114,16 +120,6 @@ lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to sur- vive galaxy evolution and mergers to present day (e.g., -Rashkov & Madau 2014), with significant effects on their -stellar and even dark matter surroundings (e.g., Bertone -et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda -et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another -popular formation channel relies on the coalescence of -many stellar-mass black holes. For example, IMBHs -may form in the centers of globular clusters, where few- -body interactions lead to the merger of stellar-mass BHs -(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha -et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- @@ -135,12 +131,24 @@ et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- Rose et al. +Rashkov & Madau 2014), with significant effects on their +stellar and even dark matter surroundings (e.g., Bertone +et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda +et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). 
Another +popular formation channel relies on the coalescence of +many stellar-mass black holes, which may seed objects +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs +may form in the centers of globular clusters, where few- +body interactions lead to the merger of stellar-mass BHs +(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha +et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; -Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al. -2021; Di Carlo et al. 2021). +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). The main obstacle to sequential BH mergers in clus- ters is that the merger recoil velocity kick often exceeds @@ -158,17 +166,29 @@ tion timescale. Using this approach, they showed that 103 − 104 M(cid:12) IMBHs can form efficiently over the life- time of a cluster. -However, as discussed in Section 2.2, direct star-BH +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision in galactic nuclei, making the former a promising chan- -nel for BH growth. We propose that IMBHs can form -naturally within the central pc of a SMBH in a galactic -center. Specifically, these IMBHs form through repeated -collisions with main sequence stars, accreting some or -all of the star’s mass depending on the details of the -collision. We demonstrate that this channel can create -IMBHs with masses as large as 104 M(cid:12), depending on -the density profile of the surrounding stars. +nel for BH growth. 
In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star colli- +sions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). We propose that IMBHs can form naturally +within the central pc of a galactic center through re- +peated collisions between BHs and main sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow ap- +preciably in size. We demonstrate that this channel can + +create IMBHs with masses as large as 104 M(cid:12), an upper +limit that depends on the density profile of the surround- +ing stars and the efficiency of the accretion. The paper is structured as follows: we describe rele- vant physical processes and our approach in Section 2. @@ -178,11 +198,11 @@ tion 2.3. Section 2.4 discusses our treatment of the mass growth with each collision and presents analytic solutions to our equations in two different regimes, ef- ficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections 2.5 -and 2.7 discuss implications for GW merger events be- +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events be- tween IMBHs and the SMBH. We then incorporate re- laxation processes and discuss the subsequent results in -Section 2.8. Finally, we discuss and summarize our find- +Section 2.9. Finally, we discuss and summarize our find- ings in Section 3. 2. METHODOLOGY @@ -191,7 +211,6 @@ We consider a population of stellar mass BHs embed- ded in a cluster of 1 M(cid:12) stars. 
When stars and BHs collide, the BHs can accrete mass. The growth rate de- pends on the physical processes outlined below. We use - a statistical approach to estimate the stellar encounters and final IMBH masses. @@ -222,49 +241,44 @@ build a comprehensive physical picture of BH growth at all distances from the SMBH, including within 0.01 pc. Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other -observationally motivated distributions in Section 2.8, -but reserve a more detailed examination of the distribu- -tion’s impact for future work. -2.2. Direct Collisions + IMBH Formation in Galactic Nuclei -BHs in the GN can undergo direct collisions with other -objects. The timescale for this process, tcoll, can be es- -timated using a simple rate calculation: t−1 -coll = nσA, -where n is the number density of objects, σ is the ve- -locity dispersion, and A is the cross-section. We use the -collision timescale from Rose et al. (2020): +3 -t−1 -coll = πn(a•)σ(a•) +in Figure 1.2 As this timescale depends on the density +of surrounding stars, we adopt a density profile of the +form: -(cid:18) +ρ(r•) = ρ0 -× +(cid:19)−α -f1(e•)r2 +, -c + f2(e•)rc +(cid:18) r• +r0 -2G(mBH + m(cid:63)) -σ(a•)2 +(2) -(cid:19) +where r• denotes the distance from the SMBH. We adopt +a SMBH mass of 4 × 106 M(cid:12) such that our fiducial GN +matches our own galactic center (e.g., Ghez et al. 2005; +Genzel et al. 2003). In this case, the normalization in +Eq. (2) is ρ0 = 1.35 × 106 M(cid:12)/pc3 at r0 = 0.25 pc (Gen- +zel et al. 2010). Additionally, in Eq. (2), α gives the +slope of the power law. We assume that a uniform pop- +ulation of solar mass stars account for most of the mass +in the GN, making the stellar number density: -. (1) +n(r•) = -where G is the gravitational constant and rc is the sum -of the radii of the interacting objects, a black hole with -mass mBH and a star with mass m(cid:63). Detailed in Rose -et al. 
(2020), f1(e•) and f2(e•) account for the effect of -the eccentricity of the BH’s orbit about the SMBH on -the collision rate, while n and σ are simply evaluated -at the semimajor axis of the orbit (see below). Note +ρ(r•) +1 M(cid:12) - IMBH Formation in Galactic Nuclei +. -3 +(3) The collision timescale also depends on the velocity dis- persion, which we express as: @@ -305,24 +319,9 @@ vant equations, see O’Leary et al. 2009; Gond´an et al. lisions will be the main driver of IMBH growth in the GN. -2.3. Statistical Approach to Collisions +2 We note that the eccentricity has a very minor effect on the -We simulate the mass growth of a population of BHs -with initial conditions detailed in Section 2.1. Over an -increment ∆t of 106 yr, we calculate the probability of -a collision occurring, given by ∆t/tcoll. This choice of -∆t is motivated by our galactic center’s star formation -timescale (e.g., Lu et al. 2009), allowing for regular re- -plenishment of the stellar population in the GN. We have -checked that the results are not sensitive to this choice -of ∆t, omitted here to avoid clutter. We draw a number -between 0 and 1 using a random number generator. If -that number is less than or equal to the probability, we -increase the BH’s mass by ∆m, the mass that the BH is -expected to accrete in a single collision (see Section 2.4 -for details). We recalculate the collision timescale using -the updated BH mass and repeat this process until the -time elapsed equals the simulation time of 10 Gyr3. +collision timescale (Rose et al. 2020). Figure 1. We plot the relevant timescales, including col- lision (green), relaxation (gold), and BH-BH GW capture @@ -334,61 +333,76 @@ density, so we adopt a range of density profiles, bounded by blue line represents the time for a 105 M(cid:12) BH to merge with the SMBH through GW emission. -that this timescale equation includes the effects of grav- -itational focusing, which enhances the cross-section of -interaction. 
- -Assuming a circular orbit for simplicity, we plot the -timescale for a BH orbiting in the GN to collide with -a 1 M(cid:12) star as a function of distance from the SMBH -in Figure 1.2 As this timescale depends on the density -of surrounding stars, we adopt a density profile of the -form: - -ρ(r•) = ρ0 +observationally motivated distributions in Section 2.9, +but reserve a more detailed examination of the distribu- +tion’s impact for future work. -(cid:19)−α +2.2. Direct Collisions -, +BHs in the GN can undergo direct collisions with other +objects. The timescale for this process, tcoll, can be es- +timated using a simple rate calculation: t−1 +coll = nσA, +where n is the number density of objects, σ is the ve- +locity dispersion, and A is the cross-section. We use the +collision timescale from Rose et al. (2020): -(cid:18) r• -r0 +t−1 +coll = πn(a•)σ(a•) -(2) +(cid:18) -where r• denotes the distance from the SMBH. We adopt -a SMBH mass of 4 × 106 M(cid:12) such that our fiducial GN -matches our own galactic center (e.g., Ghez et al. 2005; -Genzel et al. 2003). In this case, the normalization in -Eq. (2) is ρ0 = 1.35 × 106 M(cid:12)/pc3 at r0 = 0.25 pc (Gen- -zel et al. 2010). Additionally, in Eq. (2), α gives the -slope of the power law. We assume that a uniform pop- -ulation of solar mass stars account for most of the mass -in the GN, making the stellar number density: +× -n(r•) = +f1(e•)r2 -ρ(r•) -1 M(cid:12) +c + f2(e•)rc -. +2G(mBH + m(cid:63)) +σ(a•)2 -(3) +(cid:19) -2 We note that the eccentricity has a very minor effect on the +. (1) -collision timescale (Rose et al. 2020). +where G is the gravitational constant and rc is the sum +of the radii of the interacting objects, a black hole with +mass mBH and a star with mass m(cid:63). Detailed in Rose +et al. 
(2020), f1(e•) and f2(e•) account for the effect of +the eccentricity of the BH’s orbit about the SMBH on +the collision rate, while n and σ are simply evaluated +at the semimajor axis of the orbit (see below). Note +that this timescale equation includes the effects of grav- +itational focusing, which enhances the cross-section of +interaction. -3 Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. We include a safe- -guard in our code which takes the ratio tcoll/∆t and rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. +Assuming a circular orbit for simplicity, we plot the +timescale for a BH orbiting in the GN to collide with +a 1 M(cid:12) star as a function of distance from the SMBH 4 Rose et al. +2.3. Statistical Approach to Collisions + +We simulate the mass growth of a population of BHs +with initial conditions detailed in Section 2.1. Over an +increment ∆t of 106 yr, we calculate the probability of +a collision occurring, given by ∆t/tcoll. This choice of +∆t is motivated by our galactic center’s star formation +timescale (e.g., Lu et al. 2009), allowing for regular re- +plenishment of the stellar population in the GN. We have +checked that the results are not sensitive to this choice +of ∆t, omitted here to avoid clutter. We draw a number +between 0 and 1 using a random number generator. If +that number is less than or equal to the probability, we +increase the BH’s mass by ∆m, the mass that the BH is +expected to accrete in a single collision (see Section 2.4 +for details). We recalculate the collision timescale using +the updated BH mass and repeat this process until the +time elapsed equals the simulation time of 10 Gyr3. + 2.4. Mass Growth When a BH collides with a star, it may accrete ma- @@ -400,15 +414,19 @@ passing through the star’s center. 
We begin by con- sidering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R(cid:12). Qualitatively, one -might expect that the BH could accrete the entire star +might expect that the BH could capture the entire star (i.e., ∆m ∼ 1 M(cid:12)) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH -accretes a “tunnel” of material through the star. This +captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1 R(cid:12). +approximately 1 R(cid:12). For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our re- +sults imply. We discuss accretion in Section 2.5. To estimate ∆m, we begin with the Bondi-Hoyle ac- @@ -429,6 +447,25 @@ s + σ2)3/2 (5) +3 Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe- +guard in our code which takes the ratio tcoll/∆t and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. + +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the inital mass equal to 10 M(cid:12) for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). 
The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. + where cs is the speed of sound in the star and ρstar is its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). We @@ -441,13 +478,14 @@ a lower limit on ∆m. To find ∆m, at each collision, we have: ∆m = min( ˙m × t(cid:63),cross, 1 M(cid:12)) , + +(6) + where t(cid:63),cross ∼ R(cid:12)/σ is the crossing time of the BH in the star. We take the minimum between ˙m × t(cid:63),cross and 1 M(cid:12) because the BH cannot accrete more mass than one star at each collision. -(6) - Figure 2 juxtaposes the expected growth using Bondi- Hoyle-Lyttleton accretion (blue small points) with a much simpler model in which the BH accretes the star’s @@ -462,21 +500,12 @@ sult is exponential growth (see discussion and details surrounding Eq. (8)). In Figure 2, however, the simula- tions assume α = 1 for the stellar density profile, ensur- ing the collision timescale is long compared to the sim- -ulation time, 10 Gyr. Therefore, the BHs grow slowly, -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M(cid:12) for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. + IMBH Formation in Galactic Nuclei + +5 +ulation time, 10 Gyr. 
Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: @@ -514,11 +543,6 @@ star’s mass. Eq. 7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger index α in the density profile (see Figure 1), the growth - - IMBH Formation in Galactic Nuclei - -5 - is very efficient and ∆m quickly approaches 1 M(cid:12). Con- sequently, while we can now assume ∆m = 1 M(cid:12), we can no longer assume the collision timescale is constant. @@ -533,7 +557,41 @@ where A = σ2Rstar/G and C = 2πGnstarRstar/σ. As an example, we plot this curve in purple for the α = 2 case, in Figure 3, which agrees with the simulated masses. -2.5. GW Inspiral +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star re- +sults in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity re- +main uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accu- +mulate significant amounts of mass over many collisions +even if it accretes very little in a single one. We ex- +plore the viability of our channel using a physically mo- +tivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vesc/(cη), where vesc is +the escape velocity from the BH at 1 R(cid:12) and η is the +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). 
This expression for the +fraction accreted is consistent with Kremer et al. (2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radi- +ation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumen- +shchev 2008; Begelman 2012a,b; McKinney et al. 2014). + +2.6. GW Inspiral When a BH is close to the SMBH, GW emission can circularize and shrink its orbit. We implement the ef- @@ -542,8 +600,20 @@ eccentricity following Peters & Mathews (1963a). The characteristic timescale to merge a BH with an SMBH is given by: +2.5. Uncertainties in Accretion + tGW ≈ 2.9 × 1012 yr +We note that the ∆M calculated in this proof-of- +concept study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challeng- +ing; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heat- +ing, and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev + (cid:18) M• 106 M(cid:12) @@ -553,7 +623,7 @@ tGW ≈ 2.9 × 1012 yr 106 M(cid:12) (cid:19)4 -10−4 pc +10−2 pc (cid:19)−1 @@ -572,15 +642,24 @@ f (e•) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 × 105 M(cid:12) BH in Figure 1 in blue. -In our simulations, we assume a BH has merged with -the SMBH when the condition tGW < telapsed is met. -When this condition is satisfied, we terminate mass -growth through collisions for that BH.4 + 6 -2.6. IMBH growth +Rose et al. 
-As detailed above, BH-stellar collisions can increase -the BH masses as a function of time. Here, we examine +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. + +In our simulations, we assume a BH has merged with +the SMBH when the condition tGW < telapsed is met. +When this condition is satisfied, we terminate mass +growth through collisions for that BH.4 + +2.7. IMBH growth + +As detailed above, BH-stellar collisions can increase +the BH masses as a function of time. Here, we examine the sensitivity of the BH growth to the density power law. From Eq. (1), it is clear that the growth rate de- pends on the stellar density profile, governed by the in- @@ -589,13 +668,6 @@ profiles, will result in more efficient mass growth. In Figure 1, larger values of α lead to collision timescales in the GN’s inner region, inwards of 0.25 pc, that are - -4 For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking. - much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of a uniform distribution of BHs with initial conditions de- @@ -603,26 +675,33 @@ tailed in Section 2.1 for five α values, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards of 0.25 pc for the α = 2 case. -2.7. Gravitational Wave Mergers and Intermediate +2.8. 
Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. -Following the method detailed in Section 2.5, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion tGW < telapsed, we mark + +4 For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. + it as merged with the SMBH. We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit un- til it undergoes an extreme or intermediate mass ratio inspiral (EMRI and IMRI, respectively). The righthand plot in Figure 3 shows the BH masses versus time of -merger. It is interesting to note that even in the ab- +merger. sence of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. -2.8. Two Body Relaxation Processes +2.9. Two Body Relaxation Processes A BH orbiting the SMBH experiences weak gravita- tional interactions with other objects in the GN. Over a @@ -648,27 +727,31 @@ its orbital energy and angular momentum by order of themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). In -Figure 1, we plot the relaxation timescale in gold for a -range of α. We note that the Bahcall & Wolf (1976) pro- -file, α = 7/4, corresponds to zero net flux and therefore -does not preferentially migrate objects inward. 
- -Additionally, because they are more massive on -average than the surrounding objects, BHs are ex- -pected to segregate inwards in the GN (e.g., Shapiro -& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; -Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star - 6 - -Rose et al. + IMBH Formation in Galactic Nuclei -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and -merger times of these BHs. +7 +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Met- +zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. We note that the Bah- +call & Wolf (1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. + +Additionally, because BHs are more massive on av- +erage than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, tseg ≈ (cid:104)M∗(cid:105)/mBH × trelax (e.g., Spitzer 1987; Fregeau et al. 
2002; Merritt 2006), which is typically an @@ -686,8 +769,8 @@ of zero and a standard deviation of ∆vrlx/ ∆vrlx = v• approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. in prep for full set -of equations. +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. √ @@ -714,6 +797,7 @@ the BHs to begin diffusing over a relaxation timescale, their orbital parameters changing slowly through a ran- dom process. In this random process, some of the BHs may migrate closer to the SMBH. We terminate mass + growth when the BH enters the inner 200 au of the GN, within which the density of stars is uncertain. This cut- off is based on the 120 au pericenter of S0-2, the closest @@ -738,7 +822,7 @@ gion of the GN increases, allowing them to dominate the scattering. We reserve the inclusion of these interactions for future study. -2.9. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure 4, two-body relaxation processes result in more EMRIs and IMRIs events. These pro- @@ -746,20 +830,6 @@ cesses allow BHs that begin further from the SMBH to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are initially closer to the SMBH by allowing them to dif- - - IMBH Formation in Galactic Nuclei - -7 - -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) -for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more -BHs become EMRIs and IMRIs. 
Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. -The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. - fuse out of the inner region where collisions are efficient. As can be seen in Figure 4, the net result is that more BHs grow, but the maximum mass is lower compared @@ -767,8 +837,8 @@ to the scenario that ignores two-body relaxation. The histogram in Figure 4 presents the final BH mass distri- butions for different power law indices α. As expected, the two-body relaxation suppresses the α dependence -In fact, using a KS test, we highlighted in Figure 3. +In fact, using a KS test, we find that we cannot reject the hypothesis that the two distributions were drawn from the same sample for the α = 1.75 and α = 2 results. Interestingly, a BH mass @@ -784,8 +854,21 @@ BH and main-sequence stars. Taking both a statisti- cal and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as 103−4 M(cid:12) and may result in many IMBH-SMBH merg- -ers (intermediate-mass ratio inspiral, IMRIs) and EM- -RIs. +ers (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. + + 8 + +Rose et al. + +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. 
Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH will grow in mass. The increase may equal star’s en- @@ -793,12 +876,11 @@ tire mass if the relative velocity is smaller than the es- cape velocity from the BH at 1 R(cid:12). However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this -limit, the BH accretes a “tunnel” of material through +limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accre- tion. In our statistical analysis, we account for Bondi- Hoyle-Lyttleton accretion and find that BHs outside of - -10−2 pc from the SMBH can accrete the entire star (see +10−2 pc from the SMBH can capture the entire star (see Figure 2). The efficiency of collisions, and therefore IMBH, @@ -816,6 +898,31 @@ mass, but their maximum mass is smaller (∼ 104 M(cid:12)). Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and repre- +sents an active field of study (e.g., Blandford & Begel- +man 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). 
To assess the limits of our model, + +we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Importantly, we find that BHs within the +Figure 4. +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M(cid:12). Furthermore, if we increase this ∆M esti- +mate by a factor of 2 (i.e., use η = 0.05), the simula- +tion produces a 3.5 × 103 M(cid:12) IMBH for the same initial +conditions. Our proof-of-concept demonstrates that col- +lisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. + Mass growth through BH-main-sequence star colli- sions may act in concert with other IMBH formation channels, such as compact object binary mergers (e.g., @@ -834,21 +941,94 @@ Kozai Lidov mechanism, leaving behind a single star or a single compact object (e.g., Stephan et al. 2016, 2019; Hoang et al. 2018). Additionally, to be susceptible to evaporation, BH binaries must have a wider configura- -tion. Otherwise, they will be more tightly bound that +tion. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects +and will only harden through weak gravitational inter- - 8 + IMBH Formation in Galactic Nuclei -Rose et al. +9 -the average kinetic energy of the surrounding objects, -and will only harden through weak gravitational inter- actions with neighboring stars (see for example Figure 6 in Rose et al. 2020). -Not included in this study, collisions between the BH -and other compact objects will increase the BH growth -rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra- -gione et al. 2021) and even neutron star BH mergers +We note that we assume a steady-state and treat the +stars as a reservoir in this model. 
Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star +formation episodes can occur as often as every ∼ 5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within (cid:46) 0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5 −10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this pic- +ture within ∼ 0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar popula- +tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M(cid:12) stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, (cid:46) 0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disrup- +tion of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g., Balberg et al. 2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. + +An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt +2008). 
L¨ockmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density pro- +file to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replen- +ished on 100 Myr timescales (Baumgardt et al. 2006a). +Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. + +While there are many competing dynamical processes +that shape the stellar density profile, we stress that α + +5 In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. + +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observa- +tions must already reflect ongoing processes like star- +star collisions and replenishment. Sch¨odel et al. (2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M(cid:12). This estimate is consistent to order of magni- +tude with our α = 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α = 1.25 +leads to a maximum IMBH mass of 140 M(cid:12). Further- +more, while the stellar mass within 0.01 pc may be a +few hundred M(cid:12), Do et al. (2019) and GRAVITY Col- +laboration et al. (2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thou- +sand M(cid:12), or 0.1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. In that case, the 180 M(cid:12) is what remains of the +stars, while BHs and IMBHs make up the ∼ 1000 M(cid:12) +in the innermost region. + +Also not included in this study, collisions between the +BH and other compact objects will increase the BH +growth rate. 
BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers (e.g., Hoang et al. 2020) become more likely as the BHs increase in mass through stellar collisions. As a result, the BH-BH collision timescale, discussed in Section 2.2, @@ -857,63 +1037,72 @@ BHs to grow through this channel in addition to stel- lar collisions. Additionally, this compact object mergers result in GW recoil, which may have a large impact on the dynamics (e.g., Baibhav et al. 2020; Fragione et al. -2021) +2021). The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation -processes, GW emission causes BHs to sink towards the -SMBH and eventually undergo a merger. As a result, -the GN environment is conducive to the formation of -EMRIs and IMRIs. The GW emission from EMRIs and -IMRIs is expected to be at mHz frequencies, making -them promising candidates for LISA to observe. While -the exact rate calculation is beyond the scope of this -study, the mechanism outlined here seems very promis- -ing. - -Our results also suggest that IMBHs are likely to ex- -ists in many galactic nuclei, as well as within our own -galactic center. This implication seems to be consis- -tent with recent observational and theoretical studies -(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; -G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen -& Liu 2013; Generozov & Madigan 2020; Fragione et al. -2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -Collaboration et al. 2020). +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. The GW emission from EMRIs and IM- +RIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. 
While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. + +Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljevi´c 2003; +Maillard et al. 2004; G¨urkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madi- +gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). + + 10 + +Rose et al. Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. -2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter- -actions, in particular grazing collisions, may also result -in tidal disruption events (e.g., Perets et al. 2016; Sam- -sing et al. 2019; Kremer et al. 2021). Thus, the process -outlined here may produce electromagnetic signatures -in addition to GW mergers. - -SR thanks the Charles E Young fellowship, the Nina +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre- +mer et al. (2022) for a discussion of electromagnetic sig- +natures from BH-star collisions)6. These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre- +mer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. + +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use- +ful discussion. + +SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. 
Jura Memorial Graduate Award for support. SR and SN acknowledge the partial support from NASA ATP 80NSSC20K0505. SN thanks Howard and Astrid Preston for their gener- ous support. IL thanks support from the Adams Fellow- ship. SN and RS thank the Bhaumik Institute visitor -program. - -REFERENCES +program. This work was performed in part at the As- +pen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, -Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, +Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. -PhRvL, 116, 241102, +REFERENCES -043002, doi: 10.1103/PhysRevD.102.043002 +PhRvL, 116, 241102, doi: 10.1103/PhysRevLett.116.241102 -Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, - —. 2017a, PhRvL, 118, 221101, doi: 10.1103/PhysRevLett.118.221101 @@ -922,182 +1111,299 @@ doi: 10.1103/PhysRevLett.118.221101 doi: 10.1103/PhysRevLett.119.141101 -613, 1143, doi: 10.1086/423299 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, -Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, - -MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x - -Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, +doi: 10.3847/2041-8205/830/1/L1 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 -890, 113, doi: 10.3847/1538-4357/ab6d77 - Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, doi: 10.1088/0004-637X/780/2/148 -Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, -2021, arXiv e-prints, arXiv:2109.12119. +doi: 10.1007/s41114-018-0013-8 + +6 The connection between the observed X-ray sources at the Galac- +tic Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. +2021, arXiv e-prints, arXiv:2109.12119. https://arxiv.org/abs/2109.12119 Bahcall, J. N., & Wolf, R. A. 
1976, ApJ, 209, 214, doi: 10.1086/154711 -—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 +Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, + +043002, doi: 10.1103/PhysRevD.102.043002 + +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, + +doi: 10.1093/mnrasl/slt071 + +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. + +2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x + +Baumgardt, H., Hopman, C., Portegies Zwart, S., & + +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x + +Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, + +613, 1143, doi: 10.1086/423299 + +Begelman, M. C. 1979, MNRAS, 187, 237, + +doi: 10.1093/mnras/187.2.237 + +—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 + + IMBH Formation in Galactic Nuclei + +11 + +—. 2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x + +Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, + +Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, + +MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. + +MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x +Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, + +C. N. 2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 + +890, 113, doi: 10.3847/1538-4357/ab6d77 + +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, + +—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 + 2009, New Journal of Physics, 11, 105016, doi: 10.1088/1367-2630/11/10/105016 Binney, J., & Tremaine, S. 1987, Galactic dynamics - —. 2008, Galactic Dynamics: Second Edition - Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. + +2005, ApJ, 620, 878, doi: 10.1086/427142 + +Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 
2019, + +MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 + +Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, +MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 + +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, + +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, + +L1, doi: 10.1046/j.1365-8711.1999.02358.x + +doi: 10.1126/science.aav8137 + +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, + +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, + +doi: 10.1093/mnras/179.3.433 + +562, L19, doi: 10.1086/338118 + Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, -5 The connection between the observed X-ray sources at the Galac- -tic Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels. +Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, doi: 10.1093/mnras/112.2.195 - IMBH Formation in Galactic Nuclei +110, 221101, doi: 10.1103/PhysRevLett.110.221101 -9 +Edgar, R. 2004, NewAR, 48, 843, + +doi: 10.1016/j.newar.2004.06.001 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, -Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, +Escala, A. 2021, ApJ, 908, 57, doi: 10.1093/mnras/104.5.273 -649, 91, doi: 10.1086/506193 +doi: 10.3847/1538-4357/abd93c Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, -Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, +Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, 2042, doi: 10.1093/mnras/stx1007 -doi: 10.3847/1538-4357/ab94bc +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, + +Monthly Notices of the Royal Astronomical Society, 443, +2410, doi: 10.1093/mnras/stu1280 + +860, 14, doi: 10.3847/1538-4357/aac2c4 + +Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, -Generozov, A., Stone, N. C., Metzger, B. 
D., & Ostriker, +L31, doi: 10.3847/2041-8213/ab77c9 C. 2012, JCAP, 2012, 054, doi: 10.1088/1475-7516/2012/07/054 -J. P. 2018, MNRAS, 478, 4030, -doi: 10.1093/mnras/sty1262 - Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. -Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of +Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, + +arXiv e-prints, arXiv:2107.04639. +https://arxiv.org/abs/2107.04639 2010, Reviews of Modern Physics, 82, 3069, doi: 10.1103/RevModPhys.82.3069 -Modern Physics, 82, 3121, -doi: 10.1103/RevModPhys.82.3121 +Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, + +ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 Chen, X., & Liu, F. K. 2013, ApJ, 762, 95, -Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, +Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, doi: 10.1088/0004-637X/762/2/95 -doi: 10.1086/377127 +L26, doi: 10.3847/2041-8213/abbc0a Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, -Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, +Fragione, G., & Sari, R. 2018, ApJ, 852, 51, doi: 10.3847/1538-4357/aaba16 -620, 744, doi: 10.1086/427175 +doi: 10.3847/1538-4357/aaa0d7 Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 -Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, - -860, 5, doi: 10.3847/1538-4357/aabfee - Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., -Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, - et al. 1996, Science, 272, 1286, doi: 10.1126/science.272.5266.1286 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, +Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & + +Rasio, F. A. 2004, MNRAS, 352, 1, +doi: 10.1111/j.1365-2966.2004.07914.x + +Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & + +Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 +Freitag, M., Amaro-Seoane, P., & Kalogera, V. 
2006, ApJ, + doi: 10.1086/156685 -908, L29, doi: 10.3847/2041-8213/abdf5b +649, 91, doi: 10.1086/506193 -GRAVITY Collaboration, Abuter, R., Amorim, A., et al. -2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, -Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, +Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, -Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +doi: 10.1111/j.1365-2966.2005.09937.x -doi: 10.1088/0004-637X/705/1/361 +doi: 10.3847/1538-4357/ab94bc -MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 -Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. -G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, +Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, -640, L39, doi: 10.1086/503295 +2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.x -MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 +J. P. 2018, MNRAS, 478, 4030, +doi: 10.1093/mnras/sty1262 -G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, + 12 -Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, -MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Rose et al. -doi: 10.1086/430694 +Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of -Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, +Modern Physics, 82, 3121, +doi: 10.1103/RevModPhys.82.3121 -Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, -556, 70, doi: 10.1038/nature25029 +Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, -110, 221101, doi: 10.1103/PhysRevLett.110.221101 +doi: 10.1086/376675 -Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, +doi: 10.1086/377127 -Edgar, R. 2004, NewAR, 48, 843, +Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, -doi: 10.1016/j.newar.2004.06.001 +Ghez, A. 
M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, -Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, +doi: 10.3847/1538-4365/aacb24 -Monthly Notices of the Royal Astronomical Society, 443, -2410, doi: 10.1093/mnras/stu1280 +620, 744, doi: 10.1086/427175 -Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, +Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, -L31, doi: 10.3847/2041-8213/ab77c9 +860, 5, doi: 10.3847/1538-4357/aabfee + +Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, + +908, L29, doi: 10.3847/2041-8213/abdf5b + +GRAVITY Collaboration, Abuter, R., Amorim, A., et al. +2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813 + +Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, + +doi: 10.1088/0004-637X/705/1/361 + +G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, + +640, L39, doi: 10.1086/503295 + +G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, + +doi: 10.1086/430694 + +Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, + +556, 70, doi: 10.1038/nature25029 + +Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, L77, doi: 10.1086/378182 @@ -1111,84 +1417,76 @@ Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & Dosopoulou, F. 2018, ApJ, 856, 140, doi: 10.3847/1538-4357/aaafce -Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, - Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, -arXiv e-prints, arXiv:2107.04639. -https://arxiv.org/abs/2107.04639 - doi: 10.3847/1538-4357/abb66a -Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the - -Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, - -ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 - -Royal Astronomical Society, 374, 1557, -doi: 10.1111/j.1365-2966.2006.11275.x +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, -Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, +doi: 10.1086/431475 -Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, +Igumenshchev, I. V. 
2008, ApJ, 677, 317, -L26, doi: 10.3847/2041-8213/abbc0a +doi: 10.1086/529025 -doi: 10.3847/1538-4357/abeb14 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. -Fragione, G., & Sari, R. 2018, ApJ, 852, 51, +2003, ApJ, 592, 1042, doi: 10.1086/375769 -Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, -doi: 10.3847/1538-4357/aaa0d7 - -45, doi: 10.3847/1538-4357/abb945 +106, doi: 10.1088/0004-637X/796/2/106 -Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & - -Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, +Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the -Rasio, F. A. 2004, MNRAS, 352, 1, -doi: 10.1111/j.1365-2966.2004.07914.x +Royal Astronomical Society, 374, 1557, +doi: 10.1111/j.1365-2966.2006.11275.x -Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & +—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, -doi: 10.3847/1538-4365/aacb24 +doi: 10.1111/j.1365-2966.2007.12699.x -—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, -Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 - doi: 10.1093/mnras/stz036 - 10 - -Rose et al. - Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, -Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & - 690, 1463, doi: 10.1088/0004-637X/690/2/1463 - Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, + +doi: 10.1046/j.1365-8711.1999.02853.x + Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 - Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, M., & Artale, M. C. 2021a, arXiv e-prints, arXiv:2109.06222. https://arxiv.org/abs/2109.06222 - Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. 
B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 + +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x + +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, + +doi: 10.1086/422244 + +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & + +Narayan, R. 2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 + Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, doi: 10.1088/0034-4885/69/9/R01 @@ -1198,7 +1496,6 @@ Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, doi: 10.1086/317837 Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607 - Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, 622, L113, doi: 10.1086/429721 @@ -1206,28 +1503,89 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, + +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & + +618, 569, doi: 10.1086/426067 + +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 + +Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, + +doi: 10.3847/1538-4357/abeb14 + +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, + +L18, doi: 10.3847/2041-8213/ac574b Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1088/0004-637X/795/2/102 -Rasio, F. A. 2018, PhRvL, 120, 151101, -doi: 10.1103/PhysRevLett.120.151101 +Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, -Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, +Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, -PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 -Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, +45, doi: 10.3847/1538-4357/abb945 -Phys. Rev. D, 100, 043027, -doi: 10.1103/PhysRevD.100.043027 +L35, doi: 10.3847/2041-8213/ab4fed -Rose, S. C., Naoz, S., Gautam, A. K., et al. 
2020, ApJ, 904, + IMBH Formation in Galactic Nuclei -113, doi: 10.3847/1538-4357/abc557 +13 + +Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, + +888, L8, doi: 10.3847/2041-8213/ab5e3b + +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & + +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 + +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. + +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 + +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, + +ApJ, 628, 368, doi: 10.1086/430728 + +O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, + +2127, doi: 10.1111/j.1365-2966.2009.14653.x + +O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., + +& O’Shaughnessy, R. 2006, ApJ, 637, 937, +doi: 10.1086/498446 + +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, + +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 + +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, + +doi: 10.1086/319042 + +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, + +1011, doi: 10.1086/503273 + +Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, + +Stephen R., J. 2016, ApJ, 823, 113, +doi: 10.3847/0004-637X/823/2/113 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, + +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 + Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, 100, 043009, doi: 10.1103/PhysRevD.100.043009 @@ -1245,48 +1603,29 @@ Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, doi: 10.1086/519309 -Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, +Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, -Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, +609, A27, doi: 10.1051/0004-6361/201730452 -doi: 10.1086/156521 +Shapiro, S. L., & Marchant, A. B. 
1978, ApJ, 225, 603, -L35, doi: 10.3847/2041-8213/ab4fed +doi: 10.1086/156521 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, -Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, - MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367 -888, L8, doi: 10.3847/2041-8213/ab5e3b - -O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, - -2127, doi: 10.1111/j.1365-2966.2009.14653.x - Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700 Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, -O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., +Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, doi: 10.1086/173190 -& O’Shaughnessy, R. 2006, ApJ, 637, 937, -doi: 10.1086/498446 - -Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, - -Stephen R., J. 2016, ApJ, 823, 113, -doi: 10.3847/0004-637X/823/2/113 - -Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, - 435, doi: 10.1103/PhysRev.131.435 - —. 1963b, Physical Review, 131, 435, doi: 10.1103/PhysRev.131.435 @@ -1301,22 +1640,53 @@ Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, 528, L17, doi: 10.1086/312422 —. 2002, ApJ, 576, 899, doi: 10.1086/341798 - Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, doi: 10.1088/0004-637X/780/2/187 +Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, + +A56, doi: 10.1051/0004-6361/202037710 + +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, + +MNRAS, doi: 10.1093/mnras/stac231 + +Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & + +Rasio, F. A. 2018, PhRvL, 120, 151101, +doi: 10.1103/PhysRevLett.120.151101 + +Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, + +PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 +Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, + +Phys. Rev. D, 100, 043027, +doi: 10.1103/PhysRevD.100.043027 + Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576 —. 
2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576 + Spitzer, L. 1987, Dynamical evolution of globular clusters + Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv e-prints. https://arxiv.org/abs/1603.02709 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d + +Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, + +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 + +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, + +doi: 10.1093/mnras/stv2281 + The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, @@ -1328,6 +1698,7 @@ https://arxiv.org/abs/2009.01190 Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A. 2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31 + Valiante, R., Schneider, R., Volonteri, M., & Omukai, K. 2016, Monthly Notices of the Royal Astronomical @@ -1335,36 +1706,50 @@ Society, 457, 3356, doi: 10.1093/mnras/stw225 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, -Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, +Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, -A56, doi: 10.1051/0004-6361/202037710 +113, doi: 10.3847/1538-4357/abc557 G. N. 2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 - IMBH Formation in Galactic Nuclei + 14 -11 +Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & -Breivik, K. 2021, ApJ, 917, 76, +Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +Breivik, K. 2021, ApJ, 917, 76, doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, + +doi: 10.1086/379767 + Woosley, S. E. 2017, ApJ, 836, 244, -Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +doi: 10.3847/1538-4357/836/2/244 + +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, + +doi: 10.1046/j.1365-8711.2002.05532.x + +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, + +doi: 10.1088/0004-637X/761/2/129 + 2014, Monthly Notices of the Royal Astronomical + Society, 440, 1263, doi: 10.1093/mnras/stu351 Zheng, X., Lin, D. N. C., & Mao, S. 
2020, arXiv e-prints, + arXiv:2011.04653. https://arxiv.org/abs/2011.04653 Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26, -doi: 10.3847/1538-4357/836/2/244 - doi: 10.3847/1538-4365/aab14f \ No newline at end of file diff --git a/read/results/pdfminer/GeoTopo-book.txt b/read/results/pdfminer/GeoTopo-book.txt index bb02786..40a246b 100644 --- a/read/results/pdfminer/GeoTopo-book.txt +++ b/read/results/pdfminer/GeoTopo-book.txt @@ -51,19 +51,19 @@ in „Analysis I“ vermittelt. ), Mengenschreibweisen ( , -∩ +∪ , -∪ +∩ P ∃ -∅ - \ +∅ + , , R, @@ -648,10 +648,10 @@ und T = 0, 1, 2 -} - { +} + 0 , @@ -7537,10 +7537,10 @@ R) → -(cid:55)→ - → +(cid:55)→ + × I @@ -8097,12 +8097,12 @@ e → -→ - (cid:55)→ → +→ + ist nicht injektiv. π1(S1, 1) ∼= @@ -8855,10 +8855,10 @@ z Abbildungen. -} - (cid:107) +} + ∈ 1) f1 := idR ist eine offene und stetige Abbildung. 2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5). @@ -9034,10 +9034,10 @@ Vi Vj = -∀ - ∃ +∀ + ∈ ∈ @@ -9079,10 +9079,10 @@ yi ∩ -} - { +} + . ∅ @@ -10188,10 +10188,10 @@ von y. ⊆ -U ein Homöomorphismus. Dann ist W := f −1(V ) - Y +U ein Homöomorphismus. Dann ist W := f −1(V ) + × ∈ @@ -11446,6 +11446,8 @@ Hi, B ∈ { +1, 2 + ∈ ∈ @@ -11457,8 +11459,6 @@ H2 in zwei nichtleere Teilmengen H1, H2, sodass = j. -1, 2 - g i @@ -12447,10 +12447,10 @@ g = (cid:4) -{ - } +{ + ⇒ ⇒ @@ -13660,17 +13660,17 @@ Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 1 − -4 +2 -0 +1 3 -5 +0 -1 +4 -2 +5 x @@ -16703,12 +16703,12 @@ y3 (cid:43) - -   + + (cid:42)  @@ -17773,10 +17773,10 @@ phismus. H, x -(cid:55)→ - → +(cid:55)→ + 3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup- penhomomorphismus. @@ -18229,12 +18229,12 @@ g. 
Da aber x f und f -∈ - ∩ ∈ +∈ + (cid:107) ∈ diff --git a/read/results/pdfplumber/1601.03642.txt b/read/results/pdfplumber/1601.03642.txt index cd8e8fd..c7d8790 100644 --- a/read/results/pdfplumber/1601.03642.txt +++ b/read/results/pdfplumber/1601.03642.txt @@ -1,84 +1,166 @@ 1 -Creativity in Machine Learning w -x 0 +Creativity in Machine Learning +Martin Thoma +E-Mail: info@martin-thoma.de +Abstract—Recent machine learning techniques can be modified +to produce creative results. Those results did not exist before; it +is not a trivial combination of the data which was fed into the +machine learning system. The obtained results come in multiple +forms: As images, as text and as audio. +This paper gives a high level overview of how they are created +and gives some examples. It is meant to be a summary of the +current work and give people who are new to machine learning +some starting points. +I. INTRODUCTION +According to [Gad06] creativity is “the ability to use your +imagination to produce new ideas, make things etc.” and +imagination is “the ability to form pictures or ideas in your +mind”. +Recentadvancesinmachinelearningproduceresultswhichthe +author would intuitively call creative. A high-level overview +over several of those algorithms are described in the following. +This paper is structured as follows: Section II introduces the +reader on a very simple and superficial level to machine +learning, Section III gives examples of creativity with images, +Section IV gives examples of machines producing textual +content, and Section V gives examples of machine learning +and music. A discussion follows in Section VI. +II. BASICSOFMACHINELEARNING +The traditional approach of solving problems with software +is to program machines to do so. The task is divided in as +simple sub-tasks as possible, the subtasks are analyzed and the +machineisinstructedtoprocesstheinputwithhuman-designed +algorithms to produce the desired output. 
However, for some +taskslikeobjectrecognitionthisapproachisnotfeasible.There +are way to many different objects, different lighting situations, +variations in rotation and the arrangement of a scene for a +human to think of all of them and model them. But with the +internet, cheap computers, cameras, crowd-sourcing platforms +like Wikipedia and lots of Websites, services like Amazon +Mechanical Turk and several other changes in the past decades +alotofdatahasbecomeavailable.Theideaofmachinelearning +is to make use of this data. +A formal definition of the field of Machine Learning is given +by Tom Mitchel [Mit97]: +A computer program is said to learn from experi- +ence E with respect to some class of tasks T and +performance measure P, if its performance at tasks +inT,asmeasuredbyP,improveswithexperienceE. +Σ ϕ +x +0 +x +1 +x 2 +x +3 +x +n +w 0 w -x 1 1 w -Martin Thoma x 2 2 Σ ϕ +2 w -E-Mail: info@martin-thoma.de x 3 -.3 -. . wn -x -n -Abstract—Recent machine learning techniques can be modified (a) Exampleofanartificialneuronunit.(b) Avisualizationofasimplefeed- -to produce creative results. Those results did not exist before; it xiaretheinputsignalsandwiare forwardneuralnetwork.The5in- -is not a trivial combination of the data which was fed into the weightswhichhavetogetlearned. putnodesarered,the2biasnodes -machine learning system. The obtained results come in multiple Each input signal gets multiplied are gray, the 3 hidden units are -forms: As images, as text and as audio. with its weight, everything gets greenandthesingleoutputnode -summedupandtheactivationfunc- isblue. -This paper gives a high level overview of how they are created tionϕisapplied. -and gives some examples. It is meant to be a summary of the -current work and give people who are new to machine learning Fig. 1: Neural networks are based on simple units which get -some starting points. combined to complex networks. -I. 
INTRODUCTION This means that machine learning programs adjust internal 6102 +3 +wn +. +. . +(a) Exampleofanartificialneuronunit. +xiaretheinputsignalsandwiare +weightswhichhavetogetlearned. +Each input signal gets multiplied +with its weight, everything gets +summedupandtheactivationfunc- +tionϕisapplied. +(b) Avisualizationofasimplefeed- +forwardneuralnetwork.The5in- +putnodesarered,the2biasnodes +are gray, the 3 hidden units are +greenandthesingleoutputnode +isblue. +Fig. 1: Neural networks are based on simple units which get +combined to complex networks. +This means that machine learning programs adjust internal parameters to fit the data they are given. Those computer -According to [Gad06] creativity is “the ability to use your programs are still developed by software developers, but the -imagination to produce new ideas, make things etc.” and developer writes them in a way which makes it possible to -imagination is “the ability to form pictures or ideas in your adjust them without having to re-program everything. Machine naJ -mind”. learning programs should generally improve when they are fed +programs are still developed by software developers, but the +developer writes them in a way which makes it possible to +adjust them without having to re-program everything. Machine +learning programs should generally improve when they are fed with more data. -Recentadvancesinmachinelearningproduceresultswhichthe 21 -author would intuitively call creative. A high-level overview The field of machine learning is related to statistics. Some -over several of those algorithms are described in the following. algorithms directly try to find models which are based on well- -]VC.sc[ +The field of machine learning is related to statistics. Some +algorithms directly try to find models which are based on well- known distribution assumptions of the developer, others are -This paper is structured as follows: Section II introduces the more general. 
-reader on a very simple and superficial level to machine -learning, Section III gives examples of creativity with images, A common misunderstanding of people who are not related -Section IV gives examples of machines producing textual in this field is that the developers don’t understand what their -content, and Section V gives examples of machine learning machine learning program is doing. It is understood very well -and music. A discussion follows in Section VI. in the sense that the developer, given only a pen, lots of paper 1v24630.1061:viXra +A common misunderstanding of people who are not related +in this field is that the developers don’t understand what their +machine learning program is doing. It is understood very well +in the sense that the developer, given only a pen, lots of paper and a calculator could calculate the same result as the machine doeswhenhegetsthesamedata.Andlotsoftime,ofcourse.It -II. BASICSOFMACHINELEARNING isnotunderstoodinthesensethatitishardtomakepredictions how the algorithm behaves without actually trying it. However, -The traditional approach of solving problems with software this is similar to expecting from an electrical engineer to -is to program machines to do so. The task is divided in as explain how a computer works. The electrical engineer could -simple sub-tasks as possible, the subtasks are analyzed and the probably get the knowledge he needs to do so, but the amount -machineisinstructedtoprocesstheinputwithhuman-designed of time required to understand such a complex system from -algorithms to produce the desired output. However, for some basic building blocks is a time-intensive and difficult task. 
-taskslikeobjectrecognitionthisapproachisnotfeasible.There -are way to many different objects, different lighting situations, An important group of machine learning algorithms was -variations in rotation and the arrangement of a scene for a inspired by biological neurons and are thus called artificial -human to think of all of them and model them. But with the neural networks. Those networks are based on mathematical -internet, cheap computers, cameras, crowd-sourcing platforms functions called artificial neurons which take n ∈ N num- -like Wikipedia and lots of Websites, services like Amazon bers x ,...,x ∈ R as input, multiply them with weights -1 n -Mechanical Turk and several other changes in the past decades w ,...,w ∈ R, add them and apply a so called activation -1 n -alotofdatahasbecomeavailable.Theideaofmachinelearning function ϕ as visualized in Figure 1(a). One example of such -is to make use of this data. an activation function is the sigmoid function ϕ(x)= 1 . -1+e−x +An important group of machine learning algorithms was +inspired by biological neurons and are thus called artificial +neural networks. Those networks are based on mathematical +functions called artificial neurons which take n ∈ N num- +bers x 1,...,x +n +∈ R as input, multiply them with weights +w 1,...,w +n +∈ R, add them and apply a so called activation +function ϕ as visualized in Figure 1(a). One example of such +an activation function is the sigmoid function ϕ(x)= 1 1+e−x. Those functions act as building blocks for more complex -A formal definition of the field of Machine Learning is given systems as they can be chained and grouped in layers as -by Tom Mitchel [Mit97]: visualized in Figure 1(b). The interesting question is how -A computer program is said to learn from experi- the parameters w are learned. This is usually done by an +the parameters w +i +are learned. This is usually done by an +optimization technique called gradient descent. 
The gradient +descent algorithm takes a function which has to be derivable, +starts at any point of the surface of this error function and +a +r +X i -ence E with respect to some class of tasks T and optimization technique called gradient descent. The gradient -performance measure P, if its performance at tasks descent algorithm takes a function which has to be derivable, -inT,asmeasuredbyP,improveswithexperienceE. starts at any point of the surface of this error function and +v +: +1 +6 +0 +1 +. +0 +3 +6 +4 +2 +v +1 +[ +c +s +. +C +V +] +1 +2 +J +a +n +2 +0 +1 +6 2 makes a step in the direction which goes downwards. Hence it tries to find a minimum of this high-dimensional function. @@ -92,7 +174,7 @@ One would take one neuron per pixel and channel. This means for 500px×500px RGB images one would get 750,000 input signals. To approach this problem, so called Convolutional Neural Networks (CNNs) were introduced. Instead of learning -the full connection between the input layer and the first Fig. 2: Aurelia aurita +the full connection between the input layer and the first hidden layer, those networks make use of convolution layers. Convolution layers learn a convolution; this means they learn the weights of an image filter. An additional advantage is that @@ -106,35 +188,78 @@ effect it has on the recognition system is difficult to estimate. [MOT15] proposes a technique to analyze the weights learned by such a network. A similar idea was applied by [VKMT13]. For example, consider a neural network which was trained to -recognize various images like bananas. This technique turns Fig. 3: DeepDream impression of Aurelia aurita +recognize various images like bananas. This technique turns the network upside down and starts with random noise. 
To analyze what the network considers bananas to look like, the -random noise image is gradually tweaked so that it generates Ithasbecomefamousintheinternet[Red].Usually,theimages -theoutput“banana”.Additionally,thechangescanberestricted are generated in iterations and in each iteration it is zoomed -inawaythatthestatisticsoftheinputimagehavetobesimilar into the image. -to natural images. One example of this is that neighboring Images and videos published by the Google engineers can be -pixels are correlated. seenat[goo15].Figure2showstheoriginalimagefromwhich -Figure 3 was created with the deep dream algorithm. +random noise image is gradually tweaked so that it generates +theoutput“banana”.Additionally,thechangescanberestricted +inawaythatthestatisticsoftheinputimagehavetobesimilar +to natural images. One example of this is that neighboring +pixels are correlated. Another technique is to amplify the output of layers. This was described in [MOT15]: Weaskthenetwork:“Whateveryouseethere,Iwant -B. Artistic Style Imitation more of it!” This creates a feedback loop: if a cloud looks a little bit like a bird, the network will make -it look more like a bird. This in turn will make the A key idea of neural networks is that they learn different -network recognize the bird even more strongly on representations of the data in each layer. In the case of -the next pass and so forth, until a highly detailed CNNs, this can easily be visualized as it was done in various -bird appears, seemingly out of nowhere. papers [ZF14]. Usually, one finds that the network learned -to build edge detectors in the first layer and more complex +it look more like a bird. This in turn will make the +network recognize the bird even more strongly on +the next pass and so forth, until a highly detailed +bird appears, seemingly out of nowhere. The name “Inceptionism” in the title of [MOT15] comes from -structures in the upper layers. the science-fiction movie “Inception” (2010). 
One reason it -might be chosen is because neural networks are structured Gatys,EckerandBethgeshowedin[GEB15]thatwithaclever -in layers. Recent publications tend to have more and more choice of features it is possible to separate the general style of -layers [HZRS15]. The used jargon is to say they get “deeper”. an image in terms of local image appearance from the content -As this technique as published by Google engineers, the of an image. They support their claim by applying the style of -technique is called Google DeepDream. different artists to an arbitrary image of their choice. +might be chosen is because neural networks are structured +in layers. Recent publications tend to have more and more +layers [HZRS15]. The used jargon is to say they get “deeper”. +As this technique as published by Google engineers, the +technique is called Google DeepDream. +Fig. 2: Aurelia aurita +Fig. 3: DeepDream impression of Aurelia aurita +Ithasbecomefamousintheinternet[Red].Usually,theimages +are generated in iterations and in each iteration it is zoomed +into the image. +Images and videos published by the Google engineers can be +seenat[goo15].Figure2showstheoriginalimagefromwhich +Figure 3 was created with the deep dream algorithm. +B. Artistic Style Imitation +A key idea of neural networks is that they learn different +representations of the data in each layer. In the case of +CNNs, this can easily be visualized as it was done in various +papers [ZF14]. Usually, one finds that the network learned +to build edge detectors in the first layer and more complex +structures in the upper layers. +Gatys,EckerandBethgeshowedin[GEB15]thatwithaclever +choice of features it is possible to separate the general style of +an image in terms of local image appearance from the content +of an image. They support their claim by applying the style of +different artists to an arbitrary image of their choice. 
3 +(a) OriginalImage (b) Styleimage +(c) TheartisticstyleofVanGogh’s“StarryNight”appliedtothephotograph +ofaScottishHighlandCattle. +Fig. 4: The algorithm takes both, the original image and the +style image to produce the result. +This artistic style imitation can be seen itself as creative work. +An example is given by Figure 4. The code which created this +example is available under [Joh16]. +Something similar was done by [SPB+14], where the style of +a portrait photograph was transferred to another photograph. +A demo can be seen on [Shi14]. +C. Drawing Robots +PatrickTressetandFrdricFolLeymariecreatedasystemcalled +AIKON (Automatic IKONic drawing) which can automatically +generated sketches for portraits [TL05]. AIKON takes a digital +photograph, detects faces on them and sketches them with a +pen-plotter. +Tresset and Leymaire use k-means clustering [KMN+02] to +segment regions of the photograph with similar color which, +in turn, will get a similar shading. +Such a drawing robot could apply machine learning techniques +known from computer vision for detecting the human. It +could apply self-learning techniques to draw results most +similar to the artists impression of the image. However, the +system described in [TL05] seems not to be a machine +learning computer program according to the definition by Tom +Mitchell [Mit97]. IV. TEXTDATA Digital text is the first form of natural communication which involved computers. It is used in the form of chats, websites, @@ -142,7 +267,7 @@ on collaborative projects like Wikipedia, in scientific literature. Of course, it was used in pre-digital times, too: In newspaper, in novels, in dramas, in religious texts like the bible, in books for education, in notes from conversations. -(a) OriginalImage (b) Styleimage This list could be continued and most of these kinds of texts +This list could be continued and most of these kinds of texts are now available in digital form. 
This digital form can be used to teach machines to generate similar texts. The most simple language model which is of use is an n-gram @@ -158,208 +283,249 @@ Networks (RNNs). Those character predictors take a sequence of characters as input and predict the next character. In that sense they are similar to the n-gram model, but operate on a lower level. Using such a predictor, one can generate texts -(c) TheartisticstyleofVanGogh’s“StarryNight”appliedtothephotograph -ofaScottishHighlandCattle. character by character. If the model is good, the text can have +character by character. If the model is good, the text can have the correct punctuation. This would not be possible with a -Fig. 4: The algorithm takes both, the original image and the word predictor. -style image to produce the result. +word predictor. Character predictors can be implemented with RNNs. In con- trast to standard feed-forward neural networks like multilayer -This artistic style imitation can be seen itself as creative work. Perceptrons (MLPs) which was shown in Figure 1(b), those -An example is given by Figure 4. The code which created this networksaretrainedtotaketheiroutputatsomepointaswellas -example is available under [Joh16]. the normal input. This means they can keep some information +Perceptrons (MLPs) which was shown in Figure 1(b), those +networksaretrainedtotaketheiroutputatsomepointaswellas +the normal input. This means they can keep some information over time. One of the most common variant to implement -Something similar was done by [SPB+14], where the style of RNNs is by using so called Long short-term memory (LSTM) -a portrait photograph was transferred to another photograph. cells [HS97]. -A demo can be seen on [Shi14]. +RNNs is by using so called Long short-term memory (LSTM) +cells [HS97]. Recurrentnetworksapplytwomainideasinordertolearn:The first is called unrolling and means that an recurrent network is imagined to be an infinite network over time. At each time -C. 
Drawing Robots step the recurrent neurons get duplicated. The second idea is weight sharing which means that those unrolled neurons share -PatrickTressetandFrdricFolLeymariecreatedasystemcalled the same weight. -AIKON (Automatic IKONic drawing) which can automatically -generated sketches for portraits [TL05]. AIKON takes a digital -photograph, detects faces on them and sketches them with a A. Similar Texts Generation -pen-plotter. -Tresset and Leymaire use k-means clustering [KMN+02] to KarpathytrainedmultiplecharacterRNNsondifferentdatasets -segment regions of the photograph with similar color which, and gave an excellent introduction [Kar15b]. He trained it on -in turn, will get a similar shading. Paul Graham’s essays, all the works of Shakespeare, the Hutter +KarpathytrainedmultiplecharacterRNNsondifferentdatasets +and gave an excellent introduction [Kar15b]. He trained it on +Paul Graham’s essays, all the works of Shakespeare, the Hutter Prize [hut] 100MB dataset of raw Wikipedia articles, the raw -Such a drawing robot could apply machine learning techniques LATEXsourcefileofabookaboutalgebraicstacksandgeometry -known from computer vision for detecting the human. It and Linux C code. -could apply self-learning techniques to draw results most -similar to the artists impression of the image. However, the With that training data, the models can generate similar texts. -system described in [TL05] seems not to be a machine New works which look like Shakespeare plays, new Wikipedia -learning computer program according to the definition by Tom articles, new Linux code and new papers about algebraic -Mitchell [Mit97]. geometry can thus automatically be generated. At a first +With that training data, the models can generate similar texts. +New works which look like Shakespeare plays, new Wikipedia +articles, new Linux code and new papers about algebraic +geometry can thus automatically be generated. At a first 4 -glance, they do look authentic. 
The syntax was mostly used we will now investigate the work which was done in audio -correctly, the formatting looks as expected, the sentences are synthesization. +glance, they do look authentic. The syntax was mostly used +correctly, the formatting looks as expected, the sentences are grammaticallycorrect.However,whenonelooksatthebroader context it is easy to recognize that the algorithm has no insight -A. Emily Howell in what it is doing. It does match patterns really well, but it fails to follow a central theme. In the context of C code this -David Cope created a project called “Experiments in Musical means that new variables are introduced, but not used. At the -Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He same time, variables which were not declared are used. In -introduces the idea of seeing music as a language which the context of Shakespear plays this means that a lot of new -can be analyzed with natural language processing (NLP) characters are introduced, but they don’t speak with each other -methods. Cope mentions that EMI was more useful to him, or about each other. -when he used the system to “create small phrase-size textures -The code used to generate these examples is available and as next possibilities using its syntactic dictionary and rule -ready to use through [Kar15a]. A couple of examples are base” [Cop87]. +The code used to generate these examples is available and +ready to use through [Kar15a]. A couple of examples are in Section A. -In 2003, Cope started a new project which was based on EMI: -Emily Howell [Cop13]. This program is able to “creat[e] both -highly authentic replications and novel music compositions”. B. Chatbots -Thereadermightwanttolistento[Cop12]togetanimpression -of the beauty of the created music. Chatbots are computer programs which participate in chat -rooms as autonomous agents. 
This means they have similar According to Cope, an essential part of music is “a set of -permissions and possibilities as usual human users have, but instructions for creating different, but highly related self- -users can trigger a set of commands to make the bot give them replications”. Emmy was programmed to find this set of -valuable information or features. instructions. It tries to find the “signature” of a composer, -which Cope describes as “contiguous patterns that recur in two +rooms as autonomous agents. This means they have similar +permissions and possibilities as usual human users have, but +users can trigger a set of commands to make the bot give them +valuable information or features. A special category of chatbots are such bots which actively -or more works of the composer”. participate in the conversation, which is usually not the case. -One of the earliest programs in this category is ELIZA, a bot The new feature of Emily Howell compared to Emmy is that -created by Joseph Weizenbaum in the 1960s [Wei76]. This Emily Howell does not necessarily remain in a single, already -program had a set of patterns implemented to which it would known style. +One of the earliest programs in this category is ELIZA, a bot +created by Joseph Weizenbaum in the 1960s [Wei76]. This +program had a set of patterns implemented to which it would reply in a seemingly smart way in a psychologists fashion. -Emily Howell makes use of association network. Cope empha- This means quite often the program would simply repeat the -sizes that this is not a form of a neural network. However, it last sentence and add something meaningless like “How do -is not clear from [Cop13] how exactly an association network you feel about it?”. According to [Cur14], Weizenbaum once -is trained. Cope mentions that Emily Howell is explained in found his secretary — who was aware of the fact that this is -detail in [Cop05]. a computer program — chatting with the machine. 
When he looked over her shoulder, she asked him “would you mind leaving the room”. -B. GRUV Today, much more sophisticated chatbots exist. They make use -Recurrent neural networks — LSTM networks, to be exact of the vast amount of data which is available by millions of -— are used in [NV15] together with Gated Recurrent Units Wikipediaarticles,chatprotocols,websites,helpdeskprotocols, -(GRU) to build a network which can be trained to generate subtitles of movies as well as the astonishing increase in -music. Instead of taking notes directly or MIDI files, Nayebi computing power to train RNNs and language models similar -and Vitelli took raw audio waveforms as input. Those audio to the ones described before. +Interesting results like the following were obtained by [VL15]: +Human: what is the purpose of life ? +Machine: to serve the greater good . +Human: what is the purpose of living ? +Machine: to live forever . +V. AUDIODATA +Common machine learning tasks which involve audio data +are speech recognition, speaker identification, identification of +songs. This leads to some less-common, but interesting topics: +The composition of music, the synthesizing of audio as art. +While the composition might be considered in Section IV, +we will now investigate the work which was done in audio +synthesization. +A. Emily Howell +David Cope created a project called “Experiments in Musical +Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He +introduces the idea of seeing music as a language which +can be analyzed with natural language processing (NLP) +methods. Cope mentions that EMI was more useful to him, +when he used the system to “create small phrase-size textures +as next possibilities using its syntactic dictionary and rule +base” [Cop87]. +In 2003, Cope started a new project which was based on EMI: +Emily Howell [Cop13]. This program is able to “creat[e] both +highly authentic replications and novel music compositions”. 
+Thereadermightwanttolistento[Cop12]togetanimpression +of the beauty of the created music. +According to Cope, an essential part of music is “a set of +instructions for creating different, but highly related self- +replications”. Emmy was programmed to find this set of +instructions. It tries to find the “signature” of a composer, +which Cope describes as “contiguous patterns that recur in two +or more works of the composer”. +The new feature of Emily Howell compared to Emmy is that +Emily Howell does not necessarily remain in a single, already +known style. +Emily Howell makes use of association network. Cope empha- +sizes that this is not a form of a neural network. However, it +is not clear from [Cop13] how exactly an association network +is trained. Cope mentions that Emily Howell is explained in +detail in [Cop05]. +B. GRUV +Recurrent neural networks — LSTM networks, to be exact +— are used in [NV15] together with Gated Recurrent Units +(GRU) to build a network which can be trained to generate +music. Instead of taking notes directly or MIDI files, Nayebi +and Vitelli took raw audio waveforms as input. Those audio waveformsarefeaturevectorsgivenfortimesteps0,1,...,t− -Interesting results like the following were obtained by [VL15]: 1,t. The network is given those feature vectors X ,...,X -1 t -and has to predict the following feature vector X . This -Human: what is the purpose of life ? t+1 +1,t. The network is given those feature vectors X 1,...,X +t +and has to predict the following feature vector X t+1. This means it continues the music. As the input is continuous, the -Machine: to serve the greater good . problem was modeled as a regression task. Discrete Fourier -Human: what is the purpose of living ? Transformation (DFT) was used on chunks of length N of the -Machine: to live forever . music to obtain features in the frequency domain. Animplementationcanbefoundat[VN15]andademonstration -V. AUDIODATA can be found at [Vit15]. 
-Common machine learning tasks which involve audio data -are speech recognition, speaker identification, identification of C. Audio Synthesization -songs. This leads to some less-common, but interesting topics: -The composition of music, the synthesizing of audio as art. Audio synthesization is generating new audio files. This can -While the composition might be considered in Section IV, eitherbemusicorspeech.Withthetechniquesdescribedbefore, +C. Audio Synthesization +Audio synthesization is generating new audio files. This can +eitherbemusicorspeech.Withthetechniquesdescribedbefore, 5 -neural networks can be trained to generate music note by note. [Joh15a] D. Johnson, “Biaxial recurrent neural network for music -However, it is desirable to allow multiple notes being played composition,” GitHub, Aug. 2015. [Online]. Available: https: -//github.com/hexahedria/biaxial-rnn-music-composition +neural networks can be trained to generate music note by note. +However, it is desirable to allow multiple notes being played at the same time. +ThisideaandsomeotherswereappliedbyDanielJohnson.He +wrote a very good introduction into neural networks for music +composition which explains those ideas [Joh15b]. Example +compositionsareavailablethere,too.Healsomadethecodefor +hisBiaxialRecurrentNeuralNetworkavailableunder[Joh15a]. +VI. DISCUSSION +What does these examples mean for our understanding of +creativity? Does it influence how much we value art? Could +wedefineartandcreativitybetterafterhavingthoseandsimilar +results? +I think we might readjust our understanding of creativity just +like we adjusted our understanding of algorithmically hard +problems after Deep Blue won against the reigning world +chess champion Garry Kasparov in 1997. +However,bynowitisobviousthatmachinelearningalgorithms +cannot compete with human artists. Today’s state of the art +algorithms which are purely based on machine learning don’t +follow a central theme. They lack the ability to plan. 
Although +clever algorithms were implemented for composing music, it +seems as if there is still a lot of supervision involved. +REFERENCES +[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. +[Online].Available:http://hdl.handle.net/2027/spo.bbp2372.1987. +025 +[Cop05] ——, Computer models of musical creativity. MIT Press +Cambridge,2005. +[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. +Available:https://www.youtube.com/watch?v=jLR- c uCwI +[Cop13] ——,“Thewell-programmedclavier:Styleincomputermusic +composition,” XRDS: Crossroads, The ACM Magazine for +Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: +http://dl.acm.org/citation.cfm?id=2460444 +[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On- +line].Available:http://www.bbc.co.uk/blogs/adamcurtis/entries/ +78691781-c9b7-30a0-9a0a-3ff76e8bfe58 +[Gad06] A.Gadsby,Ed.,DictionaryofContemporaryEnglish. Pearson +EducationLimited,2006. +[GEB15] L.A.Gatys,A.S.Ecker,andM.Bethge,“Aneuralalgorithmof +artisticstyle,”arXivpreprintarXiv:1508.06576,2015.[Online]. +Available:http://arxiv.org/abs/1508.06576 +[goo15] “Inceptionism: Going deeper into neural networks,” Google +Photos,Jun.2015.[Online].Available:https://goo.gl/Bydofw +[HS97] S.HochreiterandJ.Schmidhuber,“Longshort-termmemory,” +Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997. +[Online].Available:http://ieeexplore.ieee.org/xpl/freeabs all.jsp? +arnumber=6795963 +[hut] “50’000europrizeforcompressinghumanknowledge.”[Online]. +Available:http://prize.hutter1.net/ +[HZRS15] K.He,X.Zhang,S.Ren,andJ.Sun,“Deepresiduallearning +forimagerecognition,”arXivpreprintarXiv:1512.03385,2015. +[Online].Available:http://arxiv.org/abs/1512.03385 +[Joh15a] D. Johnson, “Biaxial recurrent neural network for music +composition,” GitHub, Aug. 2015. [Online]. 
Available: https: +//github.com/hexahedria/biaxial-rnn-music-composition [Joh15b] ——, “Composing music with recurrent neu- -ThisideaandsomeotherswereappliedbyDanielJohnson.He ral networks,” Personal Blog, Aug. 2015. [On- -wrote a very good introduction into neural networks for music line]. Available: http://www.hexahedria.com/2015/08/03/ +ral networks,” Personal Blog, Aug. 2015. [On- +line]. Available: http://www.hexahedria.com/2015/08/03/ composing-music-with-recurrent-neural-networks/ -composition which explains those ideas [Joh15b]. Example -compositionsareavailablethere,too.Healsomadethecodefor [Joh16] J.Johnson,“neural-style,”GitHub,Jan.2016.[Online].Available: +[Joh16] J.Johnson,“neural-style,”GitHub,Jan.2016.[Online].Available: https://github.com/jcjohnson/neural-style -hisBiaxialRecurrentNeuralNetworkavailableunder[Joh15a]. [Kar15a] A.Karpathy,“char-rnn,”GitHub,Nov.2015.[Online].Available: https://github.com/karpathy/char-rnn -VI. DISCUSSION [Kar15b] ——, “The unreasonable effectiveness of recurrent neural networks,” Personal Blog, May 2015. [Online]. Available: -What does these examples mean for our understanding of http://karpathy.github.io/2015/05/21/rnn-effectiveness/ -creativity? Does it influence how much we value art? Could [KMN+02] T.Kanungo,D.Mount,N.Netanyahu,C.Piatko,R.Silverman, -wedefineartandcreativitybetterafterhavingthoseandsimilar andA.Wu,“Anefficientk-meansclusteringalgorithm:analysis +http://karpathy.github.io/2015/05/21/rnn-effectiveness/ +[KMN+02] T.Kanungo,D.Mount,N.Netanyahu,C.Piatko,R.Silverman, +andA.Wu,“Anefficientk-meansclusteringalgorithm:analysis andimplementation,”PatternAnalysisandMachineIntelligence, -results? IEEETransactionson,vol.24,no.7,pp.881–892,Jul2002. -I think we might readjust our understanding of creativity just [Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in -like we adjusted our understanding of algorithmically hard computerscience. McGraw-Hill,1997. 
-problems after Deep Blue won against the reigning world [MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going -chess champion Garry Kasparov in 1997. deeper into neural networks,” googleresearch.blogspot.co.uk, +[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in +computerscience. McGraw-Hill,1997. +[MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going +deeper into neural networks,” googleresearch.blogspot.co.uk, Jun.2015.[Online].Available:http://googleresearch.blogspot.de/ -However,bynowitisobviousthatmachinelearningalgorithms 2015/06/inceptionism-going-deeper-into-neural.html -cannot compete with human artists. Today’s state of the art [Nie15] M. A. Nielsen, Neural Networks and Deep Learn- -algorithms which are purely based on machine learning don’t ing. Determination Press, 2015. [Online]. Avail- +2015/06/inceptionism-going-deeper-into-neural.html +[Nie15] M. A. Nielsen, Neural Networks and Deep Learn- +ing. Determination Press, 2015. [Online]. Avail- able: http://neuralnetworksanddeeplearning.com/chap6.html# -follow a central theme. They lack the ability to plan. Although introducing convolutional networks -clever algorithms were implemented for composing music, it [NV15] A.NayebiandM.Vitelli,“GRUV:Algorithmicmusicgeneration -seems as if there is still a lot of supervision involved. using recurrent neural networks,” 2015. [Online]. Available: http://cs224d.stanford.edu/reports/NayebiAran.pdf -REFERENCES [Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit. +[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit. com/r/deepdream/ -[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. [Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun. -[Online].Available:http://hdl.handle.net/2027/spo.bbp2372.1987. 2014. [Online]. Available: https://www.youtube.com/watch?v= -025 Hj5lGFzlubU -[Cop05] ——, Computer models of musical creativity. MIT Press [SPB+14] Y. Shih, S. 
Paris, C. Barnes, W. T. Freeman, and F. Durand, -Cambridge,2005. “Style transfer for headshot portraits,” ACM Transactions on +[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun. +2014. [Online]. Available: https://www.youtube.com/watch?v= +Hj5lGFzlubU +[SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, +“Style transfer for headshot portraits,” ACM Transactions on Graphics(TOG),vol.33,no.4,p.148,2014.[Online].Available: -[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. http://dl.acm.org/citation.cfm?id=2601137 -Available:https://www.youtube.com/watch?v=jLR- c uCwI +http://dl.acm.org/citation.cfm?id=2601137 [TL05] P.TressetandF.F.Leymarie,“Generativeportraitsketching,”in -[Cop13] ——,“Thewell-programmedclavier:Styleincomputermusic ProceedingsofVSMM,2005,pp.739–748. -composition,” XRDS: Crossroads, The ACM Magazine for -Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: [Vit15] M. Vitelli, “Algorithmic music generation with recurrent -http://dl.acm.org/citation.cfm?id=2460444 neural networks,” YouTube, Jun. 2015. [Online]. Available: +ProceedingsofVSMM,2005,pp.739–748. +[Vit15] M. Vitelli, “Algorithmic music generation with recurrent +neural networks,” YouTube, Jun. 2015. [Online]. Available: https://youtu.be/0VTI1BBLydE -[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On- -line].Available:http://www.bbc.co.uk/blogs/adamcurtis/entries/ [VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba, -78691781-c9b7-30a0-9a0a-3ff76e8bfe58 “Hoggles: Visualizing object detection features,” in Computer +[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba, +“Hoggles: Visualizing object detection features,” in Computer Vision(ICCV),2013IEEEInternationalConferenceon. IEEE, -[Gad06] A.Gadsby,Ed.,DictionaryofContemporaryEnglish. Pearson 2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/ -EducationLimited,2006. 
xpls/abs all.jsp?arnumber=6751109 -[GEB15] L.A.Gatys,A.S.Ecker,andM.Bethge,“Aneuralalgorithmof [VL15] O. Vinyals and Q. Le, “A neural conversational model,” -artisticstyle,”arXivpreprintarXiv:1508.06576,2015.[Online]. arXivpreprintarXiv:1506.05869,Jul.2015.[Online].Available: -Available:http://arxiv.org/abs/1508.06576 http://arxiv.org/abs/1506.05869v2 -[goo15] “Inceptionism: Going deeper into neural networks,” Google [VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online]. -Photos,Jun.2015.[Online].Available:https://goo.gl/Bydofw Available:https://github.com/MattVitelli/GRUV -[HS97] S.HochreiterandJ.Schmidhuber,“Longshort-termmemory,” [Wei76] J. Weizenbaum, Computer Power and Human Reason: From -Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997. JudgementtoCalculation. W.H.Freeman&CoLtd,1976. -[Online].Available:http://ieeexplore.ieee.org/xpl/freeabs all.jsp? -arnumber=6795963 [ZF14] M.D.ZeilerandR.Fergus,“Visualizingandunderstandingcon- +2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs all.jsp?arnumber=6751109 +[VL15] O. Vinyals and Q. Le, “A neural conversational model,” +arXivpreprintarXiv:1506.05869,Jul.2015.[Online].Available: +http://arxiv.org/abs/1506.05869v2 +[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online]. +Available:https://github.com/MattVitelli/GRUV +[Wei76] J. Weizenbaum, Computer Power and Human Reason: From +JudgementtoCalculation. W.H.Freeman&CoLtd,1976. +[ZF14] M.D.ZeilerandR.Fergus,“Visualizingandunderstandingcon- volutionalnetworks,”inComputerVision–ECCV2014. Springer, -[hut] “50’000europrizeforcompressinghumanknowledge.”[Online]. 2014,pp.818–833. -Available:http://prize.hutter1.net/ -[HZRS15] K.He,X.Zhang,S.Ren,andJ.Sun,“Deepresiduallearning -forimagerecognition,”arXivpreprintarXiv:1512.03385,2015. -[Online].Available:http://arxiv.org/abs/1512.03385 +2014,pp.818–833. 
6 APPENDIXA AUTOMATICALLYGENERATEDTEXTS diff --git a/read/results/pdfplumber/1602.06541.txt b/read/results/pdfplumber/1602.06541.txt index 984d57f..6a34a91 100644 --- a/read/results/pdfplumber/1602.06541.txt +++ b/read/results/pdfplumber/1602.06541.txt @@ -1,65 +1,132 @@ 1 -A Survey of Semantic Segmentation II. TAXONOMYOFSEGMENTATIONALGORITHMS -Martin Thoma The computer vision community has published a -info@martin-thoma.de wide range of segmentation algorithms so far. Those -algorithms can be grouped by the kind of data they -operate on and the kind of segmentation they are able +A Survey of Semantic Segmentation +Martin Thoma +info@martin-thoma.de Abstract—Thissurveygivesanoverviewoverdifferent -to produce. techniques used for pixel-level semantic segmentation. -Metrics and datasets for the evaluation of segmenta- The following subsections will give four different -tion algorithms and traditional approaches for segmen- criteria by which segmentation algorithms can be +Metrics and datasets for the evaluation of segmenta- +tion algorithms and traditional approaches for segmen- tation such as unsupervised methods, Decision Forests -classified. and SVMs are described and pointers to the relevant -papers are given. Recently published approaches with This survey describes fixed-class (see Section II-A), -convolutionalneuralnetworksarementionedandtypical single-class affiliation (see Section II-B) algorithms -problematic situations for segmentation algorithms are whichworkongrayscaleorcoloredsinglepixelimages 6102 +papers are given. Recently published approaches with +convolutionalneuralnetworksarementionedandtypical +problematic situations for segmentation algorithms are examined. A taxonomy of segmentation algorithms is -(see Section II-C) in a completely automated, passive given. -fashion (see Section II-D). -yaM I. INTRODUCTION -Semantic segmentation is the task of clustering A. 
Allowed classes +Semantic segmentation is the task of clustering parts of images together which belong to the same -Semantic segmentation is a classification task. As object class. This type of algorithm has several use- 11 -cases such as detecting road signs [MBLAGJ+07], such, the classes on which the algorithm is trained is a -central design decision. +object class. This type of algorithm has several use- +cases such as detecting road signs [MBLAGJ+07], detecting tumors [MBVLG02], detecting medical in- -]VC.sc[ -strumentsinoperations[WAH97],coloncryptssegmen- Most algorithms work with a fixed set of classes; -tation [CRSS14], land use and land cover classifica- some even only work on binary classes like fore- -tion [HDT02]. In contrast, non-semantic segmentation ground vs background [RM07], [CS10] or street vs -onlyclusterspixelstogetherbasedongeneralcharacter- no street [BKTT15]. -istics of single objects. Hence the task of non-semantic However, there are also unsupervised segmentation -segmentation is not well-defined, as many different algorithms which do not distinguish classes at all (see -2v14560.2061:viXra -segmentations might be acceptable. Section V-B) as well as segmentation algorithms which -Several applications of segmentation in medicine are are able to recognize when they don’t know a class. -listed in [PXP00]. For example, in [GRC+08] a void class was added -Object detection, in comparison to semantic seg- for classes which were not in the training set. Such -mentation, has to distinguish different instances of the a void class was also used in the MSRCv2 dataset -same object. While having a semantic segmentation (see Section III-B2) to make it possible to make more -is certainly a big advantage when trying to get object coarse segmentations and thus having to spend less -instances, there are a couple of problems: neighboring time annotating the image. 
+strumentsinoperations[WAH97],coloncryptssegmen- +tation [CRSS14], land use and land cover classifica- +tion [HDT02]. In contrast, non-semantic segmentation +onlyclusterspixelstogetherbasedongeneralcharacter- +istics of single objects. Hence the task of non-semantic +segmentation is not well-defined, as many different +segmentations might be acceptable. +Several applications of segmentation in medicine are +listed in [PXP00]. +Object detection, in comparison to semantic seg- +mentation, has to distinguish different instances of the +same object. While having a semantic segmentation +is certainly a big advantage when trying to get object +instances, there are a couple of problems: neighboring pixelsofthesameclassmightbelongtodifferentobject instances and regions which are not connected my belong to the same object instance. For example, a -B. Class affiliation of pixels tree in front of a car which visually divides the car into -two parts. Humans do an incredible job when looking at the -Thispaperisorganizedasfollows:Itbeginsbygiving world. For example, when we see a glass of water -a taxonomy of segmentation algorithms in Section II. standing on a table we can automatically say that there -A summary of quality measures and datasets which are istheglassandbehinditthetable,evenifweonlyhada -used for semantic segmentation follows in Section III. singleimageandwerenotallowedtomove.Thismeans -A summary of traditional segmentation algorithms and we simultaneously two labels to the coordinates of the -their characteristics follows in Section V, as well as a glass: Glass and table. Although there is much more -brief, non-exhaustive summary of recently published work being done on single class affiliation segmenta- -semantic segmentation algorithms which are based on tion algorithms, there is a publication about multiple -neural networks in Section VI. Finally, Section VII class affiliation segmentation [LRAL08]. 
Similarly, -informs the reader about typical problematic cases for recent publications in pixel-level object segmentation -segmentation algorithms. used layered models [YHRF12]. +two parts. +Thispaperisorganizedasfollows:Itbeginsbygiving +a taxonomy of segmentation algorithms in Section II. +A summary of quality measures and datasets which are +used for semantic segmentation follows in Section III. +A summary of traditional segmentation algorithms and +their characteristics follows in Section V, as well as a +brief, non-exhaustive summary of recently published +semantic segmentation algorithms which are based on +neural networks in Section VI. Finally, Section VII +informs the reader about typical problematic cases for +segmentation algorithms. +II. TAXONOMYOFSEGMENTATIONALGORITHMS +The computer vision community has published a +wide range of segmentation algorithms so far. Those +algorithms can be grouped by the kind of data they +operate on and the kind of segmentation they are able +to produce. +The following subsections will give four different +criteria by which segmentation algorithms can be +classified. +This survey describes fixed-class (see Section II-A), +single-class affiliation (see Section II-B) algorithms +whichworkongrayscaleorcoloredsinglepixelimages +(see Section II-C) in a completely automated, passive +fashion (see Section II-D). +A. Allowed classes +Semantic segmentation is a classification task. As +such, the classes on which the algorithm is trained is a +central design decision. +Most algorithms work with a fixed set of classes; +some even only work on binary classes like fore- +ground vs background [RM07], [CS10] or street vs +no street [BKTT15]. +However, there are also unsupervised segmentation +algorithms which do not distinguish classes at all (see +Section V-B) as well as segmentation algorithms which +are able to recognize when they don’t know a class. 
+For example, in [GRC+08] a void class was added +for classes which were not in the training set. Such +a void class was also used in the MSRCv2 dataset +(see Section III-B2) to make it possible to make more +coarse segmentations and thus having to spend less +time annotating the image. +B. Class affiliation of pixels +Humans do an incredible job when looking at the +world. For example, when we see a glass of water +standing on a table we can automatically say that there +istheglassandbehinditthetable,evenifweonlyhada +singleimageandwerenotallowedtomove.Thismeans +we simultaneously two labels to the coordinates of the +glass: Glass and table. Although there is much more +work being done on single class affiliation segmenta- +tion algorithms, there is a publication about multiple +class affiliation segmentation [LRAL08]. Similarly, +recent publications in pixel-level object segmentation +used layered models [YHRF12]. +a +r +X +i +v +: +1 +6 +0 +2 +. +0 +6 +5 +4 +1 +v +2 +[ +c s +. +C +V +] +1 +1 +M +a +y +2 +0 +1 +6 2 C. Input Data The available data which can be used for the @@ -67,563 +134,910 @@ inference of a segmentation varies by application. • Grayscale vs colored: Grayscale images are commonly used in medical imaging such as magnetic resonance (MR) imaging or ultrasonog- -(a) ExampleScene (b) Visualizationofafoundseg- -raphy whereas colored photographs are obviously mentation +raphy whereas colored photographs are obviously widespread. -Figure 1: An example of a scene and a possible visu- • Excluding or including depth data: RGB-D, -sometimes also called range [HJBJ+96] is avail- alization of a found segmentation. +sometimes also called range [HJBJ+96] is avail- able in robotics, autonomous cars and recently also in consumer electronics such as Microsoft -III. EVALUATIONANDDATASETS Kinect [Zha12]. -• Single image vs stereo images vs co- A. 
Quality measures for evaluation +• Single image vs stereo images vs co- segmentation: Single image segmentation is the -A performance measure is a crucial part of any most wide-spread kind of segmentation, but using -machine learning system. As users of a semantic stereoimageswasalreadytriedin[BVZ01].Itcan -segmentationsystemexpectcorrectresults,theaccuracy be seen as a more natural way of segmentation as -is the most commonly used performance measure, but most mammals have two eyes. It can also be seen -there are other measures of quality which matter when as being related to having depth data. -segmentation algorithms are compared. This section Co-segmentation as in [RMBK06], [CXGS12] is -gives an overview of those quality measures. the problem of finding a consistent segmentation -1) Accuracy: Showingthecorrectnessofthesegmen- for multiple images. This problem can be seen -tation hypotheses is done in most publications about in two ways: One the one hand, it can be seen -semantic segmentation. However, there are a couple as the problem of finding common objects in at -of different ways how this accuracy can be displayed. least two images. On the other hand, every image -One way to give readers a first qualitative impression after the first can be used as an additional source -of the obtained segmentations is by showing examples of information to find a meaningful segmentation. -such as Figure 1. This idea can be extended to time series such as -However, this can only support the explanation of videos. -particular problems or showcase special situation. For -• 2D vs 3D: Segmenting images is a 2D segmenta- meaningfulinformationabouttheoverallaccuracy,there +• 2D vs 3D: Segmenting images is a 2D segmenta- tion task where the smallest unit is called a pixel. -are a couple of metrics how accuracy can be defined. 
-In 3D data, such as volumetric X-ray CT images For this section, let k ∈N be the number of classes, -as they were used in [HHR01], the smallest unit n ∈N with i,j ∈1,...,k be the number of pixels -ij 0 +In 3D data, such as volumetric X-ray CT images +as they were used in [HHR01], the smallest unit is called a voxel. +D. Operation state +The operation state of the classifying machine can +eitherbeactiveasin[SUM+11],[SSA12]whererobots +can move objects to find a segmentation or passive, +where the received image cannot be influenced. Among +the passive algorithms, some segment in a completely +automaticfashion,othersworkinaninteractivemode. +One example would be a system where the user clicks +on the background or marks a coarse segmentation and +thealgorithmfindsafine-grainedsegmentation.[BJ00], +[RKB04], [PS07] describe systems which work in an +interactive mode. +(a) ExampleScene (b) Visualizationofafoundseg- +mentation +Figure 1: An example of a scene and a possible visu- +alization of a found segmentation. +III. EVALUATIONANDDATASETS +A. Quality measures for evaluation +A performance measure is a crucial part of any +machine learning system. As users of a semantic +segmentationsystemexpectcorrectresults,theaccuracy +is the most commonly used performance measure, but +there are other measures of quality which matter when +segmentation algorithms are compared. This section +gives an overview of those quality measures. +1) Accuracy: Showingthecorrectnessofthesegmen- +tation hypotheses is done in most publications about +semantic segmentation. However, there are a couple +of different ways how this accuracy can be displayed. +One way to give readers a first qualitative impression +of the obtained segmentations is by showing examples +such as Figure 1. +However, this can only support the explanation of +particular problems or showcase special situation. For +meaningfulinformationabouttheoverallaccuracy,there +are a couple of metrics how accuracy can be defined. 
+For this section, let k ∈N be the number of classes, +n +ij +∈N +0 +with i,j ∈1,...,k be the number of pixels which belong to class i and were labeled as class j. +(n ij) is called a confusion matrix. Let t +i =(cid:80)k -(n ) is called a confusion matrix. Let t n -ij i j=1 ij +j=1n +ij be the total number of pixels of class i. One way to compare segmentation algorithms is by -D. Operation state the pixel-wise accuracy of the predicted segmentation as done in many publications [SWRC06], [CP08], -The operation state of the classifying machine can [LSD14]. This is also called per-pixel rate and de- -eitherbeactiveasin[SUM+11],[SSA12]whererobots (cid:80)k i=1nii. -fined as Taking the pixel-wise classification -can move objects to find a segmentation or passive, h(cid:80) ak is=1twti -accuracy o major drawbacks: -where the received image cannot be influenced. Among -the passive algorithms, some segment in a completely P1 Taskslikesegmentingimagesforautonomouscars -automaticfashion,othersworkinaninteractivemode. have large regions which have one class. This -One example would be a system where the user clicks makes achieving classification accuracies of more -on the background or marks a coarse segmentation and than 30% with a priori knowledge only possible. -thealgorithmfindsafine-grainedsegmentation.[BJ00], For example, a system might learn that a certain -[RKB04], [PS07] describe systems which work in an position of the image is most of the time “sky” -interactive mode. while another position is most of the time “road”. +fined as +(cid:80)k i=1nii +(cid:80)k i=1ti +. Taking the pixel-wise classification +accuracy has two major drawbacks: +P1 Taskslikesegmentingimagesforautonomouscars +have large regions which have one class. This +makes achieving classification accuracies of more +than 30% with a priori knowledge only possible. 
+For example, a system might learn that a certain +position of the image is most of the time “sky” +while another position is most of the time “road”. 3 -P2 The manually labeled images could have a more segmentation,everyimageneedstobeprocessedwithin -coarse labeling. For example, a human classifier 20ms [BKTT15]. This time is called latency. -could have labeled a region as “car” and the Most papers do not give exact values for the time -algorithm could have split that region into the theirapplicationneeds.Onereasonmightbethatthisis -general “car” and the more specific “wheel of a very hardware, implementation and in some cases even -car” data specific. For example, [HJBJ+96] notes that their -Three accuracy metrics which do not suffer from algorithm needs 10s on a Sun SparcStation 20. The -problem P1 are used in [LSD14]: fastestCPUeverproducedforthissystemhad200MHz. -• mean accuracy: k1 ·(cid:80)k i=1 n ti ii ∈[0,1] C tao inm edpa ur si in ng gt ah nis Ind ti er le ic 7tl -y 48w 20it Kh wre is thul 3ts .9w Gh Hic zh ww oe ur le dno ob t- +P2 The manually labeled images could have a more +coarse labeling. For example, a human classifier +could have labeled a region as “car” and the +algorithm could have split that region into the +general “car” and the more specific “wheel of a +car” +Three accuracy metrics which do not suffer from +problem P1 are used in [LSD14]: +• mean accuracy: 1 k ·(cid:80)k i=1 nii ti ∈[0,1] • mean intersection over union: -1 ·(cid:80)k nii ∈[0,1] be meaningful. 
-k i=1 ti−nii+(cid:80)k j=1nji However, it does still make sense to mention the +1 +k +·(cid:80)k +i=1 +nii +ti−nii+(cid:80)k j=1nji +∈[0,1] • frequency weighted intersection over union: -execution time as well as the hardware in individual -((cid:80)k i=1t i)−1(cid:80)k i=1t i· ti−nii+n (cid:80)ii k j=1nji ∈[0,1] papers.Thisgivestheinterestedreaderthepossibilityto -Another problem might be pixels which cannot be estimatehowdifficultitmightbetoadjustthealgorithm -assigned to one of the known classes. For this reason, to work in the required time-constraints. -[SWRC06] makes use of a void class. This class gets Besides the latency, the throughput is another -completely ignored for all quality measures. Hence the relevant characteristic of algorithms and implementa- -totalnumberofpixelsisassumedtobewidth·height− tions for semantic segmentation. For example, for the -number of void pixels. automatic description of images in order to enable text -One way to deal with problem P1 and problem P2 search the throughput is of much higher importance -is giving the confusion matrix as done in [SWRC06]. than latency. -However, this approach is not feasible if many classes 3) Stability: A reasonable requirement on semantic -are given. segmentation algorithms is the stability of a segmen- -The F-measure is useful for binary classifica- tation over slight changes in the input image. When -tion task such as the KITTI road segmentation the image data is sightly blurred by smoke such as -benchmark [FKG13] or crypt segmentation as done in Figure 4(c), the segmentation should not change. -by [CRSS14]. It is calculated as “the harmonic mean Also, two images which show a slight change in -of the precision and recall” [PH05]: perspective should also only result in slight changes in -the segmentation [PH05]. +((cid:80)k i=1t i)−1(cid:80)k i=1t i· nii +ti−nii+(cid:80)k j=1nji +∈[0,1] +Another problem might be pixels which cannot be +assigned to one of the known classes. 
For this reason, +[SWRC06] makes use of a void class. This class gets +completely ignored for all quality measures. Hence the +totalnumberofpixelsisassumedtobewidth·height− +number of void pixels. +One way to deal with problem P1 and problem P2 +is giving the confusion matrix as done in [SWRC06]. +However, this approach is not feasible if many classes +are given. +The F-measure is useful for binary classifica- +tion task such as the KITTI road segmentation +benchmark [FKG13] or crypt segmentation as done +by [CRSS14]. It is calculated as “the harmonic mean +of the precision and recall” [PH05]: +F β =(1+β)2 tp -F β =(1+β)2 (1+β2)·tp+β2·fn+fp 4) Memory usage: Peak memory usage matters -when segmentation algorithms are used in devices like -where β = 1 is chosen in most cases and tp means smartphones or cameras, or when the algorithms have -true positive, fn means false negative and fp means to finish in a given time frame, run on the graphics +(1+β2)·tp+β2·fn+fp +where β = 1 is chosen in most cases and tp means +true positive, fn means false negative and fp means false positive. -processing unit (GPU) and consume so much memory -Finally,itshouldbenotedthatalotofothermeasures for single image segmentation that only the latest -for the accuracy of segmentations were proposed for graphic cards can be used. However, no publication -non-semantic segmentation. One of those accuracy were available mentioning the peak memory usage. +Finally,itshouldbenotedthatalotofothermeasures +for the accuracy of segmentations were proposed for +non-semantic segmentation. One of those accuracy measures is Normalized Probabilistic Rand (NPR) index which was introduced in [UPH05] and eval- -B. Datasets uated in [CSI+09] on dermoscopy images. Other -non-semantic segmentation measures were introduced The computer vision community produced a couple -in[MFTM01],butthereasonforcreatingthemseemsto of different datasets which are publicly available. 
In -betodealwiththeunder-definedtaskdescriptionofnon- the following, only the most widely used ones as well -semantic segmentation. These accuracy measures try to as three medical databases are described. An overview -dealwithdifferentlevelsofcoarsityofthesegmentation. over the quantity and the kind of data is given by -Thisismuchlessofaprobleminsemanticsegmentation Table I. -and thus those measures are not explained here. 1) PASCAL VOC: The PASCAL1 VOC2 challenge -2) Speed: Amaximumupperboundontheexecution was organized eight times with different datasets: -time for the inference on a single image is a hard Once every year from 2005 to 2012 [EVGW+b]. +non-semantic segmentation measures were introduced +in[MFTM01],butthereasonforcreatingthemseemsto +betodealwiththeunder-definedtaskdescriptionofnon- +semantic segmentation. These accuracy measures try to +dealwithdifferentlevelsofcoarsityofthesegmentation. +Thisismuchlessofaprobleminsemanticsegmentation +and thus those measures are not explained here. +2) Speed: Amaximumupperboundontheexecution +time for the inference on a single image is a hard requirement for some applications. For example, in the -1patternanalysis,statisticalmodellingandcomputationallearning, case of autonomous cars an algorithm which classifies +pixel as street or no-street and thus makes a semantic +segmentation,everyimageneedstobeprocessedwithin +20ms [BKTT15]. This time is called latency. +Most papers do not give exact values for the time +theirapplicationneeds.Onereasonmightbethatthisis +very hardware, implementation and in some cases even +data specific. For example, [HJBJ+96] notes that their +algorithm needs 10s on a Sun SparcStation 20. The +fastestCPUeverproducedforthissystemhad200MHz. +Comparing this directly with results which were ob- tainedusinganInteli7-4820Kwith3.9GHzwouldnot +be meaningful. 
+However, it does still make sense to mention the +execution time as well as the hardware in individual +papers.Thisgivestheinterestedreaderthepossibilityto +estimatehowdifficultitmightbetoadjustthealgorithm +to work in the required time-constraints. +Besides the latency, the throughput is another +relevant characteristic of algorithms and implementa- +tions for semantic segmentation. For example, for the +automatic description of images in order to enable text +search the throughput is of much higher importance +than latency. +3) Stability: A reasonable requirement on semantic +segmentation algorithms is the stability of a segmen- +tation over slight changes in the input image. When +the image data is sightly blurred by smoke such as +in Figure 4(c), the segmentation should not change. +Also, two images which show a slight change in +perspective should also only result in slight changes in +the segmentation [PH05]. +4) Memory usage: Peak memory usage matters +when segmentation algorithms are used in devices like +smartphones or cameras, or when the algorithms have +to finish in a given time frame, run on the graphics +processing unit (GPU) and consume so much memory +for single image segmentation that only the latest +graphic cards can be used. However, no publication +were available mentioning the peak memory usage. +B. Datasets +The computer vision community produced a couple +of different datasets which are publicly available. In +the following, only the most widely used ones as well +as three medical databases are described. An overview +over the quantity and the kind of data is given by +Table I. +1) PASCAL VOC: The PASCAL1 VOC2 challenge +was organized eight times with different datasets: +Once every year from 2005 to 2012 [EVGW+b]. 
+1patternanalysis,statisticalmodellingandcomputationallearning, anEUnetworkofexcellence -pixel as street or no-street and thus makes a semantic 2VisualObjectClasses +2VisualObjectClasses 4 -Beginning with 2007, a segmentation challenge was Training -added [EVGW+a]. Prediction +Beginning with 2007, a segmentation challenge was +added [EVGW+a]. The dataset consists of annotated photographs from -Preprocessing -www.flicker.com, a photo sharing website. There are Data -multiple challenges for PASCAL VOC. The 2012 Feature extraction augmentation +www.flicker.com, a photo sharing website. There are +multiple challenges for PASCAL VOC. The 2012 competition had five challenges of which one is a segmentation challenge where a single class label was givenforeachpixel.Theclassesare:aeroplane,bicycle, -Window Window-wise Post- -bird, boat, bottle, bus, car, cat, chair, cow, dining table, extraction Classification processing +bird, boat, bottle, bus, car, cat, chair, cow, dining table, dog,horse,motorbike,person,pottedplant,sheep,sofa, -train, tv/monitor. Figure 2: A typical segmentation pipeline gets raw -Although no new competitions will be held, new pixel data, applies preprocessing techniques -algorithms can be evaluated on the 2010, 2011 and like scaling and feature extraction like HOG -2012 data via http://host.robots.ox.ac.uk:8080/ features. For training, data augmentation -The PASCAL VOC segmentation challenges use the techniques such as image rotation can be -segmentation over union criterion (see Section III-A). applied. For every single image, patches of -2) MSRCv2: Microsoft Research has published a the image called windows are extracted and -databaseof591photographswithpixel-levelannotation those windows are classified. The resulting -of 21 classes: aeroplane, bike, bird, boat, body, book, semantic segmentation can be refined by -building, car, cat, chair, cow, dog, face, flower, grass, simple morphologic operations or by more -road, sheep, sign, sky, tree, water. 
Additionally, there complexapproachessuchasMarkovRandom -is a void label for pixels which do not belong to Fields (MRFs). +train, tv/monitor. +Although no new competitions will be held, new +algorithms can be evaluated on the 2010, 2011 and +2012 data via http://host.robots.ox.ac.uk:8080/ +The PASCAL VOC segmentation challenges use the +segmentation over union criterion (see Section III-A). +2) MSRCv2: Microsoft Research has published a +databaseof591photographswithpixel-levelannotation +of 21 classes: aeroplane, bike, bird, boat, body, book, +building, car, cat, chair, cow, dog, face, flower, grass, +road, sheep, sign, sky, tree, water. Additionally, there +is a void label for pixels which do not belong to any of the 21 classes or which are close to the segmentationboundary.Thisallowsa“roughandquick hand-segmentation which does not align exactly with -the object boundaries” [SWRC06]. IV. SEGMENTATIONPIPELINE +the object boundaries” [SWRC06]. 3) Medical Databases: The Warwick-QU Dataset consists of 165 images with pixel-level annotation of -5 classes: “healthy, adenomatous, moderately differen- Typically, semantic segmentation is done with a -tiated, moderately-to-poorly differentiated, and poorly classifier which operates on fixed-size feature inputs -differentiated” [CSM09]. This dataset is part of the and a sliding-window approach [DT05], [YBCK10], -Gland Segmentation (GlaS) challenge. [SCZ08]. This means a classifier is trained on images -The DIARETDB1 [KKV+14] is a dataset of 89 im- of a fixed size. The trained classifier is then fed with -ages fundus images. Those images show the interior rectangular regions of the image which are called win- -surfaceoftheeye.Fundusimagescanbeusedtodetect dows.Althoughtheclassifiergetsanimagepatchofe.g. -diabetic retinopathy. The images have four classes of 51px×51pxoftheenvironment,itmightonlyclassify -coarseannotations:hardandsoftexudates,hemorrhages the center pixel or a subset of the complete window. -and red small dots. 
This segmentation pipeline is visualized in Figure 2. +5 classes: “healthy, adenomatous, moderately differen- +tiated, moderately-to-poorly differentiated, and poorly +differentiated” [CSM09]. This dataset is part of the +Gland Segmentation (GlaS) challenge. +The DIARETDB1 [KKV+14] is a dataset of 89 im- +ages fundus images. Those images show the interior +surfaceoftheeye.Fundusimagescanbeusedtodetect +diabetic retinopathy. The images have four classes of +coarseannotations:hardandsoftexudates,hemorrhages +and red small dots. 20 test and additionally 20 training retinal fun- -This approach was taken by [BKTT15] and a major- -dus images are available through the DRIVE data ity of the VOC2007 participants [EVGW+a]. As this +dus images are available through the DRIVE data set [SAN+04]. The vessels were annotated. Addition- -approach has to apply the patch classifier 512·512= ally, [AP11] added vascular features. -262144timesforimagesofsize512px×512px,there The Open-CAS Endoscopic Datasets [MHMK+14] -are techniques for speeding it up such as applying a are 60 images taken from laparoscopic adrenalectomies -stride and interpolating the results. and 60 images taken from laparoscopic pancreatic -Neuralnetworksareabletoapplytheslidingwindow resections. Those are from 3 surgical procedures each. -approach in a very efficient way by handling a trained Half of the data was annotated by a medical expert for -network as a convolution and applying the convolution “medial instrument” and “no medical instrument”. All -on the complete image. images were labeled by anonymous untrained workers -to which they refer to as knowledge workers (KWs). However, there are alternatives. Namely MRFs and -One crowd annotation was obtained for each image by Conditional Random Fields (CRFs) which take the -a majority vote on a pixel basis of 10 segmentations information of the complete image and segment it in -given by 10 different KWs. an holistic approach. 
+to which they refer to as knowledge workers (KWs). +One crowd annotation was obtained for each image by +a majority vote on a pixel basis of 10 segmentations +given by 10 different KWs. +Training +Prediction +Post- +processing +Window-wise +Classification +Window +extraction +Data +augmentation Feature extraction +Preprocessing +Figure 2: A typical segmentation pipeline gets raw +pixel data, applies preprocessing techniques +like scaling and feature extraction like HOG +features. For training, data augmentation +techniques such as image rotation can be +applied. For every single image, patches of +the image called windows are extracted and +those windows are classified. The resulting +semantic segmentation can be refined by +simple morphologic operations or by more +complexapproachessuchasMarkovRandom +Fields (MRFs). +IV. SEGMENTATIONPIPELINE +Typically, semantic segmentation is done with a +classifier which operates on fixed-size feature inputs +and a sliding-window approach [DT05], [YBCK10], +[SCZ08]. This means a classifier is trained on images +of a fixed size. The trained classifier is then fed with +rectangular regions of the image which are called win- +dows.Althoughtheclassifiergetsanimagepatchofe.g. +51px×51pxoftheenvironment,itmightonlyclassify +the center pixel or a subset of the complete window. +This segmentation pipeline is visualized in Figure 2. +This approach was taken by [BKTT15] and a major- +ity of the VOC2007 participants [EVGW+a]. As this +approach has to apply the patch classifier 512·512= +262144timesforimagesofsize512px×512px,there +are techniques for speeding it up such as applying a +stride and interpolating the results. +Neuralnetworksareabletoapplytheslidingwindow +approach in a very efficient way by handling a trained +network as a convolution and applying the convolution +on the complete image. +However, there are alternatives. 
Namely MRFs and +Conditional Random Fields (CRFs) which take the +information of the complete image and segment it in +an holistic approach. 5 -V. TRADITIONALAPPROACHES thedirectionsiscalculatedforeachpatch.HOGfeatures -were proposed in [DT05] and are used in [BMBM10], +V. TRADITIONALAPPROACHES Image segmentation algorithms which use traditional -[FGMR10] for segmentation tasks. approaches, hence don’t apply neural networks and -3) SIFT: Scale-invariant feature transform (SIFT) make heavy use of domain knowledge, are wide-spread -feature descriptors describe keypoints in an image. The in the computer vision community. Features which can -image patch of the size 16×16 around the keypoint be used for segmentation are described in Section V-A, -is taken. This patch is divided in 16 distinct parts of a very brief overview of unsupervised, non-semantic -the size 4×4. For each of those parts a histogram of segmentationisgiveninSectionV-B,RandomDecision -8 orientations is calculated similar as for HOG features. Forests are described in Section V-C, Markov Random -This results in a 128-dimensional feature vector for Fields in Section V-E and Support Vector Machines -each keypoint. (SVMs) in Section V-D. Postprocessing is covered in -ItshouldbeemphasizedthatSIFTisaglobalfeature Section V-G. -for a complete image. It should be noted that algorithms can use combina- -SIFT is described in detail in [Low04] and are used tionof methods.For example, [TNL14]makesuse ofa -in [PTN09]. combinationofaSVMandaMRF.Also,auto-encoders -4) BOV: Bag-of-visual-words (BOV), also called can be used to learn features which in turn can be used -bag of keypoints, is based on vector quantization. by any classifier. +A. Features and Preprocessing methods +Thechoiceoffeaturesisveryimportantintraditional +approaches. The most commonly used local and global +featuresareexplainedinthefollowingaswellasfeature +dimensionality reduction algorithms. +1) PixelColor: Pixelcolorindifferentimagespaces +(e.g. 
3 features for RGB, 3 features for HSV, 1 feature +forthegray-value)arethemostwidelyusedfeatures.A +typical image is in the RGB color space, but depending +on the classifier and the problem another color space +mightresultinbettersegmentations.RGB,YcBcr,HSL, +Lab and YIQ are some examples used by [CRSS14]. +No single color space has been proven to be superior +to all others in all contexts [CJSW01]. However, the +most common choices seem to be RGB and HSI. +ReasonsforchoosingRGBissimplicityandthesupport +by programming languages, whereas the choice of +the HSI color space might make it simpler for the +classifier to become invariant to illumination. One +reason for choosing CIE-L*a*b* color space is that it +approximates human perception of brightness [KP92]. +It follows that choosing the L*a*b color space helps +algorithms to detect structures which are seen by +humans.Anotherwayofimprovingthestructurewithin +an image is histogram equalization, which can be +applied to improve contrast [PAA+87], [RM07]. +2) Histogram of oriented Gradients: Histogram of +oriented gradients (HOG) features interpret the image +as a discrete function I : N2 → {0,...,255} which +mapstheposition(x,y)toacolor.Foreachpixel,there +are two gradients: The partial derivative of x and y. +Now the original image is transformed to two feature +mapsofequalsizewhichrepresentsthegradient.These +featuremapsaresplittedintopatchesandahistogramof +thedirectionsiscalculatedforeachpatch.HOGfeatures +were proposed in [DT05] and are used in [BMBM10], +[FGMR10] for segmentation tasks. +3) SIFT: Scale-invariant feature transform (SIFT) +feature descriptors describe keypoints in an image. The +image patch of the size 16×16 around the keypoint +is taken. This patch is divided in 16 distinct parts of +the size 4×4. For each of those parts a histogram of +8 orientations is calculated similar as for HOG features. +This results in a 128-dimensional feature vector for +each keypoint. 
+ItshouldbeemphasizedthatSIFTisaglobalfeature +for a complete image. +SIFT is described in detail in [Low04] and are used +in [PTN09]. +4) BOV: Bag-of-visual-words (BOV), also called +bag of keypoints, is based on vector quantization. Similar to HOG features, BOV features are histograms which count the number of occurrences of certain -A. Features and Preprocessing methods patternswithinapatchoftheimage.BOVaredescribed -Thechoiceoffeaturesisveryimportantintraditional in [CDF+04] and used in combination with SIFT -approaches. The most commonly used local and global feature descriptors in [CP08]. -featuresareexplainedinthefollowingaswellasfeature 5) Poselets: Poselets rely on manually added extra -dimensionality reduction algorithms. keypoints such as “right shoulder”, “left shoulder”, -1) PixelColor: Pixelcolorindifferentimagespaces “right knee” and “left knee”. They were originally -(e.g. 3 features for RGB, 3 features for HSV, 1 feature used for human pose estimation. Finding those extra -forthegray-value)arethemostwidelyusedfeatures.A keypoints is easily possible for well-known image -typical image is in the RGB color space, but depending classes like humans. However, it is difficult for classes -on the classifier and the problem another color space like airplanes, ships, organs or cells where the human -mightresultinbettersegmentations.RGB,YcBcr,HSL, annotators do not know the keypoints. Additionally, the -Lab and YIQ are some examples used by [CRSS14]. keypointshavetobechosenforeverysingleclass.There -No single color space has been proven to be superior arestrategiestodealwiththoseproblemslikeviewpoint- -to all others in all contexts [CJSW01]. However, the dependentkeypoints.Poseletswereusedin[BMBM10] -most common choices seem to be RGB and HSI. to detect people and in [BBMM11] for general object -ReasonsforchoosingRGBissimplicityandthesupport detection of the PASCAL VOC dataset. 
-by programming languages, whereas the choice of 6) Textons: A texton is the minimal building block -the HSI color space might make it simpler for the ofvision.Thecomputervisionliteraturedoesnotgivea -classifier to become invariant to illumination. One strict definition for textons, but edge detectors could be -reason for choosing CIE-L*a*b* color space is that it one example. One might argue that deep learning tech- -approximates human perception of brightness [KP92]. niques with Convolution Neuronal Networks (CNNs) -It follows that choosing the L*a*b color space helps learn textons in the first filters. -algorithms to detect structures which are seen by An excellent explanation of textons can be found -humans.Anotherwayofimprovingthestructurewithin in [ZGWX05]. -an image is histogram equalization, which can be 7) Dimensionality Reduction: High-resolution im- -applied to improve contrast [PAA+87], [RM07]. ageshavealotofpixels.Havingoneormorefeatureper -2) Histogram of oriented Gradients: Histogram of pixelresultsinwelloveramillionfeatures.Thismakes -oriented gradients (HOG) features interpret the image training difficult while the higher resolution might not -as a discrete function I : N2 → {0,...,255} which contain much more information. A simple approach -mapstheposition(x,y)toacolor.Foreachpixel,there to deal with this is downsampling the high-resolution -are two gradients: The partial derivative of x and y. image to a low-resolution variant. Another way of -Now the original image is transformed to two feature doing dimensionality reduction is principal component -mapsofequalsizewhichrepresentsthegradient.These analysis (PCA), which is applied by [COWR11]. The -featuremapsaresplittedintopatchesandahistogramof idea behind PCA is to find a hyperplane on which all +in [CDF+04] and used in combination with SIFT +feature descriptors in [CP08]. 
+5) Poselets: Poselets rely on manually added extra +keypoints such as “right shoulder”, “left shoulder”, +“right knee” and “left knee”. They were originally +used for human pose estimation. Finding those extra +keypoints is easily possible for well-known image +classes like humans. However, it is difficult for classes +like airplanes, ships, organs or cells where the human +annotators do not know the keypoints. Additionally, the +keypointshavetobechosenforeverysingleclass.There +arestrategiestodealwiththoseproblemslikeviewpoint- +dependentkeypoints.Poseletswereusedin[BMBM10] +to detect people and in [BBMM11] for general object +detection of the PASCAL VOC dataset. +6) Textons: A texton is the minimal building block +ofvision.Thecomputervisionliteraturedoesnotgivea +strict definition for textons, but edge detectors could be +one example. One might argue that deep learning tech- +niques with Convolution Neuronal Networks (CNNs) +learn textons in the first filters. +An excellent explanation of textons can be found +in [ZGWX05]. +7) Dimensionality Reduction: High-resolution im- +ageshavealotofpixels.Havingoneormorefeatureper +pixelresultsinwelloveramillionfeatures.Thismakes +training difficult while the higher resolution might not +contain much more information. A simple approach +to deal with this is downsampling the high-resolution +image to a low-resolution variant. Another way of +doing dimensionality reduction is principal component +analysis (PCA), which is applied by [COWR11]. The +idea behind PCA is to find a hyperplane on which all 6 -feature vectors can be projected with a minimal loss The 4-neighborhood (north, east, south west) or an 8- -of information. A detailed description of PCA is given neighborhood (north, north-east, east, south-east, south, -by [Smi02]. south-west, west, north-west) are plausible choices. -One problem of PCA is the fact that it does not One way to cut the edges is by building a minimum -distinguish different classes. 
This means it can happen spanning tree and removing edges above a threshold. -that a perfectly linearly separable set of feature vectors This threshold can either be constant, adapted to the -becomes not separable at all after applying PCA. graph or adjusted by the user. After the edge-cutting -There are many other techniques for dimensionality step, the connected components are the segments. -reduction. An overview and a comparison over some A graph-based method which ranked 2nd in the -of them is given by [vdMPvdH09]. Pascal VOC 2010 challenge [EVGW+10] is described +feature vectors can be projected with a minimal loss +of information. A detailed description of PCA is given +by [Smi02]. +One problem of PCA is the fact that it does not +distinguish different classes. This means it can happen +that a perfectly linearly separable set of feature vectors +becomes not separable at all after applying PCA. +There are many other techniques for dimensionality +reduction. An overview and a comparison over some +of them is given by [vdMPvdH09]. +B. Unsupervised Segmentation +Unsupervised segmentation algorithms can be used +in supervised segmentation as another source of infor- +mation or to refine a segmentation. While unsupervised +segmentationalgorithmscanneverbesemantic,theyare +well-studied and deserve at least a very brief overview. +Semantic segmentation algorithms store information +about the classes they were trained to segment while +non-semantic segmentation algorithms try to detect +consistent regions or region boundaries. +1) Clustering Algorithms: Clustering algorithms can +directly be applied on the pixels, when one gives a +feature vector per pixel. Two clustering algorithms are +k-means and the mean-shift algorithm. +The k-means algorithm is a general-purpose cluster- +ing algorithm which requires the number of clusters to +be given beforehand. Initially, it places the k centroids +randomly in the feature space. 
Then it assigns each +data point to the nearest centroid, moves the centroid +to the center of the cluster and continues the process +until a stopping criterion is reached. A faster variant is +described in [Har75]. +k-means was applied by [CLP98] for medical image +segmentation. +Another clustering algorithm is the mean-shift algo- +rithm which was introduced by [CM02] for segmen- +tation tasks. The algorithm finds the cluster centers +by initializing centroids at random seed points and +iteratively shifting them to the mean coordinate within +acertainrange.Insteadoftakingahardrangeconstraint, +the mean can also be calculated by using any kernel. +This effectively applies a weight to the coordinates +of the points. The mean shift algorithm finds cluster +centers at positions with a highest local density of +points. +2) Graph Based Image Segmentation: Graph-based +imagesegmentationalgorithmstypicallyinterpretpixels +as vertices and an edge weight is a measure of +dissimilarity such as the difference in color [FH04], +[Fel]. There are several different candidates for edges. +The 4-neighborhood (north, east, south west) or an 8- +neighborhood (north, north-east, east, south-east, south, +south-west, west, north-west) are plausible choices. +One way to cut the edges is by building a minimum +spanning tree and removing edges above a threshold. +This threshold can either be constant, adapted to the +graph or adjusted by the user. After the edge-cutting +step, the connected components are the segments. +A graph-based method which ranked 2nd in the +Pascal VOC 2010 challenge [EVGW+10] is described in [CS10]. The system makes heavy use of the multi- cue contour detector globalPb [MAFM08] and needs -B. Unsupervised Segmentation about 10GB of main memory [CS11]. -Unsupervised segmentation algorithms can be used 3) Random Walks: Random walks belong to the -in supervised segmentation as another source of infor- graph-based image segmentation algorithms. 
Random -mation or to refine a segmentation. While unsupervised walk image segmentation usually works as follows: -segmentationalgorithmscanneverbesemantic,theyare Seed points are placed on the image for the different -well-studied and deserve at least a very brief overview. objects in the image. From every single pixel, the -Semantic segmentation algorithms store information probability to reach the different seed points by a -about the classes they were trained to segment while random walk is calculated. This is done by taking -non-semantic segmentation algorithms try to detect image gradients as described in Section V-A for HOG -consistent regions or region boundaries. features. The class of the pixel is the class of which a -1) Clustering Algorithms: Clustering algorithms can seed point will be reached with highest probability. At -directly be applied on the pixels, when one gives a first, this is an interactive segmentation method, but it -feature vector per pixel. Two clustering algorithms are can be extended to be non-interactive by using another -k-means and the mean-shift algorithm. segmentation methods output as seed points. -The k-means algorithm is a general-purpose cluster- 4) Active Contour Models: Active contour models -ing algorithm which requires the number of clusters to (ACMs) are algorithms which segment images roughly -be given beforehand. Initially, it places the k centroids along edges, but also try to find a border which is -randomly in the feature space. Then it assigns each smooth. This is done by defining a so called energy -data point to the nearest centroid, moves the centroid function which will be minimized. They were initially -to the center of the cluster and continues the process described in [KWT88]. ACMs can be used to segment -until a stopping criterion is reached. A faster variant is an image or to refine segmentation as it was done -described in [Har75]. in [AM98] for brain MR images. 
-k-means was applied by [CLP98] for medical image 5) Watershed Segmentation: The watershed algo- -segmentation. rithm takes a grayscale image and interprets it as a -Another clustering algorithm is the mean-shift algo- height map. Low values are catchment basins and -rithm which was introduced by [CM02] for segmen- the higher values between two neighboring catchment -tation tasks. The algorithm finds the cluster centers basins is the watershed. The catchment basins should -by initializing centroids at random seed points and contain what the developer wants to capture. This -iteratively shifting them to the mean coordinate within implies that those areas must be dark on grayscale -acertainrange.Insteadoftakingahardrangeconstraint, images. The algorithm starts to fill the basins from -the mean can also be calculated by using any kernel. the lowest point. When two basins are connected, a -This effectively applies a weight to the coordinates watershed is found. The algorithm stops when the -of the points. The mean shift algorithm finds cluster highest point is reached. -centers at positions with a highest local density of A detaileddescription ofthe watershed segmentation -points. algorithm is given in [RM00]. -2) Graph Based Image Segmentation: Graph-based The watershed segmentation was used in [JLD03] to -imagesegmentationalgorithmstypicallyinterpretpixels segment white blood cells. As the authors describe, -as vertices and an edge weight is a measure of the segmentation by watershed transform has two -dissimilarity such as the difference in color [FH04], flaws:Over-segmentationduetolocalminimaandthick -[Fel]. There are several different candidates for edges. watersheds due to plateaus. +about 10GB of main memory [CS11]. +3) Random Walks: Random walks belong to the +graph-based image segmentation algorithms. Random +walk image segmentation usually works as follows: +Seed points are placed on the image for the different +objects in the image. 
From every single pixel, the +probability to reach the different seed points by a +random walk is calculated. This is done by taking +image gradients as described in Section V-A for HOG +features. The class of the pixel is the class of which a +seed point will be reached with highest probability. At +first, this is an interactive segmentation method, but it +can be extended to be non-interactive by using another +segmentation methods output as seed points. +4) Active Contour Models: Active contour models +(ACMs) are algorithms which segment images roughly +along edges, but also try to find a border which is +smooth. This is done by defining a so called energy +function which will be minimized. They were initially +described in [KWT88]. ACMs can be used to segment +an image or to refine segmentation as it was done +in [AM98] for brain MR images. +5) Watershed Segmentation: The watershed algo- +rithm takes a grayscale image and interprets it as a +height map. Low values are catchment basins and +the higher values between two neighboring catchment +basins is the watershed. The catchment basins should +contain what the developer wants to capture. This +implies that those areas must be dark on grayscale +images. The algorithm starts to fill the basins from +the lowest point. When two basins are connected, a +watershed is found. The algorithm stops when the +highest point is reached. +A detaileddescription ofthe watershed segmentation +algorithm is given in [RM00]. +The watershed segmentation was used in [JLD03] to +segment white blood cells. As the authors describe, +the segmentation by watershed transform has two +flaws:Over-segmentationduetolocalminimaandthick +watersheds due to plateaus. 7 -C. Random Decision Forests 1) If data is linearly separable, it can be separated -by a hyperplane. There is one hyperplane which +C. Random Decision Forests Random Decision Forests were first proposed -maximizes the distance to the next datapoints in [Ho95]. 
This type of classifier applies techniques -(supportvectors).Thishyperplaneshouldbetaken: called ensemble learning, where multiple classifiers are trained and a combination of their hypotheses is -1 -used. One ensemble learning technique is the random minimize (cid:107)w(cid:107)2 -subspaces method where each classifier is trained w,b 2 -s.t. ∀m y ·((cid:104)w,x (cid:105)+b)≥1 -on a random subspace of the feature space. Another i=1 i i -(cid:124) (cid:123)(cid:122) (cid:125) -ensemble learning technique is bagging, which is sgn appliedtothisgivestheclassification +used. One ensemble learning technique is the random +subspaces method where each classifier is trained +on a random subspace of the feature space. Another +ensemble learning technique is bagging, which is training the trees on random subsets of the training set. -2) Eveniftheunderlyingprocesswhichgeneratesthe In the case of Random Decision Forests, the classifiers -features for the two classes is linearly separable, are decision trees. A decision tree is a tree where each -noise can make the data not separable. The intro- innernodeusesoneormorefeaturestodecideinwhich -duction ofslackvariables to relaxthe requirement branch to descend. Each leaf is a class. -of linear separability solves this problem. The One strength of Random Decision Forests compared -trade-off between accepting some errors and a tomanyotherclassifierslikeSVMsandneuralnetworks -more complex model is weighted by a parameter is that the scale of measure of the features (nominal, -C ∈ R+. The bigger C, the more errors are -ordinal, interval, ratio) can be arbitrary. Another advan- 0 -accepted. The new optimization problem is: +ordinal, interval, ratio) can be arbitrary. Another advan- tage of Random Decision Forests compared to SVMs, -for example, is the speed of training and classification. 
1 (cid:88)m -Decision trees were extensively studied in the past min wimize 2(cid:107)w(cid:107)2+C· ξ i -20 years and a multitude of training algorithms have i=1 -been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). s.t. ∀m i=1y i·((cid:104)w,x i(cid:105)+b)≥1−ξ i +for example, is the speed of training and classification. +Decision trees were extensively studied in the past +20 years and a multitude of training algorithms have +been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). Possible training hyperparameters are the measure to -Note that 0 ≤ ξ ≤ 1 means that the data point -i evaluatethe“goodnessofsplit”[Min89],thenumberof -is within the margin, whereas ξ ≥1 means it is -i decision trees being used, and if the depth of the trees -misclassified. An SVM with C >0 is also called is restricted. Typically in the context of classification, -a soft-margin SVM. decision trees are trained by adding new nodes until -3) The primal problem is to find the normal vector eachleafcontainsonlynodesofasingleclassoruntilit -w and the bias b. The dual problem is to express is not possible to split further. This is called a stopping -w as a linear combination of the training data x : -i criterion. -There are two typical training modes: Central axis (cid:88)m -w= α y x -projection and perceptron training. In training, for i i i -each node a hyperplane is searched which is optimal i=1 -according to an error function. where y ∈ {−1,1} represents the class of the +There are two typical training modes: Central axis +projection and perceptron training. In training, for +each node a hyperplane is searched which is optimal +according to an error function. +Random Decision Forests with texton features (see +Section V-A6) are applied in [SJC08] for segmentation. +In the [MSC] dataset, they report a per-pixel accuracy +rate of 66.9% for their best system. This system +requires415msforthesegmentationof320px×213px +images on a single 2.7GHz core. 
On the Pascal +VOC 2007 dataset, they report an average per-pixel +accuracy for their best segmentation system of 42%. +An excellent introduction to Random Decision +Forestsforsemanticsegmentationisgivenby[SCZ08]. +D. SVMs +SVMs are well-studied binary classifiers which can +be described by five central ideas. For those ideas, the +training data is represented as (x i,y i) where x i is the +feature vector and y i ∈ {−1,1} the binary label for +training example i∈{1,...,m}. +1) If data is linearly separable, it can be separated +by a hyperplane. There is one hyperplane which +maximizes the distance to the next datapoints +(supportvectors).Thishyperplaneshouldbetaken: +minimize +w,b +1 +2(cid:107)w(cid:107)2 +s.t. ∀m i=1y i·((cid:104)w,x i(cid:105)+b) +(cid:124) (cid:123)(cid:122) (cid:125) +sgn appliedtothisgivestheclassification +≥1 +2) Eveniftheunderlyingprocesswhichgeneratesthe +features for the two classes is linearly separable, +noise can make the data not separable. The intro- +duction ofslackvariables to relaxthe requirement +of linear separability solves this problem. The +trade-off between accepting some errors and a +more complex model is weighted by a parameter +C ∈ R+ 0. The bigger C, the more errors are +accepted. The new optimization problem is: +minimize +w +1 +2(cid:107)w(cid:107)2+C· +m (cid:88) +i=1ξ i +s.t. ∀m i=1y i·((cid:104)w,x i(cid:105)+b)≥1−ξ i +Note that 0 ≤ ξ +i +≤ 1 means that the data point +is within the margin, whereas ξ +i +≥1 means it is +misclassified. An SVM with C >0 is also called +a soft-margin SVM. +3) The primal problem is to find the normal vector +w and the bias b. The dual problem is to express +w as a linear combination of the training data x i: +w= +m (cid:88) +i=1α iy ix +i +where y +i +∈ {−1,1} represents the class of the +training example and α i -Random Decision Forests with texton features (see training example and α are Lagrange multipliers. +are Lagrange multipliers. 
+The usage of Lagrange multipliers is explained +with some examples in [Smi04]. The usage of the +Lagrange multipliers α i -Section V-A6) are applied in [SJC08] for segmentation. The usage of Lagrange multipliers is explained -In the [MSC] dataset, they report a per-pixel accuracy with some examples in [Smi04]. The usage of the -rate of 66.9% for their best system. This system Lagrange multipliers α changes the optimization +changes the optimization +problem depend on the α i -requires415msforthesegmentationof320px×213px problem depend on the α which are weights for +which are weights for +the feature vectors. It turns out that most α i -images on a single 2.7GHz core. On the Pascal the feature vectors. It turns out that most α will +will +be zero. The non-zero weighted vectors are called +support vectors. +The optimization problem is now, according +to [Bur98]: +maximize +αi +m +(cid:88) +i=1α i− +1 +2 +m +(cid:88) +i=1 +m +(cid:88) +j=1α iα jy iy j(cid:104)x i,x j(cid:105) +s.t. ∀m i=10≤α i -VOC 2007 dataset, they report an average per-pixel be zero. The non-zero weighted vectors are called -accuracy for their best segmentation system of 42%. support vectors. -An excellent introduction to Random Decision The optimization problem is now, according -Forestsforsemanticsegmentationisgivenby[SCZ08]. to [Bur98]: -m m m -(cid:88) 1(cid:88)(cid:88) -D. SVMs maximize α − α α y y (cid:104)x ,x (cid:105) -αi i 2 i j i j i j -SVMs are well-studied binary classifiers which can i=1 i=1j=1 -be described by five central ideas. For those ideas, the s.t. ∀m 0≤α ≤C -i=1 i -training data is represented as (x i,y i) where x i is the (cid:88)m -feature vector and y i ∈ {−1,1} the binary label for s.t. α iy i =0 -training example i∈{1,...,m}. i=1 +≤C +s.t. +m (cid:88) +i=1α iy i =0 8 -4) Not every dataset is linearly separable. This prob- yy 77 yy 88 yy 99 +4) Not every dataset is linearly separable. 
This prob- lem is approached by transforming the feature -xx xx xx -77 88 99 -vectors x with a non-linear mapping Φ into yy 44 yy 55 yy 66 +vectors x with a non-linear mapping Φ into a higher dimensional (probably ∞-dimensional) -xx xx xx -44 55 66 -space. As the feature vectors x are only used yy 11 yy 22 yy 33 -within scalar product (cid:104)x ,x (cid:105), it is not necessary -i j xx xx xx -11 22 33 +space. As the feature vectors x are only used +within scalar product (cid:104)x i,x j(cid:105), it is not necessary to do the transformation. It is enough to do the -calculation Figure 3: CRF with 4-neighborhood. Each node x i -representsapixelandeachnodey represents -i -K(x ,x )=(cid:104)x ,x (cid:105) -i j i j a label. +calculation +K(x i,x j)=(cid:104)x i,x j(cid:105) This function K is called a kernel. The idea of -never explicitly transforming the vectors x to the +never explicitly transforming the vectors x i -getslabeledasshowninFigure3.Forexample,aMRF +to the higher dimensional space is called the kernel trick. -whichistrainedonimagesofthesize224px×224pixel Common kernels include the polynomial kernel -and gets the raw RGB values as features has -K (x ,x )=((cid:104)x ,x (cid:105)+r)p -P i j i j -224·224·3+224·224=200704 -of degree p and coefficient r, the Gaussian radial (cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -input output +K P(x i,x j)=((cid:104)x i,x j(cid:105)+r)p +of degree p and coefficient r, the Gaussian radial basis function (RBF) kernel +K Gauss(x i,x +j)=e−γ(cid:107)xi−xj(cid:107)2 +2σ2 +and the sigmoid kernel +K tanh(x i,x j)=tanh(γ(cid:104)x i,x j(cid:105)−r) +where the parameter γ determines how much +influence single training examples have. +5) ThedescribedSVMscanonlydistinguishbetween +two classes. Common strategies to expand those +binary classifiers to multi-class classification is +the one-vs-all and the one-vs-one strategy. 
In the +one-vs-all strategy n classifiers have to be trained +which can distinguish one of the n classes against +all other classes. In the one-vs-one strategy +n2−n +2 +classifiers are trained; one classifier for each pair +of classes. +A detailed description of SVMs can be found +in [Bur98]. +SVMs are used by [YHRF12] on the 2009 and 2010 +PASCAL segmentation challenge [EVGW+10]. They +did not hand their classifier in to the challenge itself, +but calculated an average rank of 7 among the different +categories. +[FGMR10] also used an SVM based method with +HOG features and achieved the 7th rank in the 2010 +PASCAL segmentation challenge by mean accuracy. It +needs about 2s on a 2.8GHz 8-core Intel processor. +E. Markov Random Fields +MRFs are undirected probabilistic graphical models +which are wide-spread model in computer vision. The +overall idea of MRFs is to assign a random variable for +eachfeatureandarandomvariableforeachpixelwhich +x +1 +x +2 +x +3 +x +4 +x +5 +x +6 +x +7 +x +8 +x +9 +y 1 y 2 y 3 +y 4 y 5 y 6 +y 7 y 8 y 9 +x +1 +x +2 +x +3 +x +4 +x +5 +x +6 +x +7 +x +8 +x +9 +y 1 y 2 y 3 +y 4 y 5 y 6 +y 7 y 8 y 9 +Figure 3: CRF with 4-neighborhood. Each node x i +representsapixelandeachnodey +i +represents +a label. +getslabeledasshowninFigure3.Forexample,aMRF +whichistrainedonimagesofthesize224px×224pixel +and gets the raw RGB values as features has +224·224·3 +(cid:124) (cid:123)(cid:122) (cid:125) +input ++224·224 +(cid:124) (cid:123)(cid:122) (cid:125) +output +=200704 random variables. Those random variables are condi- -K Gauss(x i,x j)=e−γ(cid:107)x 2i σ− 2xj(cid:107)2 tionally independent, given their local neighborhood. +tionally independent, given their local neighborhood. These (in)dependencies can be expressed with a graph. 
-and the sigmoid kernel Let G=(V,E) be the associated undirected graph +Let G=(V,E) be the associated undirected graph of an MRF and C be the set of all maximal cliques in -K (x ,x )=tanh(γ(cid:104)x ,x (cid:105)−r) -tanh i j i j that graph. Nodes represent random variables x,y and -where the parameter γ determines how much edges represent conditional dependencies. Just like in -influence single training examples have. he 4-neighborhood [SWRC06] and the 8-neighborhood -5) ThedescribedSVMscanonlydistinguishbetween are reasonable choices for constructing the graph. -two classes. Common strategies to expand those Typically,randomvariablesyrepresenttheclassofa -binary classifiers to multi-class classification is singlepixel,randomvariablesxrepresentapixelvalues -the one-vs-all and the one-vs-one strategy. In the and edges represent pixel neighborhood in computer -one-vs-all strategy n classifiers have to be trained vision problems segmentation problems where MRFs -which can distinguish one of the n classes against are used. Accordingly, the random variables y live -n2−n -all other classes. In the one-vs-one strategy on 1,...,nr of classes and the random variables x +edges represent conditional dependencies. Just like in +he 4-neighborhood [SWRC06] and the 8-neighborhood +are reasonable choices for constructing the graph. +Typically,randomvariablesyrepresenttheclassofa +singlepixel,randomvariablesxrepresentapixelvalues +and edges represent pixel neighborhood in computer +vision problems segmentation problems where MRFs +are used. Accordingly, the random variables y live +on 1,...,nr of classes and the random variables x +typically live on 0,...,255 or [0,1]. +The probability of x,y can be expressed as +P(x,y)= +1 +Ze−E(x,y) +where Z = (cid:80) x,ye−E(x,y) is a normalization term +called the partition function and E is called the energy +function. A common choice for the energy function is +E(x,y)=(cid:88) +c∈Cψ c(x,y) +where ψ is called a clique potential. 
One choice for +cliques of size two x,y=(x 1,x 2) is [KP06] +ψ c(x 1,x 2)=wδ(x 1,x +2)=(cid:40) ++w if x +1 +(cid:54)=x 2 -classifiers are trained; one classifier for each pair typically live on 0,...,255 or [0,1]. -of classes. The probability of x,y can be expressed as -A detailed description of SVMs can be found +−w if x 1 -in [Bur98]. P(x,y)= e−E(x,y) -Z -SVMs are used by [YHRF12] on the 2009 and 2010 -PASCAL segmentation challenge [EVGW+10]. They where Z = (cid:80) e−E(x,y) is a normalization term -x,y -did not hand their classifier in to the challenge itself, called the partition function and E is called the energy -but calculated an average rank of 7 among the different function. A common choice for the energy function is -categories. (cid:88) -E(x,y)= ψ (x,y) -[FGMR10] also used an SVM based method with c -HOG features and achieved the 7th rank in the 2010 c∈C -PASCAL segmentation challenge by mean accuracy. It where ψ is called a clique potential. One choice for -needs about 2s on a 2.8GHz 8-core Intel processor. cliques of size two x,y=(x ,x ) is [KP06] -1 2 -(cid:40) -+w if x (cid:54)=x -E. Markov Random Fields ψ (x ,x )=wδ(x ,x )= 1 2 -c 1 2 1 2 -−w if x =x -1 2 -MRFs are undirected probabilistic graphical models -which are wide-spread model in computer vision. The According to [Mur12], the most common way of -overall idea of MRFs is to assign a random variable for inference over the posterior MRF in computer vision -eachfeatureandarandomvariableforeachpixelwhich problems is Maximum A Posteriori (MAP) estimation. +=x +2 +According to [Mur12], the most common way of +inference over the posterior MRF in computer vision +problems is Maximum A Posteriori (MAP) estimation. 9 -Detailed introductions to MRFs are given by VI. NEURALNETWORKSFORSEMANTIC -[BKR11], [Mur12]. MRFs are used by [ZBS01] and SEGMENTATION -[MSB12] for image segmentation. Artificial neural networks are classifiers which are +Detailed introductions to MRFs are given by +[BKR11], [Mur12]. 
MRFs are used by [ZBS01] and +[MSB12] for image segmentation. +F. Conditional Random Fields +CRFs are MRFs where all clique potentials are +conditioned on input features [Mur12]. This means, +instead of learning the distribution P(y,x), the task +is reformulated to learn the distribution P(y|x). One +consequence of this reformulation is that CRFs need +much less parameters as the distribution of x does +not have to be estimated. Another advantage of CRFs +compared to MRFs is that no distribution assumption +about x has to be made. +A CRF has the partition function Z: +Z(x)=(cid:88) +y +P(x,y) +and joint probability distribution +P(y|x)= +1 +Z(x) +(cid:89) +c∈Cψ c(y c|x) +The simplest way to define the clique potentials ψ is +the count of the class y c given x added with a positive +smoothing constant to prevent the complete term from +getting zero. +CRFs as described in [LRKT09] have reached top +performance in PASCAL VOC 2010 [VOC10] and +are also used in [HZCP04], [SWRC06] for semantic +segmentation. +A method similar to CRFs was proposed +in [GBVdW+10]. The system of Gonfaus et.al. +ranked 1st by mean accuracy in the segmentation task +of the PASCAL VOC 2010 challenge [EVGW+10]. +An introduction to CRFs is given by [SM11]. +G. Post-processing methods +Post-processing refine a found segmentation and +remove obvious errors. For example, the morphological +operations opening and closing can remove noise. The +opening operation is a dilation followed by a erosion. +This removes tiny segments. The closing operation is a +erosion followed by a dilation. This removes tiny gaps +in otherwise filled regions. They were used in [CLP98] +for biomedical image segmentation. +Anotherwayofrefinementofthefoundsegmentation +is by adjusting the segmentation to match close edges. +This was used in [BBMM11] with an ultra-metric +contour map [AMFM09]. +Active contour models are another example of a +post-processing method [KWT88]. +VI. 
NEURALNETWORKSFORSEMANTIC +SEGMENTATION +Artificial neural networks are classifiers which are inspired by biologic neurons. Every single artificial -F. Conditional Random Fields neuron has some inputs which are weighted and sumed +neuron has some inputs which are weighted and sumed up. Then, the neuron applies a so called activation -CRFs are MRFs where all clique potentials are functiontotheweightedsumandgivesanoutput.Those -conditioned on input features [Mur12]. This means, neurons can take either a feature vector as input or the -instead of learning the distribution P(y,x), the task output of other neurons. In this way, they build up -is reformulated to learn the distribution P(y|x). One feature hierarchies. -consequence of this reformulation is that CRFs need The parameters they learn are the weights w ∈ R. -much less parameters as the distribution of x does Theyarelearnedbygradientdescent.Todoso,anerror -not have to be estimated. Another advantage of CRFs function—usuallycross-entropyormeansquarederror -compared to MRFs is that no distribution assumption — is necessary. For the gradient descent algorithm, one -about x has to be made. sees the labeled training data as given, the weights -A CRF has the partition function Z: as variables and the error function as a surface in -(cid:88) -Z(x)= P(x,y) this weight-space. Minimizing the error function in the -y weight space adapts the neural network to the problem. +this weight-space. Minimizing the error function in the +weight space adapts the neural network to the problem. 
There are lots of ideas around neural networks like -and joint probability distribution regularization, better optimization algorithms, automat- -1 (cid:89) ically building up architectures, design choices for -P(y|x)= ψ (y |x) -Z(x) c c activationfunctions.Thisisnotexplainedindetailhere, -c∈C +ically building up architectures, design choices for +activationfunctions.Thisisnotexplainedindetailhere, but some of the mayor breakthroughs are outlined. -The simplest way to define the clique potentials ψ is CNNs are neural networks which learn image filters. -the count of the class y c given x added with a positive Theydrasticallyreducethenumberofparameterswhich -smoothing constant to prevent the complete term from have to be learned while being still general enough for -getting zero. theproblemdomainofimages.ThiswasshownbyAlex -CRFs as described in [LRKT09] have reached top Krizhevsky et al. in [KSH12]. One major idea was a -performance in PASCAL VOC 2010 [VOC10] and clever regularization called dropout training, which set -are also used in [HZCP04], [SWRC06] for semantic the output of neurons while training randomly to zero. -segmentation. Another contribution was the usage of an activation -A method similar to CRFs was proposed function called rectified linear unit: -in [GBVdW+10]. The system of Gonfaus et.al. -ranked 1st by mean accuracy in the segmentation task ϕ ReLU(x)=max(0,x) -of the PASCAL VOC 2010 challenge [EVGW+10]. Those are much faster to train than the commonly used -An introduction to CRFs is given by [SM11]. sigmoid activation functions +CNNs are neural networks which learn image filters. +Theydrasticallyreducethenumberofparameterswhich +have to be learned while being still general enough for +theproblemdomainofimages.ThiswasshownbyAlex +Krizhevsky et al. in [KSH12]. One major idea was a +clever regularization called dropout training, which set +the output of neurons while training randomly to zero. 
+Another contribution was the usage of an activation +function called rectified linear unit: +ϕ ReLU(x)=max(0,x) +Those are much faster to train than the commonly used +sigmoid activation functions +ϕ Sigmoid(x)= 1 -ϕ (x)= -G. Post-processing methods Sigmoid e−x+1 -Post-processing refine a found segmentation and Krizhevsky et al. implemented those ideas and partici- -remove obvious errors. For example, the morphological pated in the ImageNet Large-Scale Visual Recognition -operations opening and closing can remove noise. The Challenge (ILSVRC). The best other system, which -opening operation is a dilation followed by a erosion. used SIFT features and Fisher Vectors, had a perfor- -This removes tiny segments. The closing operation is a mance of about 25.7% while the network by Alex -erosion followed by a dilation. This removes tiny gaps Krizhevsky et al. got 17.0% error rate on the ILSVRC- -in otherwise filled regions. They were used in [CLP98] 2010 dataset. As a preprocessing step, they downsam- -for biomedical image segmentation. pledallimagestoafixedsizeof256px×256pxbefore -Anotherwayofrefinementofthefoundsegmentation they fed the features into their network. This network -is by adjusting the segmentation to match close edges. is commonly known as AlexNet. -This was used in [BBMM11] with an ultra-metric Since AlexNet was developed, a lot of different -contour map [AMFM09]. neural networks have been proposed. One interesting -Active contour models are another example of a exampleis[PC13],wherearecurrentCNNforsemantic -post-processing method [KWT88]. segmentation is presented. +e−x+1 +Krizhevsky et al. implemented those ideas and partici- +pated in the ImageNet Large-Scale Visual Recognition +Challenge (ILSVRC). The best other system, which +used SIFT features and Fisher Vectors, had a perfor- +mance of about 25.7% while the network by Alex +Krizhevsky et al. got 17.0% error rate on the ILSVRC- +2010 dataset. 
As a preprocessing step, they downsam- +pledallimagestoafixedsizeof256px×256pxbefore +they fed the features into their network. This network +is commonly known as AlexNet. +Since AlexNet was developed, a lot of different +neural networks have been proposed. One interesting +exampleis[PC13],wherearecurrentCNNforsemantic +segmentation is presented. 10 Another notable paper is [LSD14]. The algorithm presentedtheremakesuseofaclassifyingnetworksuch @@ -632,93 +1046,129 @@ image filter. This way, each pixel gets a probability distribution for each of the trained classes. By taking the most likely class, a semantic segmentation can be done with arbitrary image sizes. -A very recent publication by Dai et al. [DHS15] (a) LensFlare (b) Vignetting -showed that segmentation with much deeper networks Imageby[Hus07] Imageby[Man12] +A very recent publication by Dai et al. [DHS15] +showed that segmentation with much deeper networks is possible and achieves better results. More detailed explanations to neural networks for visual recognition is given by [LKJ15]. VII. POSSIBLEPROBLEMSINTHEDATAFOR SEGMENTATIONALGORITHMS Different segmentation workflows have different -problems. However, there are a couple of special cases (c) Smokebycauterization (d) Camouflage -Imageby[GVSY13] Imageby[Kaf07] +problems. However, there are a couple of special cases which should be tested. Those cases might not occur often in the training data, but it could still happen in the productive system. I am not aware of any systematic work which exam- ined the influence of problems such as the following. A. Lens Flare -Lens flare is the effect of light getting scattered in (e) Transparency (f) Viewpoint +Lens flare is the effect of light getting scattered in the lens system of the camera. The testing data set of -the KITTI road evaluation benchmark [FKG13] has a Figure 4: Examples of images which might cause -couple of photos with this problem. Figure 4(a) shows semantic segmentation systems to fail. 
+the KITTI road evaluation benchmark [FKG13] has a +couple of photos with this problem. Figure 4(a) shows an extreme example of lens flare. -B. Vignetting 2) Camouflage: Some objects, like animals in the -wild,activelytrytohide(seeFigure4(d)asanexample). +B. Vignetting Vignettingistheeffectofaphotographgettingdarker -In other cases it might just be bad luck that objects inthecorners.Thiscanhavemanyreasons,forexample -are hard for humans to detect. This problem has two filters on the camera blocking light at the corners. +C. Blurred images +Images can be blurred for a couple of reasons. A +problem with the lenses mechanics, focusing on the +wrongpoint,tooquickmovement,smokeorfoam.One +example of a blurred image is Figure 4(c), which was +takenduringaninvivoporcineprocedureofdiaphragm +dissection. The smoke was caused by cauterization. +D. Other Problems +If the following effects can occur at all and if they +are problems depends heavily on the problem domain +and the used model. +1) Partial Occlusions: Segmentation systems which +employ a model of the objects which should be +segmented might suffer from partial occlusions. +(a) LensFlare +Imageby[Hus07] +(b) Vignetting +Imageby[Man12] +(c) Smokebycauterization +Imageby[GVSY13] +(d) Camouflage +Imageby[Kaf07] +(e) Transparency (f) Viewpoint +Figure 4: Examples of images which might cause +semantic segmentation systems to fail. +2) Camouflage: Some objects, like animals in the +wild,activelytrytohide(seeFigure4(d)asanexample). +In other cases it might just be bad luck that objects +are hard for humans to detect. This problem has two interesting aspects: On the one hand, the segmenting systemmightsufferfromthesameproblemsashumans -C. Blurred images do. On the other hand, the segmenting system might be -Images can be blurred for a couple of reasons. A better than humans are, but it is forced to learn from -problem with the lenses mechanics, focusing on the images labeled by humans. 
If the labels are wrong, the -wrongpoint,tooquickmovement,smokeorfoam.One system is forced to learn something wrong. -example of a blurred image is Figure 4(c), which was +better than humans are, but it is forced to learn from +images labeled by humans. If the labels are wrong, the +system is forced to learn something wrong. 3) Semi-transparent Occlusion: Some objects like -takenduringaninvivoporcineprocedureofdiaphragm drinkingglassescanbevisibleandstillleavetheobject -dissection. The smoke was caused by cauterization. behind them visible as shown in Figure 4(e). This is mainly a definition problem: Is the seen pixel the glass -D. Other Problems label or the smartphone label? -If the following effects can occur at all and if they 4) Viewpoints: Changes in viewpoints can be a -are problems depends heavily on the problem domain problem, if they don’t occur in the training data. For -and the used model. example,animagecaptioningsystemwhichwastrained -1) Partial Occlusions: Segmentation systems which on photographs of professional photographers might -employ a model of the objects which should be not have photos from the point of view of a child. This -segmented might suffer from partial occlusions. is visualized in Figure 4(f). +label or the smartphone label? +4) Viewpoints: Changes in viewpoints can be a +problem, if they don’t occur in the training data. For +example,animagecaptioningsystemwhichwastrained +on photographs of professional photographers might +not have photos from the point of view of a child. This +is visualized in Figure 4(f). 11 -VIII. DISCUSSION REFERENCES -Ohta et al. wrote [OKS78] 38 years ago. It is one [AM98] M. S. Atkins and B. T. Mackiewich, “Fully -of the first papers mentioning semantic segmentation. automatic segmentation of the brain in -mri,” Medical Imaging, IEEE Transactions +VIII. DISCUSSION +Ohta et al. wrote [OKS78] 38 years ago. It is one +of the first papers mentioning semantic segmentation. 
In this time, a lot of work was done and many +different directions have been explored. Different kinds +of semantic segmentation have emerged. +This paper presents a taxonomy of those kinds +of semantic segmentation and a brief overview of +completely automatic, passive, semantic segmentation +algorithms. +Future work includes a comparative study of +those algorithms on publicly available dataset such +as the ones presented in Table I. Another open +question is the influence of the problems described +inSectionVII.Thiscouldbedoneusingasubsetofthe +thousands of images of Wikipedia Commons, such as +https://commons.wikimedia.org/wiki/Category:Blurring +for blurred images. +A combination of different classifiers in an ensemble +would be an interesting option to explore in order to +improve accuracy. Another direction which is currently +studiediscombiningclassifierssuchasneuralnetworks +with CRFs [ZJRP+15]. +REFERENCES +[AM98] M. S. Atkins and B. T. Mackiewich, “Fully +automatic segmentation of the brain in +mri,” Medical Imaging, IEEE Transactions on, vol. 17, no. 1, pp. 98–107, Feb. 1998. -different directions have been explored. Different kinds [Online].Available:http://ieeexplore.ieee.org/xpls/ -of semantic segmentation have emerged. abs_all.jsp?arnumber=668699 +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=668699 [AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and -This paper presents a taxonomy of those kinds J. Malik, “From contours to regions: An -of semantic segmentation and a brief overview of empirical evaluation,” in Computer Vision and -completely automatic, passive, semantic segmentation Pattern Recognition, 2009. CVPR 2009. IEEE +empirical evaluation,” in Computer Vision and +Pattern Recognition, 2009. CVPR 2009. IEEE Conferenceon. IEEE,Jun.2009,pp.2294–2301. -algorithms. 
[Online].Available:http://ieeexplore.ieee.org/xpls/ -Future work includes a comparative study of abs_all.jsp?arnumber=5206707 -those algorithms on publicly available dataset such [AP11] G. Azzopardi and N. Petkov, “Detection of +abs_all.jsp?arnumber=5206707 +[AP11] G. Azzopardi and N. Petkov, “Detection of retinal vascular bifurcations by trainable v4-like -as the ones presented in Table I. Another open filters,” in Computer Analysis of Images and -question is the influence of the problems described Patterns. Springer,2011,pp.451–459.[Online]. -inSectionVII.Thiscouldbedoneusingasubsetofthe Available:http://www.cs.rug.nl/~imaging/databases/ +Patterns. Springer,2011,pp.451–459.[Online]. +Available:http://www.cs.rug.nl/~imaging/databases/ retina_database/retinalfeatures_database.html -thousands of images of Wikipedia Commons, such as [BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik, -https://commons.wikimedia.org/wiki/Category:Blurring “Object segmentation by alignment of poselet -for blurred images. activationstoimagecontours,”inComputerVision +activationstoimagecontours,”inComputerVision and Pattern Recognition (CVPR), 2011 IEEE -A combination of different classifiers in an ensemble Conferenceon. IEEE,Jun.2011,pp.2225–2232. -would be an interesting option to explore in order to [Online].Available:http://ieeexplore.ieee.org/xpls/ -improve accuracy. Another direction which is currently abs_all.jsp?arnumber=5995659 -studiediscombiningclassifierssuchasneuralnetworks [BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5995659 +[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ segmentationusinggraphcuts,”inMedicalImage -with CRFs [ZJRP+15]. Computing and Computer-Assisted Intervention– MICCAI 2000. Springer, 2000, pp. 
276– 286.[Online].Available:http://link.springer.com/ @@ -760,279 +1210,453 @@ knowledge-based morphological operations with biomedicalapplications,”ImageProcessing,IEEE Transactionson,vol.7,no.12,pp.1673–1683,Dec. 12 -1998.[Online].Available:http://ieeexplore.ieee.org/ vol. 1, June 2005, pp. 886–893 vol. 1. -xpls/abs_all.jsp?arnumber=730379 [Online].Available:http://ieeexplore.ieee.org/xpls/ -[CM02] D. Comaniciu and P. Meer, “Mean shift: A abs_all.jsp?arnumber=1467360 -robust approach toward feature space analysis,” [EVGW+a] M. Everingham, L. Van Gool, C. K. I. -Pattern Analysis and Machine Intelligence, IEEE Williams, J. Winn, and A. Zisserman, “The -Transactionson,vol.24,no.5,pp.603–619,2002. PASCAL Visual Object Classes Challenge -[Online]. Available: http://ieeexplore.ieee.org/xpl/ 2007 (VOC2007) Results,” http://www.pascal- -login.jsp?tp=&arnumber=1000236 network.org/challenges/VOC/voc2007/workshop/index.html. -[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, [Online]. Available: http://host.robots.ox.ac.uk: -“A pixel classification system for segmenting 8080/pascal/VOC/voc2007/index.html -biomedicalimagesusingintensityneighborhoods [EVGW+b] ——,“ThePASCALVisualObjectClassesChal- -anddimensionreduction,”inBiomedicalImaging: lenge2012(VOC2012)Results,”http://www.pascal- -From Nano to Macro, 2011 IEEE International network.org/challenges/VOC/voc2012/workshop/index.html. -Symposium on. IEEE, 2011, pp. 1649–1652. [Online]. Available: http://host.robots.ox.ac.uk: -[Online].Available:https://www.andrew.cmu.edu/ 8080/pascal/VOC/voc2012/index.html -user/gustavor/chen_isbi_11.pdf [EVGW+10] M. Everingham, L. Van Gool, C. K. Williams, -[CP08] G. Csurka and F. Perronnin, “A simple high J.Winn,andA.Zisserman,“Thepascalvisualobject -performanceapproachtosemanticsegmentation.” classes (voc) challenge,” International journal of -in BMVC, 2008, pp. 1–10. [Online]. Avail- computervision,vol.88,no.2,pp.303–338,2010. 
-able: http://www.xrce.xerox.com/layout/set/print/ [EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams, +1998.[Online].Available:http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=730379 +[CM02] D. Comaniciu and P. Meer, “Mean shift: A +robust approach toward feature space analysis,” +Pattern Analysis and Machine Intelligence, IEEE +Transactionson,vol.24,no.5,pp.603–619,2002. +[Online]. Available: http://ieeexplore.ieee.org/xpl/ +login.jsp?tp=&arnumber=1000236 +[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, +“A pixel classification system for segmenting +biomedicalimagesusingintensityneighborhoods +anddimensionreduction,”inBiomedicalImaging: +From Nano to Macro, 2011 IEEE International +Symposium on. IEEE, 2011, pp. 1649–1652. +[Online].Available:https://www.andrew.cmu.edu/ +user/gustavor/chen_isbi_11.pdf +[CP08] G. Csurka and F. Perronnin, “A simple high +performanceapproachtosemanticsegmentation.” +in BMVC, 2008, pp. 1–10. [Online]. Avail- +able: http://www.xrce.xerox.com/layout/set/print/ content/download/16654/118653/file/2008-023.pdf -J. Winn, and A. Zisserman, “Visual object -[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and classeschallenge2012(voc2012),”2012.[Online]. -E.Sabo,“Coloncryptsegmentationwebsite.”[On- Available:http://host.robots.ox.ac.uk:8080/pascal/ -line].Available:http://mis.haifa.ac.il/~ishimshoni/ VOC/voc2012/index.html +[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and +E.Sabo,“Coloncryptsegmentationwebsite.”[On- +line].Available:http://mis.haifa.ac.il/~ishimshoni/ SegmentCrypt/Download.htm -[Fel] P. F. Felzenszwalb, “Graph based im- -[CRSS14] ——, “Memory based active contour algorithm age segmentation.” [Online]. Available: http: -usingpixel-levelclassifiedimagesforcoloncrypt //cs.brown.edu/~pff/segment/ +[CRSS14] ——, “Memory based active contour algorithm +usingpixel-levelclassifiedimagesforcoloncrypt segmentation,” Computerized Medical Imaging -[FGMR10] P.F.Felzenszwalb,R.B.Girshick,D.McAllester, and Graphics, Nov. 2014. 
[Online]. Available: -andD.Ramanan,“Objectdetectionwithdiscrimina- http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/ -tivelytrainedpart-basedmodels,”PatternAnalysis Active%20contour%20based%20on%20pixel- -and Machine Intelligence, IEEE Transactions on, level%20classified%20image%20for%20colon% -vol.32,no.9,pp.1627–1645,2010. 20crypts%20segmentation.pdf -[FH04] P. F. Felzenszwalb and D. P. Huttenlocher, [CS10] J. Carreira and C. Sminchisescu, “Constrained -“Efficient graph-based image segmentation,” parametricmin-cutsforautomaticobjectsegmenta- -International Journal of Computer Vision, tion,”inComputerVisionandPatternRecognition -vol. 59, no. 2, pp. 167–181, 2004. [Online]. (CVPR),2010IEEEConferenceon. IEEE,2010, -Available:http://link.springer.com/article/10.1023/ pp.3241–3248. -B:VISI.0000022288.19776.77 [CS11] ——,“Cpmc:Constrainedparametricmin-cutsfor -[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A automaticobjectsegmentation,”Feb.2011.[Online]. -new performance measure and evaluation Available: http://www.maths.lth.se/matematiklth/ -benchmark for road detection algorithms,” in personal/sminchis/code/cpmc/ -InternationalConferenceonIntelligentTransporta- [CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V. -tion Systems (ITSC), 2013. [Online]. Available: Stoecker,J.M.Malters,andJ.M.Grichnik,“An -http://www.cvlibs.net/datasets/kitti/eval_road.php improvedobjectiveevaluationmeasureforborder -[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. detection in dermoscopy images,” Skin Research -Bagdanov,J.Serrat,andJ.Gonzalez,“Harmonypo- andTechnology,vol.15,no.4,pp.444–450,2009. -tentialsforjointclassificationandsegmentation,”in [Online].Available:http://arxiv.org/abs/1009.1020 -ComputerVisionandPatternRecognition(CVPR), [CSM09] L.P.Coelho,A.Shariff,andR.F.Murphy,“Nuclear -2010IEEEConferenceon. IEEE,2010,pp.3280– segmentation in microscope cell images: a hand- -3287. 
segmenteddatasetandcomparisonofalgorithms,” -in Biomedical Imaging: From Nano to Macro, [GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and -2009.ISBI’09.IEEEInternationalSymposiumon. D.Koller,“Multi-classsegmentationwithrelative -locationprior,”InternationalJournalofComputer +in Biomedical Imaging: From Nano to Macro, +2009.ISBI’09.IEEEInternationalSymposiumon. IEEE, 2009, pp. 518–521. [Online]. Available: -http://murphylab.web.cmu.edu/data Vision,vol.80,no.3,pp.300–316,Apr.2008. -[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh, [GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.- -“Random walks based multi-image segmentation: Z.Yang,“Probabilistictrackingofaffine-invariant -Quasiconvexity results and gpu-based solutions,” anisotropicregions,”PatternAnalysisandMachine -in Computer Vision and Pattern Recognition Intelligence,IEEETransactionson,vol.35,no.1, -(CVPR), 2012 IEEE Conference on. IEEE, pp.130–143,2013. -2012, pp. 1656–1663. [Online]. Available: http: [Har75] J.A.Hartigan,Clusteringalgorithms. JohnWiley -//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf &Sons,Inc.,1975. -[DHS15] J.Dai,K.He,andJ.Sun,“Instance-awareseman- [HDT02] C. Huang, L. Davis, and J. Townshend, “An -ticsegmentationviamulti-tasknetworkcascades,” assessment of support vector machines for land -arXivpreprintarXiv:1512.04412,2015. coverclassification,”InternationalJournalofremote -[DT05] N. Dalal and B. Triggs, “Histograms of oriented sensing,vol.23,no.4,pp.725–749,2002. -gradients for human detection,” in Computer [HHR01] S.Hu,E.Hoffman,andJ.Reinhardt,“Automatic -Vision and Pattern Recognition, 2005. CVPR lung segmentation for accurate quantitation of -2005. IEEE Computer Society Conference on, volumetricx-rayctimages,”MedicalImaging,IEEE +http://murphylab.web.cmu.edu/data +[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. 
Singh, +“Random walks based multi-image segmentation: +Quasiconvexity results and gpu-based solutions,” +in Computer Vision and Pattern Recognition +(CVPR), 2012 IEEE Conference on. IEEE, +2012, pp. 1656–1663. [Online]. Available: http: +//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf +[DHS15] J.Dai,K.He,andJ.Sun,“Instance-awareseman- +ticsegmentationviamulti-tasknetworkcascades,” +arXivpreprintarXiv:1512.04412,2015. +[DT05] N. Dalal and B. Triggs, “Histograms of oriented +gradients for human detection,” in Computer +Vision and Pattern Recognition, 2005. CVPR +2005. IEEE Computer Society Conference on, +vol. 1, June 2005, pp. 886–893 vol. 1. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=1467360 +[EVGW+a] M. Everingham, L. Van Gool, C. K. I. +Williams, J. Winn, and A. Zisserman, “The +PASCAL Visual Object Classes Challenge +2007 (VOC2007) Results,” http://www.pascal- +network.org/challenges/VOC/voc2007/workshop/index.html. +[Online]. Available: http://host.robots.ox.ac.uk: +8080/pascal/VOC/voc2007/index.html +[EVGW+b] ——,“ThePASCALVisualObjectClassesChal- +lenge2012(VOC2012)Results,”http://www.pascal- +network.org/challenges/VOC/voc2012/workshop/index.html. +[Online]. Available: http://host.robots.ox.ac.uk: +8080/pascal/VOC/voc2012/index.html +[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams, +J.Winn,andA.Zisserman,“Thepascalvisualobject +classes (voc) challenge,” International journal of +computervision,vol.88,no.2,pp.303–338,2010. +[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams, +J. Winn, and A. Zisserman, “Visual object +classeschallenge2012(voc2012),”2012.[Online]. +Available:http://host.robots.ox.ac.uk:8080/pascal/ +VOC/voc2012/index.html +[Fel] P. F. Felzenszwalb, “Graph based im- +age segmentation.” [Online]. 
Available: http: +//cs.brown.edu/~pff/segment/ +[FGMR10] P.F.Felzenszwalb,R.B.Girshick,D.McAllester, +andD.Ramanan,“Objectdetectionwithdiscrimina- +tivelytrainedpart-basedmodels,”PatternAnalysis +and Machine Intelligence, IEEE Transactions on, +vol.32,no.9,pp.1627–1645,2010. +[FH04] P. F. Felzenszwalb and D. P. Huttenlocher, +“Efficient graph-based image segmentation,” +International Journal of Computer Vision, +vol. 59, no. 2, pp. 167–181, 2004. [Online]. +Available:http://link.springer.com/article/10.1023/ +B:VISI.0000022288.19776.77 +[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A +new performance measure and evaluation +benchmark for road detection algorithms,” in +InternationalConferenceonIntelligentTransporta- +tion Systems (ITSC), 2013. [Online]. Available: +http://www.cvlibs.net/datasets/kitti/eval_road.php +[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. +Bagdanov,J.Serrat,andJ.Gonzalez,“Harmonypo- +tentialsforjointclassificationandsegmentation,”in +ComputerVisionandPatternRecognition(CVPR), +2010IEEEConferenceon. IEEE,2010,pp.3280– +3287. +[GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and +D.Koller,“Multi-classsegmentationwithrelative +locationprior,”InternationalJournalofComputer +Vision,vol.80,no.3,pp.300–316,Apr.2008. +[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.- +Z.Yang,“Probabilistictrackingofaffine-invariant +anisotropicregions,”PatternAnalysisandMachine +Intelligence,IEEETransactionson,vol.35,no.1, +pp.130–143,2013. +[Har75] J.A.Hartigan,Clusteringalgorithms. JohnWiley +&Sons,Inc.,1975. +[HDT02] C. Huang, L. Davis, and J. Townshend, “An +assessment of support vector machines for land +coverclassification,”InternationalJournalofremote +sensing,vol.23,no.4,pp.725–749,2002. +[HHR01] S.Hu,E.Hoffman,andJ.Reinhardt,“Automatic +lung segmentation for accurate quantitation of +volumetricx-rayctimages,”MedicalImaging,IEEE 13 -Transactionson,vol.20,no.6,pp.490–498,Jun. invariant keypoints,” International Journal of -2001. 
ComputerVision,vol.60,no.2,pp.91–110,2004. -[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. [Online]. Available: http://dx.doi.org/10.1023/B% -Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, 3AVISI.0000029664.99615.94 -D. W. Eggert, A. Fitzgibbon, and R. B. [LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, -Fisher, “An experimental comparison of range “Spectral matting,” Pattern Analysis and -imagesegmentationalgorithms,”PatternAnalysis Machine Intelligence, IEEE Transactions on, -and Machine Intelligence, IEEE Transactions vol. 30, no. 10, pp. 1699–1712, 2008. -on, vol. 18, no. 7, pp. 673–689, Jul. 1996. [Online].Available:http://ieeexplore.ieee.org/xpls/ -[Online].Available:http://ieeexplore.ieee.org/xpls/ abs_all.jsp?arnumber=4547428 -abs_all.jsp?arnumber=506791 [LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr, -[Ho95] T. K. Ho, “Random decision forests,” in “Associativehierarchicalcrfsforobjectclassimage -Document Analysis and Recognition, 1995., segmentation,”inComputerVision,2009IEEE12th -ProceedingsoftheThirdInternationalConference International Conference on, 2009, pp. 739–746. -on, vol. 1. IEEE, 1995, pp. 278–282. [Online].Available:http://ieeexplore.ieee.org/xpls/ -[Online]. Available: http://ect.bell-labs.com/who/ abs_all.jsp?arnumber=5459248 -tkh/publications/papers/odt.pdf [LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully -[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia convolutionalnetworksforsemanticsegmentation,” -Commons, Nov. 2007. [Online]. Avail- arXiv preprint arXiv:1411.4038, 2014. [Online]. -able: https://commons.wikimedia.org/wiki/File: Available:http://arxiv.org/abs/1411.4038 -CCTV_Lens_flare.jpg [MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and -[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, J. Malik, “Using contours to detect and localize -“Multiscale conditional random fields for image junctions in natural images,” in Computer Vision -labeling,” in Computer Vision and Pattern and Pattern Recognition, 2008. 
CVPR 2008. -Recognition, 2004. CVPR 2004. Proceedings IEEE Conference on, June 2008, pp. 1–8. -of the 2004 IEEE Computer Society Conference [Online].Available:http://ieeexplore.ieee.org/xpls/ -on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. abs_all.jsp?arnumber=4587420 -[Online]. Available: http://ieeexplore.ieee.org/xpl/ [Man12] M. Manske, “File:randabschattung mikroskop -login.jsp?tp=&arnumber=1315232 kamera 6.jpg,” Wikipedia Com- -[JLD03] K.Jiang,Q.-M.Liao,andS.-Y.Dai,“Anovelwhite mons, Dec. 2012. [Online]. Avail- -bloodcellsegmentationschemeusingscale-space able: https://commons.wikimedia.org/wiki/File: -filtering and watershed clustering,” in Machine Randabschattung_Mikroskop_Kamera_6.JPG -Learning and Cybernetics, 2003 International [MBLAGJ+07] S.Maldonado-Bascon,S.Lafuente-Arroyo,P.Gil- -Conferenceon,vol.5,Nov2003,pp.2820–2825 Jimenez, H. Gomez-Moreno, and F. Lopez- -Vol.5.[Online].Available:http://ieeexplore.ieee.org/ Ferreras, “Road-sign detection and recognition -xpl/login.jsp?tp=&arnumber=1260033 based on support vector machines,” Intelligent -[Kaf07] L.Kaffer,“File:greatmaleleopardinsouthafrika- Transportation Systems, IEEE Transactions on, -jd.jpg,”WikipediaCommons,Jul.2007.[Online]. vol. 8, no. 2, pp. 264–278, Jun. 2007. -Available:https://commons.wikimedia.org/wiki/File: [Online].Available:http://ieeexplore.ieee.org/xpls/ -Great_male_Leopard_in_South_Afrika-JD.JPG abs_all.jsp?arnumber=4220659 -[KKV+14] V.Kalesnykiene,J.-k.Kamarainen,R.Voutilainen, [MBVLG02] N.Moon,E.Bullitt,K.VanLeemput,andG.Gerig, -J. Pietilä, H. Kälviäinen, and H. Uusitalo, “Automaticbrainandtumorsegmentation,”inMed- -“Diaretdb1 diabetic retinopathy database and icalImageComputingandComputer-AssistedIn- -evaluation protocol,” 2014. [Online]. Available: tervention—MICCAI 2002. Springer, 2002, pp. -http://www2.it.lut.fi/project/imageret/diaretdb1/ 372–379. -[KP92] J. M. Kasson and W. Plouffe, “An analysis of [MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. 
Malik, -selectedcomputerinterchangecolorspaces,”ACM “A database of human segmented natural -TransactionsonGraphics(TOG),vol.11,no.4,pp. images and its application to evaluating -373–405,1992. segmentationalgorithmsandmeasuringecological -[KP06] Z. Kato and T.-C. Pong, “A markov random statistics,” in Computer Vision, 2001. ICCV -field image segmentation model for color 2001. Proceedings. Eighth IEEE International -textured images,” Image and Vision Computing, Conferenceon,vol.2. IEEE,2001,pp.416–423. -vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. [Online].Available:http://ieeexplore.ieee.org/xpls/ -Available: http://www.sciencedirect.com/science/ abs_all.jsp?arnumber=937655 -article/pii/S0262885606001223 [MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann, -[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, S. Bodenstedt, A. Sanchez, C. Stock, H. G. -“Imagenet classification with deep convolutional Kenngott, M. Eisenmann, and S. Speidel, “Can -neuralnetworks,”inAdvancesinneuralinformation masses of non-experts train highly accurate -processingsystems,2012,pp.1097–1105. image classifiers?” in Medical Image Computing -[KWT88] M. Kass, A. Witkin, and D. Terzopoulos, andComputer-AssistedIntervention–MICCAI2014. -“Snakes: Active contour models,” International Springer,2014,pp.438–445.[Online].Available: -journal of computer vision, vol. 1, no. 4, pp. http://opencas.webarchiv.kit.edu/?q=node/26 -321–331, Jan. 1988. [Online]. Available: http: [Min89] J.Mingers,“Anempiricalcomparisonofselection -//link.springer.com/article/10.1007/BF00133570 measures for decision-tree induction,” Machine -[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, Learning, vol. 3, no. 4, pp. 319–342, 1989. -“CS231n: Convolutional neural networks for [Online].Available:http://dx.doi.org/10.1023/A% -visual recognition,” 2015. [Online]. Available: 3A1022645801436 -http://cs231n.stanford.edu/ [MSB12] G.Moser,S.B.Serpico,andJ.A.Benediktsson, -[Low04] D. 
Lowe, “Distinctive image features from scale- “Markovrandomfieldmodelsforsupervisedland +Transactionson,vol.20,no.6,pp.490–498,Jun. +2001. +[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. +Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, +D. W. Eggert, A. Fitzgibbon, and R. B. +Fisher, “An experimental comparison of range +imagesegmentationalgorithms,”PatternAnalysis +and Machine Intelligence, IEEE Transactions +on, vol. 18, no. 7, pp. 673–689, Jul. 1996. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=506791 +[Ho95] T. K. Ho, “Random decision forests,” in +Document Analysis and Recognition, 1995., +ProceedingsoftheThirdInternationalConference +on, vol. 1. IEEE, 1995, pp. 278–282. +[Online]. Available: http://ect.bell-labs.com/who/ +tkh/publications/papers/odt.pdf +[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia +Commons, Nov. 2007. [Online]. Avail- +able: https://commons.wikimedia.org/wiki/File: +CCTV_Lens_flare.jpg +[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, +“Multiscale conditional random fields for image +labeling,” in Computer Vision and Pattern +Recognition, 2004. CVPR 2004. Proceedings +of the 2004 IEEE Computer Society Conference +on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. +[Online]. Available: http://ieeexplore.ieee.org/xpl/ +login.jsp?tp=&arnumber=1315232 +[JLD03] K.Jiang,Q.-M.Liao,andS.-Y.Dai,“Anovelwhite +bloodcellsegmentationschemeusingscale-space +filtering and watershed clustering,” in Machine +Learning and Cybernetics, 2003 International +Conferenceon,vol.5,Nov2003,pp.2820–2825 +Vol.5.[Online].Available:http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=1260033 +[Kaf07] L.Kaffer,“File:greatmaleleopardinsouthafrika- +jd.jpg,”WikipediaCommons,Jul.2007.[Online]. +Available:https://commons.wikimedia.org/wiki/File: +Great_male_Leopard_in_South_Afrika-JD.JPG +[KKV+14] V.Kalesnykiene,J.-k.Kamarainen,R.Voutilainen, +J. Pietilä, H. Kälviäinen, and H. 
Uusitalo, +“Diaretdb1 diabetic retinopathy database and +evaluation protocol,” 2014. [Online]. Available: +http://www2.it.lut.fi/project/imageret/diaretdb1/ +[KP92] J. M. Kasson and W. Plouffe, “An analysis of +selectedcomputerinterchangecolorspaces,”ACM +TransactionsonGraphics(TOG),vol.11,no.4,pp. +373–405,1992. +[KP06] Z. Kato and T.-C. Pong, “A markov random +field image segmentation model for color +textured images,” Image and Vision Computing, +vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. +Available: http://www.sciencedirect.com/science/ +article/pii/S0262885606001223 +[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, +“Imagenet classification with deep convolutional +neuralnetworks,”inAdvancesinneuralinformation +processingsystems,2012,pp.1097–1105. +[KWT88] M. Kass, A. Witkin, and D. Terzopoulos, +“Snakes: Active contour models,” International +journal of computer vision, vol. 1, no. 4, pp. +321–331, Jan. 1988. [Online]. Available: http: +//link.springer.com/article/10.1007/BF00133570 +[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, +“CS231n: Convolutional neural networks for +visual recognition,” 2015. [Online]. Available: +http://cs231n.stanford.edu/ +[Low04] D. Lowe, “Distinctive image features from scale- +invariant keypoints,” International Journal of +ComputerVision,vol.60,no.2,pp.91–110,2004. +[Online]. Available: http://dx.doi.org/10.1023/B% +3AVISI.0000029664.99615.94 +[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, +“Spectral matting,” Pattern Analysis and +Machine Intelligence, IEEE Transactions on, +vol. 30, no. 10, pp. 1699–1712, 2008. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4547428 +[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr, +“Associativehierarchicalcrfsforobjectclassimage +segmentation,”inComputerVision,2009IEEE12th +International Conference on, 2009, pp. 739–746. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5459248 +[LSD14] J. Long, E. Shelhamer, and T. 
Darrell, “Fully +convolutionalnetworksforsemanticsegmentation,” +arXiv preprint arXiv:1411.4038, 2014. [Online]. +Available:http://arxiv.org/abs/1411.4038 +[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and +J. Malik, “Using contours to detect and localize +junctions in natural images,” in Computer Vision +and Pattern Recognition, 2008. CVPR 2008. +IEEE Conference on, June 2008, pp. 1–8. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587420 +[Man12] M. Manske, “File:randabschattung mikroskop +kamera 6.jpg,” Wikipedia Com- +mons, Dec. 2012. [Online]. Avail- +able: https://commons.wikimedia.org/wiki/File: +Randabschattung_Mikroskop_Kamera_6.JPG +[MBLAGJ+07] S.Maldonado-Bascon,S.Lafuente-Arroyo,P.Gil- +Jimenez, H. Gomez-Moreno, and F. Lopez- +Ferreras, “Road-sign detection and recognition +based on support vector machines,” Intelligent +Transportation Systems, IEEE Transactions on, +vol. 8, no. 2, pp. 264–278, Jun. 2007. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4220659 +[MBVLG02] N.Moon,E.Bullitt,K.VanLeemput,andG.Gerig, +“Automaticbrainandtumorsegmentation,”inMed- +icalImageComputingandComputer-AssistedIn- +tervention—MICCAI 2002. Springer, 2002, pp. +372–379. +[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik, +“A database of human segmented natural +images and its application to evaluating +segmentationalgorithmsandmeasuringecological +statistics,” in Computer Vision, 2001. ICCV +2001. Proceedings. Eighth IEEE International +Conferenceon,vol.2. IEEE,2001,pp.416–423. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=937655 +[MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann, +S. Bodenstedt, A. Sanchez, C. Stock, H. G. +Kenngott, M. Eisenmann, and S. Speidel, “Can +masses of non-experts train highly accurate +image classifiers?” in Medical Image Computing +andComputer-AssistedIntervention–MICCAI2014. 
+Springer,2014,pp.438–445.[Online].Available: +http://opencas.webarchiv.kit.edu/?q=node/26 +[Min89] J.Mingers,“Anempiricalcomparisonofselection +measures for decision-tree induction,” Machine +Learning, vol. 3, no. 4, pp. 319–342, 1989. +[Online].Available:http://dx.doi.org/10.1023/A% +3A1022645801436 +[MSB12] G.Moser,S.B.Serpico,andJ.A.Benediktsson, +“Markovrandomfieldmodelsforsupervisedland 14 -cover classification from very high resolution tionstrategies,”Fundam.Inform.,vol.41,no.1-2, -multispectralremotesensingimages,”inAdvances pp.187–228,2000. -in Radar and Remote Sensing (TyWRRS), 2012 [RM07] J. Reynolds and K. Murphy, “Figure-ground -Tyrrhenian Workshop on. IEEE, 2012, pp. 235– segmentation using a hierarchical conditional -242.[Online].Available:http://ieeexplore.ieee.org/ random field,” in Computer and Robot -xpl/login.jsp?tp=&arnumber=6381135 Vision, 2007. CRV ’07. Fourth Canadian -[MSC] “Object class recognition image database.” Conference on, May 2007, pp. 175–182. -[Online].Available:http://research.microsoft.com/ [Online].Available:http://ieeexplore.ieee.org/xpls/ -vision/cambridge/recognition/ abs_all.jsp?arnumber=4228537 -[MSR] “Image understanding - research data,” [RMBK06] C.Rother,T.Minka,A.Blake,andV.Kolmogorov, -Microsoft Research. [Online]. Avail- “Cosegmentation of image pairs by histogram -able:http://research.microsoft.com/en-us/projects/ matching - incorporating a global constraint -objectclassrecognition/ into mrfs,” in Computer Vision and Pattern -[Mur12] K. P. Murphy, Machine learning: a probabilistic Recognition, 2006 IEEE Computer Society -perspective. MITpress,2012. Conference on, vol. 1, June 2006, pp. 993– -[OKS78] Y.-i.Ohta,T.Kanade,andT.Sakai,“Ananalysis 1000.[Online].Available:http://ieeexplore.ieee.org/ -systemforscenescontainingobjectswithsubstruc- xpls/abs_all.jsp?arnumber=1640859 -tures,”inProceedingsoftheFourthInternational [SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer, -JointConferenceonPatternRecognitions,1978,pp. 
M.Viergever,B.VanGinnekenetal.,“Ridge-based -752–754. vesselsegmentationincolorimagesoftheretina,” -[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin, Medical Imaging, IEEE Transactions on, vol. 23, -R. Cromartie, A. Geselowitz, T. Greer, B. ter no. 4, pp. 501–509, 2004. [Online]. Available: -HaarRomeny,J.B.Zimmerman,andK.Zuiderveld, http://www.isi.uu.nl/Research/Databases/DRIVE/ -“Adaptivehistogramequalizationanditsvariations,” [SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, -Computervision,graphics,andimageprocessing, “Object class segmentation using random -vol. 39, no. 3, pp. 355–368, 1987. [Online]. forests.” in BMVC, 2008, pp. 1–10. [On- -Available: http://www.sciencedirect.com/science/ line].Available:http://research.microsoft.com/pubs/ -article/pii/S0734189X8780186X 72423/Criminisi_bmvc2008.pdf -[PC13] P. H. Pinheiro and R. Collobert, “Recurrent [SJC08] J. Shotton, M. Johnson, and R. Cipolla, -convolutional neural networks for scene parsing,” “Semantictextonforestsforimagecategorization -arXiv preprint arXiv:1306.2795, 2013. [Online]. and segmentation,” in Computer vision and -Available:http://arxiv.org/abs/1306.2795v1 pattern recognition, 2008. CVPR 2008. IEEE -[PH05] C. Pantofaru and M. Hebert, “A Conference on. IEEE, Jun. 2008, pp. 1–8. -comparison of image segmentation algorithms,” [Online].Available:http://ieeexplore.ieee.org/xpls/ -Robotics Institute, p. 336, 2005. [Online]. abs_all.jsp?arnumber=4587503 -Available: http://riweb-backend.ri.cmu.edu/ [SM11] C. Sutton and A. McCallum, “An introduction -pub_files/pub4/pantofaru_caroline_2005_1/ to conditional random fields,” Machine Learning, -pantofaru_caroline_2005_1.pdf vol. 4, no. 4, pp. 267–373, 2011. [Online]. -[PS07] A. Protiere and G. Sapiro, “Interactive Available: http://homepages.inf.ed.ac.uk/csutton/ -image segmentation via adaptive weighted publications/crftutv2.pdf -distances,” Image Processing, IEEE Transactions [Smi02] L. I. Smith, “A tutorial on principal components -on, vol. 16, no. 
4, pp. 1046–1057, 2007. analysis,”CornellUniversity,USA,vol.51,p.52, -[Online].Available:http://ieeexplore.ieee.org/xpls/ 2002. -abs_all.jsp?arnumber=4130436 [Smi04] B.T.Smith,“Lagrangemultiplierstutorialinthe -[PTN09] N.Plath,M.Toussaint,andS.Nakajima,“Multi- contextofsupportvectormachines,”MemorialUni- -classimagesegmentationusingconditionalrandom versityofNewfoundlandSt.John’s,Newfoundland, -fields and global classification,” in Proceedings Canada,Jun.2004. -of the 26th Annual International Conference on [SSA12] D.Schiebener,J.Schill,andT.Asfour,“Discovery, -MachineLearning. ACM,2009,pp.817–824. segmentation and reactive grasping of unknown -[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A objects.” in Humanoids, 2012, pp. 71–77. [On- -survey of current methods in medical image line]. Available: http://h2t.anthropomatik.kit.edu/ -segmentation,” Annual Review of Biomedical pdf/Schiebener2012.pdf -Engineering, vol. 2, no. 1, pp. 315–337, 2000, [SUM+11] D. Schiebener, A. Ude, J. Morimotot, -pMID: 11701515. [Online]. Available: http:// T. Asfour, and R. Dillmann, “Segmentation -dx.doi.org/10.1146/annurev.bioeng.2.1.315 andlearningofunknownobjectsthroughphysical -[Qui86] J. R. Quinlan, “Induction of decision trees,” interaction,” in Humanoid Robots (Humanoids), -Machine learning, vol. 1, no. 1, pp. 81–106, 2011 11th IEEE-RAS International Conference -Aug. 1986. [Online]. Available: http://dx.doi.org/ on. IEEE, 2011, pp. 500–506. [Online]. -10.1023/A%3A1022643204877 Available:http://ieeexplore.ieee.org/ielx5/6086637/ -[Qui93] ——,C4.5:ProgramsforMachineLearning,P.Lan- 6100798/06100843.pdf -gley,Ed. MorganKaufmannPublishers,Inc.,1993. [SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, -[RKB04] C.Rother,V.Kolmogorov,andA.Blake,“Grabcut: “Textonboost:Jointappearance,shapeandcontext -Interactive foreground extraction using iterated modeling for multi-class object recognition and -graph cuts,” ACM Transactions on Graphics segmentation,” in Computer Vision–ECCV 2006. 
-(TOG),vol.23,no.3,pp.309–314,2004.[Online]. Springer,2006,pp.1–15.[Online].Available:http: -Available:http://delivery.acm.org/10.1145/1020000/ //link.springer.com/chapter/10.1007/11744023_1 -1015720/p309-rother.pdf [TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, -[RM00] J. B. Roerdink and A. Meijster, “The watershed “Scene parsing with object instances and -transform:Definitions,algorithmsandparalleliza- occlusion ordering,” in Computer Vision and +cover classification from very high resolution +multispectralremotesensingimages,”inAdvances +in Radar and Remote Sensing (TyWRRS), 2012 +Tyrrhenian Workshop on. IEEE, 2012, pp. 235– +242.[Online].Available:http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=6381135 +[MSC] “Object class recognition image database.” +[Online].Available:http://research.microsoft.com/ +vision/cambridge/recognition/ +[MSR] “Image understanding - research data,” +Microsoft Research. [Online]. Avail- +able:http://research.microsoft.com/en-us/projects/ +objectclassrecognition/ +[Mur12] K. P. Murphy, Machine learning: a probabilistic +perspective. MITpress,2012. +[OKS78] Y.-i.Ohta,T.Kanade,andT.Sakai,“Ananalysis +systemforscenescontainingobjectswithsubstruc- +tures,”inProceedingsoftheFourthInternational +JointConferenceonPatternRecognitions,1978,pp. +752–754. +[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin, +R. Cromartie, A. Geselowitz, T. Greer, B. ter +HaarRomeny,J.B.Zimmerman,andK.Zuiderveld, +“Adaptivehistogramequalizationanditsvariations,” +Computervision,graphics,andimageprocessing, +vol. 39, no. 3, pp. 355–368, 1987. [Online]. +Available: http://www.sciencedirect.com/science/ +article/pii/S0734189X8780186X +[PC13] P. H. Pinheiro and R. Collobert, “Recurrent +convolutional neural networks for scene parsing,” +arXiv preprint arXiv:1306.2795, 2013. [Online]. +Available:http://arxiv.org/abs/1306.2795v1 +[PH05] C. Pantofaru and M. Hebert, “A +comparison of image segmentation algorithms,” +Robotics Institute, p. 336, 2005. [Online]. 
+Available: http://riweb-backend.ri.cmu.edu/ +pub_files/pub4/pantofaru_caroline_2005_1/ +pantofaru_caroline_2005_1.pdf +[PS07] A. Protiere and G. Sapiro, “Interactive +image segmentation via adaptive weighted +distances,” Image Processing, IEEE Transactions +on, vol. 16, no. 4, pp. 1046–1057, 2007. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4130436 +[PTN09] N.Plath,M.Toussaint,andS.Nakajima,“Multi- +classimagesegmentationusingconditionalrandom +fields and global classification,” in Proceedings +of the 26th Annual International Conference on +MachineLearning. ACM,2009,pp.817–824. +[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A +survey of current methods in medical image +segmentation,” Annual Review of Biomedical +Engineering, vol. 2, no. 1, pp. 315–337, 2000, +pMID: 11701515. [Online]. Available: http:// +dx.doi.org/10.1146/annurev.bioeng.2.1.315 +[Qui86] J. R. Quinlan, “Induction of decision trees,” +Machine learning, vol. 1, no. 1, pp. 81–106, +Aug. 1986. [Online]. Available: http://dx.doi.org/ +10.1023/A%3A1022643204877 +[Qui93] ——,C4.5:ProgramsforMachineLearning,P.Lan- +gley,Ed. MorganKaufmannPublishers,Inc.,1993. +[RKB04] C.Rother,V.Kolmogorov,andA.Blake,“Grabcut: +Interactive foreground extraction using iterated +graph cuts,” ACM Transactions on Graphics +(TOG),vol.23,no.3,pp.309–314,2004.[Online]. +Available:http://delivery.acm.org/10.1145/1020000/ +1015720/p309-rother.pdf +[RM00] J. B. Roerdink and A. Meijster, “The watershed +transform:Definitions,algorithmsandparalleliza- +tionstrategies,”Fundam.Inform.,vol.41,no.1-2, +pp.187–228,2000. +[RM07] J. Reynolds and K. Murphy, “Figure-ground +segmentation using a hierarchical conditional +random field,” in Computer and Robot +Vision, 2007. CRV ’07. Fourth Canadian +Conference on, May 2007, pp. 175–182. 
+[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4228537 +[RMBK06] C.Rother,T.Minka,A.Blake,andV.Kolmogorov, +“Cosegmentation of image pairs by histogram +matching - incorporating a global constraint +into mrfs,” in Computer Vision and Pattern +Recognition, 2006 IEEE Computer Society +Conference on, vol. 1, June 2006, pp. 993– +1000.[Online].Available:http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=1640859 +[SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer, +M.Viergever,B.VanGinnekenetal.,“Ridge-based +vesselsegmentationincolorimagesoftheretina,” +Medical Imaging, IEEE Transactions on, vol. 23, +no. 4, pp. 501–509, 2004. [Online]. Available: +http://www.isi.uu.nl/Research/Databases/DRIVE/ +[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, +“Object class segmentation using random +forests.” in BMVC, 2008, pp. 1–10. [On- +line].Available:http://research.microsoft.com/pubs/ +72423/Criminisi_bmvc2008.pdf +[SJC08] J. Shotton, M. Johnson, and R. Cipolla, +“Semantictextonforestsforimagecategorization +and segmentation,” in Computer vision and +pattern recognition, 2008. CVPR 2008. IEEE +Conference on. IEEE, Jun. 2008, pp. 1–8. +[Online].Available:http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587503 +[SM11] C. Sutton and A. McCallum, “An introduction +to conditional random fields,” Machine Learning, +vol. 4, no. 4, pp. 267–373, 2011. [Online]. +Available: http://homepages.inf.ed.ac.uk/csutton/ +publications/crftutv2.pdf +[Smi02] L. I. Smith, “A tutorial on principal components +analysis,”CornellUniversity,USA,vol.51,p.52, +2002. +[Smi04] B.T.Smith,“Lagrangemultiplierstutorialinthe +contextofsupportvectormachines,”MemorialUni- +versityofNewfoundlandSt.John’s,Newfoundland, +Canada,Jun.2004. +[SSA12] D.Schiebener,J.Schill,andT.Asfour,“Discovery, +segmentation and reactive grasping of unknown +objects.” in Humanoids, 2012, pp. 71–77. [On- +line]. Available: http://h2t.anthropomatik.kit.edu/ +pdf/Schiebener2012.pdf +[SUM+11] D. 
Schiebener, A. Ude, J. Morimotot, +T. Asfour, and R. Dillmann, “Segmentation +andlearningofunknownobjectsthroughphysical +interaction,” in Humanoid Robots (Humanoids), +2011 11th IEEE-RAS International Conference +on. IEEE, 2011, pp. 500–506. [Online]. +Available:http://ieeexplore.ieee.org/ielx5/6086637/ +6100798/06100843.pdf +[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, +“Textonboost:Jointappearance,shapeandcontext +modeling for multi-class object recognition and +segmentation,” in Computer Vision–ECCV 2006. +Springer,2006,pp.1–15.[Online].Available:http: +//link.springer.com/chapter/10.1007/11744023_1 +[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, +“Scene parsing with object instances and +occlusion ordering,” in Computer Vision and 15 -Pattern Recognition (CVPR), 2014 IEEE GLOSSARY +Pattern Recognition (CVPR), 2014 IEEE Conference on. IEEE, 2014, pp. 3748–3755. -ACM active contour model. 6 [Online].Available:http://ieeexplore.ieee.org/xpls/ abs_all.jsp?arnumber=6909874 -[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, BOV bag-of-visual-words. 5 +[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, “A measure for objective evaluation of image segmentation algorithms,” in Computer -CNN Convolution Neuronal Network. 5, 9 Vision and Pattern Recognition-Workshops, 2005. -CVPR Workshops. IEEE Computer Society CRF Conditional Random Field. 4, 8, 9, 11 +CVPR Workshops. IEEE Computer Society Conference on. IEEE, 2005, pp. 34–34. -[Online].Available:http://repository.cmu.edu/cgi/ GPU graphics processing unit. 3 +[Online].Available:http://repository.cmu.edu/cgi/ viewcontent.cgi?article=1365&context=robotics [vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. -HOG histogram of oriented gradients. 5, 6, 8 vandenHerik,“Dimensionalityreduction:Acom- parative review,” Journal of Machine Learning -Research,vol.10,no.1-41,pp.66–71,2009. ILSVRC ImageNet Large-Scale Visual Recognition +Research,vol.10,no.1-41,pp.66–71,2009. 
[VOC10] “Voc2010 preliminary results,” 2010. [Online]. -Challenge. 9 Available:http://host.robots.ox.ac.uk/pascal/VOC/ voc2010/results/index.html -[WAH97] G.-Q.Wei,K.Arbter,andG.Hirzinger,“Automatic MAP Maximum A Posteriori. 8 -tracking of laparoscopic instruments by color MR magnetic resonance. 2, 6 +[WAH97] G.-Q.Wei,K.Arbter,andG.Hirzinger,“Automatic +tracking of laparoscopic instruments by color coding,” in CVRMed-MRCAS’97, ser. Lecture -MRF Markov Random Field. 4, 8 NotesinComputerScience,J.Troccaz,E.Grimson, andR.Mösges,Eds. SpringerBerlinHeidelberg, -1997,vol.1205,pp.357–366.[Online].Available: PCA principal component analysis. 5 +1997,vol.1205,pp.357–366.[Online].Available: http://dx.doi.org/10.1007/BFb0029257 [YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell -RBF radial basis function. 8 segmentation in microscopy imagery using a bag of local bayesian classifiers,” in Biomedical -Imaging: From Nano to Macro, 2010 IEEE SIFT scale-invariant feature transform. 5 -InternationalSymposiumon,Apr.2010,pp.125– SVM Support Vector Machine. 4, 6–8 +Imaging: From Nano to Macro, 2010 IEEE +InternationalSymposiumon,Apr.2010,pp.125– 128.[Online].Available:http://ieeexplore.ieee.org/ xpls/abs_all.jsp?arnumber=5490399 [YHRF12] Y. Yang, S. Hallman, D. Ramanan, and @@ -1063,12 +1687,33 @@ of the IEEE International Conference on Computer Vision, 2015, pp. 1529–1537. [Online]. Available: http://www.robots.ox.ac.uk/~szheng/ papers/CRFasRNN.pdf +GLOSSARY +ACM active contour model. 6 +BOV bag-of-visual-words. 5 +CNN Convolution Neuronal Network. 5, 9 +CRF Conditional Random Field. 4, 8, 9, 11 +GPU graphics processing unit. 3 +HOG histogram of oriented gradients. 5, 6, 8 +ILSVRC ImageNet Large-Scale Visual Recognition +Challenge. 9 +MAP Maximum A Posteriori. 8 +MR magnetic resonance. 2, 6 +MRF Markov Random Field. 4, 8 +PCA principal component analysis. 5 +RBF radial basis function. 8 +SIFT scale-invariant feature transform. 5 +SVM Support Vector Machine. 
4, 6–8 16 APPENDIXA TABLES -Number Number -Database ImageResolution(width×height) of of Channels Datasource -Images Classes +Database ImageResolution(width×height) +Number +of +Images +Number +of +Classes +Channels Datasource ColonCryptDB (302px−1116px)×(349px−875px) 389 2 3 [CRSS] DIARETDB1 1500px×1500px 89 4 3 [KKV+14] KITTIRoad (1226px−1242px)×(370px−376px) 289 2 3 [FKG13] diff --git a/read/results/pdfplumber/1707.09725.txt b/read/results/pdfplumber/1707.09725.txt index 4846f19..c3dca81 100644 --- a/read/results/pdfplumber/1707.09725.txt +++ b/read/results/pdfplumber/1707.09725.txt @@ -1,13 +1,8 @@ Analysis and Optimization of Convolutional Neural Network Architectures -7102 -luJ -13 Master Thesis of -]VC.sc[ Martin Thoma -1v52790.7071:viXra Department of Computer Science Institute for Anthropomatics and @@ -17,6 +12,40 @@ Second reviewer: Prof. Dr.–Ing. J. M. Zöllner Advisor: Dipl.–Inform. Michael Weber Research Period: 03. May 2017 – 03. August 2017 KIT–UniversityoftheStateofBaden-WuerttembergandNationalResearchCenteroftheHelmholtzAssociation www.kit.edu +a +r +X +i +v +: +1 +7 +0 +7 +. +0 +9 +7 +2 +5 +v +1 +[ +c +s +. +C +V +] +3 +1 +J +u +l +2 +0 +1 +7 Analysis and Optimization of Convolutional Neural Network Architectures @@ -237,27 +266,50 @@ CNNs are explained in Section 2.2. The layer block pattern is described in Secti transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. 2.1. Linear Image Filters A linear image filter (also called a filter bank or a kernel) is an element F ∈ Rkw×k h×d, -where k represents the filter’s width, k the filter’s height and d the number of input -w h +where k +w +represents the filter’s width, k +h +the filter’s height and d the number of input channels. The filter F is convolved with the image I ∈ Rw×h×d to produce a new image I(cid:48). The output image I(cid:48) has only one channel. 
Each pixel I(cid:48)(x,y) of the output image gets calculated by point-wise multiplication of one filter element with one element of the original image I: -(cid:98) (cid:88)k 2w(cid:99) (cid:98) (cid:88)k 2h(cid:99) (cid:88)d -I(cid:48)(x,y) = I(x+i ,y+i ,i )·F(i ,i ,i ) -x y c x y c -ix=1−(cid:100)k 2w(cid:101)iy=1−(cid:100)k 2h(cid:101)ic=1 +I(cid:48)(x,y) = +(cid:98)kw 2 (cid:99) (cid:88) +ix=1−(cid:100)kw +2 +(cid:101) +(cid:98)kh 2 (cid:99) (cid:88) +iy=1−(cid:100)kh +2 +(cid:101) +d (cid:88) +ic=1I(x+i x,y+i y,i c)·F(i x,i y,i c) This procedure is explained by Figure 2.1. It is essentially a discrete convolution. -I ∈ R7×7 I(cid:48) ∈ R7×7 -47 187 -47 -642 -58 -849 -1 1 1 1 1 10 0 1 0 1 14 9 6 1 4 1 19 4 4 91 07 7 7 96 4 1 1 1 1 9 91 1 1 0 9 76 4 1 9 1 1 1 19 9 61 1 0 17 7 12 6 9 6 1 1 19 4 40 1 17 6 95 5 6 1 1 14 9 41 1 16 9 60 6 2 1 1 14 9 40 0 05 7 80 1 4 - F9 26 ilter- -53 8 kerne- 3 01 Re- s9 2 9 u3 8 4 l6 2 to- -5 f3 74 p3 953 2 oin- 2 t1 09 -0 w19 - - - - -5 3 7 5 4-4 2 4 1 6 00 0 8 7 - 5 4 5-2 4 55 2 95 5 39 9 64 - - -2 3 -4 1 574 59 0 550 08 5 - 2 1 1 3 26 81 8 7 6 26 01 5 3 8 32 - 83 2 51 2 18 5 73 6 18 1 18 - -2 2 91 1 51 6 78 2 45 8 60 8 - -5 1 68 6 20 5 66 5 43 6 01 5 -0 -6 l ise 8 -118 879 -63 F ∈ R3×3 multiplication 647 -112 302 +I ∈ R7×7 +Filterkernel +F ∈ R3×3 +Resultofpoint-wise +multiplication +I(cid:48) ∈ R7×7 +104 116 116 112 +58 +47 +47 +109 97 114 116 105 110 45 116 104 111 109 97 46 100 101 47 109 97 115 116 101 114 47 99 97 116 99 97 116 99 97 116 46 112 104 +112 +63 +118 +61 49 46 48 9 -3 -1 -6 5 3 2 -8 0 936 -333 -109 -282 545 291 94 -792 0 -4 -254 -498 -662 +-849 +-642 +187 +-520 45 240 211 388 215 -861 -340 559 -105 185 -138 -180 503 -718 429 350 173 251 268 -655 -567 -53 -75 80 571 -128 24 -408 596 -550 368 26 976 156 +302 +647 +879 +223 811 54 660 Figure 2.1.: Visualization of the application of a linear k×k×1 image filter. For each pixel of the outputimage,k2 multiplicationsandk2 additionsoftheproductshavetobecalculated. 
3 @@ -309,27 +361,38 @@ Traditional CNNs have three important building tools: Convolutional layers take several feature maps as input and produce n feature maps1 as output, where n is the number of filters in the convolution layer. The filter weights of the linear convolutions are the parameters which are adapted to the training data. The -number n of filters as well as the filter’s size k ×k are hyperparameters of convolutional -w h -layers. Sometimes, it is denoted as n@k ×k . Although the filter depth is usually omitted -w h -in the notation, the filters are of dimension k ×k ×d(i−1), where d(i−1) is the number of -w h +number n of filters as well as the filter’s size k +w +×k +h +are hyperparameters of convolutional +layers. Sometimes, it is denoted as n@k w×k h. Although the filter depth is usually omitted +in the notation, the filters are of dimension k +w +×k h×d(i−1), where d(i−1) is the number of feature maps of the input layer (i−1). -Another hyperparameter of convolution layers is the stride s ∈ N and the padding. +Another hyperparameter of convolution layers is the stride s ∈ N ≥1 +and the padding. Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the size of the feature maps doesn’t change. The hyperparameters of convolutional layers are -• the number of filters n ∈ N , +• the number of filters n ∈ N ≥1, +• k w,k +h +∈ N ≥1 -• k ,k ∈ N of the filter size k ×k ×d(i−1), -w h ≥1 w h +of the filter size k +w +×k h×d(i−1), • the activation function of the layer (see Table B.3) and • the stride s ∈ N ≥1 -Typical choices are n ∈ {32,64,128}, k = k = k ∈ {1,3,5,11} such as in [KSH12, -w h +Typical choices are n ∈ {32,64,128}, k +w += k +h += k ∈ {1,3,5,11} such as in [KSH12, SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1. TheconceptofweightsharingiscrucialforCNNs. Thisconceptwasintroducedin[WHH+89]. 
With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just @@ -339,15 +402,25 @@ if only the flattened output is compared. 5 2. Convolutional Neural Networks This is easier to see when the filtering operation is denoted formally: +o(i)(x) = b+ k (cid:88) -o(i)(x) = b+ w ·x with i ∈ {1,...,w}×{1,...,h}×{1,...,d} [2.1] -ij j -j=1 -(cid:98) (cid:88)k 2w(cid:99) (cid:98) (cid:88)k 2h(cid:99) (cid:88)d -o(x,y,z)(I) = b+ F (i ,i ,i )·I(x+i ,y+i ,i ) [2.2] -z x y c x y c -ix=1−(cid:100)k 2w(cid:101)iy=1−(cid:100)k 2h(cid:101)ic=1 +j=1w +ij +·x +j +with i ∈ {1,...,w}×{1,...,h}×{1,...,d} [2.1] +o(x,y,z)(I) = b+ +(cid:98)kw 2 (cid:99) (cid:88) +ix=1−(cid:100)kw +2 +(cid:101) +(cid:98)kh 2 (cid:99) (cid:88) +iy=1−(cid:100)kh +2 +(cid:101) +d (cid:88) +ic=1F z(i x,i y,i c)·I(x+i x,y+i y,i c) [2.2] with a bias b ∈ R, x ∈ {1,...,w}, y ∈ {1,...,h} and z ∈ {1,...,d} One can see that most weights of the equivalent MLP are zero and many weights are equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. @@ -356,36 +429,52 @@ estimations for those. This means a MLP which is able to compute the same functi CNN will likely have worse results on the same dataset, if a CNN architecture is suitable for the dataset. See Figure 2.2 for a visualization of the application of a convolutional layer. +3 feature maps +(e.g. RGB) +n feature maps n filters of size k×k×3 -... +widthw widthw +h +e +i +g +h +t +h +h +e +i +g +h +t +h neural -... network -... +data apply ... -h h -w w -data h ... h thgieh thgieh -t t -d d -wi wi ... -3 feature maps -n feature maps -(e.g. RGB) +... +... +... +... Figure 2.2.: Application of a single convolutional layer with n filters of size k×k×3 with stride s=1 to input data of size width×height with three channels. 6 2.2. CNN Layer Types -A convolutional layer with n filters of size k ×k and SAME padding after d(i−1) feature -w h -maps of size s ×s has n·d(i−1)·(k ·k ) parameters if no bias is used. 
In contrast, a fully -x y w h +A convolutional layer with n filters of size k +w +×k +h +and SAME padding after d(i−1) feature +maps of size s x×s +y +has n·d(i−1)·(k w·k h) parameters if no bias is used. In contrast, a fully connected layer which produces the same output size and does not use a bias would have -n·d(i−1) ·(s ×s )2 parameters. This means a convolutional layer has drastically fewer -x y +n·d(i−1) ·(s +x +×s y)2 parameters. This means a convolutional layer has drastically fewer parameters. Onetheonehand, thismeansitcanlearnlesscomplexdecisionboundaries. On the other hand, it means fewer parameters have to be learned and hence the optimization procedure needs fewer examples and the optimization objective is simpler. @@ -396,8 +485,11 @@ Another insight recently got important: Every fully connected layer has an equiv convolutional layer which has the same weights.2 This way, one can use the complete classification network as a very complex non-linear image filter which can be used for semantic segmentation. -A fully connected layer with d ∈ N inputs and n ∈ N nodes can be interpreted as a -≥1 ≥1 +A fully connected layer with d ∈ N +≥1 +inputs and n ∈ N +≥1 +nodes can be interpreted as a convolutional layer with an input of shape 1×1×d and n filters of size 1×1. This will produce an output shape 1×1×n. Every single output is connected to all of the inputs. When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize @@ -407,8 +499,7 @@ omitted if a convolution layer without padding and a filter size equal to the fe size is applied. This was used by [LSD15]. 2.2.2. Pooling Layers Pooling summarizes a p×p area of the input feature map. Just like convolutional layers, -pooling can be used with a stride of s ∈ N . As s ≥ 2 is the usual choice, pooling layers ->1 +pooling can be used with a stride of s ∈ N >1. As s ≥ 2 is the usual choice, pooling layers are sometimes also called subsampling layers. 
Typically, p ∈ {2,3,4,5} and s = 2 such as for AlexNet [KSH12] and VGG-16 [SZ14]. The type of summary for the set of activations A varies between the functions listed @@ -419,47 +510,84 @@ functions as introduced in [LGT16]. 2. Convolutional Neural Networks Name Definition Used by Max pooling max{a ∈ A} [BPL10, KSH12] -Average / mean pooling 1 (cid:80) a LeNet-5 [LBBH98] and [KSlB+10] -|A| a∈A -(cid:96) pooling (cid:112)(cid:80) a2 [Le13] -2 a∈A +Average / mean pooling 1 +|A| +(cid:80) a∈Aa LeNet-5 [LBBH98] and [KSlB+10] +(cid:96) +2 +pooling (cid:112)(cid:80) a∈Aa2 [Le13] Stochastic pooling * [ZF13] Table 2.1.: Pooling types for a set A of activations a∈R. -(*)Forstochasticpooling,eachofthep×pactivationvaluesa inthepoolingregiongets +(*)Forstochasticpooling,eachofthep×pactivationvaluesa +i +inthepoolingregiongets +pickedwithprobabilityp i -pickedwithprobabilityp i = (cid:80) aja ∈i Aaj. Thisassumestheactivationsa i arenon-negative. += ai +(cid:80) +aj∈Aaj. Thisassumestheactivationsa +i +arenon-negative. Pooling is applied for three reasons: To get local translational invariance, to get invariance -against minor local changes and, most important, for data reduction to 1 th of the data by -s2 +against minor local changes and, most important, for data reduction to 1 s2th of the data by using strides of s > 1. See Figure 2.3 for a visualization of max pooling. -2 +7 9 3 5 9 4 +0 7 0 0 9 0 +5 0 9 3 7 5 9 2 9 6 4 3 +2×2 max pooling +9 5 9 +9 9 7 +2 2 -5 0 9 3 7 5 2×2 max pooling 9 9 7 -0 7 0 0 9 0 9 5 9 -7 9 3 5 9 4 Figure 2.3.: 2×2 max pooling applied to a feature map of size 6×4 with stride s=2 and padding. Average pooling of p×p areas with stride s can be replaced by a convolutional layer. If the input of the pooling layer are d(i−1) feature maps, the convolutional layer has to have d(i−1) filters of size p×p and stride s. The ith filter has the values -  -1 ... 1 -p2 p2 - . . .  -. . . - . . .  -  -1 ... 1 -p2 p2 + + + + +1 +p2 +... 1 +p2 +. +. +. +. +. 
+. +. +. +. +1 +p2 +... 1 +p2 + + + for the dimension i and the zero matrix -  -0 ... 0 -. . . -. . . -. . . -  -0 ... 0 + + + +0 ... 0 +. +. +. +. +. +. +. +. +. +0 ... +0 + + + for all other dimensions i = 1,...,d(i−1). 8 2.2. CNN Layer Types @@ -468,20 +596,25 @@ Dropout is a technique used to prevent overfitting and co-adaptations of neurons the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is well-described in [SHK+14]. A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of -the same shape D ∈ {0,1}s is sampled, where each element d is sampled independently +the same shape D ∈ {0,1}s is sampled, where each element d i +is sampled independently from a Bernoulli distribution. The results are element-wise multiplied to calculate the output out of the Dropout layer: -out = D(cid:12)in with d ∼ B(1,p) +out = D(cid:12)in with d i +∼ B(1,p) where (cid:12) is the Hadamard product -(A(cid:12)B) := (A) (B) -i,j i,j i,j +(A(cid:12)B) +i,j +:= (A) i,j(B) +i,j Hence every value of the input gets set to zero with a dropout probability of p. Typically, Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob- ability than later layers. In order to keep the expected output at the same value, the -output of a dropout layer is multiplied with 1 when dropout is enabled [Las17, tf-16b]. +output of a dropout layer is multiplied with 1 1−p +when dropout is enabled [Las17, tf-16b]. At inference time, dropout is disabled. Dropout is usually only applied after fully connected layers, but not after convolutional layers as it usually increases the test error as pointed out in [GG16]. @@ -504,25 +637,35 @@ input features might drastically change over time. One way to approach this problem is by normalizing mini-batches as described in [IS15]. 
A Batch Normalization layer with d-dimensional input x = (x(1),...,x(d)) is first normalized point-wise to -x(k)−x¯(k) -xˆ(k) = +ˆ x(k) = +x(k)− ¯ x(k) (cid:112) s(cid:48)[x(k)]2+ε -with x¯(k) = 1 (cid:80)m x(k) being the sample mean and s(cid:48)[x(k)]2 = 1 (cid:80)m (x(k) −x¯(k)) the -m i=1 i m i=1 i -sample variance where m ∈ N is the number of training samples per mini-batch, ε > 0 +with ¯ x(k) = 1 +m +(cid:80)m i=1x(k) +i +being the sample mean and s(cid:48)[x(k)]2 = 1 +m +(cid:80)m i=1(x(k) +i +− ¯ x(k)) the +sample variance where m ∈ N ≥1 -being a small constant to prevent division by zero and x(k) is the activation of neuron k for +is the number of training samples per mini-batch, ε > 0 +being a small constant to prevent division by zero and x(k) i +is the activation of neuron k for training sample i. Additionally, for each activation x(k) two parameters γ(k),β(k) are introduced which scale and shift the feature: -y(k) = γ(k)·xˆ(k)+β(k) +y(k) = γ(k)· ˆ x(k)+β(k) Inthecaseoffullyconnectedlayers, thisisappliedtotheactivation, beforethenon-linearity is applied. If it is applied after the activation, it harms the training in early stages. For convolution, only one γ and one β is learned per feature map. +One important special case is γ(k) = (cid:112) -One important special case is γ(k) = s(cid:48)[x(k)]2+ε and β(k) = x¯(k), which would make the +s(cid:48)[x(k)]2+ε and β(k) = ¯ x(k), which would make the Batch Normalization layer an identity layer. During evaluation time,3 the expected value and the variance are calculated once for the complete dataset. An unbiased estimate of the empirical variance is used. @@ -537,8 +680,9 @@ The authors of [IS15] suggest to use Batch Normalization before the activation f as in Items 1 and 4. 
Batch Normalization after the activation lead to better results in https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md Another normalization layer is Local Response Normalization as described in [KSH12], -which includes (cid:96) normalization as described in [WWQ13]. Those two normalization layers, +which includes (cid:96) 2 +normalization as described in [WWQ13]. Those two normalization layers, however, are superseded by Batch Normalization. 3also called inference time 10 @@ -554,19 +698,25 @@ as introduced in [HZRS15a] is to add an identity connection which skips two laye identity connection adds the feature maps onto the other feature maps and thus requires the output of the input layer of the residual block to be of the same dimension as last layer of the residual block. -Formally, it can be described as follows. If x are the feature maps after layer i and x is -i 0 +Formally, it can be described as follows. If x +i +are the feature maps after layer i and x +0 +is the input image, H is a non-linear transformation of feature maps, then y = H(x) describes a traditional CNN. Note that this could be multiple layers. A residual block as visualized in Figure 2.4 is described by y = H(x)+x In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if -conv (x ) describes the application of the convolutional layer i to the input x without the -i i i +conv i(x i) describes the application of the convolutional layer i to the input x +i +without the nonlinearity, then such a residual block is -x = conv (ReLU(conv (x )))+x -i+2 i+1 i i i +x +i+2 += conv i+1(ReLU(conv i(x i)))+x +i Figure 2.4.: ResNet module Image source: [HZRS15a] [HM16] provides some insights why deep residual networks are successful. @@ -576,18 +726,22 @@ Image source: [HZRS15a] Two common ways to add more parameters to neural networks are increasing their depth by adding more layers or increasing their width by adding more neurons / filters. 
Inception blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as -“ResNeXt block”: Increasing the cardinality C ∈ N . By cardinality, the authors describe -≥1 +“ResNeXt block”: Increasing the cardinality C ∈ N ≥1. By cardinality, the authors describe the concept of having C small convolutional networks with the same topology but different weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not combine aggregation blocks with residual blocks as the authors did. 256-d in -4 @ 1×1×256 4 @ 1×1×256 total 32 4 @ 1×1×256 +concatenate +total 32 groups ... -4 @ 3×3×4 4 @ 3×3×4 4 @ 3×3×4 -concatenate 128-d out +4 @ 1×1×256 +4 @ 3×3×4 +4 @ 1×1×256 +4 @ 3×3×4 +4 @ 1×1×256 +4 @ 3×3×4 Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer convolutional network. The first layer receives 256 feature maps and applies four 1×1 filters to it. The second layer applies four 3×3 filters. Although every group has @@ -595,8 +749,7 @@ the same topology, the learned weights are different. The outputs of the groups concatenated. The hyperparameters of an aggregation block are: • The topology of the group members. -• The cardinality C ∈ N . Note that a cardinality of C = 1 is equivalent in every -≥1 +• The cardinality C ∈ N ≥1. Note that a cardinality of C = 1 is equivalent in every aspect to using the group network without an aggregation block. 12 2.3. CNN Blocks @@ -604,28 +757,36 @@ aspect to using the group network without an aggregation block. Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The idea is to connect each convolutional layer directly to subsequent convolutional layers. Traditional CNNs with L layers and one input layer have L connections between layers, -but dense blocks have L(L+1) connections between layers. The input feature maps are +but dense blocks have L(L+1) 2 +connections between layers. 
The input feature maps are concatenated in depth. According to the authors, this prevents features from being re- learned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors used only on the order of 12 feature maps per layer. A dense block is visualized in Figure 2.6. 256-d in -256-d k @ 3×3 -k-d concatenate -(256+k)-d k @ 3×3 -k-d concatenate +256-d +k-d +(256+k)-d +k-d (256+L·k)-d out Figure 2.6.: Dense block with L=2 layers and a growth factor of k. Dense block have five hyperparameters: • The activation function being used. The authors use ReLU. -• The size k ×k of filters. The authors use k = k = 3. -w h w h +• The size k +w +×k +h +of filters. The authors use k +w += k +h += 3. • The number of layers L, where L = 2 is a simple convolutional layer. • The number k of filters added per layer (called growth rate in the paper) It might be necessary use 1×1 convolutions to reduce the number of L·k feature maps. @@ -635,10 +796,14 @@ It might be necessary use 1×1 convolutions to reduce the number of L·k feature Transition layers are used to overcome constraints imposed by resource limitations or architecturaldesignchoices. Oneconstraintisthenumberoffeaturemaps(seeAppendixC.3 for details). In order to reduce the number of feature maps while still keeping as much -relevant information as possible in the network, a convolutional layer i with k filters of +relevant information as possible in the network, a convolutional layer i with k i -the shape 1×1×k is added. The number of filters k directly controls the number of -i−1 i +filters of +the shape 1×1×k +i−1 +is added. The number of filters k +i +directly controls the number of generated feature maps. In order to reduce the dimensionality (width and height) of the feature maps, one typically applies pooling. 
@@ -680,45 +845,61 @@ are not covered by the training set and thus indicate the need to collect more d 15 2. Convolutional Neural Networks 2.5.2. Confusion Matrices -A confusion matrix is a matrix (c) ∈ NK×K, where K ∈ N is the number of classes, -ij ≥0 ≥2 -which contains all correct and wrong classifications. The item c is the number of times +A confusion matrix is a matrix (c) +ij +∈ NK×K +≥0 +, where K ∈ N +≥2 +is the number of classes, +which contains all correct and wrong classifications. The item c ij +is the number of times items of class i were classified as class j. This means the correct classification is on the -diagonal c and all wrong classifications are of the diagonal. The sum (cid:80)K (cid:80)K c is the -ii i=1 j=1 ij -total number of samples which were evaluated and (cid:80) i=1cii is the accuracy. +diagonal c +ii +and all wrong classifications are of the diagonal. The sum (cid:80)K i=1(cid:80)K j=1c +ij +is the +total number of samples which were evaluated and (cid:80) i=1cii (cid:80)K i=1(cid:80)K j=1cij -The sums r(i) = (cid:80)K c of each class i are worth being investigated as they show if the -j=1 ij +is the accuracy. +The sums r(i) = (cid:80)K j=1c +ij +of each class i are worth being investigated as they show if the classes are skewed. If the number of samples of one class dominates the data set, then the classifier can get a high accuracy by simply always prediction the most common class. If the accuracy of the classifier is close to the a priory probability of the most common class, techniques to deal with skewed classes might help. An automatic criterion to check for this problem is +accuracy ≤ max({r(i) | i = 1,...,k}) -accuracy ≤ +ε (cid:80)k -r(i) -i=1 +i=1r(i) ++ε where ε is a small value to compensate the fact that some examples might be correct just by chance. 
Other values which should be checked are the class-wise sensitivities: -# correctly identified instances of class k c +s(k) = +# correctly identified instances of class k +# instances of class k += +c kk -s(k) = = ∈ [0,1] -# instances of class k r(k) +r(k) +∈ [0,1] If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is necessary for s(i). The class-wise confusion +f confusability(k 1,k 2) = c k1k2 -f (k ,k ) = -confusability 1 2 (cid:80)K -c -j=1 k1j -indicates if class k gets often classified as class k . The highest values here can indicate -1 2 +(cid:80)K +j=1c +k1j +indicates if class k +1 +gets often classified as class k 2. The highest values here can indicate if two classes should be merged or a specialized model for separating those classes could improve the overall system. 2.5.3. Validation Curves: Accuracy, loss and other metrics @@ -731,15 +912,16 @@ validation curves give an indicator if training longer improves the model’s pe 2.5. Analysis Techniques plotting the error on the training set as well as the error on a validation set, one can also estimate if overfitting might become a problem. See Figure 2.7 for an example. +10 20 30 40 50 60 70 80 90 100 +0.2 +0.4 +0.6 +0.8 +overfitting +Epochs Error Training set Validation set -0.8 -0.6 -0.4 -0.2 overfitting -Epochs -10 20 30 40 50 60 70 80 90 100 Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs and the quality metric is the error (1−accuracy). The longer the network is trained, the better it gets on the training set. At some point the network is fit too well to the @@ -754,22 +936,39 @@ Loss functions The loss function (also called error function or cost function) is a function which assigns a real value to a complex event like the predicted class of a feature vector. It is used to define the objective function. 
For classification problems the loss function is typically cross-entropy -with (cid:96) or (cid:96) regularization, as it was described in [NH92]: -1 2 -(cid:96)1 (cid:96)2 -K (cid:122) (cid:125)(cid:124) (cid:123) (cid:122) (cid:125)(cid:124) (cid:123) -(cid:88)(cid:88) (cid:88) (cid:88) -E (W) = − [txlog(ox)+(1−tx)log(1−ox)]+λ · |w|+λ · w2 -CE k k k k 1 2 -x∈Xk=1 w∈W w∈W -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -cross-entropydataloss modelcomplexityloss -where W are the weights, X is the training data set, K ∈ N 0 is the number of classes and -≥ -tx indicates if the training example x is of class k. ox is the output of the classification -k k -algorithm which depends on the weights. λ ,λ ∈ [0,∞) weights the regularization and is -1 2 +with (cid:96) +1 +or (cid:96) +2 +regularization, as it was described in [NH92]: +E CE(W) = +−(cid:88) +x∈X +K +(cid:88) +k=1[tx klog(ox k)+(1−tx k)log(1−ox k)] +(cid:124) (cid:123)(cid:122) (cid:125) +cross-entropydataloss ++λ 1· +(cid:96)1 +(cid:122) (cid:125)(cid:124) (cid:123) +(cid:88) +w∈W|w|+λ 2· +(cid:96)2 +(cid:122) (cid:125)(cid:124) (cid:123) +(cid:88) +w∈Ww2 +(cid:124) (cid:123)(cid:122) (cid:125) +modelcomplexityloss +where W are the weights, X is the training data set, K ∈ N ≥0 is the number of classes and +tx +k +indicates if the training example x is of class k. ox +k +is the output of the classification +algorithm which depends on the weights. λ 1,λ +2 +∈ [0,∞) weights the regularization and is typically smaller than 0.1. 17 2. Convolutional Neural Networks @@ -797,29 +996,39 @@ tion might be bad. 2.5. Analysis Techniques Quality criteria There are several quality criteria for classification models. Most quality criteria are based -the confusion matrix c which denotes at c the number of times the real class was i and j +the confusion matrix c which denotes at c ij +the number of times the real class was i and j was predicted. 
This means the diagonal contains the number of correct predictions. For -the following, let t = (cid:80)k c be the number of training samples for class i. The most -i j=1 ij +the following, let t +i += (cid:80)k j=1c +ij +be the number of training samples for class i. The most common quality criterion is accuracy: +accuracy(c) = (cid:80)k -c -accuracy(c) = i=1 ii ∈ [0,1] +i=1c +ii (cid:80)k -t -i=1 i +i=1t +i +∈ [0,1] One problem of accuracy as a quality criterion are skewed classes. If one class is by far more common than all other classes, then the simplest way to achieve a high score is to always classify everything as the most common class. In order to fix this problem, one can use the mean accuracy: +mean-accuracy(c) = 1 k -mean-accuracy(c) = 1 ·(cid:88) c ii ∈ [0,1] -k t -i +· +k +(cid:88) i=1 -For two-class problems there are many other metrics like precision, recall and F -score. -β +c ii +t +i +∈ [0,1] +For two-class problems there are many other metrics like precision, recall and F β-score. Quality criteria for semantic segmentation are explained in [Tho16]. Besides the quality of the classification result, several other quality criteria are important in practice: @@ -850,16 +1059,17 @@ not help. Instead, the model or the training algorithm need to be adjusted. If the training set’s learning curve is significantly higher than the validation set’s learning curve, then removing features (e.g., by decreasing the images resolution), more training samples or more regularization will help. -Error -Validation set -Training set +10 20 30 40 50 60 70 80 90 100 +0.2 +0.4 0.6 -0.4 variance avoidable bias +variance human-level error -0.2 Training samples -10 20 30 40 50 60 70 80 90 100 +Error +Validation set +Training set Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given architecture will make to fit the given training data. 
At the same time, it is expected that the training data gets more similar to the true distribution of the data which @@ -895,20 +1105,24 @@ image by something. This could be a gray square as in [ZF14] or a black superpix in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., superpixel or position of the square) and the regions are then colored to generate either a correct class heatmap of the most-likely class image. It is important to note that the color -at region r denotes the result if r is occluded. -i i +at region r +i +denotes the result if r +i +is occluded. Both visualizations are shown in Figure 2.10. One can see that the network makes sensible predictionsforthisimageoftheclass“Pomeranian”. However, theimageoftheclass“Afghan Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. Gradient-based approaches In [SVZ13], a gradient-based approach was used to generate image-specific class saliency maps. The authors describe the problem as a ranking problem, where each pixel of the -image I is assigned a score S (I ) for a class c of interest. CNNs are non-linear functions, -0 c 0 -but they can be approximated by the first order Taylor expansion S (I) ≈ wTI +b where +image I +0 +is assigned a score S c(I 0) for a class c of interest. CNNs are non-linear functions, +but they can be approximated by the first order Taylor expansion S c(I) ≈ wTI +b where +w is the derivative of S c -w is the derivative of S at I . -c 0 +at I 0. 21 2. Convolutional Neural Networks 2.5.6. Argmax Method @@ -950,31 +1164,37 @@ If the set of learned filters changes with initialization, this might be an indi little capacity of that layer. Hence adding more filters to that layer could improve the performance. 
Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: -(cid:104)W ,T(W ,x,y)(cid:105) -i j f -ρ (W ,W ) = max ∈ [−1,1], -k i j -(x,y)∈{−k,...,k}2\(0,0) (cid:107)W i(cid:107) 2(cid:107)W j(cid:107) +ρ k(W i,W j) = max +(x,y)∈{−k,...,k}2\(0,0) +(cid:104)W i,T(W j,x,y)(cid:105) +f +(cid:107)W i(cid:107) 2(cid:107)W j(cid:107) 2 +∈ [−1,1], where T(·,x,y) denotes the translation of the first operand by (x,y), with zero padding at -the borders to keep the shape. (cid:104)·,·(cid:105) denotes the flattened inner product, where the two +the borders to keep the shape. (cid:104)·,·(cid:105) f +denotes the flattened inner product, where the two operands are flattened into column vectors before applying the standard inner product. The closer the absolute value of the k-translation correlation to one, the more similar two filters -W ,W are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and -i j +W i,W +j +are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found this by comparing the averaged maximum k-translational correlation of the networks with Gaussian-distributed initialized filters. The averaged maximum k-translational correlation is defined as +¯ ρ k(W) = +1 N -1 (cid:88) N -ρ¯ (W) = max ρ (W ,W ) -k k i j -N j=1,j(cid:54)=i +N +(cid:88) i=1 -where N is the number of filters in the layer W and W denotes the ith filter. +N +max j=1,j(cid:54)=iρ k(W i,W j) +where N is the number of filters in the layer W and W i +denotes the ith filter. 2.5.9. Weight update tracking Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if the learning rate is well-chosen. He suggests that the weight update should be in the order @@ -1046,17 +1266,26 @@ connected to the output nodes. 3. Topology Learning 4. 
Correlation Maximization: Train the weights of the candidates by maximizing S, the correlation between candidates output value V with the networks residual error: +S = (cid:88) +o∈O(cid:12) +(cid:12) (cid:12) (cid:12) +(cid:12) +(cid:12)(cid:88) p∈T(cid:0) V p− ¯ V(cid:1) (E p,o− ¯ E +o)(cid:12) +(cid:12) (cid:12) (cid:12) -S = (cid:88)(cid:12) (cid:12)(cid:88)(cid:0) V p−V¯(cid:1) (E p,o−E¯ o)(cid:12) (cid:12) -(cid:12) (cid:12) -o∈O(cid:12)p∈T (cid:12) -where O is the set of output nodes, T is the training set, V is the candidate neurons +(cid:12) +(cid:12) +where O is the set of output nodes, T is the training set, V p -activation for a training pattern p. E is the residual output error at node o for +is the candidate neurons +activation for a training pattern p. E p,o -pattern p. V¯ and E¯ are averaged values over all elements of T. This step is finished +is the residual output error at node o for +pattern p. ¯ V and ¯ E o +are averaged values over all elements of T. This step is finished when the correlation no longer increases. 5. Candidate selection: Keep the candidate node with the highest correlation, freeze its incoming weights and add connections to the output nodes. @@ -1069,35 +1298,47 @@ right corner. The black squares represent frozen weights which are found by corr maximization whereas the white squares are trainable weights. 3.1.2. Meiosis Networks Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where -weights are deterministic and fixed at prediction time, each weight w in Meiosis networks +weights are deterministic and fixed at prediction time, each weight w ij +in Meiosis networks follows a normal distribution: -w ∼ N(µ ,σ2) -ij ij ij +w +ij +∼ N(µ ij,σ2 ij) 28 3.2. Pruning approaches -Hence every connection has two learned parameters: µ and σ2. -ij ij +Hence every connection has two learned parameters: µ +ij +and σ2 ij. 
The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled weights: -(cid:80) (cid:80) -σ σ -i ij > 1 and k jk > 1 -(cid:80) (cid:80) -µ µ -i ij k jk +(cid:80) +iσ +ij +(cid:80) +iµ +ij +> 1 and +(cid:80) +kσ +jk +(cid:80) +kµ +jk +> 1 The mean of the new nodes is sampled around the old mean, half the variance is assigned to the new connections. Hence Meiosis networks only change the number of neurons per layer. They do not add layers or add skip connections. 3.1.3. Automatic Structure Optimization Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on- -line handwriting recognition. It makes use of the confusion matrix C = (c ) ∈ Nk×k -ij ≥0 +line handwriting recognition. It makes use of the confusion matrix C = (c ij) ∈ Nk×k +≥0 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix -S with s j = s i = c ·c . The maximum of S defines where the ASO algorithm adds -i j ij ji +S with s ij = s ji = c +ij +·c ji. The maximum of S defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. 3.2. Pruning approaches Pruning approaches start with a network which is bigger than necessary and prune it. The @@ -1115,18 +1356,17 @@ Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calcul the effect on the objective function of deleting k. The authors call the effect of the deletion 29 3. Topology Learning -of parameter k the saliency s . The parameters with the lowest saliency are deleted, which -k +of parameter k the saliency s k. The parameters with the lowest saliency are deleted, which means they are set to 0 and are not updated anymore. A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights in a much better way. 
This requires, however, to calculate the inverse Hessian matrix H−1 ∈ Rn×n where n ∈ N is typically n > 106. A much simpler and computationally cheaper pruning criterion is the weight magnitude. [HPTD15] prunes all weights w which are below a threshold θ: - -w if w ≥ θ w ← -0 otherwise + + w if w ≥ θ +0 otherwise 3.3. Genetic approaches The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which can recombine themselves via crossover and inversion. An introduction to such algorithms @@ -1186,15 +1426,16 @@ One idea to approach this problem is by building a hierarchy of classifiers. The classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single classes. Figure 4.1 gives an example for an hierarchy of classifiers. Figure 4.1.: Exampleforahierarchyofclassifiers. Eachclassifierisvisualizedbyaroundedrectangle. -The root classifier C has to distinguish six coarse classes (pedestrian, four+-wheelers, +The root classifier C 0 -traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C predicts a +has to distinguish six coarse classes (pedestrian, four+-wheelers, +traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C 0 +predicts a pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C 0 predicts traffic sign, then another classifier has to predict if it is a speed limit, a -sign indicating danger or something else. If C , however, predicts road, then no other -0 +sign indicating danger or something else. If C 0, however, predicts road, then no other classifier will become active. In this example, the problem has 17 classes. The hierarchical approach introduces 7 clusters of classes and thus uses 8 classifiers. @@ -1210,8 +1451,7 @@ children. Siblings are not affected. 
In the example from Figure 4.1, the classif which distinguishes traffic signs can be changed while the classification as pedestrian, four+-wheelers, traffic sign, street, other will not be affected. Also, the classification between speed limits, danger signs and other signs will not change. -• Faster training: Except for the root classifier C , each other classifier will have -0 +• Faster training: Except for the root classifier C 0, each other classifier will have less than the total amount of training data. Depending on the combined classes, the models could also be simpler. Hence the training time is reduced. • Weighting of errors: In practice, some errors are more severe than others. For @@ -1234,8 +1474,9 @@ can be directly with standard clustering algorithms such as k-means, DBSCAN [EKS OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of them do not allow a human to improve the found clustering manually. -The confusion matrix (c) ∈ Nk×k states how often class i was present and class j was +The confusion matrix (c) ij +∈ Nk×k states how often class i was present and class j was 34 4.2. Clustering classes predicted. The more often this confusion happens, the more similar those two classes are to @@ -1252,11 +1493,15 @@ have to be swapped to in order to keep the same confusion matrix. • If two classes are confused often, then they are similar to the classifier. Hence the order of the classes is permutated in such a way that the highest errors are close to the diagonal. One possible objective function to be minimized is -n n -(cid:88)(cid:88) -f(C) = C ·|i−j| [4.1] +f(C) = +n +(cid:88) +i=1 +n +(cid:88) +j=1C ij -i=1 j=1 +·|i−j| [4.1] which punishes errors linearly with the distance to the diagonal. This method is called CMO in the following. 
As pointed out by Tobias Ribizel (personal communication), this optimization problem @@ -1286,8 +1531,9 @@ Those will be moved to the corners of the confusion matrix by optimizing Equatio Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. With such a permutation, only n−1 binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions C(cid:48) +C(cid:48) for -i,i+1 i+1,i +classes has to be read. Alternatively, one can calculate the confusions C(cid:48) i,i+1+C(cid:48) +i+1,i +for each pair of classes which are neighbors in the confusion matrix. The higher this value, the more similar are the classes according to the classifier. Hence a threshold θ can be applied. θ can either be set automatically (e.g., such that 10% of all pairs are above the threshold) @@ -1369,8 +1615,9 @@ Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsamp 32px×32px. 38 5.1. Baseline Model and Training setup -# Type Filters @ Parameters FLOPs Output size +# Type Filters @ Patch size / stride +Parameters FLOPs Output size Input 0 0 3@32× 32 1 Convolution 32@3×3×3 /1 896 1736704 32@32× 32 2 BN + ELU 64 163904 32@32× 32 @@ -1394,19 +1641,42 @@ Dropout 0.5 0 0 512@ 1× 1 15 Convolution k @1×1×512/1 k·(512+1) 1024·k k @ 1× 1 Global avg Pooling 1×1 0 k k @ 1× 1 16 BN + Softmax 2k 7k k @ 1× 1 -(cid:80) 515k 1032k 103424+2k -+892512 +55729664 +(cid:80) 515k ++892512 +1032k ++55729664 +103424+2k Table 5.1.: Baseline architecture with 3 input channels of size 32×32. All convolutional layers use SAME padding, except for layer 11 which used VALID padding in order to decrease the feature map size to 1×1. If the input feature map is bigger than 32×32, for eachpoweroftwotherearetwoConvolution + BN + ELUblocksandoneMax pooling block added. This is the framed part in the table. 
-Input maxpooling2×2/2 maxpooling2×2/2 maxpooling2×2/2 C512@1×1/1 -32×32 16×16 8×8 4×4 1×1 -C32@3×3/1 C64@3×3/1 C64@3×3/1 C512@4×4/1(V) BN+ELU -BN+ELU BN+ELU BN+ELU BN+ELU Dropout,p=0.5 -C32@3×3/1 C64@3×3/1 Dropout,p=0.5 Ck@1×1/1 -BN+ELU BN+ELU GlobalAVGpooling +32×32Input +C32@3×3/1 +BN+ELU +C32@3×3/1 +BN+ELU +16×16 +maxpooling2×2/2 +C64@3×3/1 +BN+ELU +C64@3×3/1 +BN+ELU +8×8 +maxpooling2×2/2 +C64@3×3/1 +BN+ELU +4×4 +maxpooling2×2/2 +C512@4×4/1(V) +BN+ELU +Dropout,p=0.5 +1×1 +C512@1×1/1 +BN+ELU +Dropout,p=0.5 +Ck@1×1/1 +GlobalAVGpooling BN+Softmax Figure 5.1.: Architectureofthebaselinemodel. C 32@3×3/1isaconvolutionallayerwith32filters of kernel size 3×3 with stride 1. @@ -1415,8 +1685,8 @@ of kernel size 3×3 with stride 1. 5.1.1. Baseline Evaluation The results for the baseline model evaluated on eight datasets are given in Table 5.2. The speed for inference for different GPUs is given in Table 5.3. -Single Model Accuracy Ensemble of 10 Dataset +Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set Asirra 94.22% σ = 3.49 94.37% σ = 3.47 97.07% 97.37% CIFAR-10 91.23% σ = 1.10 85.84% σ = 0.87 92.36% 86.75% @@ -1431,8 +1701,8 @@ used in the ensemble. The empirical standard deviation σ of the accuracy is als CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For HASYv2 no test time transformations are used. -Inference per Training Network GPU Tensorflow +Inference per Training 1 Image 128 images time / epoch Baseline Default Intel i7-4930K 3ms 244ms 231.0s Baseline Optimized Intel i7-4930K 2ms 143ms 149.0s @@ -1516,31 +1786,49 @@ training. The image might lead to the wrong conclusion that models which are bet the start are also better at the end. In order to check this hypothesis, the relative order of validation accuracies for the 10 CIFAR-100 models was examined. 
If the relative ordering stays approximately the same, then it can be considered to run the first few epochs many -times and only train the best models to the end. For 10 models, there can be 102−10 = 45 +times and only train the best models to the end. For 10 models, there can be 102−10 2 += 45 pair-wise changes in the ordering at maximum if the relative order of validation accuracies is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred in average for each pair of epochs (i,i+1). This means if one knows only the relative order of the validation accuracy of two models m and m(cid:48) in epoch i, it is doubtful if one can make any statement about the ordering of m and m(cid:48) in epoch i+1. +0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 +0.2 +0.3 +0.4 +0.5 +0.6 0.7 +epoch +v +a l i +d a +t i +o +n +a c +c +u +r +a +c y +maximum validation accuracy +minimum validation accuracy 1.5 -0.6 2 -2.5 ycarucca -0.5 -3 ssol -noitadilav -0.4 +2 +2.5 +3 3.5 -0.3 maximum validation accuracy 4 -mmainxiimmuummvvaalliiddaattiioonnaaccccuurraaccyy -minimummvaelaidnaltoiossn accuracy +4 4.5 -0.2 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 2 3 4 5 6 7 8 9 0 1 2 3 4 -1 1 1 1 1 -epoch +l +o s +s +maximum validation accuracy +minimum validation accuracy +mean loss Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The differences do not exceed 1% and does not increase by training epoch. Four models stopped the first training stage at epoch 133 which causes the shift in the loss and the @@ -1598,8 +1886,8 @@ to Equation (4.1). The diagonal elements are set to 0 in order to make other ele easier to see. The symbols next to the label on the vertical axis indicate the shape and the color of the signs. The second image shows the same, but with baseline model. -49 Best viewed in electronic form. +49 Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. 
The diagonal elements are set to 0 in order to make other elements easier to see. The top image shows arbitrary class ordering, the bottom image shows the optimized ordering. @@ -1624,17 +1912,28 @@ based on CMO as described in Section 4.2. 51 5. Experimental Evaluation Cluster Spectral clustering Errors CMO Errors -fish aquarium fish, orchid + flatfish 5 aquarium fish, orchid + flatfish 4 -+ ray, shark + trout, lion + ray + shark, trout -flowers orchid, aquarium fish + sun- 5 orchid, aquarium fish + sun- 2 -flower + poppy, tulip + rose, flower, poppy, tulip, rose +fish aquarium fish, orchid + flatfish ++ ray, shark + trout, lion +5 aquarium fish, orchid + flatfish ++ ray + shark, trout +4 +flowers orchid, aquarium fish + sun- +flower + poppy, tulip + rose, train +5 orchid, aquarium fish + sun- +flower, poppy, tulip, rose +2 people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 -reptiles crocodile, plain, road, table, 9 crocodile, lizard, lobster, cater- 6 -wardrobe + dinosaur + lizard pillar+dinosaur+snake+tur- -+ snake, worm + turtle tle, crab -trees maple,oak,pine+willow,forest 3 palm, willow, pine, maple, oak 0 +reptiles crocodile, plain, road, table, +wardrobe + dinosaur + lizard ++ snake, worm + turtle +9 crocodile, lizard, lobster, cater- +pillar+dinosaur+snake+tur- +tle, crab +6 +trees maple,oak,pine+willow,forest + palm +3 palm, willow, pine, maple, oak 0 Total 24 12 Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , whereas clusters are separated by +. 
@@ -1650,8 +1949,12 @@ K K, κ 0 K, κ 0 L L, (cid:98) and L, L 1 L, (cid:98) and L, L 1 M M and M and M 2 M and µ, M and M 3 N N and N, N and N 2 N and N, N and N, ℵ 3 -O O, O, 0, ◦, °, and o 1 O, O, 0, ◦, ° and and o 2 -P P, P and p, ρ(cid:35)and P and ℘ 3 P and P, P, ℘ an(cid:35)d p, ρ 2 +O O, O, 0, ◦, °, +(cid:35) +and o 1 O, O, 0, ◦, ° and +(cid:35) +and o 2 +P P, P and p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2 Q Q, Q, Q, ι, (cid:116), (cid:38), (cid:96), (cid:61), Æ, 1 7 Q and Q, Q 1 R R, R and R, R, k and (cid:60) 3 R and (cid:60), R, R, R 1 S S, s, S 0 S, s, S 0 @@ -1683,8 +1986,9 @@ The leaf classifiers use the same topology as the root classifier. By initializi the root classifiers weights their performance can be pushed at about the inner accuracy. They are, however, only useful if their accuracy is well above the inner accuracy of the root classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. +Cluster Classes accuracy -Cluster Classes root classifier leaf classifier +root classifier leaf classifier cluster identified class identified | cluster class identified | cluster 1 3 69.67% 84.27% 72.98% 2 5 46.60% 58.54% 43.47% @@ -1709,56 +2013,69 @@ consider data points where the root classifier correctly identified the cluster. 5. Experimental Evaluation 5.5. Increased width for faster learning More filters in one layer could simplify the optimization problem as each filter needs smaller -updates. Hence a CNN N with n filters in layer i is expected to take more epochs than a +updates. Hence a CNN N with n i -CNN N(cid:48) with 2·n filters in layer i to achieve the same validation accuracy. +filters in layer i is expected to take more epochs than a +CNN N(cid:48) with 2·n i +filters in layer i to achieve the same validation accuracy. This hypothesis can be falsified by training a CNN N and a CNN N(cid:48) and comparing the trained number of epochs. 
As more filters can lead to different results depending on the layer where they are added, five models are trained. The details about those models are given in Table 5.7 -Filter count Total Name Layer +Filter count Total Baseline New parameters -m 9 64 638 5978566 +m 9 -m(cid:48) 9 64 974 8925622 +9 64 638 5978566 +m(cid:48) 9 -m 11 512 3786 5982698 +9 64 974 8925622 +m 11 -m(cid:48) 11 512 1024 1731980 +11 512 3786 5982698 +m(cid:48) 11 -m 13 512 8704 5982092 +11 512 1024 1731980 +m 13 +13 512 8704 5982092 Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer was increased. The detailed results are given in Table 5.8. As expected, the number of training epochs of the models with increased numbers of parameters is lower. The wall-clock time, however, is higher due to the increase in computation per forward- and backward-pass. -For m , m and m , the filter weight range of the layer with increased capacity decreases -9 11 13 +For m 9, m +11 +and m 13, the filter weight range of the layer with increased capacity decreases compared to Figure 5.6, the filter weights of the layer with increased capacity are more -concentrated around zero compared to Figure 5.2. For model m , the distribution of -13 +concentrated around zero compared to Figure 5.2. For model m 13, the distribution of weight of the output layer changed to a more bell-shaped distribution. Except for this, the distribution of filter weights in other layers did not change for all three models compared to the baseline. 
+Model Parameters Accuracy Training -Model Parameters Single Model Ensemble Mean Epochs Mean Time +Single Model Ensemble Mean Epochs Mean Time Mean std baseline 944012 63.38% 0.55 64.70% 154.7 3856s -m 5978566 65.53% 0.37 66.72% 105.7 4472s +m 9 -m(cid:48) 8925622 65.10% 1.09 66.54% 95.6 5261s +5978566 65.53% 0.37 66.72% 105.7 4472s +m(cid:48) 9 -m 5982698 65.73% 0.77 67.38% 149.2 5450s +8925622 65.10% 1.09 66.54% 95.6 5261s +m 11 -m(cid:48) 1731980 62.12% 0.48 62.89% 143.6 3665s +5982698 65.73% 0.77 67.38% 149.2 5450s +m(cid:48) 11 -m 5982092 62.39% 0.66 63.77% 147.8 4485s +1731980 62.12% 0.48 62.89% 143.6 3665s +m +13 +5982092 62.39% 0.66 63.77% 147.8 4485s +Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m 9, m 11, m 13 -Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m , m , m -9 11 13 as well as their accuracies. 54 5.6. Weight updates @@ -1789,14 +2106,14 @@ SAME padding and each layer can have an arbitrary number of filters. A convoluti with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called narrower and the number of filters in a convolutional layer is the layers width. If the number of parameters which may be used for the feature map scale is fixed and high -enough, there are still many combinations. If n with i = 0,...,k is the number of output +enough, there are still many combinations. If n i +with i = 0,...,k is the number of output feature maps of layer i where i = 0 is the input layer and all filters are 3×3 filters without a bias, then the number of parameters is +Parameters = k -Parameters = (cid:88)(cid:0) (n ·32+1)·n (cid:1) -i−1 i -i=1 +(cid:88) i=1(cid:0) (n i−1·32+1)·n i(cid:1) Hence the width of one layer does not only influence the parameters in this layer, but also in the next layer. 
The number of possible subsequent layers of one feature map size is enormous, even if @@ -1865,17 +2182,19 @@ Hence the effect of removing Batch Normalization from the baseline is investigat experiment. As before, 10 models are trained on CIFAR-100. The training setup and the model m no-bn -are identical to the baseline model m, except that in m the Batch Normalization layers +are identical to the baseline model m, except that in m no-bn +the Batch Normalization layers are removed. One notable difference is the training time: While m needs 21ms per epoch in average on -a GTX 980, m only needs 21ms per epoch. The number of epochs used for training, +a GTX 980, m no-bn +only needs 21ms per epoch. The number of epochs used for training, however, also increased noticeably from 149 epochs to 178 epochs in average. The standard -deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m . -no-bn -The mean accuracy of m is 62.86% and hence 0.52 percentage points worse. The +deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m no-bn. +The mean accuracy of m no-bn +is 62.86% and hence 0.52 percentage points worse. The standard deviation between models increased from 0.55 to 0.61. This is likely a result of the early stopping policy and the differences in training epochs. This can potentially be fixed by retraining the models which stopped earlier than the model which was trained for the @@ -1886,12 +2205,14 @@ Figure5.2,butthedistributionofbiasweightschangednoticeably: Whilethebiasweightso thebaselinearespreadoutinthefirstlayerandmuchmoreconcentratedinsubsequentlayers (see Figure 5.3), the model without Batch Normalization has rather concentrated weights in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). 
-Another model m(cid:48) which has one more filter in the convolutional layer 1, 3, 5, and 7 to +Another model m(cid:48) no-bn +which has one more filter in the convolutional layer 1, 3, 5, and 7 to compensate for the loss of parameters in Batch Normalization. The mean test accuracy of 10 such models is 62.87% which is 0.51 percentage points worse than the baseline. The -ensemble of m(cid:48) achieves 64.33% which is 0.37 percentage points worse than the baseline. +ensemble of m(cid:48) no-bn +achieves 64.33% which is 0.37 percentage points worse than the baseline. The mean training time was 14s per epoch and 157.4 epochs with a standard deviation of 20.7 epochs. Hence it is not advisable to remove Batch Normalization for the final model. It could, @@ -1901,8 +2222,9 @@ Batch Normalization. 58 5.9. Batch size 5.9. Batch size -The mini-batch size m ∈ N influences +The mini-batch size m ∈ N ≥1 +influences • Epochs until convergence: The smaller m, the more often the model is updated in one epoch. Those updates, however, are based on fewer samples of the dataset. Hence the gradients of different mini-batches can noticeably differ. In the literature, @@ -1914,29 +2236,38 @@ accuracy of the classifier when training is finished. [KMN+16] supports the view smaller m result in less sharp minima. Hence smaller m lead to better generalization. Empiric evaluation results can be found in Table 5.9. Those results confirm the claim of [KMN+16] that lower batch sizes generalize better. 
-Training Mean total Single model Ensemble -m Epochs +m +Training +Epochs +Mean total Single model Ensemble time training time Accuracy std Accuracy -8 118 s 81 – 153 14131s 61.93% σ = 1.03 65.68% +8 118 s epoch -16 62 s 103 – 173 8349s 64.16% σ = 0.81 66.98% +81 – 153 14131s 61.93% σ = 1.03 65.68% +16 62 s epoch -32 35 s 119 – 179 5171s 64.11% σ = 0.75 65.89% +103 – 173 8349s 64.16% σ = 0.81 66.98% +32 35 s epoch -64 25 s 133 – 195 2892s 63.38% σ = 0.55 64.70% +119 – 179 5171s 64.11% σ = 0.75 65.89% +64 25 s epoch -128 18 s 145 – 239 3126s 62.23% σ = 0.73 63.55% +133 – 195 2892s 63.38% σ = 0.55 64.70% +128 18 s epoch +145 – 239 3126s 62.23% σ = 0.73 63.55% Table 5.9.: Trainingtimeperepochandsinglemodeltestsetaccuracy(meanandstandarddeviation) of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on CIFAR-100. 5.10. Bias Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a -model m is created which is identical to the baseline model m, except that the bias of +model m no-bias +is created which is identical to the baseline model m, except that the bias of layers 11, 13 and 15 is removed. -The mean test accuracy of 10 trained m is 63.74% which is an improvement of +The mean test accuracy of 10 trained m no-bias +is 63.74% which is an improvement of 0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13% which is 0.43 percentage points better than the baseline. Hence the bias can safely be removed. @@ -2003,9 +2334,10 @@ tanh and softplus performed worse than the identity and it is unclear why the pu network performed so much better than the logistic function. One hypothesis why the logistic function performs so bad is that it cannot produce negative outputs. 
Hence the logistic− function was developed: +logistic−(x) = 1 -logistic−(x) = −0.5 1+e−x +−0.5 The logistic− function has the same derivative as the logistic function and hence still suffers from the vanishing gradient problem. The network with the logistic− function achieves an accuracy which is 11.30% better than the network with the logistic function, but is still @@ -2021,16 +2353,26 @@ This contradicts [GBB11, SMGS14]. A key difference between the logistic− function and ELU is that ELU does neither suffers from the vanishing gradient problem nor is its range of values bound. For this reason, the S2ReLU activation function, defined as +S2ReLU(x) = +ReLU(x +2 ++1)−ReLU(−x +2 ++1) =  -−x +1 if x ≤ −2 -  2 -x x  -S2ReLU(x) = ReLU( +1)−ReLU(− +1) = x if −2 ≤ x ≤ 2 -2 2 + +  +   - x +1 if x > −2 + +−x 2 ++1 if x ≤ −2 +x if −2 ≤ x ≤ 2 +x +2 ++1 if x > −2 ThisfunctionissimilartoSReLUsasintroducedin[JXF+16]. ThedifferenceisthatS2ReLU does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be the identity close to zero and have a smaller absolute value than the identity farther away. @@ -2056,8 +2398,8 @@ Table 5.10.: Properties of activation functions. 1The dying ReLU problem is similar to the vanishing gradient problem. 62 5.13. Activation Functions -Single model Ensemble of 10 Function +Single model Ensemble of 10 Training set Test set Training set Test set Identity 66.25% σ = 0.77 56.74% σ = 0.51 68.77% 58.78% Logistic 51.87% σ = 3.64 46.54% σ = 3.22 61.19% 54.58% @@ -2074,41 +2416,55 @@ PReLU 80.01% σ = 2.03 62.16% σ = 0.73 83.50% 64.79% ELU 76.64% σ = 1.48 63.38% σ = 0.55 78.30% 64.70% Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation functions on CIFAR-100. For LReLU, α=0.3 was chosen. 
-Inference per Training Mean total -Function Epochs +Function +Inference per Training +Epochs +Mean total 1 Image 128 time training time -Identity 8ms 42ms 31 s 108 – 148 3629s +Identity 8ms 42ms 31 s epoch -Logistic 6ms 31ms 24 s 101 – 167 2234s +108 – 148 3629s +Logistic 6ms 31ms 24 s epoch -Logistic− 6ms 31ms 22 s 133 – 255 3421s +101 – 167 2234s +Logistic− 6ms 31ms 22 s epoch -Softmax 7ms 37ms 33 s 127 – 248 5250s +133 – 255 3421s +Softmax 7ms 37ms 33 s epoch -Tanh 6ms 31ms 23 s 125 – 211 3141s +127 – 248 5250s +Tanh 6ms 31ms 23 s epoch -Softsign 6ms 31ms 23 s 122 – 205 3505s +125 – 211 3141s +Softsign 6ms 31ms 23 s epoch -ReLU 6ms 31ms 23 s 118 – 192 3449s +122 – 205 3505s +ReLU 6ms 31ms 23 s epoch -Softplus 6ms 31ms 24 s 101 – 165 2718s +118 – 192 3449s +Softplus 6ms 31ms 24 s epoch -S2ReLU 5ms 32ms 26 s 108 – 209 3231s +101 – 165 2718s +S2ReLU 5ms 32ms 26 s epoch -LReLU 7ms 34ms 25 s 109 – 198 3388s +108 – 209 3231s +LReLU 7ms 34ms 25 s epoch -PReLU 7ms 34ms 28 s 131 – 215 3970s +109 – 198 3388s +PReLU 7ms 34ms 28 s epoch -ELU 6ms 31ms 23 s 146 – 232 3692s +131 – 215 3970s +ELU 6ms 31ms 23 s epoch +146 – 232 3692s Table 5.12.: Training time and inference time of adjusted baseline models trained with different activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the identity is the fastest function. This result is likely an implementation specific problem of Keras 2.0.4 or Tensorflow 1.1.0. 63 5. Experimental Evaluation -Single model Ensemble Epochs Function +Single model Ensemble Epochs Accuracy std Accuracy Range Mean Identity 99.45% σ = 0.09 99.63% 55 – 77 62.2 Logistic 97.27% σ = 2.10 99.48% 37 – 76 54.5 @@ -2129,10 +2485,8 @@ such as self-driving cars is that they increase the computation by a factor of n why they improve the test accuracy is by reducing the variance. The idea of label smoothing is to use the ensemble prediction of the training data as labels for another classifier. 
For every element x of the training set, the one-hot encoded target -t(x) is smoothed by the ensemble prediction y (x) -E -t(cid:48)(x) = α·t(x)+(1−α)y (x) -E +t(x) is smoothed by the ensemble prediction y E(x) +t(cid:48)(x) = α·t(x)+(1−α)y E(x) where α ∈ [0,1] is the smoothing factor. There are three reasons why label smoothing could be beneficial: • Training speed: The ensemble prediction contains more information about the @@ -2150,12 +2504,14 @@ that the classifier gets into bad local minima. be clear which label is the correct one. Also, labeling errors can be present in training datasets. Those errors severely harm the training. By smoothing the labels errors could be relaxed. -10 models m are trained with the α = 0.5 smoothed labels from the prediction +10 models m smooth +are trained with the α = 0.5 smoothed labels from the prediction of an ensemble of 10 baseline models. The mean accuracy of the models trained on the smoothedtrainingsetlabelswas63.61%(+0.23%)andthestandarddeviationwasσ = 0.72 -(+0.17%). Theensembleof10m modelsachieved64.79%accuracy(+0.09%). Hence +(+0.17%). Theensembleof10m smooth +modelsachieved64.79%accuracy(+0.09%). Hence the effect of this kind of label smoothing on the final accuracy is questionable. The training speed didn’t noticeably change either: The number of trained epochs ranged from 144 to 205, the mean number of epochs was 177. The baseline training ranged from @@ -2175,8 +2531,9 @@ map, the bias is removed • More filters in the first layers ThedetailedarchitectureisgiveninTable5.14andvisualizedinFigure5.16. Theevaluation is given in Table 5.15 and the timing comparison is given in Table 5.16. 
-# Type Filters @ Parameters FLOPs Output size +# Type Filters @ Patch size / stride +Parameters FLOPs Output size Input 0 0 3@32× 32 1 Convolution 69@3×3×3 /1 1932 3744768 69@32× 32 2 BN + ELU 138 353418 69@32× 32 @@ -2200,8 +2557,11 @@ Dropout 0.5 0 0 512@ 1× 1 15 Convolution k @1×1×512/1 512·k 512·k k @ 1× 1 Global avg Pooling 1×1 0 k k @ 1× 1 16 BN + Softmax 2k 7k k @ 1× 1 -(cid:80) 514k 520k 179200+2k -+947654 +87870996 +(cid:80) 514k ++947654 +520k ++87870996 +179200+2k Table 5.14.: Optimized architecture with 3 input channels of size 32×32. All convolutional layers use SAME padding, except for layer 11 which used VALID padding in order to decrease the feature map size to 1×1. If the input feature map is bigger than 32×32, for each @@ -2209,17 +2569,37 @@ power of two there are two Convolution + BN + ELU blocks and one Max pooling block added. This is the framed part in the table. 66 5.15. Optimized Classifier -Input maxpooling3×3/2 maxpooling3×3/2 maxpooling3×3/2 C*512@1×1/1 -32×32 16×16 8×8 4×4 1×1 -C69@3×3/1 C64@3×3/1 C64@3×3/1 C*512@4×4/1(V) BN+ELU -BN+ELU BN+ELU BN+ELU BN+ELU Dropout,p=0.5 -C69@3×3/1 C64@3×3/1 Dropout,p=0.5 C*k@1×1/1 -BN+ELU BN+ELU GlobalAVGpooling +32×32Input +C69@3×3/1 +BN+ELU +C69@3×3/1 +BN+ELU +16×16 +maxpooling3×3/2 +C64@3×3/1 +BN+ELU +C64@3×3/1 +BN+ELU +8×8 +maxpooling3×3/2 +C64@3×3/1 +BN+ELU +4×4 +maxpooling3×3/2 +C*512@4×4/1(V) +BN+ELU +Dropout,p=0.5 +1×1 +C*512@1×1/1 +BN+ELU +Dropout,p=0.5 +C*k@1×1/1 +GlobalAVGpooling BN+Softmax Figure 5.16.: Architecture of the optimized model. C 32@3×3/1 is a convolutional layer with 32 filters of kernel size 3×3 with stride 1. The * indicates that no bias is used. -Single Model Accuracy Ensemble of 10 Dataset +Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set Asirra 95.83% σ = 4.70 90.75% σ = 4.73 98.78% 93.09% CIFAR-10 94.58% σ = 0.70 87.92% σ = 0.46 96.47% 89.86% @@ -2234,8 +2614,8 @@ used in the ensemble. 
The empirical standard deviation σ of the accuracy is als CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN and HASY, no test time transformations are used. -Inference per Training Network GPU Tensorflow +Inference per Training 1 Image 128 images time / epoch Optimized Default Intel i7-4930K 5ms 432ms 386s Optimized Optimized Intel i7-4930K 4ms 307ms 315s @@ -2281,8 +2661,9 @@ improve the results when the number of epochs is fixed, but notably improved the when the training loss was used as the early stopping criterion. 5.17. Regularization Stronger regularization might even improve the results when using the training loss as an -early stopping criterion. (cid:96) regularization with a weighting factor of λ = 0.0001 is used in +early stopping criterion. (cid:96) 2 +regularization with a weighting factor of λ = 0.0001 is used in all other experiments. While the accuracy as shown in Table 5.19 does not show a clear pattern, the number of epochs increases with lower model regularization (see Table 5.20). 2Except data augmentation and test time transformations. @@ -2290,8 +2671,8 @@ pattern, the number of epochs increases with lower model regularization (see Tab 4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. 68 5.17. Regularization -Early Stopping Fixed epochs Dataset +Early Stopping Fixed epochs val. acc train loss Asirra 93.09% 96.01%3 96.01% CIFAR-10 89.86% 91.75% 88.88% @@ -2304,21 +2685,23 @@ compared training setups without a validation set and thus more training data. T second column uses the training loss as a stopping criterion, the third column uses a fixed number of epochs which is equal to the mean number of training epochs of the models with early stopping on the validation set accuracy. 
-Single Model Accuracy Ensemble of 10 λ +Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set λ = 0.01 73.83% σ = 1.78 58.94% σ = 1.33 87.78% 69.98% λ = 0.001 82.86% σ = 0.89 63.03% σ = 0.67 91.86% 71.02% λ = 0.0001 77.96% σ = 2.18 64.42% σ = 0.73 81.44% 67.03% -Table 5.19.: Different choices of (cid:96) model regularization applied to the optimized model. +Table 5.19.: Different choices of (cid:96) 2 +model regularization applied to the optimized model. λ min max mean std λ = 0.01 457 503 404.6 37.2 λ = 0.001 516 649 588.4 41.6 λ = 0.0001 579 833 696.1 79.1 Table 5.20.: Training time in epochs of models with early stopping on training loss by different -choices of (cid:96) model regularization applied to the optimized model. +choices of (cid:96) 2 +model regularization applied to the optimized model. 69 5. Experimental Evaluation 70 @@ -2408,8 +2791,8 @@ A. Figures, Tables and Algorithms (a) Original image (b) Smoothing filter (c) Laplace edge detection filter (d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter Figure A.1.: Examples of image filters. Best viewed in electronic form. -99-percentile interval Layer +99-percentile interval filter bias 1 [-0.50, 0.48] [-0.06, 0.07] 3 [-0.21, 0.19] [-0.07, 0.07] @@ -2437,8 +2820,9 @@ j ← randomInteger(1,...,n)\{i} p ← randomUniform(0,1) C(cid:48) ← swap(C,i,j) s ← accuracy(C(cid:48)) -if p < exp(s−bestScore) then +if p < exp(s−bestScore T +) then C ← C(cid:48) if s > bestScore then bestScore ← s @@ -2453,8 +2837,8 @@ return bestM 76 Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model, but with layer 5 reduced to 3 filters. -Single model Ensemble of 10 Epochs Function +Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean Identity 87.92% σ = 0.40 84.69% σ = 0.08 88.59% 85.43% 92 – 140 114.5 Logistic 81.46% σ = 5.08 79.67% σ = 4.85 86.38% 84.60% 58 – 91 77.3 @@ -2471,8 +2855,8 @@ HASYv2. 
For LReLU, α=0.3 was chosen. 77 Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but with layer 5 reduced to 3 filters. -Single model Ensemble of 10 Epochs Function +Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean Identity 87.49% σ = 2.50 69.86% σ = 1.41 89.78% 71.90% 51 – 65 53.4 Logistic 45.32% σ = 14.88 40.85% σ = 12.56 51.06% 45.49% 38 – 93 74.6 @@ -2551,49 +2935,66 @@ Table B.2 shows six commonly used weight initialization schemes. Several schemes same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. Name α β γ Reference Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] +Xavier/Glorot uniform α = (cid:113) -Xavier/Glorot uniform α = 6 β = 0 γ = 0 [GB10] +6 nin+nout -(cid:16) (cid:17)2 -Xavier/Glorot normal α = 0 β = 2 γ = 0 [GB10] -(nin+nout) -He α = 0 β = 2 γ = 0 [HZRS15b] +β = 0 γ = 0 [GB10] +Xavier/Glorot normal α = 0 β = +(cid:16) +2 +(nin+nout)(cid:17)2 +γ = 0 [GB10] +He α = 0 β = 2 nin +γ = 0 [HZRS15b] Orthogonal — — γ = 0 [SMG13] LSUV — — γ = 0 [MM15] Table B.2.: Weight initialization schemes of the form w ∼α·U[−1,1]+β·N(0,1)+γ. -n ,n are the number of units in the previous layer and the next layer. Typically, -in out +n in,n +out +are the number of units in the previous layer and the next layer. Typically, biasesareinitializedwithconstant0andweightsbyoneoftheotherschemesto prevent unit-coadaptation. However, dropout makes it possible to use constant initialization for all parameters. LSUV and Orthogonal initialization cannot be described with this simple pattern. B.4. Objective function For classification tasks, the cross-entropy +E CE(W) = +−(cid:88) +x∈X K -(cid:88)(cid:88) -E (W) = − [txlog(ox)+(1−tx)log(1−ox)] -CE k k k k -x∈Xk=1 +(cid:88) +k=1[tx klog(ox k)+(1−tx k)log(1−ox k)] is by far the most commonly used objective function (e.g., used by [ZF14]). 
In this equation, -X is the set of training examples, K is the number of classes, tx ∈ {0,1} indicates if the +X is the set of training examples, K is the number of classes, tx k -training example x is of class k, ox is the output of the classifier for the training example x +∈ {0,1} indicates if the +training example x is of class k, ox k +is the output of the classifier for the training example x and class k. However, regularization terms weighted with a constant λ ∈ (0,+∞) are sometimes added: -• LASSO: (cid:96) (e.g., used in [HPTD15]) +• LASSO: (cid:96) 1 -• Weight decay: (cid:96) (e.g., λ = 0.0005 as in [MSM16]) +(e.g., used in [HPTD15]) +• Weight decay: (cid:96) 2 +(e.g., λ = 0.0005 as in [MSM16]) • Orthogonality regularization (|(WT ·W −I)|, see [VTKP17]) 81 B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule -∂E -w ← w +∆w with ∆w = −η x -ji ji ji ji +w +ji +← w ji+∆w +ji +with ∆w +ji += +−η∂E +x ∂w ji where η ∈ (0,1), typically 0.01 (e.g., [MSM16]), is called the learning rate. @@ -2602,32 +3003,49 @@ mini-batch sizes are |B| ∈ {32,64,128,256,512}, e.g. [ZF14]). Larger mini-batc lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes lead to longer training times due to computational overhead and to more training steps due to gradient noise. 
-∂E -w ← w +∆w with ∆w = −η B -ji ji ji ji +w +ji +← w ji+∆w +ji +with ∆w +ji += +−η∂E +B ∂w ji Nine variations which adjust the learning rate during training are: • Momentum: -∂E -w(t+1) ← w(t) +∆w(t+1) with ∆w(t+1) = −η B +α∆w(t) -ji ji ji ji ∂w ji +w(t+1) +ji +← w(t) +ji ++∆w(t+1) +ji +with ∆w(t+1) +ji += +−η∂E +B +∂w +ji ++α∆w(t) ji with α ∈ [0,1], typically 0.9 (e.g., [ZF14, MSM16]) • Adagrad [DHS11] • RProp and the mini-batch version RMSProp [TH12] • Adadelta [Zei12] -• Power Scheduling [Xu11]: η(t) = η(0)(1+a·t)−c, where t ∈ N is the training step, +• Power Scheduling [Xu11]: η(t) = η(0)(1+a·t)−c, where t ∈ N 0 +is the training step, a,c are constants. • Performance Scheduling [SHY+13]: Measure the error on the cross validation set and decrease the learning rate when the algorithms improvement is below a threshold. -• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0)·10− kt where t ∈ N is the -0 -training step, η(0) is the initial learning rate, k ∈ N is the number of training steps +• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0)·10−t k where t ∈ N 0 is the +training step, η(0) is the initial learning rate, k ∈ N ≥1 -until the learning rate is decreased by 1 th. -10 +is the number of training steps +until the learning rate is decreased by 1 10th. • NewbobScheduling[new00]: StartwithPerformanceScheduling,thenuseExponential Decay Scheduling. • Adam and AdaMax [KB14] @@ -2657,41 +3075,65 @@ CNNs have the following hyperparameters: – Activation Functions as shown in Table B.3 – For more, see Sections 2.2 and 2.3. 
Name Function ϕ(x) Range of Values ϕ(cid:48)(x) Used by +Sign function†  -+1 if x ≥ 0 -Sign function† {−1,1} 0 [KS02] -−1 if x < 0 - -+1 if x > 0 -Heaviside {0,1} 0 [MP43] -0 if x < 0 + +1 if x ≥ 0 +−1 if x < 0 +{−1,1} 0 [KS02] +Heaviside step function† -Logistic function 1 [0,1] ex [DJ99] -1+e−x (ex+1)2 -Tanh ex−e−x = tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a] -ex+e−x  -1 if x > 0 -ReLU† max(0,x) [0,+∞) [KSH12] -0 if x < 0 + +1 if x > 0 +0 if x < 0 +{0,1} 0 [MP43] +Logistic function 1 +1+e−x +[0,1] ex +(ex+1)2 +[DJ99] +Tanh ex−e−x +ex+e−x += tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a] +ReLU† max(0,x) [0,+∞)  -1 if x > 0 -LReLU†2 ϕ(x) = max(αx,x) (−∞,+∞) [MHN13, HZRS15b] -α if x < 0 + 1 if x > 0 +0 if x < 0 +[KSH12] +LReLU†2 (PReLU) -Softplus log(ex+1) (0,+∞) ex [DBB+01, GBB11] +ϕ(x) = max(αx,x) (−∞,+∞) + + 1 if x > 0 +α if x < 0 +[MHN13, HZRS15b] +Softplus log(ex+1) (0,+∞) ex ex+1 -  -x if x > 0 1 if x > 0 -ELU (−∞,+∞) [CUH15] -α(ex−1) if x ≤ 0 αex otherwise -Softmax‡ o(x) = exj [0,1]K o(x) · (cid:80)K k=1exk−exj [KSH12, Tho14a] -j (cid:80)K exk j (cid:80)K exk -k=1  k=1 -1 if x = maxx -Maxout‡ o(x) = max x (−∞,+∞) i [GWFM+13] -x∈x -0 otherwise +[DBB+01, GBB11] +ELU + + x if x > 0 +α(ex−1) if x ≤ 0 +(−∞,+∞) + + 1 if x > 0 +αex otherwise +[CUH15] +Softmax‡ o(x) +j += exj +(cid:80)K k=1exk +[0,1]K o(x) +j +· (cid:80)K k=1exk−exj +(cid:80)K k=1exk +[KSH12, Tho14a] +Maxout‡ o(x) = max x∈xx (−∞,+∞) + + 1 if x +i += maxx +0 otherwise +[GWFM+13] Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 and functions marked with ‡ operate on all elements of a layer simultaneously. The hyperparameters α ∈ (0,1) of Leaky ReLU and ELU are typically α = 0.01. Other @@ -2703,30 +3145,28 @@ Softmax is the standard activation function for the last layer of a classificati as it produces a probability distribution. See Figure B.1 for a plot of some of them. 
2α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. 84 -2.0 y -ϕ (x) = 1 -1 1+e−x -ϕ (x) = tanh(x) 1.5 -2 -ϕ (x) = max(0,x) -3 -ϕ (x) = log(ex+1) -4 1.0 -ϕ (x) = max(x,ex−1) -5 -0.5 -x −2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0 -−0.5 −1.0 +−0.5 +0.5 +1.0 +1.5 +2.0 +x +y +ϕ 1(x) = 1 +1+e−x +ϕ 2(x) = tanh(x) +ϕ 3(x) = max(0,x) +ϕ 4(x) = log(ex+1) +ϕ 5(x) = max(x,ex−1) Figure B.1.: Activation functions plotted in [−2,+2]. tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, whereas tanh and the logistic function are always below 1. B.7. Regularization Regularization techniques aim to make the fitted function smoother and reduce overfitting. Regularization techniques are: -• (cid:96) , (cid:96) , and Orthogonality regularization: See Appendix B.4 -1 2 +• (cid:96) 1, (cid:96) 2, and Orthogonality regularization: See Appendix B.4 • Max-norm regularization (e.g. used ins [SHK+14]) • Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth (see [HSL+16]) @@ -2741,52 +3181,61 @@ C. Calculating Network Characteristics C.1. Parameter Numbers • A fully connected layer with n nodes, k inputs has n·(k+1) parameters. The +1 is due to the bias. -• A convolutional layer i with k filters of size n×m being applied to k feature maps -i i−1 -has k ·k (n·m+1) parameters. The +1 is due to the bias. -i i−1 -• A fully connected layer with n nodes after k feature maps of size m × m has -1 2 -n·(k·m ·m +1) parameters. -1 2 +• A convolutional layer i with k +i +filters of size n×m being applied to k +i−1 +feature maps +has k i·k i−1(n·m+1) parameters. The +1 is due to the bias. +• A fully connected layer with n nodes after k feature maps of size m +1 +× m +2 +has +n·(k·m 1·m 2+1) parameters. • A dense block with a depth of L, a growth rate of n and 3×3 filters has L+n·32+ -32·n2(cid:80)L (L−i) = L+9n+9n2L2−L parameters. 
-i=0 2 +32·n2(cid:80)L i=0(L−i) = L+9n+9n2L2−L +2 +parameters. According to [HPTD15], AlexNet has 60 million parameters which is roughly the number calculated in Table D.2. C.2. FLOPs The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence the following number are only giving rough estimates. -In the following, n denotes the number of FLOPs to compute the non-linearity ϕ. For +In the following, n ϕ -simplicity, n = 5 was chosen. +denotes the number of FLOPs to compute the non-linearity ϕ. For +simplicity, n ϕ += 5 was chosen. • A fully connected layer with n nodes and k inputs has to calculate ϕ(W ·x+b) with W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n·(k+(k−1)+1) = 2nk additions / multiplications before the non-linearity ϕ is calculated. The total number -of FLOPs is 2·n·k+n·n . -ϕ -• In the following, biases are ignored. A convolutional layer with k filters of size n×m +of FLOPs is 2·n·k+n·n ϕ. +• In the following, biases are ignored. A convolutional layer with k i -being applied to k filter maps of size w×h results in k filter maps of size w×h if -i−1 i -padding is applied. For each element of each filter map, n·m·k multiplications and +filters of size n×m +being applied to k i−1 -(n·m·k −1) additions have to be made. This results in (2nmk −1)·(k ·w·h) -i−1 i−1 i -operations. The total number of FLOPs is (2·n·m·k −1)·(k ·w·h)+k ·w·h·n . -i−1 i i ϕ +filter maps of size w×h results in k +i +filter maps of size w×h if +padding is applied. For each element of each filter map, n·m·k +i−1 +multiplications and +(n·m·k i−1−1) additions have to be made. This results in (2nmk i−1−1)·(k i·w·h) +operations. The total number of FLOPs is (2·n·m·k i−1−1)·(k i·w·h)+k i·w·h·n ϕ. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. 87 • Afullyconnectedlayerwithnnodesafterk featuremapsofsizew×hneeds2n(k·w·h) -FLOPs. The total number of FLOPs is 2n·(k·w·h)+n·n . -ϕ +FLOPs. 
The total number of FLOPs is 2n·(k·w·h)+n·n ϕ. • As Dropout is only calculated during training, the number of FLOPs was set to 0. • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. For a feature map of size w×h a max pooling -filter with stride s gets applied w·h. The number of FLOPs per application depends +filter with stride s gets applied w·h s2 +. The number of FLOPs per application depends on the kernel size. A 2×2 kernel is assumed to need 5 FLOPs. • The number of FLOPs for Batch Normalization is the same as the number of its parameters. @@ -2834,8 +3283,9 @@ non-linear combination of the features of the feature maps. Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test error rate of 0.8% on MNIST. Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98]. -# Type Filters @ Parameters FLOPs Output size +# Type Filters @ Patch size / stride +Parameters FLOPs Output size Input 0 0 1@32×32 1 Convolution 6@5×5×1/1 156 307800 6@28×28 2 Scaled average pooling 2×2 /2 2 336 6@14×14 @@ -2853,14 +3303,16 @@ than fully connected layers. D.2. AlexNet ThefirstCNNwhichachievedmajorimprovementsontheImageNetdatasetwasAlexNet[KSH12]. ItsarchitectureisshowninFigureD.2anddescribedinTableD.2. Ithasabout60·106param- -eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/g˜uerzhoy/tf_alexnet. -Note that the uncompressed size is at least 60965224floats·32 bit ≈ 244MB. +eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜ guerzhoy/tf_alexnet. +Note that the uncompressed size is at least 60965224floats·32 bit float +≈ 244MB. Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). 
-# Type Filters @ Parameters FLOPs Output size +# Type Filters @ Patch size / stride +Parameters FLOPs Output size Input 3 @ 224×224 1 Convolution 96 @ 11×11×3 / 4 34944 211M 96@ 55× 55 LCN 12M 96@ 55× 55 @@ -2893,22 +3345,47 @@ learn parameters. A major difference compared to AlexNet is that VGG-16 uses onl filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a detailed textual description is given in Table D.3. AtrainedVGG-16DforTensorflowcanbedownloadedathttps://github.com/machrisaa/ -tensorflow-vgg. Note that the uncompressed size is at least 138357544floats·32 bit ≈ +tensorflow-vgg. Note that the uncompressed size is at least 138357544floats·32 bit float +≈ 520MB. The downloaded Numpy binary file npz needs 553MB without compression and 514MB with compression. -Input maxpooling2×2/1 maxpooling2×2/1 maxpooling2×2/1 maxpooling2×2/1 maxpooling2×2/1 -224×224 112×112 56×56 28×28 14×14 7×7 -C64@3×3/1 C128@3×3/1 C256@3×3/1 C512@3×3/1 C512@3×3/1 FullyConnected4096 -C64@3×3/1 C128@3×3/1 C256@3×3/1 C512@3×3/1 C512@3×3/1 Dropout,p=0.5 -C256@3×3/1 C512@3×3/1 C512@3×3/1 FullyConnected4096 +224×224 +Input +C64@3×3/1 +C64@3×3/1 +112×112 +maxpooling2×2/1 +C128@3×3/1 +C128@3×3/1 +56×56 +maxpooling2×2/1 +C256@3×3/1 +C256@3×3/1 +C256@3×3/1 +28×28 +maxpooling2×2/1 +C512@3×3/1 +C512@3×3/1 +C512@3×3/1 +14×14 +maxpooling2×2/1 +C512@3×3/1 +C512@3×3/1 +C512@3×3/1 +7×7 +maxpooling2×2/1 +FullyConnected4096 +Dropout,p=0.5 +FullyConnected4096 Dropout,p=0.5 FullyConnected1000 Figure D.3.: Architecture of VGG-16 D. C 512@3×3/1 is a convolutional layer with 512 filters of kernel size 3×3 with stride 1. All convolutional layers use SAME padding. 
92 -# Type Filters @ Parameters FLOPs Output size +# Type Filters @ Patch size / stride +Parameters FLOPs Output size Input 3 @ 224×224 1 Convolution 64 @ 3×3× 3 / 1 1792 186M 64@ 224×224 2 Convolution 64 @ 3×3× 64 / 1 36928 3712M 64@ 224×224 @@ -2988,39 +3465,51 @@ Well-known benchmark datasets for classification problems in computer vision are in Table E.1. The best results known to me are given in Table E.2. However, every semantic segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers using Algorithm 2. -NumberNumber +Database Image Resolution -Database of of Channels Data source (width × height) -Images Classes +Number +of +Images +Number +of +Classes +Channels Data source MNIST 28px×28px 70000 10 1 [YL98, LBBH98] HASYv2 32px×32px 168233 369 1 [Tho17a] -[NWC+11b], SVHN 32px×32px 630420 10 3 +[NWC+11b], [NWC+11a] CIFAR-10 32px×32px 60000 10 3 [Kri, KH09] CIFAR-100 32px×32px 60000 100 3 [Kri, KH09] STL-10 96px×96px 13000 10 3 [CLN11, CLN10] +Caltech-101 (80px−3481px) -Caltech-101 9144 102 3 [FFP03, FFFP06] ×(92px−3999px) +9144 102 3 [FFP03, FFFP06] +Caltech-256 (75px−7913px) -Caltech-256 30607 257 3 [Gri06, GG07] ×(75px−7913px) +30607 257 3 [Gri06, GG07] +ILSVRC 20121 (8px−9331px) -ILSVRC 20121 1.2·106 1000 3 [Ima12, RDS+14] ×(10px−6530px) +1.2·106 1000 3 [Ima12, RDS+14] +Places3652 (290px−3158px) -Places3652 1.8·106 365 3 [Zho16, ZKL+16] ×(225px−2630px) +1.8·106 365 3 [Zho16, ZKL+16] +GTSRB (25px−266px) -GTSRB 51839 43 3 [SSSI, SSSI12] ×(25px−232px) +51839 43 3 [SSSI, SSSI12] +Asirra3 (4px−500px) -Asirra3 25000 2 3 [Asi17, EDHS07] ×(4px−500px) +25000 2 3 [Asi17, EDHS07] +Graz-02 480px×640px -Graz-02 and 640px×480px 1096 3 3 [Mar08, MS07] +and 640px×480px 1096 3 3 [Mar08, MS07] Table E.1.: An overview over publicly available image databases for classification. The number of images row gives the sum of the training and the test images. Some datasets, like SVHN, have additional unlabeled data which is not given in this table. 
@@ -3028,8 +3517,8 @@ SVHN, have additional unlabeled data which is not given in this table. 2The dimensions are only calculated for the validation set. 3Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle 97 -Achieved / Dataset Model type / name Result Score +Achieved / Claimed by MNIST — 0.21% error [WZZ+13] HASYv2 TF-CNN 81.00% accuracy [Tho17a] @@ -3045,27 +3534,27 @@ Asirra SVM 82.7% accuracy [Gol08] Graz-02 Optimal NBNN 78.98% accuracy [BMDP10] Table E.2.: An overview over state of the art results achieved in computer vision datasets. Algorithm 2 Create a classification dataset from a semantic segmentation dataset -Require: Semantic segmentation dataset (D ) -S -procedure CreateDataset(Annotated dataset D ) -S -D ← List +Require: Semantic segmentation dataset (D S) +procedure CreateDataset(Annotated dataset D S) +D C +← List w ← desired image width h ← desired image height -for Image and associated label (x,y) in D do +for Image and associated label (x,y) in D S +do i ← randint(0,L.width−w) j ← randint(0,L.height−h) -c ← crop(y,(i,j),(i+w,j +h)) +c L +← crop(y,(i,j),(i+w,j +h)) if at least 50% of s are of one class then -c ← crop(x,(i,j),(i+w,j +h)) +c I -D.append((c ,c )) -I L -return (D ) -C +← crop(x,(i,j),(i+w,j +h)) +D.append((c I,c L)) +return (D C) 98 F. List of Tables 2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
8 diff --git a/read/results/pdfplumber/2201.00021.txt b/read/results/pdfplumber/2201.00021.txt index fa83b99..0085371 100644 --- a/read/results/pdfplumber/2201.00021.txt +++ b/read/results/pdfplumber/2201.00021.txt @@ -9,189 +9,328 @@ e-mail:yyan@mpifr-bonn.mpg.de 2 AstronomyDepartment,FacultyofScience,KingAbdulazizUniversity,P.O.Box80203,Jeddah21589,SaudiArabia 3 XinjiangAstronomicalObservatory,ChineseAcademyofSciences,830011Urumqi,PRChina 4 NationalRadioAstronomyObservatory,520EdgemontRoad,Charlottesville,VA22903-2475,USA -2202 5 CenterforAstrophysics,GuangzhouUniversity,510006Guangzhou,People’sRepublicofChina 6 SchoolofAstronomyandSpaceScience,NanjingUniversity,163XianlinAvenue,Nanjing210023,People’sRepublicofChina 7 KeyLaboratoryofModernAstronomyandAstrophysics(NanjingUniversity),MinistryofEducation,Nanjing210023,People’s RepublicofChina -rpA Received13December2021/Accepted30December2021 -ABSTRACT 9 +ABSTRACT Context. Molecularmaserlinesaresignpostsofhigh-massstarformation,probingtheexcitationandkinematicsofverycompact -]AG.hp-ortsa[ regionsinthecloseenvironmentofyoungstellarobjectsandprovidingusefultargetsfortrigonometricparallaxmeasurements. -Aims.OnlyafewNH (9,6)masersareknownsofar,andtheiroriginisstillpoorlyunderstood.HereweaimtofindnewNH (9,6) -3 3 +Aims.OnlyafewNH 3(9,6)masersareknownsofar,andtheiroriginisstillpoorlyunderstood.HereweaimtofindnewNH 3(9,6) maserstoprovideabetterobservationalbasisforstudyingtheirroleinhigh-massstar-formingregions. -Methods.WecarriedoutNH (9,6)observationstowardCepheusAandG34.26+0.15withtheEffelsberg100-metertelescope(beam -3 -size49(cid:48)(cid:48))andtheKarlG.JanskyVeryLargeArray(JVLA;beamsizeabout1(cid:48).(cid:48)2). 
-Results.WediscoverednewNH (9,6)masersinCepAandG34.26+0.15,whichincreasesthenumberofknownhigh-massstar- -3 -formingregionshostingNH (9,6)masersfromfivetoseven.Long-termmonitoring(20months)atEffelsbergshowsthattheintensity +Methods.WecarriedoutNH 3(9,6)observationstowardCepheusAandG34.26+0.15withtheEffelsberg100-metertelescope(beam +size49(cid:48)(cid:48))andtheKarlG.JanskyVeryLargeArray(JVLA;beamsizeabout1(cid:48)(cid:48) .2). +Results.WediscoverednewNH 3 +(9,6)masersinCepAandG34.26+0.15,whichincreasesthenumberofknownhigh-massstar- +formingregionshostingNH 3(9,6)masersfromfivetoseven.Long-termmonitoring(20months)atEffelsbergshowsthattheintensity ofthe(9,6)maserinG34.26+0.15isdecreasing,whiletheCepAmaserremainsstable.ComparedtotheEffelsbergdataandassuming -linearvariationsbetweentheepochsofobservation,theJVLAdataindicatenomissingflux.ThissuggeststhattheNH (9,6)emission -3 +linearvariationsbetweentheepochsofobservation,theJVLAdataindicatenomissingflux.ThissuggeststhattheNH 3(9,6)emission arisesfromsinglecompactemissionregionsthatarenotresolvedbytheinterferometricmeasurements.AsJVLAimagingshows,the -NH (9,6)emissioninCepAoriginatesfromasub-arcsecond-sizedregion,slightlytothewest(0(cid:48).(cid:48)28±0(cid:48).(cid:48)10)ofthepeakposition +NH 3 -ofthe1.36cmcontinuumobject,HW2.InG34.26+0.15,threeNH (9,6)maserspotsareobserved:oneisclosetotheheadofthe -3 cometaryultracompactHiiregionC,andtheothertwoareemittedfromacompactregiontothewestofthehypercompactHiiregion 3v12000.1022:viXra +(9,6)emissioninCepAoriginatesfromasub-arcsecond-sizedregion,slightlytothewest(0(cid:48)(cid:48) .28±0(cid:48)(cid:48) .10)ofthepeakposition +ofthe1.36cmcontinuumobject,HW2.InG34.26+0.15,threeNH +3 +(9,6)maserspotsareobserved:oneisclosetotheheadofthe +cometaryultracompactHiiregionC,andtheothertwoareemittedfromacompactregiontothewestofthehypercompactHiiregion A. 
Conclusions.Thenewlyfound(9,6)masersappeartoberelatedtooutflows.ThehigherangularresolutionofJVLAandverylong baselineinterferometryobservationsareneededtoprovidemoreaccuratepositionsandconstraintsforpumpingscenarios. Keywords. Masers–ISM:clouds–ISM:individualobjects:CepA,G34.26+0.15–ISM:Hiiregions–Radiolines:ISM -1. Introduction et al. 2007), NH (7,7), NH (9,9), and NH (12,12) (Henkel -3 3 3 -etal.2013).Thesehaveledtothediscoveryofmetastablemaser -Since its discovery more than five decades ago (Cheung et al. lines in 22 different regions (Mauersberger et al. 1986, 1987; -1968), ammonia (NH ) has been a most valuable molecule for -3 Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; +1. Introduction +Since its discovery more than five decades ago (Cheung et al. +1968), ammonia (NH 3) has been a most valuable molecule for investigating the physical properties of molecular clouds (e.g., -Cesaronietal.1992;Wilson&Schilke1993;Mangum&Woot- Ho & Townes 1983). While thermally excited transitions in -ten1994;Kraemer&Jackson1995;Zhang&Ho1995;Zhang thecentimeter-wavelengthinversiontransitionsofammoniaare -etal.1999;Walshetal.2007;Hunteretal.2008;Galván-Madrid regarded as a reliable thermometer of molecular clouds (e.g., -et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh -Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & +Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia masershaveattractedattentionsincethefirstdetectionofmaser -Joyce2014;McEwenetal.2016;Millsetal.2018;Hoggeetal. action in the (J,K) = (3,3) metastable (J = K) line toward the -2019;Meietal.2020;Towneretal.2021).Comparedwiththe massive star-forming region W33 (Wilson et al. 1982). Subse- -metastable ammonia masers, detected non-metastable (J > K) quent observations have led to the detection of new metastable -ammoniamasertransitionsaremorenumerous.Thefirsthighly -ammonia masers, including 15NH (3,3) (Mauersberger et al. 
-3 excited non-metastable ammonia maser was detected by Mad- -1986), NH 3 (1,1) (Gaume et al. 1996), NH 3 (2,2) (Mills et al. denetal.(1986)inthe(J,K)=(9,6)and(6,3)lines.Thereafter, -2018), NH (5,5) (Cesaroni et al. 1992), NH (6,6) (Beuther -3 3 many other NH non-metastable inversion transition lines have -3 -(cid:63) Member of the International Max Planck Research School (IM- beenidentifiedasmasers,includingthe(5,3),(5,4),(6,1),(6,2), -PRS)forAstronomyandAstrophysicsattheuniversitiesofBonnand (6,4),(6,5),(7,3),(7,4),(7,5)(7,6),(8,3),(8,4),(8,5),(8,6),(9,3), -Cologne. (9,4),(9,5),(9,7),(9,8),(10,7),(10,8),(10,9),and(11,9)transi- -Articlenumber,page1of10 -A&Aproofs:manuscriptno.mainArxiv -tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; away from the source. For observations made before 2021 Au- -Henkel et al. 2013; Mei et al. 2020). Except for the NH (3,3) gust,weusedaspectrometerthatcovered2GHzwidebackends +ammonia masers, including 15NH 3 -masersproposedtobeassociatedwithfoursupernovaremnants withachannelwidthof38.1kHz,correspondingto∼0.62kms−1 -(McEwenetal.2016),almostalltheotherammoniamasersare at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar -detected in high-mass star-forming regions (HMSFRs). How- 1975).Ahighspectralresolutionbackendwith65536channels -ever, while many HMSFRs host water (H O), hydroxyl (OH), and a bandwidth of 300 MHz was employed in 2021 August, -2 -or methanol (CH OH) masers, ammonia masers are quite rare providing a channel width of 0.07 km s−1 at 18.5 GHz. Point- +(3,3) (Mauersberger et al. +1986), NH 3 (1,1) (Gaume et al. 1996), NH 3 (2,2) (Mills et al. +2018), NH 3 -in these sources, and the role that the environment of a young ing was checked every 2 hours using 3C 286 or NGC 7027. -high-mass star plays in their excitation remains unclear. 
There- Focus calibrations were done at the beginning of the observa- -fore, dedicated searches for ammonia masers in HMSFRs are tionsandduringsunsetandsunrisetowardtheabovementioned -indispensable in regard to their overall incidence and associa- pointingsources.Thesystemtemperatureswere100–130Kon -tion with different environments, which can provide additional amain-beambrightnesstemperature,T ,scale.Thisfluxden- -MB -constraintsonthepumpingmechanismofammoniamasers. sitywascalibratedassumingaT /S ratioof1.95K/Jy,derived -MB -So far, a total of 32 NH inversion transitions (∆K = 0 fromcontinuumcrossscansofNGC7027(thefluxdensitywas +(5,5) (Cesaroni et al. 1992), NH 3 -and ∆J = 0) have been identified as masers. Among these, and adoptedfromOttetal.1994).Calibrationuncertaintiesareesti- -despite arising from energy levels as high as 1090 K above matedtobe∼10%. -the ground state, the NH (9,6) maser stands out as being the We used the GILDAS/CLASS2 package (Pety 2005) to re- +(6,6) (Beuther +(cid:63) Member of the International Max Planck Research School (IM- +PRS)forAstronomyandAstrophysicsattheuniversitiesofBonnand +Cologne. +et al. 2007), NH 3 -strongestandmostvariableoneinW51-IRS2(e.g.,Henkeletal. duce the spectral line data. A first-order polynomial was sub- -2013).Maseremissioninthislinehasonlybeendetectedinfive tractedfromeachspectrumforbaselineremoval. +(7,7), NH +3 +(9,9), and NH +3 +(12,12) (Henkel +etal.2013).Thesehaveledtothediscoveryofmetastablemaser +lines in 22 different regions (Mauersberger et al. 1986, 1987; +Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; +Cesaronietal.1992;Wilson&Schilke1993;Mangum&Woot- +ten1994;Kraemer&Jackson1995;Zhang&Ho1995;Zhang +etal.1999;Walshetal.2007;Hunteretal.2008;Galván-Madrid +et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh +et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & +Joyce2014;McEwenetal.2016;Millsetal.2018;Hoggeetal. 
+2019;Meietal.2020;Towneretal.2021).Comparedwiththe +metastable ammonia masers, detected non-metastable (J > K) +ammoniamasertransitionsaremorenumerous.Thefirsthighly +excited non-metastable ammonia maser was detected by Mad- +denetal.(1986)inthe(J,K)=(9,6)and(6,3)lines.Thereafter, +many other NH +3 +non-metastable inversion transition lines have +beenidentifiedasmasers,includingthe(5,3),(5,4),(6,1),(6,2), +(6,4),(6,5),(7,3),(7,4),(7,5)(7,6),(8,3),(8,4),(8,5),(8,6),(9,3), +(9,4),(9,5),(9,7),(9,8),(10,7),(10,8),(10,9),and(11,9)transi- +Articlenumber,page1of10 +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +0 +2 +1 +v +3 +[ +a +s +t +r +o +- +p +h +. +G +A +] +9 +A +p +r +2 +0 +2 +2 +A&Aproofs:manuscriptno.mainArxiv +tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; +Henkel et al. 2013; Mei et al. 2020). Except for the NH +3 +(3,3) +masersproposedtobeassociatedwithfoursupernovaremnants +(McEwenetal.2016),almostalltheotherammoniamasersare +detected in high-mass star-forming regions (HMSFRs). How- +ever, while many HMSFRs host water (H 2O), hydroxyl (OH), +or methanol (CH 3OH) masers, ammonia masers are quite rare +in these sources, and the role that the environment of a young +high-mass star plays in their excitation remains unclear. There- +fore, dedicated searches for ammonia masers in HMSFRs are +indispensable in regard to their overall incidence and associa- +tion with different environments, which can provide additional +constraintsonthepumpingmechanismofammoniamasers. +So far, a total of 32 NH +3 +inversion transitions (∆K = 0 +and ∆J = 0) have been identified as masers. Among these, and +despite arising from energy levels as high as 1090 K above +the ground state, the NH +3 +(9,6) maser stands out as being the +strongestandmostvariableoneinW51-IRS2(e.g.,Henkeletal. +2013).Maseremissioninthislinehasonlybeendetectedinfive HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. -1986), and Sgr B2(N) (Mei et al. 2020). 
The NH (3,3) masers +1986), and Sgr B2(N) (Mei et al. 2020). The NH 3 -2.2. JVLAobservationsanddatareduction +(3,3) masers arethoughttobecollisionallyexcited(e.g.,Floweretal.1990; Mangum & Wootten 1994); in contrast, the pumping mecha- -Observations of the NH (9,6) line toward Cep A and -3 -nismofNH 3(9,6)masersislesswellconstrained(Maddenetal. G34.26+0.15 were obtained on 2021 July 13 with the JVLA -1986).Brown&Cragg(1991)havestudiedortho-ammoniaand of the National Radio Astronomy Observatory3 (NRAO) in the +nismofNH 3(9,6)masersislesswellconstrained(Maddenetal. +1986).Brown&Cragg(1991)havestudiedortho-ammoniaand found that it could possibly pump the (6,3) inversion line, but -C configuration (project ID: 21A-157, PI: Yaoting Yan). We theydidnotextendtheirmodeltothe(9,6)transitionduetothe -employed 27 antennas for the observations. The primary beam -factthatcollisionratesareonlyknownforinversionlevelsupto of the JVLA antennas is 150(cid:48)(cid:48) (FWHM) at 18.5 GHz. A mix- +factthatcollisionratesareonlyknownforinversionlevelsupto J = 6(e.g.,Danbyetal.1988). +NH 3(9,6)masersarefoundtobestronglyvariable,similarto +H 2Omasers(Maddenetal.1986;Pratapetal.1991;Henkeletal. +2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) +lineshowedsignificantvariationinlineshapewithinatimein- +tervalofonlytwodays.Mappingofthe(9,6)masertowardW51 +withverylongbaselineinterferometry(VLBI)suggeststhatthe +masers are closer to the H 2O masers than to the OH masers or +to ultracompact (UC) Hii regions (Pratap et al. 1991). While +Henkeletal.(2013)andGoddietal.(2015)showedthattheSiO +andNH 3masersinW51-IRS2areveryclosetoeachother,their +positions,differingby0(cid:48)(cid:48) .065(∼0.015pc),donotfullycoincide. +In this paper we report the discovery of NH 3 (9,6) masers +in two HMSFRs, Cepheus A and G34.26+0.15. This increases +the number of (9,6) maser detections in our Galaxy from five +to seven. In Sect. 
2 observations with the Effelsberg 100-meter +telescopeandtheKarlG.JanskyVeryLargeArray(JVLA)are +described. Results are presented in Sect. 3. The morphology of +CepAandG34.26+0.15aswellasacomparisonoftheemission +distributions of different tracers with the NH 3 (9,6) masers are +presentedinSect.4.OurmainresultsaresummarizedinSect.5. +2. Observationsanddatareduction +2.1. Effelsbergobservationsanddatareduction +The NH +3 +(9,6) line was observed toward Cep A and +G34.26+0.15 with the 100-meter Effelsberg telescope1 in 2020 +Januaryand2021February,July,andAugust.TheS14mmdou- +blebeamsecondaryfocusreceiverwasemployed.Thefullwidth +at half maximum (FWHM) beam size is 49(cid:48)(cid:48) at 18.5 GHz, the +frequencyofthetargetline.Theobservationswereperformedin +positionswitchingmode,andtheoffpositionwas10(cid:48)inazimuth +1 Based on observations with the 100-meter telescope of the MPIfR +(Max-Planck-InstitutfürRadioastronomie)atEffelsberg. +away from the source. For observations made before 2021 Au- +gust,weusedaspectrometerthatcovered2GHzwidebackends +withachannelwidthof38.1kHz,correspondingto∼0.62kms−1 +at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar +1975).Ahighspectralresolutionbackendwith65536channels +and a bandwidth of 300 MHz was employed in 2021 August, +providing a channel width of 0.07 km s−1 at 18.5 GHz. Point- +ing was checked every 2 hours using 3C 286 or NGC 7027. +Focus calibrations were done at the beginning of the observa- +tionsandduringsunsetandsunrisetowardtheabovementioned +pointingsources.Thesystemtemperatureswere100–130Kon +amain-beambrightnesstemperature,T MB,scale.Thisfluxden- +sitywascalibratedassumingaT MB/S ratioof1.95K/Jy,derived +fromcontinuumcrossscansofNGC7027(thefluxdensitywas +adoptedfromOttetal.1994).Calibrationuncertaintiesareesti- +matedtobe∼10%. +We used the GILDAS/CLASS2 package (Pety 2005) to re- +duce the spectral line data. A first-order polynomial was sub- +tractedfromeachspectrumforbaselineremoval. +2.2. 
JVLAobservationsanddatareduction +Observations of the NH +3 +(9,6) line toward Cep A and +G34.26+0.15 were obtained on 2021 July 13 with the JVLA +of the National Radio Astronomy Observatory3 (NRAO) in the +C configuration (project ID: 21A-157, PI: Yaoting Yan). We +employed 27 antennas for the observations. The primary beam +of the JVLA antennas is 150(cid:48)(cid:48) (FWHM) at 18.5 GHz. A mix- tureofmixedthree-bitandeight-bitsamplerswereusedtoper- -NH (9,6)masersarefoundtobestronglyvariable,similarto -3 form the observations. For the NH (9,6) line observations, we +form the observations. For the NH 3 -H Omasers(Maddenetal.1986;Pratapetal.1991;Henkeletal. -2 used one subband with the eight-bit sampler covering a band- -2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) +(9,6) line observations, we +used one subband with the eight-bit sampler covering a band- widthof16MHzwithfullpolarization,eightrecirculations,and -lineshowedsignificantvariationinlineshapewithinatimein- four baseline board pairs (BIBPs) to provide a velocity range -tervalofonlytwodays.Mappingofthe(9,6)masertowardW51 of 260 km s−1 with a channel spacing of 0.13 km s−1. Two -withverylongbaselineinterferometry(VLBI)suggeststhatthe +of 260 km s−1 with a channel spacing of 0.13 km s−1. Two additional subbands of bandwidth 16 MHz were used to cover -masers are closer to the H O masers than to the OH masers or -2 the NH (8,5) and (10,7) lines. The three-bit sampler with 32 -to ultracompact (UC) Hii regions (Pratap et al. 1991). While 3 +the NH +3 +(8,5) and (10,7) lines. The three-bit sampler with 32 subbands, each with a bandwidth of 128 MHz to cover a to- -Henkeletal.(2013)andGoddietal.(2015)showedthattheSiO tal range of 4 GHz between 20–24 GHz, was used to mea- -andNH masersinW51-IRS2areveryclosetoeachother,their -3 sure the continuum emission. 3C 286 with a flux density of -positions,differingby0(cid:48).(cid:48)065(∼0.015pc),donotfullycoincide. +sure the continuum emission. 
3C 286 with a flux density of 2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a -In this paper we report the discovery of NH 3 (9,6) masers calibratorforpointing,fluxdensity,bandpass,andpolarization. -in two HMSFRs, Cepheus A and G34.26+0.15. This increases J2230+6946andJ1851+0035servedasgaincalibratorsforCep -the number of (9,6) maser detections in our Galaxy from five A and G34.26+0.15, respectively. The on-source times were -to seven. In Sect. 2 observations with the Effelsberg 100-meter 4m30sand4m50stowardCepAandG34.26+0.15,respectively. -telescopeandtheKarlG.JanskyVeryLargeArray(JVLA)are +calibratorforpointing,fluxdensity,bandpass,andpolarization. +J2230+6946andJ1851+0035servedasgaincalibratorsforCep +A and G34.26+0.15, respectively. The on-source times were +4m30sand4m50stowardCepAandG34.26+0.15,respectively. Data from two antennas were lost due to technical is- -described. Results are presented in Sect. 3. The morphology of sues. The data from the remaining 25 antennas were reduced -CepAandG34.26+0.15aswellasacomparisonoftheemission through the Common Astronomy Software Applications pack- -distributions of different tracers with the NH 3 (9,6) masers are age(CASA4;McMullinetal.2007).Wecalibratedthedatawith -presentedinSect.4.OurmainresultsaresummarizedinSect.5. +age(CASA4;McMullinetal.2007).Wecalibratedthedatawith the JVLA CASA calibration pipeline using CASA 6.1.2. The results were obtained after flagging data that contain artifacts. We inspected the phase, amplitude, and bandpass variations of -2. Observationsanddatareduction thecalibratedvisibilitydatatosearchforadditionalartifactsbe- fore imaging. Then, the uvcontsub task in CASA was used to -2.1. 
Effelsbergobservationsanddatareduction separatethecalibratedvisibilitiesintotwoparts,onewithline- -The NH (9,6) line was observed toward Cep A and onlydataandtheotherwiththecontinuumdata.Thetcleantask -3 -G34.26+0.15 with the 100-meter Effelsberg telescope1 in 2020 withacellsizeof0(cid:48).(cid:48)2andBriggsweightingwithrobust=0was -Januaryand2021February,July,andAugust.TheS14mmdou- usedtoproducetheimagesofspectrallineandcontinuumemis- -blebeamsecondaryfocusreceiverwasemployed.Thefullwidth sion. The synthesized beams for NH (9,6) are 1(cid:48).(cid:48)47×0(cid:48).(cid:48)99 at -3 -at half maximum (FWHM) beam size is 49(cid:48)(cid:48) at 18.5 GHz, the -frequencyofthetargetline.Theobservationswereperformedin 2 https://www.iram.fr/IRAMFR/GILDAS/ -positionswitchingmode,andtheoffpositionwas10(cid:48)inazimuth 3 TheNationalRadioAstronomyObservatoryisafacilityoftheNa- +onlydataandtheotherwiththecontinuumdata.Thetcleantask +withacellsizeof0(cid:48)(cid:48) .2andBriggsweightingwithrobust=0was +usedtoproducetheimagesofspectrallineandcontinuumemis- +sion. The synthesized beams for NH +3 +(9,6) are 1(cid:48)(cid:48) .47×0(cid:48)(cid:48) .99 at +2 https://www.iram.fr/IRAMFR/GILDAS/ +3 TheNationalRadioAstronomyObservatoryisafacilityoftheNa- tionalScienceFoundationoperatedundercooperativeagreementbyAs- -1 Based on observations with the 100-meter telescope of the MPIfR sociatedUniversities,Inc. -(Max-Planck-InstitutfürRadioastronomie)atEffelsberg. 4 https://casa.nrao.edu/ +sociatedUniversities,Inc. +4 https://casa.nrao.edu/ Articlenumber,page2of10 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions -P.A. = 58◦.79 and 1(cid:48).(cid:48)33 × 1(cid:48).(cid:48)06 at P.A. = 5◦.36 toward Cep A +P.A. = 58◦.79 and 1(cid:48)(cid:48) .33 × 1(cid:48)(cid:48) .06 at P.A. = 5◦.36 toward Cep A and G34.26+0.15, respectively. 
For the 1.36cm (20–24 GHz) -continuumemission,thesynthesizedbeamsare1(cid:48).(cid:48)08×0(cid:48).(cid:48)67at -P.A.=60◦.64and0(cid:48).(cid:48)95×0(cid:48).(cid:48)71atP.A.=5◦.91towardCepAand +continuumemission,thesynthesizedbeamsare1(cid:48)(cid:48) .08×0(cid:48)(cid:48) .67at +P.A.=60◦.64and0(cid:48)(cid:48) .95×0(cid:48)(cid:48) .71atP.A.=5◦.91towardCepAand G34.26+0.15. The typical absolute astrometric accuracy of the JVLAis∼10%ofthesynthesizedbeam5.Thefluxdensityscale calibrationaccuracyisestimatedtobewithin15%. -Fig. 2. NH (9,6) line profiles emphasizing, in contrast to the spectra +Fig. 1. Spectra from NH +3 +(9,6) transition lines. Left: Top to bottom: +TimesequenceofNH +3 +(9,6)profilesobservedtowardCepAwiththe +Effelsberg 100-meter telescope (after subtracting a first-order polyno- +mialbaseline).AJVLAspectrumisinterspersed.Thesystemicveloc- +ity from CO and HCO+ lines is indicated by a dashed blue line. The +two dashed red lines at LSR velocities, V LSR, of −0.90 km s−1 and +−0.28 km s−1 indicate the central velocities of the two major compo- +nents.Right:NH 3 (9,6)spectrafromG34.26+0.15.Thesystemicve- +locityfromC17Oisindicatedbyadashedblueline.Thethreedashed +redlinesatV LSR=54.1kms−1,55.8kms−1,and62.5kms−1showthe +centralvelocitiesofthemainammoniaemissioncomponents. +3. Results +The spectra from different epochs are shown in Figs. 1 and 2. +TowardCepA,theNH +3 +(9,6)lineprofilefromtheJVLAisex- +tractedfromanEffelsberg-beam-sizedregion(FWHM,49(cid:48)(cid:48)).In +the case of G34.26+0.15, the NH +3 +spectrum is below the noise +level if a similarly large beam size is used. Therefore, we de- +rivedtheJVLANH +3 +(9,6)spectrumfromasmallerregion,with +radius3(cid:48)(cid:48) .5,thatcontainsallthedetectedNH +3 +(9,6)emission.In +Table A.1, the observed NH 3 +(9,6) line parameters obtained by +Gaussianfitsarelisted.NH 3(8,5)and(10,7)emissionisnotde- +tected by our JVLA observations. 
The 3σ upper limits for the +NH +3 +(8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 +5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance- +/positional-accuracy +Fig. 2. NH +3 +(9,6) line profiles emphasizing, in contrast to the spectra in Fig. 1, weaker features. Cep A spectra are presented on the left, G34.26+0.15spectraontheright.Thetwodashedredlinesintheleft -panelsindicateV =1.48kms−1and2.89kms−1.Intherightpanels, -LSR +panelsindicateV LSR=1.48kms−1and2.89kms−1.Intherightpanels, thetwodashedredlinesreferto54.1kms−1and55.8kms−1. and 27.2 mJy beam−1, respectively. In G34.26+0.15, the corre- -sponding3σupperlimitsfortheNH (8,5)and(10,7)linesare +sponding3σupperlimitsfortheNH 3 +(8,5)and(10,7)linesare 22.1mJybeam−1 and30.4mJybeam−1.Forbothsources,sen- sitivity levels refer to emission from a single channel of width 0.13kms−1.Takingthelargermeasuredlinewidthsofthe(9,6) @@ -203,336 +342,458 @@ toward Cep A is presented in Fig. 3. Six published compact sources,HW2,HW3a,HW3b,HW3c,HW3d,andHW9,arede- tected in our observations. Figure 4 shows the 1.36cm contin- uuminG34.26+0.15.Threemaincontinuumobjects,A,B,and -Fig. 1. Spectra from NH (9,6) transition lines. Left: Top to bottom: -3 C,aredetected.Byusingtheimfit taskinCASA,wemeasured -TimesequenceofNH (9,6)profilesobservedtowardCepAwiththe -3 -Effelsberg 100-meter telescope (after subtracting a first-order polyno- thecontinuumfluxat1.36cmtowardindividualcompactsource +C,aredetected.Byusingtheimfit taskinCASA,wemeasured +thecontinuumfluxat1.36cmtowardindividualcompactsource componentsinCepAandG34.26+0.15.DetailsaregiveninTa- -mialbaseline).AJVLAspectrumisinterspersed.Thesystemicveloc- -ity from CO and HCO+ lines is indicated by a dashed blue line. The bleA.2. -two dashed red lines at LSR velocities, V , of −0.90 km s−1 and -LSR -−0.28 km s−1 indicate the central velocities of the two major compo- -nents.Right:NH 3 (9,6)spectrafromG34.26+0.15.Thesystemicve- 3.2. 
NH 3 (9,6)emissioninCepA -locityfromC17Oisindicatedbyadashedblueline.Thethreedashed -redlinesatV =54.1kms−1,55.8kms−1,and62.5kms−1showthe In2020January,NH 3 (9,6)emissionwithapeakfluxdensityof -centralvelociL tS ieR 0.67±0.07JywasfirstdetectedwiththeEffelsberg100-meter -softhemainammoniaemissioncomponents. +bleA.2. +3.2. NH 3 (9,6)emissioninCepA +In2020January,NH 3 (9,6)emissionwithapeakfluxdensityof +0.67±0.07JywasfirstdetectedwiththeEffelsberg100-meter telescopeinCepA.Emissionwithsimilarstrengthwasalsode- tected in 2021 February and August with the same telescope. Higher velocity resolution data, which were obtained in 2021 August, again with the Effelsberg 100-meter telescope, show -3. Results thatthe(9,6)emissioncontainstwomainvelocitycomponents. -The spectra from different epochs are shown in Figs. 1 and 2. Overall, the flux densities of the NH 3 (9,6) emission line mea- +Overall, the flux densities of the NH 3 (9,6) emission line mea- suredwiththeEffelsberg100-metertelescopeare,withinthecal- -TowardCepA,theNH (9,6)lineprofilefromtheJVLAisex- -3 -tractedfromanEffelsberg-beam-sizedregion(FWHM,49(cid:48)(cid:48)).In ibrationuncertainties,unchanged.Thisisvalidforthetimeinter- -the case of G34.26+0.15, the NH spectrum is below the noise valbetween2020JanuaryandAugust2021,whenwesmoothed -3 -level if a similarly large beam size is used. Therefore, we de- the obtained spectra to the same velocity resolution. We also -rivedtheJVLANH (9,6)spectrumfromasmallerregion,with seeanothertwoweakercomponents.Figure2emphasizesthese -3 -radius3(cid:48).(cid:48)5,thatcontainsallthedetectedNH (9,6)emission.In weakcomponentswithanexpandedfluxdensityscale. -3 -Table A.1, the observed NH (9,6) line parameters obtained by Higher angular resolution data from the JVLA pinpoint the -3 -Gaussianfitsarelisted.NH (8,5)and(10,7)emissionisnotde- position of the NH (9,6) emission with an offset of (−0(cid:48).(cid:48)28, -3 3 -tected by our JVLA observations. 
The 3σ upper limits for the 0(cid:48).(cid:48)02) relative to the 1.36cm continuum peak of Cep A HW2 -NH (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 (Fig.3).ThedeconvolvedNH (9,6)componentsizeis(0(cid:48).(cid:48)29± -3 3 -0(cid:48).(cid:48)15)×(0(cid:48).(cid:48)19±0(cid:48).(cid:48)14)atP.A.=174◦,derivedwiththeimfittask -5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance- inCASA,andcanthusbeconsidered,accountingfortheuncer- -/positional-accuracy tainties,asunresolved. +ibrationuncertainties,unchanged.Thisisvalidforthetimeinter- +valbetween2020JanuaryandAugust2021,whenwesmoothed +the obtained spectra to the same velocity resolution. We also +seeanothertwoweakercomponents.Figure2emphasizesthese +weakcomponentswithanexpandedfluxdensityscale. +Higher angular resolution data from the JVLA pinpoint the +position of the NH +3 +(9,6) emission with an offset of (−0(cid:48)(cid:48) .28, +0(cid:48)(cid:48) .02) relative to the 1.36cm continuum peak of Cep A HW2 +(Fig.3).ThedeconvolvedNH +3 +(9,6)componentsizeis(0(cid:48)(cid:48) .29± +0(cid:48)(cid:48) .15)×(0(cid:48)(cid:48) .19±0(cid:48)(cid:48) .14)atP.A.=174◦,derivedwiththeimfittask +inCASA,andcanthusbeconsidered,accountingfortheuncer- +tainties,asunresolved. Articlenumber,page3of10 A&Aproofs:manuscriptno.mainArxiv Fig. 3. Cepheus A. White contours mark the 1.36cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, and 110 × 0.125 mJy beam−1. The background image is the Spitzer 4.5µm emission, taken from the Galactic Legacy Infrared Mid-Plane -Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is α = 22h56m17s.972, and +Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is α J2000 -δ = 62◦01(cid:48)49(cid:48).(cid:48)587, the peak position of the continuum map, is marked with a black cross. 
Slightly to the west of the cross is the black += 22h56m17s.972, and +δ J2000 -ellipsedenotingthepositionoftheNH (9,6)emissionwithapurplestaratitscenter.OH(Bartkiewiczetal.2005),H O(Sobolevetal.2018), -3 2 -andCH OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarontheright-handsideindicates += 62◦01(cid:48)49(cid:48)(cid:48) .587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black +ellipsedenotingthepositionoftheNH 3 +(9,6)emissionwithapurplestaratitscenter.OH(Bartkiewiczetal.2005),H 2O(Sobolevetal.2018), +andCH 3OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarontheright-handsideindicates theLSRvelocityrangeofthemaserspots. Fig. 4. 1.36cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, 150,180,and200×5.0mJybeam−1.ThebackgroundimageistheSpitzer 4.5µmemission,takenfromGLIMPSE.Thereferencepositionis -α =18h53m18s.560,andδ =01◦14(cid:48)58(cid:48).(cid:48)201,thepeakposition,ismarkedbyablackcross.TheblackellipsesshowthepositionsofNH -J2000 J2000 3 -(9,6)emissionswithstarsattheircenter(i.e.,M1,M2,andM3).OH(Zhengetal.2000),H O(Imaietal.2011),andCH OH(Bartkiewiczetal. -2 3 -2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicatesthevelocityrange(V )ofmaserspots. -LSR -InviewoftheconstancyofthefluxdensitiesobtainedatEf- velocity resolution data from 2021 August show the NH (9,6) -3 -felsberg and the similar JVLA flux density, measured in 2021 emissiontobecomposedoftwodifferentcomponents.Thespec- -July,thereisnomissinginterferometricfluxdensityintheJVLA traofweakcomponentsonasmallerfluxdensityscalearepre- -data. sentedinFig.2. -3.3. 
NH 3 (9,6)emissioninG34.26+0.15 Three different locations showing NH 3 (9,6) emission are -foundtowardG34.26+0.15(Fig.4).ThedeconvolvedNH (9,6) -3 -TheNH (9,6)emissionwasfirstdetectedtowardG34.26+0.15 componentsizesare(1(cid:48).(cid:48)42±0(cid:48).(cid:48)43)×(0(cid:48).(cid:48)54±0(cid:48).(cid:48)62)atP.A.=97◦ -3 -in2020JanuarywiththeEffelsberg100-metertelescope.Higher (M1),(0(cid:48).(cid:48)42±0(cid:48).(cid:48)27)×(0(cid:48).(cid:48)15±0(cid:48).(cid:48)27)atP.A.=150◦ (M2),and +α +J2000 +=18h53m18s.560,andδ +J2000 +=01◦14(cid:48)58(cid:48)(cid:48) .201,thepeakposition,ismarkedbyablackcross.TheblackellipsesshowthepositionsofNH +3 +(9,6)emissionswithstarsattheircenter(i.e.,M1,M2,andM3).OH(Zhengetal.2000),H 2O(Imaietal.2011),andCH 3OH(Bartkiewiczetal. +2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicatesthevelocityrange(V LSR)ofmaserspots. +InviewoftheconstancyofthefluxdensitiesobtainedatEf- +felsberg and the similar JVLA flux density, measured in 2021 +July,thereisnomissinginterferometricfluxdensityintheJVLA +data. +3.3. NH 3 (9,6)emissioninG34.26+0.15 +TheNH +3 +(9,6)emissionwasfirstdetectedtowardG34.26+0.15 +in2020JanuarywiththeEffelsberg100-metertelescope.Higher +velocity resolution data from 2021 August show the NH +3 +(9,6) +emissiontobecomposedoftwodifferentcomponents.Thespec- +traofweakcomponentsonasmallerfluxdensityscalearepre- +sentedinFig.2. 
+Three different locations showing NH 3 (9,6) emission are +foundtowardG34.26+0.15(Fig.4).ThedeconvolvedNH 3(9,6) +componentsizesare(1(cid:48)(cid:48) .42±0(cid:48)(cid:48) .43)×(0(cid:48)(cid:48) .54±0(cid:48)(cid:48) .62)atP.A.=97◦ +(M1),(0(cid:48)(cid:48) .42±0(cid:48)(cid:48) .27)×(0(cid:48)(cid:48) .15±0(cid:48)(cid:48) .27)atP.A.=150◦ (M2),and Articlenumber,page4of10 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions -(1(cid:48).(cid:48)17±0(cid:48).(cid:48)34)×(0(cid:48).(cid:48)27±0(cid:48).(cid:48)46)atP.A.=53◦ (M3)andarethus the NH (9,6) emission in Cep A is due to maser action. Be- -3 -comparabletoorsmallerthanthebeamsize. causeG34.26+0.15islocatedataboutfivetimesthedistanceto -Overall, the NH (9,6) line from G34.26+0.15 weakened CepA,beamdilutioneffectsreducethelowermainbeambright- +(1(cid:48)(cid:48) .17±0(cid:48)(cid:48) .34)×(0(cid:48)(cid:48) .27±0(cid:48)(cid:48) .46)atP.A.=53◦ (M3)andarethus +comparabletoorsmallerthanthebeamsize. +Overall, the NH +3 +(9,6) line from G34.26+0.15 weakened +during the time interval from 2020 January to 2021 August by +about70%.AcomparisonbetweentheJVLAspectrumandthe +Effelsbergdata,assumingalineardecreaseintheintegratedin- +tensity as a function of time between different epochs of the +100-meterobservations,suggeststhereisnomissingfluxinthe +JVLAdata.ThisissimilartothesituationinCepA. +4. Discussion +4.1. MorphologyofCepAandG34.26+0.15 +Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc +(Moscadellietal.2009;Dzibetal.2011),isthesecondclosest +HMSFR (after Orion) and by far the closest NH +3 +(9,6) maser +known.About16compact(∼1(cid:48)(cid:48))radiosources(e.g.,Hughes& +Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been +identified in Cep A. 
Hughes & Wouterloot (1984) discovered +thesetargetsatradiowavelengths,whichareUCandhypercom- +pact(HC)Hiiregionsand/orstellarwindsources,subsequently +namedasHWsources.TheHW2objectisoneofthebestknown +examplesofaprotostellarjetordisksystemdrivingapowerful +outflow(e.g.,Rodriguezetal.1980;Güstenetal.1984;Torrelles +et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). +TheobservedNH 3(9,6)emissionisslightlyoffset(−0(cid:48)(cid:48) .28,0(cid:48)(cid:48) .02) +fromthecenterofHW2(seeFig.3). +G34.26+0.15isanHMSFRlocatedatadistanceof3.3kpc +(Kuchar & Bania 1994). It hosts four radio continuum compo- +nents named A, B, C, and D. Component C is a prototypical +cometaryUCHiiregioncontainingacompactheadandadiffuse +tailthatextendsfromeasttowest(e.g.,Reid&Ho1985;Garay +etal.1986;Sewiloetal.2004;Sewiłoetal.2011).Components +A and B are HC Hii regions, located to the east of component +C.Anextendedring-likeHiiregion,calledcomponentD,islo- +cated southeast of components A-C. One of the three observed +NH 3(9,6)emissionlinesources,M1,isclosetotheheadofcom- +ponentC,whereasM2andM3originatefromanothercompact +regioninthewestoftheHCHiicomponentA(seeFig.4). +4.2. NH +3 +(9,6)emissionpossiblycausedbymaseraction +As shown in Fig. 1, the NH +3 +(9,6) profiles in Cep A and +G34.26+0.15 are narrow (∆V +1/2 +≤2.0 km s−1), much narrower +than the expected line widths ((cid:38)4 km s−1) of thermal lines ob- +servedatasimilarangularresolution(e.g.,Torrellesetal.1985, +1986,1993,1999;Henkeletal.1987;Comitoetal.2007;Mook- +erjeaetal.2007;Wyrowskietal.2012;Beutheretal.2018).Ve- +locity shifts with respect to the systemic velocities of the two +sourcesarebothobserved,thatis,V ∼10kms−1 inCepAand +V ∼4kms−1 inG34.26+0.15(seedetailsinSect.4.3).Further- +more, time variability is observed in the case of G34.26+0.15, +whichisalsoacharacteristicfeatureofmaseremission. +Additionalevidenceoftheirmasernatureisthehighbright- +nesstemperaturesofthe(9,6)emissionspotstowardCepAand +G34.26+0.15. 
The spectral parameters are listed in Table A.3. +Because at least a significant part of the NH +3 +(9,6) emission +is not resolved by our JVLA observations, the derived bright- +nesstemperaturesareonlylowerlimits.Nevertheless,thelower +limits on the brightness temperature are >800 K in Cep A (see +Table A.3), which is much higher than the expected thermal +gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito +et al. 2007; Beuther et al. 2018). This strongly suggests that +the NH +3 +(9,6) emission in Cep A is due to maser action. Be- +causeG34.26+0.15islocatedataboutfivetimesthedistanceto +CepA,beamdilutioneffectsreducethelowermainbeambright- +ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta- +bleA.3).WealsonotethattheluminosityoftheNH 3(9,6)emis- +sioninG34.26+0.15ishigherthanorcomparabletothatinCep +A,dependingontheepochofourobservations. +Finally,thenon-detectionsofthe(8,5)and(10,7)linesalso +indicate that the (9,6) line is special. This allows us to derive +lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity +ratios.The(9,6)linearisesfromortho-NH 3 -during the time interval from 2020 January to 2021 August by ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta- -about70%.AcomparisonbetweentheJVLAspectrumandthe bleA.3).WealsonotethattheluminosityoftheNH (9,6)emis- +(K = 3n),whereas +the NH 3 -Effelsbergdata,assumingalineardecreaseintheintegratedin- sioninG34.26+0.15ishigherthanorcomparabletothatinCep -tensity as a function of time between different epochs of the A,dependingontheepochofourobservations. -100-meterobservations,suggeststhereisnomissingfluxinthe Finally,thenon-detectionsofthe(8,5)and(10,7)linesalso -JVLAdata.ThisissimilartothesituationinCepA. indicate that the (9,6) line is special. This allows us to derive -lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity -ratios.The(9,6)linearisesfromortho-NH (K = 3n),whereas +(8,5) and (10,7) lines are para-NH 3 -4. 
Discussion the NH (8,5) and (10,7) lines are para-NH (K (cid:44) 3n) lines. -3 3 -4.1. MorphologyofCepAandG34.26+0.15 Theminimumortho-to-pararatiosareintherange12–42and1– +(K (cid:44) 3n) lines. +Theminimumortho-to-pararatiosareintherange12–42and1– 8 toward Cep A and G34.26+0.15, respectively. The statistical -Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc weights for the ortho states are twice as large as those for the -(Moscadellietal.2009;Dzibetal.2011),isthesecondclosest parastates(e.g.,Umemotoetal.1999;Goddietal.2011;Henkel -HMSFR (after Orion) and by far the closest NH (9,6) maser etal.2013).InCepA,thelineintensityratiosarefarhigherthan -3 -known.About16compact(∼1(cid:48)(cid:48))radiosources(e.g.,Hughes& thisfactoroftwo.Thus,atleastinCepAthehighermainbeam -Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been brightness peak temperature of the (9,6) emission is caused by -identified in Cep A. Hughes & Wouterloot (1984) discovered maser action, perhaps involving exponential amplification, and -thesetargetsatradiowavelengths,whichareUCandhypercom- thecaseofG34.26+0.15islikelysimilar. -pact(HC)Hiiregionsand/orstellarwindsources,subsequently -namedasHWsources.TheHW2objectisoneofthebestknown -4.3. ComparisonofNH (9,6)maserswithpreviously -examplesofaprotostellarjetordisksystemdrivingapowerful 3 -published(quasi-)thermalNH emission -outflow(e.g.,Rodriguezetal.1980;Güstenetal.1984;Torrelles 3 -et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines -TheobservedNH 3(9,6)emissionisslightlyoffset(−0(cid:48).(cid:48)28,0(cid:48).(cid:48)02) show thermal emission toward Cep A over a velocity range of -fromthecenterofHW2(seeFig.3). −13 km s−1 ≤ V ≤ −4 km s−1 (Brown et al. 
1981; Güsten +weights for the ortho states are twice as large as those for the +parastates(e.g.,Umemotoetal.1999;Goddietal.2011;Henkel +etal.2013).InCepA,thelineintensityratiosarefarhigherthan +thisfactoroftwo.Thus,atleastinCepAthehighermainbeam +brightness peak temperature of the (9,6) emission is caused by +maser action, perhaps involving exponential amplification, and +thecaseofG34.26+0.15islikelysimilar. +4.3. ComparisonofNH +3 +(9,6)maserswithpreviously +published(quasi-)thermalNH +3 +emission +The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines +show thermal emission toward Cep A over a velocity range of +−13 km s−1 ≤ V LSR -G34.26+0.15isanHMSFRlocatedatadistanceof3.3kpc +≤ −4 km s−1 (Brown et al. 1981; Güsten etal.1984;Torrellesetal.1985,1986,1993,1999).Anaverage -(Kuchar & Bania 1994). It hosts four radio continuum compo- NH columndensityof∼5×1015cm−2wasestimatedforaregion -3 -nents named A, B, C, and D. Component C is a prototypical of3(cid:48)(cid:48)aroundHW2(Torrellesetal.1999).ThishighNH abun- +NH 3columndensityof∼5×1015cm−2wasestimatedforaregion +of3(cid:48)(cid:48)aroundHW2(Torrellesetal.1999).ThishighNH 3 -cometaryUCHiiregioncontainingacompactheadandadiffuse +abun- dance could provide a suitable environment for maser species. -tailthatextendsfromeasttowest(e.g.,Reid&Ho1985;Garay Largelinewidths(∆V (cid:39)7.0kms−1)withV ∼ −10kms−1 -1/2 LSR -etal.1986;Sewiloetal.2004;Sewiłoetal.2011).Components inboth(1,1)and(2,2)lineswerefoundtowardHW2(Torrelles -A and B are HC Hii regions, located to the east of component et al. 1993). The velocity is similar to the cloud’s systemic lo- -C.Anextendedring-likeHiiregion,calledcomponentD,islo- cal standard of rest (LSR) velocity of −11.2 km s−1, which -cated southeast of components A-C. One of the three observed is based on CO (Narayanan & Walker 1996) and HCO+ ob- -NH 3(9,6)emissionlinesources,M1,isclosetotheheadofcom- servations (Gómez et al. 1999). 
Our (9,6) maser is redshifted -ponentC,whereasM2andM3originatefromanothercompact (−0.9 km s−1 ≤ V ≤2.9 km s−1) and shares positions with +Largelinewidths(∆V +1/2 +(cid:39)7.0kms−1)withV LSR -regioninthewestoftheHCHiicomponentA(seeFig.4). the outflowing gas seen in CO and HCO+ with similarly red- +∼ −10kms−1 +inboth(1,1)and(2,2)lineswerefoundtowardHW2(Torrelles +et al. 1993). The velocity is similar to the cloud’s systemic lo- +cal standard of rest (LSR) velocity of −11.2 km s−1, which +is based on CO (Narayanan & Walker 1996) and HCO+ ob- +servations (Gómez et al. 1999). Our (9,6) maser is redshifted +(−0.9 km s−1 ≤ V +LSR +≤2.9 km s−1) and shares positions with +the outflowing gas seen in CO and HCO+ with similarly red- shiftedvelocities.Therefore,wearguethatthe(9,6)masersare relatedtooutflowinggas. -4.2. NH (9,6)emissionpossiblycausedbymaseraction -3 In G34.26+0.15, a large NH column density, -3 -As shown in Fig. 1, the NH (9,6) profiles in Cep A and 1018.5±0.2 cm−2, and a kinetic temperature of 225±75 K -3 -G34.26+0.15 are narrow (∆V ≤2.0 km s−1), much narrower were derived by Henkel et al. (1987) based on measurements +In G34.26+0.15, a large NH +3 +column density, +1018.5±0.2 cm−2, and a kinetic temperature of 225±75 K +were derived by Henkel et al. (1987) based on measurements +of 15 NH +3 +inversion transitions in the frequency range of +22.0–26.0 GHz. These did not include the (9,6) transition. +While these lines were measured with a beam size of about +40(cid:48)(cid:48), a comparison of the peak intensities of the optically thick +lines with the kinetic temperature reveals the size of the hot, +ammonia-emitting core to be only ∼2.5(cid:48)(cid:48). All those measured +NH +3 +lines were quasi-thermal and had LSR velocities of +∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1 +obtained from C17O observations (Wyrowski et al. 2012). 
+Their line widths (∆V 1/2 -than the expected line widths ((cid:38)4 km s−1) of thermal lines ob- of 15 NH inversion transitions in the frequency range of -3 -servedatasimilarangularresolution(e.g.,Torrellesetal.1985, 22.0–26.0 GHz. These did not include the (9,6) transition. -1986,1993,1999;Henkeletal.1987;Comitoetal.2007;Mook- While these lines were measured with a beam size of about -erjeaetal.2007;Wyrowskietal.2012;Beutheretal.2018).Ve- 40(cid:48)(cid:48), a comparison of the peak intensities of the optically thick -locity shifts with respect to the systemic velocities of the two lines with the kinetic temperature reveals the size of the hot, -sourcesarebothobserved,thatis,V ∼10kms−1 inCepAand ammonia-emitting core to be only ∼2.5(cid:48)(cid:48). All those measured -V ∼4kms−1 inG34.26+0.15(seedetailsinSect.4.3).Further- NH lines were quasi-thermal and had LSR velocities of -3 -more, time variability is observed in the case of G34.26+0.15, ∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1 -whichisalsoacharacteristicfeatureofmaseremission. obtained from C17O observations (Wyrowski et al. 2012). -Additionalevidenceoftheirmasernatureisthehighbright- Their line widths (∆V ≥3.6 km s−1) are larger than what +≥3.6 km s−1) are larger than what +we find (0.35 km s−1 ≤ ∆V 1/2 -nesstemperaturesofthe(9,6)emissionspotstowardCepAand we find (0.35 km s−1 ≤ ∆V ≤ 0.94 km s−1) for each (9,6) -1/2 -G34.26+0.15. The spectral parameters are listed in Table A.3. maser component (see details in Table A.3). In all, we may -Because at least a significant part of the NH (9,6) emission have observed four different (9,6) velocity features. Three -3 -is not resolved by our JVLA observations, the derived bright- are blueshifted at V ∼ 53.8 km s−1, 55.8 km s−1, and +≤ 0.94 km s−1) for each (9,6) +maser component (see details in Table A.3). In all, we may +have observed four different (9,6) velocity features. 
Three +are blueshifted at V LSR -nesstemperaturesareonlylowerlimits.Nevertheless,thelower 56.8kms−1,andafourth,tentativelydetected,at62.5 kms−1. -limits on the brightness temperature are >800 K in Cep A (see This tentative redshifted feature was only potentially detected -Table A.3), which is much higher than the expected thermal with Effelsberg in 2020 January. The velocity is similar to that -gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito of the JVLA measurements on the NH (1,1) absorption line -3 -et al. 2007; Beuther et al. 2018). This strongly suggests that againstcontinuumsourceC(∼ 7(cid:48)(cid:48) resolution;Ketoetal.1987) +∼ 53.8 km s−1, 55.8 km s−1, and +56.8kms−1,andafourth,tentativelydetected,at62.5 kms−1. +This tentative redshifted feature was only potentially detected +with Effelsberg in 2020 January. The velocity is similar to that +of the JVLA measurements on the NH +3 +(1,1) absorption line +againstcontinuumsourceC(∼ 7(cid:48)(cid:48) resolution;Ketoetal.1987) Articlenumber,page5of10 A&Aproofs:manuscriptno.mainArxiv -andtheNH (3,3)emissionsurroundingcontinuumsourceBas etal.2013).BothCepAandG34.26+0.15havesimilarkinetic +andtheNH +3 +(3,3)emissionsurroundingcontinuumsourceBas +wellastheheadofC(1(cid:48)(cid:48) .4×1(cid:48)(cid:48) .2resolution;Heatonetal.1989). +However, we did not find this redshifted component in our +JVLAobservations.Therefore,itspositionwithinG34.26+0.15 +cannot be determined. The blueshifted (9,6) masers with a +velocity range of 53.8–56.8 km s−1 (M1, M2, and M3) show +velocities compatible with those of the NH +3 +(3,3) emission at +the proper positions (Heaton et al. 1989), which might be a +suitableenvironmentformaserspecies. +4.4. ComparisonofNH 3 (9,6)maserswithothermaserlines +To characterize the environment of NH +3 +(9,6) masers, we can +compare their positions with respect to those of other maser +species (i.e., OH, H 2O, and CH 3OH). 
Toward Cep A HW2, +manyCH 3OH(e.g.,Menten1991;Sugiyamaetal.2008;Sanna +et al. 2017) and H 2O maser spots (e.g., Torrelles et al. 1998, +2011;Sobolevetal.2018)aredetectedandareassociatedwith +its disk. Sobolev et al. (2018) also found that most of the H 2O +maserfluxisassociatedwiththecompactHiiregionHW3d.OH +maser features close to the Hii regions are also seen in HW2 +(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These +three kinds of masers in Cep A have a large velocity range of +−25 km s−1 ≤ V +LSR +≤ −2 km s−1 and are widespread around +HW2 and HW3, while NH 3 -wellastheheadofC(1(cid:48).(cid:48)4×1(cid:48).(cid:48)2resolution;Heatonetal.1989). temperatures of (cid:38)200 K (Henkel et al. 1987; Patel et al. 2005; -However, we did not find this redshifted component in our Comito et al. 2007; Beuther et al. 2018). This suggests that -JVLAobservations.Therefore,itspositionwithinG34.26+0.15 highkinetictemperaturesareneededtoexciteNH (9,6)masers. +(9,6) emission is only detected at +−0.9 km s−1 ≤ V +LSR +≤2.9 km s−1 toward a sub-arcsecond- +sizedregiontothewestofthepeakcontinuumpositionofHW2 +(see Fig. 3). This suggests that the NH 3 -cannot be determined. The blueshifted (9,6) masers with a However,itshouldbenotedthatthesilicatedustabsorptionfea- -velocity range of 53.8–56.8 km s−1 (M1, M2, and M3) show turemightdominateat10µm(seethespectralenergydistribu- -velocities compatible with those of the NH (3,3) emission at tion of Cep A in De Buizer et al. 2017). Additionally, there is +(9,6) maser in Cep A +isuniqueandnotrelatedtomaserspotsseeninothermolecular +species. +In G34.26+0.15, OH (Zheng et al. 2000), H 2O (Imai et al. +2011),andCH 3OH(Bartkiewiczetal.2016)masershavebeen +detected east of source C (Fig. 4), and none of them coincides +with the head of C. The NH 3 -the proper positions (Heaton et al. 1989), which might be a nobrightinfraredemissionaroundthetwo(9,6)masers,M2and -suitableenvironmentformaserspecies. M3, in G34.26+0.15 (see Fig. 
4; see also Fig. 11 in De Buizer +(9,6) maser M1 is also found +slightly off the head of source C. This could suggest that M1 +ispoweredbycontinuumsourceCorbyanoutflow.Nearcom- +ponent B, there are some OH and CH 3OH masers but no H 2O +or NH 3 masers. A group of H 2O masers, well-known tracers +of outflows, with a large velocity distribution of 43 km s−1 ≤ +V +LSR +≤54 km s−1, was found to the west of the centimeter- +continuum source A and close to the peak of the millimeter- +continuumemission(seedetailsinourFig.A.2andalsoinFig.5 +ofImaietal.2011).TheclosenessofNH 3(9,6)maserspotsM2 +andM3tothisgroupofwatermasersandtheirsimilarvelocities +again suggest an association of NH +3 +(9,6) masers with outflow +activity. +4.5. Constraintsonpumpingscenarios +Our observations have resulted in the detection of NH 3 (9,6) +masers in Cep A and G34.26+0.15. The new detections could +provide additional constraints on the maser line’s pumping +mechanism. As mentioned in Sect. 1, the pumping mechanism +of the (9,6) maser is unclear (Madden et al. 1986; Brown & +Cragg1991).Previousstudieshavesuggestedthattherearethree +main pumping scenarios to explain the observed NH +3 +maser +lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra- +diationfromthedustcontinuumemission,(2)lineoverlap,and +(3)collisionalpumping. +For the first mechanism, infrared photons near 10 µm are +needed for vibrational excitation. The high dust temperature +(∼300 K) of W51-IRS2 can provide substantial infrared pho- +tons near 10 µm, which is used for radiative pumping (Henkel +etal.2013).BothCepAandG34.26+0.15havesimilarkinetic +temperatures of (cid:38)200 K (Henkel et al. 1987; Patel et al. 2005; +Comito et al. 2007; Beuther et al. 2018). This suggests that +highkinetictemperaturesareneededtoexciteNH 3(9,6)masers. +However,itshouldbenotedthatthesilicatedustabsorptionfea- +turemightdominateat10µm(seethespectralenergydistribu- +tion of Cep A in De Buizer et al. 2017). 
Additionally, there is +nobrightinfraredemissionaroundthetwo(9,6)masers,M2and +M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer etal.2003fora10.5µmmap).Thisindicatesthatthepumping mechanism via infrared photons near 10 µm may not be viable -4.4. ComparisonofNH 3 (9,6)maserswithothermaserlines toexplainthe(9,6)masersinCepAandG34.26+0.15.Further- +toexplainthe(9,6)masersinCepAandG34.26+0.15.Further- more,Wilson&Schilke(1993)arguedthatradiativepumpingby -To characterize the environment of NH (9,6) masers, we can -3 dustemissiontendstoexcitemultipleadjacentammoniamaser -compare their positions with respect to those of other maser +dustemissiontendstoexcitemultipleadjacentammoniamaser transitions,whichappearstocontradictourfailuretodetectthe -species (i.e., OH, H O, and CH OH). Toward Cep A HW2, -2 3 adjacent(8,5)and(10,7)lines(withrespecttoquantumnumbers -manyCH OH(e.g.,Menten1991;Sugiyamaetal.2008;Sanna -3 andfrequency)andtoonlymeasurethe(9,6)transitionsinCep -et al. 2017) and H O maser spots (e.g., Torrelles et al. 1998, -2 A and G34.26+0.15. Therefore, we suggest that infrared radia- -2011;Sobolevetal.2018)aredetectedandareassociatedwith +adjacent(8,5)and(10,7)lines(withrespecttoquantumnumbers +andfrequency)andtoonlymeasurethe(9,6)transitionsinCep +A and G34.26+0.15. Therefore, we suggest that infrared radia- tionfromdustisnotthemainpumpingsource. -its disk. Sobolev et al. (2018) also found that most of the H O -maserfluxisassociatedwiththecompactHiiregionHW3d.O2 Madden et al. (1986) suggested that there might be some -H -maser features close to the Hii regions are also seen in HW2 line overlaps between the rotational NH 3 transitions in the far- +Madden et al. (1986) suggested that there might be some +line overlaps between the rotational NH 3 transitions in the far- infraredband.However,thiswouldbeunlikelytoaffectonlythe -(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These (9,6) line. 
Nevertheless, far-infrared spectral observations will -three kinds of masers in Cep A have a large velocity range of beneededtoclarifythisscenario. -−25 km s−1 ≤ V ≤ −2 km s−1 and are widespread around -LSR Based on our observations, the (9,6) maser spots are close -HW2 and HW3, while NH (9,6) emission is only detected at -3 to, but not coincident with, the peaks of the radio continuum -−0.9 km s−1 ≤ V ≤2.9 km s−1 toward a sub-arcsecond- -LSR emission in Cep A and G34.26+0.15. Furthermore, the (9,6) -sizedregiontothewestofthepeakcontinuumpositionofHW2 +Based on our observations, the (9,6) maser spots are close +to, but not coincident with, the peaks of the radio continuum +emission in Cep A and G34.26+0.15. Furthermore, the (9,6) masers show velocity offsets with respect to their systemic ve- -(see Fig. 3). This suggests that the NH (9,6) maser in Cep A -3 locities. This indicates that the (9,6) masers are located at the -isuniqueandnotrelatedtomaserspotsseeninothermolecular -base of outflows, similar to the H O masers. This is supported -species. 2 +locities. This indicates that the (9,6) masers are located at the +base of outflows, similar to the H 2O masers. This is supported by VLBI observations that show that (9,6) masers tend to be -In G34.26+0.15, OH (Zheng et al. 2000), H O (Imai et al. -2 closelyassociatedwithH Omasers(Pratapetal.1991).Theob- -2011),andCH 3OH(Bartkiewiczetal.2016)masershavebeen servedtimevariabilityin2 G34.26+0.15andW51-IRS2canalso -detected east of source C (Fig. 4), and none of them coincides +closelyassociatedwithH 2Omasers(Pratapetal.1991).Theob- +servedtimevariabilityinG34.26+0.15andW51-IRS2canalso beattributedtoepisodicmolecularoutflows.Thisindicatesthat -with the head of C. The NH (9,6) maser M1 is also found -3 collisional pumping could be the driver of the (9,6) maser. On -slightly off the head of source C. This could suggest that M1 +collisional pumping could be the driver of the (9,6) maser. 
On the other hand, collisional pumping has been successfully used -ispoweredbycontinuumsourceCorbyanoutflow.Nearcom- -toexplaintheNH (3,3)maser(Walmsley&Ungerechts1983; -ponent B, there are some OH and CH OH masers but no H O 3 -3 2 Floweretal.1990;Mangum&Wootten1994).Collisionstendto -or NH 3 masers. A group of H 2O masers, well-known tracers pumpfromtheK=0leveltotheK=3levelwithparitychanges, -of outflows, with a large velocity distribution of 43 km s−1 ≤ +toexplaintheNH +3 +(3,3)maser(Walmsley&Ungerechts1983; +Floweretal.1990;Mangum&Wootten1994).Collisionstendto +pumpfromtheK=0leveltotheK=3levelwithparitychanges, thatis,theupperlevelofthe(3,3)metastabletransitionwillbe -V ≤54 km s−1, was found to the west of the centimeter- -LSR overpopulated.NH (9,6)arisesfromtheorthospecies,soasim- -continuum source A and close to the peak of the millimeter- 3 +overpopulated.NH 3(9,6)arisesfromtheorthospecies,soasim- ilarmechanismmightalsooccurinthecaseofthe(9,6)transi- -continuumemission(seedetailsinourFig.A.2andalsoinFig.5 tion.Furthermeasurementsofcollisionalratesofammoniawill -ofImaietal.2011).TheclosenessofNH (9,6)maserspotsM2 -3 allowustotestthisscenario. -andM3tothisgroupofwatermasersandtheirsimilarvelocities -again suggest an association of NH (9,6) masers with outflow -3 -activity. 5. Summary -We report the discovery of NH (9,6) masers in two HMSFRs, +allowustotestthisscenario. +5. Summary +We report the discovery of NH 3 -4.5. Constraintsonpumpingscenarios Cep A and G34.26+0.15. The narrow line width of the emis- -sionfeatures(∆V ≤2.0kms−1)andtheirhighbrightnesstem- +(9,6) masers in two HMSFRs, +Cep A and G34.26+0.15. The narrow line width of the emis- +sionfeatures(∆V 1/2 -Our observations have resulted in the detection of NH 3 (9,6) peratures (> 400 K) indicate the maser nature of the lines. -masers in Cep A and G34.26+0.15. 
The new detections could The intensity of the (9,6) maser in G34.26+0.15 is decreasing -provide additional constraints on the maser line’s pumping withtime, whiletoward CepA themaser isstable basedon 20 -mechanism. As mentioned in Sect. 1, the pumping mechanism months of monitoring at Effelsberg. Linearly interpolating the -of the (9,6) maser is unclear (Madden et al. 1986; Brown & integratedintensitiesobtainedatEffelsbergasafunctionoftime, -Cragg1991).Previousstudieshavesuggestedthattherearethree theJVLAmeasurementsshowthatthereisnomissingfluxden- -main pumping scenarios to explain the observed NH maser sityonscalesontheorderof1.2arcsec(4×10−3and2×10−2pc) -3 -lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra- to the total single-dish flux. The JVLA-detected emission in- -diationfromthedustcontinuumemission,(2)lineoverlap,and dicates that the NH (9,6) maser in Cep A originates from a -3 -(3)collisionalpumping. sub-arcsecond-sized region slightly (0(cid:48).(cid:48)28 ± 0(cid:48).(cid:48)10) to the west -For the first mechanism, infrared photons near 10 µm are of the peak position of the 1.36cm continuum object, HW2. In -needed for vibrational excitation. The high dust temperature G34.26+0.15,threeNH (9,6)maserspotsareobserved:oneis -3 -(∼300 K) of W51-IRS2 can provide substantial infrared pho- closetotheheadofthecometaryUCHiiregionC,andtheother -tons near 10 µm, which is used for radiative pumping (Henkel twoareemittedfromacompactregiontothewestoftheHCHii +≤2.0kms−1)andtheirhighbrightnesstem- +peratures (> 400 K) indicate the maser nature of the lines. +The intensity of the (9,6) maser in G34.26+0.15 is decreasing +withtime, whiletoward CepA themaser isstable basedon 20 +months of monitoring at Effelsberg. Linearly interpolating the +integratedintensitiesobtainedatEffelsbergasafunctionoftime, +theJVLAmeasurementsshowthatthereisnomissingfluxden- +sityonscalesontheorderof1.2arcsec(4×10−3and2×10−2pc) +to the total single-dish flux. 
The JVLA-detected emission in- +dicates that the NH +3 +(9,6) maser in Cep A originates from a +sub-arcsecond-sized region slightly (0(cid:48)(cid:48) .28 ± 0(cid:48)(cid:48) .10) to the west +of the peak position of the 1.36cm continuum object, HW2. In +G34.26+0.15,threeNH +3 +(9,6)maserspotsareobserved:oneis +closetotheheadofthecometaryUCHiiregionC,andtheother +twoareemittedfromacompactregiontothewestoftheHCHii Articlenumber,page6of10 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions -regionA.Wesuggestthatthe(9,6)masersmaybeconnectedto Mauersberger,R.,Wilson,T.L.,&Henkel,C.1988,A&A,201,123 -outflowinggas.HigherangularresolutionJVLAandVLBIob- McEwen,B.C.,Pihlström,Y.M.,&Sjouwerman,L.O.2016,ApJ,826,189 -servations are planned to provide more accurate positions and McMullin,J.P.,Waters,B.,Schiebel,D.,Young,W.,&Golap,K.2007,inAs- -tronomicalSocietyofthePacificConferenceSeries,Vol.376,Astronomical +regionA.Wesuggestthatthe(9,6)masersmaybeconnectedto +outflowinggas.HigherangularresolutionJVLAandVLBIob- +servations are planned to provide more accurate positions and constraintsonpumpingscenarios. -DataAnalysisSoftwareandSystemsXVI,ed.R.A.Shaw,F.Hill,&D.J. -Acknowledgements. 
Wewouldliketothanktheanonymousrefereefortheuse- Bell,127 -fulcommentsthatimprovethemanuscript.Y.T.Y.isamemberoftheInterna- Mei,Y.,Chen,X.,Shen,Z.-Q.,&Li,B.2020,ApJ,898,157 -tionalMaxPlanckResearchSchool(IMPRS)forAstronomyandAstrophysics Menten,K.M.1991,ApJ,380,L75 -attheUniversitiesofBonnandCologne.Y.T.Y.wouldliketothanktheChina Mills,E.A.C.,Ginsburg,A.,Clements,A.R.,etal.2018,ApJ,869,L14 -ScholarshipCouncil(CSC)foritssupport.Wewouldliketothankthestaffat Mookerjea,B.,Casper,E.,Mundy,L.G.,&Looney,L.W.2007,ApJ,659,447 -theEffelsbergfortheirhelpprovidedduringtheobservations.Wethankthestaff Moscadelli,L.,Reid,M.J.,Menten,K.M.,etal.2009,ApJ,693,406 -oftheJVLA,especiallyTonyPerreaultandEdwardStarr,fortheirassistance Narayanan,G.&Walker,C.K.1996,ApJ,466,844 -with the observations and data reduction. This research has made use of the Ott,M.,Witzel,A.,Quirrenbach,A.,etal.1994,A&A,284,331 -NASA/IPACInfraredScienceArchive,whichisfundedbytheNationalAero- Patel,N.A.,Curiel,S.,Sridharan,T.K.,etal.2005,Nature,437,109 -nautics and Space Administration and operated by the California Institute of Perley,R.A.&Butler,B.J.2013,ApJS,204,19 -Technology. 
Pety,J.2005,inSF2A-2005:Semainedel’AstrophysiqueFrancaise,ed.F.Ca- -soli,T.Contini,J.M.Hameury,&L.Pagani,721 -Poynter,R.L.&Kakar,R.K.1975,ApJS,29,87 -Pratap,P.,Menten,K.M.,Reid,M.J.,Moran,J.M.,&Walmsley,C.M.1991, -References ApJ,373,L13 -Reid,M.J.&Ho,P.T.P.1985,ApJ,288,L17 -Bartkiewicz,A.,Szymczak,M.,Cohen,R.J.,&Richards,A.M.S.2005,MN- Rodriguez,L.F.,Ho,P.T.P.,&Moran,J.M.1980,ApJ,240,L149 -RAS,361,623 Sanna,A.,Moscadelli,L.,Surcis,G.,etal.2017,A&A,603,A94 -Bartkiewicz,A.,Szymczak,M.,&vanLangevelde,H.J.2016,A&A,587,A104 Sewilo,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2004,ApJ, -Benjamin,R.A.,Churchwell,E.,Babler,B.L.,etal.2003,PASP,115,953 605,285 -Beuther,H.,Mottram,J.C.,Ahmadi,A.,etal.2018,A&A,617,A100 Sewiło,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2011,ApJS, -Beuther,H.,Walsh,A.J.,Thorwirth,S.,etal.2007,A&A,466,989 194,44 -Brogan,C.L.,Hunter,T.R.,Cyganowski,C.J.,etal.2011,ApJ,739,L16 Sobolev,A.M.,Moran,J.M.,Gray,M.D.,etal.2018,ApJ,856,60 -Brown,A.T.,Little,L.T.,MacDonald,G.H.,Riley,P.W.,&Matheson,D.N. Sugiyama,K.,Fujisawa,K.,Doi,A.,etal.2008,PASJ,60,1001 -1981,MNRAS,195,607 Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1998,ApJ,509,262 -Brown,R.D.&Cragg,D.M.1991,ApJ,378,445 Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1999,MNRAS,307,58 -Carrasco-González,C.,Sanna,A.,Rodríguez-Kamenetzky,A.,etal.2021,ApJ, Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1985,ApJ,288,595 -914,L1 Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1986,ApJ,305,721 -Cesaroni,R.,Walmsley,C.M.,&Churchwell,E.1992,A&A,256,618 Torrelles,J.M.,Patel,N.A.,Curiel,S.,etal.2011,MNRAS,410,627 -Cheung,A.C.,Rank,D.M.,Townes,C.H.,Thornton,D.D.,&Welch,W.J. 
Torrelles,J.M.,Verdes-Montenegro,L.,Ho,P.T.P.,Rodriguez,L.F.,&Canto, -1968,Phys.Rev.Lett.,21,1701 J.1993,ApJ,410,202 -Churchwell,E.,Babler,B.L.,Meade,M.R.,etal.2009,PASP,121,213 Towner,A.P.M.,Brogan,C.L.,Hunter,T.R.,&Cyganowski,C.J.2021,ApJ, -Cohen,R.J.&Brebner,G.C.1985,MNRAS,216,51P 923,263 -Comito,C.,Schilke,P.,Endesfelder,U.,Jiménez-Serra,I.,&Martín-Pintado,J. Umemoto,T.,Mikami,H.,Yamamoto,S.,&Hirano,N.1999,ApJ,525,L105 -2007,A&A,469,207 Urquhart,J.S.,Morgan,L.K.,Figura,C.C.,etal.2011,MNRAS,418,1689 -Curiel,S.,Ho,P.T.P.,Patel,N.A.,etal.2006,ApJ,638,878 Walmsley,C.M.&Ungerechts,H.1983,A&A,122,164 -Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988, Walsh,A.J.,Breen,S.L.,Britton,T.,etal.2011,MNRAS,416,1764 -MNRAS,235,229 Walsh,A.J.,Longmore,S.N.,Thorwirth,S.,Urquhart,J.S.,&Purcell,C.R. -DeBuizer,J.M.,Liu,M.,Tan,J.C.,etal.2017,ApJ,843,33 2007,MNRAS,382,L35 -DeBuizer,J.M.,Radomski,J.T.,Telesco,C.M.,&Piña,R.K.2003,ApJ,598, Wang,K.,Zhang,Q.,Wu,Y.,Li,H.-b.,&Zhang,H.2012,ApJ,745,L30 -1127 Wilson,T.L.,Batrla,W.,&Pauls,T.A.1982,A&A,110,L20 -Dzib,S.,Loinard,L.,Rodríguez,L.F.,Mioduszewski,A.J.,&Torres,R.M. Wilson,T.L.&Henkel,C.1988,A&A,206,L26 -2011,ApJ,733,71 Wilson,T.L.,Johnston,K.J.,&Henkel,C.1990,A&A,229,L1 +Acknowledgements. Wewouldliketothanktheanonymousrefereefortheuse- +fulcommentsthatimprovethemanuscript.Y.T.Y.isamemberoftheInterna- +tionalMaxPlanckResearchSchool(IMPRS)forAstronomyandAstrophysics +attheUniversitiesofBonnandCologne.Y.T.Y.wouldliketothanktheChina +ScholarshipCouncil(CSC)foritssupport.Wewouldliketothankthestaffat +theEffelsbergfortheirhelpprovidedduringtheobservations.Wethankthestaff +oftheJVLA,especiallyTonyPerreaultandEdwardStarr,fortheirassistance +with the observations and data reduction. This research has made use of the +NASA/IPACInfraredScienceArchive,whichisfundedbytheNationalAero- +nautics and Space Administration and operated by the California Institute of +Technology. 
+References +Bartkiewicz,A.,Szymczak,M.,Cohen,R.J.,&Richards,A.M.S.2005,MN- +RAS,361,623 +Bartkiewicz,A.,Szymczak,M.,&vanLangevelde,H.J.2016,A&A,587,A104 +Benjamin,R.A.,Churchwell,E.,Babler,B.L.,etal.2003,PASP,115,953 +Beuther,H.,Mottram,J.C.,Ahmadi,A.,etal.2018,A&A,617,A100 +Beuther,H.,Walsh,A.J.,Thorwirth,S.,etal.2007,A&A,466,989 +Brogan,C.L.,Hunter,T.R.,Cyganowski,C.J.,etal.2011,ApJ,739,L16 +Brown,A.T.,Little,L.T.,MacDonald,G.H.,Riley,P.W.,&Matheson,D.N. +1981,MNRAS,195,607 +Brown,R.D.&Cragg,D.M.1991,ApJ,378,445 +Carrasco-González,C.,Sanna,A.,Rodríguez-Kamenetzky,A.,etal.2021,ApJ, +914,L1 +Cesaroni,R.,Walmsley,C.M.,&Churchwell,E.1992,A&A,256,618 +Cheung,A.C.,Rank,D.M.,Townes,C.H.,Thornton,D.D.,&Welch,W.J. +1968,Phys.Rev.Lett.,21,1701 +Churchwell,E.,Babler,B.L.,Meade,M.R.,etal.2009,PASP,121,213 +Cohen,R.J.&Brebner,G.C.1985,MNRAS,216,51P +Comito,C.,Schilke,P.,Endesfelder,U.,Jiménez-Serra,I.,&Martín-Pintado,J. +2007,A&A,469,207 +Curiel,S.,Ho,P.T.P.,Patel,N.A.,etal.2006,ApJ,638,878 +Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988, +MNRAS,235,229 +DeBuizer,J.M.,Liu,M.,Tan,J.C.,etal.2017,ApJ,843,33 +DeBuizer,J.M.,Radomski,J.T.,Telesco,C.M.,&Piña,R.K.2003,ApJ,598, +1127 +Dzib,S.,Loinard,L.,Rodríguez,L.F.,Mioduszewski,A.J.,&Torres,R.M. 
+2011,ApJ,733,71 Flower,D.R.,Offer,A.,&Schilke,P.1990,MNRAS,244,4P -Wilson,T.L.&Schilke,P.1993,inLectureNotesinPhysics,Astrophysical -Galván-Madrid,R.,Keto,E.,Zhang,Q.,etal.2009,ApJ,706,1036 Masers,ed.A.W.Clegg&G.E.Nedoluha,Vol.412,123–126 -Garay,G.,Ramirez,S.,Rodriguez,L.F.,Curiel,S.,&Torrelles,J.M.1996,ApJ, Wyrowski,F.,Güsten,R.,Menten,K.M.,Wiesemeyer,H.,&Klein,B.2012, -459,193 A&A,542,L15 -Garay,G.,Rodriguez,L.F.,&vanGorkom,J.H.1986,ApJ,309,553 Zhang,Q.&Ho,P.T.P.1995,ApJ,450,L63 -Gaume,R.A.,Wilson,T.L.,&Johnston,K.J.1996,ApJ,457,L47 Zhang,Q.,Hunter,T.R.,Sridharan,T.K.,&Cesaroni,R.1999,ApJ,527,L117 -Goddi,C.,Greenhill,L.J.,Humphreys,E.M.L.,Chandler,C.J.,&Matthews, Zheng,X.W.,Moran,J.M.,&Reid,M.J.2000,MNRAS,317,192 +Galván-Madrid,R.,Keto,E.,Zhang,Q.,etal.2009,ApJ,706,1036 +Garay,G.,Ramirez,S.,Rodriguez,L.F.,Curiel,S.,&Torrelles,J.M.1996,ApJ, +459,193 +Garay,G.,Rodriguez,L.F.,&vanGorkom,J.H.1986,ApJ,309,553 +Gaume,R.A.,Wilson,T.L.,&Johnston,K.J.1996,ApJ,457,L47 +Goddi,C.,Greenhill,L.J.,Humphreys,E.M.L.,Chandler,C.J.,&Matthews, L.D.2011,ApJ,739,L13 Goddi,C.,Henkel,C.,Zhang,Q.,Zapata,L.,&Wilson,T.L.2015,A&A,573, A109 @@ -557,20 +818,79 @@ Madden,S.C.,Irvine,W.M.,Matthews,H.E.,Brown,R.D.,&Godfrey,P.D. Mangum,J.G.&Wootten,A.1994,ApJ,428,L33 Mauersberger,R.,Henkel,C.,&Wilson,T.L.1987,A&A,173,352 Mauersberger,R.,Wilson,T.L.,&Henkel,C.1986,A&A,160,L13 +Mauersberger,R.,Wilson,T.L.,&Henkel,C.1988,A&A,201,123 +McEwen,B.C.,Pihlström,Y.M.,&Sjouwerman,L.O.2016,ApJ,826,189 +McMullin,J.P.,Waters,B.,Schiebel,D.,Young,W.,&Golap,K.2007,inAs- +tronomicalSocietyofthePacificConferenceSeries,Vol.376,Astronomical +DataAnalysisSoftwareandSystemsXVI,ed.R.A.Shaw,F.Hill,&D.J. 
+Bell,127 +Mei,Y.,Chen,X.,Shen,Z.-Q.,&Li,B.2020,ApJ,898,157 +Menten,K.M.1991,ApJ,380,L75 +Mills,E.A.C.,Ginsburg,A.,Clements,A.R.,etal.2018,ApJ,869,L14 +Mookerjea,B.,Casper,E.,Mundy,L.G.,&Looney,L.W.2007,ApJ,659,447 +Moscadelli,L.,Reid,M.J.,Menten,K.M.,etal.2009,ApJ,693,406 +Narayanan,G.&Walker,C.K.1996,ApJ,466,844 +Ott,M.,Witzel,A.,Quirrenbach,A.,etal.1994,A&A,284,331 +Patel,N.A.,Curiel,S.,Sridharan,T.K.,etal.2005,Nature,437,109 +Perley,R.A.&Butler,B.J.2013,ApJS,204,19 +Pety,J.2005,inSF2A-2005:Semainedel’AstrophysiqueFrancaise,ed.F.Ca- +soli,T.Contini,J.M.Hameury,&L.Pagani,721 +Poynter,R.L.&Kakar,R.K.1975,ApJS,29,87 +Pratap,P.,Menten,K.M.,Reid,M.J.,Moran,J.M.,&Walmsley,C.M.1991, +ApJ,373,L13 +Reid,M.J.&Ho,P.T.P.1985,ApJ,288,L17 +Rodriguez,L.F.,Ho,P.T.P.,&Moran,J.M.1980,ApJ,240,L149 +Sanna,A.,Moscadelli,L.,Surcis,G.,etal.2017,A&A,603,A94 +Sewilo,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2004,ApJ, +605,285 +Sewiło,M.,Churchwell,E.,Kurtz,S.,Goss,W.M.,&Hofner,P.2011,ApJS, +194,44 +Sobolev,A.M.,Moran,J.M.,Gray,M.D.,etal.2018,ApJ,856,60 +Sugiyama,K.,Fujisawa,K.,Doi,A.,etal.2008,PASJ,60,1001 +Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1998,ApJ,509,262 +Torrelles,J.M.,Gómez,J.F.,Garay,G.,etal.1999,MNRAS,307,58 +Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1985,ApJ,288,595 +Torrelles,J.M.,Ho,P.T.P.,Rodriguez,L.F.,&Canto,J.1986,ApJ,305,721 +Torrelles,J.M.,Patel,N.A.,Curiel,S.,etal.2011,MNRAS,410,627 +Torrelles,J.M.,Verdes-Montenegro,L.,Ho,P.T.P.,Rodriguez,L.F.,&Canto, +J.1993,ApJ,410,202 +Towner,A.P.M.,Brogan,C.L.,Hunter,T.R.,&Cyganowski,C.J.2021,ApJ, +923,263 +Umemoto,T.,Mikami,H.,Yamamoto,S.,&Hirano,N.1999,ApJ,525,L105 +Urquhart,J.S.,Morgan,L.K.,Figura,C.C.,etal.2011,MNRAS,418,1689 +Walmsley,C.M.&Ungerechts,H.1983,A&A,122,164 +Walsh,A.J.,Breen,S.L.,Britton,T.,etal.2011,MNRAS,416,1764 +Walsh,A.J.,Longmore,S.N.,Thorwirth,S.,Urquhart,J.S.,&Purcell,C.R. 
+2007,MNRAS,382,L35 +Wang,K.,Zhang,Q.,Wu,Y.,Li,H.-b.,&Zhang,H.2012,ApJ,745,L30 +Wilson,T.L.,Batrla,W.,&Pauls,T.A.1982,A&A,110,L20 +Wilson,T.L.&Henkel,C.1988,A&A,206,L26 +Wilson,T.L.,Johnston,K.J.,&Henkel,C.1990,A&A,229,L1 +Wilson,T.L.&Schilke,P.1993,inLectureNotesinPhysics,Astrophysical +Masers,ed.A.W.Clegg&G.E.Nedoluha,Vol.412,123–126 +Wyrowski,F.,Güsten,R.,Menten,K.M.,Wiesemeyer,H.,&Klein,B.2012, +A&A,542,L15 +Zhang,Q.&Ho,P.T.P.1995,ApJ,450,L63 +Zhang,Q.,Hunter,T.R.,Sridharan,T.K.,&Cesaroni,R.1999,ApJ,527,L117 +Zheng,X.W.,Moran,J.M.,&Reid,M.J.2000,MNRAS,317,192 Articlenumber,page7of10 A&Aproofs:manuscriptno.mainArxiv AppendixA: -TableA.1.SummaryofNH (9,6)maserobservations. -3 +TableA.1.SummaryofNH 3(9,6)maserobservations. +Source Telescope Beam Epoch Channel S +ν +rms (cid:82) -Source Telescope Beam Epoch Channel S rms S dv V ∆V -ν ν LSR 1/2 +S νdv V +LSR +∆V +1/2 size spacing (kms−1) (Jy) (mJy) (Jykms−1) (kms−1) CepA Effelsberg 49(cid:48)(cid:48) 2020,Jan.04 0.62 0.67 3.41 1.19±0.02 -1.11±0.02 1.67±0.04 Effelsberg 49(cid:48)(cid:48) 2021,Feb.11 0.62 0.59 5.97 1.08±0.02 -0.74±0.02 1.70±0.04 Effelsberg 49(cid:48)(cid:48) 2021,Feb.15 0.62 0.65 10.98 1.11±0.03 -0.75±0.02 1.60±0.05 -JVLAa 1(cid:48).(cid:48)47 × 0(cid:48).(cid:48)99 2021,Jul.13 0.13 1.13 144 0.89±0.09 -0.86±0.03 0.74±0.12 +JVLAa 1(cid:48)(cid:48) .47 × 0(cid:48)(cid:48) .99 2021,Jul.13 0.13 1.13 144 0.89±0.09 -0.86±0.03 0.74±0.12 Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.98 13.36 0.49±0.02 -0.90±0.01 0.47±0.01 0.35 0.26±0.02 -0.28±0.02 0.69±0.05 Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.98 13.35 0.50±0.01 -0.89±0.07 0.48±0.07 @@ -581,7 +901,7 @@ Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.98 13.35 0.50±0.01 -0.89±0.07 G34.26+0.15 Effelsberg 49(cid:48)(cid:48) 2020,Jan.03 0.62 0.30 1.26 0.65±0.03 62.50±0.05 2.05±0.13 Effelsberg 49(cid:48)(cid:48) 2021,Feb.11 0.62 0.24 2.42 0.40±0.02 55.76±0.04 1.60±0.12 Effelsberg 49(cid:48)(cid:48) 2021,Feb.15 0.62 0.20 4.86 0.38±0.02 55.71±0.05 
1.80±0.14 -JVLAb 1(cid:48).(cid:48)33 × 1(cid:48).(cid:48)06 2021,Jul.13 0.13 0.23 37.1 0.09±0.02 54.41±0.03 0.38±0.09 +JVLAb 1(cid:48)(cid:48) .33 × 1(cid:48)(cid:48) .06 2021,Jul.13 0.13 0.23 37.1 0.09±0.02 54.41±0.03 0.38±0.09 0.22 0.22±0.02 55.82±0.05 0.95±0.12 0.15 0.06±0.01 57.21±0.04 0.35±0.08 Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.08 13.92 0.06±0.007 54.10±0.05 0.68±0.12 @@ -589,10 +909,8 @@ Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.08 13.92 0.06±0.007 54.10±0.0 0.12 0.10±0.006 55.85±0.02 0.75±0.06 Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.16 27.40 0.09±0.008 55.83±0.02 0.56±0.05 Notes.ThespectralparametersareobtainedfromGaussianfitting.(a)TheJVLAspectrumtowardCepAisextractedfromtheEffelsberg-beam- -sizedregion(FWHM49(cid:48)(cid:48)).(b)ForG34.26+0.15,theJVLAbeamsamplestheNH (9,6)spectrumoveraregionofradius3(cid:48).(cid:48)5,whichcontainsall -3 -detectedNH (9,6)emissions. -3 +sizedregion(FWHM49(cid:48)(cid:48)).(b)ForG34.26+0.15,theJVLAbeamsamplestheNH 3(9,6)spectrumoveraregionofradius3(cid:48)(cid:48) .5,whichcontainsall +detectedNH 3(9,6)emissions. TableA.2.1.36cmJVLAfluxdensitiesofindividualcontinuumsources. Source R.A. Dec. Size P.A. S ν @@ -608,10 +926,15 @@ B 185318.649±0.005 +011500.071±0.180 (2.31±0.49)×(0.85±0.21) 17.4 597±110 C 185318.560±0.004 +011458.201±0.112 (2.03±0.30)×(1.34±0.20) 178.0 5070±660 Articlenumber,page8of10 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions -TableA.3.NH (9,6)maserpositionsderivedfromtheJVLAobservations. -3 -Source R.A. Dec. S T V ∆V -ν MB LSR 1/2 +TableA.3.NH 3(9,6)maserpositionsderivedfromtheJVLAobservations. +Source R.A. Dec. 
S +ν +T +MB +V +LSR +∆V +1/2 (h m s) (◦ (cid:48) (cid:48)(cid:48)) (mJybeam−1) (K) (kms−1) CepA M 225617.933±0.002 +620149.608±0.011 985.2 2464.8 -0.88±0.01 0.51±0.02 343.2 829.5 -0.24±0.03 0.63±0.05 @@ -621,28 +944,26 @@ M2 185318.696±0.002 +011455.807±0.034 48.4 122.4 53.77±0.05 0.35±0.08 180.8 457.6 55.83±0.01 0.59±0.03 M3 185318.667±0.005 +011455.348±0.066 78.1 197.2 54.22±0.04 0.94±0.08 73.7 186.3 55.78±0.04 0.79±0.08 -Fig.A.1.CepheusA.Thegreyshadedareasmarkthe1.36cmJVLAcontinuummapofCepA.Thereferencepositionisα =22h56m17s.972, -J2000 -andδ =62◦01(cid:48)49(cid:48).(cid:48)587,thepeakpositionofthecontinuummap,ismarkedbyaredcross.Slightlytothewestofthecrossisthewhiteellipse -J2000 -denotingthepositionoftheNH (9,6)emissionwithapurplestaratitscenter.TheredcontoursshowtheNOrthernExtendedMillimeterArray +Fig.A.1.CepheusA.Thegreyshadedareasmarkthe1.36cmJVLAcontinuummapofCepA.Thereferencepositionisα J2000=22h56m17s.972, +andδ J2000=62◦01(cid:48)49(cid:48)(cid:48) .587,thepeakpositionofthecontinuummap,ismarkedbyaredcross.Slightlytothewestofthecrossisthewhiteellipse +denotingthepositionoftheNH 3 +(9,6)emissionwithapurplestaratitscenter.TheredcontoursshowtheNOrthernExtendedMillimeterArray (NOEMA)1.37mmcontinuum,takenfromBeutheretal.(2018).Contourlevelsare-5,5,10,20,40,80,100,150,and200×2.43mJybeam−1. -OH(Bartkiewiczetal.2005),H O(Sobolevetal.2018),andCH OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares, -2 3 -respectively.Thecolorbarontheright-handsideindicatesthevelocityrange(V )ofmaserspots. -LSR +OH(Bartkiewiczetal.2005),H 2O(Sobolevetal.2018),andCH 3OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares, +respectively.Thecolorbarontheright-handsideindicatesthevelocityrange(V LSR)ofmaserspots. 
Articlenumber,page9of10 A&Aproofs:manuscriptno.mainArxiv -Fig.A.2.1.36cmJVLAcontinuummapofG34.26+0.15presentedasgrayshadedareas.Thereferencepositionisα =18h53m18s.560,and +Fig.A.2.1.36cmJVLAcontinuummapofG34.26+0.15presentedasgrayshadedareas.Thereferencepositionisα J2000 -δ =01◦14(cid:48)58(cid:48).(cid:48)201,thepeakposition,ismarkedbyaredcross.TheredellipsesshowthepositionsofNH (9,6)emissionwithstarsattheir -J2000 3 -center(i.e.,M1,M2,andM3).ThebluecontoursshowtheBerkeley-Illinois-MarylandAssociation(BIMA)array2.8mmcontinuum,takenfrom -Mookerjeaetal.(2007).Contourlevelsare-3,3,10,20,30,40,50,70,90,100,120,and140×20mJybeam−1.OH(Zhengetal.2000),H O(Imai -2 -etal.2011),andCH OH(Bartkiewiczetal.2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicates +=18h53m18s.560,and +δ +J2000 +=01◦14(cid:48)58(cid:48)(cid:48) .201,thepeakposition,ismarkedbyaredcross.TheredellipsesshowthepositionsofNH 3 -thevelocityrange(V )ofmaserspots. -LSR +(9,6)emissionwithstarsattheir +center(i.e.,M1,M2,andM3).ThebluecontoursshowtheBerkeley-Illinois-MarylandAssociation(BIMA)array2.8mmcontinuum,takenfrom +Mookerjeaetal.(2007).Contourlevelsare-3,3,10,20,30,40,50,70,90,100,120,and140×20mJybeam−1.OH(Zhengetal.2000),H 2O(Imai +etal.2011),andCH 3OH(Bartkiewiczetal.2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicates +thevelocityrange(V LSR)ofmaserspots. Articlenumber,page10of10 diff --git a/read/results/pdfplumber/2201.00022.txt b/read/results/pdfplumber/2201.00022.txt index a77c95c..2d9662e 100644 --- a/read/results/pdfplumber/2201.00022.txt +++ b/read/results/pdfplumber/2201.00022.txt @@ -1,246 +1,358 @@ -Draft version January 4, 2022 +Draft version July 7, 2022 TypesetusingLATEXtwocolumnstyleinAASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei Sanaea C. Rose,1,2 Smadar Naoz,1,2 Re’em Sari,3 and Itai Linial3 1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA 2Mani L. 
Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel -1202 ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70M . However,recentLIGO/VirgodetectionsindicatetheexistenceofBHswithmassesatand -(cid:12) ceD -abovethisthreshold. WesuggestthatmassiveBHs,includingintermediatemassblackholes(IMBHs), -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax- -13 -ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M . Our +50−70 M (cid:12), the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M (cid:12). This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptionsandstar-starcollisions. Wediscussdeviationsfromthesekeyassumptionsinthetext. Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. 
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +Thesegravitationalwaveeventsareextremeandintermediatemassratioinspirals(EMRIsandIMRIs, +respectively). +1. INTRODUCTION +The recently detected gravitational wave source +GW190521 (The LIGO Scientific Collaboration et al. +2020a,b) produced an intermediate mass black hole of +approximately142M (cid:12). Thiseventmayhavealsohada +85M (cid:12) -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for- -mation channel also has implications for observations. Collisions between stars and BHs can produce ]AG.hp-ortsa[ -electromagnetic signatures, for example, fromx-raybinaries and tidaldisruption events. Additionally, -formed through this channel, both black holes in the mass gap and IMBHs can merge with the super- -massiveblackholeatthecenterofagalacticnucleusthroughgravitationalwaves. Thesegravitational -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). -1. INTRODUCTION tic nuclei as well. Several studies propose that our +progenitor,whichfallswithinthepair-instability +mass gap that limits stellar black holes (BHs) to no +more than +∼ +< 50 M +(cid:12) +(e.g., Heger et al. 2003; Woosley +2017)1. Similarly, the merger products of GW150914, +GW170104, and GW170814 fall within the mass gap +(e.g., Abbott et al. 2016, 2017a,b). BH mergers that +form second generation BHs and, in some cases, inter- +mediate mass BHs (IMBHs), these gravitational wave +(GW) events can occur in globular clusters, young stel- +Correspondingauthor: SanaeaC.Rose +srose@astro.ucla.edu +1Note that the exact lower and upper limits may be sensitive to +metallicityoftheprogenitor(e.g.,Woosley2017;Spera&Mapelli +2017a;Limongi&Chieffi2018a;Saksteinetal.2020;Belczynski +etal.2020a;Renzoetal.2020;Vinketal.2021). 
+lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- +driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). However, IMBHs are +not limited to these locations and may reside in galac- +tic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc -The recently detected gravitational wave source (e.g.,Hansen&Milosavljevi´c2003;Maillardetal.2004; -GW190521 (The LIGO Scientific Collaboration et al. -Gu¨rkan&Rasio2005;Gualandris&Merritt2009;Chen -2020a,b) produced an intermediate mass black hole of 1v22000.1022:viXra +G¨ urkan&Rasio2005;Gualandris&Merritt2009;Chen &Liu2013;Generozov&Madigan2020;Fragioneetal. -approximately142M . Thiseventmayhavealsohada -(cid:12) 2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -85M progenitor,whichfallswithinthepair-instability -(cid:12) Collaboration et al. 2020). -mass gap that limits stellar black holes (BHs) to no -more than < 50 M (e.g., Heger et al. 2003; Woosley SeveralIMBHformationchannelshavebeensuggested -∼ (cid:12) -2017)1. Similarly, the merger products of GW150914, in the literature. For example, IMBHs may have a cos- +SeveralIMBHformationchannelshavebeensuggested +in the literature. For example, IMBHs may have a cos- mological origin, forming in the early universe either -GW170104, and GW170814 fall within the mass gap as a result of the very first stars (e.g., Madau & Rees -(e.g., Abbott et al. 2016, 2017a,b). BH mergers that 2001; Schneider et al. 2002; Johnson & Bromm 2007; -form second generation BHs and, in some cases, inter- Valiante et al. 2016) or from direct collapse of accumu- -mediate mass BHs (IMBHs), these gravitational wave lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; -(GW) events can occur in globular clusters, young stel- Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. -lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- 2016). 
These high redshift IMBHs would need to sur- -driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. vive galaxy evolution and mergers to present day (e.g., -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +0 +2 +2 +v +2 +[ +a +s +t +r +o +- +p +h +. +G +A +] +6 +J +u +l +2 +0 +2 +2 +2 Rose et al. Rashkov&Madau2014),withsignificanteffectsontheir -2021; Arca Sedda et al. 2021). However, IMBHs are stellarandevendarkmattersurroundings(e.g.,Bertone -not limited to these locations and may reside in galac- etal.2009;Chen&Liu2013;Bringmannetal.2012;Eda etal.2013;Naoz&Silk2014;Naozetal.2019). Another -Correspondingauthor: SanaeaC.Rose popular formation channel relies on the coalescence of -srose@astro.ucla.edu many stellar-mass black holes. For example, IMBHs -1Note that the exact lower and upper limits may be sensitive to may form in the centers of globular clusters, where few- -metallicityoftheprogenitor(e.g.,Woosley2017;Spera&Mapelli bodyinteractionsleadtothemergerofstellar-massBHs -2017a;Limongi&Chieffi2018a;Saksteinetal.2020;Belczynski (e.g., O’Leary et al. 2006; Gu¨rkan et al. 2006; Blecha -etal.2020a;Renzoetal.2020;Vinketal.2021). +popular formation channel relies on the coalescence of +many stellar-mass black holes, which may seed objects +asmassiveasSMBHs(e.g.,Kroupaetal.2020). IMBHs +may form in the centers of globular clusters, where few- +bodyinteractionsleadtothemergerofstellar-massBHs +(e.g., O’Leary et al. 2006; G¨ urkan et al. 2006; Blecha et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- -2 Rose et al. -driguezetal.2018;Rodriguezetal.2019;Fragioneetal. a statistical approach to estimate the stellar encounters -2020b). Other formation mechanisms invoke successive and final IMBH masses. -collisions and mergers of massive stars (e.g., Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; 2.1. Physical Picture -Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al. 
-WeconsiderapopulationofBHswithintheinnerfew -2021; Di Carlo et al. 2021). -parsecsoftheSMBHinagalacticnucleus(GN).Weas- +driguezetal.2018;Rodriguezetal.2019;Fragioneetal. +2020b). Other formation mechanisms invoke successive +collisions and mergers of massive stars (e.g., Ebisuzaki +etal.2001;PortegiesZwart&McMillan2002;Portegies +Zwartetal.2004;Freitagetal.2006;Sakuraietal.2017; +Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). The main obstacle to sequential BH mergers in clus- -sume that the BH mass distribution follows that of the ters is that the merger recoil velocity kick often exceeds -stars from which they originate, a Kroupa initial mass -the escape velocity from the cluster (e.g., Schnittman functiondN/dm∝m−2.35. Whilethischoicerepresents +the escape velocity from the cluster (e.g., Schnittman & Buonanno 2007; Centrella et al. 2010; O’Leary et al. -a gross oversimplification, it has very little bearing on 2006; Baibhav et al. 2020, Rom & Sari, in prep.). How- -our final results. Future work may address the particu- ever, nuclear star clusters at the centers of galaxies do -lars of the BH mass distribution, but we do not expect notencounterthisproblem. Forexample,Fragioneetal. -that it will significantly alter the outcome. The upper (2021) explore repeated BH-BH mergers in nuclear star -and lower limits of the BH mass distribution are 5 and clusters without a SMBH. They considered BH binary- -50M , respectively. We select the upper limit to en- -(cid:12) single interactions, binary BH GW merger, and GW -compass the range of upper bounds predicted by stellar mergerrecoilkicks. Thepost-kickmergerproductsinks +back towards the cluster center over a dynamical fric- +tion timescale. Using this approach, they showed that +103−104 M +(cid:12) +IMBHs can form efficiently over the life- +time of a cluster. 
+However, as discussed in Section 2.2, direct BH-star +collisions are much more frequent than BH-BH collision +in galactic nuclei, making the former a promising chan- +nel for BH growth. In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star colli- +sions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +Moregenerally, several studieshaveexplored therole of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +etal.2021). WeproposethatIMBHscanformnaturally +within the central pc of a galactic center through re- +peatedcollisionsbetweenBHsandmain sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow ap- +preciablyinsize. Wedemonstratethatthischannelcan +createIMBHswithmassesaslargeas104 M (cid:12),anupper +limitthatdependsonthedensityprofileofthesurround- +ing stars and the efficiency of the accretion. +The paper is structured as follows: we describe rele- +vant physical processes and our approach in Section 2. +In particular, we provide an overview of collisions in +Section 2.2 and present our statistical approach in Sec- +tion 2.3. Section 2.4 discusses our treatment of the +mass growth with each collision and presents analytic +solutions to our equations in two different regimes, ef- +ficient collisions and inefficient collisions We compare +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events be- +tween IMBHs and the SMBH. We then incorporate re- +laxation processes and discuss the subsequent results in +Section2.9. Finally,wediscussandsummarizeourfind- +ings in Section 3. +2. 
METHODOLOGY +We consider a population of stellar mass BHs embed- +ded in a cluster of 1 M +(cid:12) +stars. When stars and BHs +collide, the BHs can accrete mass. The growth rate de- +pends on the physical processes outlined below. We use +a statistical approach to estimate the stellar encounters +and final IMBH masses. +2.1. Physical Picture +WeconsiderapopulationofBHswithintheinnerfew +parsecsoftheSMBHinagalacticnucleus(GN).Weas- +sume that the BH mass distribution follows that of the +stars from which they originate, a Kroupa initial mass +functiondN/dm∝m−2.35. Whilethischoicerepresents +a gross oversimplification, it has very little bearing on +our final results. Future work may address the particu- +lars of the BH mass distribution, but we do not expect +that it will significantly alter the outcome. The upper +and lower limits of the BH mass distribution are 5 and +50M (cid:12), respectively. We select the upper limit to en- +compass the range of upper bounds predicted by stellar evolution models, which vary between 40 and 125M (cid:12) -back towards the cluster center over a dynamical fric- dependingonthemetallicity(Hegeretal.2003;Woosley -tion timescale. Using this approach, they showed that 2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; -103−104 M IMBHs can form efficiently over the life- -(cid:12) Belczynski et al. 2020b; Renzo et al. 2020). We assume -time of a cluster. +Belczynski et al. 2020b; Renzo et al. 2020). We assume that the orbits of the BHs follow a thermal eccentricity -However, as discussed in Section 2.2, direct star-BH -distribution. We draw their semimajor axes, a , from a -• -collisions are much more frequent than BH-BH collision +distribution. We draw their semimajor axes, a •, from a uniform distribution in log distance, dN/d(logr) being -in galactic nuclei, making the former a promising chan- constant. While this distribution is not necessarily rep- -nel for BH growth. 
We propose that IMBHs can form resentative of actual conditions in the GN, we use it to -naturally within the central pc of a SMBH in a galactic build a comprehensive physical picture of BH growth at -center. Specifically,theseIMBHsformthroughrepeated all distances from the SMBH, including within 0.01 pc. -collisions with main sequence stars, accreting some or Otherwise, the innermost region of the GN would be -all of the star’s mass depending on the details of the poorly represented in our sample. We consider other -collision. We demonstrate that this channel can create -observationally motivated distributions in Section 2.8, -IMBHs with masses as large as 104 M , depending on -(cid:12) butreserveamoredetailedexaminationofthedistribu- -the density profile of the surrounding stars. +IMBH Formation in Galactic Nuclei 3 +Figure 1. We plot the relevant timescales, including col- +lision (green), relaxation (gold), and BH-BH GW capture +(purple),forasingleBHintheGNasafunctionofdistance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α=1(dashedcurve)toα=2(dark,solidcurve). Thedark +bluelinerepresentsthetimefora105 M (cid:12) BHtomergewith +the SMBH through GW emission. +observationally motivated distributions in Section 2.9, +butreserveamoredetailedexaminationofthedistribu- tion’s impact for future work. -The paper is structured as follows: we describe rele- -vant physical processes and our approach in Section 2. 2.2. Direct Collisions -In particular, we provide an overview of collisions in BHsintheGNcanundergodirectcollisionswithother -Section 2.2 and present our statistical approach in Sec- -objects. The timescale for this process, t , can be es- -tion 2.3. Section 2.4 discusses our treatment of the coll -timated using a simple rate calculation: t−1 = nσA, -mass growth with each collision and presents analytic coll +objects. 
The timescale for this process, t coll, can be es- +timated using a simple rate calculation: t−1 +coll += nσA, where n is the number density of objects, σ is the ve- -solutions to our equations in two different regimes, ef- locitydispersion, andAisthecross-section. Weusethe -ficient collisions and inefficient collisions We compare collision timescale from Rose et al. (2020): -these solutions to our statistical results. Sections 2.5 -and 2.7 discuss implications for GW merger events be- -t−1 =πn(a )σ(a ) -tween IMBHs and the SMBH. We then incorporate re- coll • • -(cid:18) (cid:19) -2G(m +m ) -laxation processes and discuss the subsequent results in × f (e )r2+f (e )r BH (cid:63) . (1) -Section2.8. Finally,wediscussandsummarizeourfind- 1 • c 2 • c σ(a •)2 -ings in Section 3. -where G is the gravitational constant and r is the sum +t−1 coll=πn(a •)σ(a •) +×(cid:18) +f 1(e •)r2 c ++f 2(e •)r +c2G(m +BH ++m (cid:63)) +σ(a •)2 +(cid:19) +. (1) +where G is the gravitational constant and r +c +is the sum of the radii of the interacting objects, a black hole with -2. METHODOLOGY -mass m and a star with mass m . Detailed in Rose -BH (cid:63) -We consider a population of stellar mass BHs embed- et al. (2020), f (e ) and f (e ) account for the effect of -1 • 2 • -ded in a cluster of 1 M stars. When stars and BHs the eccentricity of the BH’s orbit about the SMBH on +mass m +BH +and a star with mass m (cid:63). Detailed in Rose +et al. (2020), f 1(e •) and f 2(e •) account for the effect of +the eccentricity of the BH’s orbit about the SMBH on +the collision rate, while n and σ are simply evaluated +at the semimajor axis of the orbit (see below). Note +thatthistimescaleequationincludestheeffectsofgrav- +itational focusing, which enhances the cross-section of +interaction. +Assuming a circular orbit for simplicity, we plot the +timescale for a BH orbiting in the GN to collide with +a 1M (cid:12) -collide, the BHs can accrete mass. 
The growth rate de- the collision rate, while n and σ are simply evaluated -pends on the physical processes outlined below. We use at the semimajor axis of the orbit (see below). Note -IMBH Formation in Galactic Nuclei 3 +star as a function of distance from the SMBH +in Figure 1.2 As this timescale depends on the density +of surrounding stars, we adopt a density profile of the +form: +ρ(r •)=ρ +0(cid:18) +r +• +r +0(cid:19)−α +, (2) +wherer •denotesthedistancefromtheSMBH.Weadopt +a SMBH mass of 4×106 M +(cid:12) +such that our fiducial GN +matches our own galactic center (e.g., Ghez et al. 2005; +Genzel et al. 2003). In this case, the normalization in +Eq.(2)isρ +0 +=1.35×106M (cid:12)/pc3 atr +0 +=0.25pc(Gen- +zel et al. 2010). Additionally, in Eq. (2), α gives the +slope of the power law. We assume that a uniform pop- +ulation of solar mass stars account for most of the mass +in the GN, making the stellar number density: +n(r •)= +ρ(r •) +1M +(cid:12) +. (3) The collision timescale also depends on the velocity dis- persion, which we express as: -(cid:115) +σ(r +•)=(cid:115) GM -σ(r )= • , (4) -• r (1+α) • -where α is the slope of the density profile and M de- +r +•(1+α), (4) +where α is the slope of the density profile and M • +de- notes the mass of the SMBH (Alexander 1999; Alexan- der&Pfuhl2014). Asmentionedabove,Eq.(1)depends -on the sum of the radii of the colliding objects, r . We -c -take r =1R because these interactions involve a BH -c (cid:12) +on the sum of the radii of the colliding objects, r c. We +take r c =1R (cid:12) because these interactions involve a BH and a star, and the former has a much smaller physi- calcross-section. Forexample,theSchwarzschildradius -of a 10M BH is only 30 km, or 4.31×10−5R . For -(cid:12) (cid:12) -Figure 1. We plot the relevant timescales, including col- +of a 10M (cid:12) BH is only 30 km, or 4.31×10−5R (cid:12). 
For this reason, direct collisions between compact objects -lision (green), relaxation (gold), and BH-BH GW capture are very rare and not included in our model. -(purple),forasingleBHintheGNasafunctionofdistance We note that direct collisions between BHs, via GW -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. The timescales depend on the emission, wereshowntobeefficientinnuclearstarclus- -density, so we adopt a range of density profiles, bounded by ters without SMBHs (e.g., Portegies Zwart & McMil- -α=1(dashedcurve)toα=2(dark,solidcurve). Thedark lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016). -bluelinerepresentsthetimefora105 M (cid:12) BHtomergewith However, in the GN, star-BH collisions are much more -the SMBH through GW emission. +emission, wereshowntobeefficientinnuclearstarclus- +ters without SMBHs (e.g., Portegies Zwart & McMil- +lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016). +However, in the GN, star-BH collisions are much more frequent than direct BH-BH collisions. As depicted in Figure 1, the star-BH collision timescale for a range -thatthistimescaleequationincludestheeffectsofgrav- of density profiles is many orders of magnitude shorter -itational focusing, which enhances the cross-section of than the BH-BH GW collision timescale (for the rele- -interaction. vant equations, see O’Leary et al. 2009; Gond´an et al. -Assuming a circular orbit for simplicity, we plot the 2018, for example). Thus, we expect that star-BH col- -timescale for a BH orbiting in the GN to collide with lisions will be the main driver of IMBH growth in the -a 1M (cid:12) star as a function of distance from the SMBH GN. -in Figure 1.2 As this timescale depends on the density -of surrounding stars, we adopt a density profile of the 2.3. 
Statistical Approach to Collisions -form: We simulate the mass growth of a population of BHs +of density profiles is many orders of magnitude shorter +than the BH-BH GW collision timescale (for the rele- +vant equations, see O’Leary et al. 2009; Gond´an et al. +2018, for example). Thus, we expect that star-BH col- +lisions will be the main driver of IMBH growth in the +GN. +2We note that the eccentricity has a very minor effect on the +collisiontimescale(Roseetal.2020). +4 Rose et al. +2.3. Statistical Approach to Collisions +We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an -(cid:18) (cid:19)−α increment ∆t of 106 yr, we calculate the probability of -r -ρ(r )=ρ • , (2) a collision occurring, given by ∆t/t . This choice of -• 0 r coll -0 ∆t is motivated by our galactic center’s star formation +increment ∆t of 106 yr, we calculate the probability of +a collision occurring, given by ∆t/t coll. This choice of +∆t is motivated by our galactic center’s star formation timescale (e.g., Lu et al. 2009), allowing for regular re- -wherer denotesthedistancefromtheSMBH.Weadopt plenishmentofthestellarpopulationintheGN.Wehave -• -a SMBH mass of 4×106 M such that our fiducial GN checked that the results are not sensitive to this choice -(cid:12) -matches our own galactic center (e.g., Ghez et al. 2005; of∆t,omittedheretoavoidclutter. Wedrawanumber -Genzel et al. 2003). In this case, the normalization in between 0 and 1 using a random number generator. If -Eq.(2)isρ =1.35×106M /pc3 atr =0.25pc(Gen- that number is less than or equal to the probability, we -0 (cid:12) 0 -zel et al. 2010). Additionally, in Eq. (2), α gives the increasetheBH’smassby∆m,themassthattheBHis -slope of the power law. We assume that a uniform pop- expected to accrete in a single collision (see Section 2.4 -ulation of solar mass stars account for most of the mass for details). 
We recalculate the collision timescale using -in the GN, making the stellar number density: the updated BH mass and repeat this process until the +plenishmentofthestellarpopulationintheGN.Wehave +checked that the results are not sensitive to this choice +of∆t,omittedheretoavoidclutter. Wedrawanumber +between 0 and 1 using a random number generator. If +that number is less than or equal to the probability, we +increasetheBH’smassby∆m,themassthattheBHis +expected to accrete in a single collision (see Section 2.4 +for details). We recalculate the collision timescale using +the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3. -ρ(r ) -n(r )= • . (3) -• 1M 3Closer to the SMBH, ∆t may exceed the collision timescale by -(cid:12) -a factor of a few for steep density profiles. We include a safe- -guard in our code which takes the ratio t /∆t and rounds it -coll -2We note that the eccentricity has a very minor effect on the tothenearestinteger. Wetakethisintegertobethenumberof -collisiontimescale(Roseetal.2020). collisionsandincreasetheBHmassaccordingly. -4 Rose et al. 2.4. Mass Growth When a BH collides with a star, it may accrete ma- terial and grow in mass. The details of the accretion @@ -250,453 +362,984 @@ two objects experience a head on collision, with the BH passing through the star’s center. We begin by con- sidering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the -maximum impact parameter 1 R . Qualitatively, one -(cid:12) -might expect that the BH could accrete the entire star -(i.e.,∆m∼1M )iftherelativevelocityissmallerthan -(cid:12) +maximum impact parameter 1 R (cid:12). Qualitatively, one +might expect that the BH could capture the entire star +(i.e.,∆m∼1M (cid:12))iftherelativevelocityissmallerthan theescapevelocityfromtheBHatthispoint. However, -Figure 2. 
Weconsideranexamplethathighlightsthemass in the vicinity of the SMBH, the dispersion velocity of -growthasafunctionofdistancefromtheSMBH.Greydots the stars may be much larger than the escape velocity -represent the initial masses and distances from the SMBH from the BH at the star’s surface. In this case, the BH +captures a “tunnel” of material through the star. This +tunnel has radius equal to the Bondi radius and length +approximately 1R (cid:12). For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our re- +sults imply. We discuss accretion in Section 2.5. +To estimate ∆m, we begin with the Bondi-Hoyle ac- +cretion rate, ˙ m, given by: +˙ m= +4πG2m2 BHρ +star +(c2 s+σ2)3/2 +, (5) +3Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe- +guard in our code which takes the ratio t coll/∆t and rounds it +tothenearestinteger. Wetakethisintegertobethenumberof +collisionsandincreasetheBHmassaccordingly. +Figure 2. Weconsideranexamplethathighlightsthemass +growthasafunctionofdistancefromtheSMBH.Greydots +represent the initial masses and distances from the SMBH oftheBHsinvolvedinthesimulation. Forsimplicity,weset -accretes a “tunnel” of material through the star. This -theinitalmassequalto10M foralloftheBHs. Assuming +theinitalmassequalto10M (cid:12) -tunnel has radius equal to the Bondi radius and length thedensityprofileofstarshasα=1,weconsidertwocases: -approximately 1R . BHsaccreteallofthestar’smassduringacollision(red)and -(cid:12) -To estimate ∆m, we begin with the Bondi-Hoyle ac- onlyaportionofthestar’smassisaccretedduringacollision -cretion rate, m˙, given by: given by Eq. 6 (blue). The latter case results in less growth +foralloftheBHs. 
Assuming +thedensityprofileofstarshasα=1,weconsidertwocases: +BHsaccreteallofthestar’smassduringacollision(red)and +onlyaportionofthestar’smassisaccretedduringacollision +given by Eq. 6 (blue). The latter case results in less growth closer to the SMBH where the velocity dispersion becomes -m˙ = 4πG2m2 BHρ star , (5) high. The shaded regions and dashed lines represent the -(c2+σ2)3/2 analytical predictions detailed in Section 2.4. -s -wherec isthespeedofsoundinthestarandρ isits -s star -and their final masses can be approximated using the +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. +wherec s isthespeedofsoundinthestarandρ star isits density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima -following equation: et al. 1985; Edgar 2004, see latter for a review). We -approximate the density as 1M /(4πR3/3) and take T -(cid:12) (cid:12) m (t →const.)=m +∆m , (7) -the conservative value of c = 500 km s−1, which is final coll initial t -s coll -consistent with the sound speed inside a 1 M star -(cid:12) in which T represents the simulation time and ∆m and +approximate the density as 1M (cid:12)/(4πR3 (cid:12)/3) and take +the conservative value of c s = 500 km s−1, which is +consistent with the sound speed inside a 1 M (cid:12) star (Christensen-Dalsgaardetal.1996)andallowsustoset -t remain constant, approximated as their initial val- -coll a lower limit on ∆m. To find ∆m, at each collision, we -ues. have: -This equation is plotted in Figure 2 for both cases, -∆m=min(m˙ ×t ,1 M ) , (6) -(cid:63),cross (cid:12) ∆m=1M (red) and ∆m from Bondi-Hoyle-Lyttleton -(cid:12) -where t ∼R /σ is the crossing time of the BH in accretion (blue), and the curves coincide with the cor- -(cid:63),cross (cid:12) -thestar. Wetaketheminimumbetweenm˙ ×t and responding simulated results. 
The shaded regions rep- +∆m=min(˙ m×t (cid:63),cross,1 M (cid:12)) , (6) +where t +(cid:63),cross +∼R (cid:12)/σ is the crossing time of the BH in +thestar. Wetaketheminimumbetween ˙ m×t (cid:63),cross -1 M because the BH cannot accrete more mass than resent one standard deviation from Eq. (7), calculated +and +1 M (cid:12) -one star at each collision. usingthesquarerootofthenumberofcollisions,T/t . -coll -Figure2juxtaposestheexpectedgrowthusingBondi- As indicated by the results in red, in the absence of -Hoyle-Lyttleton accretion (blue small points) with a Bondi-Hoyle-Lyttletonaccretion, theBHsclosesttothe -muchsimplermodelinwhichtheBHaccretesthestar’s SMBH experience the most growth because they have -entire mass, 1M (red large points). Both examples shorter collision timescales. However, Bondi-Hoyle- +because the BH cannot accrete more mass than +one star at each collision. +Figure2juxtaposestheexpectedgrowthusingBondi- +Hoyle-Lyttleton accretion (blue small points) with a +muchsimplermodelinwhichtheBHaccretesthestar’s +entire mass, 1M (cid:12) -start with identical populations of 10M BHs (grey) Lyttleton accretion becomes important closer to the +(red large points). Both examples +start with identical populations of 10M (cid:12) -and simulate growth through collisions using a statisti- SMBH, where the velocity dispersion is large compared -cal approach. As the BHs grow, the collision timescale, with the stars’ escape velocity, and curtails the mass -which depends on m , decreases. Simultaneously, growthforBHsinthisregion. Outsideof10−2 pc,aBH -BH -∆m, which also depends on m , increases. The re- consumes the star’s entire mass: the accretion-limited -BH -sult is exponential growth (see discussion and details ∆m governed by Eq. (7) is greater than or equal to the -surrounding Eq. (8)). In Figure 2, however, the simula- star’s mass. -tions assume α=1 for the stellar density profile, ensur- Eq. 7 does not apply for other values of α. 
When the -ing the collision timescale is long compared to the sim- collision timescale is shorter, corresponding to a larger -ulation time, 10 Gyr. Therefore, the BHs grow slowly, index α in the density profile (see Figure 1), the growth +BHs (grey) +and simulate growth through collisions using a statisti- +cal approach. As the BHs grow, the collision timescale, +which depends on m BH, decreases. Simultaneously, +∆m, which also depends on m BH, increases. The re- +sult is exponential growth (see discussion and details +surrounding Eq. (8)). In Figure 2, however, the simula- +tions assume α=1 for the stellar density profile, ensur- +ing the collision timescale is long compared to the sim- IMBH Formation in Galactic Nuclei 5 -isveryefficientand∆mquicklyapproaches1M . Con- muchsmallerthatthe10Gyrsimulationtime. Figure3 -(cid:12) -sequently, while we can now assume ∆m = 1M , we confirmsthisexpectation. Itdepictsthemassgrowthof -(cid:12) -cannolongerassumethecollisiontimescaleisconstant. auniformdistributionofBHswithinitialconditionsde- -The final mass grows exponentially as a result. For tailedinSection2.1forfiveαvalues,spanning1(green) -∆m = 1M , the general solution is reached by solving to 2 (purple). The most massive IMBHs form inwards +ulation time, 10 Gyr. Therefore, the BHs grow slowly, +and their final masses can be approximated using the +following equation: +m final(t +coll +→const.)=m initial+∆m +T +t +coll +, (7) +in which T represents the simulation time and ∆m and +t +coll +remain constant, approximated as their initial val- +ues. +This equation is plotted in Figure 2 for both cases, +∆m=1M (cid:12) -the differential equation dm/dt=1M /t (m), which of 0.25 pc for the α=2 case. -(cid:12) coll +(red) and ∆m from Bondi-Hoyle-Lyttleton +accretion (blue), and the curves coincide with the cor- +responding simulated results. The shaded regions rep- +resent one standard deviation from Eq. (7), calculated +usingthesquarerootofthenumberofcollisions,T/t coll. 
+As indicated by the results in red, in the absence of +Bondi-Hoyle-Lyttletonaccretion, theBHsclosesttothe +SMBH experience the most growth because they have +shorter collision timescales. However, Bondi-Hoyle- +Lyttleton accretion becomes important closer to the +SMBH, where the velocity dispersion is large compared +with the stars’ escape velocity, and curtails the mass +growthforBHsinthisregion. Outsideof10−2 pc,aBH +consumes the star’s entire mass: the accretion-limited +∆m governed by Eq. (7) is greater than or equal to the +star’s mass. +Eq. 7 does not apply for other values of α. When the +collision timescale is shorter, corresponding to a larger +index α in the density profile (see Figure 1), the growth +isveryefficientand∆mquicklyapproaches1M (cid:12). Con- +sequently, while we can now assume ∆m = 1M (cid:12), we +cannolongerassumethecollisiontimescaleisconstant. +The final mass grows exponentially as a result. For +∆m = 1M (cid:12), the general solution is reached by solving +the differential equation dm/dt=1M (cid:12)/t coll(m), which gives: -2.7. Gravitational Wave Mergers and Intermediate -m (∆m→1M )=−A+(m +A)eCT (8) and Extreme Mass Ratio Inspiral Candidates -final (cid:12) initial -where A=σ2R /G and C =2πGn R /σ. As an TowardstheSMBH,efficientcollisionscancreateBHs -star star star -massiveenoughtomergewiththeSMBHthroughGWs. +m final(∆m→1M (cid:12))=−A+(m initial+A)eCT (8) +where A=σ2R star/G and C =2πGn starR star/σ. As an example,weplotthiscurveinpurplefortheα=2case, -Following the method detailed in Section 2.5, when a in Figure 3, which agrees with the simulated masses. -given BH meets the criterion t < t , we mark -GW elapsed -2.5. GW Inspiral it as merged with the SMBH. We assume that at this -pointthedynamicsoftheBHwillbedeterminedbyGW +2.5. Uncertainties in Accretion +We note that the ∆M calculated in this proof-of- +concept study assumes that the BH accretes all of the +material that it captures. 
Estimating the true fraction +of the material accreted by the BH is very challeng- +ing; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heat- +ing, and radiative transfer, etc. and remains an active +fieldofresearch(e.g.,Blandford&Begelman1999;Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star re- +sults in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity re- +main uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accu- +mulatesignificantamountsofmassovermanycollisions +even if it accretes very little in a single one. We ex- +plore the viability of our channel using a physically mo- +tivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately v esc/(cη), where v esc is +the escape velocity from the BH at 1 R (cid:12) and η is the +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fractionaccretedisconsistentwithKremeretal.(2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. 
We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radi- +ation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumen- +shchev 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral When a BH is close to the SMBH, GW emission can -emission, shrinking and circularizing the BHs orbit un- circularize and shrink its orbit. We implement the ef- -til it undergoes an extreme or intermediate mass ratio fects of GW emission on the BH’s semimajor axis and -inspiral(EMRIandIMRI,respectively). Therighthand eccentricity following Peters & Mathews (1963a). The -plot in Figure 3 shows the BH masses versus time of characteristic timescale to merge a BH with an SMBH -merger. It is interesting to note that even in the ab- is given by: -sence of relaxation processes, which are often invoked -(cid:18) M (cid:19)−1(cid:18) m (cid:19)−1 toexplaintheformationofEMRIs, EMRIsandnotably -t ≈2.9×1012 yr • BH -GW 106 M 106 M IMRIs can form in this region. -(cid:12) (cid:12) -(cid:18) (cid:19)−1(cid:18) (cid:19)4 -M +m a -× • BH • 2.8. Two Body Relaxation Processes -2×106 M 10−4 pc -(cid:12) A BH orbiting the SMBH experiences weak gravita- -×f(e •)(1−e2 •)7/2 , (9) tionalinteractionswithotherobjectsintheGN.Overa -relaxation time, these interactions alter its orbit about -where f(e ) is a function of e . For all values of e , -• • • the SMBH. The two-body relaxation timescale for a -f(e ) is between 0.979 and 1.81 (Blaes et al. 2002). We -• single-mass system is: -plot this timescale for a 1×105M BH in Figure 1 in -(cid:12) -blue. σ3 -t =0.34 , (10) -In our simulations, we assume a BH has merged with relax G2ρ(cid:104)M (cid:105)lnΛ -∗ rlx -the SMBH when the condition t < t is met. 
-GW elapsed -When this condition is satisfied, we terminate mass where lnΛ rlx is the Coulomb logarithm and (cid:104)M ∗(cid:105) is the -growth through collisions for that BH.4 average mass of the surrounding objects, here assumed -to be 1M (Spitzer 1987; Binney & Tremaine 2008, +t +GW +≈2.9×1012 +yr(cid:18) +M +• +106 M +(cid:12)(cid:19)−1(cid:18) +m +BH +106 M +(cid:12)(cid:19)−1 +×(cid:18) +M •+m BH +2×106 M +(cid:12)(cid:19)−1(cid:18) +a • +10−2 +pc(cid:19)4 +×f(e •)(1−e2 •)7/2 , (9) +where f(e •) is a function of e •. For all values of e •, +f(e •) is between 0.979 and 1.81 (Blaes et al. 2002). We +plot this timescale for a 1×105M (cid:12) -2.6. IMBH growth Eq. (7.106)). This equation represents the approximate -As detailed above, BH-stellar collisions can increase timescale for a BH on a semi-circular orbit to change -the BH masses as a function of time. Here, we examine its orbital energy and angular momentum by order of -the sensitivity of the BH growth to the density power themselves. The BH experiences diffusion in its angular -law. From Eq. (1), it is clear that the growth rate de- momentumandenergyasafunctionoftime(depending -pends on the stellar density profile, governed by the in- ontheeccentricityoftheorbit,thisprocesscanbemore -dex α. We expect that higher values of α, or steeper efficientFragione&Sari2018;Sari&Fragione2019). In -profiles, will result in more efficient mass growth. In Figure 1, we plot the relaxation timescale in gold for a -Figure 1, larger values of α lead to collision timescales rangeofα. WenotethattheBahcall&Wolf(1976)pro- -in the GN’s inner region, inwards of 0.25 pc, that are file, α=7/4, corresponds to zero net flux and therefore -does not preferentially migrate objects inward. 
-Additionally, because they are more massive on -4For comparison, we also incrementally changed the semimajor average than the surrounding objects, BHs are ex- -axis and eccentricity from GW emission following the equations -pected to segregate inwards in the GN (e.g., Shapiro -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the & Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; -collisionsthattakeplacewhiletheorbitisgraduallyshrinking. Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). +BH in Figure 1 in +blue. 6 Rose et al. Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy(α=2). Forthelattercase,thepurplelineshowstheanalyticalresultfromEq.8,takingm tobetheaveragemass +cuspy(α=2). Forthelattercase,thepurplelineshowstheanalyticalresultfromEq.8,takingm initial +tobetheaveragemass of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and merger times of these BHs. -They sink toward the SMBH on the mass segregation Therefore, after the initial mass segregation, we allow -timescale, t ≈ (cid:104)M (cid:105)/m ×t (e.g., Spitzer 1987; the BHs to begin diffusing over a relaxation timescale, -seg ∗ BH relax -Fregeau et al. 2002; Merritt 2006), which is typically an their orbital parameters changing slowly through a ran- -orderofmagnitudesmallerthantherelaxationtimescale dom process. In this random process, some of the BHs -plotted in Figure 1. may migrate closer to the SMBH. We terminate mass -We incorporate relaxation processes by introducing a growthwhentheBHenterstheinner200auoftheGN, -small change in the BH’s energy and angular momen- withinwhichthedensityofstarsisuncertain. Thiscut- -tum each time it orbits the SMBH. We apply a small off is based on the 120 au pericenter of S0-2, the closest -instantaneous velocity kick to the BH, denoted as ∆v. 
known star to the SMBH (e.g., Ghez et al. 2005). -We draw ∆v from a Guassian distribution with average Another physical process that causes inward migra- -√ -of zero and a standard deviation of ∆v / 3, where tion is dynamical friction. A cursory derivation based +In our simulations, we assume a BH has merged with +the SMBH when the condition t +GW +< t +elapsed +is met. +When this condition is satisfied, we terminate mass +growth through collisions for that BH.4 +2.7. IMBH growth +As detailed above, BH-stellar collisions can increase +the BH masses as a function of time. Here, we examine +the sensitivity of the BH growth to the density power +law. From Eq. (1), it is clear that the growth rate de- +pends on the stellar density profile, governed by the in- +dex α. We expect that higher values of α, or steeper +profiles, will result in more efficient mass growth. In +Figure 1, larger values of α lead to collision timescales +in the GN’s inner region, inwards of 0.25 pc, that are +muchsmallerthatthe10Gyrsimulationtime. Figure3 +confirmsthisexpectation. Itdepictsthemassgrowthof +auniformdistributionofBHswithinitialconditionsde- +tailedinSection2.1forfiveαvalues,spanning1(green) +to 2 (purple). The most massive IMBHs form inwards +of 0.25 pc for the α=2 case. +2.8. Gravitational Wave Mergers and Intermediate +and Extreme Mass Ratio Inspiral Candidates +TowardstheSMBH,efficientcollisionscancreateBHs +massiveenoughtomergewiththeSMBHthroughGWs. +Following the method detailed in Section 2.6, when a +given BH meets the criterion t +GW +< t elapsed, we mark +4For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisionsthattakeplacewhiletheorbitisgraduallyshrinking. +it as merged with the SMBH. 
We assume that at this +pointthedynamicsoftheBHwillbedeterminedbyGW +emission, shrinking and circularizing the BHs orbit un- +til it undergoes an extreme or intermediate mass ratio +inspiral(EMRIandIMRI,respectively). Therighthand +plot in Figure 3 shows the BH masses versus time of +merger. It is interesting to note that even in the ab- +sence of relaxation processes, which are often invoked +toexplaintheformationofEMRIs, EMRIsandnotably +IMRIs can form in this region. +2.9. Two Body Relaxation Processes +A BH orbiting the SMBH experiences weak gravita- +tionalinteractionswithotherobjectsintheGN.Overa +relaxation time, these interactions alter its orbit about +the SMBH. The two-body relaxation timescale for a +single-mass system is: +t +relax +=0.34 +σ3 +G2ρ(cid:104)M ∗(cid:105)lnΛ +rlx, (10) +where lnΛ +rlx +is the Coulomb logarithm and (cid:104)M ∗(cid:105) is the +average mass of the surrounding objects, here assumed +to be 1M +(cid:12) +(Spitzer 1987; Binney & Tremaine 2008, +Eq. (7.106)). This equation represents the approximate +timescale for a BH on a semi-circular orbit to change +its orbital energy and angular momentum by order of +themselves. The BH experiences diffusion in its angular +momentumandenergyasafunctionoftime(depending +ontheeccentricityoftheorbit,thisprocesscanbemore +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star +IMBH Formation in Galactic Nuclei 7 +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Met- +zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescaleingoldforarangeofα. 
WenotethattheBah- +call & Wolf (1976) profile, α=7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on av- +erage than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). +They sink toward the SMBH on the mass segregation +timescale, t +seg +≈ (cid:104)M ∗(cid:105)/m BH×t +relax +(e.g., Spitzer 1987; +Fregeau et al. 2002; Merritt 2006), which is typically an +orderofmagnitudesmallerthantherelaxationtimescale +plotted in Figure 1. +We incorporate relaxation processes by introducing a +small change in the BH’s energy and angular momen- +tum each time it orbits the SMBH. We apply a small +instantaneous velocity kick to the BH, denoted as ∆v. +We draw ∆v from a Guassian distribution with average +of zero and a standard deviation of ∆v +rlx/√ +3, where +∆v rlx -(cid:112) -∆v = v P /t (see Bradnick et al. 2017, for an onthedynamicalfrictionequationsdescribedinBinney -rlx • • rlx -approach to changes in the angular momentum). The & Tremaine (2008) reveals the process to have a simi- -new orbital parameters can be calculated following Lu lar timescale to mass segregation. If a BH diffuses to -& Naoz (2019), and see Naoz et al. in prep for full set a distance greater than 2 pc from the SMBH, exiting -of equations. the sphere of influence, we have it sink inwards, back -We account for the effects of relaxation processes, towards the center, over a dynamical friction timescale. -including mass-segregation, using a multi-faceted ap- After one dynamical friction timescale has passed, we -proach. We begin by migrating each BH towards the restart diffusion. -centeroveritsmass-segregationtimescale,shiftingitin- Wenotethatourprescriptionignoresself-interactions -crementally inward such that its orbital energy changes betweentheBHs. 
Asmentionedabove, astheBHssink -by order of itself within the segregation timescale. towards the SMBH, their concentration in the inner re- -As the BHs segregate down the potential well, their gionoftheGNincreases,allowingthemtodominatethe -abundancewithrespecttostarsincreases,untilatsome scattering. Wereservetheinclusionoftheseinteractions -turnover radius, BHs become the dominant source of for future study. += v +•(cid:112) +P •/t +rlx +(see Bradnick et al. 2017, for an +approach to changes in the angular momentum). The +new orbital parameters can be calculated following Lu +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. +We account for the effects of relaxation processes, +including mass-segregation, using a multi-faceted ap- +proach. We begin by migrating each BH towards the +centeroveritsmass-segregationtimescale,shiftingitin- +crementally inward such that its orbital energy changes +by order of itself within the segregation timescale. +As the BHs segregate down the potential well, their +abundancewithrespecttostarsincreases,untilatsome +turnover radius, BHs become the dominant source of scatteringforbothblackholesandstars. Withinthisra- dius, BH self-interaction dominates over two-body scat- -2.9. Effect of Relaxation Processes terings with the now rarer main-sequence stars. The -AsdepictedinFigure4,two-bodyrelaxationprocesses BHs will then settle onto a Bahcall-Wolf profile, while -result in more EMRIs and IMRIs events. These pro- the stars may follow a shallower profile, with approx- -cesses allow BHs that begin further from the SMBH -imately n ∝ r−1.5, inwards of the transition radius +imately n (cid:63) -to migrate inwards and grow more efficiently in mass. +∝ r−1.5, inwards of the transition radius (Linial & Sari in prep.). +Therefore, after the initial mass segregation, we allow +the BHs to begin diffusing over a relaxation timescale, +their orbital parameters changing slowly through a ran- +dom process. 
In this random process, some of the BHs +may migrate closer to the SMBH. We terminate mass +growthwhentheBHenterstheinner200auoftheGN, +withinwhichthedensityofstarsisuncertain. Thiscut- +off is based on the 120 au pericenter of S0-2, the closest +known star to the SMBH (e.g., Ghez et al. 2005). +Another physical process that causes inward migra- +tion is dynamical friction. A cursory derivation based +onthedynamicalfrictionequationsdescribedinBinney +& Tremaine (2008) reveals the process to have a simi- +lar timescale to mass segregation. If a BH diffuses to +a distance greater than 2 pc from the SMBH, exiting +the sphere of influence, we have it sink inwards, back +towards the center, over a dynamical friction timescale. +After one dynamical friction timescale has passed, we +restart diffusion. +Wenotethatourprescriptionignoresself-interactions +betweentheBHs. Asmentionedabove, astheBHssink +towards the SMBH, their concentration in the inner re- +gionoftheGNincreases,allowingthemtodominatethe +scattering. Wereservetheinclusionoftheseinteractions +for future study. +2.10. Effect of Relaxation Processes +AsdepictedinFigure4,two-bodyrelaxationprocesses +result in more EMRIs and IMRIs events. These pro- +cesses allow BHs that begin further from the SMBH +to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are initially closer to the SMBH by allowing them to dif- -IMBH Formation in Galactic Nuclei 7 -Figure 4. SimilartoFigure3,weplottheinitialmassesversusinitialdistance(grey)andfinalmassversusfinaldistance(red) -for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward -migration,BHsmergemorequicklywiththeSMBH,beforetheycanbecomeasmassiveasthoseinFigure3. Additionally,more -BHs become EMRIs and IMRIs. 
Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -differentvaluesofα,1.5(orange,solid),α,1.75(red,dashed),and2(purple,dash-dotted),accountingforrelaxationprocesses. -The dashed, faded lines represent the corresponding initial histograms. We assume α=1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. -fuseoutoftheinnerregionwherecollisionsareefficient. 10−2 pcfromtheSMBHcanaccretetheentirestar(see -As can be seen in Figure 4, the net result is that more Figure 2). -BHs grow, but the maximum mass is lower compared The efficiency of collisions, and therefore IMBH, -to the scenario that ignores two-body relaxation. The EMRI, and IMRI formation as well, are sensitive to -histograminFigure4presentsthefinalBHmassdistri- the underlying stellar density. As shown in Figure 3, a -butions for different power law indices α. As expected, steeperdensityprofileresultsinlargerIMBHs. Thisbe- -the two-body relaxation suppresses the α dependence havior can be understood from the collision timescale’s -highlighted in Figure 3. In fact, using a KS test, we dependenceonthestellardensityprofile. Asteeperpro- -find that we cannot reject the hypothesis that the two file yields shorter collision timescales near the SMBH. -distributions were drawn from the same sample for the However, the inclusion of relaxation processes in the -α = 1.75 and α = 2 results. Interestingly, a BH mass simulations dampens the influence of the stellar density -IMF with an average of 10 M leads to a final distri- profile by allowing BHs to diffuse into regions of more +fuseoutoftheinnerregionwherecollisionsareefficient. +As can be seen in Figure 4, the net result is that more +BHs grow, but the maximum mass is lower compared +to the scenario that ignores two-body relaxation. The +histograminFigure4presentsthefinalBHmassdistri- +butions for different power law indices α. 
As expected, +the two-body relaxation suppresses the α dependence +highlighted in Figure 3. In fact, using a KS test, we +find that we cannot reject the hypothesis that the two +distributions were drawn from the same sample for the +α = 1.75 and α = 2 results. Interestingly, a BH mass +IMF with an average of 10 M (cid:12) -bution with an average of ∼ 200 M and a median of or less efficient growth. As a result, more BHs grow in +leads to a final distri- +bution with an average of ∼ 200 M (cid:12) -∼45 M , which lies within the mass gap. mass, but their maximum mass is smaller (∼ 104 M ). -(cid:12) (cid:12) +and a median of +∼45 M (cid:12), which lies within the mass gap. +3. DISCUSSION AND PREDICTIONS +We explore the feasibility of forming IMBHs in a +GN through successive collisions between a stellar-mass +BH and main-sequence stars. Taking both a statisti- +cal and analytic approach, we show that this channel +can produce IMBHs efficiently with masses as high as +103−4 M +(cid:12) +and may result in many IMBH-SMBH merg- +ers (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. +8 Rose et al. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α=1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α=1.75 that accounts for momentum-driven winds (black, dotted). 
+Despite the substantially reduced accretion, BHs in the mass gap still form. +As the stellar mass BH collides with a star, the BH +will grow in mass. The increase may equal star’s en- +tire mass if the relative velocity is smaller than the es- +cape velocity from the BH at 1 R (cid:12). However, near the +SMBH, the velocity dispersion may be larger than the +escapevelocityfromtheBHatthestar’sradius. Inthis +limit, the BH captures a “tunnel” of material through +the star, estimated using Bondi-Hoyle-Lyttleton accre- +tion. In our statistical analysis, we account for Bondi- +Hoyle-Lyttleton accretion and find that BHs outside of +10−2 pcfromtheSMBHcancapturetheentirestar(see +Figure 2). +The efficiency of collisions, and therefore IMBH, +EMRI, and IMRI formation as well, are sensitive to +the underlying stellar density. As shown in Figure 3, a +steeperdensityprofileresultsinlargerIMBHs. Thisbe- +havior can be understood from the collision timescale’s +dependenceonthestellardensityprofile. Asteeperpro- +file yields shorter collision timescales near the SMBH. +However, the inclusion of relaxation processes in the +simulations dampens the influence of the stellar density +profile by allowing BHs to diffuse into regions of more +or less efficient growth. As a result, more BHs grow in +mass, but their maximum mass is smaller (∼ 104 M (cid:12)). Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). -3. DISCUSSION AND PREDICTIONS Mass growth through BH-main-sequence star colli- -We explore the feasibility of forming IMBHs in a sions may act in concert with other IMBH formation -GN through successive collisions between a stellar-mass channels, such as compact object binary mergers (e.g., -BH and main-sequence stars. Taking both a statisti- Hoang et al. 2018; Stephan et al. 2019; Fragione et al. -cal and analytic approach, we show that this channel 2021; Wang et al. 2021). 
While in some cases colli- -can produce IMBHs efficiently with masses as high as sions can unbind a binary (e.g., Sigurdsson & Phinney -103−4 M and may result in many IMBH-SMBH merg- 1993; Fregeau et al. 2004), BH binaries can be tightly +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and repre- +sents an active field of study (e.g., Blandford & Begel- +man 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). To assess the limits of our model, +wealsoconsideraphysicallymotivatedaccretionmodel, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Figure 4. Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M (cid:12). Furthermore, if we increase this ∆M esti- +mate by a factor of 2 (i.e., use η = 0.05), the simula- +tionproducesa3.5×103 M (cid:12) -ers (intermediate-mass ratio inspiral, IMRIs) and EM- bound enough to withstand the collisions. Wide bina- -RIs. ries may also become unbound due to interactions with -As the stellar mass BH collides with a star, the BH theneighboringstarsandcompactobjects(e.g.,Binney -will grow in mass. The increase may equal star’s en- & Tremaine 1987; Rose et al. 2020, see latter study for -tire mass if the relative velocity is smaller than the es- the timescale for an arbitrary eccentricity). However, -cape velocity from the BH at 1 R . However, near the as highlighted in previous studies, a substantial frac- +IMBHforthesameinitial +conditions. 
Ourproof-of-conceptdemonstratesthatcol- +lisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. +Mass growth through BH-main-sequence star colli- +sions may act in concert with other IMBH formation +channels, such as compact object binary mergers (e.g., +Hoang et al. 2018; Stephan et al. 2019; Fragione et al. +2021; Wang et al. 2021). While in some cases colli- +sions can unbind a binary (e.g., Sigurdsson & Phinney +1993; Fregeau et al. 2004), BH binaries can be tightly +bound enough to withstand the collisions. Wide bina- +ries may also become unbound due to interactions with +theneighboringstarsandcompactobjects(e.g.,Binney +& Tremaine 1987; Rose et al. 2020, see latter study for +the timescale for an arbitrary eccentricity). However, +as highlighted in previous studies, a substantial frac- +tion of these binaries may merge due to the Eccentric +Kozai Lidov mechanism, leaving behind a single star or +a single compact object (e.g., Stephan et al. 2016, 2019; +Hoang et al. 2018). Additionally, to be susceptible to +evaporation, BH binaries must have a wider configura- +tion. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects +and will only harden through weak gravitational inter- +IMBH Formation in Galactic Nuclei 9 +actions with neighboring stars (see for example Figure +6 in Rose et al. 2020). +We note that we assume a steady-state and treat the +starsasareservoirinthismodel. Futureworkwilltakea +more nuanced approach to the background stars, whose +densityasafunctionoftimecanbeinfluencedbyseveral +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star +formation episodes can occur as often as every ∼5 Myr +(e.g. Lu et al. 2009). 
Therefore, we expect that after +the first Gyr, stars within (cid:46)0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5−10Myr,whichisshorterthanthecollisiontimescale. +However, star-star collisions may complicate this pic- +ture within ∼0.01 pc. As discussed above, regular star +formationensurestheBHsalwayshaveastellarpopula- +tion to interact with outside of ∼0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M (cid:12) -SMBH, the velocity dispersion may be larger than the tion of these binaries may merge due to the Eccentric -escapevelocityfromtheBHatthestar’sradius. Inthis Kozai Lidov mechanism, leaving behind a single star or -limit, the BH accretes a “tunnel” of material through a single compact object (e.g., Stephan et al. 2016, 2019; -the star, estimated using Bondi-Hoyle-Lyttleton accre- Hoang et al. 2018). Additionally, to be susceptible to -tion. In our statistical analysis, we account for Bondi- evaporation, BH binaries must have a wider configura- -Hoyle-Lyttleton accretion and find that BHs outside of tion. Otherwise, they will be more tightly bound that -8 Rose et al. -the average kinetic energy of the surrounding objects, Our results also suggest that IMBHs are likely to ex- -and will only harden through weak gravitational inter- ists in many galactic nuclei, as well as within our own -actions with neighboring stars (see for example Figure galactic center. This implication seems to be consis- -6 in Rose et al. 2020). tent with recent observational and theoretical studies -Not included in this study, collisions between the BH (e.g.,Hansen&Milosavljevi´c2003;Maillardetal.2004; -and other compact objects will increase the BH growth Gu¨rkan&Rasio2005;Gualandris&Merritt2009;Chen -rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra- &Liu2013;Generozov&Madigan2020;Fragioneetal. -gione et al. 
2021) and even neutron star BH mergers 2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -(e.g., Hoang et al. 2020) become more likely as the BHs Collaboration et al. 2020). -increase in mass through stellar collisions. As a result, Lastly, the collisions between stellar mass BHs and -the BH-BH collision timescale, discussed in Section 2.2, stars may contribute to the x-ray emission from our -will become relevant to our simulations, allowing the galacticcentre(e.g.,Munoetal.2005,2009;Haileyetal. -BHs to grow through this channel in addition to stel- 2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter- -larcollisions. Additionally,thiscompactobjectmergers actions, in particular grazing collisions, may also result -result in GW recoil, which may have a large impact on in tidal disruption events (e.g., Perets et al. 2016; Sam- -the dynamics (e.g., Baibhav et al. 2020; Fragione et al. sing et al. 2019; Kremer et al. 2021). Thus, the process -2021) outlined here may produce electromagnetic signatures -The BH’s mass growth increases GW emission, which in addition to GW mergers. -dissipates energy from the orbit. Along with relaxation -processes, GW emission causes BHs to sink towards the SR thanks the Charles E Young fellowship, the Nina -SMBH and eventually undergo a merger. As a result, Byers Fellowship, and the Michael A. Jura Memorial -the GN environment is conducive to the formation of Graduate Award for support. SR and SN acknowledge -EMRIsandIMRIs. TheGWemissionfromEMRIsand the partial support from NASA ATP 80NSSC20K0505. -IMRIs is expected to be at mHz frequencies, making SN thanks Howard and Astrid Preston for their gener- -them promising candidates for LISA to observe. While oussupport. ILthankssupportfromtheAdamsFellow- -the exact rate calculation is beyond the scope of this ship. SN and RS thank the Bhaumik Institute visitor -study, the mechanism outlined here seems very promis- program. -ing. +stars is larger than their binding energies. 
+Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, (cid:46)0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disrup- +tion of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it(e.g.,Balbergetal.2013);whenabinaryisdisrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. +An IMBH may also affect the stellar density profile. +AsitspiralsintotheSMBH,itcanperturbstellarorbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt +2008). L¨ockmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density pro- +file to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replen- +ished on 100 Myr timescales (Baumgardt et al. 2006a). +Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. +Whiletherearemanycompetingdynamicalprocesses +that shape the stellar density profile, we stress that α +5In fact, the star-star collision timescale is greater than 10 Myr +fortheentireparameterspace,saveat0.001pcforlargervalues +ofα;theBH-starcollisiontimescaleplottedinFig. 1isthesame +orderofmagnitudeasthestar-starcollisiontimescale. +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observa- +tions must already reflect ongoing processes like star- +star collisions and replenishment. Sch¨odel et al. (2018) +findtheobservedstellarmassenclosedwithin0.01pcof +the Milky Way’s Galactic Center to be approximately +180 M (cid:12). This estimate is consistent to order of magni- +tude with our α = 1.25 case. 
In a simulation like those +depictedinFigure4, whichincluderelaxation, α=1.25 +leads to a maximum IMBH mass of 140 M (cid:12). Further- +more, while the stellar mass within 0.01 pc may be a +few hundred M (cid:12), Do et al. (2019) and GRAVITY Col- +laboration et al. (2020) set an upper limit on the mass +enclosedwithintheorbitofS0-2tobeaboutafewthou- +sand M (cid:12), or 0.1% of the central mass. This upper limit +canincludemassthatwaspreviouslyinstarsbutisnow +inBHs. Inthatcase,the180M +(cid:12) +iswhatremainsofthe +stars, while BHs and IMBHs make up the ∼ 1000 M +(cid:12) +in the innermost region. +Alsonotincludedinthisstudy, collisionsbetweenthe +BH and other compact objects will increase the BH +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers +(e.g., Hoang et al. 2020) become more likely as the BHs +increase in mass through stellar collisions. As a result, +the BH-BH collision timescale, discussed in Section 2.2, +will become relevant to our simulations, allowing the +BHs to grow through this channel in addition to stel- +larcollisions. Additionally,thiscompactobjectmergers +result in GW recoil, which may have a large impact on +the dynamics (e.g., Baibhav et al. 2020; Fragione et al. +2021). +The BH’s mass growth increases GW emission, which +dissipatesenergyfromtheorbit. Alongwithrelaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. The GW emission from EMRIs and IM- +RIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. +OurresultsalsosuggestthatBHswithinthemassgap +aswellasIMBHslikely existinmanygalactic nuclei, as +well as within our own galactic center. 
This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljevi´c 2003; +Maillard et al. 2004; G¨ urkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madi- +gan2020;Fragioneetal.2020a;Zhengetal.2020;Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). +10 Rose et al. +Lastly, the collisions between stellar mass BHs and +stars may contribute to the x-ray emission from our +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre- +meretal.(2022)foradiscussionofelectromagneticsig- +natures from BH-star collisions)6. These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre- +mer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use- +ful discussion. +SRthankstheCharlesE.YoungFellowship, theNina +Byers Fellowship, and the Michael A. Jura Memorial +Graduate Award for support. SR and SN acknowledge +the partial support from NASA ATP 80NSSC20K0505. +SN thanks Howard and Astrid Preston for their gener- +oussupport. ILthankssupportfromtheAdamsFellow- +ship. SN and RS thank the Bhaumik Institute visitor +program. This work was performed in part at the As- +pen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. REFERENCES -Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, Baibhav,V.,Gerosa,D.,Berti,E.,etal.2020,PhRvD,102, -PhRvL, 116, 241102, 043002, doi: 10.1103/PhysRevD.102.043002 -doi: 10.1103/PhysRevLett.116.241102 Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, -—. 
2017a, PhRvL, 118, 221101, 613, 1143, doi: 10.1086/423299 -doi: 10.1103/PhysRevLett.118.221101 Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, -—. 2017b, PhRvL, 119, 141101, MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x -doi: 10.1103/PhysRevLett.119.141101 Belczynski,K., Hirschi,R., Kaiser,E.A., etal.2020a, ApJ, -Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 890, 113, doi: 10.3847/1538-4357/ab6d77 -Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, —. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 -doi: 10.1088/0004-637X/780/2/148 Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. -2009, New Journal of Physics, 11, 105016, +Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, +PhRvL, 116, 241102, +doi: 10.1103/PhysRevLett.116.241102 +—. 2017a, PhRvL, 118, 221101, +doi: 10.1103/PhysRevLett.118.221101 +—. 2017b, PhRvL, 119, 141101, +doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi: 10.3847/2041-8205/830/1/L1 +Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 +Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, +doi: 10.1088/0004-637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi: 10.1007/s41114-018-0013-8 +6TheconnectionbetweentheobservedX-raysourcesattheGalac- +tic Center and tidal capture has been suggested by Generozov +etal.(2018),butseeZhuetal.(2018);Stephanetal.(2019)for +alternativechannels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. -doi: 10.1088/1367-2630/11/10/105016 2021, arXiv e-prints, arXiv:2109.12119. -Binney, J., & Tremaine, S. 1987, Galactic dynamics https://arxiv.org/abs/2109.12119 -—. 2008, Galactic Dynamics: Second Edition Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, -Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/154711 +Baibhav,V.,Gerosa,D.,Berti,E.,etal.2020,PhRvD,102, +043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 
2013, MNRAS, 434, L26, +doi: 10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. +2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x +Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, +613, 1143, doi: 10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, +doi: 10.1093/mnras/187.2.237 +—.2012a,ApJL,749,L3,doi:10.1088/2041-8205/749/1/L3 +IMBH Formation in Galactic Nuclei 11 +—. 2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x +Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, +MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x +Belczynski,K., Hirschi,R., Kaiser,E.A., etal.2020a, ApJ, +890, 113, doi: 10.3847/1538-4357/ab6d77 +—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 +Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. +2009, New Journal of Physics, 11, 105016, +doi: 10.1088/1367-2630/11/10/105016 +Binney, J., & Tremaine, S. 1987, Galactic dynamics +—. 2008, Galactic Dynamics: Second Edition +Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi: 10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, +doi: 10.1093/mnras/179.3.433 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, -5TheconnectionbetweentheobservedX-raysourcesattheGalac- 427, doi: 10.1086/500727 -tic Center and tidal capture has been suggested by Generozov +427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, -etal.(2018),butseeZhuetal.(2018);Stephanetal.(2019)for -alternativechannels. doi: 10.1093/mnras/112.2.195 -IMBH Formation in Galactic Nuclei 9 -Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, -doi: 10.1093/mnras/104.5.273 649, 91, doi: 10.1086/506193 -Bradnick, B., Mandel, I., & Levin, Y. 
2017, MNRAS, 469, Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, -2042, doi: 10.1093/mnras/stx1007 doi: 10.3847/1538-4357/ab94bc -Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, -C. 2012, JCAP, 2012, 054, J. P. 2018, MNRAS, 478, 4030, -doi: 10.1088/1475-7516/2012/07/054 doi: 10.1093/mnras/sty1262 -Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of -2010, Reviews of Modern Physics, 82, 3069, Modern Physics, 82, 3121, -doi: 10.1103/RevModPhys.82.3069 doi: 10.1103/RevModPhys.82.3121 -Chen, X., & Liu, F. K. 2013, ApJ, 762, 95, Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, -doi: 10.1088/0004-637X/762/2/95 doi: 10.1086/377127 -Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, -doi: 10.3847/1538-4357/aaba16 620, 744, doi: 10.1086/427175 -Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, Gonda´n, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, -MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 860, 5, doi: 10.3847/1538-4357/aabfee -Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., Gonza´lez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, -et al. 1996, Science, 272, 1286, 908, L29, doi: 10.3847/2041-8213/abdf5b -doi: 10.1126/science.272.5266.1286 GRAVITY Collaboration, Abuter, R., Amorim, A., et al. -Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, 2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813 -doi: 10.1086/156685 Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, -Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, doi: 10.1088/0004-637X/705/1/361 -MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 Gu¨rkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, -Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, 640, L39, doi: 10.1086/503295 -MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Gu¨rkan, M. A., & Rasio, F. A. 
2005, ApJ, 628, 236, -Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, doi: 10.1086/430694 -MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, -Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 556, 70, doi: 10.1038/nature25029 -110, 221101, doi: 10.1103/PhysRevLett.110.221101 Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, -Edgar, R. 2004, NewAR, 48, 843, L77, doi: 10.1086/378182 -doi: 10.1016/j.newar.2004.06.001 Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., & -Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Hartmann, D. H. 2003, ApJ, 591, 288, -Monthly Notices of the Royal Astronomical Society, 443, doi: 10.1086/375341 -2410, doi: 10.1093/mnras/stu1280 Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & -Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, Dosopoulou, F. 2018, ApJ, 856, 140, -L31, doi: 10.3847/2041-8213/ab77c9 doi: 10.3847/1538-4357/aaafce -Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, -arXiv e-prints, arXiv:2107.04639. doi: 10.3847/1538-4357/abb66a -https://arxiv.org/abs/2107.04639 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the -Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, Royal Astronomical Society, 374, 1557, -ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 doi: 10.1111/j.1365-2966.2006.11275.x -Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, -L26, doi: 10.3847/2041-8213/abbc0a doi: 10.3847/1538-4357/abeb14 -Fragione, G., & Sari, R. 2018, ApJ, 852, 51, Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, -doi: 10.3847/1538-4357/aaa0d7 45, doi: 10.3847/1538-4357/abb945 -Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, -Rasio, F. A. 2004, MNRAS, 352, 1, doi: 10.3847/1538-4365/aacb24 -doi: 10.1111/j.1365-2966.2004.07914.x —. 
2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 -Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, -Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 doi: 10.1093/mnras/stz036 -10 Rose et al. -Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & -690, 1463, doi: 10.1088/0004-637X/690/2/1463 Rasio, F. A. 2018, PhRvL, 120, 151101, -Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1103/PhysRevLett.120.151101 -doi: 10.1086/319848 Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, -Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 -2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, -Phys. Rev. D, 100, 043027, +doi: 10.1093/mnras/112.2.195 +Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, +doi: 10.1093/mnras/104.5.273 +Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, +2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi: 10.3847/1538-4357/aac2c4 +Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, +C. 2012, JCAP, 2012, 054, +doi: 10.1088/1475-7516/2012/07/054 +Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. +2010, Reviews of Modern Physics, 82, 3069, +doi: 10.1103/RevModPhys.82.3069 +Chen, X., & Liu, F. K. 2013, ApJ, 762, 95, +doi: 10.1088/0004-637X/762/2/95 +Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, +doi: 10.3847/1538-4357/aaba16 +Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, +MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 +Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., +et al. 1996, Science, 272, 1286, +doi: 10.1126/science.272.5266.1286 +Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, +doi: 10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi: 10.1111/j.1365-2966.2005.09937.x +Dale, J. 
E., Davies, M. B., Church, R. P., & Freitag, M. +2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.x +Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi: 10.1086/427142 +Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, +MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 +Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, +MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi: 10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi: 10.1086/338118 +Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, +110, 221101, doi: 10.1103/PhysRevLett.110.221101 +Edgar, R. 2004, NewAR, 48, 843, +doi: 10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, +doi: 10.3847/1538-4357/abd93c +Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, +Monthly Notices of the Royal Astronomical Society, 443, +2410, doi: 10.1093/mnras/stu1280 +Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, +L31, doi: 10.3847/2041-8213/ab77c9 +Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, +arXiv e-prints, arXiv:2107.04639. +https://arxiv.org/abs/2107.04639 +Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, +ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 +Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, +L26, doi: 10.3847/2041-8213/abbc0a +Fragione, G., & Sari, R. 2018, ApJ, 852, 51, +doi: 10.3847/1538-4357/aaa0d7 +Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & +Rasio, F. A. 2004, MNRAS, 352, 1, +doi: 10.1111/j.1365-2966.2004.07914.x +Fregeau, J. 
M., Joshi, K. J., Portegies Zwart, S. F., & +Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 +Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, +649, 91, doi: 10.1086/506193 +Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, +doi: 10.3847/1538-4357/ab94bc +Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, +J. P. 2018, MNRAS, 478, 4030, +doi: 10.1093/mnras/sty1262 +12 Rose et al. +Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of +Modern Physics, 82, 3121, +doi: 10.1103/RevModPhys.82.3121 +Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, +doi: 10.1086/377127 +Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, +620, 744, doi: 10.1086/427175 +Gond´ an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, +860, 5, doi: 10.3847/1538-4357/aabfee +Gonz´ alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, +908, L29, doi: 10.3847/2041-8213/abdf5b +GRAVITY Collaboration, Abuter, R., Amorim, A., et al. +2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813 +Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, +doi: 10.1088/0004-637X/705/1/361 +G¨ urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, +640, L39, doi: 10.1086/503295 +G¨ urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, +doi: 10.1086/430694 +Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, +556, 70, doi: 10.1038/nature25029 +Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, +L77, doi: 10.1086/378182 +Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., & +Hartmann, D. H. 2003, ApJ, 591, 288, +doi: 10.1086/375341 +Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & +Dosopoulou, F. 2018, ApJ, 856, 140, +doi: 10.3847/1538-4357/aaafce +Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, +doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi: 10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi: 10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. 
+2003, ApJ, 592, 1042, doi: 10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi: 10.1088/0004-637X/796/2/106 +Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the +Royal Astronomical Society, 374, 1557, +doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 +Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, +doi: 10.3847/1538-4357/abeb14 +Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, +45, doi: 10.3847/1538-4357/abb945 +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, +doi: 10.1086/376675 +Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, +doi: 10.3847/1538-4365/aacb24 +—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +L¨ ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi: 10.1111/j.1365-2966.2007.12699.x +Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, +doi: 10.1093/mnras/stz036 +Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, +690, 1463, doi: 10.1088/0004-637X/690/2/1463 +Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, +doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi: 10.1046/j.1365-8711.1999.02853.x +Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. +2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, -doi: 10.1103/PhysRevD.100.043027 M., & Artale, M. C. 2021a, arXiv e-prints, -Rose,S.C.,Naoz,S.,Gautam,A.K.,etal. 2020,ApJ,904, arXiv:2109.06222. https://arxiv.org/abs/2109.06222 -113, doi: 10.3847/1538-4357/abc557 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, -Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 -& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. 
+Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, +doi: 10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, -https://arxiv.org/abs/2009.01213 doi: 10.1088/0034-4885/69/9/R01 -Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, -100, 043009, doi: 10.1103/PhysRevD.100.043009 doi: 10.1086/317837 -Sari, R., & Fragione, G. 2019, ApJ, 885, 24, Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607 -doi: 10.3847/1538-4357/ab43df Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, -Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. 622, L113, doi: 10.1086/429721 +Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, +ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi: 10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi: 10.3847/2041-8213/ac574b +Naoz, S., & Silk, J. 2014, ApJ, 795, 102, +doi: 10.1088/0004-637X/795/2/102 +Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, +L35, doi: 10.3847/2041-8213/ab4fed +IMBH Formation in Galactic Nuclei 13 +Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, +888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 +Ohsuga,K., Mori,M.,Nakamoto,T., &Mineshige,S.2005, +ApJ, 628, 368, doi: 10.1086/430728 +O’Leary,R.M.,Kocsis,B.,&Loeb,A.2009,MNRAS,395, +2127, doi: 10.1111/j.1365-2966.2009.14653.x +O’Leary, R. M., Rasio, F. 
A., Fregeau, J. M., Ivanova, N., +& O’Shaughnessy, R. 2006, ApJ, 637, 937, +doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi: 10.1086/319042 +Paumard,T.,Genzel,R.,Martins,F.,etal.2006,ApJ,643, +1011, doi: 10.1086/503273 +Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, +Stephen R., J. 2016, ApJ, 823, 113, +doi: 10.3847/0004-637X/823/2/113 +Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, +435, doi: 10.1103/PhysRev.131.435 +—. 1963b, Physical Review, 131, 435, +doi: 10.1103/PhysRev.131.435 +Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J., +& McMillan, S. L. W. 2004, Nature, 428, 724, +doi: 10.1038/nature02448 +Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, +528, L17, doi: 10.1086/312422 +—. 2002, ApJ, 576, 899, doi: 10.1086/341798 +Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, +doi: 10.1088/0004-637X/780/2/187 +Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, +A56, doi: 10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi: 10.1093/mnras/stac231 +Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & +Rasio, F. A. 2018, PhRvL, 120, 151101, +doi: 10.1103/PhysRevLett.120.151101 +Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, +PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 +Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, +Phys. Rev. D, 100, 043027, +doi: 10.1103/PhysRevD.100.043027 +Rose,S.C.,Naoz,S.,Gautam,A.K.,etal. 2020,ApJ,904, +113, doi: 10.3847/1538-4357/abc557 +Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., +& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. +https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 +Samsing, J., Venumadhav, T., Dai, L., et al. 
2019, PhRvD, +100, 043009, doi: 10.1103/PhysRevD.100.043009 +Sari, R., & Fragione, G. 2019, ApJ, 885, 24, +doi: 10.3847/1538-4357/ab43df +Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. 2002, The Astrophysical Journal, 571, 30, -Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, doi: 10.1086/339917 -ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, -Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1086/519309 -doi: 10.1088/0004-637X/795/2/102 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, -Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, doi: 10.1086/156521 -L35, doi: 10.3847/2041-8213/ab4fed Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, -Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367 -888, L8, doi: 10.3847/2041-8213/ab5e3b Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, -O’Leary,R.M.,Kocsis,B.,&Loeb,A.2009,MNRAS,395, K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700 -2127, doi: 10.1111/j.1365-2966.2009.14653.x Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, -O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., doi: 10.1086/173190 +doi: 10.1086/339917 +Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, +doi: 10.1086/519309 +Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi: 10.1051/0004-6361/201730452 +Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, +doi: 10.1086/156521 +Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, +MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367 +Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, +K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700 +Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, +doi: 10.1086/173190 Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739, -& O’Shaughnessy, R. 2006, ApJ, 637, 937, doi: 10.1093/mnras/stx1576 -doi: 10.1086/498446 —. 
2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576 -Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Spitzer, L. 1987, Dynamical evolution of globular clusters -Stephen R., J. 2016, ApJ, 823, 113, Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv -doi: 10.3847/0004-637X/823/2/113 e-prints. https://arxiv.org/abs/1603.02709 -Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d -435, doi: 10.1103/PhysRev.131.435 +Stone, N. C., K¨ upper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi: 10.1093/mnras/stv2281 The LIGO Scientific Collaboration, the Virgo -—. 1963b, Physical Review, 131, 435, Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, -doi: 10.1103/PhysRev.131.435 arXiv:2009.01075. https://arxiv.org/abs/2009.01075 -Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J., —. 2020b, arXiv e-prints, arXiv:2009.01190. -& McMillan, S. L. W. 2004, Nature, 428, 724, https://arxiv.org/abs/2009.01190 -doi: 10.1038/nature02448 Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A. -Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, 2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31 -528, L17, doi: 10.1086/312422 Valiante, R., Schneider, R., Volonteri, M., & Omukai, K. -—. 2002, ApJ, 576, 899, doi: 10.1086/341798 2016, Monthly Notices of the Royal Astronomical -Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, Society, 457, 3356, doi: 10.1093/mnras/stw225 -doi: 10.1088/0004-637X/780/2/187 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, -Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, G. N. 2021, MNRAS, 504, 146, -A56, doi: 10.1051/0004-6361/202037710 doi: 10.1093/mnras/stab842 -IMBH Formation in Galactic Nuclei 11 -Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 
-2014, Monthly Notices of the Royal Astronomical +2016, Monthly Notices of the Royal Astronomical +Society, 457, 3356, doi: 10.1093/mnras/stw225 +Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, +G. N. 2021, MNRAS, 504, 146, +doi: 10.1093/mnras/stab842 +14 Rose et al. +Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, +doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi: 10.1086/379767 +Woosley, S. E. 2017, ApJ, 836, 244, +doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi: 10.1046/j.1365-8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi: 10.1088/0004-637X/761/2/129 +Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +2014, Monthly Notices of the Royal Astronomical Society, 440, 1263, doi: 10.1093/mnras/stu351 -doi: 10.3847/1538-4357/ac088d Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, +Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, arXiv:2011.04653. https://arxiv.org/abs/2011.04653 -Woosley, S. E. 2017, ApJ, 836, 244, Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26, -doi: 10.3847/1538-4357/836/2/244 doi: 10.3847/1538-4365/aab14f +doi: 10.3847/1538-4365/aab14f diff --git a/read/results/pdfplumber/2201.00029.txt b/read/results/pdfplumber/2201.00029.txt index 2700519..9884fb4 100644 --- a/read/results/pdfplumber/2201.00029.txt +++ b/read/results/pdfplumber/2201.00029.txt @@ -51,8 +51,9 @@ The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon previous studies, this research investigated novel techniques of analyzing variability in white dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on the star, allowing for the validation of results using our methods. KIC 8626021 has an effective -temperature of 29,700 K, log g = 7.890, and mass of 0.56 M (Córsico, 2020). 
Other research +temperature of 29,700 K, log g = 7.890, and mass of 0.56 M ☉ +(Córsico, 2020). Other research has found that this white dwarf is the DBV with the highest known temperature, and its helium layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too noisy to draw many conclusions, other FTs of short-cadence data have been performed to find @@ -67,10 +68,14 @@ analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, were performed in Mathematica. The re-binning process consisted of summing adjacent light curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one hour, and then repeating this process on the data sample for a total of three times. In addition, a -significant detection was defined as being 3 above the mean of the relative flux, and 0 on the -graphs below represents this 3 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To -find the SNR, we converted to decibels. Usi𝝈ng these SNRs, we were able to easily identify -improvement in signal strength𝝈. +significant detection was defined as being 3 +𝝈 +above the mean of the relative flux, and 0 on the +graphs below represents this 3 +𝝈 +cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To +find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify +improvement in signal strength. Results Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs @@ -95,30 +100,47 @@ binning process. 6 FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. 
In -addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 and are +addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 +𝝈 +and are nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a -starspot (Santos et al., 2017). 𝝈 +starspot (Santos et al., 2017). 7 -Q7 Significant Light Corrected Flux Period (days) Signal-to-Noise -Data Points Variability Magnitude (dB) -Frequency (ppm) +Q7 Significant +Data Points +Light +Variability +Frequency (µHz) -Q7 First 5.886 -1.198 1.966 9.9 +Corrected Flux +Magnitude +(ppm) +Period (days) Signal-to-Noise +(dB) +Q7 First Iteration +5.886 -1.198 1.966 9.9 Q7 Re-bin 1 5.886 -1.477 1.966 12.8 Q7 Re-bin 2 5.889 0.597 1.965 19.2 TABLE I: The table displays the various frequencies collected from Q7 and the information found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, and therefore was not used in any calculations determining the average period of rotation. The -values under corrected flux magnitude are relative to our significant frequency cutoff of 3 , thus +values under corrected flux magnitude are relative to our significant frequency cutoff of 3 𝝈, thus negative numbers are under the cutoff. -𝝈 -Q13 Significant Light Corrected Flux Period (days) Signal-to-Noise -Data Points Variability Magnitude (dB) -Frequency (ppm) +Q13 Significant +Data Points +Light +Variability +Frequency (µHz) -Q13 First 5.784 1.555 2.001 15.6 +Corrected Flux +Magnitude +(ppm) +Period (days) Signal-to-Noise +(dB) +Q13 First Iteration +5.784 1.555 2.001 15.6 Q13 Re-bin 1 5.784 2.873 2.001 17.7 Q13 Re-bin 2 5.787 4.938 2.000 22.6 Q13 Re-bin 3 5.787 6.909 2.000 26.3 @@ -128,8 +150,7 @@ TABLE II: The table displays the various frequencies collected from Q13 and the found through calculations to find period and SNR. 
The last two significant frequencies (11.641 µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in further detail in the Conclusions section of this paper. The values under corrected flux magnitude -are relative to our significant frequency cutoff of 3 , thus negative numbers are under the cutoff. -𝝈 +are relative to our significant frequency cutoff of 3 𝝈, thus negative numbers are under the cutoff. 8 First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 0.933 0.933 0.215 0.216 @@ -160,9 +181,10 @@ First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µ 16.463 16.894 TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) -above the cutoff of 3 The minor shifting of significant frequencies between re-bins is a by- -product of the method, and we calculated for such errors when finding our average. +above the cutoff of 3 𝝈. +The minor shifting of significant frequencies between re-bins is a by- +product of the method, and we calculated for such errors when finding our average. 9 First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 3.094 2.018 2.019 1.951 @@ -185,17 +207,18 @@ First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µ 15.881 16.823 TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) -above the cutoff of 3 . The minor shifting of significant frequencies between re-bins is a by- +above the cutoff of 3 𝝈. The minor shifting of significant frequencies between re-bins is a by- product of the method, and we calculated for such errors when finding our average. -𝝈 Conclusions As our research used the long-cadence data from Kepler, much of the high-frequency variability due to gravitational wave pulsations is lost. 
However, this presents an opportunity to verify our results with the work of research groups that analyzed short-cadence data.With the data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the -periods and accounting for a 1 margin of error, our research hypothesizes that the rotation +periods and accounting for a 1 +𝝈 +margin of error, our research hypothesizes that the rotation period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation -period to be 1.8 ± 0.4 days, by𝝈 analyzing the structures of independent modes (Bischoff-Kim et +period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and these periods indicate that the more precise significant period identified through our re-binning relates to the rotation of the white dwarf. @@ -207,9 +230,11 @@ processes. The frequency 5.464 µHz rises as another significant frequency; howe that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR improvement ranging from 1.1 dB to 1.3 dB. Through the re-binning process, more lines, or significant frequencies, appeared above -the 3 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to +the 3 +𝝈 +cutoff, particularly at lower frequencies. These findings suggest that as an alternative to short-cadence analysis, the re-binning process of long-cadence data can be used to identify -signif𝝈icant lower frequencies in white dwarfs. The methods we used are also simple and +significant lower frequencies in white dwarfs. The methods we used are also simple and replicable, which allows even those with less experience to quickly analyze the large amounts of data being collected by orbiting telescopes, such as the currently active TESS (Transiting Exoplanet Survey Satellite) telescope. 
diff --git a/read/results/pdfplumber/2201.00037.txt b/read/results/pdfplumber/2201.00037.txt index 99ce833..35b3bae 100644 --- a/read/results/pdfplumber/2201.00037.txt +++ b/read/results/pdfplumber/2201.00037.txt @@ -3,19 +3,55 @@ The influence of a fluid core and a solid inner core on the Cassini sate of Mercury Mathieu Dumberry 1 1DepartmentofPhysics,UniversityofAlberta,Edmonton,Alberta,Canada. -1202 Key Points: • The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. -ceD • For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid cores into a common precession motion. -• The larger the inner core is, the more the obliquity of the polar moment of inertia ap- 13 +• The larger the inner core is, the more the obliquity of the polar moment of inertia ap- proaches that expected for a rigid planet. -]PE.hp-ortsa[ -1v73000.1022:viXra Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca –1– +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +0 +3 +7 +v +1 +[ +a +s +t +r +o +- +p +h +. +E +P +] +3 +1 +D +e +c +2 +0 +2 +1 Confidential manuscript submitted to JGR-Planets Abstract We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core @@ -54,8 +90,7 @@ its present-day orientation can be reconstructed from ephemerides data [Yseboodt 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 yr with an inclination angle of I =8.5330◦ between the orbit and Laplace plane normals [Ba- -land et al., 2017]. Measurements of the obliquity ε , defined as the angle of misalignment be- -m +land et al., 2017]. 
Measurements of the obliquity ε m, defined as the angle of misalignment be- tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques, including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter- rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Mar- @@ -65,37 +100,51 @@ all techniques yield an obliquity which is coplanar with the orbit and Laplace p and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042±0.08 –2– Confidential manuscript submitted to JGR-Planets -êL êI -3 3 -I êp -ε -m 3 êL -Ω 3 +I +descending +node of orbit +Ω p -êI -3 -ple aq nu eatorial I a ns oc de en od fi n og -rbit +ê 3I +I +ê 3L +ε +m +I ê 3p +ascending +node of orbit +descending +node of equator +equatorial +plane +orbital +direction +S +ê 3I ê 3L M +ε +m orbital -o dr irb ei cta til plane -on -ε -descending m -node of equator S -I -descending -node of orbit Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded -rectangle) and the Cassini state of Mercury. The normal to the orbital plane (eˆI) is offset from the nor- +rectangle) and the Cassini state of Mercury. The normal to the orbital plane (ˆ eI 3) is offset from the nor- +mal to the Laplace plane (ˆ eL 3) by an angle I = 8.5330◦. The symmetry axis of the mantle ˆ ep +3 +is offset +from ˆ eI +3 +by ε +m +≈ 2 arcmin. ˆ eI 3 -mal to the Laplace plane (eˆL) by an angle I = 8.5330◦. The symmetry axis of the mantle eˆp is offset -3 3 -from eˆI by ε ≈ 2 arcmin. eˆI and eˆp are coplanar with, and precess about, eˆL in a retrograde direction -3 m 3 3 3 -at frequency Ω = 2π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit +and ˆ ep +3 +are coplanar with, and precess about, ˆ eL +3 +in a retrograde direction +at frequency Ω p += 2π/325,513 yr−1. 
The blue (orange) shaded region indicates the portion of the orbit when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Gen- ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. @@ -115,15 +164,15 @@ approximate limit of 800 km on the inner core radius [Grott et al., 2011]. Howev core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. –3– Confidential manuscript submitted to JGR-Planets -With a fluid core, and possibly a solid inner core, the observed obliquity ε reflects the +With a fluid core, and possibly a solid inner core, the observed obliquity ε m +reflects the orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dis- sipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin- symmetry axis of the inner core should both also precess about the normal to the Laplace plane in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although -their obliquity angles may be different than ε . Whether the spin axis of the fluid core is brought -m +their obliquity angles may be different than ε m. Whether the spin axis of the fluid core is brought into an alignment with the mantle obliquity depends primarily on the pressure torque (also re- ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e, 1910]. The more flat- @@ -139,8 +188,7 @@ thermore, viscous and electromagnetic (EM) coupling at the CMB can further restr alignment between the mantle and core [Peale et al., 2014]. 
If an inner core is present, its obliquity angle is determined by the sum of the torques act- ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal- -ogous to the torque applied on the tilted mantle that sets the obliquity ε . In addition, the -m +ogous to the torque applied on the tilted mantle that sets the obliquity ε m. In addition, the tilt of the inner core also depends on the gravitational torque imposed by the mantle and the pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav- itational torque dominates, the inner core tilt is expected to remain closely aligned with the @@ -181,29 +229,33 @@ symmetry axis of the mantle and gravity field may differ. 2.1 The interior structure of Mercury Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted -by r , r , r , and R, and their densities by ρ , ρ , ρ , and ρ , respectively. The inner core ra- -s f m s f m c -dius r corresponds to the ICB radius, the fluid core radius r to the CMB radius, and R= -s f +by r s, r f, r m, and R, and their densities by ρ s, ρ f, ρ m, and ρ c, respectively. The inner core ra- +dius r +s +corresponds to the ICB radius, the fluid core radius r +f +to the CMB radius, and R= 2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pres- sure with depth are not negligible in the core of Mercury. However adopting uniform densities simplifies the analytical expressions of the model while still capturing the first order rotational dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same strategy facilitates comparisons between our results. -We build our interior model as detailed in Peale et al. [2016]. 
We first specify r , ρ (or -s s -a density contrast at the ICB), the crustal density ρ and crustal thickness h=R−r . The -c m -three unknowns r , ρ and ρ are then solved such that the interior model is consistent with -f f m -the known mass M and chosen values of the moments of inertia of the whole planet C and that -of the mantle and crust C . +We build our interior model as detailed in Peale et al. [2016]. We first specify r s, ρ +s +(or +a density contrast at the ICB), the crustal density ρ +c +and crustal thickness h=R−r m. The +three unknowns r f, ρ +f +and ρ m +are then solved such that the interior model is consistent with +the known mass M and chosen values of the moments of inertia of the whole planet C and that +of the mantle and crust C m. Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) -by (cid:15) , defined as the difference between the mean equatorial and polar radii, divided by the mean -i -spherical radius. Likewise, we denote the equatorial flattening by the variable ξ , defined as the -i +by (cid:15) i, defined as the difference between the mean equatorial and polar radii, divided by the mean +spherical radius. Likewise, we denote the equatorial flattening by the variable ξ i, defined as the difference between the maximum and minimum equatorial radii, divided by the mean spher- ical radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equa- torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. @@ -211,119 +263,227 @@ The measured polar and equatorial flattenings are taken from Perry et al. [2015] numerical values are given in Table 1. We then assume that the ICB and CMB are both at hy- drostatic equilibrium with the imposed gravitational potential induced by the flattenings at the CrMB and surface. 
The flattenings at all interior boundaries are specified such that they are -consistent with the observed degree 2 spherical harmonic coefficients of gravity J and C ; their -2 22 -numerical values are given in Table 1. Specifically, J and C are connected to the principal -2 22 +consistent with the observed degree 2 spherical harmonic coefficients of gravity J +2 +and C 22; their +numerical values are given in Table 1. Specifically, J +2 +and C +22 +are connected to the principal moments of inertia of Mercury (C >B >A) and to the polar and equatorial flattenings by -J = C−A¯ = 8π 1 (cid:2) (ρ −ρ )r5(cid:15) +(ρ −ρ )r5(cid:15) +(ρ −ρ )r5 (cid:15) +ρ R5(cid:15) (cid:3) , (1a) -2 MR2 15MR2 s f s s f m f f m c m m c r -C = B−A = 8π 1 (cid:2) (ρ −ρ )r5ξ +(ρ −ρ )r5ξ +(ρ −ρ )r5 ξ +ρ R5ξ (cid:3) . (1b) -22 4MR2 154MR2 s f s s f m f f m c m m c r -where A¯ is the mean equatorial moment of inertia defined below. The same procedure was used +J +2 += C− ¯ A +MR2 += 8π +15 +1 +MR2 +(cid:2) (ρ s−ρ f)r5 s(cid:15) s+(ρ +f +−ρ m)r5 f(cid:15) +f ++(ρ m−ρ c)r5 m(cid:15) m+ρ cR5(cid:15) r(cid:3) , (1a) +C +22 += B−A +4MR2 += 8π +15 +1 +4MR2 +(cid:2) (ρ s−ρ f)r5 sξ s+(ρ +f +−ρ m)r5 fξ +f ++(ρ m−ρ c)r5 mξ m+ρ cR5ξ r(cid:3) . (1b) +where ¯ A is the mean equatorial moment of inertia defined below. The same procedure was used in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry –5– Confidential manuscript submitted to JGR-Planets Mercury Parameter Numerical value Reference mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] -rotation rate, Ω =1.5n 2π/58.64623 day−1 Stark et al. [2015b] +rotation rate, Ω o -orbit precession rate, Ω 2π/325,513 yr−1 Baland et al. [2017] +=1.5n 2π/58.64623 day−1 Stark et al. [2015b] +orbit precession rate, Ω p -Poincar´e number, δω =Ω /Ω 4.9327×10−7 -p o -orbital eccentricity, e 0.20563 Baland et al. [2017] +2π/325,513 yr−1 Baland et al. 
[2017] +Poincar´e number, δω =Ω p/Ω +o +4.9327×10−7 +orbital eccentricity, e c +0.20563 Baland et al. [2017] orbital inclination, I 8.5330◦ Baland et al. [2017] mean planetary radius, R 2439.360 km Perry et al. [2015] mass, M 3.3012×1023 kg Genova et al. [2019] -mean density, ρ¯ 5429.5 kg m−3 -J 5.0291×10−5 Genova et al. [2019] +mean density, ¯ ρ 5429.5 kg m−3 +J 2 -C 8.0415×10−6 Genova et al. [2019] +5.0291×10−5 Genova et al. [2019] +C 22 -polar surface flattening, (cid:15) 6.7436×10−4 Perry et al. [2015] +8.0415×10−6 Genova et al. [2019] +polar surface flattening, (cid:15) r -equatorial surface flattening, ξ 5.1243×10−4 Perry et al. [2015] +6.7436×10−4 Perry et al. [2015] +equatorial surface flattening, ξ r +5.1243×10−4 Perry et al. [2015] Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636×109 -m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4πρ¯R3 =M. The numerical +m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4π 3 -values of (cid:15) and ξ are calculated from (cid:15) =(a¯−c)/R and ξ =(a−b)/R, where a¯= 1(a+b) and where -r r r r 2 +¯ ρR3 =M. The numerical +values of (cid:15) +r +and ξ +r +are calculated from (cid:15) +r +=(¯ a−c)/R and ξ +r +=(a−b)/R, where ¯ a= 1 2(a+b) and where a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor -axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J and C are -2 22 +axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J +2 +and C +22 +are computed from Equation (4) in the Supporting Information of Genova et al. [2019]. and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. 
Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog- raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼2◦ and an equatorial offset of ∼15◦ [Perry et al., 2015]. Once the densities and flattenings of all interior regions are known, we can specify the mo- -ments of inertia of the fluid core (C > B > A ) and solid inner core (C > B > A ) -f f f s s s +ments of inertia of the fluid core (C +f +> B +f +> A f) and solid inner core (C +s +> B +s +> A s) along with the mean equatorial moments of inertia -1 1 1 -A¯= (A+B), A¯ = (A +B ), A¯ = (A +B ). (2) -2 f 2 f f s 2 s s -From these, we define the polar (e, e , e ) and equatorial (γ, γ ) dynamical ellipticities of the -f s s +¯ A= +1 +2(A+B), ¯ A +f += +1 +2(A +f ++B f), ¯ A +s += +1 +2(A s+B s). (2) +From these, we define the polar (e, e f, e s) and equatorial (γ, γ s) dynamical ellipticities of the whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which enter our rotational model, -C−A¯ C −A¯ C −A¯ -e= e = f f e = s s , (3a) -A¯ f A¯ s A¯ -f s -B−A B −A -γ = γ = s s . (3b) -A¯ s A¯ -s -We further note that e and γ are connected to J and C by -2 22 -MR2 4MR2 -e= J , γ = C . (4) -A¯ 2 A¯ 22 +e= +C− ¯ A +¯ A +e +f += +C +f +− ¯ A +f +¯ A +f +e +s += +C s− ¯ A +s +¯ A +s +, (3a) +γ = +B−A +¯ A +γ +s += +B s−A +s +¯ A +s +. (3b) +We further note that e and γ are connected to J +2 +and C +22 +by +e= +MR2 +¯ A +J 2, γ = +4MR2 +¯ A +C 22. (4) –6– Confidential manuscript submitted to JGR-Planets -êp -a) 3 b) -êI Ω -3 ê 3s ê 3p θ -ε m Ω -êL m θ n s -3 I ê 3I Cassini -θ s Ω ê 3L plane +m +θ n +θ s θ f +Ω +Ω +s +Ω +f +ê 3p +ê 3s +ê 3I +I +ε +m +θ p -θ f I ε -m êp -2 +ê 3L +ê 1p +ê 2p +Cassini +plane +ωΩ ot +ê 3I +I ε +m +ê 3p ê 1 -ωΩt -o -êp -1 êp -2 +ê 2p +ê 3L +a) b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) -in a frame attached to the rotating mantle. 
The orbit normal (eˆI) is tilted by an angle I = 8.533◦ from -3 -the Laplace normal (eˆL) and the symmetry axis of Mercury’s mantle (eˆp) is tilted by an obliquity ε -3 3 m -with respect to eˆI. Shown in (a) are the orientations of the symmetry axis of the inner core (eˆs), the -3 3 -rotation rate vectors of the mantle (Ω), fluid core (Ω ) and inner core (Ω ) and angles θ , θ , θ , θ -f f p n m f -and θ in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer +in a frame attached to the rotating mantle. The orbit normal (ˆ eI 3) is tilted by an angle I = 8.533◦ from +the Laplace normal (ˆ eL 3) and the symmetry axis of Mercury’s mantle (ˆ ep 3) is tilted by an obliquity ε +m +with respect to ˆ eI 3. Shown in (a) are the orientations of the symmetry axis of the inner core (ˆ es 3), the +rotation rate vectors of the mantle (Ω), fluid core (Ω f) and inner core (Ω f) and angles θ p, θ n, θ m, θ +f +and θ s +in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial -mantle axes eˆp and eˆp with respect to the Cassini plane. Viewed in the frame attached to the rotating -1 2 -mantle (b), the Cassini plane is rotating at frequency ωΩ = −Ω − Ω cosI in the longitudinal direc- -o o p +mantle axes ˆ ep +1 +and ˆ ep +2 +with respect to the Cassini plane. Viewed in the frame attached to the rotating +mantle (b), the Cassini plane is rotating at frequency ωΩ +o += −Ω +o +− Ω pcosI in the longitudinal direc- tion. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration. 
–7– @@ -332,81 +492,120 @@ Confidential manuscript submitted to JGR-Planets Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is 87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These de- -fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω = 2π/58.64623 +fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω o -day−1, with Ω =1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby += 2π/58.64623 +day−1, with Ω o -the orientations of the orbit normal (eˆI) and of the mantle symmetry axis (eˆp) are both copla- -3 3 -nar with, and precess about, the normal to the Laplace plane (eˆL). The orientation of the Laplace -3 +=1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby +the orientations of the orbit normal (ˆ eI 3) and of the mantle symmetry axis (ˆ ep 3) are both copla- +nar with, and precess about, the normal to the Laplace plane (ˆ eL 3). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present -purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between eˆL and eˆI -3 3 -is the orbital inclination I =8.5330◦ [Baland et al., 2017], the angle between eˆI and eˆp is the -3 3 -obliquity ε and the angle between eˆL and eˆp is θ = I +ε . The precession of eˆI and eˆp -m 3 3 p m 3 3 -about the Laplace pole is retrograde with frequency Ω =2π/325,513 yr−1 [Baland et al., 2017]. +purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆ eL +3 +and ˆ eI +3 +is the orbital inclination I =8.5330◦ [Baland et al., 2017], the angle between ˆ eI +3 +and ˆ ep +3 +is the +obliquity ε +m +and the angle between ˆ eL +3 +and ˆ ep +3 +is θ +p += I +ε m. 
The precession of ˆ eI +3 +and ˆ ep +3 +about the Laplace pole is retrograde with frequency Ω p +=2π/325,513 yr−1 [Baland et al., 2017]. The mantle and crust are welded together and form a single rotating region which we re- fer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes of the mantle are expected to remain in close alignment, but they do not coincide exactly. We -define the rotation rate vector of the mantle by Ω, and its misalignment from eˆp by an angle +define the rotation rate vector of the mantle by Ω, and its misalignment from ˆ ep 3 -θ . Note that θ (cid:28) ε and it is often the spin axis of Mercury which is used to define the -m m m -obliquity ε [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, eˆp and Ω would -m 3 +by an angle +θ m. Note that θ +m +(cid:28) ε +m +and it is often the spin axis of Mercury which is used to define the +obliquity ε +m +[e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆ ep +3 +and Ω would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and -the angles I, ε and θ would completely describe the Cassini state. The presence of a fluid -m m +the angles I, ε +m +and θ +m +would completely describe the Cassini state. The presence of a fluid outer core and solid inner core require three additional orientation vectors and angles. The sym- -metry axis of the inner core is defined by unit vector eˆs and its misalignment from eˆp by an -3 3 -angle θ . The rotation vectors of the fluid core and inner core are defined as Ω and Ω , re- -n f s +metry axis of the inner core is defined by unit vector ˆ es +3 +and its misalignment from ˆ ep +3 +by an +angle θ n. The rotation vectors of the fluid core and inner core are defined as Ω +f +and Ω s, re- spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an- -gles θ and θ (see Figure 2a). 
The rotation and symmetry axes of the inner core remain in close -f s -alignment, so θ ≈θ . To be formal in our definition of the different angles of misalignment, -n s +gles θ +f +and θ +s +(see Figure 2a). The rotation and symmetry axes of the inner core remain in close +alignment, so θ +n +≈θ s. To be formal in our definition of the different angles of misalignment, for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. -At equilibrium in the Cassini state, the three orientation vectors (eˆI, eˆp, eˆs) and three -3 3 3 -rotation vectors (Ω, Ω , Ω ) are forced to precess about eˆL at the same frequency. If we ne- -f s 3 +At equilibrium in the Cassini state, the three orientation vectors (ˆ eI 3, ˆ ep 3, ˆ es 3) and three +rotation vectors (Ω, Ω f, Ω s) are forced to precess about ˆ eL +3 +at the same frequency. If we ne- glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed -in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω . Viewed -p -in the frame attached to the mantle rotating at sidereal frequency Ω , the Cassini plane is ro- -o -tating in a retrograde direction at frequency ωΩ (see Figure 2b), where ω, expressed in cycles +in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω p. Viewed +in the frame attached to the mantle rotating at sidereal frequency Ω o, the Cassini plane is ro- +tating in a retrograde direction at frequency ωΩ o +(see Figure 2b), where ω, expressed in cycles per Mercury day, is equal to -ω =−1−δωcos(θ ). (5) -p -The factor δω = Ω /Ω = 4.933×10−7 is the Poincar´e number, expressing the ratio of the -p o +ω =−1−δωcos(θ p). (5) +The factor δω = Ω p/Ω +o += 4.933×10−7 is the Poincar´e number, expressing the ratio of the forced precession to sidereal rotation frequencies. 
The invariance of the Laplace plane normal as seen in the mantle frame is expressed as d -eˆL+Ω×eˆL =0, (6) -dt 3 3 +dtˆ eL +3 ++Ω׈ eL +3 +=0, (6) or equivalently, by Equation (19e) of Stys and Dumberry [2018], -ωsin(θ )+sin(θ +θ )=0. (7) -p m p +ωsin(θ p)+sin(θ m+θ p)=0. (7) –8– Confidential manuscript submitted to JGR-Planets -This expresses a formal connection between θ and θ which is independent of the interior struc- -p m -ture of Mercury. Using Equation (5) and cos(θ )→1, this connection can be rewritten as -m -sin(θ )=δω sin(θ ). (8) -m p -and thus the relative amplitudes of θ and θ depend of the Poincar´e number δω. -m p +This expresses a formal connection between θ +p +and θ +m +which is independent of the interior struc- +ture of Mercury. Using Equation (5) and cos(θ m)→1, this connection can be rewritten as +sin(θ m)=δω sin(θ p). (8) +and thus the relative amplitudes of θ +m +and θ +p +depend of the Poincar´e number δω. To investigate Mercury’s response to the gravitational torque from the Sun, we take ad- vantage of the framework developed in Mathews et al. [1991] to model the forced nutations of Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into ac- @@ -423,49 +622,61 @@ ods of Mercury, the gravitational solar torque that is relevant to the Cassini s torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, point- ing in the same direction as the vector connecting the Sun to the descending node of Mercury’s orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque -is periodic, rotating at frequency ωΩ . Setting the equatorial directions eˆp and eˆp to correspond -o 1 2 +is periodic, rotating at frequency ωΩ o. 
Setting the equatorial directions ˆ ep +1 +and ˆ ep +2 +to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial components of this periodic applied torque in a compact form as -Γ (t)+iΓ (t)=−iΓ˜(ω) exp[iωΩ t], (9) -1 2 o -where Γ˜(ω) represents the amplitude of the torque at frequency ωΩ . In response to this torque, -o -the axes defining all angles (θ , ε , θ , θ , θ , θ ) as viewed in the mantle frame are also ro- -p m m f s n -tating at frequency ωΩ (see Figure 2). The longitudinal direction of each of these angles at +Γ 1(t)+iΓ 2(t)=−i˜ Γ(ω) exp[iωΩ ot], (9) +where ˜ Γ(ω) represents the amplitude of the torque at frequency ωΩ o. In response to this torque, +the axes defining all angles (θ p, ε m, θ m, θ f, θ s, θ n) as viewed in the mantle frame are also ro- +tating at frequency ωΩ o +(see Figure 2). The longitudinal direction of each of these angles at a specific time t can then also be written in the equatorial complex plane and is proportional -to exp[iωΩ t]. For instance, the two equatorial time-dependent components θ and θ of the -o m1 m2 -angle θ , as seen in the mantle frame, can be written as -m -θ (t)+iθ (t)=m˜ exp[iωΩ t], (10a) -m1 m2 o +to exp[iωΩ ot]. For instance, the two equatorial time-dependent components θ +m1 +and θ +m2 +of the +angle θ m, as seen in the mantle frame, can be written as +θ m1(t)+iθ m2(t)= ˜ m exp[iωΩ ot], (10a) where -m˜ ≡m˜(ω)=Re[m˜]+iIm[m˜], (10b) -is the amplitude at frequency ωΩ . Equivalent definitions apply for all other angles, with the -o +˜ m≡ ˜ m(ω)=Re[˜ m]+iIm[˜ m], (10b) +is the amplitude at frequency ωΩ o. Equivalent definitions apply for all other angles, with the connection as follows: -θ ⇔m˜ , θ ⇔m˜ , θ ⇔m˜ , θ ⇔n˜ , θ ⇔p˜, ε ⇔ε˜ . (11) -m f f s s n s p m m -The notation m˜, m˜ , m˜ , n˜ follows that introduced in the original model of Mathews et al. [1991]. -f s s +θ +m +⇔ ˜ m, θ +f +⇔ ˜ m f, θ +s +⇔ ˜ m s, θ +n +⇔ ˜ n s, θ +p +⇔ ˜ p, ε +m +⇔ ˜ ε m. 
(11) +The notation ˜ m, ˜ m f, ˜ m s, ˜ n +s +follows that introduced in the original model of Mathews et al. [1991]. Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re- sponse to the applied torque as a result of dissipation, for instance from viscous or EM coupling –9– Confidential manuscript submitted to JGR-Planets at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely real. We concentrate our analysis in this work on the real part of the solutions, which corre- -sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ε˜ -m -corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε , +sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ ε m +corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε m, though we keep the tilde notation in the presentation of our results to emphasize that it rep- -resents the real part of the solution from our system. Furthermore, since m˜ (cid:28) ε˜ , we often -m -refer to ε˜ as the orientation of spin axis of the mantle, since the Cassini state of Mercury is +resents the real part of the solution from our system. Furthermore, since ˜ m (cid:28) ˜ ε m, we often +refer to ˜ ε m +as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. The model of Mathews et al. [1991] is developed under the assumption of small angles as appropriate for the nutations on Earth. The details on how the equations of the model are de- @@ -473,119 +684,282 @@ rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. T tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer- cury, the fluid core, and the inner core in the reference frame of the rotating mantle. 
These three equations are -(cid:34) (cid:35) -A¯ A¯ A¯ 1 (cid:16) (cid:17) -(ω−e)m˜ +(1+ω) fm˜ + sm˜ +α e sn˜ = Γ˜ , (12a) -A¯ f A¯ s 3 s A¯ s iΩ2A¯ sun +(ω−e)˜ +m+(1+ω)(cid:34) +¯ A +f +¯ A +˜ m +f ++ +¯ A +s +¯ A +˜ m s+α 3e +s +¯ A +s +¯ A +˜ n +s(cid:35) += +1 +iΩ2 +o +¯ +A(cid:16) +˜ Γ +sun(cid:17) +, (12a) +ω˜ m+(1+ω+e f) ˜ m +f +−ωα 1e +s +¯ A +s +¯ A +f˜ n +s += +1 +iΩ2 +o +¯ A +f(cid:16) +−˜ Γ cmb−˜ Γ +icb(cid:17) +, (12b) +(ω−α 3e s)˜ m+α 1e s˜ m +f ++(1+ω) ˜ m s+(1+ω−α 2)e s˜ n +s += +1 +iΩ2 o -A¯ 1 (cid:16) (cid:17) -ωm˜ +(1+ω+e )m˜ −ωα e sn˜ = −Γ˜ −Γ˜ , (12b) -f f 1 s A¯ s iΩ2A¯ cmb icb -f o f -1 (cid:16) (cid:17) -(ω−α e )m˜ +α e m˜ +(1+ω)m˜ +(1+ω−α )e n˜ = Γ˜s +Γ˜ , (12c) -3 s 1 s f s 2 s s iΩ2A¯ sun icb -o s +¯ A +s(cid:16) +˜ Γs sun+˜ Γ +icb(cid:17) +, (12c) and a fourth equation consists of a kinematic relation that expresses the change in the orien- tation of the inner core figure as a result of its own rotation, -m˜ +ωn˜ =0. (12d) -s s -In these equations, the parameters α , α and α involve the density contrast at the ICB -1 2 3 +˜ m s+ω˜ n +s +=0. (12d) +In these equations, the parameters α 1, α +2 +and α +3 +involve the density contrast at the ICB and are given by +α +1 += +ρ +f ρ -α = f , α =1−α , α =α −α α , (13a) -1 ρ 3 1 2 1 3 g s -where the parameter α is a measure of the ratio of the gravitational to inertial torque applied +, α +3 +=1−α 1, α +2 +=α 1−α 3α g, (13a) +where the parameter α g +is a measure of the ratio of the gravitational to inertial torque applied on the inner core, +α +g += 8πG -α = [ρ ((cid:15) −(cid:15) )+ρ ((cid:15) −(cid:15) )+ρ (cid:15) ] , (13b) -g 5Ω2 c r m m m f f f +5Ω2 o +[ρ c((cid:15) r−(cid:15) m)+ρ m((cid:15) m−(cid:15) f)+ρ f(cid:15) f] , (13b) where G is the gravitational constant. -Γ˜ is the amplitude of the gravitational torque by the Sun on the whole of Mercury. 
For +˜ Γ sun -a small mantle obliquity ε˜ and a small inner core tilt n˜ , it is given by -m s -(cid:18) A¯ (cid:19) -Γ˜ =−iΩ2A¯ φ ε˜ + sα φ n˜ , (14) -sun o m m A¯ 3 s s +is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For +a small mantle obliquity ˜ ε +m +and a small inner core tilt ˜ n s, it is given by +˜ Γ +sun +=−iΩ2 +o +¯ +A(cid:18) +φ m˜ ε m+ +¯ A +s +¯ A +α 3φ s˜ n +s(cid:19) +, (14) where –10– Confidential manuscript submitted to JGR-Planets -3n2 (cid:20) 1 (cid:21) -φ = G e+ G γ , (15a) -m 2Ω2 210 2 201 +φ +m += +3 2n2 +Ω2 o -3n2 (cid:20) 1 (cid:21) -φ = G e + G γ , (15b) -s 2Ω2 210 s 2 201 s +(cid:20) +G 210e+ +1 +2G +201γ(cid:21) +, (15a) +φ +s += +3 2n2 +Ω2 o -and where G and G are functions of the orbital eccentricity e , -210 201 c +(cid:20) +G 210e s+ +1 +2G 201γ +s(cid:21) +, (15b) +and where G +210 +and G +201 +are functions of the orbital eccentricity e c, +G +210 += 1 -G = , (16a) -210 (1−e2)3/2 +(1−e2 c)3/2 +, (16a) +G +201 += +7 +2e c− +123 +16 +e3 c -7 123 489 -G = e − e3+ e5. (16b) -201 2 c 16 c 128 c -The gravitational torque by the Sun acting on the inner core alone, Γ˜s , is ++ +489 +128e5 c. (16b) +The gravitational torque by the Sun acting on the inner core alone, ˜ Γs sun, is +˜ Γs sun -Γ˜s =−iΩ2A¯ α φ (ε˜ +n˜ ). (17) -sun o s 3 s m s -Γ˜ and Γ˜ are the torques from tangential stresses by the fluid core on the mantle at the -cmb icb +=−iΩ2 +o +¯ A sα 3φ s(˜ ε m+˜ n s). (17) +˜ Γ +cmb +and ˜ Γ +icb +are the torques from tangential stresses by the fluid core on the mantle at the CMB and on the inner core at the ICB, respectively. These torques can be parameterized in -terms of dimensionless complex coupling constants K and K and the differential angu- -icb cmb +terms of dimensionless complex coupling constants K +icb +and K +cmb +and the differential angu- lar velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002], -Γ˜ =iΩ2A¯ K (m˜ −m˜ ), (18a) -icb o s icb f s -Γ˜ =iΩ2A¯ K m˜ . 
(18b) -cmb o f cmb f -Specific expressions for K and K are delayed to sections 4 and 5 when we consider the -icb cmb +˜ Γ +icb +=iΩ2 +o +¯ A sK icb(˜ m +f +− ˜ m s), (18a) +˜ Γ +cmb +=iΩ2 +o +¯ A fK +cmb +˜ m f. (18b) +Specific expressions for K +icb +and K +cmb +are delayed to sections 4 and 5 when we consider the effects of viscous and EM coupling, respectively. A fifth equation is required to connect this interior model to the obliquity of the mantle, -and this is provided by Equation (7). For small angles θ and θ , this gives [e.g. Mathews et al., -m p +and this is provided by Equation (7). For small angles θ +m +and θ p, this gives [e.g. Mathews et al., 1991; Dumberry and Wieczorek, 2016; Baland et al., 2019] -m˜ +(1+ω)p˜=0. (19) -For Mercury, it is more convenient to connect the internal model with ε˜ instead of p˜. This +˜ m+(1+ω)˜ p=0. (19) +For Mercury, it is more convenient to connect the internal model with ˜ ε +m +instead of ˜ p. This +is because θ +p +≈ 8.567◦ whereas ˜ ε m -is because θ ≈ 8.567◦ whereas ε˜ ≈ 2 arcmin and thus the latter obeys more strictly the -p m +≈ 2 arcmin and thus the latter obeys more strictly the condition of small angles assumed in our framework. Furthermore, the external torques act- -ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ε˜ . Writ- +ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ ε m. Writ- +ten in terms of ˜ ε m, and with the approximation of ˜ ε m -ten in terms of ε˜ , and with the approximation of ε˜ (cid:28)1 and m˜ (cid:28)1, Equation (7) becomes -m m -m˜ +(1+ω)ε˜ =−(1+ω)tanI. (20) +(cid:28)1 and ˜ m(cid:28)1, Equation (7) becomes +˜ m+(1+ω)˜ ε m +=−(1+ω)tanI. (20) Likewise, the frequency ω from Equation (5) can be written simply in terms of I, ω =−1−δωcosI. (21) The set of four Equations (12) with the addition of Equation (20) form a linear system -of equations for the five rotational variables m˜, m˜ , m˜ , n˜ and ε˜ . 
It captures the response -f s s m +of equations for the five rotational variables ˜ m, ˜ m f, ˜ m s, ˜ n +s +and ˜ ε m. It captures the response of Mercury, in the frequency domain, when subject to a periodic solar torque applied at fre- quency ω. The system can be written in a matrix form as –11– Confidential manuscript submitted to JGR-Planets M·x =y, (22a) where the solution (x) and forcing (y) vectors are -xT =[m˜,m˜ ,m˜ ,n˜ ,ε˜ ] , (22b) -f s s m +xT =[˜ m, ˜ m f, ˜ m s,˜ n s,˜ ε m] , (22b) yT =[0,0,0,0,−(1+ω)tanI] , (22c) and the elements of matrix M are - ω−e (1+ω)A¯ A¯f (1+ω)A A¯ ¯s A A¯ ¯sα 3(cid:0) (1+ω)e s+φ s(cid:1) φ m  -M=  ω−ω 1+ω+e αf 1e+ sK −c Kmb ic+ AA ¯¯ fsK icb 1+− AA ω¯¯ fs +K Kicb (1+ω− −ωe αs 2α )1 sAA ¯¯ +fs 30   - α 3e s b icb e α 3φ s α φ s   . -  - 0 0 1 ω 0  -1 0 0 0 (1+ω) +M= + + +  + + +ω−e (1+ω) ¯ Af +¯ A +(1+ω) ¯ As +¯ A +¯ As +¯ A +α 3(cid:0) (1+ω)e s+φ s(cid:1) φ +m +ω 1+ω+e +f ++K cmb+ ¯ As +¯ +AfK +icb +− ¯ As +¯ +AfK +icb +−ωe sα +1 +¯ As +¯ Af +0 +ω−α 3e s α 1e s−K icb 1+ω+K icb (1+ω−α 2)e s+α 3φ s α 3φ s +0 0 1 ω 0 +1 0 0 0 +(1+ω) + + +  + + +. (22d) Solutions of the homogeneous system (i.e. y=0) represent free modes of precession. Three modes have periods which, when seen in inertial space, are typically in the range of a few hun- @@ -625,216 +999,373 @@ Confidential manuscript submitted to JGR-Planets 2.3.1 The Cassini state of a single-body, rigid Mercury For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa- tions (12a) and (20), -(ω−e)m˜ +φ ε˜ =0, (23a) -m m -m˜ +(1+ω)ε˜ =−(1+ω)tanI. (23b) +(ω−e)˜ m+φ m˜ ε +m +=0, (23a) +˜ m+(1+ω)˜ ε m -Using Equation (21), δω (cid:28)1, and the approximation A¯(1+e+δωcosI)=C+A¯δωcosI ≈ +=−(1+ω)tanI. (23b) +Using Equation (21), δω (cid:28)1, and the approximation ¯ A(1+e+δωcosI)=C+ ¯ AδωcosI ≈ C, these can be written as -Cm˜ =A¯φ ε˜ , (24a) -m m -(cid:0) (cid:1) -m˜ =δω sinI+cosIε˜ . 
(24b) +C˜ m= ¯ Aφ m˜ ε m, (24a) +˜ +m=δω(cid:0) +sinI+cosI ˜ ε +m(cid:1) +. (24b) +Equation (24b) gives a direct relationship between ˜ m and ˜ ε m. For I = 8.5330◦, δω = +4.9327×10−7 and taking ˜ ε m -Equation (24b) gives a direct relationship between m˜ and ε˜ . For I = 8.5330◦, δω = +=2.04 arcmin, this gives ˜ m=2.52×10−4 arcmin, much smaller +than ˜ ε m: the offset of the rotation axis of the mantle with respect to its symmetry axis is very +small. Substituting Equation (24b) in Equation (24a) gives +CΩ p(cid:0) sinI+cosI ˜ ε m(cid:1) = ¯ AΩ oφ m˜ ε m, (25) +and isolating for ˜ ε m, +˜ ε m -4.9327×10−7 and taking ε˜ =2.04 arcmin, this gives m˜ =2.52×10−4 arcmin, much smaller += +CΩ psinI +−CΩ pcosI+ ¯ AΩ oφ m -than ε˜ : the offset of the rotation axis of the mantle with respect to its symmetry axis is very +. (26) +Upon using Equations (4), (15a), and Ω +o += 3 2n, we can write +˜ ε m -small. Substituting Equation (24b) in Equation (24a) gives -CΩ (cid:0) sinI+cosIε˜ (cid:1) =A¯Ω φ ε˜ , (25) -p m o m m -and isolating for ε˜ , -m -CΩ sinI -ε˜ = p . (26) -m −CΩ cosI+A¯Ω φ -p o m -Upon using Equations (4), (15a), and Ω = 3n, we can write -o 2 -CΩ sinI -ε˜ = p . (27) -m −CΩ cosI+nMR2(G J +2G C ) -p 210 2 201 22 += +CΩ psinI +−CΩ pcosI+nMR2(G 210J 2+2G 201C +22). (27) This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 -[see for instance Equation (1) of Baland et al., 2017, where their definition of Ω˙ is equal to −Ω ]. -p +[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ Ω is equal to −Ω p]. Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo- -ment of inertia Cˆ, -C n G J +2G C -Cˆ = = 210 2 201 22 . (28) -MR2 Ω cosI+sinI/ε˜ -p m +ment of inertia ˆ C, +ˆ C = +C +MR2 += +n +Ω +pG 210J 2+2G 201C +22 +cosI+sinI/˜ ε +m +. 
(28) which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation -that a measurement of the obliquity gives a constraint on Cˆ. +that a measurement of the obliquity gives a constraint on ˆ C. Two free modes of precession are found by setting y=0 in Equation (23). One mode cor- responds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession of the rotation axis about the symmetry axis. The second mode is the free retrograde axial pre- cession of Mercury. As seen in the inertial frame, its frequency is given by –13– Confidential manuscript submitted to JGR-Planets -MR2(cid:16) (cid:17) -ω =n G J +2G C , (29) -fp C 210 2 201 22 +ω +fp +=nMR2 +C +(cid:16) +G 210J 2+2G 201C +22(cid:17) +, (29) which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com- ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid- -body precession and hence C was replaced by C . Using C = 0.346 · MR2 [Margot et al., -m -2012] and the numerical values for n, J , C and e given in Table 1, we obtain a free preces- -2 22 c -sion period of T =2π/ω =1298 yr. If we use C instead of C in Equation (29), and take -fp fp m -C =0.431·C =0.431·0.346·MR2 [Margot et al., 2012], we obtain T =2π/ω =560 yr. -m fp fp +body precession and hence C was replaced by C m. Using C = 0.346 · MR2 [Margot et al., +2012] and the numerical values for n, J 2, C +22 +and e +c +given in Table 1, we obtain a free preces- +sion period of T +fp +=2π/ω +fp +=1298 yr. If we use C +m +instead of C in Equation (29), and take +C +m +=0.431·C =0.431·0.346·MR2 [Margot et al., 2012], we obtain T +fp +=2π/ω +fp +=560 yr. These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. 
The true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, the free precession period is much shorter than the forcing period of 325 kyr. Using Equation (29), Equation (27) can be written as [e.g. Baland et al., 2017] -Ω sinI -ε˜ = p . (30) -m −Ω cosI+ω -p fp -The obliquity of Mercury is thus determined by how the forcing frequency Ω compares with +˜ ε +m += +Ω psinI +−Ω pcosI+ω +fp +. (30) +The obliquity of Mercury is thus determined by how the forcing frequency Ω p -the free precession frequency ω . Because ω >Ω , Mercury occupies Cassini state 1 [Peale, -fp fp p +compares with +the free precession frequency ω fp. Because ω +fp +>Ω p, Mercury occupies Cassini state 1 [Peale, 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant -amplification if Ω ≈ ω . Since ω (cid:29) Ω , resonant amplification is minimal and the re- -p fp fp p -sulting obliquity, ε˜ ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦. +amplification if Ω +p +≈ ω fp. Since ω +fp +(cid:29) Ω p, resonant amplification is minimal and the re- +sulting obliquity, ˜ ε m +≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦. 2.3.2 The misalignment of the fluid and solid cores -With ω =−1−δωcosI and δω (cid:28)1, Equation (12d) gives n˜ ≈m˜ ; as for the mantle, -s s +With ω =−1−δωcosI and δω (cid:28)1, Equation (12d) gives ˜ n +s +≈ ˜ m s; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. -The relationship between m˜ and ε˜ of Equation (24b) is independent of the interior structure, +The relationship between ˜ m and ˜ ε m +of Equation (24b) is independent of the interior structure, so it remains unchanged when a fluid and a solid cores are present. 
Substituting it in Equa- -tion (12a), and setting n˜ =m˜ , the angular momentum equation of the whole planet becomes -s s -CΩ (cid:0) sinI+cosIε˜ (cid:1) +(A¯ cosIΩ )m˜ +A¯ (cosIΩ −Ω α φ )n˜ =A¯Ω φ ε˜ . (31) -p m f p f s p o 3 s s o m m +tion (12a), and setting ˜ n +s += ˜ m s, the angular momentum equation of the whole planet becomes +CΩ p(cid:0) sinI+cosI ˜ ε m(cid:1) +(¯ A fcosIΩ p)˜ m +f ++ ¯ A s(cosIΩ p−Ω oα 3φ s)˜ n +s += ¯ AΩ oφ m˜ ε m. (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- -cation of the mantle obliquity ε˜ . Approximate analytical solutions of n˜ and m˜ are given by -m s f -(cid:18) (cid:19) -n˜ ≈ Ω p 1+ Ω o(K icb−α 1e s) (cid:0) sinI+cosIε˜ (cid:1) − Ω oα 3φ sε˜ , (32a) -s κλ λ m κλ m -s f s -m˜ ≈ Ω p(cid:0) sinI+cosIε˜ (cid:1) + Ω o A¯ s(cid:0) K −α e (cid:1) n˜ , (32b) -f λ m λ A¯ icb 1 s s -f f f +cation of the mantle obliquity ˜ ε m. Approximate analytical solutions of ˜ n +s +and ˜ m +f +are given by +˜ n +s +≈ Ω p +κλ +s +(cid:18) +1+ Ω o(K icb−α 1e s) +λ +f +(cid:19) +(cid:0) sinI+cosI ˜ ε m(cid:1) − Ω oα 3φ s +κλ +s +˜ ε m, (32a) +˜ m +f +≈ Ω p +λ +f(cid:0) sinI+cosI ˜ ε m(cid:1) + Ω o +λ +f +¯ A s +¯ A +f(cid:0) K icb−α 1e s(cid:1) ˜ n s, (32b) where -A¯ Ω2(cid:0) K −α e (cid:1)2 -κ=1− s o icb 1 s , (33a) -A¯ λ λ -f s f -λ =σ¯ −Ω cosI, (33b) -f f p -λ =σ¯ −Ω cosI, (33c) -s s p +κ=1− +¯ A +s +¯ A +f +Ω2 o(cid:0) K icb−α 1e s(cid:1)2 +λ sλ +f +, (33a) +λ +f += ¯ σ +f +−Ω pcosI, (33b) +λ +s += ¯ σ s−Ω pcosI, (33c) –14– Confidential manuscript submitted to JGR-Planets and where we have introduced the frequencies -(cid:18) A¯ (cid:19) -σ¯ =Ω e +K + sK , (33d) -f o f cmb A¯ icb +¯ σ f -(cid:16) (cid:17) -σ¯ =Ω e α α −e α +α φ +K . (33e) -s o s 3 g s 1 3 s icb +=Ω +o(cid:18) +e +f ++K cmb+ +¯ A +s +¯ A +fK +icb(cid:19) +, (33d) +¯ σ +s +=Ω +o(cid:16) +e sα 3α g−e sα 1+α 3φ s+K +icb(cid:17) +. (33e) These solutions are good approximations for all the results that we present in section 3. 
For -an observed mantle obliquity ε˜ and for a chosen set of interior model parameters, they pro- +an observed mantle obliquity ˜ ε m -vide useful predictions of n˜ and m˜ . -s f -In the limit of a very strong coupling between the fluid core, solid core and mantle, σ¯ (cid:29) +and for a chosen set of interior model parameters, they pro- +vide useful predictions of ˜ n +s +and ˜ m f. +In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σ +s +(cid:29) +Ω +p +and ¯ σ +f +(cid:29) Ω p, so that ˜ n s -Ω and σ¯ (cid:29) Ω , so that n˜ → 0, m˜ → 0 and Equation (31) reverts back to Equation (25) -p f p s f +→ 0, ˜ m +f +→ 0 and Equation (31) reverts back to Equation (25) for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and -mantle (i.e. for spherical internal boundaries, e = e = γ = 0 and no viscous or EM cou- -f s s -pling, K =K =0), then -cmb icb -φ =0, κ=1, λ =λ =−Ω cosI, m˜ =n˜ =−(tanI+ε˜ ). (34) -s f s p f s m -Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C = -m -C−A¯ −A¯ , we obtain -f s -C Ω (cid:0) sinI+cosIε˜ (cid:1) =A¯Ω φ ε˜ . (35) -m p m o m m +mantle (i.e. for spherical internal boundaries, e +f += e +s += γ +s += 0 and no viscous or EM cou- +pling, K +cmb +=K +icb +=0), then +φ +s +=0, κ=1, λ +f +=λ +s +=−Ω pcosI, ˜ m +f += ˜ n +s +=−(tanI+˜ ε m). (34) +Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C +m += +C− ¯ A +f +− ¯ A s, we obtain +C mΩ p(cid:0) sinI+cosI ˜ ε m(cid:1) = ¯ AΩ oφ m˜ ε m. (35) which describes, as expected, a forced precession of the mantle alone. If this was the case for -Mercury, taking C /C =0.431, the obliquity should be ε˜ ≈0.88 arcmin, substantially smaller -m m -than the observed obliquity of ε˜ ≈2 arcmin. -m -If σ¯ ≈ Ω (and thus λ → 0) and/or σ¯ ≈ Ω (and thus λ → 0) resonant amplifica- -f p f s p s -tion leads to large amplitudes for m˜ , n˜ and the mantle obliquity ε˜ . 
The frequencies σ¯ and -f s m f -σ¯ are closely related to the FCN and FICN frequencies ω and ω , respectively. Hence, -s fcn ficn +Mercury, taking C m/C =0.431, the obliquity should be ˜ ε +m +≈0.88 arcmin, substantially smaller +than the observed obliquity of ˜ ε +m +≈2 arcmin. +If ¯ σ +f +≈ Ω +p +(and thus λ +f +→ 0) and/or ¯ σ +s +≈ Ω +p +(and thus λ +s +→ 0) resonant amplifica- +tion leads to large amplitudes for ˜ m f, ˜ n +s +and the mantle obliquity ˜ ε m. The frequencies ¯ σ +f +and +¯ σ +s +are closely related to the FCN and FICN frequencies ω +fcn +and ω ficn, respectively. Hence, just as a large mantle obliquity can result from resonant amplification when the forcing frequency approaches the free precession frequency, a large mantle obliquity can likewise result from res- onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These frequencies depend on the interior density structure and are not known. However, we will show that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- -pect an important amplification effect. Furthermore, since ω ,ω (cid:29) Ω , then σ¯ (cid:29) Ω -fcn ficn p f p -and σ¯ (cid:29)Ω , and we are in the strong coupling limit. The mantle obliquity should be close -s p -to that expected for a rigid planet, as observations suggest. Therefore, we expect that m˜ and -f -n˜ should be of the order of ε˜ or smaller. This further justifies the assumption of small an- -s m +pect an important amplification effect. Furthermore, since ω fcn,ω +ficn +(cid:29) Ω p, then ¯ σ +f +(cid:29) Ω +p +and ¯ σ +s +(cid:29)Ω p, and we are in the strong coupling limit. The mantle obliquity should be close +to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜ m +f +and +˜ n +s +should be of the order of ˜ ε +m +or smaller. 
This further justifies the assumption of small an- gles that we have adopted. 3 Results 3.1 Geodetic constraints and interior density structure All our interior models are constrained to match the mass M of Mercury and specific choices -of Cˆ = C/MR2 and C /C. The choice of Cˆ is determined from Equation (28). For the pa- +of ˆ C = C/MR2 and C m/C. The choice of ˆ C is determined from Equation (28). For the pa- +rameters listed in Table 1, and an observed obliquity of ε m -rameters listed in Table 1, and an observed obliquity of ε =2.04 arcmin [Margot et al., 2012], -m -this gives Cˆ = C/MR2 = 0.3455 and all our interior models are consistent with this choice. +=2.04 arcmin [Margot et al., 2012], +this gives ˆ C = C/MR2 = 0.3455 and all our interior models are consistent with this choice. Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are –15– Confidential manuscript submitted to JGR-Planets perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in es- -timating Cˆ from Equation (28), or conversely in predicting ε based on a given choice for Cˆ. -m -Part of the objective of our study is to estimate how large this error is. The ratio C /C is ob- +timating ˆ C from Equation (28), or conversely in predicting ε m -tained from the amplitude of the 88-day longitudinal mantle libration φ , which is given by +based on a given choice for ˆ C. +Part of the objective of our study is to estimate how large this error is. 
The ratio C m/C is ob- +tained from the amplitude of the 88-day longitudinal mantle libration φ o, which is given by +φ o -MR2 C 1 -φ =6·f(e )C , (36) -o c 22 C C 1+ζ +=6·f(e c)C +22MR2 +C +C +C m +1 +1+ζ +, (36) where +f(e c)=1−11e2 +c ++ 959 -f(e )=1−11e2+ e4, (37) -c c 48 c +48 +e4 c, (37) and where ζ is a correction that takes into account the entrainment of the inner core in the li- bration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this cor- rection is small and, to simplify, we neglect it here. Taking the observed libration amplitude -to be 38.5 arcsec [Margot et al., 2012], Cˆ = C/MR2 = 0.3455 and C and e from Table 1, -22 c -this corresponds to a ratio C /C =0.4269, or equivalently Cˆ =C /MR2 =0.1475. -m m m -For all results presented in our study, the crustal density is set at ρ =2974 kg m−3 [Sori, +to be 38.5 arcsec [Margot et al., 2012], ˆ C = C/MR2 = 0.3455 and C +22 +and e c +from Table 1, +this corresponds to a ratio C m/C =0.4269, or equivalently ˆ C +m +=C m/MR2 =0.1475. +For all results presented in our study, the crustal density is set at ρ +c +=2974 kg m−3 [Sori, 2018]. Our standard choice for the crustal thickness is h = 26 km [Sori, 2018], although in section 3.2 we also present some results with other choices of h. We have considered two pos- sible prescriptions connected to the density of the inner core. First, for all the results presented -in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ =8800 kg m−3 ap- +in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ s +=8800 kg m−3 ap- proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure Fe composition in face-centered cubic phase. 
This captures an end-member scenario where the core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively @@ -845,304 +1376,490 @@ ICB is expected to be small, although since density increases with depth, the co the mean densities of the fluid and solid cores is larger. It is these mean densities that enter our Mercury model with uniform density layers. To capture this other end-member core com- position scenario, in section 3.5 we present results where we instead prescribe a fixed density -contrast between the fluid and solid core; specifically, we set the numerical value of α . -3 -For a given choice of inner core radius r , the densities of the mantle (ρ ) and fluid core -s m -(ρ ) and the radius of the CMB (r ) are determined such that the interior model matches M, -f f -Cˆ = 0.3455 and Cˆ = 0.1475. Figure 3a shows how ρ , ρ and r vary as a function of in- -m m f f -ner core radius r for each of the two inner core density scenarios: a fixed ρ , or a fixed α . When -s s 3 +contrast between the fluid and solid core; specifically, we set the numerical value of α 3. +For a given choice of inner core radius r s, the densities of the mantle (ρ m) and fluid core +(ρ f) and the radius of the CMB (r f) are determined such that the interior model matches M, +ˆ C = 0.3455 and ˆ C +m += 0.1475. Figure 3a shows how ρ m, ρ +f +and r +f +vary as a function of in- +ner core radius r +s +for each of the two inner core density scenarios: a fixed ρ s, or a fixed α 3. When the inner core is small, its presence has a limited influence on the resulting density structure, -and we find ρ = 3197 kg m−3, ρ = 7263 kg m−3 and r = 2000 km in each of the two -m f f -scenarios. When ρ is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, r in- -s f -creases to above 2100 km, ρ approaches 4000 kg m−3 and ρ is reduced to below 5000 kg m−3. 
-m f -Figure 3a illustrates that when adopting a fixed ρ , there is a limit in the possible inner core +and we find ρ +m += 3197 kg m−3, ρ +f += 7263 kg m−3 and r +f += 2000 km in each of the two +scenarios. When ρ s -size, as otherwise ρ gets unreasonably large and ρ gets inappropriately small (as it would -m f +is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, r +f +in- +creases to above 2100 km, ρ +m +approaches 4000 kg m−3 and ρ +f +is reduced to below 5000 kg m−3. +Figure 3a illustrates that when adopting a fixed ρ s, there is a limit in the possible inner core +size, as otherwise ρ +m +gets unreasonably large and ρ +f +gets inappropriately small (as it would require an excessively large concentration of light elements). When adopting instead a fixed den- -sity contrast, with α =0.1, the changes in r , ρ and ρ with inner core radius are more mod- -3 f m f -est, allowing larger possible inner core sizes. Different assumptions on ρ and h would alter the +sity contrast, with α +3 +=0.1, the changes in r f, ρ +m +and ρ +f +with inner core radius are more mod- +est, allowing larger possible inner core sizes. Different assumptions on ρ c -numerical values shown on Figure 3a but not their trends with r . -s -Figure 3b shows how the FCN and FICN periods vary with r for each of the two inner +and h would alter the +numerical values shown on Figure 3a but not their trends with r s. +Figure 3b shows how the FCN and FICN periods vary with r s -core density scenarios and in the absence of viscous and EM coupling (i.e. K = K = -cmb icb +for each of the two inner +core density scenarios and in the absence of viscous and EM coupling (i.e. 
K +cmb += K +icb += –16– Confidential manuscript submitted to JGR-Planets +0 +200 +400 +600 800 1000 +1200 +1400 +p e r i o d ( y r ) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +3000 +4000 +5000 6000 +7000 8000 +d e n s i t y ( k g / m 3 ) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +2000 +2020 +2040 2060 +2080 +2100 +F l +u i +d c o r e r a d i u s ( k +m ) +fluid core density +CMB radius +FICN +FCNint +mantle density a b -2100 1400 -7000 fluid core density -1200 -2080 FCNint )mk( -)3m/gk( -6000 1000 suidar )ry( -2060 -800 doirep -5000 radius ytisned eroc -2040 -B 600 M diulF -C FCN -4000 2020 400 -mantle density 200 FICN -3000 2000 -0 -0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400 -Inner core radius (km) Inner core radius (km) +FCN Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to a scenario where the density of the inner core is set to 8800 kg m−3; thin dashed lines correspond to a -scenario where the density contrast between the fluid and solid cores is set to α =0.1. +scenario where the density contrast between the fluid and solid cores is set to α 3 +=0.1. 0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small in- -ner core, increasing to approximately 600 yr at the largest r . The FICN period is shorter, close -s +ner core, increasing to approximately 600 yr at the largest r s. The FICN period is shorter, close to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the -largest r under the fixed ρ (fixed α ) scenario. This confirms that the FCN and FICN peri- -s s 3 +largest r +s +under the fixed ρ +s +(fixed α 3) scenario. 
This confirms that the FCN and FICN peri- ods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away -from it that we do not expect large m˜ and n˜ from resonant amplification. -f s +from it that we do not expect large ˜ m +f +and ˜ n +s +from resonant amplification. The FCN and FICN periods that we have computed include the influence of the exter- nal torque. As shown by Baland et al. [2019], the external torque allow solid regions to have a free motion in inertial space thereby affecting the free rotational modes. To a good approx- -imation, the FCN and FICN frequencies (as seen in an inertial frame) for K = K = 0 -cmb icb +imation, the FCN and FICN frequencies (as seen in an inertial frame) for K +cmb += K +icb += 0 are given by -(cid:18) A¯ (cid:19)(cid:16) (cid:17) e φ -ω ≈−Ω e +φ +Ω f m , (38a) -fcn o A¯ +A¯ f m o(e +φ ) -m s f m -(cid:18) A¯+A¯ (cid:19)(cid:16) (cid:17) -ω ≈Ω s e α −e α α −α φ . (38b) -ficn o A¯−A¯ s 1 s 3 g 3 s -s -The expression of the FICN frequency involves the inertial torque (term e α ) and the grav- -s 1 -itational torque from the rest of Mercury (e α α ) and the Sun (α φ ) acting on the inner core. -s 3 g 3 s -For both of our inner core density scenarios (and our choices of ρ =8800 kg m−3 and α = -s 3 -0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α α (cid:29)α ; -3 g 1 +ω +fcn +≈−Ω +o(cid:18) ¯ A +¯ A m+ ¯ A +s(cid:19)(cid:16) +e +f ++φ +m(cid:17) ++Ω +o +e fφ +m +(e +f ++φ +m), (38a) +ω +ficn +≈Ω +o(cid:18) ¯ A+ ¯ A +s +¯ A− ¯ A +s(cid:19)(cid:16) +e sα 1−e sα 3α g−α 3φ +s(cid:17) +. (38b) +The expression of the FICN frequency involves the inertial torque (term e sα 1) and the grav- +itational torque from the rest of Mercury (e sα 3α g) and the Sun (α 3φ s) acting on the inner core. +For both of our inner core density scenarios (and our choices of ρ +s +=8800 kg m−3 and α +3 += +0.1), the internal gravitational torque dominates that from the Sun. 
Furthermore, α 3α +g +(cid:29)α 1; the gravitational torque dominates the inertial torque, in large part because of the slow rota- tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and -Dumberry, 2018], but it is different for Earth, where α >α α because of its faster rotation -1 3 g +Dumberry, 2018], but it is different for Earth, where α +1 +>α 3α +g +because of its faster rotation and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres- –17– Confidential manuscript submitted to JGR-Planets -sion for the FICN differs by a factor (A¯+A¯ )/(A¯−A¯ ) compared to that given in Dumberry -s s +sion for the FICN differs by a factor (¯ A+ ¯ A s)/(¯ A− ¯ A s) compared to that given in Dumberry and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. First, it -involves the external torque from the Sun captured by the parameter φ . If we set φ = 0, -m m -we obtain the FCN frequency for a decoupled model in which only interior torques contribute, -(cid:18) A¯ (cid:19) -ω ≈−Ω e . (38c) -fcn,int o A¯ +A¯ f -m s -This frequency is slightly different from the usual expression for Earth, involving the ratio A¯/(A¯ + +involves the external torque from the Sun captured by the parameter φ m. If we set φ m -A¯ ) rather than A¯/A¯ . This is because of the relatively thin mantle of Mercury; for the largest -s m -r considered, the moment of inertia of the inner core can get close to 40% of that of the man- += 0, +we obtain the FCN frequency for a decoupled model in which only interior torques contribute, +ω +fcn,int +≈−Ω +o(cid:18) ¯ A +¯ A m+ ¯ A +s(cid:19) +e f. (38c) +This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ A/(¯ A m+ +¯ A s) rather than ¯ A/¯ A m. 
This is because of the relatively thin mantle of Mercury; for the largest +r s +considered, the moment of inertia of the inner core can get close to 40% of that of the man- tle and is not negligible. The period of the FCN when only interior torques contribute is shown in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr -at the largest r . Hence, the influence of the solar torque reduces the FCN period by a factor -s +at the largest r s. Hence, the influence of the solar torque reduces the FCN period by a factor of approximately 3. We note that the FICN period, in contrast, is not altered substantially when the external torque is set to zero. 3.2 Gravitational and inertial coupling Let us now investigate the obliquities of the mantle, fluid core and inner core in their equi- -librium Cassini state. We assume a fixed inner core density scenario in this section, with ρ = +librium Cassini state. We assume a fixed inner core density scenario in this section, with ρ s += 8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of grav- -itational and inertial coupling. Figure 4 shows how ε˜ , m˜ and n˜ vary as functions of inner -m f s +itational and inertial coupling. Figure 4 shows how ˜ ε m, ˜ m +f +and ˜ n +s +vary as functions of inner core radius. We show calculations for three different choices of crustal thickness, but let us con- -centrate first on the case for h=26 km. For small r , we retrieve an obliquity of ε˜ =2.0494 -s m -arcmin (Figure 4a). ε˜ decreases with r , but not substantially; at the largest r (1500 km), -m s s -ε˜ = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ε˜ = 2.04 -m m -arcmin, the obliquity that we used in setting the constraint for Cˆ – and hence the prediction +centrate first on the case for h=26 km. For small r s, we retrieve an obliquity of ˜ ε +m +=2.0494 +arcmin (Figure 4a). 
˜ ε +m +decreases with r s, but not substantially; at the largest r +s +(1500 km), +˜ ε +m += 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ ε +m += 2.04 +arcmin, the obliquity that we used in setting the constraint for ˆ C – and hence the prediction we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which occurs for small inner cores. -The deviation of ε˜ from that of a rigid planet is due to the misalignments of the fluid +The deviation of ˜ ε m -core (m˜ ) and solid inner core (n˜ ) with respect to the mantle (Figure 4b). The misalignment -f s -of the fluid core spin axis from the mantle is significant: m˜ is approximately 4.02 arcmin for +from that of a rigid planet is due to the misalignments of the fluid +core (˜ m f) and solid inner core (˜ n s) with respect to the mantle (Figure 4b). The misalignment +of the fluid core spin axis from the mantle is significant: ˜ m f +is approximately 4.02 arcmin for a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin -at the largest r . Recall that m˜ is measured with respect to the mantle rotation axis (which -s f +at the largest r s. Recall that ˜ m +f +is measured with respect to the mantle rotation axis (which coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with -respect to the orbit normal is ε˜ +m˜ ≈6 arcmin. The reason why the obliquity of the spin -m f +respect to the orbit normal is ˜ ε m+˜ m +f +≈6 arcmin. The reason why the obliquity of the spin axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), -which shows that m˜ is determined by the resonant amplification of the FCN mode at the forc- +which shows that ˜ m f +is determined by the resonant amplification of the FCN mode at the forc- ing frequency. 
When the FCN frequency is much larger than the forcing frequency, as is the -case for Mercury, the resonant amplification is very weak but remains present and m˜ is larger +case for Mercury, the resonant amplification is very weak but remains present and ˜ m f +is larger than zero. -In contrast to m˜ , the misalignment of the inner core with respect to the mantle is much -f -smaller; n˜ is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ε˜ . -s m +In contrast to ˜ m f, the misalignment of the inner core with respect to the mantle is much +smaller; ˜ n +s +is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ ε m. Physically, this is because the gravitational torque acting on the inner core when it is tilted from the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner core must remain in close alignment with the mantle. Presented differently, since the FICN pe- riod is more than 3000 times shorter than the forced precession period, the inner core can eas- –18– Confidential manuscript submitted to JGR-Planets -a 4.5 b -2.050 -4.0 -2.048 )nimcra( )nimcra( -2.046 crustal th 1i 6c k kn mess ε εm 3.5 crustal th 1i 6c k kn mess m f -26 km g -36 km 26 km elgna elgna -2.044 3.0 36 km -n (x100) -s ytiuqilbO ytiuqilbO -2.042 -2.5 -ε -m for a rigid planet -2.040 -2.0 2.038 +2.040 +2.042 +2.044 2.046 2.048 +2.050 +O b l i q u i t +y +a n g l e ( a r c m i n ) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) 1.5 -0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400 -Inner core radius (km) Inner core radius (km) -Figure 4. 
a) Obliquity of the mantle (ε˜ , solid lines) and of the principal moment of inertia (ε˜ , -m g -dashed line) b) m˜ (solid lines) and n˜ (dashed lines, x100) as a function of inner core radius and for -f s +2.0 +2.5 +3.0 3.5 +4.0 +4.5 +O b l i q u i t +y +a n g l e ( a r c m i n ) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +crustal thickness 16 km 36 km 26 km crustal thickness 16 km 36 km 26 km ε m ε g +for a rigid planet ε m +m f n s(x100) +a b +Figure 4. a) Obliquity of the mantle (˜ ε m, solid lines) and of the principal moment of inertia (˜ ε g, +dashed line) b) ˜ m +f +(solid lines) and ˜ n +s +(dashed lines, x100) as a function of inner core radius and for different choices of crustal thickness. -ily follow the forced precession of the mantle and remains gravitationally locked to it. n˜ does +ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ n s +does not change substantially as the inner core increases in size. -When K =K =0, a good approximation of ε˜ is given by -icb cmb m -C(cid:48)Ω sinI -ε˜ = p , (39) -m −C(cid:48)Ω cosI+A¯Ω φ -p o m +When K +icb +=K +cmb +=0, a good approximation of ˜ ε +m +is given by +˜ ε +m += +C(cid:48)Ω psinI +−C(cid:48)Ω pcosI+ ¯ AΩ oφ +m +, (39) which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced by C(cid:48). The latter represents an effective moment of inertia that accounts for the coupling of the core to the mantle, -C(cid:48) =C+A¯ χ, (40) +C(cid:48) =C+ ¯ A cχ, (40) +where ¯ A c -where A¯ =A¯ +A¯ and -c f s -Ω cosI (cid:18) A¯ A¯ (cid:19) A¯ Ω α φ -χ= p f + s − s o 3 s . (41) -A¯ (σ¯ −Ω cosI) (σ¯ −Ω cosI) A¯ (σ¯ −Ω cosI) -c f p s p c s p -The frequencies σ¯ and σ¯ are given in Equations (33d-33e) and closely approximate the FCN -f s += ¯ A +f ++ ¯ A +s +and +χ= +Ω pcosI +¯ A +c +(cid:18) ¯ A +f +(¯ σ +f +−Ω pcosI) ++ +¯ A +s +(¯ σ s−Ω +pcosI)(cid:19) +− +¯ A +s +¯ A +c +Ω oα 3φ +s +(¯ σ s−Ω +pcosI). 
(41) +The frequencies ¯ σ +f +and ¯ σ +s +are given in Equations (33d-33e) and closely approximate the FCN and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then how the core is entrained to precess with the mantle, with the coupling between the two ex- pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit -of σ¯ ,σ¯ → 0, then χ = −1, C(cid:48) = C , the core is fully decoupled from the mantle and we -f s m -retrieve Equation (35). If instead σ¯ ,σ¯ → ∞, then χ = 0, C(cid:48) = C and we retrieve the pre- -f s -diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω , -p +of ¯ σ f,¯ σ +s +→ 0, then χ = −1, C(cid:48) = C m, the core is fully decoupled from the mantle and we +retrieve Equation (35). If instead ¯ σ f,¯ σ +s +→ ∞, then χ = 0, C(cid:48) = C and we retrieve the pre- +diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω p, as is the case here, resonant amplification is weak, χ is small and positive, C(cid:48) > C and this -leads to a slightly larger ε˜ compared to a rigid planet. Because the inner core core is grav- +leads to a slightly larger ˜ ε m +compared to a rigid planet. Because the inner core core is grav- itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the -misalignment of the fluid core. In Equation (41), σ¯ (cid:29)σ¯ , so to a good approximation -s f +misalignment of the fluid core. In Equation (41), ¯ σ +s +(cid:29) ¯ σ f, so to a good approximation –19– Confidential manuscript submitted to JGR-Planets -A¯ Ω cosI -χ≈ f o . (42) -A¯ (σ¯ −Ω cosI) -c f p -For a small inner core, χ≈7.55×10−3. As the inner core grows, A¯ decreases, and the com- +χ≈ +¯ A f -bination A¯ χ also decreases. 
This implies that C(cid:48) decreases with inner core size and, consequently, +¯ A c -ε˜ also decreases with inner core size, as seen in Figure 4a, though it remains larger than the +Ω ocosI +(¯ σ +f +−Ω +pcosI). (42) +For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯ A +f +decreases, and the com- +bination ¯ A cχ also decreases. This implies that C(cid:48) decreases with inner core size and, consequently, +˜ ε m +also decreases with inner core size, as seen in Figure 4a, though it remains larger than the prediction for a rigid planet. -The specific predictions of ε˜ , m˜ and n˜ on Figure 4 depend sensitively on the assumed -m f s -interior density model and on the dynamical ellipticities of the inner core (e ) and fluid core -s -(e ). Hence, it depends on the choices we have made for the inner core density ρ , the crustal -f s -density ρ and its thickness h. Changing ρ , ρ and/or h requires a different combination of ρ , -c s c f -ρ and r in order to match M, Cˆ and Cˆ . In turn, this leads to different ellipticities at in- -m f m -terior boundary in order to match J and C , and thus different predictions for ε˜ , m˜ and -2 22 m f -n˜ . To illustrate this, we show on Figure 4 two additional predictions computed with crustal -s -thicknesses changed to h=16 and 36 km. The change in ε˜ remains modest, ∼0.025%, but -m -the changes in m˜ and n˜ are more substantial, ∼5% and ∼10%, respectively. -f s +The specific predictions of ˜ ε m, ˜ m +f +and ˜ n +s +on Figure 4 depend sensitively on the assumed +interior density model and on the dynamical ellipticities of the inner core (e s) and fluid core +(e f). Hence, it depends on the choices we have made for the inner core density ρ s, the crustal +density ρ +c +and its thickness h. Changing ρ s, ρ +c +and/or h requires a different combination of ρ f, +ρ +m +and r +f +in order to match M, ˆ C and ˆ C m. 
In turn, this leads to different ellipticities at in- +terior boundary in order to match J +2 +and C 22, and thus different predictions for ˜ ε m, ˜ m +f +and +˜ n s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal +thicknesses changed to h=16 and 36 km. The change in ˜ ε +m +remains modest, ∼0.025%, but +the changes in ˜ m +f +and ˜ n +s +are more substantial, ∼5% and ∼10%, respectively. We also show on Figure 4a (only for h=26 km) the obliquity of the principal moment -of inertia of the whole planet, which we denote by ε˜ . A difference between ε˜ and ε˜ occurs -g g m +of inertia of the whole planet, which we denote by ˜ ε g. A difference between ˜ ε +g +and ˜ ε +m +occurs if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core -(with n˜ assumed small) leads to an off-diagonal component of the moment of inertia tensor +(with ˜ n +s +assumed small) leads to an off-diagonal component of the moment of inertia tensor +of (C s−¯ A s)α 3˜ n s -of (C −A¯ )α n˜ =A¯ e α n˜ . The angle by which the mantle frame must be rotated so that -s s 3 s s s 3 s -the moment of inertia of the whole planet is purely diagonal is (A¯ e α n˜ )/(A¯e), and hence a -s s 3 s -good approximation of ε˜ is += ¯ A se sα 3˜ n s. The angle by which the mantle frame must be rotated so that +the moment of inertia of the whole planet is purely diagonal is (¯ A se sα 3˜ n s)/(¯ Ae), and hence a +good approximation of ˜ ε g -A¯ e -ε˜ =ε˜ + s sα n˜ . (43) -g m A¯e 3 s +is +˜ ε +g += ˜ ε m+ +¯ A se +s +¯ Ae +α 3˜ n s. (43) Since the inner core is gravitationally forced into a close alignment with the mantle, the dif- -ference between ε˜ and ε˜ remains very small. For the largest inner core radius that we have -g m -considered, ε˜ differs from ε˜ only by approximately 0.001 arcmin. -g m +ference between ˜ ε +g +and ˜ ε +m +remains very small. 
For the largest inner core radius that we have +considered, ˜ ε +g +differs from ˜ ε +m +only by approximately 0.001 arcmin. 3.3 Viscous coupling We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini state. Peale et al. [2014] present two different parameterizations of viscous coupling based on the timescale of attenuation of the differential rotation between the fluid core and mantle. More complete analytical solutions for the flow resulting from a differentially precessing shell have been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester, 1976] and we exploit -these solutions here. The parametrization of the viscous coupling constants K and K based -cmb icb +these solutions here. The parametrization of the viscous coupling constants K +cmb +and K +icb +based on them are given in Mathews and Guo [2005], -πρ fr f4(cid:114) ν (cid:16) 0.195−1.976i(cid:17) -K = , (44a) -cmb A¯ 2Ω -f o -πρ r4(cid:114) ν (cid:16) (cid:17) -K = f s 0.195−1.976i , (44b) -icb A¯ 2Ω -s o +K +cmb += +πρ fr4 +f +¯ A +f +(cid:114) ν +2Ω +o(cid:16) 0.195−1.976i(cid:17) +, (44a) +K +icb += +πρ fr4 +s +¯ A +s +(cid:114) ν +2Ω +o(cid:16) 0.195−1.976i(cid:17) +, (44b) where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte- rior is not well known but based on theoretical and experimental studies it is expected to be of the order of 10−6 m2 s−1 [e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al., @@ -1151,25 +1868,40 @@ of the order of 10−6 m2 s−1 [e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et Confidential manuscript submitted to JGR-Planets The above parameterizations are valid only under the assumption that the flow in the bound- ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds -number Re=r ∆u /ν, associated with the differential velocity ∆u =r Ω m˜ at the CMB. 
-f f f f o f -For r = 2000 km, and taking m˜ = 4 arcmin ≈ 0.001 rad from the results in the previous -f f -section, we get ∆u ∼ 2 mm/s and Re ∼ 6×109. Such a large Reynolds number indicates +number Re=r f∆u f/ν, associated with the differential velocity ∆u +f +=r fΩ o˜ m +f +at the CMB. +For r f += 2000 km, and taking ˜ m +f += 4 arcmin ≈ 0.001 rad from the results in the previous +section, we get ∆u +f +∼ 2 mm/s and Re ∼ 6×109. Such a large Reynolds number indicates that the viscous friction between the fluid core and mantle should induce turbulent flows, as is the case for the Cassini state of the Moon [Yoder, 1981; Williams et al., 2001; C´ebron et al., 2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde- pendent of the fluid viscosity and proportional to the square of the differential velocity. The -coupling constant K should be in the form +coupling constant K +cmb +should be in the form +K cmb -(cid:12) (cid:12)(cid:16) (cid:17) -K cmb =f cmb(cid:12)m˜ f(cid:12) 0.195−1.976i , (45) -where f is a numerical factor that depends among other things on surface roughness. In- +=f +cmb(cid:12) +(cid:12)˜ m +f(cid:12) (cid:12)(cid:16) 0.195−1.976i(cid:17) +, (45) +where f cmb +is a numerical factor that depends among other things on surface roughness. In- corporating a viscous coupling of this form in our rotational model is more challenging not only -because f is not known but also because the viscous torque is no longer linear in m˜ . One -cmb f +because f +cmb +is not known but also because the viscous torque is no longer linear in ˜ m f. One strategy is to find solutions through an iterative process. The simpler alternative strategy that we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν represents an effective turbulent viscosity. @@ -1177,20 +1909,38 @@ To give an estimate of an appropriate turbulent value for ν, we turn to the Cas of the Moon. 
A measure of the viscous dissipation at the CMB of the Moon has been obtained by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) [Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms -of a coupling parameter K and a recent estimate is K/C =(1.41±0.34)×10−8 day−1 [Williams +of a coupling parameter K and a recent estimate is K/C L -and Boggs, 2015], where C is the lunar polar moment of inertia. The connection between K +=(1.41±0.34)×10−8 day−1 [Williams +and Boggs, 2015], where C L -and K is +is the lunar polar moment of inertia. The connection between K +and K cmb -(cid:12) (cid:12) K C 1 -(cid:12)Im[K ](cid:12)= L , (46) -(cid:12) cmb (cid:12) C C Ω -L fL L -where C is the moment of inertia of the lunar core and Ω = 2.66 × 10−6 s−1 the lunar -fL L -rotation rate. With C /C ∼7×10−4 [e.g. Williams et al., 2014], this gives |Im[K ]|∼ -fL L cmb +is +(cid:12) +(cid:12) (cid:12)Im[K +cmb](cid:12) +(cid:12) (cid:12)= +K +C +L +C +L +C +fL +1 +Ω +L +, (46) +where C +fL +is the moment of inertia of the lunar core and Ω +L += 2.66 × 10−6 s−1 the lunar +rotation rate. With C fL/C +L +∼7×10−4 [e.g. Williams et al., 2014], this gives |Im[K cmb]|∼ 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and as- suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the @@ -1200,80 +1950,118 @@ cmb is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- cury should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the possible effective turbulent viscosity that can be expected for Mercury. -Figure 5 shows how ε˜ , m˜ and n˜ vary as functions of inner core radius for different choices -m f s -of effective viscosities. 
For ν = 10−5 m2 s−1, viscous coupling is too weak to affect ε˜ and +Figure 5 shows how ˜ ε m, ˜ m +f +and ˜ n +s +vary as functions of inner core radius for different choices +of effective viscosities. For ν = 10−5 m2 s−1, viscous coupling is too weak to affect ˜ ε m -m˜ and they are essentially unchanged from the solutions shown in Figure 4. With increasing +and +˜ m f +and they are essentially unchanged from the solutions shown in Figure 4. With increasing ν, the stronger viscous coupling between the core and the mantle reduces their differential ve- -locity, and m˜ is reduced. With the reduced differential velocity at the CMB, the prediction +locity, and ˜ m f -of ε˜ gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB +is reduced. With the reduced differential velocity at the CMB, the prediction +of ˜ ε m -viscous coupling model is different than the one used by Peale et al. [2014], our results for ε˜ +gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB +viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ ε m -and m˜ are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the +and ˜ m f +are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the fluid spin axis from the mantle symmetry axis. 
Considering the upper bound in turbulent vis- cosity that we have identified above (i.e ν ≈ 5×10−4 m2 s−1), the influence of viscous cou- –21– Confidential manuscript submitted to JGR-Planets -kinematic viscosity: 0.01 m2 s-1 0.001 m2 s-1 0.0005 m2 s-1 0.0001 m2 s-1 0.00001 m2 s-1 -4.5 -a b +ε m +ε g +m f +n s +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 2.050 +O +b l i q u i t +y a n g l e +( +a r c +m i +n ) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 2.5 +3.0 +3.5 4.0 -2.048 3.5 )nimcra( )nimcra( -2.046 ε g 3.0 -ε m 2.5 elgna elgna -2.044 -m -2.0 f ytiuqilbO ytiuqilbO -n -2.042 1.5 s +4.5 +O +b l i q u i t +y a n g l e +( +a r c +m i +n ) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1 +a b +for a rigid planet ε -m for a rigid planet -2.040 1.0 -0.5 -2.038 -0.0 -0 200 400 600 800 1000 1200 1400 0 200 400 600 800 1000 1200 1400 -Inner core radius (km) Inner core radius (km) -Figure 5. a) Obliquity of the mantle (ε˜ , solid lines) and gravity field (ε˜ , dashed lines) b) m˜ -m g f -(solid lines) and n˜ (dashed lines) as a function of inner core radius and for different choices of kinematic +m +Figure 5. a) Obliquity of the mantle (˜ ε m, solid lines) and gravity field (˜ ε g, dashed lines) b) ˜ m +f +(solid lines) and ˜ n s +(dashed lines) as a function of inner core radius and for different choices of kinematic viscosity (color in legend). -pling on ε˜ remains modest, reducing its amplitude by a maximum of approximately 0.0015 +pling on ˜ ε m +remains modest, reducing its amplitude by a maximum of approximately 0.0015 arcmin. The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in- ner core with the fluid core spin axis. 
The viscous coupling strength is inversely proportional -to r , so a larger viscosity results in a larger inner core radius at which viscous coupling is of -s +to r s, so a larger viscosity results in a larger inner core radius at which viscous coupling is of a similar magnitude to gravitational coupling. Taking again an upper bound of ν =5×10−4 -m2 s−1, Figure 5 indicates that n˜ may be 1 arcmin or larger only if the inner core radius is +m2 s−1, Figure 5 indicates that ˜ n s +may be 1 arcmin or larger only if the inner core radius is smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi- tational coupling is much larger than viscous coupling, and the inner core tilt is limited to a fraction of 1 arcmin. The larger inner core tilt observed with increasing effective viscosity results in a larger -offset between the obliquity of the principal moment of inertia ε˜ and that of the mantle ε˜ , -g m -though it remains limited. For the upper bound of ν = 5×10−4 m2 s−1, and for r = 1500 +offset between the obliquity of the principal moment of inertia ˜ ε +g +and that of the mantle ˜ ε m, +though it remains limited. For the upper bound of ν = 5×10−4 m2 s−1, and for r s -km, the difference between ε˜ and ε˜ is limited to 0.0013 arcmin. -g m += 1500 +km, the difference between ˜ ε +g +and ˜ ε +m +is limited to 0.0013 arcmin. The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller the misalignments of both the fluid core and inner core are with respect to the mantle. This implies that the larger the inner core is, the more we approach a planet precessing as a rigid body, although the misalignment of the spin axis of the fluid core remains important, approx- -imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ε˜ , m˜ -m f -and n˜ change with inner core size would certainly be different for a turbulent model of viscous +imately 3-4 arcmin away from the mantle symmetry axis. 
The specific way in which ˜ ε m, ˜ m +f +and ˜ n s +change with inner core size would certainly be different for a turbulent model of viscous coupling. But the general conclusion remains that the addition of viscous coupling at the CMB and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. –22– @@ -1286,68 +2074,104 @@ electrically conducting regions stretches existing magnetic field lines that thr This induces a secondary magnetic field (or equivalently, an electrical current) and an associ- ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength -of the radial magnetic field B and the electrical conductivity σ on either side of the bound- +of the radial magnetic field B r +and the electrical conductivity σ on either side of the bound- ary [Rochester, 1960, 1962, 1968]. -The parametrization of EM coupling in terms of the coupling constants K and K -cmb icb +The parametrization of EM coupling in terms of the coupling constants K +cmb +and K +icb has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot, 2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given +by B +r += √ -3(cid:10) Bd(cid:11) (cid:10) Bd(cid:11) -by B = cosθ, where is the r.m.s. strength of the field, the coupling constant -r r r -K can be written is the form +3(cid:10) Bd r(cid:11) cosθ, where (cid:10) Bd r(cid:11) is the r.m.s. 
strength of the field, the coupling constant +K +cmb +can be written is the form +K cmb -(cid:10) Bd(cid:11)2 -K =3(1−i)F , (47) -cmb cmb r +=3(1−i)F cmb(cid:10) Bd r(cid:11)2 , (47) where -(cid:18) (cid:19)−1 -1 1 1 -F = + , (48) -cmb Ω ρ r σ δ σ δ -o f f m m f f -(cid:112) (cid:112) -and where σ , δ = 2/(σ µΩ ) and σ , δ = 2/(σ µΩ ) are the electrical conductivi- -m m m o f f f o +F +cmb += +1 +Ω oρ fr +f +(cid:18) +1 +σ mδ +m ++ +1 +σ fδ +f(cid:19)−1 +, (48) +and where σ m, δ +m += +(cid:112) +2/(σ mµΩ o) and σ f, δ +f += +(cid:112) +2/(σ fµΩ o) are the electrical conductivi- ties and magnetic skin depths in the mantle and fluid core, respectively, with µ=4π×10−7 -N A−2 the magnetic permeability of free space. The r.m.s. field strength (cid:10) Bd(cid:11) is connected to -r -the Gauss coefficient g0 of the surface magnetic field by +N A−2 the magnetic permeability of free space. The r.m.s. field strength (cid:10) Bd r(cid:11) is connected to +the Gauss coefficient g0 1 -(cid:18) R(cid:19)3 -(cid:10) Bd(cid:11) √2 (cid:12) (cid:12)g0(cid:12) -= (cid:12) . (49) -r 3 r f 1 -We can readily build an estimate of the amplitude of K . The electrical conductivity -cmb +of the surface magnetic field by +(cid:10) Bd r(cid:11) = 2 √ +3(cid:18) +R +r +f(cid:19)3 +(cid:12) (cid:12)g0 1(cid:12) (cid:12) . (49) +We can readily build an estimate of the amplitude of K cmb. The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding -to the CMB of Mercury is in the range of σ ∼ 0.01 − 1 S m−1 [Constable, 2015]. In con- +to the CMB of Mercury is in the range of σ m -trast, the electrical conductivity of Fe in planetary cores is expected to be close σ ∼ 106 S +∼ 0.01 − 1 S m−1 [Constable, 2015]. In con- +trast, the electrical conductivity of Fe in planetary cores is expected to be close σ f -m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ δ )−1 (cid:29)(σ δ )−1. 
Tak- -m m f f -(cid:12) (cid:12) -ing σ = 1 S m−1, (cid:12)g 10(cid:12) = 190 nT for Mercury’s dipole field [Anderson et al., 2012], r = -m f -2000 km, ρ =7000 kg m−3, this gives K ≈(3.1×10−11)·(1−i). To put this amplitude -f cmb +∼ 106 S +m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ mδ m)−1 (cid:29)(σ fδ f)−1. Tak- +ing σ m = 1 S m−1, +(cid:12) +(cid:12)g0 +1(cid:12) +(cid:12) = 190 nT for Mercury’s dipole field [Anderson et al., 2012], r f = +2000 km, ρ +f +=7000 kg m−3, this gives K +cmb +≈(3.1×10−11)·(1−i). To put this amplitude in perspective, taking a molecular viscosity of ν =10−6 m2 s−1 in Equation (44a) gives a vis- -cous coupling constant of K ≈(6.0×10−7)·(0.195−1.976i). Hence, EM coupling at the +cous coupling constant of K cmb +≈(6.0×10−7)·(0.195−1.976i). Hence, EM coupling at the CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- ponents of the radial magnetic field. EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by -CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σ could be +CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σ +m +could be +closer to σ f. Likewise, σ m -closer to σ . Likewise, σ can be increased if a more electrically conducting layer has formed -f m +can be increased if a more electrically conducting layer has formed at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even -in the extreme case of σ = σ = 106 S m−1, K ≈ (1.6×10−8)·(1−i), which remains -m f cmb +in the extreme case of σ +m += σ +f += 106 S m−1, K +cmb +≈ (1.6×10−8)·(1−i), which remains –23– Confidential manuscript submitted to JGR-Planets smaller by a factor ∼60 than the smallest possible viscous coupling constant. 
Viscous forces @@ -1357,59 +2181,75 @@ and fluid core to be similar, and because the radial magnetic field is likely mu coupling can be much larger and dominate viscous coupling. We assume that the magnetic field morphology at the ICB is dominantly comprised of small spatial scales for example as predicted by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in -terms of an equivalent uniform radial magnetic field (cid:104)B (cid:105) capturing its r.m.s. strength [Buf- -r +terms of an equivalent uniform radial magnetic field (cid:104)B r(cid:105) capturing its r.m.s. strength [Buf- fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the -fluid and solid core, the coupling constant K can be written in the form +fluid and solid core, the coupling constant K +icb +can be written in the form +K icb += 5 -K = (1−i)F (cid:104)B (cid:105)2 , (50) -icb 4 icb r +4(1−i)F icb(cid:104)B r(cid:105)2 , (50) where +F +icb += σδ -F = , (51) -icb Ω ρ r -o s s +Ω oρ sr +s +, (51) +and where δ = (cid:112) -and where δ = 2/(σµΩ ) is the magnetic skin depth. As F is inversely proportional to -o icb -r , K is inversely proportional to inner core size. Note that computing the EM coupling based -s icb -on the r.m.s. strength (cid:104)B (cid:105) rather than a true field morphology tends to overestimate the strength -r +2/(σµΩ o) is the magnetic skin depth. As F +icb +is inversely proportional to +r s, K +icb +is inversely proportional to inner core size. Note that computing the EM coupling based +on the r.m.s. strength (cid:104)B r(cid:105) rather than a true field morphology tends to overestimate the strength of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are -absorbed in the range of possible (cid:104)B (cid:105) values. 
-r +absorbed in the range of possible (cid:104)B r(cid:105) values. The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. -When (cid:104)B (cid:105) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong -r +When (cid:104)B r(cid:105) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which -K increases linearly with (cid:104)B (cid:105) instead of quadratically. A good approximation of K cal- -icb r icb +K +icb +increases linearly with (cid:104)B r(cid:105) instead of quadratically. A good approximation of K +icb +cal- culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], -KE =(0.175−i0.138)(cid:104)B (cid:105) , (52) -icb r -where (cid:104)B (cid:105) is in units of Tesla. The superscript E emphasizes that the numerical factors are -r +KE +icb +=(0.175−i0.138)(cid:104)B r(cid:105) , (52) +where (cid:104)B r(cid:105) is in units of Tesla. The superscript E emphasizes that the numerical factors are appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. To adapt these numerical factors to Mercury, we write, -F -K =(0.175−i0.138) icb (cid:104)B (cid:105) , (53) -icb FE r +K icb -where FE is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- +=(0.175−i0.138)F icb -erry and Koot [2012]. These are Ω = 7.292 × 10−5 s−1, ρ = 12846 kg m−3, r = 1221.5 -o s s -km, σ =5×105 S m−1, which gives FE =90.36 T−2. +FE icb -To compute F , we assume an electrical conductivity of σ =106 S m−1 in the core of +(cid:104)B r(cid:105) , (53) +where FE icb +is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- +erry and Koot [2012]. 
These are Ω +o += 7.292 × 10−5 s−1, ρ +s += 12846 kg m−3, r +s += 1221.5 +km, σ =5×105 S m−1, which gives FE +icb +=90.36 T−2. +To compute F icb, we assume an electrical conductivity of σ =106 S m−1 in the core of Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and -strong field regime occurs when (cid:104)B (cid:105) ≈ 1.53 mT for the real part of K . (cid:104)B (cid:105) at the ICB -r icb r +strong field regime occurs when (cid:104)B r(cid:105) ≈ 1.53 mT for the real part of K icb. (cid:104)B r(cid:105) at the ICB of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom- etry inside the core could be dominated by small length scales, yet only the weaker lower har- monics of the field would penetrate through a thermally stratified layer in the upper region of @@ -1417,120 +2257,181 @@ monics of the field would penetrate through a thermally stratified layer in the Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- face field strength by a factor 1000. Taking a surface field strength equal to ∼300 nT [e.g An- -derson et al., 2012], (cid:104)B (cid:105) at the ICB could be as large as 0.3 mT, corresponding to approxi- -r +derson et al., 2012], (cid:104)B r(cid:105) at the ICB could be as large as 0.3 mT, corresponding to approxi- mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of Mercury remains in the weak field regime. -Figure 6 shows how ε˜ , m˜ and n˜ vary as functions of inner core radius for different choices -m f s -of (cid:104)B (cid:105). 
The larger (cid:104)B (cid:105) is, the stronger is the EM coupling at the ICB, and the smaller is the -r r +Figure 6 shows how ˜ ε m, ˜ m +f +and ˜ n +s +vary as functions of inner core radius for different choices +of (cid:104)B r(cid:105). The larger (cid:104)B r(cid:105) is, the stronger is the EM coupling at the ICB, and the smaller is the differential rotation between the fluid core and inner core. The inner core and fluid core are vir- -tually locked into a common precession motion when (cid:104)B (cid:105)>0.3 mT. Further increasing (cid:104)B (cid:105) -r r +tually locked into a common precession motion when (cid:104)B r(cid:105)>0.3 mT. Further increasing (cid:104)B r(cid:105) above 1 mT does not change the solution as EM coupling already dominates all other torques on the inner core. This is the case even when EM coupling transitions into the strong field regime. -(cid:12) (cid:12) -EM coupling at the CMB is included in these calculations, with σ = 1 S m−1 and (cid:12)g 10(cid:12) = -m +EM coupling at the CMB is included in these calculations, with σ m = 1 S m−1 and +(cid:12) +(cid:12)g0 +1(cid:12) +(cid:12) = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core -we retrieved the solutions of ε˜ and m˜ shown in Figure 4. -m f -As the inner core radius is increased, both ε˜ and m˜ get smaller, as it was the case with -m f +we retrieved the solutions of ˜ ε +m +and ˜ m +f +shown in Figure 4. +As the inner core radius is increased, both ˜ ε +m +and ˜ m +f +get smaller, as it was the case with viscous coupling alone, although the addition of EM coupling lead to more substantial changes. The inner core needs to be larger than approximately 500 km for changes in the Cassini state -equilibrium to be noticeable. It is important to point out that m˜ is reduced not because of +equilibrium to be noticeable. 
It is important to point out that ˜ m f +is reduced not because of EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the -greater is the reduction in ε˜ and m˜ . -m f +greater is the reduction in ˜ ε +m +and ˜ m f. When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are -locked into a common precession motion, a good approximation of ε˜ is given by the same pre- +locked into a common precession motion, a good approximation of ˜ ε m +is given by the same pre- diction as Equations (39-40) involving the effective moment of inertia C(cid:48), except χ is now given by -A¯ Ω cosI−A¯ Ω α φ -χ= c p s o 3 s . (54) -A¯ Ω (e +K )+A¯ Ω e α α −A¯ Ω cosI -f o f cmb s o s 3 g c p -For a small inner core, A¯ Ω cosI >A¯ Ω α φ and χ is positive. Because A¯ Ω α φ increases -c p s o 3 s s o 3 s -with inner core size, χ gets smaller, and so do C(cid:48) and ε˜ . The mantle obliquity drops from 2.049 -m +χ= +¯ A cΩ pcosI− ¯ A sΩ oα 3φ +s +¯ A fΩ o(e +f ++K cmb)+ ¯ A sΩ oe sα 3α g− ¯ A cΩ pcosI +. (54) +For a small inner core, ¯ A cΩ pcosI > ¯ A sΩ oα 3φ +s +and χ is positive. Because ¯ A sΩ oα 3φ +s +increases +with inner core size, χ gets smaller, and so do C(cid:48) and ˜ ε m. The mantle obliquity drops from 2.049 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 -arcmin. For an inner core larger than ≈1000 km, A¯ Ω cosI 120GeV) <2.10×10−12cm−2s−1 ofthemostprolificrepeaters,locatedat𝑧=0.19273(8).Persistent -or Φ(𝐸 > 120GeV) < 1.7×10−12ergcm−2s−1. 
A variation of radioemissionfromFRB20201124AwasdetectedbytheuGMRT -±0.5oftheassumedspectralindexleadstoavariationintheupper (Whartonetal.2021)andtheJVLA(Riccietal.2021)onangular -limitoflessthan±19%.Amapofenergyfluxupperlimitscovering scalesofafewarcseconds.However,itisresolvedoutatscalesof -the full region accessible within the H.E.S.S. field of view above ∼0.1arcsecondswiththeEuropeanVLBINetwork(Marcoteetal. -120GeVisgiveninFigure6. 2021)suggestingthatitisnotacompactsourcedirectlyassociated +gamma-rayfluxabovethatthresholdandassuminganenergydepen- +dencefollowing𝐸−2isΦ(𝐸 >120GeV) <2.10×10−12cm−2s−1 +or Φ(𝐸 > 120GeV) < 1.7×10−12ergcm−2s−1. A variation of +±0.5oftheassumedspectralindexleadstoavariationintheupper +limitoflessthan±19%.Amapofenergyfluxupperlimitscovering +the full region accessible within the H.E.S.S. field of view above +120GeVisgiveninFigure6. +4 DISCUSSION +Of the targeted FRB fields reported here, only FRB 20190714A +isobservedtobespatiallycoincidentwithapersistentradiocon- +tinuumsource.Weobtainanupperlimitof∼ 15𝜇Jybeam−1 for +FRBs20190711Aand20171019A,respectively,andapeakinten- +sity of ∼ 53𝜇Jy beam−1 for the emission coincident with FRB +20190714A. This source is detected at both epochs with similar +intensitieswithinthemeasuredrmsoftheimages(seeTables1and +2 for details). The values in the Table 2 are derived by carrying +out 2D Gaussian fit using similar ellipses enclosing the detected +persistentemission.Theaveragefluxdensityis∼3timeslessthan +thatofthepersistentsourceassociatedwithFRBs20121102A,one +ofthemostprolificrepeaters,locatedat𝑧=0.19273(8).Persistent +radioemissionfromFRB20201124AwasdetectedbytheuGMRT +(Whartonetal.2021)andtheJVLA(Riccietal.2021)onangular +scalesofafewarcseconds.However,itisresolvedoutatscalesof +∼0.1arcsecondswiththeEuropeanVLBINetwork(Marcoteetal. +2021)suggestingthatitisnotacompactsourcedirectlyassociated with the FRB. 
In contrast, the other localised, prolific repeating FRB20180916Ahasnopersistentradiocounterpart. -4 DISCUSSION IntheimageinFigure3onecanseethatthepersistentradio -Of the targeted FRB fields reported here, only FRB 20190714A source lies at the edge of the optical extent of the host galaxy -isobservedtobespatiallycoincidentwithapersistentradiocon- as seen in PanSTARRS observations (Heintz et al. 2020). Our -tinuumsource.Weobtainanupperlimitof∼ 15𝜇Jybeam−1 for derived 1283MHz peak position with MeerKAT places it just -FRBs20190711Aand20171019A,respectively,andapeakinten- 1(cid:48).(cid:48)68awayfromthepositionofFRB20190714A(𝛼 𝐽2000,𝛿 𝐽2000 -sity of ∼ 53𝜇Jy beam−1 for the emission coincident with FRB = 12ℎ 15𝑚 55𝑠 .12, -13◦01(cid:48)15(cid:48).(cid:48)70; Heintz et al. 2020). The posi- -20190714A. This source is detected at both epochs with similar tionaluncertaintyontheFRBpositionis0(cid:48).(cid:48)283.Similarly,thepeak -intensitieswithinthemeasuredrmsoftheimages(seeTables1and 1.51GHze-MERLINpositionofthepersistentradiosourceissepa- -2 for details). The values in the Table 2 are derived by carrying ratedfromthepositionofFRB20190714Aby0(cid:48).(cid:48)53.Thepersistent -out 2D Gaussian fit using similar ellipses enclosing the detected sourcenearFRB20190714Ahasafluxbroadlyconsistentwiththe -persistentemission.Theaveragefluxdensityis∼3timeslessthan MeerKATfluxandisunresolvedonthee-MERLINbaselines.The +source lies at the edge of the optical extent of the host galaxy +as seen in PanSTARRS observations (Heintz et al. 2020). Our +derived 1283MHz peak position with MeerKAT places it just +1. (cid:48)(cid:48)68awayfromthepositionofFRB20190714A(𝛼 𝐽2000,𝛿 𝐽2000 += 12ℎ 15𝑚 55𝑠 .12, -13◦01(cid:48)15. (cid:48)(cid:48)70; Heintz et al. 2020). The posi- +tionaluncertaintyontheFRBpositionis0. (cid:48)(cid:48)283.Similarly,thepeak +1.51GHze-MERLINpositionofthepersistentradiosourceissepa- +ratedfromthepositionofFRB20190714Aby0. 
(cid:48)(cid:48)53.Thepersistent +sourcenearFRB20190714Ahasafluxbroadlyconsistentwiththe +MeerKATfluxandisunresolvedonthee-MERLINbaselines.The MNRAS000,1–15(2021) MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 7 Figure2. FRB20190714AMeerKATepochIimage(top)andazoom-in(bottom)aroundthepositionoftheFRBindicatedbythecyancircle.Whitecontours @@ -437,213 +582,330 @@ etal.(2019). Table1.DetailsoftheFRBfieldsobservedwithMeerKAT. Fieldname Observationdate Synthesizedbeam rms(𝜇Jybeam−1) Detected? FRB20171019A 28September2019 – No(calibrationfailure) -FRB20171019A 18October2019 6(cid:48).(cid:48)8×5(cid:48).(cid:48)0 5.2 <15𝜇Jybeam−1 -FRB20190711A 23August2019 11(cid:48).(cid:48)7×4(cid:48).(cid:48)9 4.9 <15𝜇Jybeam−1 -FRB20190711A 09September2019 12(cid:48).(cid:48)5×4(cid:48).(cid:48)9 4.6 <15𝜇Jybeam−1 -FRB20190714A 14September2019 7(cid:48).(cid:48)1×6(cid:48).(cid:48)2 4.2 54.4𝜇Jybeam−1 -FRB20190714A 28September2019 6(cid:48).(cid:48)5×5(cid:48).(cid:48)1 5.8 52.0𝜇Jybeam−1 +FRB20171019A 18October2019 6. (cid:48)(cid:48)8×5. (cid:48)(cid:48)0 5.2 <15𝜇Jybeam−1 +FRB20190711A 23August2019 11. (cid:48)(cid:48)7×4. (cid:48)(cid:48)9 4.9 <15𝜇Jybeam−1 +FRB20190711A 09September2019 12. (cid:48)(cid:48)5×4. (cid:48)(cid:48)9 4.6 <15𝜇Jybeam−1 +FRB20190714A 14September2019 7. (cid:48)(cid:48)1×6. (cid:48)(cid:48)2 4.2 54.4𝜇Jybeam−1 +FRB20190714A 28September2019 6. (cid:48)(cid:48)5×5. (cid:48)(cid:48)1 5.8 52.0𝜇Jybeam−1 Table2.DetailsoftheradiocontinuumsourceassociatedwithFRB20190714A. 
Fieldname Observationdate Telescope 𝜈 centre(GHz) 𝛼 J2000 𝛿 J2000 Maj.×min.axis Pos.angle Int.fluxdensity -FRB20190714A 28September2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.154 -13◦01(cid:48)17(cid:48).(cid:48)30 9(cid:48).(cid:48)6×7(cid:48).(cid:48)4 88.7◦ 87.4𝜇Jy -FRB20190714A 18October2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.193 −13◦01(cid:48)17(cid:48).(cid:48)18 8(cid:48).(cid:48)2×6(cid:48).(cid:48)4 12.2◦ 80.7𝜇Jy -FRB20190714A 13January2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠.116 −13◦01(cid:48)14(cid:48).(cid:48)51 0(cid:48).(cid:48)15×0(cid:48).(cid:48)65 17.6◦ 107.5𝜇Jy -largeoffsetfromthecentreofthegalaxymakesthepersistentsource sufficientsensitivityinthesub-bandimages,thus,weareunableto -unlikelytobeanAGN.SofarthisFRBhasnotbeenseentorepeat. derivethespectralindexoftheemissionofthehostgalaxy. +FRB20190714A 28September2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.154 -13◦01(cid:48)17. (cid:48)(cid:48)30 9. (cid:48)(cid:48)6×7. (cid:48)(cid:48)4 88.7◦ 87.4𝜇Jy +FRB20190714A 18October2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.193 −13◦01(cid:48)17. (cid:48)(cid:48)18 8. (cid:48)(cid:48)2×6. (cid:48)(cid:48)4 12.2◦ 80.7𝜇Jy +FRB20190714A 13January2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠.116 −13◦01(cid:48)14. (cid:48)(cid:48)51 0. (cid:48)(cid:48)15×0. (cid:48)(cid:48)65 17.6◦ 107.5𝜇Jy +largeoffsetfromthecentreofthegalaxymakesthepersistentsource +unlikelytobeanAGN.SofarthisFRBhasnotbeenseentorepeat. Higherresolutionimagingwillberequiredtobecertainofadirect -Our e-MERLIN observations probe a different spatial associationofthepersistentsourcewiththeFRB.Wedidnothave +sufficientsensitivityinthesub-bandimages,thus,weareunableto +derivethespectralindexoftheemissionofthehostgalaxy. +Our e-MERLIN observations probe a different spatial scale than the size of the persistent radio source associated with FRB 20121102A. At the angular diameter distance of MNRAS000,1–15(2021) 10 Chibuezeetal. 
Figure5.XRTsummedimageofFRB20171019AregiontakenduringtheMWLobservationcampaigninSeptember-October2019.Thepositionofthe Wolf1561starisshownincyanandislabelled.ThegreenboxindicatesFRB20171019A90%localisationregionasreportedinKumaretal.(2019). -FRB 20190714A (780 Mpc), an unresolved source with an an- tentradioluminosity.Thesevaluesareexpectedtodecreaseona -gularsizeof0(cid:48).(cid:48)6correspondstoaphysicalextentof(cid:46)2.3kpc.The timescaleofafewdecadestocenturies.Giventheassociationofa -uGMRTreportedthedetectionofanunresolvedradioemissionat comparativelyfainterpersistentsource,FRB20190714Amaypo- -650MHzwithafluxdensityof700±100𝜇Jy(Whartonetal.2021), tentiallybearepeatingFRBwhoseageliesbetweenthatofFRB -whiletheJVLAdetectedpersistentemissionwithafluxdensityof 20121102AandFRB20180916A.Millisecondmagnetarsformed -340±30𝜇Jyat3GHz(Riccietal.2021).Assumingtheestimated throughstandardastrophysicalchannelssuchashydrogenpoorsu- -spectralindexbetweenthesefrequencies(∼−0.5,Riccietal.2021), perluminous supernovae and long duration gamma-ray bursts are -the 1.3GHz flux density would be ∼ 500𝜇Jy (similar to the 3-𝜎 consistentwiththeprogenitorsofFRBsexpectedinlow-metallicity -upperlimitonobservationsfrom1−2GHz;Lawetal.2021).The dwarf galaxies with high specific star-formation rate such as for -fluxdensitywemeasuredforFRB20190714Aisafactorof∼10 FRB20121102A.However,Margalitetal.(2019)notethatitisalso -lowerthanFRB20201124A,butFRB20190714Aisalsoafactor possibletoformsuchsourcesthroughavarietyofchannels,includ- -2.6moredistant.Therefore,thefluxdensitieswouldbecomparable ingbinaryneutronstarmergersandaccretioninducedcollapseof -iftheywereatsimilardistances. whitedwarfsinenvironmentsandhostgalaxydemographicsdiffer- -enttoFRB20121102A.Suchsuggestionsareconsistentwithrecent +FRB 20190714A (780 Mpc), an unresolved source with an an- +gularsizeof0. 
(cid:48)(cid:48)6correspondstoaphysicalextentof(cid:46)2.3kpc.The +uGMRTreportedthedetectionofanunresolvedradioemissionat +650MHzwithafluxdensityof700±100𝜇Jy(Whartonetal.2021), +whiletheJVLAdetectedpersistentemissionwithafluxdensityof +340±30𝜇Jyat3GHz(Riccietal.2021).Assumingtheestimated +spectralindexbetweenthesefrequencies(∼−0.5,Riccietal.2021), +the 1.3GHz flux density would be ∼ 500𝜇Jy (similar to the 3-𝜎 +upperlimitonobservationsfrom1−2GHz;Lawetal.2021).The +fluxdensitywemeasuredforFRB20190714Aisafactorof∼10 +lowerthanFRB20201124A,butFRB20190714Aisalsoafactor +2.6moredistant.Therefore,thefluxdensitieswouldbecomparable +iftheywereatsimilardistances. Given the resolution of MeerKAT we are unable to defini- -localisations(e.g.Heintzetal.2020). tively state whether the persistent emission is associated with a -star-formingregionortheFRBitself.However,theincreasedreso- The X-ray and VHE observations with Swift and H.E.S.S. -lutionwiththee-MERLINbaselineswouldtendtofavouracompact allows us to probe non-thermal persistent emission associated to -sourcesimilartotheoneobservedinFRB20121102A.Oneofthe the FRB host galaxy or its source. Recently, H.E.S.S. observed -leading models to explain the bursts from, and radio counterpart SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic -to FRB 20121102A, is a young nebula powered flaring magnetar magnetarlinkedtoarepeatingFRBanditsfirstX-raycounterpart. -embedded in a 20–50 year-old supernova remnant (Beloborodov MagnetarX-rayflarescouldinfactbenon-thermalinnature(Lietal. -2017; Metzger et al. 2019). The lack of a bright persistent radio 2021)indicatingthepresenceofparticleaccelerationthatcouldpo- -sourceassociatedwiththerepeaterFRB20180916Asuggeststhat tentiallyreachtheVHEdomain.TheinverseComptonprocessisa -it is comparatively older at (cid:38) 200−500 years and the persistent primarycandidatefortheproductionofVHEnon-thermalemission. 
-radiosourcemayhavefaded.InthemodelbyMetzgeretal.(2019), H.E.S.S.observationsdidnotleadtoadetectionofapersistentora -the nebula is suggested to contribute significantly to the rotation transientsourceassociatedtoFRB20171019A.WefoundnoX-ray -measure and dispersion measure (DM), as well as to the persis- counterparts and thus derived the upper limits to constrain these +star-formingregionortheFRBitself.However,theincreasedreso- +lutionwiththee-MERLINbaselineswouldtendtofavouracompact +sourcesimilartotheoneobservedinFRB20121102A.Oneofthe +leading models to explain the bursts from, and radio counterpart +to FRB 20121102A, is a young nebula powered flaring magnetar +embedded in a 20–50 year-old supernova remnant (Beloborodov +2017; Metzger et al. 2019). The lack of a bright persistent radio +sourceassociatedwiththerepeaterFRB20180916Asuggeststhat +it is comparatively older at (cid:38) 200−500 years and the persistent +radiosourcemayhavefaded.InthemodelbyMetzgeretal.(2019), +the nebula is suggested to contribute significantly to the rotation +measure and dispersion measure (DM), as well as to the persis- +tentradioluminosity.Thesevaluesareexpectedtodecreaseona +timescaleofafewdecadestocenturies.Giventheassociationofa +comparativelyfainterpersistentsource,FRB20190714Amaypo- +tentiallybearepeatingFRBwhoseageliesbetweenthatofFRB +20121102AandFRB20180916A.Millisecondmagnetarsformed +throughstandardastrophysicalchannelssuchashydrogenpoorsu- +perluminous supernovae and long duration gamma-ray bursts are +consistentwiththeprogenitorsofFRBsexpectedinlow-metallicity +dwarf galaxies with high specific star-formation rate such as for +FRB20121102A.However,Margalitetal.(2019)notethatitisalso +possibletoformsuchsourcesthroughavarietyofchannels,includ- +ingbinaryneutronstarmergersandaccretioninducedcollapseof +whitedwarfsinenvironmentsandhostgalaxydemographicsdiffer- +enttoFRB20121102A.Suchsuggestionsareconsistentwithrecent +localisations(e.g.Heintzetal.2020). 
+The X-ray and VHE observations with Swift and H.E.S.S. +allows us to probe non-thermal persistent emission associated to +the FRB host galaxy or its source. Recently, H.E.S.S. observed +SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic +magnetarlinkedtoarepeatingFRBanditsfirstX-raycounterpart. +MagnetarX-rayflarescouldinfactbenon-thermalinnature(Lietal. +2021)indicatingthepresenceofparticleaccelerationthatcouldpo- +tentiallyreachtheVHEdomain.TheinverseComptonprocessisa +primarycandidatefortheproductionofVHEnon-thermalemission. +H.E.S.S.observationsdidnotleadtoadetectionofapersistentora +transientsourceassociatedtoFRB20171019A.WefoundnoX-ray +counterparts and thus derived the upper limits to constrain these MNRAS000,1–15(2021) MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 11 Figure6.MapofupperlimitsontheVHEgamma-rayenergyfluxderivedfromtheH.E.S.S.observations.Thelimitsarevalidabove120GeVandassume aphotonfluxdistributionfollowingan𝐸−2dependence.ThegreenboxindicatestheFRB20171019A90%localisationregionasreportedinKumaretal. (2019).Theoversamplingradiusis0.1◦. -emissions.InthecaseofexistenceofX-raynon-thermaloutbursts, SwiftandH.E.S.S.instrumentsandobtainedupperlimitsinthethree -thelackofVHEdetectioncouldindicatethatinverseComptonis domainsconstrainingtheMWLemissionsfromFRB20171019A. -weakinthevicinityofthemagnetarsorthattheVHEgamma-ray The search for FRB MWL counterparts is ongoing within the -emissionisquenched.Thislatterscenariocouldbeexplainedbythe H.E.S.S. collaboration and more results will be published in fu- -factthatinverseComptonistakingplacetooclosetothemagne- tureworks. -tar’ssurface,wherepairproductionandphotonsplittingcouldbe Given the association of a comparatively fainter persistent -responsibleforsignificantenergylosses(Huetal.2019),preventing source,FRB20190714AmaypotentiallybearepeatingFRBwhose -energeticparticlesandphotonstoreachthenebula. ageliesbetweenthatofFRB20121102AandFRB20180916A. 
+emissions.InthecaseofexistenceofX-raynon-thermaloutbursts, +thelackofVHEdetectioncouldindicatethatinverseComptonis +weakinthevicinityofthemagnetarsorthattheVHEgamma-ray +emissionisquenched.Thislatterscenariocouldbeexplainedbythe +factthatinverseComptonistakingplacetooclosetothemagne- +tar’ssurface,wherepairproductionandphotonsplittingcouldbe +responsibleforsignificantenergylosses(Huetal.2019),preventing +energeticparticlesandphotonstoreachthenebula. No persistent emissions were detected towards FRB 20190711A and FRB 20171019A in our MeerKAT observations -(seeFigures7,8,and9),thereforenofollowupobservationswere ACKNOWLEDGEMENTS +(seeFigures7,8,and9),thereforenofollowupobservationswere conductedtowardsthoseFRBs. +5 CONCLUSIONS +SeveralFRBmodelsenvisionpersistentemissiontobeassociated +withthesesources.Inthispaper,weconductedradioobservations +of three FRBs (FRB 20190714A, 20190711A and 20171019A), +and also a multi-wavelength campaign on one of these (FRB +20171019A). +Wedetectedpersistentcompactradioemissionassociatedwith +FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e- +MERLINradiotelescope.Thisrepresentsthefirstdetectionofthe +radiocontinuumemissionassociatedwiththehost(galaxy)ofFRB +20190714AandisonlythethirdknownFRBtohavesuchanas- +sociation.Wefurthermoreobtainedaradioupperlimitof∼15𝜇Jy +beam−1fortherepeatingFRBs20190711Aand20171019A. +WealsoperformedUV,X-rayandVHEobservationswiththe +SwiftandH.E.S.S.instrumentsandobtainedupperlimitsinthethree +domainsconstrainingtheMWLemissionsfromFRB20171019A. +The search for FRB MWL counterparts is ongoing within the +H.E.S.S. collaboration and more results will be published in fu- +tureworks. +Given the association of a comparatively fainter persistent +source,FRB20190714AmaypotentiallybearepeatingFRBwhose +ageliesbetweenthatofFRB20121102AandFRB20180916A. +ACKNOWLEDGEMENTS This paper makes use of the MeerKAT data (Project ID: SCI- 20190418-VC-01). 
The MeerKAT telescope is operated by the South African Radio Astronomy Observatory, which is a facility -5 CONCLUSIONS of the National Research Foundation, an agency of the Depart- -SeveralFRBmodelsenvisionpersistentemissiontobeassociated mentofScienceandInnovation(DSI).Thisworkmadeuseofthe -withthesesources.Inthispaper,weconductedradioobservations Inter-UniversityInstituteforDataIntensiveAstronomy(IDIA)vi- -of three FRBs (FRB 20190714A, 20190711A and 20171019A), sualizationlabhttps://vislab.idia.ac.za.IDIAisapartnershipofthe -and also a multi-wavelength campaign on one of these (FRB UniversityofCapeTown,theUniversityofPretoria,theUniversity -20171019A). oftheWesternCapeandtheSouthAfricanRadioastronomyObser- -Wedetectedpersistentcompactradioemissionassociatedwith vatory.e-MERLINisaNationalFacilityoperatedbytheUniversity -FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e- ofManchesteratJodrellBankObservatoryonbehalfofSTFC. -MERLINradiotelescope.Thisrepresentsthefirstdetectionofthe TheauthorsacknowledgefundingfromtheEuropeanResearch -radiocontinuumemissionassociatedwiththehost(galaxy)ofFRB Council(ERC)undertheEuropeanUnion’sHorizon2020research -20190714AandisonlythethirdknownFRBtohavesuchanas- andinnovationprogramme(grantagreementNo694745).Thesup- -sociation.Wefurthermoreobtainedaradioupperlimitof∼15𝜇Jy portoftheNamibianauthoritiesandoftheUniversityofNamibia -beam−1fortherepeatingFRBs20190711Aand20171019A. 
infacilitatingtheconstructionandoperationofH.E.S.S.isgrate- -WealsoperformedUV,X-rayandVHEobservationswiththe fullyacknowledged,asisthesupportbytheGermanMinistryfor +mentofScienceandInnovation(DSI).Thisworkmadeuseofthe +Inter-UniversityInstituteforDataIntensiveAstronomy(IDIA)vi- +sualizationlabhttps://vislab.idia.ac.za.IDIAisapartnershipofthe +UniversityofCapeTown,theUniversityofPretoria,theUniversity +oftheWesternCapeandtheSouthAfricanRadioastronomyObser- +vatory.e-MERLINisaNationalFacilityoperatedbytheUniversity +ofManchesteratJodrellBankObservatoryonbehalfofSTFC. +TheauthorsacknowledgefundingfromtheEuropeanResearch +Council(ERC)undertheEuropeanUnion’sHorizon2020research +andinnovationprogramme(grantagreementNo694745).Thesup- +portoftheNamibianauthoritiesandoftheUniversityofNamibia +infacilitatingtheconstructionandoperationofH.E.S.S.isgrate- +fullyacknowledged,asisthesupportbytheGermanMinistryfor MNRAS000,1–15(2021) 12 Chibuezeetal. Figure7. FRB20171019AMeerKATimageandazoom-in(insert)aroundthepositionoftheFRB.Thewhiteellipseonthebottomleftcorneroftheinsert representthebeamsizeofMeerKAT. -Education and Research (BMBF), the Max Planck Society, the REFERENCES +Education and Research (BMBF), the Max Planck Society, the GermanResearchFoundation(DFG),theHelmholtzAssociation, -AdámekK.,ArmourW.,2016,arXive-prints,p.arXiv:1611.09704 the Alexander von Humboldt Foundation, the French Ministry of -AdámekK.,ArmourW.,2019,AGPUImplementationoftheHarmonic Higher Education, Research and Innovation, the Centre National +de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU), +the Commissariat à l’énergie atomique et aux énergies alterna- +tives (CEA), the U.K. 
Science and Technology Facilities Council +(STFC),theKnutandAliceWallenbergFoundation,theNational +ScienceCentre,Polandgrantno.2016/22/M/ST9/00382,theSouth +AfricanDepartmentofScienceandTechnologyandNationalRe- +searchFoundation,theUniversityofNamibia,theNationalCom- +missiononResearch,Science&TechnologyofNamibia(NCRST), +theAustrianFederalMinistryofEducation,ScienceandResearch +and the Austrian Science Fund (FWF), the Australian Research +Council (ARC), the Japan Society for the Promotion of Science +andbytheUniversityofAmsterdam.Weappreciatetheexcellent +workofthetechnicalsupportstaffinBerlin,Zeuthen,Heidelberg, +Palaiseau,Paris,Saclay,TübingenandinNamibiaintheconstruc- +tion and operation of the equipment. This work benefited from +servicesprovidedbytheH.E.S.S.VirtualOrganisation,supported +bythenationalresourceprovidersoftheEGIFederation. +DATAAVAILABILITY +Thedataunderlyingthisarticlewillbesharedonreasonablerequest +tothecorrespondingauthors. +REFERENCES +AdámekK.,ArmourW.,2016,arXive-prints,p.arXiv:1611.09704 +AdámekK.,ArmourW.,2019,AGPUImplementationoftheHarmonic SumAlgorithm.p.489 -de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU), Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p. -the Commissariat à l’énergie atomique et aux énergies alterna- arXiv:1711.10855 -tives (CEA), the U.K. Science and Technology Facilities Council AharonianF.,etal.,2006,A&A,457,899 -(STFC),theKnutandAliceWallenbergFoundation,theNational AlamS.,etal.,2015,TheAstrophysicalJournalSupplementSeries,219,12 -ScienceCentre,Polandgrantno.2016/22/M/ST9/00382,theSouth AshtonT.,etal.,2020,arXive-prints,p.arXiv:2001.04510 -AfricanDepartmentofScienceandTechnologyandNationalRe- BannisterK.W.,etal.,2019,Science,365,565 +Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p. 
+arXiv:1711.10855 +AharonianF.,etal.,2006,A&A,457,899 +AlamS.,etal.,2015,TheAstrophysicalJournalSupplementSeries,219,12 +AshtonT.,etal.,2020,arXive-prints,p.arXiv:2001.04510 +BannisterK.W.,etal.,2019,Science,365,565 BassaC.G.,etal.,2017,ApJ,843,L8 -searchFoundation,theUniversityofNamibia,theNationalCom- BeloborodovA.M.,2017,ApJ,843,L26 -missiononResearch,Science&TechnologyofNamibia(NCRST), BergeD.,FunkS.,HintonJ.,2007,A&A,466,1219 -theAustrianFederalMinistryofEducation,ScienceandResearch BhandariS.,etal.,2020,ApJ,895,L37 -and the Austrian Science Fund (FWF), the Australian Research BolmontJ.,etal.,2014,NuclearInstrumentsandMethodsinPhysicsRe- -Council (ARC), the Japan Society for the Promotion of Science searchSectionA:Accelerators,Spectrometers,DetectorsandAssoci- -andbytheUniversityofAmsterdam.Weappreciatetheexcellent atedEquipment,761,46–57 -workofthetechnicalsupportstaffinBerlin,Zeuthen,Heidelberg, BreeveldA.A.,etal.,2010,MonthlyNoticesoftheRoyalAstronomical -Palaiseau,Paris,Saclay,TübingenandinNamibiaintheconstruc- Society,406,1687 -tion and operation of the equipment. This work benefited from BrunF.,PielQ.,deNauroisM.,BernhardS.,2020,Astropart.Phys.,118, -servicesprovidedbytheH.E.S.S.VirtualOrganisation,supported 102429 +searchSectionA:Accelerators,Spectrometers,DetectorsandAssoci- +atedEquipment,761,46–57 +BreeveldA.A.,etal.,2010,MonthlyNoticesoftheRoyalAstronomical +Society,406,1687 +BrunF.,PielQ.,deNauroisM.,BernhardS.,2020,Astropart.Phys.,118, +102429 BurrowsD.N.,etal.,2005,SpaceSci.Rev.,120,165 -bythenationalresourceprovidersoftheEGIFederation. 
CalebM.,KeaneE.,2021,Universe,7,453 CalebM.,StappersB.W.,RajwadeK.,FlynnC.,2019,MNRAS,484,5500 CalebM.,etal.,2020,MNRAS,496,4565 ChatterjeeS.,etal.,2017,Nature,541,58 Chime/FRBCollaboration2021,TheAstronomer’sTelegram,14497,1 CordesJ.M.,WassermanI.,2016,MNRAS,457,232 -DATAAVAILABILITY DaiZ.G.,WangJ.S.,YuY.W.,2017,ApJ,838,L7 +DaiZ.G.,WangJ.S.,YuY.W.,2017,ApJ,838,L7 DimoudiS.,ArmourW.,2015,arXive-prints,p.arXiv:1511.07343 -Thedataunderlyingthisarticlewillbesharedonreasonablerequest Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A., -tothecorrespondingauthors. ArmourW.,2018,ApJS,239,28 +Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A., +ArmourW.,2018,ApJS,239,28 MNRAS000,1–15(2021) MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 13 Figure8. FRB20190711AMeerKATepochIimageandazoom-in(insert)aroundthepositionoftheFRB.Thewhiteellipseonthebottomleftcornerofthe insertrepresentthebeamsizeofMeerKAT. -EftekhariT.,BergerE.,WilliamsP.K.G.,BlanchardP.K.,2018,ApJ,860, MarcoteB.,etal.,2017,ApJ,834,L8 -73 MarcoteB.,etal.,2020,Nature,577,190 -EvansP.A.,etal.,2007,A&A,469,379 MarcoteB.,etal.,2021,TheAstronomer’sTelegram,14603,1 -EvansP.A.,etal.,2009,MNRAS,397,1177 MargalitB.,BergerE.,MetzgerB.D.,2019,ApJ,886,110 -FongW.-f.,etal.,2021,ApJ,919,L23 MauchT.,etal.,2020,ApJ,888,61 -H.E.S.S.collaboration2021,ApJ,919,106 McMullin J. 
P., Waters B., Schiebel D., Young W., Golap K., 2007, in -HI4PICollaborationetal.,2016,A&A,594,A116 ShawR.A.,HillF.,BellD.J.,eds,AstronomicalSocietyofthePacific -HeintzK.E.,etal.,2020,ApJ,903,152 ConferenceSeriesVol.376,AstronomicalDataAnalysisSoftwareand -HeywoodI.,2020,oxkat:Semi-automatedimagingofMeerKATobserva- SystemsXVI.p.127 -tions(ascl:2009.003) MereghettiS.,etal.,2020,ApJ,898,L29 -HickishJ.,etal.,2016,JournalofAstronomicalInstrumentation,5,1641001 MetzgerB.D.,MargalitB.,SironiL.,2019,MNRAS,485,4091 -HilmarssonG.H.,etal.,2021,ApJ,908,L10 OffringaA.R.,etal.,2014,MNRAS,444,606 -HuK.,BaringM.G.,WadiasinghZ.,HardingA.K.,2019,MNRAS,486, ParsonsR.D.,HintonJ.A.,2014,AstroparticlePhysics,56,26 -3327–3349 Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p. -Insight-HXMT2020,SGRJ1935+2154burstlist,http://hxmten.ihep. arXiv:2107.10113 -ac.cn/bfy/331.jhtml PlattsE.,WeltmanA.,WaltersA.,TendulkarS.P.,GordinJ.E.B.,Kandhai -JamesC.W.,etal.,2020,MNRAS,495,2416 S.,2019,Phys.Rep.,821,1 -JonasJ.,MeerKATTeam2016,inMeerKATScience:OnthePathwayto PopovS.B.,PostnovK.A.,2013,arXive-prints,p.arXiv:1307.4924 -theSKA.p.1 PopovS.B.,PshirkovM.S.,2016,MNRAS,462,L16 -KashiyamaK.,IokaK.,MészárosP.,2013,ApJ,776,L39 PopovS.,PostnovK.,PshirkovM.,2018,InternationalJournalofModern -KenyonJ.S.,SmirnovO.M.,GroblerT.L.,PerkinsS.J.,2018,MNRAS, PhysicsD,27,1844016 -478,2399 ProchaskaJ.X.,etal.,2019,Science,366,231 -KumarP.,etal.,2019,ApJ,887,L30 RaviV.,2019,NatureAstronomy,3,928 -KumarP.,etal.,2021,MNRAS,500,2525 Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p. 
-LawC.,TendulkarS.,ClarkeT.,AggarwalK.,BethapudyS.,2021,The arXiv:2010.14334 -Astronomer’sTelegram,14526,1 RicciR.,PiroL.,PanessaF.,O’ConnorB.,LottiS.,BruniG.,ZhangB., -LiC.K.,etal.,2021,NatureAstronomy, 2021,TheAstronomer’sTelegram,14549,1 -LiuT.,RomeroG.E.,LiuM.-L.,LiA.,2016,ApJ,826,82 RidnaiaA.,etal.,2021,NatureAstronomy,inpress -LorimerD.R.,BailesM.,McLaughlinM.A.,NarkevicD.J.,CrawfordF., RolkeW.A.,LópezA.M.,ConradJ.,2005,NuclearInstrumentsandMeth- -2007,Science,318,777 odsinPhysicsResearchA,551,493 -LyubarskyY.,2014,MNRAS:Letters,442,L9 RomingP.W.A.,etal.,2005,SpaceScienceReviews,120,95–142 -MacquartJ.P.,etal.,2020,Nature,581,391 TavaniM.,etal.,2021,NatureAstronomy,5,401–407 +EftekhariT.,BergerE.,WilliamsP.K.G.,BlanchardP.K.,2018,ApJ,860, +73 +EvansP.A.,etal.,2007,A&A,469,379 +EvansP.A.,etal.,2009,MNRAS,397,1177 +FongW.-f.,etal.,2021,ApJ,919,L23 +H.E.S.S.collaboration2021,ApJ,919,106 +HI4PICollaborationetal.,2016,A&A,594,A116 +HeintzK.E.,etal.,2020,ApJ,903,152 +HeywoodI.,2020,oxkat:Semi-automatedimagingofMeerKATobserva- +tions(ascl:2009.003) +HickishJ.,etal.,2016,JournalofAstronomicalInstrumentation,5,1641001 +HilmarssonG.H.,etal.,2021,ApJ,908,L10 +HuK.,BaringM.G.,WadiasinghZ.,HardingA.K.,2019,MNRAS,486, +3327–3349 +Insight-HXMT2020,SGRJ1935+2154burstlist,http://hxmten.ihep. 
+ac.cn/bfy/331.jhtml +JamesC.W.,etal.,2020,MNRAS,495,2416 +JonasJ.,MeerKATTeam2016,inMeerKATScience:OnthePathwayto +theSKA.p.1 +KashiyamaK.,IokaK.,MészárosP.,2013,ApJ,776,L39 +KenyonJ.S.,SmirnovO.M.,GroblerT.L.,PerkinsS.J.,2018,MNRAS, +478,2399 +KumarP.,etal.,2019,ApJ,887,L30 +KumarP.,etal.,2021,MNRAS,500,2525 +LawC.,TendulkarS.,ClarkeT.,AggarwalK.,BethapudyS.,2021,The +Astronomer’sTelegram,14526,1 +LiC.K.,etal.,2021,NatureAstronomy, +LiuT.,RomeroG.E.,LiuM.-L.,LiA.,2016,ApJ,826,82 +LorimerD.R.,BailesM.,McLaughlinM.A.,NarkevicD.J.,CrawfordF., +2007,Science,318,777 +LyubarskyY.,2014,MNRAS:Letters,442,L9 +MacquartJ.P.,etal.,2020,Nature,581,391 +MarcoteB.,etal.,2017,ApJ,834,L8 +MarcoteB.,etal.,2020,Nature,577,190 +MarcoteB.,etal.,2021,TheAstronomer’sTelegram,14603,1 +MargalitB.,BergerE.,MetzgerB.D.,2019,ApJ,886,110 +MauchT.,etal.,2020,ApJ,888,61 +McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in +ShawR.A.,HillF.,BellD.J.,eds,AstronomicalSocietyofthePacific +ConferenceSeriesVol.376,AstronomicalDataAnalysisSoftwareand +SystemsXVI.p.127 +MereghettiS.,etal.,2020,ApJ,898,L29 +MetzgerB.D.,MargalitB.,SironiL.,2019,MNRAS,485,4091 +OffringaA.R.,etal.,2014,MNRAS,444,606 +ParsonsR.D.,HintonJ.A.,2014,AstroparticlePhysics,56,26 +Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p. +arXiv:2107.10113 +PlattsE.,WeltmanA.,WaltersA.,TendulkarS.P.,GordinJ.E.B.,Kandhai +S.,2019,Phys.Rep.,821,1 +PopovS.B.,PostnovK.A.,2013,arXive-prints,p.arXiv:1307.4924 +PopovS.B.,PshirkovM.S.,2016,MNRAS,462,L16 +PopovS.,PostnovK.,PshirkovM.,2018,InternationalJournalofModern +PhysicsD,27,1844016 +ProchaskaJ.X.,etal.,2019,Science,366,231 +RaviV.,2019,NatureAstronomy,3,928 +Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p. 
+arXiv:2010.14334 +RicciR.,PiroL.,PanessaF.,O’ConnorB.,LottiS.,BruniG.,ZhangB., +2021,TheAstronomer’sTelegram,14549,1 +RidnaiaA.,etal.,2021,NatureAstronomy,inpress +RolkeW.A.,LópezA.M.,ConradJ.,2005,NuclearInstrumentsandMeth- +odsinPhysicsResearchA,551,493 +RomingP.W.A.,etal.,2005,SpaceScienceReviews,120,95–142 +TavaniM.,etal.,2021,NatureAstronomy,5,401–407 MNRAS000,1–15(2021) 14 Chibuezeetal. Figure9. FRB20190711AMeerKATepochIIimageandazoom-in(insert)aroundthepositionoftheFRB.Thewhiteellipseonthebottomleftcornerof theinsertrepresentthebeamsizeofMeerKAT. -TendulkarS.P.,etal.,2017,ApJ,834,L7 Road,OxfordOX13RH,UK -ThorntonD.,etal.,2013,Science,341,53 10NationalUniversityofIrelandGalway,UniversityRoad,Galway, -TotaniT.,2013,PASJ,65,L12 H91TK33,Ireland -VieyroF.L.,RomeroG.E.,Bosch-RamonV.,MarcoteB.,delValleM.V., 11SKA Observatory, Jodrell Bank Observatory, Macclesfield, +TendulkarS.P.,etal.,2017,ApJ,834,L7 +ThorntonD.,etal.,2013,Science,341,53 +TotaniT.,2013,PASJ,65,L12 +VieyroF.L.,RomeroG.E.,Bosch-RamonV.,MarcoteB.,delValleM.V., 2017,A&A,602,A64 -CheshireSK119DL,UK -WhartonR.,etal.,2021,TheAstronomer’sTelegram,14529,1 12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place, +WhartonR.,etal.,2021,TheAstronomer’sTelegram,14529,1 YamasakiS.,TotaniT.,KiuchiK.,2018,PASJ,70,39 -Dublin2,Ireland ZhangB.,2018,ApJ,854,L21 -13Max-Planck-InstitutfürKernphysik,P.O.Box103980,D69029 deNauroisM.,RollandL.,2009,AstroparticlePhysics,32,231 +APPENDIXA: AUTHORAFFILIATIONS +1CentreforSpaceResearch,North-WestUniversity,Potchefstroom +2531,SouthAfrica +2Department of Physics and Astronomy, Faculty of Physical Sci- +ences,UniversityofNigeria,CarverBuilding,1UniversityRoad, +Nsukka410001,Nigeria +3JodrellBankCentreforAstrophysics,DepartmentofPhysicsand +Astronomy,UniversityofManchester,ManchesterM139PL,UK +4SydneyInstituteforAstronomy,SchoolofPhysics,TheUniversity +ofSydney,NSW2006,Australia +5Max-Planck-InstitutfürRadioastronomie,AufdemHügel69,D- +53121Bonn,Germany +6IRFU, CEA, 
Université Paris-Saclay, F-91191 Gif-sur-Yvette, +France +7Department of Physics and Electronics, Rhodes University, PO +Box94,Grahamstown6140,SouthAfrica +8SouthAfricanRadioAstronomyObservatory,BlackRiverPark,2 +FirStreet,Observatory,CapeTown7925,SouthAfrica +9Astrophysics,DepartmentofPhysics,UniversityofOxford,Keble +Road,OxfordOX13RH,UK +10NationalUniversityofIrelandGalway,UniversityRoad,Galway, +H91TK33,Ireland +11SKA Observatory, Jodrell Bank Observatory, Macclesfield, +CheshireSK119DL,UK +12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place, +Dublin2,Ireland +13Max-Planck-InstitutfürKernphysik,P.O.Box103980,D69029 Heidelberg,Germany 14HighEnergyAstrophysicsLaboratory,RAU,123HovsepEmin StYerevan0051,Armenia 15Landessternwarte,UniversitätHeidelberg,Königstuhl,D69117 -APPENDIXA: AUTHORAFFILIATIONS Heidelberg,Germany -1CentreforSpaceResearch,North-WestUniversity,Potchefstroom 16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille, -2531,SouthAfrica France -2Department of Physics and Astronomy, Faculty of Physical Sci- 17LaboratoireLeprince-Ringuet,ÉcolePolytechnique,CNRS,In- -ences,UniversityofNigeria,CarverBuilding,1UniversityRoad, stitutPolytechniquedeParis,F-91128Palaiseau,France -Nsukka410001,Nigeria 18University of Namibia, Department of Physics, Private Bag -3JodrellBankCentreforAstrophysics,DepartmentofPhysicsand +16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille, +France +17LaboratoireLeprince-Ringuet,ÉcolePolytechnique,CNRS,In- +stitutPolytechniquedeParis,F-91128Palaiseau,France +18University of Namibia, Department of Physics, Private Bag 13301,Windhoek10005,Namibia -Astronomy,UniversityofManchester,ManchesterM139PL,UK 19InstytutFizyki Ja¸drowejPAN, ul.Radzikowskiego 152,31-342 -4SydneyInstituteforAstronomy,SchoolofPhysics,TheUniversity +19InstytutFizyki Ja¸drowejPAN, ul.Radzikowskiego 152,31-342 Kraków,Poland -ofSydney,NSW2006,Australia 20DESY,D-15738Zeuthen,Germany -5Max-Planck-InstitutfürRadioastronomie,AufdemHügel69,D- 
21SchoolofPhysics,UniversityoftheWitwatersrand,1JanSmuts -53121Bonn,Germany Avenue,Braamfontein,Johannesburg,2050SouthAfrica -6IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette, 22Université de Paris, CNRS, Astroparticule et Cosmologie, F- -France 75013Paris,France -7Department of Physics and Electronics, Rhodes University, PO 23DepartmentofPhysicsandElectricalEngineering,LinnaeusUni- -Box94,Grahamstown6140,SouthAfrica versity,35195Växjö,Sweden -8SouthAfricanRadioAstronomyObservatory,BlackRiverPark,2 24LaboratoireUniversetThéories,ObservatoiredeParis,Univer- -FirStreet,Observatory,CapeTown7925,SouthAfrica sitéPSL,CNRS,UniversitédeParis,92190Meudon,France -9Astrophysics,DepartmentofPhysics,UniversityofOxford,Keble +20DESY,D-15738Zeuthen,Germany +21SchoolofPhysics,UniversityoftheWitwatersrand,1JanSmuts +Avenue,Braamfontein,Johannesburg,2050SouthAfrica +22Université de Paris, CNRS, Astroparticule et Cosmologie, F- +75013Paris,France +23DepartmentofPhysicsandElectricalEngineering,LinnaeusUni- +versity,35195Växjö,Sweden +24LaboratoireUniversetThéories,ObservatoiredeParis,Univer- +sitéPSL,CNRS,UniversitédeParis,92190Meudon,France MNRAS000,1–15(2021) MeerKAT,e-MERLIN, SwiftandH.E.S.S.,observationsofthreelocalisedFRBs 15 25Sorbonne Université, Université Paris Diderot, Sorbonne Paris diff --git a/read/results/pdfplumber/2201.00151.txt b/read/results/pdfplumber/2201.00151.txt index 0031559..ae8363e 100644 --- a/read/results/pdfplumber/2201.00151.txt +++ b/read/results/pdfplumber/2201.00151.txt @@ -1,3 +1,41 @@ +a +r +X +i +v +: +2 +2 +0 +1 . +0 +0 +1 +5 +1 +v +1 +[ +a +s +t +r +o +- +p +h +. 
+G +A +] +1 +J +a +n +2 +0 +2 +2 Astronomy&Astrophysicsmanuscriptno.Populations4 ©ESO2022 January4,2022 Multiple stellar populations in Schwarzschild modeling @@ -6,860 +44,1511 @@ KlaudiaKowalczykandEwaL.Łokas NicolausCopernicusAstronomicalCenter,PolishAcademyofSciences,Bartycka18,00-716Warsaw,Poland e-mail:klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl January4,2022 -2202 ABSTRACT Dwarfspheroidal(dSph)galaxiesarebelievedtobestronglydarkmatterdominatedandthusareconsideredperfectobjectstostudy -darkmatterdistributionandtesttheoriesof structureformation. Theypossessresolved, multiplestellarpopulations thatoffer new naJ +darkmatterdistributionandtesttheoriesof structureformation. Theypossessresolved, multiplestellarpopulations thatoffer new possibilitiesfor modeling. A promising tool for the dynamical modeling of these objects isthe Schwarzschild orbit superposition method.Inthisworkweextendourpreviousimplementationoftheschemetoincludemorethanonepopulationofstarsandamore generalformofthemass-to-lightratiofunction.Wetestedtheimprovedapproachonanearlyspherical,gas-freegalaxyformedin -1 thecosmologicalcontextfromtheIllustrissimulation.Wemodeledthebinnedvelocitymomentsforstarssplitintotwopopulations bymetallicityanddemonstratethatinspiteoflargersamplingerrorstheincreasednumberofconstraintsleadstosignificantlytighter -]AG.hp-ortsa[ confidenceregionsontherecovereddensityandvelocityanisotropyprofiles.WethenappliedthemethodtotheFornaxdSphgalaxy withstarssimilarlydividedintotwopopulations.Incomparisonwithourearlierwork,wefindtheanisotropyparametertobeslightly increasing, rather thandecreasing, withradiusand morestrongly constrained. Wearealsoabletoinferanisotropy for eachstellar populationseparatelyandfindthemtobesignificantlydifferent. Keywords. galaxies:kinematicsanddynamics–galaxies:structure–galaxies:fundamentalparameters–galaxies:dwarf–galaxies: starclusters:individual:Fornax -1. Introduction momentsreliably and some assumption on the functionalform +1. 
Introduction +Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo +1998;Tolstoyetal. 2009)are consideredto bea perfecttoolto +test our current theories of structure formation involving dark +matter in the context of near-field cosmology. The objects are +believedtobestronglydarkmatterdominatedwithmass-to-light +ratiosevenontheorderofafewhundredsolarunits.Duetotheir +proximitytheyarealsotheonlyextragalacticsystemswherein- +dividualstars canberesolvedandtheirvelocitiesmeasuredof- +fering the possibility to create interesting dynamical modeling +techniques. +The first estimates of dark matter content in dSph galaxies +werebasedonasinglemeasurementoftheline-of-sightvelocity +dispersionofthestarsandtheapplicationofthevirialtheorem. +Asthesamplesofthestarswithkinematicmeasurementsgrew, +itbecamepossibletoestimatetheprofileofthevelocitydisper- +sionandmodelitusingtheJeansequation(Binney&Tremaine +2008). Since the stars in the galaxy can move on a variety +of orbits, from circular to radial, the degeneracy between the +anisotropyof the orbitsandthe mass distributionis inherentin +this type of modeling. The reason for this lies in the fact that +differentcombinationsofthesequantitiescanreproducetheve- +locitydispersionprofileequallywell. +Awaytoovercomethisissue,atleastpartially,istoresortto +higher orderline-of-sightvelocity moments,such as the kurto- +sis,andusethecorrespondingJeansequations.Sincethekurto- +sisismoresensitivetothevelocityanisotropythantothemass +distribution,usefulconstraintscanbeobtainedonboth.Still,the +methodrequireslargekinematicsamplestoestimatethevelocity +momentsreliably and some assumption on the functionalform oftheanisotropy(Łokas2002;Łokasetal.2005). -Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo The Schwarzschild modeling technique (Schwarzschild -1v15100.1022:viXra -1998;Tolstoyetal. 
2009)are consideredto bea perfecttoolto 1979) offers a different approach to estimate the properties of -test our current theories of structure formation involving dark dSph galaxies without prior assumptions on the type of orbits. -matter in the context of near-field cosmology. The objects are It relies on building a galaxy model out of a set of best-fitting -believedtobestronglydarkmatterdominatedwithmass-to-light orbits probed in the range of energy and angular momenta. In -ratiosevenontheorderofafewhundredsolarunits.Duetotheir this method, the anisotropy of the stellar orbits comes out as a -proximitytheyarealsotheonlyextragalacticsystemswherein- resultofthemodelinginthesamewayasthedensityprofile.Al- -dividualstars canberesolvedandtheirvelocitiesmeasuredof- thoughithasbeenoriginallydevelopedforlargeellipticalgalax- -fering the possibility to create interesting dynamical modeling ies(vanderMareletal.1998;Vallurietal.2004;Gebhardtetal. -techniques. 2015), it has recently been adopted for use on discrete data +The Schwarzschild modeling technique (Schwarzschild +1979) offers a different approach to estimate the properties of +dSph galaxies without prior assumptions on the type of orbits. +It relies on building a galaxy model out of a set of best-fitting +orbits probed in the range of energy and angular momenta. In +this method, the anisotropy of the stellar orbits comes out as a +resultofthemodelinginthesamewayasthedensityprofile.Al- +thoughithasbeenoriginallydevelopedforlargeellipticalgalax- +ies(vanderMareletal.1998;Vallurietal.2004;Gebhardtetal. +2015), it has recently been adopted for use on discrete data characteristic of dSph galaxies and applied to a number of -The first estimates of dark matter content in dSph galaxies dwarfs,includingCarina,Draco,Fornax,Sculptor,andSextans -werebasedonasinglemeasurementoftheline-of-sightvelocity (Jardel&Gebhardt 2008;Jardeletal.2013;Breddels&Helmi -dispersionofthestarsandtheapplicationofthevirialtheorem. 
2013;Breddelsetal.2013;Kowalczyketal.2019). -Asthesamplesofthestarswithkinematicmeasurementsgrew, ManydSphgalaxiesshowsignsofthepresenceofmultiple -itbecamepossibletoestimatetheprofileofthevelocitydisper- stellarpopulationsresultingfromafewstarformationepisodes -sionandmodelitusingtheJeansequation(Binney&Tremaine (Bellazzinietal.2001;delPinoetal.2015;Fabrizioetal.2016; -2008). Since the stars in the galaxy can move on a variety Paceetal. 2020). This observationoffers a way to improvethe -of orbits, from circular to radial, the degeneracy between the modeling methods since, assuming dynamical equilibrium, all -anisotropyof the orbitsandthe mass distributionis inherentin populations are supposed to be influenced by the same under- -this type of modeling. The reason for this lies in the fact that lying gravitational potential of the galaxy, but they have dif- -differentcombinationsofthesequantitiescanreproducetheve- ferent distributions so more constraints can be imposed during -locitydispersionprofileequallywell. the modeling. This approach was first used by Battagliaetal. -Awaytoovercomethisissue,atleastpartially,istoresortto (2008) to model the mass distribution in the Sculptor dSph -higher orderline-of-sightvelocity moments,such as the kurto- galaxy. A few attempts have also been made to constrain the -sis,andusethecorrespondingJeansequations.Sincethekurto- inner slope of the dark matter profile in dSph galaxies using -sisismoresensitivetothevelocityanisotropythantothemass thistechnique(Walker&Peñarrubia 2011;Amorisco&Evans -distribution,usefulconstraintscanbeobtainedonboth.Still,the 2012;Hayashietal.2018)inordertoresolvetheso-calledcusp- -methodrequireslargekinematicsamplestoestimatethevelocity core problem. It has been shown to be difficult, however, due +(2008) to model the mass distribution in the Sculptor dSph +galaxy. 
A few attempts have also been made to constrain the +inner slope of the dark matter profile in dSph galaxies using +thistechnique(Walker&Peñarrubia 2011;Amorisco&Evans +2012;Hayashietal.2018)inordertoresolvetheso-calledcusp- +core problem. It has been shown to be difficult, however, due Articlenumber,page1of12 A&Aproofs:manuscriptno.Populations4 Table1.PropertiesoftheIllustrisgalaxyusedtocreatemockdata. -Property Value 16 +Property Value SubhaloID 16960 -] -1- -Numberofstellarparticles(N ⋆) 70446 12 ry -Numberofdarkmatterparticles(N ) 78448 -DM ⊙ -Stellarmass(M ⋆) 5.74×1010M⊙ M[ -8 +Numberofstellarparticles(N ⋆) 70446 +Numberofdarkmatterparticles(N DM) 78448 +Stellarmass(M ⋆) 5.74×1010M⊙ Darkmattermass(M DM) 4.91×1011M⊙ -RFS Meanmassofstellarparticles 815808M⊙ -Stellarhalf-massradius 9.99kpc 4 -Stellarhalf-numberradius(r ) 9.6kpc +Stellarhalf-massradius 9.99kpc +Stellarhalf-numberradius(r 1/2) 9.6kpc +Axisratioc/awithinr 1/2 -Axisratioc/awithinr 0.907 -1/2 0 -Axisratiob/awithinr 0.949 +0.907 +Axisratiob/awithinr 1/2 -0 2 4 6 8 10 12 +0.949 Triaxiality 0.56 -t [Gyr] -tothenonsphericityofthedwarfsthatintroducesbiasesinsuch Fig.1. StarformationrateasafunctionoftheageoftheUniversein -measurements(Kowalczyketal.2013;Geninaetal.2018). thesimulatedgalaxyfromtheIllustrisprojectusedtocreatemockdata. -Inourrecentpapers(Kowalczyketal.2017,2018,2019)we Theblackandgrayverticalarrowsindicatethelastmergerswhichthe -developedtheSchwarzschildtechniqueintheformapplicableto galaxyunderwent,wetanddry,respectively. +tothenonsphericityofthedwarfsthatintroducesbiasesinsuch +measurements(Kowalczyketal.2013;Geninaetal.2018). +Inourrecentpapers(Kowalczyketal.2017,2018,2019)we +developedtheSchwarzschildtechniqueintheformapplicableto binnedvelocitymomentsofasingletracerandverifieditsabil- itytoreproducethemassdistributionandvelocityanisotropyof -10 6 simulated galaxies. 
We have also studied biases resulting from the nonsphericityof themodeledobjects.Later,we appliedthe -8 methodtomodelthekinematicsoftheFornaxdSphgalaxyesti- -matingitsmassandanisotropyprofileswithunprecedentedpre- 4 -cision. 6 ]ryG[ 2 ] -01[ +matingitsmassandanisotropyprofileswithunprecedentedpre- +cision. In this paper we extend our Schwarzschild modeling tech- -nique to include multiple stellar populations with the aim to 4 N -constrain the properties of dSph galaxies even more strongly. 2 t +nique to include multiple stellar populations with the aim to +constrain the properties of dSph galaxies even more strongly. We test our approachon a realistic simulated galaxyformedin -2 the cosmological context, originating from the Illustris project (Vogelsbergeretal. 2014a). Although no precise analogues of -0 0 dSphgalaxiesareavailableinthissimulationbecauseoftheres- -olution,we use a moremassive galaxybutwith propertiesoth- 0 1 2 3 4 5 -erwisesimilartodSphs.Thereliabilityofthemodelingdoesnot Z [Z ] -⊙ +olution,we use a moremassive galaxybutwith propertiesoth- +erwisesimilartodSphs.Thereliabilityofthemodelingdoesnot dependon the particularvalue of the mass so we believethese -teststobeviable.Wedonotattempttoconstraintheinnerdark Fig. 2. Number of stars as a function of their metallicity and time of -matterdensityprofile(whichispoorlyresolvedanyway)buttry formation(theageoftheUniverse)inthesimulatedgalaxy.Thevertical -toputtighterlimitsontheestimatesofthemassandanisotropy lineindicatestheappliedsplitintostellarpopulations. +teststobeviable.Wedonotattempttoconstraintheinnerdark +matterdensityprofile(whichispoorlyresolvedanyway)buttry +toputtighterlimitsontheestimatesofthemassandanisotropy profiles.Finally,weapplytheimprovedmethodtotheavailable kinematicdataforthedistinctstellar populationsoftheFornax -dSph. magnetic fields, and the feedback from black holes. Although -Thispaperis organizedasfollows. 
InSection 2 we present dwarfgalaxiesthatareofourinterestherearenotresolvedinthe -the data for the simulated galaxy as well as their splitting into suite,thiscanbeeasilyovercomewiththeappropriatechoiceof -stellarpopulationsandmockobservationsalongthemainaxes. theobjectandthetreatmentofdata. -Section3containsanoverviewofourmodelingmethod,theap- As the key properties of dSph galaxy equivalents we iden- -plicationof the methodto allstars andto two populations,and tified: the lack of gas, the lack of a black hole, a low spin, -acomparisonoftheresultsobtainedwiththesetwoapproaches. the stellar mass much smaller than the dark matter mass and a -TheresultsoftheapplicationofthemethodtotheFornaxdSph nearlysphericalshape.Thelastconditionwasadoptedinanat- -galaxyare presentedin Section 4. We discuss our findingsand tempttoavoidanystrongbiasintroducedbythesphericalmod- -summarizethepaperinSection5. elingofanonsphericalobject.Moreover,werequiredthegalaxy +dSph. +Thispaperis organizedasfollows. InSection 2 we present +the data for the simulated galaxy as well as their splitting into +stellarpopulationsandmockobservationsalongthemainaxes. +Section3containsanoverviewofourmodelingmethod,theap- +plicationof the methodto allstars andto two populations,and +acomparisonoftheresultsobtainedwiththesetwoapproaches. +TheresultsoftheapplicationofthemethodtotheFornaxdSph +galaxyare presentedin Section 4. We discuss our findingsand +summarizethepaperinSection5. +2. Mockdata +2.1.Selectionofthesimulatedgalaxy +In order to test our modeling method on realistic simulated +data, we decided to use a galaxy from the Illustris project +(Vogelsbergeretal. 2014a,b; Geneletal. 2014; Nelsonetal. 
+2015),namelytheIllustris-1cosmologicalsimulation.Thissim- +ulationfollowstheformationandevolutionofgalaxiesfromthe +early Universe to the present by solving gravity and hydrody- +namics, as well as modeling of star formation, galactic winds, +S +F +R +[ +M +⊙ +y +r - +1 +] +t [Gyr] +0 +4 +8 +12 +16 +0 2 4 6 8 10 12 +Fig.1. StarformationrateasafunctionoftheageoftheUniversein +thesimulatedgalaxyfromtheIllustrisprojectusedtocreatemockdata. +Theblackandgrayverticalarrowsindicatethelastmergerswhichthe +galaxyunderwent,wetanddry,respectively. +t +[ +G y +r ] +Z [Z ⊙] +0 +2 +4 +6 +8 +10 +0 1 2 3 4 5 +0 +2 +4 +6 +N [ +1 0 +2 ] +Fig. 2. Number of stars as a function of their metallicity and time of +formation(theageoftheUniverse)inthesimulatedgalaxy.Thevertical +lineindicatestheappliedsplitintostellarpopulations. +magnetic fields, and the feedback from black holes. Although +dwarfgalaxiesthatareofourinterestherearenotresolvedinthe +suite,thiscanbeeasilyovercomewiththeappropriatechoiceof +theobjectandthetreatmentofdata. +As the key properties of dSph galaxy equivalents we iden- +tified: the lack of gas, the lack of a black hole, a low spin, +the stellar mass much smaller than the dark matter mass and a +nearlysphericalshape.Thelastconditionwasadoptedinanat- +tempttoavoidanystrongbiasintroducedbythesphericalmod- +elingofanonsphericalobject.Moreover,werequiredthegalaxy to possess a significant number of both stellar and dark mat- ter particles (over 105), and a well resolved center. Due to the -2. 
Mockdata largesofteningscale for darkmatter particlesin the simulation -(ǫ = 1.42kpc), we looked for an object in which even the -2.1.Selectionofthesimulatedgalaxy DM +largesofteningscale for darkmatter particlesin the simulation +(ǫ +DM += 1.42kpc), we looked for an object in which even the moreconcentratedstellarpopulation(seeSection2.2)extended -In order to test our modeling method on realistic simulated over43kpcsothattheregionaffectedbythenumericalartifacts -data, we decided to use a galaxy from the Illustris project wasenclosedwithin2-3innermostdatabins(weused20linearly -(Vogelsbergeretal. 2014a,b; Geneletal. 2014; Nelsonetal. spacedspatialbins,seeSection3.1). -2015),namelytheIllustris-1cosmologicalsimulation.Thissim- Out of 27345 galaxies listed in the catalog of stellar circu- -ulationfollowstheformationandevolutionofgalaxiesfromthe larities,angularmomenta,andaxisratiospublishedbytheIllus- -early Universe to the present by solving gravity and hydrody- tristeam(Geneletal.2015)containingsubhaloswiththestellar -namics, as well as modeling of star formation, galactic winds, masslargerthan109M⊙,onlyafewmetourrestrictiverequire- +over43kpcsothattheregionaffectedbythenumericalartifacts +wasenclosedwithin2-3innermostdatabins(weused20linearly +spacedspatialbins,seeSection3.1). 
+Out of 27345 galaxies listed in the catalog of stellar circu- +larities,angularmomenta,andaxisratiospublishedbytheIllus- +tristeam(Geneletal.2015)containingsubhaloswiththestellar +masslargerthan109M⊙,onlyafewmetourrestrictiverequire- Articlenumber,page2of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -PPPOOOPPPUUULLLAAATTTIIIOOONNN III PPPPPPPPPPPPPPPOOOOOOOOOOOOOOOPPPPPPPPPPPPPPPUUUUUUUUUUUUUUULLLLLLLLLLLLLLLAAAAAAAAAAAAAAATTTTTTTTTTTTTTTIIIIIIIIIIIIIIIOOOOOOOOOOOOOOONNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIII -major intermediate minor major intermediate minor -80 80 -7.7 7.7 -40 7.1 40 7.1 ]2cpk/⊙M[ ]2cpk/⊙M[ -6.5 6.5 ]cpk[ ]cpk[ -0 0 -5.9 5.9 )Σ(gol )Σ(gol --40 -40 -5.3 5.3 --80 160 -80 160 -40 80 40 80 -]s/mk[ ]s/mk[ -]cpk[ ]cpk[ -0 0 0 0 -V V --40 -80 -40 -80 --80 -160 -80 -160 -40 90 40 90 -]s/mk[ ]s/mk[ -]cpk[ ]cpk[ -0 60 0 60 -σ σ --40 30 -40 30 --80 0 -80 0 --80 -40 0 40 -80 -40 0 40 -80 -40 0 40 80 -80 -40 0 40 -80 -40 0 40 -80 -40 0 40 80 -[kpc] [kpc] [kpc] [kpc] [kpc] [kpc] +-80 +-40 +0 40 +80 +POPULATION I +[ k p c ] +major +POPULATION I +intermediate +POPULATION I +minor +5.3 +5.9 6.5 7.1 +7.7 +l o g ( +Σ ) [ M ⊙ / k p c 2 ] +-80 +-40 +0 +40 +POPULATION II +[ k p c ] +POPULATION II POPULATION II +-160 +-80 +0 +80 +160 +V [ k m / s ] +-80 +-40 +0 +40 +-80 -40 0 40 +POPULATION II +[ k p c ] +[kpc] +-80 -40 0 40 +POPULATION II +[kpc] +-80 -40 0 40 80 +POPULATION II +[kpc] +0 +30 +60 +90 +σ [ k m / s ] +-80 +-40 +0 40 +80 +POPULATION II +[ k p c ] +major +POPULATION II +intermediate +POPULATION II +minor +5.3 +5.9 6.5 7.1 +7.7 +l o g ( +Σ ) [ M ⊙ / k p c 2 ] +-80 +-40 +0 +40 +POPULATION II +[ k p c ] +POPULATION II POPULATION II +-160 +-80 +0 +80 +160 +V [ k m / s ] +-80 +-40 +0 +40 +-80 -40 0 40 +POPULATION II +[ k p c ] +[kpc] +-80 -40 0 40 +POPULATION II +[kpc] +-80 -40 0 40 80 +POPULATION II +[kpc] +0 +30 +60 +90 +σ [ k m / s ] 
Fig.3.Mapsoftheprojectedstellardensity,meanstellarvelocity,andstellarvelocitydispersion(inrows)fortwostellarpopulations:themetal- richpopulationI(left-handsidepanels)andthemetal-poorpopulationII(right-handside),andobservationsalongtheprincipalaxesdetermined forallstars(incolumns,alongthemajor,theintermediate,andtheminoraxis,respectively). -1 120 120 -0.5 100 100 -)r(β )r(σ )r(σ -0 80 80 -r t --0.5 60 60 --1 40 40 -1 10 100 1 10 100 1 10 100 -r [kpc] r [kpc] r [kpc] -1 120 120 -0.5 100 100 -)r(β )r(σ )r(σ -0 80 80 -all stars r t +-1 +-0.5 +0 +0.5 +1 +1 10 100 +β ( r ) +r [kpc] +-1 +-0.5 +0 +0.5 +1 +0 10 20 30 40 50 +β ( r ) +r [kpc] +all stars pop I --0.5 60 60 pop II --1 40 40 -0 10 20 30 40 50 0 10 20 30 40 50 0 10 20 30 40 50 -r [kpc] r [kpc] r [kpc] +40 +60 +80 +100 +120 +1 10 100 +σ r ( r ) +r [kpc] +40 +60 +80 +100 +120 +0 10 20 30 40 50 +σ r ( r ) +r [kpc] +40 +60 +80 +100 +120 +1 10 100 +σ t ( r ) +r [kpc] +40 +60 +80 +100 +120 +0 10 20 30 40 50 +σ t ( r ) +r [kpc] Fig.4.Profilesofthevelocityanisotropyparameter,radialvelocitydispersion,andtangentialvelocitydispersion(inconsecutivecolumns)calcu- latedfromallstars(inred),includingonlypopulationI(inorange),andonlypopulationII(inblue).Theupperrowshowstheprofilesusingthe logarithmicdistancescaleandreachingtheoutskirtsofthegalaxywhereasthebottomrowpresentsinthelinearscaleonlytheradialrangeused inthemodeling. -ments. We decided to use a galaxy labeled as subhalo 16960. 
forfurthercalculationsinthispaper.Thedifferencebetweenthe -All the relevant properties of the galaxy are given in Table1, twocomesfromasmallgradientinthestellarmass-to-lightratio -includingnumbersofparticlesandtotalmassesforbothcompo- withthedistancefromthegalacticcenter.Sinceinourapproach -nents,anddetailsontheshapeofthestellarcomponent:theaxis we treat stars as equal-massparticles and refer to numberden- -ratios minor to major (shortest to longest) c/a, intermediate to sities (multiplied by the mean mass of a stellar particle when -majorb/a,andthetriaxialityparameterT =(a2−b2)/(a2−c2). needed),theapplicationofthehalf-numberradiusismoreself- -Wedistinguishbetweenthehalf-massradiusprovidedintheIl- consistent. -lustris database and the half-numberradius r , which we use -1/2 +ments. We decided to use a galaxy labeled as subhalo 16960. +All the relevant properties of the galaxy are given in Table1, +includingnumbersofparticlesandtotalmassesforbothcompo- +nents,anddetailsontheshapeofthestellarcomponent:theaxis +ratios minor to major (shortest to longest) c/a, intermediate to +majorb/a,andthetriaxialityparameterT =(a2−b2)/(a2−c2). +Wedistinguishbetweenthehalf-massradiusprovidedintheIl- +lustris database and the half-numberradius r 1/2, which we use +forfurthercalculationsinthispaper.Thedifferencebetweenthe +twocomesfromasmallgradientinthestellarmass-to-lightratio +withthedistancefromthegalacticcenter.Sinceinourapproach +we treat stars as equal-massparticles and refer to numberden- +sities (multiplied by the mean mass of a stellar particle when +needed),theapplicationofthehalf-numberradiusismoreself- +consistent. 
Articlenumber,page3of12 A&Aproofs:manuscriptno.Populations4 -major intermediate minor -3 -10 +10-3 +10-1 +101 +103 +10 100 +n +⋆ ( +R +) +[ +k +p +c - 2 ] -1 2- -10 cpk[ -)R(⋆n --1 -10 all stars +R [kpc] +major +10 100 +R [kpc] +intermediate +10 100 +R [kpc] +minor +all stars pop I pop II --3 -10 -10 100 10 100 10 100 -R [kpc] R [kpc] R [kpc] Fig.5.Surfacenumberdensityprofilesofthestellardatasamplesforthesimulatedgalaxyobservedalongdifferentlinesofsight(fromtheleftto theright).Differentlinesshowprofilesforallavailablestars(inred),themetal-richpopulationI(inorange),andthemetal-poorpopulationII(in -blue).Thinverticallinesindicater (seetext)andtheouterboundaryofthespectroscopicdata. -0 -2.2.Splittingthestarsintopopulations Fig.4.Throughoutthepaperweusered,orange,andbluecolors -to indicate values calculated or recoveredfor all stars, popula- -Our chosen galaxy shows a complex formation history under- tionI,andpopulationII,respectively.Thetworowsofthefigure -goingmultiplemergerswhichresultinextendedstar formation showthebehavioroftheparametersatdifferentscales.Thetop -with a few star formation bursts. The last wet merger,that is a row plots the profiles with the distance from the center of the -merger with an object containinggas, happensat 6.9Gyr from galaxyinthelogarithmicscaleandshowsthedropofanisotropy -thebeginningofthesimulation,whereasthelastdrymerger(no attheouteredgesoftheobject.Thebottomrowusesthelinear -gastransfer)at12.1Gyr,givingthegalaxyenoughtimetoregain distancescaleandfocusesonthemainbodyofthegalaxy. +blue).Thinverticallinesindicater 0(seetext)andtheouterboundaryofthespectroscopicdata. +2.2.Splittingthestarsintopopulations +Our chosen galaxy shows a complex formation history under- +goingmultiplemergerswhichresultinextendedstar formation +with a few star formation bursts. 
The last wet merger,that is a +merger with an object containinggas, happensat 6.9Gyr from +thebeginningofthesimulation,whereasthelastdrymerger(no +gastransfer)at12.1Gyr,givingthegalaxyenoughtimetoregain dynamicalequilibrium.Wepresentthestarformationrate(SFR) -Figure 5 shows the surface number density profiles of the -as a functionof time (the age of the Universe)in Fig.1, where starsas measuredin differentdirections.We can see thatwhile -theselastmergersareindicatedwithblackandgrayverticalar- thedifferentsubsampleshavequitedistinguishableprofiles,the -rows.InFig.2weshowthedistributionofstarsasafunctionof differencebetweenthelinesofsightissmallbecausethegalaxy +as a functionof time (the age of the Universe)in Fig.1, where +theselastmergersareindicatedwithblackandgrayverticalar- +rows.InFig.2weshowthedistributionofstarsasafunctionof theirmetallicity(insolarunits)andthetimeofformation.Inor- -isclosetospherical. dertodividethestellarsampleintotwopopulationswecutitin halfbasedonthemetallicityindexofeachstellarparticle.This -splitisindicatedinFig.2withtheverticalline.Withsatisfying 2.3.Observables +splitisindicatedinFig.2withtheverticalline.Withsatisfying accuracyitseparatesthestarsbornbeforeandafter4Gyrsince -the start of the simulation, which correspondsto the formation We generatedninesetsofmockdatabyobservingallstarsand -time beforeand afterthe endof thesecondmajorstar burst, as each populationseparatelyalongthe principalaxesdetermined -showninFig.1.Werefertothemetal-richstarsaspopulationI fromallstars.Fortheobservablestobeusedinthemodelingwe -andtothemetal-pooraspopulationII,followingthecommonly divided the stars into 20 bins spaced linearly in distance from -usednomenclatureinastronomy. 
the center of the galaxy up to 50kpc, measuring the fraction -of the total number of stars and the 2nd, 3rd, and 4th proper +the start of the simulation, which correspondsto the formation +time beforeand afterthe endof thesecondmajorstar burst, as +showninFig.1.Werefertothemetal-richstarsaspopulationI +andtothemetal-pooraspopulationII,followingthecommonly +usednomenclatureinastronomy. InFig.3wepresentmapsoftheprojectedstellarmassden- -moments of the line-of-sight velocity defined in Eq.8 and 9 sity, line-of-sightvelocity, and line-of-sightvelocity dispersion -of Kowalczyketal. (2018). The profiles of these quantities are forbothpopulationsobtainedbyprojectingthegalaxyalongits -showninconsecutiverowsinFig.6.Columnscorrespondtodif- principal axes. The orientation was determined from the iner- -ferentlinesof sight, fromthe leftto the right:alongthe major, tiatensorcalculatedfromallstarswithinthehalf-numberradius -intermediate,andminoraxisofthegalaxy.Forclarityofthefig- -r and therefore is the same in both panels. The two popula- -1/2 ure,ineachpanelweindicateonlytheerrorbarsforoneofthe +r +1/2 +and therefore is the same in both panels. The two popula- tionsdiffersignificantlyinthespatialdistributionandkinemat- -datasets. However,as the numberof starsin a sample remains ics with the metal-rich(consideredto be younger)populationI -roughly constant between the lines of sight, the error bars are being more concentratedbut having lower centralvelocity dis- -verysimilaramongthepanelsinagivenrow. persion. Both populationsshow a weak rotation signal at large -Although in our previous studies of the reliability of distancesfromthecenter. -the Schwarzschild modeling and its applications to real data -The velocity anisotropy parameter β(r) = 1 − (σ2 + -θ (Kowalczyketal. 
2017,2018, 2019) weapproximatedtheden- -σ2 φ)/(2σ2 r),whereσ iarevelocitydispersionsinsphericalcoordi- sityprofileofthetracerwiththeSérsicformula,wefoundthatit -nates(Binney&Tremaine2008),describestheorbitalstructure doesnotprovideagoodapproximationofthedataforthesimu- -ofgalaxies.Itisoneofthemostimportantdynamicalproperties latedgalaxyconsideredhere.Wethereforefittheprojectedden- -of bound systems which cannot be inferred directly from ob- sityprofilewiththeKingformula(King1962) +The velocity anisotropy parameter β(r) = 1 − (σ2 +θ ++ +σ2 φ)/(2σ2 r),whereσ iarevelocitydispersionsinsphericalcoordi- +nates(Binney&Tremaine2008),describestheorbitalstructure +ofgalaxies.Itisoneofthemostimportantdynamicalproperties +of bound systems which cannot be inferred directly from ob- servationsandhastoberecoveredbydynamicalmodeling.The profiles of the anisotropy parameter β as well as the radial σ -r 2 -andtangentialσ =[(σ2+σ2)/2]1/2velocitydispersionsforour 1 1 -t θ φ I(R)=I − , (1) -simulated galaxy are presented in the consecutive columns of 0  1+(R/R c)2 1+(R t/R c)2 -p  -Articlenumber,page4of12 p +r +andtangentialσ +t +=[(σ2 θ+σ2 φ)/2]1/2velocitydispersionsforour +simulated galaxy are presented in the consecutive columns of +Fig.4.Throughoutthepaperweusered,orange,andbluecolors +to indicate values calculated or recoveredfor all stars, popula- +tionI,andpopulationII,respectively.Thetworowsofthefigure +showthebehavioroftheparametersatdifferentscales.Thetop +row plots the profiles with the distance from the center of the +galaxyinthelogarithmicscaleandshowsthedropofanisotropy +attheouteredgesoftheobject.Thebottomrowusesthelinear +distancescaleandfocusesonthemainbodyofthegalaxy. +Figure 5 shows the surface number density profiles of the +starsas measuredin differentdirections.We can see thatwhile +thedifferentsubsampleshavequitedistinguishableprofiles,the +differencebetweenthelinesofsightissmallbecausethegalaxy +isclosetospherical. 
+2.3.Observables +We generatedninesetsofmockdatabyobservingallstarsand +each populationseparatelyalongthe principalaxesdetermined +fromallstars.Fortheobservablestobeusedinthemodelingwe +divided the stars into 20 bins spaced linearly in distance from +the center of the galaxy up to 50kpc, measuring the fraction +of the total number of stars and the 2nd, 3rd, and 4th proper +moments of the line-of-sight velocity defined in Eq.8 and 9 +of Kowalczyketal. (2018). The profiles of these quantities are +showninconsecutiverowsinFig.6.Columnscorrespondtodif- +ferentlinesof sight, fromthe leftto the right:alongthe major, +intermediate,andminoraxisofthegalaxy.Forclarityofthefig- +ure,ineachpanelweindicateonlytheerrorbarsforoneofthe +datasets. However,as the numberof starsin a sample remains +roughly constant between the lines of sight, the error bars are +verysimilaramongthepanelsinagivenrow. +Although in our previous studies of the reliability of +the Schwarzschild modeling and its applications to real data +(Kowalczyketal. 
2017,2018, 2019) weapproximatedtheden- +sityprofileofthetracerwiththeSérsicformula,wefoundthatit +doesnotprovideagoodapproximationofthedataforthesimu- +latedgalaxyconsideredhere.Wethereforefittheprojectedden- +sityprofilewiththeKingformula(King1962) +I(R)=I +0  + +1 +p1+(R/R c)2 +− +1 +p1+(R t/R c)2 +2 +, (1) +Articlenumber,page4of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -major intermediate minor +10-3 +10-2 +10-1 +100 +0 10 20 30 40 +M ( +R ) +R [kpc] +major +0 10 20 30 40 +R [kpc] +intermediate +0 10 20 30 40 50 +R [kpc] +minor +369 +12 +0 10 20 30 40 +m +2 ( +R ) [ +1 0 +3 ( +k +m +s - +1 ) +2 ] +R [kpc] +0 10 20 30 40 +R [kpc] +0 10 20 30 40 50 +R [kpc] +-10 +-505 10 --1 -10 -)R(M --2 -10 --3 -10 -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 -R [kpc] R [kpc] R [kpc] -12 ]2 -)1- -9 s -mk(3 -6 -01[)R( -3 -2 +0 10 20 30 40 m -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 -R [kpc] R [kpc] R [kpc] -10 31- ] -) -5 s -mk( -0 4 -01[)R( --5 --10 3m -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 -R [kpc] R [kpc] R [kpc] -4 ]4 -all stars )1- -3 pop I s -mk(8 -pop II -2 -01[)R( +3 ( +R ) [ 1 -4 -0 m -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 -R [kpc] R [kpc] R [kpc] +0 +4 ( +k +m +s - +1 ) +3 ] +R [kpc] +0 10 20 30 40 +R [kpc] +0 10 20 30 40 50 +R [kpc] +01234 +0 10 20 30 40 +m +4 ( +R ) [ +1 +0 +8 ( +k +m +s - +1 ) +4 ] +R [kpc] +0 10 20 30 40 +R [kpc] +0 10 20 30 40 50 +R [kpc] +all stars +pop I +pop II Fig.6.ObservablesusedinourSchwarzschildmodelingschemeofthesimulatedgalaxy.Inrows:thefractionofthetotalnumberofstars,2nd, 3rd,and4thvelocitymoment.Incolumns:mockdatafromthesimulatedgalaxyalongthemajor,intermediate,andminoraxis.Inredwepresent thevaluesobtainedforallstarswhereasinorangeandbluethoseforpopulationsIandII,respectively.Forclarityofthefigure,ineachpanelwe indicateonlytheerrorbarsforoneofthedatasets. -where I , R , and R are the modelparameters.The profile can 3. 
Schwarzschildmodeling -0 c t +where I 0, R c, and R +t +are the modelparameters.The profile can beanalyticallydeprojectedtoobtainthe3Ddensity +ρ(r)= +ρ +0 +z2 +"1 +z +arccos(z)− p1−z2 #, (2) +where +ρ +0 += +I +0 +πR c[1+(R t/R c)2]3/2 +(3) +and +z= sr2+R2 c +R2 +c ++R2 +t +. (4) +3. Schwarzschildmodeling In this section we briefly present our modeling method and its applicationtothedatasetsderivedforallstarsandthetwopop- -ρ 1 -ρ(r)= 0 arccos(z)− 1−z2 , (2) ulations of the simulated galaxy separately. In both cases our -z2 z aimwastorecovertheprofilesofthetotalmassandthevelocity -" # -p anisotropy. -where +ulations of the simulated galaxy separately. In both cases our +aimwastorecovertheprofilesofthetotalmassandthevelocity +anisotropy. 3.1.Overviewofthemethod -I -ρ = 0 (3) -0 πR [1+(R/R )2]3/2 We followthe approachintroducedin Kowalczyketal. (2018), -c t c +We followthe approachintroducedin Kowalczyketal. (2018), namely we model the total mass profile with the mass-to-light -and ratioΥvaryingwithradius: -z= r2+R2 c . 
(4) logΥ(r)= log(Υ 0) r≤r 0 (5) -sR2 +R2 a(logr−logr 0)c+log(Υ 0) r>r -c t ( 0 +ratioΥvaryingwithradius: +logΥ(r)= +( +log(Υ 0) r≤r 0 +a(logr−logr 0)c+log(Υ 0) r>r +0 +(5) Articlenumber,page5of12 A&Aproofs:manuscriptno.Populations4 -ALL POPULATIONS -3 3 +1 +2 +3 +0 +0.5 +1 +1 +2 +3 +ALL +Υ +0 +a +c +1 +2 +3 +0 +0.5 +1 +1 +2 +3 +POPULATIONS +Υ +0 +a +c +10 100 -c 2 c 2 -2χ -1 1 -1 3 1 3 -2 2 -a 0.5 a 0.5 10 -Υ Υ -1 0 1 0 -0 0 -POP I POP II -3 3 +χ +2 +1 +2 +3 +0 +0.5 +1 +1 +2 +3 +POP I +Υ +0 +a +c +1 +2 +3 +0 +0.5 +1 +1 +2 +3 +POP II +Υ +0 +a +c +10 100 -c 2 c 2 -2χ -1 1 -1 3 1 3 -2 2 -a 0.5 a 0.5 10 -Υ Υ -1 0 1 0 -0 0 +χ +2 Fig.7.Absolutevaluesofχ2obtainedfromthefitsofthreedatasets:allstars(topleftpanel),populationI(bottomleft),andpopulationII(bottom right)fortheobservationsalongthemajoraxisofthesimulatedgalaxy.Theresultsforthemodelingoftwopopulations(topright)wereobtained -asanalgebraicsumofvaluesforpopulationsIandII.Toavoidlargenumbersinthefigure,Υ wasdividedbythemeanmassofastellarparticle. -0 -where r is the distance from the center of the galaxy, r is a (∼ a4c4Υ4)thatwerefurtherusedto determinetheglobalmin- -0 0 -constant,while Υ , a,andc are the parametersof a model.We imums(identifiedas the best-fitting models)and 1, 2, 3σ con- +asanalgebraicsumofvaluesforpopulationsIandII.Toavoidlargenumbersinthefigure,Υ 0wasdividedbythemeanmassofastellarparticle. +where r is the distance from the center of the galaxy, r 0 -haveassumedlogr =0.33whichcorrespondstothreesoftening fidence levels which for three parameterscorrespondto ∆χ2 = +is a +constant,while Υ 0, a,andc are the parametersof a model.We +haveassumedlogr 0 -scalesforstellarparticlesintheIllustrissimulation. 3.53, 8.02, 14.2(Pressetal.1992). +=0.33whichcorrespondstothreesoftening +scalesforstellarparticlesintheIllustrissimulation. 
Weprobedtheparametera∈[0:1.3]withastep∆a=0.04 andc ∈ [1.1 : 2.9]witha step∆c = 0.2,imposingtherequire- -3.2.Applicationtomockdata mentonthetotaldensityprofiletobemonotonicallydecreasing -withradius.Foreachsetofparametersandforeachlineofsight In the following we present the direct and inferred results of -wegenerated1200orbitsusing100valuesofenergy(expressed the Schwarzschild modeling of the data sets described in Sec- -with the radius of a circular orbit) spaced logarithmically and tion2.3. -12valuesoftherelativeangularmomentumspacedlinearly.The First, Fig.7showsthe distributionof theabsolutevaluesof -outerradiusoftheorbitlibrary,thatistheapocenterofthemost theχ2 asafunctionofthreeparametersofthemass-to-lightra- -extendedorbit,wasset tor out = 165kpcinordertocoverover tio. In order to avoid unnecessary repetitions, we include only -0.999 of the total stellar mass based on the fitted King profile the plot for the mock data obtained by observing the Illustris -parameters. galaxyalongitsmajoraxisastheothersarequalitativelysimilar. -Wefitthekinematicsweightedwiththefractionofmasswith Thefourpanelsrefertofitsforallstars(topleft),themetal-rich -the constrained least squares algorithm where different values populationI(bottomleft),themetal-poorpopulationII(bottom -of Υ were obtainedwith a simple transformationof velocities right),andtheonenamed"populations"(topright)whichisthe -0 -given by Eq.12, 13, and 15 in Kowalczyketal. (2018). In or- algebraicsumofvaluesforbothpopulations. 
-dertosmoothoutthenumericalartifacts,thethree-dimensional Asourparametrizationofthemass-to-lightratioisnotintu- -χ2 spaces were then interpolated with 12-order polynomials itivewepresentitsprofilesexplicitlyinthefirstrowsoftheleft- +withradius.Foreachsetofparametersandforeachlineofsight +wegenerated1200orbitsusing100valuesofenergy(expressed +with the radius of a circular orbit) spaced logarithmically and +12valuesoftherelativeangularmomentumspacedlinearly.The +outerradiusoftheorbitlibrary,thatistheapocenterofthemost +extendedorbit,wasset tor out = 165kpcinordertocoverover +0.999 of the total stellar mass based on the fitted King profile +parameters. +Wefitthekinematicsweightedwiththefractionofmasswith +the constrained least squares algorithm where different values +of Υ +0 +were obtainedwith a simple transformationof velocities +given by Eq.12, 13, and 15 in Kowalczyketal. (2018). In or- +dertosmoothoutthenumericalartifacts,thethree-dimensional +χ2 spaces were then interpolated with 12-order polynomials +(∼ a4c4Υ4 0)thatwerefurtherusedto determinetheglobalmin- +imums(identifiedas the best-fitting models)and 1, 2, 3σ con- +fidence levels which for three parameterscorrespondto ∆χ2 = +3.53, 8.02, 14.2(Pressetal.1992). +3.2.Applicationtomockdata +In the following we present the direct and inferred results of +the Schwarzschild modeling of the data sets described in Sec- +tion2.3. +First, Fig.7showsthe distributionof theabsolutevaluesof +theχ2 asafunctionofthreeparametersofthemass-to-lightra- +tio. In order to avoid unnecessary repetitions, we include only +the plot for the mock data obtained by observing the Illustris +galaxyalongitsmajoraxisastheothersarequalitativelysimilar. +Thefourpanelsrefertofitsforallstars(topleft),themetal-rich +populationI(bottomleft),themetal-poorpopulationII(bottom +right),andtheonenamed"populations"(topright)whichisthe +algebraicsumofvaluesforbothpopulations. 
+Asourparametrizationofthemass-to-lightratioisnotintu- +itivewepresentitsprofilesexplicitlyinthefirstrowsoftheleft- Articlenumber,page6of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -AAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLL PPPPPPPPPPPPOOOOOOOOOOOOPPPPPPPPPPPPUUUUUUUUUUUULLLLLLLLLLLLAAAAAAAAAAAATTTTTTTTTTTTIIIIIIIIIIIIOOOOOOOOOOOONNNNNNNNNNNNSSSSSSSSSSSS -major intermediate minor major intermediate minor -1010 1010 -3σ 3σ 109 2 1σ 109 2 1σ ]⊙L/⊙M[ ]⊙L/⊙M[ -σ σ -108 best model 108 best model -data data -107 107 )r(Υ )r(Υ -106 106 -10 100 10 100 10 100 10 100 10 100 10 100 -r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] -]3-cpk ]3-cpk -108 108 -106 106 ⊙M[ ⊙M[ -)r( )r( -104 104 -totν totν -10 100 10 100 10 100 10 100 10 100 10 100 -r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] -1012 1012 ]⊙M[ ]⊙M[ -1011 1011 )r( )r( -totM totM -1010 1010 -10 100 10 100 10 100 10 100 10 100 10 100 -r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] -1 1 -0 0 -)r(β )r(β --1 -1 --2 -2 -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 -r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] r [kpc] +106 107 +108 +109 +1010 +10 100 +ALL +Υ ( r ) [ +M +⊙ / +L +⊙ ] +r [kpc] +major +10 100 +ALL +r [kpc] +intermediate +10 100 +ALL +r [kpc] +minor +3σ 2σ +1σ +best model +data +104 +106 +108 +10 100 +ALL +ν t o t ( r ) +[ +M ⊙ +k +p c - 3 ] +r [kpc] +10 100 +ALL +r [kpc] +10 100 +ALL +r [kpc] +1010 1011 +1012 +10 100 +ALL +M t o t ( r ) [ +M +⊙ ] +r [kpc] +10 100 +ALL +r [kpc] +10 100 +ALL +r [kpc] +-2 +-1 +01 +0 10 20 30 40 +ALL +β ( r ) +r [kpc] +0 10 20 30 40 +ALL +r [kpc] +0 10 20 30 40 50 +ALL +r [kpc] +106 107 +108 +109 +1010 +10 100 +POPULATIONS +Υ ( r ) [ +M +⊙ / +L +⊙ ] +r [kpc] +major +10 100 +POPULATIONS +r [kpc] +intermediate +10 100 +POPULATIONS +r [kpc] +minor +3σ 2σ +1σ +best model +data +104 +106 +108 +10 100 +POPULATIONS +ν t o t ( r ) +[ +M ⊙ +k +p c - 3 ] +r [kpc] +10 100 +POPULATIONS +r [kpc] +10 100 +POPULATIONS +r [kpc] 
+1010 1011 +1012 +10 100 +POPULATIONS +M t o t ( r ) [ +M +⊙ ] +r [kpc] +10 100 +POPULATIONS +r [kpc] +10 100 +POPULATIONS +r [kpc] +-2 +-1 +01 +0 10 20 30 40 +POPULATIONS +β ( r ) +r [kpc] +0 10 20 30 40 +POPULATIONS +r [kpc] +0 10 20 30 40 50 +POPULATIONS +r [kpc] Fig.8.Left-handside:resultsofSchwarzschildmodelingofthreemockdatasetsobtainedbyobservingthesimulatedgalaxyalongtheprincipal axes.Inrows:derivedmass-to-lightratio,totaldensity,totalmass,andanisotropyparameter.Incolumns:observationsalongthemajor,interme- diate,andminoraxis,respectively.Greenlinesindicatevaluesforthebest-fitmodelswhereasthecoloredareasofdecreasingintensityshowthe -1,2,and3σconfidencelevels.Thetruevaluesarepresentedasblacklines.Thinverticallinesmarkthevaluesofr andtheouterrangeofthe +1,2,and3σconfidencelevels.Thetruevaluesarepresentedasblacklines.Thinverticallinesmarkthevaluesofr 0 +andtheouterrangeofthe datasets,fromlefttoright.Right-handside:sameasleftbutforthefitoftwostellarpopulations. -and right-handside panelsof Fig.8 for the results obtainedfor 3.3.Comparisonoffittingresults +and right-handside panelsof Fig.8 for the results obtainedfor all stars and the populations,respectively. We further calculate thetotaldensity(secondrows)andthetotalmasscontent(third -rows).Weincludetheobtainedorbitanisotropywithinthemod- The main strength of the two populations method comes from -eledrangeinthebottomrows.Theconsecutivecolumnspresent tracingtheunderlyinggravitationalpotentialatdifferentscales. -the results for the observations along the major, intermediate, AscanbeseeninthebottompanelsofFig.7,populationI,which -andminoraxis.Greenlinesindicatevaluesforthebest-fitmod- is more concentrated, is also more sensitive to Υ , but gives -0 -elswhereasthecoloredareasofdecreasingintensitycorrespond weaker constraints on a or c. 
On the other hand, population II -to1,2,and3σconfidenceregionsobtainedasextremevaluesal- attemptsto reproducethe totalmass contentatlargerdistances -lowedbythemodelswithχ2withinagivenregion.Ineachpanel aswell,thereforeshowingstrongercouplingbetweentheparam- -thetruevaluesfromthesimulationarepresentedwithblacklines eters. -whilethinverticallinesmarkthevaluesofr andtheouterrange -0 +rows).Weincludetheobtainedorbitanisotropywithinthemod- +eledrangeinthebottomrows.Theconsecutivecolumnspresent +the results for the observations along the major, intermediate, +andminoraxis.Greenlinesindicatevaluesforthebest-fitmod- +elswhereasthecoloredareasofdecreasingintensitycorrespond +to1,2,and3σconfidenceregionsobtainedasextremevaluesal- +lowedbythemodelswithχ2withinagivenregion.Ineachpanel +thetruevaluesfromthesimulationarepresentedwithblacklines +whilethinverticallinesmarkthevaluesofr 0andtheouterrange ofthedatasetsbeyondwhichthereliabilityofresultsdropssig- -The global minimums of the χ2 distributions for both ap- nificantly. The true mass-to-light ratio profile was obtained by -proaches, that is modeling one and two populations, which we dividingthetotalmassbythefittedKingprofiles,thereforethe -identifyasthebest-fittingmodels,closelycoincideshowingthat drop at 100kpc is the numerical artifact occurring at the very -thereisnointernalbiasintheimprovedmethod.However,sig- outskirtsofthegalaxy. 
+Whereasintheright-handsidepanelsofFig.8theresulting +anisotropyisobtainedfromthefitofallstarsandusesonlythe +locationofglobalminimumandconfidencelevelsfromtwopop- +ulations(asinthetoprightpanelofFig.7),inFig.9wepresent +anothermethodofcalculatingtheanisotropy.Inthesecondand +third row we show the derived profiles for population I and II +separately and combine them as stellar mass weighted average +inthetoprow.Asinpreviousfigures,threecolumnsrefertothe +differentlinesofsightwhereasthenarrowfourthoneshowsthe +behaviorofthetrueprofilesoutsidethemodeledrangewhich,as +we noticed in our previousstudies, in a limited way influences +the results. Such an impact is understandablesince the stars at +largerdistancesfromthecenterarestillincludedintheline-of- +sightmeasurements. +3.3.Comparisonoffittingresults +The main strength of the two populations method comes from +tracingtheunderlyinggravitationalpotentialatdifferentscales. +AscanbeseeninthebottompanelsofFig.7,populationI,which +is more concentrated, is also more sensitive to Υ 0, but gives +weaker constraints on a or c. On the other hand, population II +attemptsto reproducethe totalmass contentatlargerdistances +aswell,thereforeshowingstrongercouplingbetweentheparam- +eters. +The global minimums of the χ2 distributions for both ap- +proaches, that is modeling one and two populations, which we +identifyasthebest-fittingmodels,closelycoincideshowingthat +thereisnointernalbiasintheimprovedmethod.However,sig- nificantdifferencescanbeobservedwhencomparingtheconfi- dence levels, mainly at 1 and 3σ. Namely, we find that using two populations, the constraints we obtain on the density and -Whereasintheright-handsidepanelsofFig.8theresulting anisotropyprofilearemuchstronger. 
-anisotropyisobtainedfromthefitofallstarsandusesonlythe -locationofglobalminimumandconfidencelevelsfromtwopop- -ulations(asinthetoprightpanelofFig.7),inFig.9wepresent Additionally, the more accurate method allows us to study -anothermethodofcalculatingtheanisotropy.Inthesecondand other effects and biases, for example the consequences of the -third row we show the derived profiles for population I and II nonsphericity of the modeled object. Whereas for the fit of all -separately and combine them as stellar mass weighted average starsthetruevaluesofthedensity,mass,andanisotropyprofiles -inthetoprow.Asinpreviousfigures,threecolumnsrefertothe are contained within 1σ confidenceregions, the results for the -differentlinesofsightwhereasthenarrowfourthoneshowsthe populationsaremoreorlessbiaseddependingontheaxis.They -behaviorofthetrueprofilesoutsidethemodeledrangewhich,as are well reproducedfor the observation along the intermediate -we noticed in our previousstudies, in a limited way influences axis, for which the effects of nonsphericityseem to cancelout, -the results. Such an impact is understandablesince the stars at and more biased for the remaining lines of sight. We notice a -largerdistancesfromthecenterarestillincludedintheline-of- trendfromunder-tooverestimationoftheanisotropywhengo- -sightmeasurements. ingfromthemajortotheminoraxis. +Additionally, the more accurate method allows us to study +other effects and biases, for example the consequences of the +nonsphericity of the modeled object. Whereas for the fit of all +starsthetruevaluesofthedensity,mass,andanisotropyprofiles +are contained within 1σ confidenceregions, the results for the +populationsaremoreorlessbiaseddependingontheaxis.They +are well reproducedfor the observation along the intermediate +axis, for which the effects of nonsphericityseem to cancelout, +and more biased for the remaining lines of sight. We notice a +trendfromunder-tooverestimationoftheanisotropywhengo- +ingfromthemajortotheminoraxis. 
Articlenumber,page7of12 A&Aproofs:manuscriptno.Populations4 -major intermediate minor -1 II -POP -0 )r(β +-1 +01 +0 10 20 30 40 +P +O +P +I + +P +O +P I -POP --1 -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 60 70 80 -r [kpc] r [kpc] r [kpc] -1 I -0 POP )r(β +β +( +r +) +r [kpc] +major +0 10 20 30 40 +r [kpc] +intermediate +0 10 20 30 40 +r [kpc] +minor +50 60 70 80 -1 -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 60 70 80 -r [kpc] r [kpc] r [kpc] -1 -II -0 POP )r(β +01 +0 10 20 30 40 +P +O +P +I +β ( +r ) +r [kpc] +0 10 20 30 40 +r [kpc] +0 10 20 30 40 +r [kpc] +50 60 70 80 -1 -0 10 20 30 40 0 10 20 30 40 0 10 20 30 40 50 60 70 80 -r [kpc] r [kpc] r [kpc] -data 1σ -best model 2σ +01 +0 10 20 30 40 +P +O +P I +I +β ( +r ) +r [kpc] +0 10 20 30 40 +r [kpc] +0 10 20 30 40 +r [kpc] +50 60 70 80 +data +best model +1σ +2σ 3σ Fig.9.ProfilesoftheanisotropyparameterobtainedwiththeSchwarzschildmodelingoftwostellarpopulationsofthesimulatedgalaxy.Inrows: results for all stars(calculated asthe superposition of two populations), population I, and population II. Colors follow theconvention used in previousfigures.Incolumns:observationsalongthemajor,intermediate,andminoraxis.Thelastnarrowercolumnshowsthedata(blacklines) outsidethemodeledradialrange.Colorlinesindicatevaluesforthebest-fitmodelswhereasthecoloredareasofdecreasingintensityshowthe1, 2,and3σconfidenceregions. -4. ModelingFornaxdSph sample of all stars since only stars with reliable measurements -ofmetallicitycouldbeincluded. +4. ModelingFornaxdSph In this section we presentthe applicationof our Schwarzschild modelingschemetotheobservationaldatafortheFornaxdSph galaxy obtained by delPinoetal. (2015) and delPinoetal. -100 -(2017).Thisstudyisafollow-upoftheworkofKowalczyketal. pop I -(2019) and can be directly compared to the results presented pop II -80 +(2017).Thisstudyisafollow-upoftheworkofKowalczyketal. +(2019) and can be directly compared to the results presented there. 
Moreover,we refer the reader to these previouspublica- tions for details on the origin of data and our procedures used -60 -forcleaningthespectroscopicsample. N -Similarly to the approachintroducedin Section 2.2, we di- 40 +forcleaningthespectroscopicsample. +Similarly to the approachintroducedin Section 2.2, we di- videdallavailablestarsintotwoequal-sizepopulationsbasedon -their metallicity and then cross-correlatedthe sampleswith the 20 +their metallicity and then cross-correlatedthe sampleswith the datausedinKowalczyketal.(2019).Themetallicityhistogram -ofthefinalspectroscopicsampleisshowninFig.10.Addition- 0 -ally, we color-coded each bin with the population it has been -2.5 -2 -1.5 -1 -0.5 0 +ofthefinalspectroscopicsampleisshowninFig.10.Addition- +ally, we color-coded each bin with the population it has been assignedto,namelyorangeorblueforpopulationIorII.Inter- -[Fe/H] estingly, the case of Fornax is similar to our simulated galaxy as the split at [Fe/H]= −1 also captures an important feature -Fig.10.Metallicityhistogramofthefinalspectroscopicsampleusedin of the object’sstar formationhistory,separating stars into sub- -themodelingoftwostellarpopulationsintheFornaxdSph.Eachbinis -samples older and younger than 6 Gyr, as shown in Fig. 12 of color-codedaccordingtothepopulationithasbeenassignedto,orange -delPinoetal. (2015) and Fig. 8 of delPinoetal. (2017). The orblueforpopulationIandII,respectively. +samples older and younger than 6 Gyr, as shown in Fig. 12 of +delPinoetal. (2015) and Fig. 8 of delPinoetal. (2017). The numbers of stars contained in the samples of all stars, popula- tionI,andpopulationIIaregivenin Table2, wheretheindices -"phot" and "spec" refer to the photometricand kinematic sam- Aswehaveshowninourearlierwork,thelightprofileofthe -ples. The sum of stars in the populations is lower than in the Fornax dSph can be well reproduced with the three-parameter +"phot" and "spec" refer to the photometricand kinematic sam- +ples. 
The sum of stars in the populations is lower than in the +sample of all stars since only stars with reliable measurements +ofmetallicitycouldbeincluded. +N +[Fe/H] +pop I +pop II +0 +20 +40 +60 +80 +100 +-2.5 -2 -1.5 -1 -0.5 0 +Fig.10.Metallicityhistogramofthefinalspectroscopicsampleusedin +themodelingoftwostellarpopulationsintheFornaxdSph.Eachbinis +color-codedaccordingtothepopulationithasbeenassignedto,orange +orblueforpopulationIandII,respectively. +Aswehaveshowninourearlierwork,thelightprofileofthe +Fornax dSph can be well reproduced with the three-parameter Articlenumber,page8of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -Table2.PropertiesofthedatasamplesfortheFornaxdSph. 0.25 -all stars -0.2 +Table2.PropertiesofthedatasamplesfortheFornaxdSph. Property ALL POPI POPII -pop I -Numberofstars(N phot) 65797 14882 49205 0.15 )R(M -Numberofstars(N ) 3286 1136 1151 pop II -spec -0.1 +Numberofstars(N phot) 65797 14882 49205 +Numberofstars(N spec) 3286 1136 1151 Starswithin1.8kpc 3268 1134 1130 -Fittednormalization(N 0)[×104] 6.95 1.81 5.45 0.05 -Sérsicradius(R )[kpc] 0.454 0.429 0.420 -S 0 +Fittednormalization(N 0)[×104] 6.95 1.81 5.45 +Sérsicradius(R S)[kpc] 0.454 0.429 0.420 Sérsicparameter(m) 0.808 0.807 0.898 -0 0.4 0.8 1.2 1.6 +102 +103 +104 +105 +0.2 0.5 2 0.1 1 +n +⋆ ( +R ) +[ +k +p +c - 2 ] R [kpc] -5 -10 -200 ]2 -104 160 2- ] )1- -cpk[ s -120 mk([)R( -80 )R( -3 all stars -10 40 ⋆ 2m -popI n -0 +all stars +popI popII -0 0.4 0.8 1.2 1.6 -2 -10 R [kpc] -0.1 0.2 0.5 1 2 -R [kpc] -]3 -16 )1- -Fig.11.Surfacenumberdensityprofilesofthephotometricdatasam- s -8 mk(2 +Fig.11.Surfacenumberdensityprofilesofthephotometricdatasam- plesfortheFornaxdSph:allavailablestars(inred),themetal-richpop- -ulationI (inorange), andthemetal-poor population II(inblue). Thin 0 -verticallinesindicater (seetext)andtheouterboundaryofthespec- 01[)R( +ulationI (inorange), andthemetal-poor population II(inblue). Thin +verticallinesindicater 0 -troscopicdata. 
-8 --16 -3m -0 0.4 0.8 1.2 1.6 +(seetext)andtheouterboundaryofthespec- +troscopicdata. Sérsicformula(Sérsic1968).Theprofilesofnumberdensityfor -R [kpc] allstarsandbothpopulationstogetherwiththebest-fittingSérsic profilesare presentedin Fig.11. The colorsfollow the conven- -tionintroducedinprevioussections.Thinverticallinesindicate 16 ]4 -the innermost data point for the light profile for all stars and )1- -the outerboundaryof the kinematic sample. The former,set at 12 s -mk(4 +tionintroducedinprevioussections.Thinverticallinesindicate +the innermost data point for the light profile for all stars and +the outerboundaryof the kinematic sample. The former,set at logr = −0.16,isalsousedastheminimumofthemass-to-light -ratio profile (r in Eq.5). The fitted parameters of the profiles, 8 -0 01[)R( -thatisthenormalizationN ,theSérsicradiusR ,andtheSérsic -0 S -parameterm,areincludedinthesecondpartofTable2. 4 -Figure12presentstheprofilesoftheobservablesusedinthe 4 -0 m +ratio profile (r 0 in Eq.5). The fitted parameters of the profiles, +thatisthenormalizationN 0,theSérsicradiusR S,andtheSérsic +parameterm,areincludedinthesecondpartofTable2. +Figure12presentstheprofilesoftheobservablesusedinthe Schwarzschildmodeling:thefractionofstarsandthe 2nd,3rd, -0 0.4 0.8 1.2 1.6 and4thvelocitymoments(toptobottom)forthethreedatasam- -ples:allstars,populationI,andpopulationII(inred,orange,and R [kpc] +ples:allstars,populationI,andpopulationII(inred,orange,and blue,respectively).Theerrorbarsindicate1σsamplingerrors. -The parameter space for Υ(r) has been probed as follows: Fig. 12. 
Observables of the Fornax dSph used in our Schwarzschild -a ∈ [0 : 1.85]with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a modelingscheme.Inrows:thefractionofthetotalnumberofstars,the -step∆c = 0.2.We pointoutthatinKowalczyketal.(2019)the 2nd,3rd,and4thvelocitymoment.Inredwepresentthevaluesobtained -parameter c was fixed at c = 3 and now we fit it as a free pa- forallstarswhereasinorangeandbluethoseforpopulationsIandII, -rameter.AsforthemockdatainSection3.2,differentvaluesof respectively. -Υ were obtainedwith the transformationof velocity moments +The parameter space for Υ(r) has been probed as follows: +a ∈ [0 : 1.85]with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a +step∆c = 0.2.We pointoutthatinKowalczyketal.(2019)the +parameter c was fixed at c = 3 and now we fit it as a free pa- +rameter.AsforthemockdatainSection3.2,differentvaluesof +Υ 0 +were obtainedwith the transformationof velocity moments withintheχ2 fittingroutine.Thevaluesof∆χ2 forallstarsand thepopulationsareshowninthetwopanelsofFig.13(leftand -right-handside,respectively).Duetothedensecoverageofthe theconfidencelevelsonΥfromthefitoftwopopulations.Green -grid,wedecidedtoincludeonlythevalueswithin3σfromthe linesindicatethevaluesforthebest-fittingmodelswhereasthe -fittedminimums(seeSection3.1). coloredareasofdecreasingintensityshowthe1,2,and3σcon- -The profiles of the mass-to-light ratio, total density, total fidenceregions.Additionally,withblackdashedlinesweinclude -mass,andvelocityanisotropyresultingfromtheχ2distributions theresultsfromKowalczyketal.(2019)forcomparison. -arepresentedintheconsecutiverowsofFig.14.Theanisotropy As a result of freeing the steepness of the mass-to-light -profileforthepopulationsisbasedonthefitofallstarsbutusing ratio profile (parameter c) with respect to the previous study +right-handside,respectively).Duetothedensecoverageofthe +grid,wedecidedtoincludeonlythevalueswithin3σfromthe +fittedminimums(seeSection3.1). 
+The profiles of the mass-to-light ratio, total density, total +mass,andvelocityanisotropyresultingfromtheχ2distributions +arepresentedintheconsecutiverowsofFig.14.Theanisotropy +profileforthepopulationsisbasedonthefitofallstarsbutusing +0 +0.05 +0.1 +0.15 +0.2 +0.25 +0 0.4 0.8 1.2 1.6 +M +( R +) +R [kpc] +all stars +pop I +pop II +0 +40 +80 +120 +160 +200 +0 0.4 0.8 1.2 1.6 +m +2 ( +R ) [ +( +k +m +s - 1 ) +2 +] +R [kpc] +-16 +-8 +08 +16 +0 0.4 0.8 1.2 1.6 +m +3 +( +R +) +[ +1 +0 +2 +( +k +m +s - +1 +) +3 +] +R [kpc] +048 +12 +16 +0 0.4 0.8 1.2 1.6 +m +4 ( +R ) +[ +1 +0 4 ( +k +m +s - +1 ) +4 +] +R [kpc] +Fig. 12. Observables of the Fornax dSph used in our Schwarzschild +modelingscheme.Inrows:thefractionofthetotalnumberofstars,the +2nd,3rd,and4thvelocitymoment.Inredwepresentthevaluesobtained +forallstarswhereasinorangeandbluethoseforpopulationsIandII, +respectively. +theconfidencelevelsonΥfromthefitoftwopopulations.Green +linesindicatethevaluesforthebest-fittingmodelswhereasthe +coloredareasofdecreasingintensityshowthe1,2,and3σcon- +fidenceregions.Additionally,withblackdashedlinesweinclude +theresultsfromKowalczyketal.(2019)forcomparison. +As a result of freeing the steepness of the mass-to-light +ratio profile (parameter c) with respect to the previous study Articlenumber,page9of12 A&Aproofs:manuscriptno.Populations4 -ALL POPULATIONS -6 6 +0 +0.5 +1 +1.5 +0 +0.5 1 +1.5 +2 +3 +4 +5 +6 +ALL +Υ 0 +a +c +0 +0.5 +1 +1.5 +0 +0.5 1 +1.5 +2 +3 +4 +5 +6 +POPULATIONS +Υ 0 +a +c +0 +3 +6 +9 12 -5 5 -4 4 -c c 9 -nim -3 3 -2χ-2χ -2 2 6 -1.5 1.5 -1 0 1 0 3 -Υ 0 0.5 1 0.5 Υ 0 0.5 1 0.5 -1.5 a 1.5 a -0 0 0 +χ 2 - +χ +2 +m i +n Fig.13.Valuesofχ2relativetothefittedminimumwithintherangeof3σconfidencelevelforallstars(leftpanel)andforthepopulations(right panel)fortheFornaxdSph. -(Kowalczyketal.2019),weobtainedhigherestimatesoftheen- of the real growing profile by observing an object along the -closed total mass at larger radii. 
In particular, for the mass en- minor and major axis, respectively. However, for the bias to -closedwithin 1.8kpcwe get M (< 1.8kpc) = 3.87+1.48×108 occur in two populations presented here, their inner orienta- -all −1.56 -M⊙ fromthefitforallstarsand M pops(< 1.8 kpc)= 4.71+ −0 1. .8 17 3× tionswouldneedtobeopposite.Sincesuchmorphologicalfea- -108 M⊙ from the fit of populations, while previously we had tures are not supported by the photometric studies of Fornax -M old(<1.8 kpc)=3.7+ −1 1. .4 3×108M⊙. ( sd pe al tiP alin ao lie gt na ml. e2 n0 t1 b5 e; twW ea en ng te ht ea sl. te2 l0 la1 r9 p) ow ph ui lc ah tiora nt sh ,e wrfi en cd oa ncg lo uo dd -e +(Kowalczyketal.2019),weobtainedhigherestimatesoftheen- +closed total mass at larger radii. In particular, for the mass en- +closedwithin 1.8kpcwe get M all(< 1.8kpc) = 3.87+1.48 −1.56×108 +M⊙ fromthefitforallstarsand M pops(< 1.8 kpc)= 4.71+0.87 −1.13× +108 M⊙ from the fit of populations, while previously we had +M old(<1.8 kpc)=3.7+1.4 −1.3×108M⊙. Interestingly, despite the significant shift of the position of -that the anisotropy profiles of the two populations modeled in -χ2 (to c = 4.2 for all stars and 3.6 for populations), the ob- -min thisworkareindeedsignificantlydistinct. +χ2 +min +(to c = 4.2 for all stars and 3.6 for populations), the ob- tainedprofileoftheanisotropyparameterremainsdecreasingor -Finally,itisworthnoticingthattheso-calledmass-follows- flat for all stars but changesto increasing from0 to 0.5 for the -lightmodel,thatis the one followingfromthe assumptionthat populations. Nevertheless, even in the latter case the previous -thetotaldensitytracesthestellar distribution,isnolongersup- resultagreeswiththenewfindingwithin1σ. -ported by the fit of the populations. 
With our parametrization, -The detailed analysis of the anisotropy is shown in Fig.15 themass-follows-lightmodelcorrespondstoa = 0andwhereas +The detailed analysis of the anisotropy is shown in Fig.15 where the middle and bottom panels present the profiles ob- -it is enclosed within 3σ for the fit of all stars, as was the case tainedforeachpopulationseparately.Wenoticethattheprofile -inKowalczyketal.(2019),theallowedvaluesfortheimproved for populationI is decreasing or has a local minimumwhereas -method are much larger, as demonstratedby the right panel of for population II is increasing (from −0.25 to 0.5 for the best- -Fig.13. fittingmodel).SincepopulationIismoreconcentrated,thelast bins contain very few stars, which limits their credibility. The -top panel of Fig.15 presents the anisotropy of all stars calcu- 5. Summaryanddiscussion +top panel of Fig.15 presents the anisotropy of all stars calcu- latedasaweightedsuperpositionoftwopopulations.Withsuch -Building on the previously created implementation of the approachwestillobtaintheincreasingprofile(from0to0.5)but -Schwarzschildorbitsuperpositionmethodfocusedonmodeling thepreviousresultagreeswithitonlywithin2σ. +Since Fornax dSph is significantly elongated with the pro- +jected ellipticity of ǫ = 0.30 ± 0.01 (Irwin&Hatzidimitriou +1995), we anticipate some bias in the obtained results caused +bythesphericallysymmetricmodeling.Kowalczyketal.(2018) +studied such bias in an axisymmetric simulated object qualita- +tivelysimilartoFornaxandidentifieddifferencesinthesystem- +aticerrorsdependingonwhetherthegalaxywasobservedalong +itsmajororminoraxis.AssumingthatFornaxisobservedalong +the lineof sightin betweenthese extremes,we expectthetotal +massprofiletobeslightlyoverestimatedandtheanisotropytobe +underestimated,further strengtheningthe likelihoodof the real +anisotropytoberadialanditsprofiletobegrowingwithradius +withrespecttotheresultsofKowalczyketal.(2019). 
+Bothconstant(likeforourpopulationI)andgrowing(pop- +ulation II) anisotropy profiles can arise from biased modeling +of the real growing profile by observing an object along the +minor and major axis, respectively. However, for the bias to +occur in two populations presented here, their inner orienta- +tionswouldneedtobeopposite.Sincesuchmorphologicalfea- +tures are not supported by the photometric studies of Fornax +(delPinoetal.2015;Wangetal.2019)whichratherfindagood +spatial alignmentbetween the stellar populations, we conclude +that the anisotropy profiles of the two populations modeled in +thisworkareindeedsignificantlydistinct. +Finally,itisworthnoticingthattheso-calledmass-follows- +lightmodel,thatis the one followingfromthe assumptionthat +thetotaldensitytracesthestellar distribution,isnolongersup- +ported by the fit of the populations. With our parametrization, +themass-follows-lightmodelcorrespondstoa = 0andwhereas +it is enclosed within 3σ for the fit of all stars, as was the case +inKowalczyketal.(2019),theallowedvaluesfortheimproved +method are much larger, as demonstratedby the right panel of +Fig.13. +5. Summaryanddiscussion +Building on the previously created implementation of the +Schwarzschildorbitsuperpositionmethodfocusedonmodeling dSphgalaxiesoftheLocalGroup(Kowalczyketal.2017,2018, -Since Fornax dSph is significantly elongated with the pro- 2019),weimprovedourtoolbyintroducingmultiplestellarpop- -jected ellipticity of ǫ = 0.30 ± 0.01 (Irwin&Hatzidimitriou ulations. 
Such an improvement is desirable and justified since -1995), we anticipate some bias in the obtained results caused manyofthedwarfsshowsignsofmultiplestarformationbursts -bythesphericallysymmetricmodeling.Kowalczyketal.(2018) orextendedstarformationepisodes.Asthedifferentpopulations -studied such bias in an axisymmetric simulated object qualita- trace the common underlying gravitational potential, one may -tivelysimilartoFornaxandidentifieddifferencesinthesystem- expectasignificantimprovementintheestimatesofnotonlythe -aticerrorsdependingonwhetherthegalaxywasobservedalong totalmasscontentbutalsotheorbitanisotropysincethisrobust -itsmajororminoraxis.AssumingthatFornaxisobservedalong modeling technique reproduces the anisotropy as a by-product -the lineof sightin betweenthese extremes,we expectthetotal ofthemodelingratherthantakingitasanassumption. -massprofiletobeslightlyoverestimatedandtheanisotropytobe Wehavetestedourhypothesisbymodelingmockdatagener- -underestimated,further strengtheningthe likelihoodof the real atedfromagalaxyformedintheIllustrissimulation.Duetothe -anisotropytoberadialanditsprofiletobegrowingwithradius limitationsoftheresolution,wechoseagalaxyofmassafewor- -withrespecttotheresultsofKowalczyketal.(2019). dersof magnitudelargerthanthe estimatedmasses ofclassical -Bothconstant(likeforourpopulationI)andgrowing(pop- dwarfs.Still, the galaxypossessed appropriatequalitativechar- -ulation II) anisotropy profiles can arise from biased modeling acteristics,suchasthelackofgasandanalmostsphericalshape, +2019),weimprovedourtoolbyintroducingmultiplestellarpop- +ulations. 
Such an improvement is desirable and justified since +manyofthedwarfsshowsignsofmultiplestarformationbursts +orextendedstarformationepisodes.Asthedifferentpopulations +trace the common underlying gravitational potential, one may +expectasignificantimprovementintheestimatesofnotonlythe +totalmasscontentbutalsotheorbitanisotropysincethisrobust +modeling technique reproduces the anisotropy as a by-product +ofthemodelingratherthantakingitasanassumption. +Wehavetestedourhypothesisbymodelingmockdatagener- +atedfromagalaxyformedintheIllustrissimulation.Duetothe +limitationsoftheresolution,wechoseagalaxyofmassafewor- +dersof magnitudelargerthanthe estimatedmasses ofclassical +dwarfs.Still, the galaxypossessed appropriatequalitativechar- +acteristics,suchasthelackofgasandanalmostsphericalshape, Articlenumber,page10of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -ALL POPULATIONS -1 +101 +103 105 -II +0.1 1 +Υ ( r ) [ M +⊙ / L ⊙ ] +r [kpc] +ALL +0.1 1 +r [kpc] +POPULATIONS 3σ -2σ 0 ]⊙L/⊙M[ POP -1σ -103 )r(β -best model + -K19 -1 I -)r(Υ POP -101 --2 -0 0.4 0.8 1.2 1.6 -0.1 1 0.1 1 -r [kpc] r [kpc] r [kpc] -1 -]3-cpk -108 -0 -I -⊙M[ POP )r(β -106 --1 -)r( -totν +2σ 1σ best model +K19 104 --2 -0.1 1 0.1 1 -r [kpc] r [kpc] 0 0.4 0.8 1.2 1.6 +106 +108 +0.1 1 +ν t o t ( r ) +[ +M +⊙ +k +p +c - +3 ] r [kpc] -1 +0.1 1 +r [kpc] +105 +107 109 -]⊙M[ -0 II -107 )r( totM POP )r(β +0.1 1 +M t o t ( r ) +[ +M +⊙ ] +r [kpc] +0.1 1 +r [kpc] +-3 +-2 -1 -105 -0.1 1 0.1 1 -r [kpc] r [kpc] -2 +01 0 0.4 0.8 1.2 1.6 -1 +β ( r ) r [kpc] -0 --1 best model 3σ )r(β -1σ K19 --2 2σ --3 -0 0.4 0.8 1.2 1.6 0 0.4 0.8 1.2 1.6 Fig. 15. Profiles of the anisotropy parameter obtained with the -r [kpc] r [kpc] SchwarzschildmodelingoftwostellarpopulationsfortheFornaxdSph. -Inrows:resultsforallstars(calculatedasthesuperpositionoftwopop- -Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. ulations), population I, and population II. 
Color lines indicate values -In rows: derived mass-to-light ratio, total density, total mass, and forthebest-fitmodelswhereasthecoloredareasofdecreasingintensity -anisotropy parameter.Incolumns: resultsforallstarsandthepopula- showthe1,2,and3σconfidenceregions.Thedashedblacklineshows -tions,respectively.Greenlinesindicatethevaluesforthebest-fitmodels theresultfromKowalczyketal.(2019)forcomparison. +0 0.4 0.8 1.2 1.6 +r [kpc] +Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. +In rows: derived mass-to-light ratio, total density, total mass, and +anisotropy parameter.Incolumns: resultsforallstarsandthepopula- +tions,respectively.Greenlinesindicatethevaluesforthebest-fitmodels whereasthecoloredareasofdecreasingintensityshowthe1,2,and3σ confidenceregions.Thebest-fittingvaluesobtainedbyKowalczyketal. (2019)areshownwithblackdashedlines. +that made it a good test bed for modeling techniques applica- +ble to dSph galaxies. We applied our approach to all data and +totwostellarpopulationsseparately,comparingtheaccuracyof +theobtainedresults. Althoughtheadditionofthe secondtracer +seeminglyincreasesthenumberofconstraintstwice, theincre- +mentissomewhatcompromisedbythesamplingerrorssincethe +numberofstarsineachsampleisthenreduced.Still,wefound +strong improvements in the accuracy of the method when us- +ing two populations.The resultsof the modelingshow that the +density and velocity anisotropyprofilesare more strongly con- +strained, most importantly at the 3σ level, that is the range of +allowedvaluesismuchnarrower. +SimilarlytotheconclusionsofKowalczyketal.(2018)who +explored the effects of nonsphericity using large and small +data samples, the comparison of results presented in the left- +and right-handside panelsof Fig.8 suggeststhat the improved +methodusingtwostellarpopulationsgivesmoreprecisebutless +accurate outcome. However, in both studies the apparent dete- +rioration of the reliability is a consequence of modeling of a +nonspherical object. 
In both cases, a simpler approach (much +smaller data samples or using one stellar population) resulted +-2 +-1 +01 +0 0.4 0.8 1.2 1.6 +P O P I ++ P O P +I +I +β +( r ) +r [kpc] +-2 +-1 +01 +0 0.4 0.8 1.2 1.6 +P +O +P +I +β ( +r ) +r [kpc] +-2 +-1 +01 +0 0.4 0.8 1.2 1.6 +P O P I +I +β ( r ) +r [kpc] +best model 1σ +2σ +3σ K19 +Fig. 15. Profiles of the anisotropy parameter obtained with the +SchwarzschildmodelingoftwostellarpopulationsfortheFornaxdSph. +Inrows:resultsforallstars(calculatedasthesuperpositionoftwopop- +ulations), population I, and population II. Color lines indicate values +forthebest-fitmodelswhereasthecoloredareasofdecreasingintensity +showthe1,2,and3σconfidenceregions.Thedashedblacklineshows +theresultfromKowalczyketal.(2019)forcomparison. in larger final uncertainties, usually containing the true values within 1σ confidenceregion. On the other hand, the improved methodsexhibitsubstantiallyreduceduncertainties,highlighting -that made it a good test bed for modeling techniques applica- theunderlyingbias. -ble to dSph galaxies. We applied our approach to all data and -totwostellarpopulationsseparately,comparingtheaccuracyof Our method parametrizes the total mass content with the -theobtainedresults. Althoughtheadditionofthe secondtracer mass-to-lightratiovaryingwithradiusasapower-lawinthelog- -seeminglyincreasesthenumberofconstraintstwice, theincre- logscale.Wemadetwomainchangeswithrespecttoourprevi- -mentissomewhatcompromisedbythesamplingerrorssincethe ouswork:weaddedathirdparameterccontrollingthesteepness -numberofstarsineachsampleisthenreduced.Still,wefound ofthemass-to-lightratioprofile(previouslyfixedatthevalueof -strong improvements in the accuracy of the method when us- 3) and allowed for different stellar density profiles (previously -ing two populations.The resultsof the modelingshow that the only Sérsic, now also King). 
These changesare of course cou- -density and velocity anisotropyprofilesare more strongly con- pledsincedifferentdensityprofilesrequiredifferentexponentsto -strained, most importantly at the 3σ level, that is the range of reproducethesamemassprofile.Itisvisiblealsoinourresults -allowedvaluesismuchnarrower. since the King profile applied in the simulated galaxy gave us -SimilarlytotheconclusionsofKowalczyketal.(2018)who valuesofclowerthan3.Nevertheless,wedecidedtousediffer- -explored the effects of nonsphericity using large and small entdensityprofilestomakeourmethodmoregeneralandappli- -data samples, the comparison of results presented in the left- cabletoobjects,suchasourIllustrisgalaxy,forwhichtheSérsic -and right-handside panelsof Fig.8 suggeststhat the improved formula does not provide a good approximationof the density -methodusingtwostellarpopulationsgivesmoreprecisebutless distribution. -accurate outcome. However, in both studies the apparent dete- Finally,weappliedtheimprovedmethodtothedataforthe -rioration of the reliability is a consequence of modeling of a FornaxdSphgalaxy.Duetotheadditionofanotherfreeparam- -nonspherical object. In both cases, a simpler approach (much eter in our functional form for the mass-to-light ratio, our re- -smaller data samples or using one stellar population) resulted sults for modeling all stars are slightly different from the ones +Our method parametrizes the total mass content with the +mass-to-lightratiovaryingwithradiusasapower-lawinthelog- +logscale.Wemadetwomainchangeswithrespecttoourprevi- +ouswork:weaddedathirdparameterccontrollingthesteepness +ofthemass-to-lightratioprofile(previouslyfixedatthevalueof +3) and allowed for different stellar density profiles (previously +only Sérsic, now also King). 
These changesare of course cou- +pledsincedifferentdensityprofilesrequiredifferentexponentsto +reproducethesamemassprofile.Itisvisiblealsoinourresults +since the King profile applied in the simulated galaxy gave us +valuesofclowerthan3.Nevertheless,wedecidedtousediffer- +entdensityprofilestomakeourmethodmoregeneralandappli- +cabletoobjects,suchasourIllustrisgalaxy,forwhichtheSérsic +formula does not provide a good approximationof the density +distribution. +Finally,weappliedtheimprovedmethodtothedataforthe +FornaxdSphgalaxy.Duetotheadditionofanotherfreeparam- +eter in our functional form for the mass-to-light ratio, our re- +sults for modeling all stars are slightly different from the ones Articlenumber,page11of12 A&Aproofs:manuscriptno.Populations4 -obtained in Kowalczyketal. (2019). However, in terms of the delPino,A.,Hidalgo,S.L.,Aparicio,A.,etal.2013,MNRAS,433,1505 -total density and mass distribution the estimates obtained here delPino,A.,Aparicio,A.,&Hidalgo,S.L.2015,MNRAS,454,3996 -agree very well with those earlier results in the range covered delPino,A.,Aparicio,A.,Hidalgo,S.L.,&Łokas,E.L.2017,MNRAS,465, -3708 +obtained in Kowalczyketal. (2019). However, in terms of the +total density and mass distribution the estimates obtained here +agree very well with those earlier results in the range covered bythedata.Therefore,thedetailedcomparisonwithotheresti- -Fabrizio,M.,Bono,G.,Nonino,M.,etal.2016,ApJ,830,126 -matesfromtheliteraturepresentedinKowalczyketal.(2019)is Gebhardt,K.,Richstone,D.,Tremaine,S.,etal.2003,ApJ,583,92 -stillvalidandwedonotrepeatithere. Genel,S.,Fall,S.M.,Hernquist,L.,etal.2015,ApJ,804,L40 -Amoresignificantdifferencewithrespecttotheseprevious Genel,S.,Vogelsberger,M.,Springel,V.,etal.2014,MNRAS,445,175 -Genina,A.,Benitez-Llambay,A.,Frenk,C.S.,etal.2018,MNRAS,474,1398 +matesfromtheliteraturepresentedinKowalczyketal.(2019)is +stillvalidandwedonotrepeatithere. 
+Amoresignificantdifferencewithrespecttotheseprevious estimates is seen in the results of modeling two populationsin -Hayashi,K.,Fabrizio,M.,Łokas,E.L.,etal.2018,MNRAS,481,250 Fornax.Inthiscasewefindtheanisotropytobeslightlyincreas- -Irwin,M.,&Hatzidimitriou,D.1995,MNRAS,277,1354 -ingratherthandecreasingwithradiusand,mostimportantly,the Jardel,J.R.,&Gebhardt,K.2012,ApJ,746,89 -confidence regions for this parameter, as well as for the den- Jardel,J.R.,Gebhardt,K.,Fabricius,M.H.,Drory,N.,&Williams,M.J.2013, -sity,aremuchnarrower.Wewerethusabletoobtaintightercon- ApJ,763,91 -King,I.1962,AJ,67,471 +ingratherthandecreasingwithradiusand,mostimportantly,the +confidence regions for this parameter, as well as for the den- +sity,aremuchnarrower.Wewerethusabletoobtaintightercon- straints on the properties of Fornax, which means that the im- -Kowalczyk,K.,Łokas,E.L.,Kazantzidis,S.,&Mayer,L.2013,MNRAS,431, -provedmethodissuccessful.Forthefirsttime,wewerealsoable 2796 -todeducethevelocityanisotropyprofilesforeachofthepopula- Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2017,MNRAS,470,3959 -tionsseparately.Wefoundthatthemoreconcentrated,metal-rich Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2018,MNRAS,476,2918 -Kowalczyk,K.,delPino,A.,Łokas,E.L.,&Valluri,M.2019,MNRAS,482, +provedmethodissuccessful.Forthefirsttime,wewerealsoable +todeducethevelocityanisotropyprofilesforeachofthepopula- +tionsseparately.Wefoundthatthemoreconcentrated,metal-rich populationIhasadecreasinganisotropyprofilewhilethemore -5241 extended,metal-poorpopulationIIhastheanisotropyincreasing -Łokas,E.L.,2002,MNRAS,333,697 -with radius.Thisfindingmaypartiallyexplainthe largespread Łokas,E.L.,Mamon,G.A.,&Prada,F.2005,MNRAS,363,918 -of the anisotropy values obtained in the literature and summa- Massari,D.,Helmi,A.,Mucciarelli,A.etal.2020,A&A,633,A36 -rized in Table 2 and 3 of Kowalczyketal. 
(2019), which were Mateo,M.1998,ARA&A,36,435 -Nelson,D.,Pillepich,A.,Genel,S.,etal.2015,AstronomyandComputing,13, +with radius.Thisfindingmaypartiallyexplainthe largespread +of the anisotropy values obtained in the literature and summa- +rized in Table 2 and 3 of Kowalczyketal. (2019), which were often based on modeling subsamples of our spectroscopic data -12 -set. Pace,A.B.,Kaplinghat,M.,Kirby,E.,etal.2020,MNRAS,495,3022 -For both studied objectswe split the stars into two popula- Press,W.H.,Teukolsky,S.A.,Vetterling,W.T.,&Flannery,B.P.1992,Numer- -tions by dividing them in half based on their metallicity, Z (in icalRecipesinC,2ndedn.(CambridgeUniversityPress,Cambridge) -solarunits),fortheIllustrisgalaxyand[Fe/H]forFornax.Such Schwarzschild,M.1979,ApJ,232,236 -Sérsic,J.L.1968,AtlasdeGalaxiasAustrales(ObservatorioAstronomico,Cor- +set. +For both studied objectswe split the stars into two popula- +tions by dividing them in half based on their metallicity, Z (in +solarunits),fortheIllustrisgalaxyand[Fe/H]forFornax.Such amethodisapproximatebutjustified.Bothgalaxieshavecom- -doba,Argentina) -plexstarformationhistorywithmultiplestarformationbursts,as Strigari,L.E.,Bullock,J.S.,&Kaplinghat,M.2007,ApJ,657,L1 -demonstratedbyFig.1inthisworkandFig.7indelPinoetal. Tolstoy,E.,Hill,V.,&Tosi,M.2009,ARA&A,47,371 -(2013), producingmultiplestellar populationswhichcannotbe Valluri,M.,Merritt,D.,&Emsellem,E.2004,ApJ,602,66 -vanderMarel,R.P.,Cretton,N.,deZeeuw,P.T.,&Rix,H.-W.1998,ApJ,493, +plexstarformationhistorywithmultiplestarformationbursts,as +demonstratedbyFig.1inthisworkandFig.7indelPinoetal. +(2013), producingmultiplestellar populationswhichcannotbe easilytrackedasthemetallicityisagoodbutnotperfectproxy -613 -forthestellarage.Moreover,themetallicityhistogramsforboth Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014a,Nature,509,177 -objects are approximately unimodal not allowing for a conve- Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014b,MNRAS,444,1518 -nient separation. 
More refined methods of division have been Walker,M.G.,&Peñarrubia,J.2011,ApJ,742,20 -Wang,M.Y.,deBoer,T.,Pieres,A.,etal.2019,ApJ,881,118 +forthestellarage.Moreover,themetallicityhistogramsforboth +objects are approximately unimodal not allowing for a conve- +nient separation. More refined methods of division have been suggestedintheliterature,forexampleintheformofthelikeli- hoodfunctionbasedontheposition,velocity,andmetallicityin- dex(Walker&Peñarrubia 2011).However,thelikelihoodfunc- @@ -889,4 +1578,46 @@ versityPress,Princeton) Breddels,M.A.,&Helmi,A.2013,A&A,558,A35 Breddels,M.A.,Helmi,A.,vandenBosch,R.C.E.,vandeVen,G.,&Battaglia, G.2013,MNRAS,433,3173 +delPino,A.,Hidalgo,S.L.,Aparicio,A.,etal.2013,MNRAS,433,1505 +delPino,A.,Aparicio,A.,&Hidalgo,S.L.2015,MNRAS,454,3996 +delPino,A.,Aparicio,A.,Hidalgo,S.L.,&Łokas,E.L.2017,MNRAS,465, +3708 +Fabrizio,M.,Bono,G.,Nonino,M.,etal.2016,ApJ,830,126 +Gebhardt,K.,Richstone,D.,Tremaine,S.,etal.2003,ApJ,583,92 +Genel,S.,Fall,S.M.,Hernquist,L.,etal.2015,ApJ,804,L40 +Genel,S.,Vogelsberger,M.,Springel,V.,etal.2014,MNRAS,445,175 +Genina,A.,Benitez-Llambay,A.,Frenk,C.S.,etal.2018,MNRAS,474,1398 +Hayashi,K.,Fabrizio,M.,Łokas,E.L.,etal.2018,MNRAS,481,250 +Irwin,M.,&Hatzidimitriou,D.1995,MNRAS,277,1354 +Jardel,J.R.,&Gebhardt,K.2012,ApJ,746,89 +Jardel,J.R.,Gebhardt,K.,Fabricius,M.H.,Drory,N.,&Williams,M.J.2013, +ApJ,763,91 +King,I.1962,AJ,67,471 +Kowalczyk,K.,Łokas,E.L.,Kazantzidis,S.,&Mayer,L.2013,MNRAS,431, +2796 +Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2017,MNRAS,470,3959 +Kowalczyk,K.,Łokas,E.L.,&Valluri,M.2018,MNRAS,476,2918 +Kowalczyk,K.,delPino,A.,Łokas,E.L.,&Valluri,M.2019,MNRAS,482, +5241 +Łokas,E.L.,2002,MNRAS,333,697 +Łokas,E.L.,Mamon,G.A.,&Prada,F.2005,MNRAS,363,918 +Massari,D.,Helmi,A.,Mucciarelli,A.etal.2020,A&A,633,A36 +Mateo,M.1998,ARA&A,36,435 +Nelson,D.,Pillepich,A.,Genel,S.,etal.2015,AstronomyandComputing,13, +12 +Pace,A.B.,Kaplinghat,M.,Kirby,E.,etal.2020,MNRAS,495,3022 
+Press,W.H.,Teukolsky,S.A.,Vetterling,W.T.,&Flannery,B.P.1992,Numer- +icalRecipesinC,2ndedn.(CambridgeUniversityPress,Cambridge) +Schwarzschild,M.1979,ApJ,232,236 +Sérsic,J.L.1968,AtlasdeGalaxiasAustrales(ObservatorioAstronomico,Cor- +doba,Argentina) +Strigari,L.E.,Bullock,J.S.,&Kaplinghat,M.2007,ApJ,657,L1 +Tolstoy,E.,Hill,V.,&Tosi,M.2009,ARA&A,47,371 +Valluri,M.,Merritt,D.,&Emsellem,E.2004,ApJ,602,66 +vanderMarel,R.P.,Cretton,N.,deZeeuw,P.T.,&Rix,H.-W.1998,ApJ,493, +613 +Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014a,Nature,509,177 +Vogelsberger,M.,Genel,S.,Springel,V.,etal.2014b,MNRAS,444,1518 +Walker,M.G.,&Peñarrubia,J.2011,ApJ,742,20 +Wang,M.Y.,deBoer,T.,Pieres,A.,etal.2019,ApJ,881,118 Articlenumber,page12of12 diff --git a/read/results/pdfplumber/2201.00178.txt b/read/results/pdfplumber/2201.00178.txt index 3d72e5f..8b78181 100644 --- a/read/results/pdfplumber/2201.00178.txt +++ b/read/results/pdfplumber/2201.00178.txt @@ -5,23 +5,20 @@ Prasad Mani ,1 Chris S. Hanson ,2 and Shravan Hanasoge 1,2 1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India 2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE ABSTRACT -2202 The technique of normal-mode coupling is a powerful tool with which to seismically image non- axisymmetricphenomenaintheSun. HereweapplymodecouplingintheCartesianapproximationto probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and -Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling naJ +Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling measurements to show that the resulting divergence and radial vorticity maps at supergranular length scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre- -lation Tracking method. 
We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, 1 +lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, while ≥ 0.8 is obtained for the radial vorticity. -]RS.hp-ortsa[ Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) 1. INTRODUCTION Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun, behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of -1v87100.1022:viXra the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be retrieved through meticulous inversions of these seismic measurements. Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. @@ -44,6 +41,44 @@ et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formu validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and comparing with previous time-distance studies (Langfellner et al. 2018). prasad.subramanian@tifr.res.in +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +1 +7 +8 +v +1 +[ +a +s +t +r +o +- +p +h . +S +R +] +1 +J +a +n +2 +0 +2 +2 2 Mani et al. Normal-modecouplingreferstotheconceptofexpressingsolar-oscillationeigenfunctionsasalinearweightedcombi- nationofmodel-eigenfunctions(e.g.,ModelSChristensen-Dalsgaard2021). Themodeleigenfunctionsformacomplete @@ -58,27 +93,29 @@ In this study, we extend the spectral analysis of H21 and develop the method to at supergranulation length scales. 
A part of the formalism that was used to derive the forward model in H21 is reworked,primarilytoimagesteadyflows. Measurementsarethenconstructed,andinversionstoinferdivergenceflow and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order -coupling (p -p ), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. -2 2 +coupling (p 2-p 2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. 1.1. Forward problem In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we -denote the horizontal unit vectors e and e in our local Cartesian domain as pointing towards west and north on the -x y -solar surface, respectively, and e points outwards. This approximation is valid when observing patches of the surface +denote the horizontal unit vectors e +x +and e +y +in our local Cartesian domain as pointing towards west and north on the +solar surface, respectively, and e z +points outwards. This approximation is valid when observing patches of the surface that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the -horizontal wavenumber qR ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(q ,q )| is the vector horizontal -(cid:12) x y +horizontal wavenumber qR +(cid:12) +≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(q x,q y)| is the vector horizontal wavenumber of the flow. 
Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbationdescribedinahorizontalFourierdomain. Supergranularvelocitiesaresubsonic(300-400m/s,seeRincon -&Rieutord2018), permittingustomodeltheflowvectoruuu=(u ,u ,u )intheCartesiandomainlikeso(Unnoetal. -x y z +&Rieutord2018), permittingustomodeltheflowvectoruuu=(u x,u y,u z)intheCartesiandomainlikeso(Unnoetal. 1989; Woodard 2006) -uσ =∇×[∇×(P e )]+∇×(T e ), (1) -z z +uσ =∇×[∇×(P e z)]+∇×(T e z), (1) where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for @@ -86,87 +123,148 @@ example), here we only consider the frequency bin σ = 0, denoting the temporall of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using vector calculus results in -u=−∇2Pe +∇(∂ P)+∇ T×e , (2) -z z h z -where ∇ refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the +u=−∇2Pe z+∇(∂ zP)+∇ hT×e z, (2) +where ∇ h +refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a -functionofhorizontalwavenumberq anddepthze . HencethepoloidalandtoroidalflowsaredescribedbyP (z)and -z q -T (z), respectively. Furthermore, we parametrize the flow along e using basis functions f(z) (Chebyshev, B-spline, -q z +functionofhorizontalwavenumberq anddepthze z. HencethepoloidalandtoroidalflowsaredescribedbyP q(z)and +T q(z), respectively. 
Furthermore, we parametrize the flow along e +z +using basis functions f(z) (Chebyshev, B-spline, etc). This is expressed as -(cid:88) (cid:88) -P ≡P (z)= f (z)P , T ≡T (z)= f (z)T . (3) -q j qj q j qj -j j -The flow coefficients P and T , represented by the discrete indices q and j, become ideal candidates for inversions, -qj qj +P ≡P +q(z)=(cid:88) +j +f j(z)P qj, T ≡T +q(z)=(cid:88) +j +f j(z)T qj. (3) +The flow coefficients P +qj +and T qj, represented by the discrete indices q and j, become ideal candidates for inversions, where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be -exploited to expedite inversions. Note that P = P∗ and T = T∗ for the flow field to be real in the spatio- -qj −qj qj −qj +exploited to expedite inversions. Note that P +qj += P∗ +−qj +and T +qj += T∗ +−qj +for the flow field to be real in the spatio- temporal domain. To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner Imaging near-surface flows using mode-coupling analysis 3 -φω∗φω , wherek istheoscillationmodewavenumber(k ,k )andω isthetemporalfrequency. Relateφω∗φω thus -k k+q x y k k+q -to the flow coefficients P and T (see eq A7) -qj qj +φω∗ +k +φω k+q, wherek istheoscillationmodewavenumber(k x,k y)andω isthetemporalfrequency. Relateφω∗ +k +φω +k+q +thus +to the flow coefficients P +qj +and T +qj +(see eq A7) +(cid:104)φω∗ +k +φω k+q(cid:105)=Hω +kk(cid:48)nn(cid:48) (cid:88) -(cid:104)φω∗φω (cid:105)=Hω C P +D T . (4) -k k+q kk(cid:48)nn(cid:48) qj,k qj qj,k qj j +C qj,kP +qj ++D qj,kT qj. (4) TheweightfactorHω (seeeqA8)isafunctionoffrequency,capturinginformationabouttheextentofcouplingbetween thetwomodes[n,k]and[n(cid:48),k(cid:48)],wherenandn(cid:48) aretheradialordersofthemodes,andk =|k|andk(cid:48) =|k(cid:48)|=|k+q|. Thespectralprofileofthemode(seeeqA9)isapproximatedusingaLorentzian(Andersonetal.1990). 
Themorethe -Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms C and D are poloidal -qj,k qj,k +Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms C +qj,k +and D +qj,k +are poloidal and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements -and are derived from the solar model see Appendix A. They possess the symmetry relation: C = C and -qj,k −qj,−k -D =D (see eq A6). The kernels, as flows, are expressed on the basis f (z). -qj,k −qj,−k j +and are derived from the solar model see Appendix A. They possess the symmetry relation: C +qj,k += C +−qj,−k +and +D +qj,k +=D +−qj,−k +(see eq A6). The kernels, as flows, are expressed on the basis f j(z). 1.2. Least-squares of cross-correlation -Even though φω∗φω isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- -k k+q +Even though φω∗ +k +φω +k+q +isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- surement, knowninmode-couplingliteratureas’B-coefficients’, ismuchbetterdesignedforinversionasitreducesthe -dimensionoftheproblem. Aleast-squaresfittothecross-correlationφω∗φω (seeWoodard2006,2014,2016)results -k k+q -in the B-coefficients B , according to +dimensionoftheproblem. Aleast-squaresfittothecross-correlationφω∗ +k +φω +k+q +(seeWoodard2006,2014,2016)results +in the B-coefficients B k,q, according to +B k,q -(cid:80) Hω∗ φω∗φω -kk(cid:48)nn(cid:48) k k+q -B = ω . (5) -k,q (cid:80) |Hω |2 -kk(cid:48)nn(cid:48) += +(cid:80) ω -Multiplying eq 4 on both sides by Hω∗ and substituting by eq 5 on the left-hand-side results in a concisely defined +Hω∗ kk(cid:48)nn(cid:48)φω∗ +k +φω +k+q +(cid:80) +ω +|Hω kk(cid:48)nn(cid:48)|2 +. 
(5) +Multiplying eq 4 on both sides by Hω∗ kk(cid:48)nn(cid:48) +and substituting by eq 5 on the left-hand-side results in a concisely defined forward problem (compare with eq 4) -(cid:88) -B = C P +D T . (6) -k,q qj,k qj qj,k qj +B +k,q +=(cid:88) j +C qj,kP +qj ++D qj,kT qj. (6) In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. -Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω , -nk -(cid:16) (cid:17) -|ω|∈ ω −(cid:15)Γ /2,ω +(cid:15)Γ /2 or -nk nk nk nk -(cid:16) (cid:17) -|ω|∈ ω −(cid:15)Γ /2,ω +(cid:15)Γ /2 . (7) -n(cid:48)k(cid:48) n(cid:48)k(cid:48) n(cid:48)k(cid:48) n(cid:48)k(cid:48) -Summing over ±ω guarantees that the parity B = B∗ (see Appendix A for derivation) is obeyed, thereby -k,q −k,−q +Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω nk, +|ω|∈(cid:16) +ω nk−(cid:15)Γ nk/2,ω nk+(cid:15)Γ +nk/2(cid:17) +or +|ω|∈(cid:16) +ω +n(cid:48)k(cid:48) +−(cid:15)Γ n(cid:48)k(cid:48)/2,ω +n(cid:48)k(cid:48) ++(cid:15)Γ +n(cid:48)k(cid:48)/2(cid:17) +. (7) +Summing over ±ω guarantees that the parity B +k,q += B∗ +−k,−q +(see Appendix A for derivation) is obeyed, thereby ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and −k, -(cid:88) -B∗ = C P∗ +D T∗ . (8) -−k,−q −qj,−k −qj −qj,−k −qj +B∗ +−k,−q +=(cid:88) j -Substituting parity and symmetry relations for all terms in the above results in eq 6. As B is constructed by a +C −qj,−kP∗ +−qj ++D −qj,−kT∗ −qj. (8) +Substituting parity and symmetry relations for all terms in the above results in eq 6. As B k,q +is constructed by a least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a by-product. 1.3. 
Noise model @@ -178,19 +276,32 @@ Everyindependentrealizationofamodecanbeunderstoodastheoutputofadampedharmonicosc random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters 4 Mani et al. -Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p (orange) and p (green). The shaded -1 2 +Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p +1 +(orange) and p +2 +(green). The shaded regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of -kR andω/2π towhichwehaverestrictedourselvesinthisanalysis. BeyondkR of2000,itisseenthatthetheoreticalfitting -(cid:12) (cid:12) +kR +(cid:12) +andω/2π towhichwehaverestrictedourselvesinthisanalysis. BeyondkR +(cid:12) +of2000,itisseenthatthetheoreticalfitting of mode frequencies start deviating from the observed dispersion relation for the f-mode. -such as its amplitude, frequency and linewidth, and consequently in B in our case. We use the same noise model +such as its amplitude, frequency and linewidth, and consequently in B k,q +in our case. We use the same noise model as in H21, which was motivated by the above discussion, -G ≡(cid:104)|B |2(cid:105), (9) -k,q k,q -where, unlike H21, we again sum over ±ω. G is real, with the symmetry relation G =G (see Appendix A -k,q k,q −k,−q +G +k,q +≡(cid:104)|B k,q|2(cid:105), (9) +where, unlike H21, we again sum over ±ω. G +k,q +is real, with the symmetry relation G +k,q +=G +−k,−q +(see Appendix A for explanation). 2. DATA ANALYSIS In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the @@ -199,8 +310,11 @@ is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated at the (Snodgrass 1984) rotation rate. 
Here, we select a patch that is 194.4×194.4 Mm2 in size, tracked for 24 hours and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number 2197,Carringtonlongitude90◦). ThisDopplercubeisconsideredasthephysicalwavefieldφ(x,y;t). TheFourier-space -wavefieldφω (andsubsequently,thecross-correlationφω∗φω )isobtainedbycomputingthe3Dspatialandtemporal -k k k+q +wavefieldφω +k +(andsubsequently,thecross-correlationφω∗ +k +φω k+q)isobtainedbycomputingthe3Dspatialandtemporal Fourier transform of the Dopplercube. The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon @@ -208,217 +322,275 @@ Eq 6, while short enough that supergranules do not substantially evolve (lifetim from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral profiles of the two modes [n,k] and [n(cid:48),k(cid:48)] closely align in ω space. This implies that their mode frequencies should be -sufficiently close (|ω −ω | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over -nk n(cid:48)k(cid:48) +sufficiently close (|ω +nk +−ω n(cid:48)k(cid:48)| ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over ±ω is significant only over a few linewidths ((cid:15), the summation parameter; see eq 7). We have empirically found and -tabulated δ in Table 1 for the radial order couplings n-n(cid:48) ∈ f-f, p -p , and p -p (the signal strength depends only -1 1 2 2 +tabulated δ in Table 1 for the radial order couplings n-n(cid:48) ∈ f-f, p 1-p 1, and p 2-p +2 +(the signal strength depends only weakly on (cid:15); we set it to 3 line widths). 
-Figure 1 shows that for any two adjacent ridges (adjacent n and n(cid:48)), mode frequencies ω and ω become spaced -nk n(cid:48)k -farther apart with increasing wavenumber kR . It is also known that mode linewidth Γ grows with radial orders for +Figure 1 shows that for any two adjacent ridges (adjacent n and n(cid:48)), mode frequencies ω +nk +and ω +n(cid:48)k +become spaced +farther apart with increasing wavenumber kR (cid:12). It is also known that mode linewidth Γ grows with radial orders for +a given kR (cid:12). Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +observation set the total number of modes within a range of kR (cid:12) -a given kR . Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +(and ω/2π) that can be clearly observed, thereby +affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually +inspectingthepower-spectrum),theparametersdescribingtheextentofcouplingoverdifferentrangesofkR (cid:12) -observation set the total number of modes within a range of kR (and ω/2π) that can be clearly observed, thereby +atfixed +radial order are different. In wavenumber, we restrict our analysis to within 200≤kR (cid:12) -affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually -inspectingthepower-spectrum),theparametersdescribingtheextentofcouplingoverdifferentrangesofkR atfixed +≤2000 and qR (cid:12) -radial order are different. In wavenumber, we restrict our analysis to within 200≤kR ≤2000 and qR ≤300. Our -(cid:12) (cid:12) +≤300. Our frequency range is confined to span the range over which acoustic modes are observed (2≤ω/2π ≤5 in mHz). 
Imaging near-surface flows using mode-coupling analysis 5 -Coupling kR range # of δ +Coupling kR (cid:12) +range # of δ modes f-f [400,1000] 5240 4 [1000,1500] 7784 1.1 [1500,2000] 10940 0.4 -p -p [400,1000] 5240 4.5 -1 1 +p 1-p +1 +[400,1000] 5240 4.5 [1000,1750] 12852 2 -p -p [200,1000] 5886 3 -2 2 +p 2-p +2 +[200,1000] 5886 3 [1000,1300] 4280 3 Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different -ranges of kR . -(cid:12) +ranges of kR (cid:12). 3. INVERSION The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements -B from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and +B k,q +from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis -functions f (z) (J (cid:28) M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000, -j +functions f j(z) (J (cid:28) M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000, computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is -presenteveninp -p , andp -p (seeFigure3), andpossiblyotherhigherorderself-andcross-couplings. Sinceweare -1 1 2 2 +presenteveninp 1-p 1, andp 2-p +2 +(seeFigure3), andpossiblyotherhigherorderself-andcross-couplings. 
Sinceweare interested in only surface flows, we leave higher order coupling to future work. -It bears mentioning that the slopes of the ridges in the kR -ν spectrum (Figure 1) increase with radial order. This -(cid:12) -limitsustolow-to-intermediatekR (<1000)forthesehigherradialordersifwearetoremainundertheacousticcut- +It bears mentioning that the slopes of the ridges in the kR (cid:12)-ν spectrum (Figure 1) increase with radial order. This +limitsustolow-to-intermediatekR (cid:12) +(<1000)forthesehigherradialordersifwearetoremainundertheacousticcut- offfrequencyof5.3mHz. Italsobecomesimperativetouseaspatiallylargerobservationpatchtogainaccesstosignals -from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation. +from low kR (cid:12) +- too large an observation region could possibly render invalid the Cartesian geometry approximation. Regardless,inadditiontoperforminginversionsusingallthecouplingsstackedtogether,wealsodemonstrateinversions separatelyforthethreecouplings(seeTable2)inordertoaccountforthefullgamutofmode-couplingasasignal-rich helioseismic technique. 3.1. RLS For given q, the forward problem may be stated as KU=B, (10) +with the aim to minimize the misfit (cid:80) -with the aim to minimize the misfit ||KU−B|| , with |||| denoting the L norm. Here, K is the matrix formed -2 2 2 k -by the sensitivity kernels: {C ,D }. U is a vector composed of the flow coefficients: {P ,T } and B is a vector -qj,k qj,k qj qj -composed of computed B-coefficients: {B }. The least-squares problem is solved simultaneously for poloidal and -k,q -toroidal flow. We use B-spline basis functions as our f (z), comprising 11 knots spaced uniformly in acoustic radius, -j +||KU−B|| 2, with |||| +2 +denoting the L +2 +norm. Here, K is the matrix formed +by the sensitivity kernels: {C qj,k,D qj,k}. U is a vector composed of the flow coefficients: {P qj,T qj} and B is a vector +composed of computed B-coefficients: {B k,q}. 
The least-squares problem is solved simultaneously for poloidal and +toroidal flow. We use B-spline basis functions as our f j(z), comprising 11 knots spaced uniformly in acoustic radius, for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis functionsforeachpoloidalandtoroidal,thedimensionsofK,UandBarethusM×22,22×1,andM×1respectively. -Normalizingbothsidesofeq10bythenoisecovarianceΛ(adiagonalmatrixwiththeentriesG ;seeeq9;dimension -k,q +Normalizingbothsidesofeq10bythenoisecovarianceΛ(adiagonalmatrixwiththeentriesG k,q;seeeq9;dimension M ×M) and pre-multiplying by K(cid:124), (K(cid:124) Λ−1K)U=(K(cid:124) Λ−1)B, (11) U=(K(cid:124) Λ−1K)−1K(cid:124) Λ−1B. (12) 6 Mani et al. -Figure 2. Left: Averagingkernelforpoloidalflow(seesectionB.2,eqB17,andleftpanelofFigure8)forqR =[−112,−45], +Figure 2. Left: Averagingkernelforpoloidalflow(seesectionB.2,eqB17,andleftpanelofFigure8)forqR +(cid:12) +=[−112,−45], +at the depth z +o += −0.41 Mm. Right: L-curve for the mode qR (cid:12) -at the depth z = −0.41 Mm. Right: L-curve for the mode qR = [−112,−45]; the knee (λ = 2.48) is marked by a blue -o (cid:12) += [−112,−45]; the knee (λ = 2.48) is marked by a blue diamond. +Since the least-squares problem is typically ill-posed, we restate the minimization as (cid:80) -Since the least-squares problem is typically ill-posed, we restate the minimization as ||KU−B|| +λ||U|| with -2 2 k +||KU−B|| 2+λ||U|| +2 +with the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this regularization makes the problem better conditioned and is now defined as U=(K(cid:124) Λ−1K+λI)−1K(cid:124) Λ−1B, (13) -where I is the identity matrix for L regularization. 
The knee-point of the L-curve (Hansen 1992), a curve formed +where I is the identity matrix for L 1 -by plotting ||U|| vs ||KU − B|| for different values of λ (see right panel of Figure 2), is usually chosen as the -2 2 +regularization. The knee-point of the L-curve (Hansen 1992), a curve formed +by plotting ||U|| +2 +vs ||KU − B|| +2 +for different values of λ (see right panel of Figure 2), is usually chosen as the regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal -flow P are shown in Figure 3. +flow P q +are shown in Figure 3. 4. LCT Toimproveconfidenceintheimagednear-surfaceflowsthroughmode-coupling,wecomparethemwithflowsobtained from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by -examining the advection of convective granules (1.2 Mm, qR ≈ 3500; Hathaway et al. 2015) by underlying larger- +examining the advection of convective granules (1.2 Mm, qR (cid:12) +≈ 3500; Hathaway et al. 2015) by underlying larger- scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈35 Mm), LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob- tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between -consecutive intensity images, which we denote as I ,I . The LCT method selects a patch in two images each -1 2 -(I = I 1e(x−xij)2/2sigma2,I = I 2e(x−xij)2/2sigma2) that observe the same granule at the grid point x = (x i,y j). -1 2 ij +consecutive intensity images, which we denote as I 1,I 2. 
The LCT method selects a patch in two images each +(I +1 += I 1e(x−xij)2/2sigma2,I +2 += I 2e(x−xij)2/2sigma2) that observe the same granule at the grid point x +ij += (x i,y j). A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in -section 1.1. The two patches I ,I are then cross correlated for different values of position shifts ∆x, -1 2 -(cid:90) -C (∆x,∆y)= dxI∗(−x)I (∆x−x). (14) -ij 1 2 -The shift ∆x = (∆x,∆y) that maximizes the cross-correlation C is taken to be the proper motion of the granule. +section 1.1. The two patches I 1,I +2 +are then cross correlated for different values of position shifts ∆x, +C +ij(∆x,∆y)=(cid:90) +dxI∗ 1(−x)I 2(∆x−x). (14) +The shift ∆x = (∆x,∆y) that maximizes the cross-correlation C ij +is taken to be the proper motion of the granule. Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10 -min), the velocities are given by v = ∆x/∆t and v = ∆y/∆t. This exercise is repeated for all grid points in the -x y -images I ,I and for each consecutive pair of images in the cube. -1 2 -In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing v and v . FLCT -x y +min), the velocities are given by v +x += ∆x/∆t and v +y += ∆y/∆t. This exercise is repeated for all grid points in the +images I 1,I +2 +and for each consecutive pair of images in the cube. +In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing v +x +and v y. FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the Imaging near-surface flows using mode-coupling analysis 7 -Figure 3. 
Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p -p , and p -p as a function of q R and -1 1 2 2 x (cid:12) -q R . Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the -y (cid:12) +Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p 1-p 1, and p 2-p +2 +as a function of q xR +(cid:12) +and +q yR (cid:12). Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the mean. Total power appears to increase through the radial orders. Power is in units of m2/s4. dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the -FLCT code. v and v are then computed for consecutive pairs of images and are averaged over the entire day. -x y +FLCT code. v +x +and v +y +are then computed for consecutive pairs of images and are averaged over the entire day. 5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by substituting P and T from eq 3 into eq 2 as below - -uuu(q,z)=−∇2Pe +∇(∂ P)+∇ T×e , -z z h z -=−(0, 0, ∂2P +∂2P +∂2P)+(∂ ∂ P, ∂ ∂ P, ∂2P)+(∂ T, −∂ T, 0). (15) -x y z x z y z z y x -Setting ∂2+∂2 =q2, div is given by, -x y -∇ ·uuu(q,z)=q2∂ P, (16) -h z +uuu(q,z)=−∇2Pe z+∇(∂ zP)+∇ hT×e z, +=−(0, 0, ∂2 xP +∂2 yP +∂2 zP)+(∂ x∂ zP, ∂ y∂ zP, ∂2 zP)+(∂ yT, −∂ xT, 0). (15) +Setting ∂2 x+∂2 +y +=q2, div is given by, +∇ h·uuu(q,z)=q2∂ zP, (16) and curl is given by, -(cid:104) (cid:105) -∇×uuu(q,z) =q2T. (17) +(cid:104) ∇×uuu(q,z)(cid:105) z +=q2T. (17) We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. 
The -essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of +essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR (cid:12) +of interest (see Figure 4), and subsequently convert it to real space. -We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate +We seek to show comparisons (see Figures 5, 6, and 7) for qR (cid:12) += 100, 150, 200 and 250. To sufficiently delineate flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to obtain a real-space steady-flow map. 8 Mani et al. Figure 4. Left: Divergence-flow power spectrum |div|2, from eqn 16, obtained from inversion using all the couplings. The -power-spectrum is then filtered with a bandpass centered around qR =150 (middle panel). The resulting spectra is shown in +power-spectrum is then filtered with a bandpass centered around qR (cid:12) +=150 (middle panel). The resulting spectra is shown in the right panel. The units of |div|2 are in s−2. For illustration, we show the action of the filter on the power-spectrum |div|2 since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. -For LCT, we first apply a Gaussian smoothing to v and v to average over small-scale features; the extent of -x y -smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply +For LCT, we first apply a Gaussian smoothing to v +x +and v +y +to average over small-scale features; the extent of +smoothing depends on the length scale qR (cid:12) +to be compared with mode-coupling. div and curl are then simply computed by -div =∂ v +∂ v , (18) -x x y y -curl=∂ v −∂ v . 
(19) -x y y x +div =∂ xv x+∂ yv y, (18) +curl=∂ xv y−∂ yv x. (19) We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, and transform back to real space. Condensing all of the above, the following sequence of operations to compare flows at desired length scales are performed for mode-coupling (M-C) and for LCT - -M-C: φ(x,y;t)=3 =D =F =F =T ⇒φω,B =i =nv =e =rs =io =n ⇒P,T =∇ ==h⇒· eqns 16, 17==F =ilt =e =r, ⇒div,curl -k k,q -∇× 2DFFT +M-C: φ(x,y;t) 3DFFT =====⇒φω k,B +k,q +inversion ======⇒P,T ∇h· ===⇒ +∇× +eqns 16, 17 Filter, =====⇒ +2DFFT +div,curl +LCT: I 1,I 2 +FLCT +====⇒v x,v y +smooth, +======⇒ +∇h· ∇× +eqns 18, 19 +2DFFT, +======⇒ +Filter Filtered, -FLCT smooth, 2DFFT, 2DFFT -LCT: I 1,I ====⇒v x,v ======⇒eqns 18, 19======⇒Fourier-space =====⇒div,curl -2 y -∇h· ∇× Filter +Fourier-space flows +2DFFT +=====⇒div,curl 6. RESULTS Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5, where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from -the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the +the two methods near supergranular scale (qR (cid:12) +≈ 100). Near-surface flows are imaged most faithfully when all the couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). 
Due to -insufficientmodesforthep -p case(seeTable1),weareunabletoinfervorticalflowswithconvictionotherthannear -2 2 +insufficientmodesforthep 2-p +2 +case(seeTable1),weareunabletoinfervorticalflowswithconvictionotherthannear the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished -throughmode-couplinghelioseismology-usingf-forp -p alonetoseismicallyinfernear-surfacedivergenceandvortical -1 1 -flows at different scales (qR = 100,150) can yield extremely good agreement with LCT. As the length scale of the +throughmode-couplinghelioseismology-usingf-forp 1-p 1alonetoseismicallyinfernear-surfacedivergenceandvortical +flows at different scales (qR (cid:12) += 100,150) can yield extremely good agreement with LCT. As the length scale of the inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases. An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to comment substantively on the flows at these scales. @@ -427,15 +599,17 @@ Imaging near-surface flows using mode-coupling analysis 9 (a)qR(cid:12)=100,f-f+p1-p1 +p2-p2 Figure5. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvorticity(rightcolumn,inunitsof10−6s−1) for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around -qR =100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges +qR (cid:12) +=100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges outfromtheflowmapsandcompareacircularregionofdiameter≈175Mm. Theslopesofthebest-fitlinethroughthescatter plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum values. 
For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward -a precise statement on them. H21 reported a 60% greater amplitude for p -p over f-f coupling (Figure 3 reflects a -1 1 +a precise statement on them. H21 reported a 60% greater amplitude for p 1-p +1 +over f-f coupling (Figure 3 reflects a similar conclusion), another element to consider when combining different radial orders. The choice of regularization (see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. @@ -447,51 +621,62 @@ Thus, theamplitudesofthemode-couplingflows(andthecorrelationcoefficient)dependup • Smoothing applied to LCT flows (indirectly; see below paragraph), • The depth at which flows are inferred. Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close -to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR , we first fix the coupling(s) -(cid:12) +to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR (cid:12), we first fix the coupling(s) and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and 10 Mani et al. (a)qR(cid:12)=100,f-f (b)qR(cid:12)=150,p1-p1 Figure6. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvorticity(rightcolumn,inunitsof10−6s−1) for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around -qR =100, and using (b) p -p coupling (bottom row), bandpass filtered around qR =150. 
We cut edges out from the flow -(cid:12) 1 1 (cid:12) +qR +(cid:12) +=100, and using (b) p 1-p +1 +coupling (bottom row), bandpass filtered around qR +(cid:12) +=150. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. (a)qR(cid:12)=200,f-f+p1-p1 +p2-p2 (b)qR(cid:12)=250,f-f+p1-p1 +p2-p2 Figure7. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvorticity(rightcolumn,inunitsof10−6s−1) for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around -(a)qR =200,and(b)qR =250. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter≈175Mm. -(cid:12) (cid:12) +(a)qR +(cid:12) +=200,and(b)qR +(cid:12) +=250. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter≈175Mm. vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired -qR . -(cid:12) +qR (cid:12). Ithasbeenshown(seeDeRosa&Toomre2004;Langfellneretal.2015)thatline-of-sightvelocityfromDopplergrams and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes fordivergenceflowsowingtothemulti-stepprocessinvolvedinobtainingthem. Forexample, therehasbeenahistory (see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨oning et al. 2020; Korda -& Sˇvanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al. +& ˇ Svanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al. 
Imaging near-surface flows using mode-coupling analysis 11 -Coupling qR div curl +Coupling qR (cid:12) +div curl f-f 100 0.97 0.87 -+ p -p 150 0.95 0.76 -1 1 -+ p -p 200 0.92 0.76 -2 2 ++ p 1-p +1 +150 0.95 0.76 ++ p 2-p +2 +200 0.92 0.76 250 0.85 0.65 f-f 100 0.96 0.85 150 0.93 0.76 200 0.89 0.69 250 0.77 0.58 -p -p 100 0.95 0.83 -1 1 +p 1-p +1 +100 0.95 0.83 150 0.95 0.75 200 0.92 0.75 250 0.85 0.61 -p -p 100 0.94 0.7 -2 2 +p 2-p +2 +100 0.94 0.7 150 0.91 0.39 200 0.79 0.3 250 0.55 0.3 @@ -510,105 +695,175 @@ applications to investigate other depth- and time-varying features such as giant Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves. APPENDIX A. DERIVATION OF THE FORWARD MODEL -As described in section 1.1, we seek to describe the flow u as a function of q along e . To that end, substituting -z +As described in section 1.1, we seek to describe the flow u as a function of q along e z. To that end, substituting eq 3 into eq 2, -uσ(z)=(cid:88)(cid:8) q2f e +iqf(cid:48)(cid:9) Pσ +iq×e f Tσ. (A1) -q j z j jq z j jq +uσ q(z)=(cid:88) j +(cid:8) q2f je z+iqf(cid:48) j(cid:9) Pσ jq+iq×e zf jTσ jq. (A1) For flows in the anelastic limit (u (cid:28) speed of sound), we can denote the flow perturbation operator as δLσ = −2iωρuσ·∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, -δLσ = −2iωρ(iuσ·k+uσ·e ∂ ), (A2) -q q q z z -=−2iωρ(cid:80)(cid:8) −k·qf(cid:48)Pσ −k·(q×e )f Tσ +q2f Pσ ∂ (cid:9) . (A3) -j jq z j jq j jq z +δLσ +q += −2iωρ(iuσ +q +·k+uσ +q +·e z∂ z), (A2) +=−2iωρ(cid:80) j +(cid:8) −k·qf(cid:48) jPσ jq−k·(q×e z)f jTσ jq+q2f jPσ jq∂ z(cid:9) . (A3) 12 Mani et al. Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) -ξ ≡ξ (z)=ikˆH (z)e +zˆV (z), (A4) -k nk nk z nk +ξ +k +≡ξ nk(z)=iˆ kH nk(z)e z+ˆ zV nk(z), (A4) where H and V are real-valued functions; n and n(cid:48) are dropped for compactness of notation. 
Then the coupling of -two modes ξ and ξ (k(cid:48) = k+q), by the flow perturbation operator δLσ, denoted by coupling integral Λk (σ), is -k k(cid:48) q k(cid:48) +two modes ξ +k +and ξ +k(cid:48) +(k(cid:48) = k+q), by the flow perturbation operator δLσ q, denoted by coupling integral Λk k(cid:48)(σ), is given by -(cid:34) -Λk (σ)≡(cid:90) dx(δLσξ )·ξ∗ =(cid:90) dx −2iωρ(cid:88)(cid:110) q2f Pσ (kˆ·kˆ(cid:48) H(cid:48)H∗ +V(cid:48)V∗) -k(cid:48) q k k(cid:48) j jq k k(cid:48) k k(cid:48) +Λk k(cid:48)(σ)≡(cid:90) dx(δLσ qξ k)·ξ∗ +k(cid:48) +=(cid:90) +dx(cid:34) +−2iωρ(cid:88) j -(cid:35) -−(cid:2) k·qf(cid:48)Pσ +k·(q×e )f Tσ(cid:3) (kˆ·kˆ(cid:48) H H∗ +V V∗)(cid:111) (A5) -j jq z j jq k k(cid:48) k k(cid:48) +(cid:110) q2f jPσ jq(ˆ k·ˆ k(cid:48) H(cid:48) kH∗ +k(cid:48) ++V(cid:48) kV∗ k(cid:48)) +−(cid:2) k·qf(cid:48) jPσ jq+k·(q×e z)f jTσ jq(cid:3) (ˆ k·ˆ k(cid:48) H kH∗ +k(cid:48) ++V kV∗ +k(cid:48))(cid:111)(cid:35) +(A5) We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and -toroidal sensitivity kernels, C and D respectively. Hence, they are given by -qj,k qj,k -C =(cid:90) dzρ(cid:104) q2f (kˆ·kˆ(cid:48) H(cid:48)H∗ +V(cid:48)V∗) -qj,k j k k(cid:48) k k(cid:48) -−k·qf(cid:48)(kˆ·kˆ(cid:48) H H∗ +V V∗)(cid:105) , -j k k(cid:48) k k(cid:48) +toroidal sensitivity kernels, C +qj,k +and D +qj,k +respectively. Hence, they are given by +C +qj,k +=(cid:90) dzρ(cid:104) q2f j(ˆ k·ˆ k(cid:48) H(cid:48) kH∗ +k(cid:48) ++V(cid:48) kV∗ k(cid:48)) +−k·qf(cid:48) j(ˆ k·ˆ k(cid:48) H kH∗ +k(cid:48) ++V kV∗ k(cid:48))(cid:105) , +D +qj,k +=k·(q×e z) (cid:90) -D =k·(q×e ) dzρf (kˆ·kˆ(cid:48) H H∗ +V V∗). (A6) -qj,k z j k k(cid:48) k k(cid:48) -Note the symmetry C = C and D = D . This coupling integral contributes to the cross-spectral -qj,k −qj,−k qj,k −qj,−k +dzρf j(ˆ k·ˆ k(cid:48) H kH∗ +k(cid:48) ++V kV∗ k(cid:48)). (A6) +Note the symmetry C +qj,k += C +−qj,−k +and D +qj,k += D −qj,−k. 
This coupling integral contributes to the cross-spectral measurement between modes k and k+q From eq 8 of Woodard (2014), we write the first-order effect of flow on wavefield cross-correlation as -(cid:104)φω∗φω+σ(cid:105)=Hω Λk (σ), (A7) -k k+q kk(cid:48)σ k(cid:48) +(cid:104)φω∗ +k +φω+σ k+q(cid:105)=Hω kk(cid:48)σΛk k(cid:48)(σ), (A7) where the function H is given by -Hω =−2iω(N |Rω|2Rω+σ+N |Rω+σ|2Rω∗). (A8) -kk(cid:48)σ k k k(cid:48) k(cid:48) k(cid:48) k +Hω +kk(cid:48)σ +=−2iω(N k|Rω k|2Rω+σ +k(cid:48) ++N k(cid:48)|Rω+σ +k(cid:48) +|2Rω∗ +k +). (A8) We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. The mode spectral profile R is a Lorentzian, given by +Rω +k += 1 -Rω = , (A9) -k ω2 −ω2−iωγ /2 -nk nk -whereω istheresonantfrequencyofthemode,andγ isthemodelinewidth. EqA9canbederivedbyintroducing -nk nk +ω2 nk−ω2−iωγ +nk/2, (A9) +whereω +nk +istheresonantfrequencyofthemode,andγ +nk +isthemodelinewidth. EqA9canbederivedbyintroducing mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq 5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. -Also, the parity Hω =H−ω∗ and Rω =R−ω∗ are established. Mode normalization N is given by -kk(cid:48)σ kk(cid:48)−σ k k -(cid:80) |φω k|2 -(cid:88)Q -1 -N = ω , (A10) -k Q (cid:80) Rω -k k +Also, the parity Hω +kk(cid:48)σ +=H−ω∗ +kk(cid:48)−σ +and Rω +k +=R−ω∗ +k +are established. Mode normalization N is given by +N +k += 1 +Q +Q +(cid:88) +k +(cid:80) +ω +|φω k|2 +(cid:80) ω +Rω +k +, (A10) +where the 1 +Q Q -where the 1 (cid:80) on the right-hand-side implies average over all [k ,k ] (Q terms in all) such that k = |k| is constant. -Q x y +(cid:80) k -This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω . 
-nk +on the right-hand-side implies average over all [k x,k y] (Q terms in all) such that k = |k| is constant. +This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω nk. Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve -to establish the parity Bσ = B∗−σ . This allows for obtaining Pσ = P∗−σ, and subsequently, purely real flow in -k,q −k,−q q −q +to establish the parity Bσ +k,q += B∗−σ −k,−q. This allows for obtaining Pσ +q += P∗−σ +−q +, and subsequently, purely real flow in the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into -the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ =G−σ . -k,q −k,−q +the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ +k,q +=G−σ −k,−q. Imaging near-surface flows using mode-coupling analysis 13 B. SOLA INVERSIONS SubtractiveOptimallyLocalizedAverages(SOLA,Pijpers&Thompson1994)aimstoobtainasetofweightfactors -for the mode q and depth z , which we will call α . A linear weighted sum of the measurements B in the fashion -o k,zo k,q +for the mode q and depth z o, which we will call α k,zo. A linear weighted sum of the measurements B +k,q +in the fashion (cid:80) -α B allows for an average value of the flow P (z) to be estimated at the depth z . To obtain the coefficients -k,zo k,q q o k -α , it is assumed that a set of sensitivity kernels K (z) for the mode q can be summed up coherently to give an -k,zo k,q -’averaging kernel’ thatis localized atthe depth z . Conventionally, a Gaussiancenteredat z and awidth ∆is chosen -o o +α k,zoB +k,q +allows for an average value of the flow P q(z) to be estimated at the depth z o. 
To obtain the coefficients +α k,zo, it is assumed that a set of sensitivity kernels K k,q(z) for the mode q can be summed up coherently to give an +’averaging kernel’ thatis localized atthe depth z o. Conventionally, a Gaussiancenteredat z +o +and awidth ∆is chosen which the averaging kernel should resemble after performing inversion. B.1. Kernels in the integral form -Since the kernels in eq A6 are manifest as coefficients on a basis f (z), we first derive kernels that can be expressed -j +Since the kernels in eq A6 are manifest as coefficients on a basis f j(z), we first derive kernels that can be expressed as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: -P ≡ P (z), p ≡ P , F ≡ f (z), B ≡ B C ≡ C and K ≡ K (z), we write (assume only poloidal flow for -q qj j k,q qj,k k,q +P ≡ P q(z), p ≡ P qj, F ≡ f j(z), B ≡ B +k,q +C ≡ C +qj,k +and K ≡ K k,q(z), we write (assume only poloidal flow for simplicity, the same derivations hold true for toroidal flow as well) P =Fp (B11) The size of P is thus the same as the length of the radial grid z. @@ -621,171 +876,227 @@ B =Cp =KP (B13) where K =(FTF)−1FTC, -(cid:88)(cid:104)(cid:90) (cid:105)−1 -i.e., K (z)= dzf (z)f (z) f (z)C (B14) -k,q j j(cid:48) j(cid:48) qj(cid:48),k +i.e., K +k,q(z)=(cid:88) j,j(cid:48) +(cid:104)(cid:90) +dzf j(z)f +j(cid:48)(z)(cid:105)−1 +f j(cid:48)(z)C +qj(cid:48),k +(B14) B.2. Obtaining the coefficients α Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z o -1 (cid:16)z−z (cid:17) -T(z,z )= √ exp o . (B15) -o 2π∆2 2∆2 +T(z,z o)= +1 +√ +2π∆2 +exp(cid:16)z−z +o +2∆2 +(cid:17) +. (B15) This can be achieved by solving the optimization problem -(cid:90) (cid:104) (cid:105)2 -minimizeX = dz T(z,z )−Θ (z,z ) , (B16) -o q o +minimizeX +=(cid:90) +dz +(cid:104) +T(z,z o)−Θ q(z,z +o)(cid:105)2 +, (B16) where we introduce the averaging kernel for mode q thus -(cid:88) -Θ (z,z )= α K (z). 
(B17) -q o k,zo k,q +Θ q(z,z +o)=(cid:88) k +α k,zoK k,q(z). (B17) As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 and B14. 14 Mani et al. -Figure 8. Left: Kernel K (z) (eq B14) shown vs depth z for the three radial order couplings f-f, p -p , and p -p . qR = -k,q 1 1 2 2 (cid:12) -[−112,−45] and kR = [−853,−157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel +Figure 8. Left: Kernel K k,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p 1-p 1, and p 2-p 2. qR +(cid:12) += +[−112,−45] and kR (cid:12) -(eq B17) using SOLA, for qR = [−112,−45] at depth z = −0.48 Mm, and the corresponding target Gaussian (eq B15). -(cid:12) 0 += [−853,−157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel +(eq B17) using SOLA, for qR +(cid:12) += [−112,−45] at depth z +0 += −0.48 Mm, and the corresponding target Gaussian (eq B15). Integral of the averaging kernel over z is 0.89. -Setting ∂X →0 gives us the matrix problem to be solved +Setting ∂X ∂α +→0 gives us the matrix problem to be solved A{α}=v, -(cid:104) (cid:105)−1 -{α}= A+µI v, (B18) -where the square matrix A=(cid:82) dzK (z)K (z) and v =(cid:82) dzK (z)T(z,z ). Here, k(cid:48) is just a dummy index for -k,q k(cid:48),q k,q o +{α}=(cid:104) A+µI(cid:105)−1 +v, (B18) +where the square matrix A=(cid:82) dzK k,q(z)K k(cid:48),q(z) and v =(cid:82) dzK k,q(z)T(z,z o). Here, k(cid:48) is just a dummy index for denotingelementsinthematrixA,(k(cid:48) (cid:54)=k+q). InthelastlineofeqB18,weintroduceregularizationusinganIdentity matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining α thus becomes a highly expensive computationally for very large number of modes (see section 3). 
Substitute α +obtained from eq B18 into last line of eq B13, and (cid:80) -obtained from eq B18 into last line of eq B13, and on both sides k +on both sides +(cid:88) +k +α k,zoBσ +k,q +=(cid:88) +k +α +k,zo (cid:90) -(cid:88) (cid:88) -α Bσ = α dzK (z)Pσ(z), -k,zo k,q k,zo k,q q -k k -(cid:90) -= dzΘ (z,z )Pσ(z), -q o q -≈(cid:104)Pσ(z )(cid:105) (B19) -q o +dzK k,q(z)Pσ q(z), +=(cid:90) +dzΘ q(z,z o)Pσ q(z), +≈(cid:104)Pσ q(z o)(cid:105) (B19) Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di- vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. REFERENCES -Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. Bo¨ning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & -1990, ApJ, 364, 699, doi: 10.1086/169452 Schou, J. 2020, A&A, 635, A181, -Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of doi: 10.1051/0004-6361/201937331 +Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. +1990, ApJ, 364, 699, doi: 10.1086/169452 +Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of Modern Physics, 64, 885, -Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, doi: 10.1103/RevModPhys.64.885 -doi: 10.1086/324323 Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, -Christensen-Dalsgaard, J. 2002, Reviews of Modern Science Advances, 2, e1600557, -Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 doi: 10.1126/sciadv.1600557 -Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, —. 2021, Living Reviews in Solar Physics, 18, 2, -A&A, 628, A37, doi: 10.1051/0004-6361/201935591 doi: 10.1007/s41116-020-00028-3 +Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, +A&A, 628, A37, doi: 10.1051/0004-6361/201935591 +B¨ oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & +Schou, J. 2020, A&A, 635, A181, +doi: 10.1051/0004-6361/201937331 +Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, +doi: 10.1086/324323 +Christensen-Dalsgaard, J. 
2002, Reviews of Modern +Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 +—. 2021, Living Reviews in Solar Physics, 18, 2, +doi: 10.1007/s41116-020-00028-3 Imaging near-surface flows using mode-coupling analysis 15 -Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of q R and q R . Right: Corresponding power-spectrum -x (cid:12) y (cid:12) +Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of q xR +(cid:12) +and q yR (cid:12). Right: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1−σ error around the mean. Power is in units of m2/s4. Figure 10. Real-space divergence flows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling, -andLCT,bandpassfilteredaroundqR =100. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter +andLCT,bandpassfilteredaroundqR (cid:12) +=100. Wecutedgesoutfromtheflowmapsandcompareacircularregionofdiameter ≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is 1.05. For demonstration, we show inversions only for poloidal flow using SOLA. -De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, -192, 351, doi: 10.1023/A:1005269001739 652, L6, doi: 10.1051/0004-6361/202141462 -De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, -doi: 10.1086/424920 824, 128, doi: 10.3847/0004-637X/824/2/128 -Duvall,T.L.,J.,&Harvey,J.W.1986,inNATOAdvanced Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32, +De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, +192, 351, doi: 10.1023/A:1005269001739 +De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, +doi: 10.1086/424920 +Duvall,T.L.,J.,&Harvey,J.W.1986,inNATOAdvanced Study Institute (ASI) Series C, Vol. 169, Seismology of -doi: 10.3847/2041-8213/aaff60 the Sun and the Distant Stars, ed. D. O. Gough, 105–116 -Hanasoge, S. 
M., Hotta, H., & Sreenivasan, K. R. 2020, Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & -Science Advances, 6, eaba9639, Pomerantz, M. A. 1993, Nature, 362, 430, -doi: 10.1126/sciadv.aba9639 doi: 10.1038/362430a0 -Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., & Fisher, G. H., & Welsch, B. T. 2008, in Astronomical -Sreenivasan, K. R. 2017, MNRAS, 470, 1404, Society of the Pacific Conference Series, Vol. 383, -doi: 10.1093/mnras/stx1298 Subsurface and Atmospheric Influences on Solar Activity, -Hansen, P. C. 1992, SIAM review, 34, 561 ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, & -Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., & G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289 -Sreenivasan, K. R. 2020, A&A, 644, A103, Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. -doi: 10.1051/0004-6361/202039108 1997, Nature, 390, 52, doi: 10.1038/36294 -Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, -doi: 10.1086/423367 ApJ, 910, 156, doi: 10.3847/1538-4357/abe770 -Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. -Science, 368, 1469, doi: 10.1126/science.aaz7119 2015, ApJ, 811, 105, doi: 10.1088/0004-637X/811/2/105 +Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, +doi: 10.1086/423367 +Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, +Science, 368, 1469, doi: 10.1126/science.aaz7119 +Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, +652, L6, doi: 10.1051/0004-6361/202141462 +Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, +824, 128, doi: 10.3847/0004-637X/824/2/128 +Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32, +doi: 10.3847/2041-8213/aaff60 +Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020, +Science Advances, 6, eaba9639, +doi: 10.1126/sciadv.aba9639 +Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., & +Sreenivasan, K. R. 
2017, MNRAS, 470, 1404, +doi: 10.1093/mnras/stx1298 +Hansen, P. C. 1992, SIAM review, 34, 561 +Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., & +Sreenivasan, K. R. 2020, A&A, 644, A103, +doi: 10.1051/0004-6361/202039108 +Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, +ApJ, 910, 156, doi: 10.3847/1538-4357/abe770 +Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. +2015, ApJ, 811, 105, doi: 10.1088/0004-637X/811/2/105 16 Mani et al. -Hathaway,D.H.,Upton,L.,&Colegrove,O.2013,Science, Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar -342, 1217, doi: 10.1126/science.1244682 Physics, 15, 6, doi: 10.1007/s41116-018-0013-5 -Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807 Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, -M. 2017, A&A, 599, A69, +Hathaway,D.H.,Upton,L.,&Colegrove,O.2013,Science, +342, 1217, doi: 10.1126/science.1244682 +Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807 Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard, -doi: 10.1051/0004-6361/201629747 M. F., & Tromp, J. 2021, ApJS, 253, 47, -Schad, A., & Roth, M. 2020, ApJ, 890, 32, doi: 10.3847/1538-4365/abdf5e +Korda, D., & ˇ Svanda, M. 2021, A&A, 646, A184, +doi: 10.1051/0004-6361/202039928 +Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, +A97, doi: 10.1051/0004-6361/201732471 +Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, +A67, doi: 10.1051/0004-6361/201526024 +Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical +Transactions of the Royal Society of London Series A, +339, 431, doi: 10.1098/rsta.1992.0048 +Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261, +doi: 10.1023/A:1005227200911 +L¨ optien, B., Birch, A. C., Duvall, T. L., Gizon, L., & +Schou, J. 2016, A&A, 587, A9, +doi: 10.1051/0004-6361/201526805 +L¨ optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature +Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x +Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, +doi: 10.3847/1538-4357/ab7227 +Mandal, K., Hanasoge, S. 
M., & Gizon, L. 2021, A&A, 652, +A96, doi: 10.1051/0004-6361/202141044 +Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139, +doi: 10.3847/1538-4357/abb133 +—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6 +November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, +doi: 10.1086/166758 +Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 +Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., & +Stein, R. 2001, A&A, 377, L14, +doi: 10.1051/0004-6361:20011160 +Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar +Physics, 15, 6, doi: 10.1007/s41116-018-0013-5 +Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, +M. 2017, A&A, 599, A69, +doi: 10.1051/0004-6361/201629747 +Schad, A., & Roth, M. 2020, ApJ, 890, 32, doi: 10.3847/1538-4357/ab65ec -Korda, D., & Sˇvanda, M. 2021, A&A, 646, A184, Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh, -doi: 10.1051/0004-6361/202039928 275, 207, doi: 10.1007/s11207-011-9834-2 -Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505, -A97, doi: 10.1051/0004-6361/201732471 390, doi: 10.1086/306146 -Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed. -A67, doi: 10.1051/0004-6361/201526024 J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389 +Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505, +390, doi: 10.1086/306146 +Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed. +J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389 Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ, -Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical 59, S637, doi: 10.1093/pasj/59.sp3.S637 -Transactions of the Royal Society of London Series A, Snodgrass, H. B. 1984, SoPh, 94, 13, -339, 431, doi: 10.1098/rsta.1992.0048 doi: 10.1007/BF00154804 -Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261, Thompson, M. J., Toomre, J., Anderson, E. R., et al. 
1996, -doi: 10.1023/A:1005227200911 Science, 272, 1300, doi: 10.1126/science.272.5266.1300 -Lo¨ptien, B., Birch, A. C., Duvall, T. L., Gizon, L., & Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H. -Schou, J. 2016, A&A, 587, A9, 1989, Nonradial oscillations of stars -doi: 10.1051/0004-6361/201526805 Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, -Lo¨ptien, B., Gizon, L., Birch, A. C., et al. 2018, Nature A136, doi: 10.1051/0004-6361/201321628 -Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x Vorontsov, S. V. 2011, MNRAS, 418, 1146, -Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, doi: 10.1111/j.1365-2966.2011.19564.x -doi: 10.3847/1538-4357/ab7227 Woodard, M. 2014, SoPh, 289, 1085, +Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, +A136, doi: 10.1051/0004-6361/201321628 +Vorontsov, S. V. 2011, MNRAS, 418, 1146, +doi: 10.1111/j.1365-2966.2011.19564.x +Woodard, M. 2014, SoPh, 289, 1085, doi: 10.1007/s11207-013-0386-5 -Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652, Woodard, M., Schou, J., Birch, A. C., & Larson, T. P. -A96, doi: 10.1051/0004-6361/202141044 2013, SoPh, 287, 129, doi: 10.1007/s11207-012-0075-9 -Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139, Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206 -doi: 10.3847/1538-4357/abb133 —. 2006, ApJ, 649, 1140, doi: 10.1086/506927 -—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6 —. 2007, ApJ, 668, 1189, doi: 10.1086/521391 -November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, —. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223 -doi: 10.1086/166758 Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007, -Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 ApJ, 659, 848, doi: 10.1086/512009 -Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., & Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., +—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223 +Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 
2007, +ApJ, 659, 848, doi: 10.1086/512009 +Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., & Duvall, T. L., J. 2012, ApJL, 749, L5, -Stein, R. 2001, A&A, 377, L14, doi: 10.1088/2041-8205/749/1/L5 -doi: 10.1051/0004-6361:20011160 diff --git a/read/results/pdfplumber/2201.00200.txt b/read/results/pdfplumber/2201.00200.txt index 9782285..483c0fb 100644 --- a/read/results/pdfplumber/2201.00200.txt +++ b/read/results/pdfplumber/2201.00200.txt @@ -4,92 +4,134 @@ Local heating due to convective overshooting and the solar modelling problem I.Baraffe1,2,T.Constantino1,J.Clarke1,A.LeSaux1,2,T.Goffrey4,T.Guillet1,J.Pratt3,D.G.Vlaykov1 1 UniversityofExeter,PhysicsandAstronomy,EX44QLExeter,UK(e-mail:i.baraffe@ex.ac.uk) -2 E´coleNormaleSupe´rieure,Lyon,CRAL(UMRCNRS5574),Universite´deLyon,France +2 ´ EcoleNormaleSup´ erieure,Lyon,CRAL(UMRCNRS5574),Universit´ edeLyon,France 3 DepartmentofPhysicsandAstronomy,GeorgiaStateUniversity,AtlantaGA30303,USA 4 CentreforFusion,SpaceandAstrophysics,DepartmentofPhysics,UniversityofWarwick,Coventry,CV47AL,UK ABSTRACT -2202 Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary oftheconvectiveenvelopemodifythethermalbackgroundintheovershootinglayer.Basedontheseresults,weimplementinone- dimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a -naJ localheatingandsmoothingofthetemperaturegradientbelowtheconvectiveboundary.Weshowthatintroducinglocalheatingin theovershootinglayercanreducethesound-speeddiscrepancyusuallyreportedbetweensolarmodelsandthestructureoftheSun inferred from helioseismology. 
It also affects key quantities in the convective envelope, such as the density, the entropy, and the -speedofsound.Theseeffectscouldhelpreducethediscrepanciesbetweensolarmodelsandobservedconstraintsbasedonseismic 1 +speedofsound.Theseeffectscouldhelpreducethediscrepanciesbetweensolarmodelsandobservedconstraintsbasedonseismic inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective -]RS.hp-ortsa[ penetrationprocess,thegoalofthisworkistoinvitesolarmodellerstoconsiderbothprocessesforamoreconsistentapproach. Keywords.Convection–Hydrodynamics–Stars:evolution–Sun:evolution-helioseismology-interior -1. Introduction baseoftheconvectivezone,Christensen-Dalsgaardetal.(2011) -found that models that better fit the helioseismic data have a +1. Introduction Modelling the internal structure of the Sun is still a challenge. -weakly sub-adiabatic temperature gradient in the lower part of A recent review by Christensen-Dalsgaard (2021) describes in -theconvectivezoneandasmoothtransitiontotheradiativegra- detailthelong-standingeffortstoimprovesolarmodels.Theso- -dientintheovershootinglayer.ButChristensen-Dalsgaardetal. -lar modelling problem refers to the discrepancy between helio- (2011)notedthattherequiredtemperaturestratificationisdiffi- -seismology and solar interior models that adopt low metallici- culttoreconcilewithexistingovershootingmodelsandnumer- 1v00200.1022:viXra +lar modelling problem refers to the discrepancy between helio- +seismology and solar interior models that adopt low metallici- tiespredictedbythethree-dimensional(3D)atmospheremodels -ical simulations. They concluded that only non-local turbulent of,forexample,Asplundetal.(2009)andCaffauetal.(2011), -convectionmodelscouldproducethedesireddegreeofsmooth- in contrast to the high metallicities based on previous litera- -nessinthetransition(seeforexampleZhang&Li2012;Zhang ture compilations by, for example, Anders & Grevesse (1989) -et al. 2012). 
But these non-local models remain uncertain, and and Grevesse & Noels (1993). Asplund et al. (2021) have re- -their description of overshooting under the conditions found at cently confirmed with state-of-the-art 3D simulations the rela- -the base of the solar convective zone is yet to be validated. tively low metal abundances for the Sun. Asplund et al. (2021) -Zhang et al. (2019) explored the impact of overshooting by consider that their study yields the most reliable solar abun- -introducing a parametrised turbulent kinetic energy flux based dancesavailabletoday,suggestingthatthesolarmodellingprob- -on a model with parameters that are adjusted to improve the lemisnolongeraproblemofabundancesbutratheraproblem -helioseismic properties. They suggest that amelioration can be ofstellarphysics.Thetreatmentofmixingbelowtheconvective -obtained specifically below the convective envelope. However, zoneisoneofthekeyprocessesthatcouldimprovesolarmod- -Zhangetal.(2019)findthatthismodelcannotsolvethewhole els.Severalstudiesindeedrevealthattheprocessofconvective -solarproblembecausesuchafluxworsensthesound-speedpro- penetration, also called overshooting, at the bottom of the con- -fileinthedeepradiativeinterioroftheirsolarmodel.Giventhe vectiveenvelopecouldplayanimportantroleinimprovingthe -uncertaintiesregardingthetemperaturestratificationoftheover- -agreement between solar models and helioseismic constraints shootingregion,solarmodellershaveconsideredtheseeffectsas -(seeforexampleChristensen-Dalsgaardetal.2011;Zhangetal. secondaryandhavefocusedtheireffortsonexploringtheimpact +agreement between solar models and helioseismic constraints +(seeforexampleChristensen-Dalsgaardetal.2011;Zhangetal. 2012; Buldgen et al. 2019b). 
Overshooting in solar models has -ofsolarabundances,microphysics(opacities,equationsofstate, mostoftenbeentreatedusingdiffusiveorinstantaneouschemi- -nuclear reaction rates), and chemical mixing and diffusion (see calmixing.Atemperaturegradientthatsharplytransitionsfrom -details and references in the review of Buldgen et al. 2019a). -a nearly adiabatic form to a radiative form is usually assumed, Additional, more exotic effects such as early disk accretion or +a nearly adiabatic form to a radiative form is usually assumed, as suggested by the theoretical work of Zahn (1991). Models -solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot with a smoother transition have also been investigated. Based -2021)arealsoattractingincreasingattention. on the analysis of models with different stratifications near the +Sendoffprintrequeststo:I.Baraffe +baseoftheconvectivezone,Christensen-Dalsgaardetal.(2011) +found that models that better fit the helioseismic data have a +weakly sub-adiabatic temperature gradient in the lower part of +theconvectivezoneandasmoothtransitiontotheradiativegra- +dientintheovershootinglayer.ButChristensen-Dalsgaardetal. +(2011)notedthattherequiredtemperaturestratificationisdiffi- +culttoreconcilewithexistingovershootingmodelsandnumer- +ical simulations. They concluded that only non-local turbulent +convectionmodelscouldproducethedesireddegreeofsmooth- +nessinthetransition(seeforexampleZhang&Li2012;Zhang +et al. 2012). But these non-local models remain uncertain, and +their description of overshooting under the conditions found at +the base of the solar convective zone is yet to be validated. +Zhang et al. (2019) explored the impact of overshooting by +introducing a parametrised turbulent kinetic energy flux based +on a model with parameters that are adjusted to improve the +helioseismic properties. They suggest that amelioration can be +obtained specifically below the convective envelope. 
However, +Zhangetal.(2019)findthatthismodelcannotsolvethewhole +solarproblembecausesuchafluxworsensthesound-speedpro- +fileinthedeepradiativeinterioroftheirsolarmodel.Giventhe +uncertaintiesregardingthetemperaturestratificationoftheover- +shootingregion,solarmodellershaveconsideredtheseeffectsas +secondaryandhavefocusedtheireffortsonexploringtheimpact +ofsolarabundances,microphysics(opacities,equationsofstate, +nuclear reaction rates), and chemical mixing and diffusion (see +details and references in the review of Buldgen et al. 2019a). +Additional, more exotic effects such as early disk accretion or +solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot +2021)arealsoattractingincreasingattention. To reinvigorate the debate, Buldgen et al. (2019b) recently -Sendoffprintrequeststo:I.Baraffe highlightedonceagainhowthetransitionofthetemperaturegra- +highlightedonceagainhowthetransitionofthetemperaturegra- +1 +a +r +X i +v +: +2 +2 +0 +1 +. +0 +0 +2 +0 +0 +v +1 +[ +a +s +t +r +o +- +p +h +. +S +R +] 1 +J +a +n +2 +0 +2 +2 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem -dientjustbelowtheconvectiveenvelopecansignificantlyimpact tionprocesstothelocalheatingandtotheradiativebumpinthe -the disagreement between solar models and helioseismic con- overshootinglayer.Thesolar-likestarsimulatedinB21isbased -straints. Their results, based on a method that combines multi- on a model that is not thermally relaxed. It is reasonable to as- -ple structural inversions, suggest that the transition in temper- sumethatthelocalheatingseeninB21ispresentinstarsbecause -ature gradient is improperly reproduced by adopting either an thenegativeheatfluxintheovershootinglayerandthebumpin -adiabatic or a radiative temperature gradient in the overshoot- theradiativefluxthatcompensatesforthisfeaturearepersistent. -ing layer. 
The solution should be somewhere in between these Thesetwofeaturesarealsocommonlyobservedinotherhydro- -twoextremes.Christensen-Dalsgaardetal.(2018)alsonotethat dynamical simulations, as mentioned above. An exploration of -an increase in the temperature at the transition would remove theimpactofthisheatingonstellarevolutionmodelsmayreveal -aremainingsmallsharpdipinthespeedofsoundimmediately thatheatingisanecessaryaspectofmodelsforovershooting. +dientjustbelowtheconvectiveenvelopecansignificantlyimpact +the disagreement between solar models and helioseismic con- +straints. Their results, based on a method that combines multi- +ple structural inversions, suggest that the transition in temper- +ature gradient is improperly reproduced by adopting either an +adiabatic or a radiative temperature gradient in the overshoot- +ing layer. The solution should be somewhere in between these +twoextremes.Christensen-Dalsgaardetal.(2018)alsonotethat +an increase in the temperature at the transition would remove +aremainingsmallsharpdipinthespeedofsoundimmediately beneaththeconvectivezoneofthemodel.Amajordifficultyis todisentangletheeffectsofovershootfromtheeffectsofopaci- ties,whichcanalsoalterthetemperaturegradientintheselayers. @@ -118,142 +160,221 @@ implicitsimulationsofconvectionandconvectivepenetrationin a solar-like model with the MUlti-dimensional Stellar Implicit Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017). The main motivation was to explore the impact of an artificial -increaseinthestellarluminosityonthepropertiesofconvection Fig.1. Radial profile of the temperature departure ∆T/T 0 from -and convective penetration. 
This procedure is a common tactic theinitialprofileT 0andofthesub-adiabaticity(∇−∇ ad)closeto -adopted in hydrodynamical simulations of convection (Rogers theconvectiveboundarypredictedby2Dhydrodynamicalsimu- -etal.2006;Meakin&Arnett2007;Brunetal.2011;Hotta2017; lations(B21)ofsolar-likemodels.Thelowerpanelcorresponds -Edelmannetal.2019).TheexperimentsofB21highlighttheim- to the model with a realistic stellar luminosity and the upper -pactofpenetrativedownflowsonthelocalthermalbackground panel to a model with luminosity enhanced by a factor of ten. -intheovershootinglayer.Theyillustratehowconvectivedown- The dash-dotted red lines show ∆T/T 0 (in %), the relative dif- -flows,whenpenetratingtheregionbelowtheconvectivebound- ferencebetweenthetimeandspaceaveragesofthetemperature, -aryoftheenvelope,caninducealocalheatingandamodification T,andtheinitialtemperature,T 0.Thesolidbluelinesshowthe -ofthetemperaturegradientasaresultofcompressionandshear time and space averages of the sub-adiabaticity (∇−∇ ad). The -in the overshooting layer. This modification of the local back- dashedblacklinesshowtheinitialprofileofthesub-adiabaticity, -ground is connected to a local increase in the radiative flux to (∇−∇ ad) init.Theconvectiveboundaryisindicatedbythevertical -counterbalancethenegativeenthalpyflux(orheatflux)produced solidline(seedetailsinB21) +increaseinthestellarluminosityonthepropertiesofconvection +and convective penetration. This procedure is a common tactic +adopted in hydrodynamical simulations of convection (Rogers +etal.2006;Meakin&Arnett2007;Brunetal.2011;Hotta2017; +Edelmannetal.2019).TheexperimentsofB21highlighttheim- +pactofpenetrativedownflowsonthelocalthermalbackground +intheovershootinglayer.Theyillustratehowconvectivedown- +flows,whenpenetratingtheregionbelowtheconvectivebound- +aryoftheenvelope,caninducealocalheatingandamodification +ofthetemperaturegradientasaresultofcompressionandshear +in the overshooting layer. 
This modification of the local back- +ground is connected to a local increase in the radiative flux to +counterbalancethenegativeenthalpyflux(orheatflux)produced by penetrating flows. The negative peak of the enthalpy flux andthepositivebumpoftheradiativefluxbelowtheconvective -boundary are well-known features described in many numeri- The behaviour of the thermal profile below the convective -calworks(Hurlburtetal.1986;Muthsametal.1995;Brummell boundary found in the simulations of B21 is illustrated in Fig. -et al. 2002; Brun et al. 2011; Hotta 2017; Ka¨pyla¨ 2019; Cai 1.Itisdisplayedforthemodelwitharealisticstellarluminosity -2020).Afewworks(Rogersetal.2006;Vialletetal.2013;Korre (lowerpanel).Wealsoshowtheresultsforamodelwithanartifi- -et al. 2019; Higl et al. 2021) have also reported a modification cialenhancementintheluminositybyafactoroftenbecausethe -ofthelocalthermalbackgroundintheovershootingregion,but featuresareintensifiedinthese‘boosted’models(upperpanel). -withoutprovidingadetaileddescription.ThesimulationsofB21 Thefigureshowsthelocalheatingintheovershootinglayerand -provideaphysicalexplanationthatlinkstheconvectivepenetra- itsimpactonthesub-adiabaticity(∇−∇ ),with∇ = dlogT the -ad dlogP +boundary are well-known features described in many numeri- +calworks(Hurlburtetal.1986;Muthsametal.1995;Brummell +et al. 2002; Brun et al. 2011; Hotta 2017; K¨ apyl¨ a 2019; Cai +2020).Afewworks(Rogersetal.2006;Vialletetal.2013;Korre +et al. 2019; Higl et al. 2021) have also reported a modification +ofthelocalthermalbackgroundintheovershootingregion,but +withoutprovidingadetaileddescription.ThesimulationsofB21 +provideaphysicalexplanationthatlinkstheconvectivepenetra- +tionprocesstothelocalheatingandtotheradiativebumpinthe +overshootinglayer.Thesolar-likestarsimulatedinB21isbased +on a model that is not thermally relaxed. 
It is reasonable to as- +sumethatthelocalheatingseeninB21ispresentinstarsbecause +thenegativeheatfluxintheovershootinglayerandthebumpin +theradiativefluxthatcompensatesforthisfeaturearepersistent. +Thesetwofeaturesarealsocommonlyobservedinotherhydro- +dynamical simulations, as mentioned above. An exploration of +theimpactofthisheatingonstellarevolutionmodelsmayreveal +thatheatingisanecessaryaspectofmodelsforovershooting. +Fig.1. Radial profile of the temperature departure ∆T/T 0 from +theinitialprofileT 0andofthesub-adiabaticity(∇−∇ ad)closeto +theconvectiveboundarypredictedby2Dhydrodynamicalsimu- +lations(B21)ofsolar-likemodels.Thelowerpanelcorresponds +to the model with a realistic stellar luminosity and the upper +panel to a model with luminosity enhanced by a factor of ten. +The dash-dotted red lines show ∆T/T 0 (in %), the relative dif- +ferencebetweenthetimeandspaceaveragesofthetemperature, +T,andtheinitialtemperature,T 0.Thesolidbluelinesshowthe +time and space averages of the sub-adiabaticity (∇−∇ ad). The +dashedblacklinesshowtheinitialprofileofthesub-adiabaticity, +(∇−∇ ad) init.Theconvectiveboundaryisindicatedbythevertical +solidline(seedetailsinB21) +The behaviour of the thermal profile below the convective +boundary found in the simulations of B21 is illustrated in Fig. +1.Itisdisplayedforthemodelwitharealisticstellarluminosity +(lowerpanel).Wealsoshowtheresultsforamodelwithanartifi- +cialenhancementintheluminositybyafactoroftenbecausethe +featuresareintensifiedinthese‘boosted’models(upperpanel). +Thefigureshowsthelocalheatingintheovershootinglayerand +itsimpactonthesub-adiabaticity(∇−∇ ad),with∇ = dlogT +dlogP +the 2 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem -temperature gradient and ∇ = dlogT| the adiabatic gradient. 
entropydiscrepancy(S −S )/S hastwopositivepeaksin -ad dlogP S Sun ref ref -Theinitialstratificationbelowtheconvectiveboundary(located theradiativezone,onejustbelowtheovershootingregionanda -at r = 0.6734 × R for this specific stellar model) is set by larger peak deeper at ∼ 40% of the stellar radius. This discrep- +temperature gradient and ∇ +ad += dlogT dlogP| +S +the adiabatic gradient. +Theinitialstratificationbelowtheconvectiveboundary(located +at r = 0.6734 × R star -thestableradiativegradient,∇ (seethedashedblacklinebe- ancyisnegativeintheconvectivezone.Thecorrectionsapplied +for this specific stellar model) is set by +thestableradiativegradient,∇ rad -lowtheconvectiveboundaryinFig.1).B21showthat,asare- toAhelpreducetheseentropydiscrepanciesinbothregions. -sultofthelocalheatingbelowtheconvectiveboundarycharac- The fourth concerns the density. The quantity (ρ Sun − -terisedbythebumpintemperaturedifference∆T/T 0 displayed ρ ref)/ρ ref has a negative peak in the radiative region, at ∼ 35% -in Fig. 1, the temperature gradient becomes less sub-adiabatic ofthestellarradius,andispositiveintheconvectivezone. -immediately below the convective boundary1. The net result is Importantly,Buldgenetal.(2020)mentionthattheirrecon- -a smoother transition just below the convective boundary with struction procedure gives similar Ledoux discriminant profiles -a temperature gradient that has an intermediate value between for a wide range of initial reference models. We used these re- -the radiative temperature gradient and the adiabatic one. In the sults to gauge whether the modifications of the thermal profile -next section we analyse the impact of this local heating on 1D predicted by B21 can help in qualitatively improving all the -solar structures by adopting a simple prescription that mimics structuralquantitiesusedbyBuldgenetal.(2020). 
+(seethedashedblacklinebe- +lowtheconvectiveboundaryinFig.1).B21showthat,asare- +sultofthelocalheatingbelowtheconvectiveboundarycharac- +terisedbythebumpintemperaturedifference∆T/T 0 displayed +in Fig. 1, the temperature gradient becomes less sub-adiabatic +immediately below the convective boundary1. The net result is +a smoother transition just below the convective boundary with +a temperature gradient that has an intermediate value between +the radiative temperature gradient and the adiabatic one. In the +next section we analyse the impact of this local heating on 1D +solar structures by adopting a simple prescription that mimics the behaviour of the temperature gradient suggested by hydro- dynamicalsimulations. +3. Impactonone-dimensionalsolarstructure +models +3.1. Helioseismicconstraints +Ourprimarygoalinthisshortpaperistoillustratethepotential, +qualitative impact of the local heating produced by overshoot- +ing. We adopted a strategy inspired by the analysis of Buldgen +et al. (2020), who constructed a static structure of the Sun in +agreement with seismic inversions of the Ledoux discriminant +definedby +A= +1 +Γ +1dlnP +dlnr +− +dlnρ +dlnr, (1) +with Γ +1 += (∂lnP/∂lnρ) ad. Starting from a reference evolu- +tionary model, Buldgen et al. (2020) used an inversion pro- +cedure to iteratively reconstruct a solar model. 
Successive in- +versions of the Ledoux discriminant allowed them to obtain a +model-independentprofileforthisquantity.Theirreconstruction +method also gives solar structures that are in excellent agree- +mentwithotherstructuralinversions,namelytheentropy,S,the +squareofthespeedofsound,c2 s,andthedensity,ρ.Toillustrate +the convergence of their reconstruction procedure, they show +(rightpanelsoftheirFigs.3-6)thesuccessiveiterationsthatcon- +verge to an excellent level of agreement for the four structural +inversions(A,S,c2 s,ρ)startingfromtheinitialreferencemodel +adoptedintheirwork.Thedifferencesfoundbetweentherecon- +structedmodelandthereferencemodelareusefulastheyindi- +catethemodificationsofthereferencemodelthatarerequiredto +convergetowardsasolarmodelinagreementwithhelioseismic +data. We recall here the major trends found by Buldgen et al. +(2020)forthefourstructuralquantities,whichareusedforour +analysisinSect.3.2. +The first concerns the Ledoux discriminant. The major dis- +crepancy between the Sun and the reference model occurs just +below the convective boundary, with a large positive bump for +thequantity(A Sun-A ref). +Thesecondconcernsthespeedofsound.Thesamepositive +bumpatthesamelocationasfortheLedouxdiscriminant,A,is +observed for the quantity (c2 +s,Sun +− c2 s,ref)/c2 s,ref. The corrections +appliedtoAduringthereconstructionprocedurealsoreducethe +discrepancyinthespeedofsoundintheradiativeregion. +Thethirdconcernstheentropy.Largediscrepanciesareob- +servedinboththeradiativeregionandtheconvectivezone.The +1 Lesssub-adiabaticmeansthat|∇−∇ ad|decreasescomparedtothe +initialprofile. +entropydiscrepancy(S Sun−S ref)/S +ref +hastwopositivepeaksin +theradiativezone,onejustbelowtheovershootingregionanda +larger peak deeper at ∼ 40% of the stellar radius. This discrep- +ancyisnegativeintheconvectivezone.Thecorrectionsapplied +toAhelpreducetheseentropydiscrepanciesinbothregions. +The fourth concerns the density. 
The quantity (ρ Sun − +ρ ref)/ρ ref has a negative peak in the radiative region, at ∼ 35% +ofthestellarradius,andispositiveintheconvectivezone. +Importantly,Buldgenetal.(2020)mentionthattheirrecon- +struction procedure gives similar Ledoux discriminant profiles +for a wide range of initial reference models. We used these re- +sults to gauge whether the modifications of the thermal profile +predicted by B21 can help in qualitatively improving all the +structuralquantitiesusedbyBuldgenetal.(2020). 3.2. Testingone-dimensionalsolarmodels Ourmainmotivationistoshowthepotentialimpactofthelocal -3. Impactonone-dimensionalsolarstructure heating described in Sect. 2 on stellar models. We are not aim- -models inginthisshortworkatconstructingthebestsolarmodeltofit +inginthisshortworkatconstructingthebestsolarmodeltofit helioseismicconstraints.Usingstellarevolutioncodes,wehave -3.1. Helioseismicconstraints adopted two different methods that can be found in the litera- -Ourprimarygoalinthisshortpaperistoillustratethepotential, turetoconstructsolarmodels(e.g.Zhangetal.2012;Vinyoles -qualitative impact of the local heating produced by overshoot- et al. 2017). Our first method relies on the thermal relaxation -ing. We adopted a strategy inspired by the analysis of Buldgen of a reference model with solar radius and luminosity that is -et al. (2020), who constructed a static structure of the Sun in modifiedtoreproducethetemperaturegradientintheovershoot- -agreement with seismic inversions of the Ledoux discriminant inglayersuggestedbyhydrodynamicalsimulations.Inthiscase, -definedby thechemicalabundancesarenotmodifiedbynuclearreactions, +turetoconstructsolarmodels(e.g.Zhangetal.2012;Vinyoles +et al. 2017). 
Our first method relies on the thermal relaxation +of a reference model with solar radius and luminosity that is +modifiedtoreproducethetemperaturegradientintheovershoot- +inglayersuggestedbyhydrodynamicalsimulations.Inthiscase, +thechemicalabundancesarenotmodifiedbynuclearreactions, mixing, or microscopic diffusion during the relaxation process. -1 dlnP dlnρ -A= − , (1) For these tests, we used the 1D Lyon stellar evolution code -Γ dlnr dlnr (Baraffeetal.1998).Werepeatedthisexperimentbasedonther- -1 -with Γ = (∂lnP/∂lnρ) . Starting from a reference evolu- mal relaxation with the stellar evolution code MONSTAR (e.g. -1 ad +For these tests, we used the 1D Lyon stellar evolution code +(Baraffeetal.1998).Werepeatedthisexperimentbasedonther- +mal relaxation with the stellar evolution code MONSTAR (e.g. Constantino et al. 2014) and obtained the same qualitative re- -tionary model, Buldgen et al. (2020) used an inversion pro- sults. -cedure to iteratively reconstruct a solar model. Successive in- The second method considers models that account for the -versions of the Ledoux discriminant allowed them to obtain a modification of the temperature gradient in the overshooting -model-independentprofileforthisquantity.Theirreconstruction layer from the zero age main sequence (ZAMS). The models -method also gives solar structures that are in excellent agree- arethenevolveduntiltheyreachthesolarradiusandluminosity. 
-mentwithotherstructuralinversions,namelytheentropy,S,the -squareofthespeedofsound,c2,andthedensity,ρ.Toillustrate With this approach, changes in the chemical abundances from -s nuclearreactions,microscopicdiffusion,andovershootingmix- -the convergence of their reconstruction procedure, they show +With this approach, changes in the chemical abundances from +nuclearreactions,microscopicdiffusion,andovershootingmix- ing are also consistent with any modification of the structure -(rightpanelsoftheirFigs.3-6)thesuccessiveiterationsthatcon- induced by the forced local heating in the overshooting layer. -verge to an excellent level of agreement for the four structural -inversions(A,S,c2,ρ)startingfromtheinitialreferencemodel These tests were performed with MONSTAR as it includes the -adoptedintheirwos treatmentofmicroscopicdiffusion. -rk.Thedifferencesfoundbetweentherecon- +These tests were performed with MONSTAR as it includes the +treatmentofmicroscopicdiffusion. The first method allows the impact of local heating in -structedmodelandthereferencemodelareusefulastheyindi- the overshooting layer after thermal relaxation to be isolated. -catethemodificationsofthereferencemodelthatarerequiredto The second method provides evolutionary models that are self- -convergetowardsasolarmodelinagreementwithhelioseismic consistentsincetheeffectofthemodificationofthetemperature -data. We recall here the major trends found by Buldgen et al. gradientisaccountedforduringtheirevolutiononthemainse- -(2020)forthefourstructuralquantities,whichareusedforour quence. -analysisinSect.3.2. Inthefollowing,weadoptamodificationofthelocaltemper- -The first concerns the Ledoux discriminant. 
The major dis- aturegradientintheovershootinglayerthatqualitativelyrepro- -crepancy between the Sun and the reference model occurs just ducesthebehaviourdisplayedinFig.1.Wedefineanovershoot- -below the convective boundary, with a large positive bump for -inglengthd = α H ,with H thepressurescaleheight -thequantity(A -A ). ov ov P,CB P,CB -Sun ref -attheconvectiveboundaryandα afreeparameter.Wealsode- -Thesecondconcernsthespeedofsound.Thesamepositive ov -finetworadiallocations,r =r −d andr =r −d /2, -bumpatthesamelocationasfortheLedouxdiscriminant,A,is ov CB ov mid CB ov -observed for the quantity (c2 − c2 )/c2 . The corrections withr CBtheradiallocationoftheconvectiveboundary.Thetem- -s,Sun s,ref s,ref peraturegradientismodifiedasfollows.Forr ≤ r < r ,we -appliedtoAduringthereconstructionprocedurealsoreducethe mid CB +inglengthd +ov += α ovH P,CB,with H +P,CB +thepressurescaleheight +attheconvectiveboundaryandα ovafreeparameter.Wealsode- +finetworadiallocations,r +ov +=r CB−d ovandr +mid +=r CB−d ov/2, +withr CBtheradiallocationoftheconvectiveboundary.Thetem- +peraturegradientismodifiedasfollows.Forr +mid +≤ r < r CB,we use -discrepancyinthespeedofsoundintheradiativeregion. -Thethirdconcernstheentropy.Largediscrepanciesareob- ∇=g(r)∇ +(1−g(r))∇ , (2) -ad rad -servedinboththeradiativeregionandtheconvectivezone.The +∇=g(r)∇ ad+(1−g(r))∇ rad, (2) with -1 Lesssub-adiabaticmeansthat|∇−∇ |decreasescomparedtothe -ad -initialprofile. g(r)= sin{[(r−r )/(r −r )]a×π/2}. (3) -mid CB mid +g(r)= sin{[(r−r mid)/(r CB−r mid)]a×π/2}. (3) 3 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem -Forr ≤r R +b +is reached +(i.e.,theperiodbecomesindependentofradius).Theexactval- +ues of R b and R s, as well as of the exponents, depend on the current mass (M). We assume that the FM is dominant if the +stellar radius is larger than the critical value R dom,0, which we +computedfromthecurrentstellarmassusingEq.4ofTrabucchi +etal.(2021b). 
+1 Hereinafter, whenever we discuss periods, it should be understood +thatwerefertoFMperiodsonwhichthisworkisfocused. +2.2. Data +Asafirstsetofdata,weconsideredthecluster-LPVpairsused +byGradyetal.(2019,seetheirtables1and 2).Theseconsistof +19clustersintheLargeMagellanicCloud,hostingatotalof20 +potentialLPVmembers,andeightGalacticclusterseachhosting +apotentialLPVmember. +WeexpandedthislistwithdataforLPVsinafewpopulous +clusters,namelytheGalacticclustersNGC362,NGC2808,47 +Tuc(NGC104),andωCen(NGC5139);theLMCclustersNGC +1978andNGC1846;andtheclusterNGC419intheSmallMag- +ellanicCloud(SMC).ThesourcelistsweretakenfromLebzel- +ter&Wood(2005,2007,2011,2016)andKamathetal.(2010), +whosenotationforthesourcesnamesisadoptedhere.Afterex- +cludingthestarLW3inNGC1846andthestarV129inωCen, +whichareunlikelyclustermembers(cf.Lebzelter&Wood2007, +2016),wereachedatotalof203sources. +The aforementioned studies also provide a lot of informa- +tion,possiblyincluding JHK photometry,oneormoreperiods, +and a spectral type. In order to expand on the available data, +wecrossmatchedtheselectedsamplewiththeTwoMicronAll- +Sky Survey (2MASS, Skrutskie et al. 2006), the all-sky data +release of the Wide-field Infrared Survey Explorer (AllWISE, +Cutri et al. 2013), the catalog of variable stars from the All- +Sky Automated Survey for SuperNovae (ASAS-SN Jayasinghe +etal.2020),thecatalogsofLPVsintheMagellanicCloudsfrom +thethirdphaseoftheOpticalGravitationalLensingExperiment +(OGLE-III,Soszy´ nskietal.2009,2011),theearlythirddatare- lease from the Gaia mission (Gaia EDR3, Gaia Collaboration etal.2021),andthecatalogofLPVcandidatesfromGaiaDR2 (Mowlavietal.2018). -2. Methods Following Grady et al. (2019), we took ages from -2.1. Models Kharchenkoetal.(2016)andBaumgardtetal.(2013)forclusters +Kharchenkoetal.(2016)andBaumgardtetal.(2013)forclusters intheGalaxyandLMC,respectively,therebyensuringthatages -We employed PARSEC-COLIBRI isochrones (Marigo et al. 
would be homogeneously derived for clusters in both galaxies. -2017) with stellar evolutionary models from Pastorelli et al. Age uncertainties from Baumgardt et al. (2013), provided for -(2019, 2020) for the thermally pulsing asymptotic giant branch each cluster, are generally around σ (cid:39) 0.05. Kharchenko +each cluster, are generally around σ log(τ) -(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, ver- +(cid:39) 0.05. Kharchenko et al. (2016) do not provide age uncertainties, but a reasonable -sion 1.2S) for the preceding evolution. The adopted set of upper limit for their method should be σ = 0.2 based on +upper limit for their method should be σ log(τ) -isochrones covers the range 0.001 to 0.016 in initial metal- += 0.2 based on the analysis of Kharchenko et al. (2005) (the same value was -licity (Z), with a 0.001 step, while it spans the age interval -i adoptedbyGradyetal.2019,intheirFig.7). -8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB +adoptedbyGradyetal.2019,intheirFig.7). As discussed by Kamath et al. (2010), the age of the SMC -phaseisshort-lived,itonlyspansasmallrangeofinitialmasses -foreachgivenisochrone,oforderof10−2M atmost. cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is -(cid:12) consistentwiththevalueτ = 1.45±0.05GyrfromGoudfrooij -Theadoptedisochronesincludelinearpulsationperiodsfrom +cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is +consistentwiththevalueτ = 1.45±0.05GyrfromGoudfrooij et al. (2014), while it is as young as τ (cid:39) 0.89±0.015 Gyr ac- -Trabucchietal.(2019)forovertonemodesandnonlinearperiods cordingtoPerrenetal.(2017).Sinceanaccurateestimateisnot -computed with the period-mass-radius relation from Trabucchi necessaryforourexploratoryanalysis,wetookaroughaverage -etal.(2021b)fortheFM1.Pulsationpropertieswerecomputed and assumed log(τ/yr) = 9.1±0.1. NGC 419 and NGC 1846 -alongboththeearly-AGBandtheTP-AGB.Wedidnotextend likely exhibit TP-AGB boosting (Girardi et al. 2013). 
We note -ouranalysistoredsupergiantstarsasthepulsationprescription thatsomeclustersshowmultiplestellarpopulations,whoseage -weemployedarestrictlyvalidonlybelow7M . -(cid:12) spreadhasbeenestimatedinsomecases(e.g.,Mackey&Broby -Werecallthat,withtheadoptednonlinearrelation,theperiod +spreadhasbeenestimatedinsomecases(e.g.,Mackey&Broby Nielsen2007;Joo&Lee2013;Villanovaetal.2014)andiscon- -increaseswithradius(R)asabrokenpowerlaw,whoseexponent sistentwiththeageuncertaintiesweadopted. -decreasesassoonasthe“bendingradius”R isexceeded,itand -b -becomes zero when the “saturation radius” R > R is reached Distances of Galactic clusters were also taken from -s b -(i.e.,theperiodbecomesindependentofradius).Theexactval- Kharchenko et al. (2016), while for the Magellanic Clouds and -cu ue rs reo nf tR mb aa sn sd (MR s ), .a Ws ew ae sl sl ua ms eof tht ah te thex ep Fo Mnen it ss, dod mep ie nn ad nto in t th he 0th .0ei 9r mclu agste ar ns dw µe SMad Cop =ted 18t .h 9e 6d ±ist 0a .n 0c 2e mm ao gdu frl oi mµ LM deC G= ri1 js8. e4 t9 a± -f e l. -stellar radius is larger than the critical value R , which we (2017).Wesearchedfordataoninterstellarextinctionfromsev- -dom,0 -computedfromthecurrentstellarmassusingEq.4ofTrabucchi eralliteratureworks(e.g.,Nayaketal.2016;Kharchenkoetal. -etal.(2021b). 2016; Perren et al. 2017), all of which suggest that extinction -in the K filter is smaller than ∼ 0.1 mag for most of the clus- +Distances of Galactic clusters were also taken from +Kharchenko et al. (2016), while for the Magellanic Clouds and +their clusters we adopted the distance moduli µ LMC = 18.49± 0.09 mag and µ SMC = 18.96 ± 0.02 mag from de Grijs et al. +(2017).Wesearchedfordataoninterstellarextinctionfromsev- +eralliteratureworks(e.g.,Nayaketal.2016;Kharchenkoetal. +2016; Perren et al. 
2017), all of which suggest that extinction +in the K s -1 Hereinafter, whenever we discuss periods, it should be understood tersweconsidered,andatmostaslargeas∼ 0.3mag,whichis -thatwerefertoFMperiodsonwhichthisworkisfocused. negligibleforourpurposes. +filter is smaller than ∼ 0.1 mag for most of the clus- +tersweconsidered,andatmostaslargeas∼ 0.3mag,whichis +negligibleforourpurposes. Articlenumber,page2of9 Trabucchietal.:Theperiod-agerelationofLPVs -A detailed membership verification is beyond the scope of On the basis of the average age of these two groups of clus- -this work, and we relied on the checks performed by authors ters and the associated uncertainty, and taking the discrete age -whose source lists we adopted. It should be kept in mind that samplingoftheisochronesintoaccount,weconsideredtheage -somesourcesmaynotberealclustermembers. rangeslog(τ/yr)=9.15±0.10andlog(τ/yr)=10.10±0.20.Pe- -For sources without a spectral type, we used the Gaia- rioddistributionsatthoseagesaredisplayedinpanels(b)and(c) -2MASS diagram (Lebzelter et al. 2018, 2019) to determine ofFig.1,respectively,showinggoodagreementbetweenmodel -whethertheyareO-orC-rich.Weusedthenear-infraredperiod- predictionsandobservations.Wenotethatinbothcases,thedis- -luminosity diagram to identify the most likely pulsation mode tributionisskewedtowardshortperiods,whichseemstobetrue -associated with each period of each observed source. We se- atallagesforO-richstars.Thiscanbeseeninpanel(a)ofFig.2, -lectedonlyFMperiodsandrejectedlongsecondaryperiodsand whichisaversionofthePAplanelimitedtoanO-richcompo- -periods attributed to overtone mode pulsation. The details of sition2. Indeed, although at τ (cid:46) 5 Gyr the observed sample is -these classification steps are provided in Appendix A. 
Out of veryscarce,itappearstobeconsistentwithmodelspredictinga -203 sources from the initial list, we identified 95 LPVs pulsat- more densely populated region in the shorter-period half of the -ing in the FM, consisting of 40 C-rich and 55 O-rich sources. PAdistribution. -Theyconsistof29Miras,33semi-regularvariables,and33other The case of C-stars, shown in panel (b) of Fig. 2, is differ- -sources(mostlikelyLPVs)whosevariabilitytypehasnotbeen ent. They only form over a restricted range of initial masses -determined.We notethat,withtheexceptionof GaiaDR2,the andages,sotheiroccurrenceinagivenstellarpopulationisan -sourcesofvariabilitydataconsideredheredonotreporttheun- age indicator on its own. Toward the low-mass (old age) side -certaintyassociatedwithobservedperiods.However,sinceperi- of the C-star regime, the behavior is similar to the O-rich case -odswerederivedinmostcasesfromwell-sampled,high-quality withaconcentrationaroundrelativelyshortperiods.C-richmod- -variability observations, relative period uncertainties are most els tend to have a lower surface temperature and larger radii, -likelynegligiblecomparedwiththoseassociatedwithage. at a given mass, compared to O-rich models, and thus they at- -tainlongerperiodsmoreeasily.Thisoccursinparticulartoward -highermasses,sothatyoungerC-richmodelsaremoreconcen- -3. Results trated at longer periods, leading to a steeper PA relation com- -pared with the O-rich case. These predictions agree with ob- -Panel (a) of Fig. 1 shows a comparison between model predic- servations on the old side of the period distribution, while the -tions and observations in the P FM–log(τ/yr) plane. The former scarcityofCstarsatτ(cid:39)0.6Gyrpreventsusfromperforminga -are displayed by a density map showing the expected number comparisonatyoungerages. 
-N FM of LPVs pulsating in the FM in each period-age bin, nor- InappendixB,weprovideanalyticPArelationsbyfittingthe -malizedtomaximum.Modelpredictionsareingoodagreement high-densitypartsoftheO-andC-richmodels’distribution.We -with data derived from observations (i.e., individual LPVs in emphasizethat,becauseofthelargescatteroftherelation,ages -clusters, represented by symbols), and they show that the pe- estimatedinthiswayforindividualLPVsareboundtobehighly -riodofLPVspulsatingintheFMdecreaseswithincreasingage. uncertain.Asawaytoassesstheerrorinagedetermination,we -Crosses mark the average properties of the three groups of C- also provide analytic best-fit relations to the boundaries of the -rich LPVs from Feast et al. (2006, their table 4), which fit the PA distribution of the models in the appendix. These relations -generalpatternwiththeexceptionoftheirgroup3,estimatedto aredisplayedinFig.2. +A detailed membership verification is beyond the scope of +this work, and we relied on the checks performed by authors +whose source lists we adopted. It should be kept in mind that +somesourcesmaynotberealclustermembers. +For sources without a spectral type, we used the Gaia- +2MASS diagram (Lebzelter et al. 2018, 2019) to determine +whethertheyareO-orC-rich.Weusedthenear-infraredperiod- +luminosity diagram to identify the most likely pulsation mode +associated with each period of each observed source. We se- +lectedonlyFMperiodsandrejectedlongsecondaryperiodsand +periods attributed to overtone mode pulsation. The details of +these classification steps are provided in Appendix A. Out of +203 sources from the initial list, we identified 95 LPVs pulsat- +ing in the FM, consisting of 40 C-rich and 55 O-rich sources. 
+Theyconsistof29Miras,33semi-regularvariables,and33other +sources(mostlikelyLPVs)whosevariabilitytypehasnotbeen +determined.We notethat,withtheexceptionof GaiaDR2,the +sourcesofvariabilitydataconsideredheredonotreporttheun- +certaintyassociatedwithobservedperiods.However,sinceperi- +odswerederivedinmostcasesfromwell-sampled,high-quality +variability observations, relative period uncertainties are most +likelynegligiblecomparedwiththoseassociatedwithage. +3. Results +Panel (a) of Fig. 1 shows a comparison between model predic- +tions and observations in the P FM–log(τ/yr) plane. The former +are displayed by a density map showing the expected number +N FM of LPVs pulsating in the FM in each period-age bin, nor- +malizedtomaximum.Modelpredictionsareingoodagreement +with data derived from observations (i.e., individual LPVs in +clusters, represented by symbols), and they show that the pe- +riodofLPVspulsatingintheFMdecreaseswithincreasingage. +Crosses mark the average properties of the three groups of C- +rich LPVs from Feast et al. (2006, their table 4), which fit the +generalpatternwiththeexceptionoftheirgroup3,estimatedto beolderthanwhatourmodelspredictatP(cid:39)650. We also show a linear best-fit to the models distribution -(weighted by N ), which shows a fairly good agreement with 4. Discussion -FM +(weighted by N FM), which shows a fairly good agreement with the best-fit to observations by Grady et al. (2019, also shown). -In general agreement with observations, models confirm that However, the best-fit line does not fully capture the properties -LPVs pulsating predominantly in the FM follow a PA relation, ofthepredictions,noroftheobservedtrend.Indeed,modelsare -whichexhibitsanon-negligibledispersion.Thankstothenewly indicativeofasubstantialdispersionaroundtherelation.Forin- -availablenonlinearperiodpredictions,wewereabletobetterex- stance,at1Gyr,theFMperiodrangesfrom∼200daysto∼550 -aminethenatureofthisrelationandtheoriginofitsscatter. 
days.Conversely,LPVspulsatingintheFMwithaperiodof350 -ThePArelationisintimatelyconnectedwiththePLrelation, daysarepredictedtobeatleast∼200Myrold,buttheycanbeas -bothpatternsemergingbecauseoftheprominentroleofmassin old as ∼3 Gyr. Observed data are consistent with the predicted -shaping stellar structure and evolution. Indeed, stellar mass de- spread,althoughtheagreementcannotbeconsideredastheob- -terminesthelifetimesofthemainevolutionarystages,andthus servedsampleadoptedisnotcomplete. -the age of stars in the AGB phase. Pulsation models (Trabuc- Nonetheless, it is relevant that some clusters host multiple -chi et al. 2021b) show that the radius R (and correspond- -LPVs, which are thus almost coeval, and they do span a wide dom,0 -ingluminosity)attheonsetofdominantFMpulsation(DFMP) +LPVs, which are thus almost coeval, and they do span a wide periodrange.Someoftheseclustershostmultiplestellarpopu- -increases with mass, so that the most massive FM-dominated lationsthatarebelievedtohaveformedoveratimecomparable -LPVs are brighter. They also have longer periods, as this in- with the age uncertainties we adopted. This means that longer- -creases with radius. In other words, the period, luminosity, and period(moremassive)LPVsintheseclustersprobablyleanto- -age near the tip of the AGB are all functions of initial stellar wardtheloweragelimitassumedfortheirhostcluster,andthe -mass(atleasttoagoodapproximation). opposite is true at shorter periods. This tends to strengthen the -WenotethatthiswouldnotbethecaseiftheFMweredom- agreementbetweenmodelsandobservations. -inantalongtheentireAGB,asthelargechangeinradiusduring Our data set samples the intermediate-age range (NGC 419 -thisphasewouldresultinawiderangeofperiodsatagivenage. and NGC 1846) relatively well as well as old ages (ω Cen, 47 -ItistheveryfactthatDFMPoccursonlyduringthefinalportion Tuc, NGC 362, and NGC 2808). 
This provides us with the op- -portunity to study the period distribution at these ages, and for 2 AfurtherversionofthePAplanehighlightingbothchemicaltypes -a more detailed comparison between models and observations. canbefoundinFig.A.2ofappendixA.1. +portunity to study the period distribution at these ages, and for +a more detailed comparison between models and observations. +On the basis of the average age of these two groups of clus- +ters and the associated uncertainty, and taking the discrete age +samplingoftheisochronesintoaccount,weconsideredtheage +rangeslog(τ/yr)=9.15±0.10andlog(τ/yr)=10.10±0.20.Pe- +rioddistributionsatthoseagesaredisplayedinpanels(b)and(c) +ofFig.1,respectively,showinggoodagreementbetweenmodel +predictionsandobservations.Wenotethatinbothcases,thedis- +tributionisskewedtowardshortperiods,whichseemstobetrue +atallagesforO-richstars.Thiscanbeseeninpanel(a)ofFig.2, +whichisaversionofthePAplanelimitedtoanO-richcompo- +sition2. Indeed, although at τ (cid:46) 5 Gyr the observed sample is +veryscarce,itappearstobeconsistentwithmodelspredictinga +more densely populated region in the shorter-period half of the +PAdistribution. +The case of C-stars, shown in panel (b) of Fig. 2, is differ- +ent. They only form over a restricted range of initial masses +andages,sotheiroccurrenceinagivenstellarpopulationisan +age indicator on its own. Toward the low-mass (old age) side +of the C-star regime, the behavior is similar to the O-rich case +withaconcentrationaroundrelativelyshortperiods.C-richmod- +els tend to have a lower surface temperature and larger radii, +at a given mass, compared to O-rich models, and thus they at- +tainlongerperiodsmoreeasily.Thisoccursinparticulartoward +highermasses,sothatyoungerC-richmodelsaremoreconcen- +trated at longer periods, leading to a steeper PA relation com- +pared with the O-rich case. 
These predictions agree with ob- +servations on the old side of the period distribution, while the +scarcityofCstarsatτ(cid:39)0.6Gyrpreventsusfromperforminga +comparisonatyoungerages. +InappendixB,weprovideanalyticPArelationsbyfittingthe +high-densitypartsoftheO-andC-richmodels’distribution.We +emphasizethat,becauseofthelargescatteroftherelation,ages +estimatedinthiswayforindividualLPVsareboundtobehighly +uncertain.Asawaytoassesstheerrorinagedetermination,we +also provide analytic best-fit relations to the boundaries of the +PA distribution of the models in the appendix. These relations +aredisplayedinFig.2. +4. Discussion +In general agreement with observations, models confirm that +LPVs pulsating predominantly in the FM follow a PA relation, +whichexhibitsanon-negligibledispersion.Thankstothenewly +availablenonlinearperiodpredictions,wewereabletobetterex- +aminethenatureofthisrelationandtheoriginofitsscatter. +ThePArelationisintimatelyconnectedwiththePLrelation, +bothpatternsemergingbecauseoftheprominentroleofmassin +shaping stellar structure and evolution. Indeed, stellar mass de- +terminesthelifetimesofthemainevolutionarystages,andthus +the age of stars in the AGB phase. Pulsation models (Trabuc- +chi et al. 2021b) show that the radius R +dom,0 +(and correspond- +ingluminosity)attheonsetofdominantFMpulsation(DFMP) +increases with mass, so that the most massive FM-dominated +LPVs are brighter. They also have longer periods, as this in- +creases with radius. In other words, the period, luminosity, and +age near the tip of the AGB are all functions of initial stellar +mass(atleasttoagoodapproximation). +WenotethatthiswouldnotbethecaseiftheFMweredom- +inantalongtheentireAGB,asthelargechangeinradiusduring +thisphasewouldresultinawiderangeofperiodsatagivenage. +ItistheveryfactthatDFMPoccursonlyduringthefinalportion +2 AfurtherversionofthePAplanehighlightingbothchemicaltypes +canbefoundinFig.A.2ofappendixA.1. 
Articlenumber,page3of9 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs Fig.1.Period-agediagram.Panel(a)showsthepredictedperiod-agedistribution(darkertonesindicateahigherexpectednumberofLPVson @@ -273,220 +391,357 @@ inpanel(a)bytheblueandredshadedareas(atlog(τ/yr) ∼ 9.15and∼ 10.10,respectiv suppressedinpanel(a). Fig.2.SimilartoFig.1,butlimitedtoO-rich(leftpanel)andC-rich(rightpanel)LPVs.Thesolidlinemarksthebestfittothemodels,while dashedlinesarebestfitstotheedgesofthemodeldistribution(seethetextformoredetails). -oftheAGBthatlimitstherangeofperiodsaFM-pulsatingLPV Thisfeatureisstrengthenedwhenasetofisochronesiscon- -canhaveatagivenage.Yet,theDFMPpartoftheAGBislong sidered which spans a range of initial metallicities because the -enoughforsignificantvariationsinradiustooccur,whichresult adopted criterion for the onset of DFMP does not depend on -inthedispersionofthePArelationseeninFig.1. metallicity, but the FM period does as metal-poor LPVs are +oftheAGBthatlimitstherangeofperiodsaFM-pulsatingLPV +canhaveatagivenage.Yet,theDFMPpartoftheAGBislong +enoughforsignificantvariationsinradiustooccur,whichresult +inthedispersionofthePArelationseeninFig.1. +AtagiveninitialmetallicityZ i,theshapeoftheperioddis- +tributionprimarilyresultsfromthefactthat,throughouttheTP- +AGB (the stage during which the FM is normally excited), the +envelope expansion accelerates, while the period becomes pro- +gressivelylesssensitivetochangesinradius(seeAppendixC). +In particular, the slope of the period-radius relation decreases +sharply at P b = P(R b). The FM period distribution is roughly +symmetricaroundthatvalue,butatitsshort-periodside,theFM +isnotdominant.Therefore,whenonlyFM-dominatedLPVsare +considered,asisdonehere,theobservedperioddistributionap- +pearsskewedtowardshortperiods. 
+Thisfeatureisstrengthenedwhenasetofisochronesiscon- +sidered which spans a range of initial metallicities because the +adopted criterion for the onset of DFMP does not depend on +metallicity, but the FM period does as metal-poor LPVs are warmer and have smaller radii compared with metal-rich ones. -AtagiveninitialmetallicityZ i,theshapeoftheperioddis- As a consequence, the bulk of the period distribution of metal- -tributionprimarilyresultsfromthefactthat,throughouttheTP- poorLPVsisatperiodsshorterthanP ,sotheyonlycontribute -b -AGB (the stage during which the FM is normally excited), the to the global distribution (i.e., at all Z at a given age) over a +As a consequence, the bulk of the period distribution of metal- +poorLPVsisatperiodsshorterthanP b,sotheyonlycontribute +to the global distribution (i.e., at all Z i -envelope expansion accelerates, while the period becomes pro- smallperiodrangeatP(cid:38) P .Incontrast,metal-richLPVshave -b -gressivelylesssensitivetochangesinradius(seeAppendixC). periodswellbeyondP ,sotheycontributebothatthatvalueand -b -In particular, the slope of the period-radius relation decreases atlongerperiods.TheresultisanexcessofFM-dominatedLPVs -sharply at P b = P(R b). The FM period distribution is roughly nearP b,thatistosayontheshortsideoftheoverallperioddis- -symmetricaroundthatvalue,butatitsshort-periodside,theFM tribution. -isnotdominant.Therefore,whenonlyFM-dominatedLPVsare -considered,asisdonehere,theobservedperioddistributionap- We note that, in contrast with the prescription we adopted, -pearsskewedtowardshortperiods. the onset of DFMP in reality is probably sensitive to metallic- +at a given age) over a +smallperiodrangeatP(cid:38) P b.Incontrast,metal-richLPVshave +periodswellbeyondP b,sotheycontributebothatthatvalueand +atlongerperiods.TheresultisanexcessofFM-dominatedLPVs +nearP b,thatistosayontheshortsideoftheoverallperioddis- +tribution. 
+We note that, in contrast with the prescription we adopted, +the onset of DFMP in reality is probably sensitive to metallic- Articlenumber,page4of9 Trabucchietal.:Theperiod-agerelationofLPVs -ity.Whilethegooddegreeofagreementwithobservationssug- the DFMP regime, and they display a smaller range of periods -gests that the dependence is weak at most, it is possible for at a given age (cf. Feast & Whitelock 2000b). In other words, -any discrepancy to be smeared out by the fact that our set of theyshouldexhibitarelativelynarrowPArelation(eventhough, -isochrone implicitly assumes a flat star-formation rate with no basedontheobservationaldatasetweadopted,thereisnocon- -age-metallicityrelation,soitisnotanaccuraterepresentationof clusiveevidencethatconsideringonlyMirasreducesthescatter -anyrealisticstellarenvironment.Inthissense,thePArelationis ofthePArelation). -environment-dependent,anditisnotnecessarilyuniversal. Nonetheless,wecautionagainstthisapproachasitisprone -A further point of uncertainty stems from the fact that the tointroducinguncontrolledbiases,asthetraditionaldistinction -prescription we adopted assumes that the FM period only de- betweenSRVsandMirasisarbitrary(seeTrabucchietal.2021a, -pends upon the mass and radius, and that it is affected by a andreferencestherein).Assuch,itdisregardsthephysicalpro- -changeincompositiononlythroughtheeffectthatsuchavaria- cesses at the origin of the range of amplitudes characterizing -tionhasontheradius.Whilethisistruetoagoodapproximation, LPVs. In particular, photometric amplitudes are largely deter- -linearmodelsshowasmalldependenceofperiodsonmetallic- minedbytheformationanddissociationofmoleculesinthestel- -ityatafixedmassandradius,butthequantitativeimpactinthe laratmosphere,andtheyarelikelytobemetallicity-dependent. 
-nonlinearcaseisunknown.Wecanonlyestimate,basedonthe It is therefore reasonable to assume that metal-poor (old) Mira -resultsofTrabucchietal.(2019),anuncertaintyof±10%atmost analogs might be classified as SRVs, thereby undermining the -withrespecttotheprescriptionsadoptedhere. potential application of the PA relation if restricted to Miras. -Qualitatively, a realistic age-metallicity relation and the ThisseemstobesupportedbythefactthatthebulkofoldLPVs -metallicitydependenceoftheperiodandoftheonsetofDFMP inoursampleareclassifiedasSRVs.Therefore,studiesinvolv- -are all expected to result in a steeper PA relation than the one ing PA relations of LPVs would advantageously include both -wepredict,butitisdifficulttoassesstherelativeimportanceof MirasandFM-pulsatingSRVs. -theseeffects.Inthissense,thecompositionprobablyaffectsthe ThechallengeassociatedwithSRVsstemsfromthefactthat -shape of the PA relation more than its dispersion. The latter is they are often multiperiodic (even when predominantly pulsat- -likelyaffectedbythecompositionindirectlythroughmassloss, ingintheFM),apropertythatcomplicatesthelightcurveanal- -the analysis of which is beyond the scope of this study. How- ysis and period extraction. At the same time, this feature could -ever,wepointoutthatmasslossrepresentsasourceofscatterin potentially improve age determinations as overtone modes are -combinationwiththeoccurrenceofthermalpulses,becauseitre- expectedtodisplayaPArelationaswell. +ity.Whilethegooddegreeofagreementwithobservationssug- +gests that the dependence is weak at most, it is possible for +any discrepancy to be smeared out by the fact that our set of +isochrone implicitly assumes a flat star-formation rate with no +age-metallicityrelation,soitisnotanaccuraterepresentationof +anyrealisticstellarenvironment.Inthissense,thePArelationis +environment-dependent,anditisnotnecessarilyuniversal. 
+A further point of uncertainty stems from the fact that the +prescription we adopted assumes that the FM period only de- +pends upon the mass and radius, and that it is affected by a +changeincompositiononlythroughtheeffectthatsuchavaria- +tionhasontheradius.Whilethisistruetoagoodapproximation, +linearmodelsshowasmalldependenceofperiodsonmetallic- +ityatafixedmassandradius,butthequantitativeimpactinthe +nonlinearcaseisunknown.Wecanonlyestimate,basedonthe +resultsofTrabucchietal.(2019),anuncertaintyof±10%atmost +withrespecttotheprescriptionsadoptedhere. +Qualitatively, a realistic age-metallicity relation and the +metallicitydependenceoftheperiodandoftheonsetofDFMP +are all expected to result in a steeper PA relation than the one +wepredict,butitisdifficulttoassesstherelativeimportanceof +theseeffects.Inthissense,thecompositionprobablyaffectsthe +shape of the PA relation more than its dispersion. The latter is +likelyaffectedbythecompositionindirectlythroughmassloss, +the analysis of which is beyond the scope of this study. How- +ever,wepointoutthatmasslossrepresentsasourceofscatterin +combinationwiththeoccurrenceofthermalpulses,becauseitre- ducestheminimumradiusfortheonsetofDFMP.Thus,during the luminosity dips associated with thermal pulses, a LPV can -haveaperiodshorterthantheoneithadwhenitfirstenteredthe 5. Conclusions +haveaperiodshorterthantheoneithadwhenitfirstenteredthe DFMPregime(seeAppendixC).Anadditionalsourceofuncer- -tainty,whichwedisregarded,isrotation(orotherprocessesthat Weusedtheresultsfromrecentnonlinearpulsationcalculations -induce extra mixing in the core) which causes a spread in ages andcombinedthemwithstate-of-the-artisochronemodelstoin- -atagiveninitialmass(cf.Andersonetal.2016,forthecaseof vestigate the PArelation of FM-dominated LPVs, finding good -classicalCepheids). agreement with the distribution of observed LPVs in star clus- -Thefairlygoodagreementbetweenmodelsandobservations ters. 
The theoretical PA relation displays a non-negligible scat- -encouragestheuseofLPVsasageindicators,butthescatterof ter,whoseoriginweidentifiedduetothefactthat,despitebeing -thePArelationhampersthisapplication.Weattemptedtoreduce very brief, the portion of AGB evolution during which the FM -thescatterthroughcorrectionsinvolvingphotometricproperties, becomes dominant shows a relatively large range in mass and -as is customarily done for classical Cepheids with a color term radiusatagivenage. -(e.g.,Bonoetal.2005),butwithunsatisfactoryresults.Acorrec- The theoretical distribution of FM periods is roughly sym- -tiondependentonthephotometricamplitudeofvariabilityrep- metric,buttheFMisnotdominantattheshortestperiods.Asa -resents a promising alternative, but it cannot be pursued at the result,modelspredictthatthedistributionofdominantFMperi- -moment.Indeed,forcomputationalefficiency,currentpulsation odsatagivenageisskewedtowardshortperiods,inagreement -modelsincludeonlyacrudetreatmentoftheatmosphericlayers withobservations.Dependingonstellarpopulations,metallicity -as they do not affect pulsation periods. On the other hand, the mayenhancethisfeatureasmetal-poorLPVs,whichtendtobe -atmosphere is crucial in determining the spectral energy distri- warmerandmorecompact,onlycontributenearshortperiods. -butionanditsvariationthroughoutthepulsationcycle,andhence Weprovidethebest-fitPArelationseparatelyforO-richand -theamplitudeofvariability.Atthesametime,theobservational C-richFM-pulsatingLPVs.ThelatterLPVsshowasteeperPA -sample adopted here is too heterogeneous for a self-consistent relationbecauseoftheirlowersurfacetemperatures,whichallow -investigationofamplitude,butthiskindofstudycouldbemade themtoreachlongerperiodsmoreeasily. -possible by the upcoming data release 3 of the Gaia mission Our analysis concerns all LPVs predominantly pulsating in -(Gaia Collaboration et al. 
2021) and the future Legacy Survey the FM, regardless of whether they are classified as Miras or -ofSpaceandTime(LSST,Ivezic´ etal.2019)oftheVeraRubin SRVs.Wediscouragesuchadistinctioninthatitisarbitraryand -Observatory. pronetoselectionbiasesthatriskcompromisingtheuseofLPVs -It is worth noting that our analysis applies to Miras as well asageindicators. -as SRVs, provided that they predominantly pulsate in the FM. ThemainlimitationintheuseofthePArelationforagede- -The limitation of PA relation studies to Miras, as has mainly terminations of individual LPVs stems from its relatively large -been done in literature so far, undoubtedly has some advan- scatter. We suggest that corrective terms, involving the ampli- -tages: to begin with, the fact that Miras are typically easier to tude of variability, might help to reduce this scatter and antici- -detect than SRVs, and their light curves are easier to process patethatupcomingdatafromongoingandfuturesurveysdedi- -as they tend to be more regular. Moreover, Miras represent the catedtotime-domainastronomywillbehighlyvaluabletoprobe -end-pointofAGBevolution,soinprincipletheycorrespondtoa thispossibility.Astudyoftheimpactofmetallicityonnonlinear -smallerrangeofstellarparameterscomparedtothefullextentof pulsationishighlydesirabletopursuethislineofinvestigation, +tainty,whichwedisregarded,isrotation(orotherprocessesthat +induce extra mixing in the core) which causes a spread in ages +atagiveninitialmass(cf.Andersonetal.2016,forthecaseof +classicalCepheids). 
+Thefairlygoodagreementbetweenmodelsandobservations +encouragestheuseofLPVsasageindicators,butthescatterof +thePArelationhampersthisapplication.Weattemptedtoreduce +thescatterthroughcorrectionsinvolvingphotometricproperties, +as is customarily done for classical Cepheids with a color term +(e.g.,Bonoetal.2005),butwithunsatisfactoryresults.Acorrec- +tiondependentonthephotometricamplitudeofvariabilityrep- +resents a promising alternative, but it cannot be pursued at the +moment.Indeed,forcomputationalefficiency,currentpulsation +modelsincludeonlyacrudetreatmentoftheatmosphericlayers +as they do not affect pulsation periods. On the other hand, the +atmosphere is crucial in determining the spectral energy distri- +butionanditsvariationthroughoutthepulsationcycle,andhence +theamplitudeofvariability.Atthesametime,theobservational +sample adopted here is too heterogeneous for a self-consistent +investigationofamplitude,butthiskindofstudycouldbemade +possible by the upcoming data release 3 of the Gaia mission +(Gaia Collaboration et al. 2021) and the future Legacy Survey +ofSpaceandTime(LSST,Ivezi´ cetal.2019)oftheVeraRubin +Observatory. +It is worth noting that our analysis applies to Miras as well +as SRVs, provided that they predominantly pulsate in the FM. +The limitation of PA relation studies to Miras, as has mainly +been done in literature so far, undoubtedly has some advan- +tages: to begin with, the fact that Miras are typically easier to +detect than SRVs, and their light curves are easier to process +as they tend to be more regular. Moreover, Miras represent the +end-pointofAGBevolution,soinprincipletheycorrespondtoa +smallerrangeofstellarparameterscomparedtothefullextentof +the DFMP regime, and they display a smaller range of periods +at a given age (cf. Feast & Whitelock 2000b). 
In other words, +theyshouldexhibitarelativelynarrowPArelation(eventhough, +basedontheobservationaldatasetweadopted,thereisnocon- +clusiveevidencethatconsideringonlyMirasreducesthescatter +ofthePArelation). +Nonetheless,wecautionagainstthisapproachasitisprone +tointroducinguncontrolledbiases,asthetraditionaldistinction +betweenSRVsandMirasisarbitrary(seeTrabucchietal.2021a, +andreferencestherein).Assuch,itdisregardsthephysicalpro- +cesses at the origin of the range of amplitudes characterizing +LPVs. In particular, photometric amplitudes are largely deter- +minedbytheformationanddissociationofmoleculesinthestel- +laratmosphere,andtheyarelikelytobemetallicity-dependent. +It is therefore reasonable to assume that metal-poor (old) Mira +analogs might be classified as SRVs, thereby undermining the +potential application of the PA relation if restricted to Miras. +ThisseemstobesupportedbythefactthatthebulkofoldLPVs +inoursampleareclassifiedasSRVs.Therefore,studiesinvolv- +ing PA relations of LPVs would advantageously include both +MirasandFM-pulsatingSRVs. +ThechallengeassociatedwithSRVsstemsfromthefactthat +they are often multiperiodic (even when predominantly pulsat- +ingintheFM),apropertythatcomplicatesthelightcurveanal- +ysis and period extraction. At the same time, this feature could +potentially improve age determinations as overtone modes are +expectedtodisplayaPArelationaswell. +5. Conclusions +Weusedtheresultsfromrecentnonlinearpulsationcalculations +andcombinedthemwithstate-of-the-artisochronemodelstoin- +vestigate the PArelation of FM-dominated LPVs, finding good +agreement with the distribution of observed LPVs in star clus- +ters. The theoretical PA relation displays a non-negligible scat- +ter,whoseoriginweidentifiedduetothefactthat,despitebeing +very brief, the portion of AGB evolution during which the FM +becomes dominant shows a relatively large range in mass and +radiusatagivenage. 
+The theoretical distribution of FM periods is roughly sym- +metric,buttheFMisnotdominantattheshortestperiods.Asa +result,modelspredictthatthedistributionofdominantFMperi- +odsatagivenageisskewedtowardshortperiods,inagreement +withobservations.Dependingonstellarpopulations,metallicity +mayenhancethisfeatureasmetal-poorLPVs,whichtendtobe +warmerandmorecompact,onlycontributenearshortperiods. +Weprovidethebest-fitPArelationseparatelyforO-richand +C-richFM-pulsatingLPVs.ThelatterLPVsshowasteeperPA +relationbecauseoftheirlowersurfacetemperatures,whichallow +themtoreachlongerperiodsmoreeasily. +Our analysis concerns all LPVs predominantly pulsating in +the FM, regardless of whether they are classified as Miras or +SRVs.Wediscouragesuchadistinctioninthatitisarbitraryand +pronetoselectionbiasesthatriskcompromisingtheuseofLPVs +asageindicators. +ThemainlimitationintheuseofthePArelationforagede- +terminations of individual LPVs stems from its relatively large +scatter. We suggest that corrective terms, involving the ampli- +tude of variability, might help to reduce this scatter and antici- +patethatupcomingdatafromongoingandfuturesurveysdedi- +catedtotime-domainastronomywillbehighlyvaluabletoprobe +thispossibility.Astudyoftheimpactofmetallicityonnonlinear +pulsationishighlydesirabletopursuethislineofinvestigation, Articlenumber,page5of9 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs -aswouldbeatheoreticalinvestigationofthedependenceofpho- Joo,S.-J.&Lee,Y.-W.2013,ApJ,762,36 -tometricamplitudesuponglobalstellarparameters. Jura,M.&Kleinmann,S.G.1992,ApJS,79,105 -Kamath,D.,Wood,P.R.,Soszyn´ski,I.,&Lebzelter,T.2010,MNRAS,408,522 -Acknowledgements. M.T.andN.M.acknowledgethesupportprovidedbythe Kharchenko,N.V.,Piskunov,A.E.,Röser,S.,Schilbach,E.,&Scholz,R.D. 
-SwissNationalScienceFoundationthroughgrantNr.188697.Wearegrateful 2005,A&A,438,1163 -totheanonymousrefereefortheconstructivecommentsthathelpedimproving Kharchenko,N.V.,Piskunov,A.E.,Schilbach,E.,Röser,S.,&Scholz,R.D. -thispaper,andtoLéoGirardiforhelpingwiththecomputationandinterpre- 2016,A&A,585,A101 -tationofisochrones.Thisresearchhasmadeuseof:datafromtheOGLE-III Kippenhahn,R.&Smith,L.1969,A&A,1,142 -Catalog of Variable Stars; data products from the Two Micron All Sky Sur- Kluyver,T.,Ragan-Kelley,B.,Pérez,F.,etal.2016,inPositioningandPower -vey, which is a joint project of the University of Massachusetts and the In- in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides & +aswouldbeatheoreticalinvestigationofthedependenceofpho- +tometricamplitudesuponglobalstellarparameters. +Acknowledgements. M.T.andN.M.acknowledgethesupportprovidedbythe +SwissNationalScienceFoundationthroughgrantNr.188697.Wearegrateful +totheanonymousrefereefortheconstructivecommentsthathelpedimproving +thispaper,andtoLéoGirardiforhelpingwiththecomputationandinterpre- +tationofisochrones.Thisresearchhasmadeuseof:datafromtheOGLE-III +Catalog of Variable Stars; data products from the Two Micron All Sky Sur- +vey, which is a joint project of the University of Massachusetts and the In- fraredProcessingandAnalysisCenter/CaliforniaInstituteofTechnology,funded -B.Scmidt(Netherlands:IOSPress),87–90 -by the National Aeronautics and Space Administration and the National Sci- Lebzelter,T.,Mowlavi,N.,Marigo,P.,etal.2018,A&A,616,L13 -enceFoundation;datafromtheEuropeanSpaceAgency(ESA)missionGaia Lebzelter,T.,Trabucchi,M.,Mowlavi,N.,etal.2019,A&A,631,A24 -(https://www.cosmos.esa.int/gaia),processedbytheGaiaDataProcess- Lebzelter,T.&Wood,P.R.2005,A&A,441,1117 -ingandAnalysisConsortium(DPAC,https://www.cosmos.esa.int/web/ Lebzelter,T.&Wood,P.R.2007,A&A,475,643 -gaia/dpac/consortium). 
Funding for the DPAC has been provided by na- Lebzelter,T.&Wood,P.R.2011,A&A,529,A137 -tionalinstitutions,inparticulartheinstitutionsparticipatingintheGaiaMulti- Lebzelter,T.&Wood,P.R.2016,A&A,585,A111 +by the National Aeronautics and Space Administration and the National Sci- +enceFoundation;datafromtheEuropeanSpaceAgency(ESA)missionGaia +(https://www.cosmos.esa.int/gaia),processedbytheGaiaDataProcess- +ingandAnalysisConsortium(DPAC,https://www.cosmos.esa.int/web/ +gaia/dpac/consortium). Funding for the DPAC has been provided by na- +tionalinstitutions,inparticulartheinstitutionsparticipatingintheGaiaMulti- lateralAgreement.Thisresearchhasmadeuseofthefollowingfree/opensource -LloydEvans,T.1976,MNRAS,174,169 softwareand/orlibraries:theStarlinkTablesInfrastructureLibrary(STILTSand -LloydEvans,T.1983a,MNRAS,204,985 -Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver LloydEvans,T.1983b,MNRAS,204,961 +Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver etal.2016)notebooks;thePythonlibrariesNumPy(Harrisetal.2020),SciPy -LloydEvans,T.&Menzies,J.W.1973,inAstrophysicsandSpaceScienceLi- (Virtanenetal.2020),matplotlib(aPythonlibraryforpublicationqualitygraph- -brary,Vol.36,IAUColloq.21:VariableStarsinGlobularClustersandin ics,Hunter2007),andAstropy(acommunity-developedcorePythonpackage -RelatedSystems,ed.J.D.Fernie,151 -forAstronomy,AstropyCollaborationetal.2018).Thisresearchhasmadeuseof Mackey,A.D.&BrobyNielsen,P.2007,MNRAS,379,151 -NASA’sAstrophysicsDataSystemBibliographicServices,andofthefollowing Marigo,P.,Girardi,L.,Bressan,A.,etal.2017,ApJ,835,77 +forAstronomy,AstropyCollaborationetal.2018).Thisresearchhasmadeuseof +NASA’sAstrophysicsDataSystemBibliographicServices,andofthefollowing servicesprovidedbyCDS,Strasbourg:theSIMBADdatabase,VizieRcatalogue -Menzies,J.,Feast,M.,Tanabé,T.,Whitelock,P.,&Nakada,Y.2002,MNRAS, accesstool(DOI:10.26093/cds/vizier,Ochsenbeinetal.2000),the“Aladinsky -335,923 atlas” (Bonnarel et al. 
2000), and the cross-match service (Boch et al. 2012; -Menzies,J.,Feast,M.,Whitelock,P.,etal.2008,MNRAS,385,1045 Pineauetal.2020). -Menzies,J.W.,Feast,M.W.,Whitelock,P.A.,&Matsunaga,N.2011,MNRAS, -414,3492 -Menzies,J.W.,Whitelock,P.A.,&Feast,M.W.2015,MNRAS,452,910 -Menzies,J.W.,Whitelock,P.A.,Feast,M.W.,&Matsunaga,N.2010,MNRAS, -References 406,86 -Merrill,P.W.1923,ApJ,58,215 +References Anderson,R.I.,Saio,H.,Ekström,S.,Georgy,C.,&Meynet,G.2016,A&A, -Mowlavi,N.,Lecoeur-Taïbi,I.,Lebzelter,T.,etal.2018,A&A,618,A58 591,A8 -Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016, -AstropyCollaboration,Price-Whelan,A.M.,Sipo˝cz,B.M.,etal.2018,AJ,156, -MNRAS,463,1446 +AstropyCollaboration,Price-Whelan,A.M.,Sip˝ ocz,B.M.,etal.2018,AJ,156, 123 -Ochsenbein,F.,Bauer,P.,&Marcout,J.2000,A&AS,143,23 Battinelli,P.&Demers,S.2012,A&A,544,A10 -Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2020,MNRAS,498,3283 Battinelli,P.&Demers,S.2013,A&A,553,A93 -Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2019,MNRAS,485,5666 Baumgardt,H.,Parmentier,G.,Anders,P.,&Grebel,E.K.2013,MNRAS,430, -Pérez,F.&Granger,B.E.2007,ComputinginScienceandEngineering,9,21 676 -Perren,G.I.,Piatti,A.E.,&Vázquez,R.A.2017,A&A,602,A89 -Boch,T.,Pineau,F.,&Derriere,S.2012,inAstronomicalSocietyofthePa- Pineau,F.-X.,Boch,T.,Derrière,S.,&Schaaff,A.2020,inAstronomicalSo- +Boch,T.,Pineau,F.,&Derriere,S.2012,inAstronomicalSocietyofthePa- cificConferenceSeries,Vol.461,AstronomicalDataAnalysisSoftwareand -cietyofthePacificConferenceSeries,Vol.522,AstronomicalDataAnalysis SystemsXXI,ed.P.Ballester,D.Egret,&N.P.F.Lorente,291 -SoftwareandSystemsXXVII,ed.P.Ballester,J.Ibsen,M.Solar,&K.Short- Bonnarel,F.,Fernique,P.,Bienaymé,O.,etal.2000,A&AS,143,33 -ridge,125 Bono,G.,Marconi,M.,Cassisi,S.,etal.2005,ApJ,621,966 -Sakamoto,T.,Matsunaga,N.,Hasegawa,T.,&Nakada,Y.2012,ApJ,761,L10 Bressan,A.,Marigo,P.,Girardi,L.,etal.2012,MNRAS,427,127 -Skrutskie,M.F.,Cutri,R.M.,Stiening,R.,etal.2006,AJ,131,1163 
Catchpole,R.M.,Whitelock,P.A.,Feast,M.W.,etal.2016,MNRAS,455, -Soszyn´ski,I.,Olechowska,A.,Ratajczak,M.,etal.2021,ApJ,911,L22 2216 -Soszyn´ski,I.,Udalski,A.,Szyman´ski,M.K.,etal.2009,ActaAstron.,59,239 Cutri,R.M.,Wright,E.L.,Conrow,T.,etal.2013,ExplanatorySupplement -Soszyn´ski,I.,Udalski,A.,Szyman´ski,M.K.,etal.2011,ActaAstron.,61,217 totheAllWISEDataReleaseProducts,ExplanatorySupplementtotheAll- -Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se- WISEDataReleaseProducts -ries,Vol.351,AstronomicalDataAnalysisSoftwareandSystemsXV,ed. deGrijs,R.,Courbin,F.,Martínez-Vázquez,C.E.,etal.2017,SpaceSci.Rev., -C.Gabriel,C.Arviset,D.Ponz,&S.Enrique,666 212,1743 -Trabucchi,M.,Mowlavi,N.,&Lebzelter,T.2021a,A&A,656,A66 DeSomma,G.,Marconi,M.,Cassisi,S.,etal.2020,MNRAS,496,5039 -Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2017,ApJ,847,139 Eggen,O.J.1998,AJ,115,2435 -Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2019,MNRAS,482,929 Feast,M.2007,inAstronomicalSocietyofthePacificConferenceSeries,Vol. 
-Trabucchi,M.,Wood,P.R.,Mowlavi,N.,etal.2021b,MNRAS,500,1575 378,WhyGalaxiesCareAboutAGBStars:TheirImportanceasActorsand -Urago,R.,Omodaka,T.,Nagayama,T.,etal.2020,ApJ,891,50 Probes,ed.F.Kerschbaum,C.Charbonnel,&R.F.Wing,479 -Villanova,S.,Geisler,D.,Gratton,R.G.,&Cassisi,S.2014,ApJ,791,107 Feast,M.&Whitelock,P.2000a,inAstrophysicsandSpaceScienceLibrary, -Virtanen,P.,Gommers,R.,Oliphant,T.E.,etal.2020,NatureMethods,17,261 Vol.255,AstrophysicsandSpaceScienceLibrary,ed.F.Matteucci&F.Gio- -Wenger,M.,Ochsenbein,F.,Egret,D.,etal.2000,A&AS,143,9 vannelli,229 -Whitelock,P.,Feast,M.,&Catchpole,R.1991,MNRAS,248,276 Feast,M.W.1963,MNRAS,125,367 -Whitelock,P.,Menzies,J.,Feast,M.,etal.1994,MNRAS,267,711 Feast,M.W.1966,TheObservatory,86,120 -Whitelock,P.A.1986,MNRAS,219,525 Feast,M.W.1981,inAstrophysicsandSpaceScienceLibrary,Vol.88,Physical -Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,etal.2009,MNRAS,394,795 ProcessesinRedGiants,ed.J.Iben,I.&A.Renzini,193–204 -Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,Nsengiyumva,F.,&Matsunaga, Feast,M.W.,Robertson,B.S.C.,&Black,C.1980,MNRAS,190,227 -N.2013,MNRAS,428,2216 Feast,M.W.&Whitelock,P.A.2000b,MNRAS,317,460 -Wilson,R.E.&Merrill,P.W.1942,ApJ,95,248 Feast,M.W.,Whitelock,P.A.,&Menzies,J.W.2006,MNRAS,369,791 -Wyatt,S.P.&Cahn,J.H.1983,ApJ,275,225 GaiaCollaboration,Brown,A.G.A.,Vallenari,A.,etal.2021,A&A,649,A1 -Girardi,L.,Marigo,P.,Bressan,A.,&Rosenfield,P.2013,ApJ,777,142 Ya’Ari,A.&Tuchman,Y.1996,ApJ,456,350 +Girardi,L.,Marigo,P.,Bressan,A.,&Rosenfield,P.2013,ApJ,777,142 Goudfrooij,P.,Girardi,L.,Kozhurina-Platais,V.,etal.2014,ApJ,797,35 Grady,J.,Belokurov,V.,&Evans,N.W.2019,MNRAS,483,3022 Grady,J.,Belokurov,V.,&Evans,N.W.2020,MNRAS,492,3128 Harris,C.R.,Millman,K.J.,vanderWalt,S.J.,etal.2020,Nature,585,357 Hunter,J.D.2007,ComputinginScience&Engineering,9,90 -Ivezic´,Ž.,Kahn,S.M.,Tyson,J.A.,etal.2019,ApJ,873,111 +Ivezi´ c,Ž.,Kahn,S.M.,Tyson,J.A.,etal.2019,ApJ,873,111 Jayasinghe,T.,Stanek,K.Z.,Kochanek,C.S.,etal.2020,MNRAS,491,13 
+Joo,S.-J.&Lee,Y.-W.2013,ApJ,762,36 +Jura,M.&Kleinmann,S.G.1992,ApJS,79,105 +Kamath,D.,Wood,P.R.,Soszy´ nski,I.,&Lebzelter,T.2010,MNRAS,408,522 +Kharchenko,N.V.,Piskunov,A.E.,Röser,S.,Schilbach,E.,&Scholz,R.D. +2005,A&A,438,1163 +Kharchenko,N.V.,Piskunov,A.E.,Schilbach,E.,Röser,S.,&Scholz,R.D. +2016,A&A,585,A101 +Kippenhahn,R.&Smith,L.1969,A&A,1,142 +Kluyver,T.,Ragan-Kelley,B.,Pérez,F.,etal.2016,inPositioningandPower +in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides & +B.Scmidt(Netherlands:IOSPress),87–90 +Lebzelter,T.,Mowlavi,N.,Marigo,P.,etal.2018,A&A,616,L13 +Lebzelter,T.,Trabucchi,M.,Mowlavi,N.,etal.2019,A&A,631,A24 +Lebzelter,T.&Wood,P.R.2005,A&A,441,1117 +Lebzelter,T.&Wood,P.R.2007,A&A,475,643 +Lebzelter,T.&Wood,P.R.2011,A&A,529,A137 +Lebzelter,T.&Wood,P.R.2016,A&A,585,A111 +LloydEvans,T.1976,MNRAS,174,169 +LloydEvans,T.1983a,MNRAS,204,985 +LloydEvans,T.1983b,MNRAS,204,961 +LloydEvans,T.&Menzies,J.W.1973,inAstrophysicsandSpaceScienceLi- +brary,Vol.36,IAUColloq.21:VariableStarsinGlobularClustersandin +RelatedSystems,ed.J.D.Fernie,151 +Mackey,A.D.&BrobyNielsen,P.2007,MNRAS,379,151 +Marigo,P.,Girardi,L.,Bressan,A.,etal.2017,ApJ,835,77 +Menzies,J.,Feast,M.,Tanabé,T.,Whitelock,P.,&Nakada,Y.2002,MNRAS, +335,923 +Menzies,J.,Feast,M.,Whitelock,P.,etal.2008,MNRAS,385,1045 +Menzies,J.W.,Feast,M.W.,Whitelock,P.A.,&Matsunaga,N.2011,MNRAS, +414,3492 +Menzies,J.W.,Whitelock,P.A.,&Feast,M.W.2015,MNRAS,452,910 +Menzies,J.W.,Whitelock,P.A.,Feast,M.W.,&Matsunaga,N.2010,MNRAS, +406,86 +Merrill,P.W.1923,ApJ,58,215 +Mowlavi,N.,Lecoeur-Taïbi,I.,Lebzelter,T.,etal.2018,A&A,618,A58 +Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 
2016, +MNRAS,463,1446 +Ochsenbein,F.,Bauer,P.,&Marcout,J.2000,A&AS,143,23 +Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2020,MNRAS,498,3283 +Pastorelli,G.,Marigo,P.,Girardi,L.,etal.2019,MNRAS,485,5666 +Pérez,F.&Granger,B.E.2007,ComputinginScienceandEngineering,9,21 +Perren,G.I.,Piatti,A.E.,&Vázquez,R.A.2017,A&A,602,A89 +Pineau,F.-X.,Boch,T.,Derrière,S.,&Schaaff,A.2020,inAstronomicalSo- +cietyofthePacificConferenceSeries,Vol.522,AstronomicalDataAnalysis +SoftwareandSystemsXXVII,ed.P.Ballester,J.Ibsen,M.Solar,&K.Short- +ridge,125 +Sakamoto,T.,Matsunaga,N.,Hasegawa,T.,&Nakada,Y.2012,ApJ,761,L10 +Skrutskie,M.F.,Cutri,R.M.,Stiening,R.,etal.2006,AJ,131,1163 +Soszy´ nski,I.,Olechowska,A.,Ratajczak,M.,etal.2021,ApJ,911,L22 +Soszy´ nski,I.,Udalski,A.,Szyma´ nski,M.K.,etal.2009,ActaAstron.,59,239 +Soszy´ nski,I.,Udalski,A.,Szyma´ nski,M.K.,etal.2011,ActaAstron.,61,217 +Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se- +ries,Vol.351,AstronomicalDataAnalysisSoftwareandSystemsXV,ed. 
+C.Gabriel,C.Arviset,D.Ponz,&S.Enrique,666 +Trabucchi,M.,Mowlavi,N.,&Lebzelter,T.2021a,A&A,656,A66 +Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2017,ApJ,847,139 +Trabucchi,M.,Wood,P.R.,Montalbán,J.,etal.2019,MNRAS,482,929 +Trabucchi,M.,Wood,P.R.,Mowlavi,N.,etal.2021b,MNRAS,500,1575 +Urago,R.,Omodaka,T.,Nagayama,T.,etal.2020,ApJ,891,50 +Villanova,S.,Geisler,D.,Gratton,R.G.,&Cassisi,S.2014,ApJ,791,107 +Virtanen,P.,Gommers,R.,Oliphant,T.E.,etal.2020,NatureMethods,17,261 +Wenger,M.,Ochsenbein,F.,Egret,D.,etal.2000,A&AS,143,9 +Whitelock,P.,Feast,M.,&Catchpole,R.1991,MNRAS,248,276 +Whitelock,P.,Menzies,J.,Feast,M.,etal.1994,MNRAS,267,711 +Whitelock,P.A.1986,MNRAS,219,525 +Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,etal.2009,MNRAS,394,795 +Whitelock,P.A.,Menzies,J.W.,Feast,M.W.,Nsengiyumva,F.,&Matsunaga, +N.2013,MNRAS,428,2216 +Wilson,R.E.&Merrill,P.W.1942,ApJ,95,248 +Wyatt,S.P.&Cahn,J.H.1983,ApJ,275,225 +Ya’Ari,A.&Tuchman,Y.1996,ApJ,456,350 Articlenumber,page6of9 Trabucchietal.:Theperiod-agerelationofLPVs +Fig.A.1.Absolute-K sGaia-2MASSdiagramforthestarswithorwith- +out a spectral type (left and right panels, respectively) in the selected +sample. Symbol colors and shapes indicate the spectral type and host +clusterdescribedinthelegend,respectively,whichalsoreportsthenum- +berofsourcesdisplayed(i.e.,havingbothopticalandNIRphotometry). +The dashed line marks the separation between O- and C-rich sources +accordingtoLebzelteretal.(2018).AnarrowmarksthesourceMSX +LMC124inNGC1830that,havingW BP,RP−W +J,Ks +=9.73mag,liesout- +sidetheplotarea.BackgrounddotsareLPVsintheLMCfromOGLE- +III(lightgray)andMowlavietal.(2018)(darkergray). +AppendixA: ClassificationofobservedLPVs +AppendixA.1: Spectraltype +We adopted the spectral types provided by Lebzelter & Wood +(2007)andKamathetal.(2010)for52oftheLPVstheystudied +in NGC 1846, NGC 1978, and NGC 419. The only exception +isthestar5-3inNGC419,forwhichweadoptedtheS-typeas +reportedbyLloydEvans(1983a). 
+We also searched the SIMBAD astronomical database +(Wenger et al. 2000) for spectral type information, which we +foundfor26morestars.WeusedtheGaia-2MASSdiagramof +Lebzelteretal.(2018)toconfirmthechemicaltypeclassification +takenfromliteratureandtocharacterizethesurfacechemistryof +sourcesofanunknownspectraltype(seeFig.A.1).Amongthe +latter,weidentified13C-richstarsand106O-richsources. +ThreeofthesourceswithoutaspectraltypelackGaiapho- +tometry,sotheycannotbeclassifiedwiththeGaia-2MASS.Two +of them (LW5 and LW22 in 47 Tuc) have no match in Gaia +EDR3,buttheyhaveNIRdataandareprobablyO-richbasedon +theirpositioninthe J−K +s +versusK +s +color-magnitudediagram. +The third source is one of the two stars in NGC 1903 from the +listofGradyetal.(2019),whichweidentifiedwiththe2MASS +source J05171633-6920298. It is likely C-rich according to the +NIRcolor-magnitudediagram. +Finally, the sources V138 in ω Cen, LW15 in NGC 2808, +andLW4inNGC362lackNIRdata.Theycannotbeplacedin +the NIR PL diagram, upon which we relied to assign pulsation +modes to periods, so we excluded them from the sample. The distributionofO-andC-richsourcesintheperiod-agediagram isshowninFig.A.2. AppendixA.2: Variability @@ -501,7 +756,7 @@ overtonemodeisassociatedwithsequenceA,thefirstovertone modewithsequencesBandC(cid:48),andthefundamentalmodewith sequenceC(e.g.,Trabucchietal.2017).Weexcludedlongsec- ondaryperiodsonsequenceDastheyarenotduetostellarpul- -sation (Soszyn´ski et al. 2021, and references therein), and we +sation (Soszy´ nski et al. 2021, and references therein), and we used the pattern of PL sequences in the LMC as a reference to guidethemodeidentification(cf.Trabucchietal.2021a). Weperformedthisclassificationseparatelyforperiodscom- @@ -509,128 +764,139 @@ ingfromeachdistinctdataset.Iftwoormoreperiodsfromdif- ferent data sets were assigned to the same pulsation mode, we retained only one of those periods, with priority to the values from Lebzelter & Wood and Kamath et al. (2010). 
If the latter -Fig.A.1.Absolute-K Gaia-2MASSdiagramforthestarswithorwith- authors do not provide this information, we adopted the period -s -out a spectral type (left and right panels, respectively) in the selected from OGLE-III if available, and otherwise from ASAS-SN or -sample. Symbol colors and shapes indicate the spectral type and host fromGaiaDR2. -clusterdescribedinthelegend,respectively,whichalsoreportsthenum- For some sources, the periods reported in different catalogs -berofsourcesdisplayed(i.e.,havingbothopticalandNIRphotometry). +authors do not provide this information, we adopted the period +from OGLE-III if available, and otherwise from ASAS-SN or +fromGaiaDR2. +For some sources, the periods reported in different catalogs wereassignedtothesamemodethroughthisprocedure.Inmost -The dashed line marks the separation between O- and C-rich sources cases, these periods are reasonably similar to each other. Only -accordingtoLebzelteretal.(2018).AnarrowmarksthesourceMSX -LMC124inNGC1830that,havingW −W =9.73mag,liesout- in a few caseswere they significantly different, but this didnot -sidetheplotarea.BackgrounddotsareBP L,R PP VsinJ,K ts heLMCfromOGLE- alterourconclusions. -III(lightgray)andMowlavietal.(2018)(darkergray). Whenavailable,thevariabilitytypewastakenfromOGLE- +in a few caseswere they significantly different, but this didnot +alterourconclusions. +Whenavailable,thevariabilitytypewastakenfromOGLE- IIIorASAS-SN.Wenotethatweareonlyinterestedinwhether a star is classified as a Mira or semi-regular variable. In many -AppendixA: ClassificationofobservedLPVs cases,thistypeisnotgivenorthestarissimplyconsidered,for +cases,thistypeisnotgivenorthestarissimplyconsidered,for instance,asanLPVorAGBinSIMBAD,inwhichcasewecon- -AppendixA.1: Spectraltype sideredthevariabilitytypeasundetermined. -We adopted the spectral types provided by Lebzelter & Wood -(2007)andKamathetal.(2010)for52oftheLPVstheystudied +sideredthevariabilitytypeasundetermined. 
AppendixB: Fittingrelations -in NGC 1846, NGC 1978, and NGC 419. The only exception -isthestar5-3inNGC419,forwhichweadoptedtheS-typeas WeobtainedanalyticexpressionsforthePArelationsseparately -reportedbyLloydEvans(1983a). for O- and C-rich stars, proceeding as follows. For each bin of -We also searched the SIMBAD astronomical database log(τ/yr), we modeled the period distribution with a Gaussian -(Wenger et al. 2000) for spectral type information, which we kernel density estimator (KDE) and identified the peak of the -foundfor26morestars.WeusedtheGaia-2MASSdiagramof distribution. To describe the boundaries of the PA relation, we -Lebzelteretal.(2018)toconfirmthechemicaltypeclassification adopted, at each age, the values of the period at which the dis- -takenfromliteratureandtocharacterizethesurfacechemistryof tributionequals25%ofitsmaximum.Weselectedthisarbitrary -sourcesofanunknownspectraltype(seeFig.A.1).Amongthe value upon visual inspection of the PA plane. We modeled the -latter,weidentified13C-richstarsand106O-richsources. central trend of the PA relation, as well as its short- and long- -ThreeofthesourceswithoutaspectraltypelackGaiapho- periodedges,withlinearorquadraticfunctionsintheform -tometry,sotheycannotbeclassifiedwiththeGaia-2MASS.Two -log(τ/yr)=a +a (P/P˜)+a (P/P˜)2, (B.1) -of them (LW5 and LW22 in 47 Tuc) have no match in Gaia 0 1 2 -EDR3,buttheyhaveNIRdataandareprobablyO-richbasedon -(where P˜ = 350 days) and employed a Lenvenberg-Marquardt -theirpositioninthe J−K versusK color-magnitudediagram. -s s nonlinearregressionalgorithm3toderivethebest-fitcoefficients, -The third source is one of the two stars in NGC 1903 from the +WeobtainedanalyticexpressionsforthePArelationsseparately +for O- and C-rich stars, proceeding as follows. For each bin of +log(τ/yr), we modeled the period distribution with a Gaussian +kernel density estimator (KDE) and identified the peak of the +distribution. 
To describe the boundaries of the PA relation, we +adopted, at each age, the values of the period at which the dis- +tributionequals25%ofitsmaximum.Weselectedthisarbitrary +value upon visual inspection of the PA plane. We modeled the +central trend of the PA relation, as well as its short- and long- +periodedges,withlinearorquadraticfunctionsintheform +log(τ/yr)=a 0+a 1(P/˜ P)+a 2(P/˜ P)2, (B.1) +(where ˜ P = 350 days) and employed a Lenvenberg-Marquardt +nonlinearregressionalgorithm3toderivethebest-fitcoefficients, which are listed in Table B.1. We remark that these best-fit ex- -listofGradyetal.(2019),whichweidentifiedwiththe2MASS pressionsareonlyvalidintheintervals8.0 ≤ log(τ/yr) ≤ 10.3 -source J05171633-6920298. It is likely C-rich according to the and 20 < P/days < 700 for O-rich composition, and within -NIRcolor-magnitudediagram. -Finally, the sources V138 in ω Cen, LW15 in NGC 2808, 3 WemadeuseofthePythonlibrarySciPytoperformGaussianKDE -andLW4inNGC362lackNIRdata.Theycannotbeplacedin modeling and best-fit, respectively, by means of the gaussian_kde -the NIR PL diagram, upon which we relied to assign pulsation tool from the stats module and the curve_fit function from the -modes to periods, so we excluded them from the sample. The optimizemodule. +3 WemadeuseofthePythonlibrarySciPytoperformGaussianKDE +modeling and best-fit, respectively, by means of the gaussian_kde +tool from the stats module and the curve_fit function from the +optimizemodule. Articlenumber,page7of9 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs Fig.A.2.SimilartoFig.1,excepteachsourceiscolor-codedaccordingtowhetherithasbeenclassifiedasO-rich(blue)orC-rich(red). -TableB.1.Best-fitcoefficientsforthePArelationanditsboundariesin TableB.2.Best-fitcoefficientsfortheperiod-initialmassrelationand -theformgiveninEq.B.1. itsboundariesintheformgiveninEq.B.2. 
-Sp.type relation a a a Sp.type relation b b b -0 1 2 0 1 2 -center 10.78 -2.660 0.5953 center -0.2790 0.8958 -0.1828 -O-rich loweredge 10.46 -2.818 0.6578 O-rich loweredge -0.1772 0.9975 -0.2203 -upperedge 10.54 -0.8187 -0.2335 upperedge -0.1740 0.2783 0.8247 -center 9.755 -0.7532 center -0.0304 0.2885 -C-rich loweredge 9.982 -1.698 C-rich loweredge -0.0131 0.5752 -upperedge 8.498 -1.827 -0.9959 upperedge -0.2245 -0.2720 0.2343 -8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich DFMP are indicated by solid lines. Panel (b) shows the period -case. distributionsforafewdifferentcases. -Becauseoftheconnectionbetweenageandinitialmass,the Itisinstructive,tobeginwith,toignoretheeffectofthermal -PArelationcanbetranslatedintoaperiod-initialmassrelation, pulsesandconsideronlythequiescentevolution(greenlinesin -whichwederivedusingthesameapproachdescribedabove,and Fig.C.1).Thesmallestinitialmasscorrespondstoastarthatjust -assumingtheform enteredtheTP-AGB,whentheFMhasaperiodof∼ 240days +TableB.1.Best-fitcoefficientsforthePArelationanditsboundariesin +theformgiveninEq.B.1. +Sp.type relation a +0 +a +1 +a +2 +O-rich +center 10.78 -2.660 0.5953 +loweredge 10.46 -2.818 0.6578 +upperedge 10.54 -0.8187 -0.2335 +C-rich +center 9.755 -0.7532 +loweredge 9.982 -1.698 +upperedge 8.498 -1.827 -0.9959 +8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich +case. +Becauseoftheconnectionbetweenageandinitialmass,the +PArelationcanbetranslatedintoaperiod-initialmassrelation, +whichwederivedusingthesameapproachdescribedabove,and +assumingtheform +log(M i/M (cid:12))=b 0+b 1(P/˜ P)+b 2(P/˜ P)2. (B.2) +Theresultingbest-fitlinesaredisplayedinFig.B.1,andtheco- +efficientsaregiveninTableB.2. +WeremarkthatboththePAandtheperiod-initialmassrela- +tionsdependonmodelassumptions,inparticularmasslossand +mixing,aswellasonthepropertiesofthepopulationofLPVs, +namelythestar-formationhistoryandage-metallicityrelation. 
+AppendixC: Theshapeoftheperioddistribution +Asanexamplecase,weconsideranisochroneofagelog(τ/yr)= +8.3andinitialmetallicityZ +i +=0.006.StarsontheTP-AGBhave +initial masses M +i +(cid:39) 3.85M +(cid:12) +over a small range of ∼ 10−3M (cid:12). +The relation between period and initial mass is displayed in +panel (a) of Fig. C.1, where isochrone portions undergoing +TableB.2.Best-fitcoefficientsfortheperiod-initialmassrelationand +itsboundariesintheformgiveninEq.B.2. +Sp.type relation b +0 +b +1 +b +2 +O-rich +center -0.2790 0.8958 -0.1828 +loweredge -0.1772 0.9975 -0.2203 +upperedge -0.1740 0.2783 0.8247 +C-rich +center -0.0304 0.2885 +loweredge -0.0131 0.5752 +upperedge -0.2245 -0.2720 0.2343 +DFMP are indicated by solid lines. Panel (b) shows the period +distributionsforafewdifferentcases. +Itisinstructive,tobeginwith,toignoretheeffectofthermal +pulsesandconsideronlythequiescentevolution(greenlinesin +Fig.C.1).Thesmallestinitialmasscorrespondstoastarthatjust +enteredtheTP-AGB,whentheFMhasaperiodof∼ 240days butisnotdominant.Itonlybecomesdominantaboveathreshold -log(M i/M (cid:12))=b 0+b 1(P/P˜)+b 2(P/P˜)2. (B.2) radiusR dom,0,thatisforperiodslongerthana(mass-dependent) -criticalperiod P (thesolidgraylineinFig.C.1).Theleast +radiusR dom,0,thatisforperiodslongerthana(mass-dependent) +criticalperiod P dom,0 -Theresultingbest-fitlinesaredisplayedinFig.B.1,andtheco- -evolved (quiescent) model with dominant FM has P (cid:39) 360 -efficientsaregiveninTableB.2. FM +(thesolidgraylineinFig.C.1).Theleast +evolved (quiescent) model with dominant FM has P +FM +(cid:39) 360 days(greencircleandhorizontalline),correspondingtoasharp -WeremarkthatboththePAandtheperiod-initialmassrela- cutintheperioddistributionshowninpanel(b)ofFig.C.1. 
-tionsdependonmodelassumptions,inparticularmasslossand AsastarevolvesalongtheAGBitexpands,anditsperiodbe- -mixing,aswellasonthepropertiesofthepopulationofLPVs, comeslongerinresponsetotheincreaseinradius.Modelswith -namelythestar-formationhistoryandage-metallicityrelation. ahigherinitialmassaremoreevolved,hencetheyhavealarger radiusandalongerperiod.Therateatwhichaperiodincreases withradiusisnotfixed,butratherdecreaseswithevolution.Ac- -AppendixC: Theshapeoftheperioddistribution cordingtotheprescriptionofTrabucchietal.(2021b),aperiod -Asanexamplecase,weconsideranisochroneofagelog(τ/yr)= growswithradiusasabrokenpower-lawwithexponentα(cid:39)1.8 -8.3andinitialmetallicityZ =0.006.StarsontheTP-AGBhave ifR 0, -⊆ ⇔ ∈ -sodass B (x) = y Rn d(x,y) < r U -r -{ ∈ | } ⊆ +1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie T Euklid: +U +⊆ +Rn offen +⇔ +für jedes x +∈ +U gibt es r > 0, +sodass B r(x) = {y +∈ +Rn +| +d(x,y) < r +} ⊆ +U Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli- chem Mittelpunkt (vgl. Definition 1.ii). 2) Jeder metrische Raum (X,d) ist auch ein topologischer Raum. -3) Für eine Menge X heißt T = (X) diskrete Topologie. +3) Für eine Menge X heißt T Diskret -P -4) X := R,T := U R R U endlich heißt Zariski-Topologie += P(X) diskrete Topologie. +4) X := R,T Z -{ ⊆ | \ }∪{∅} +:= {U +⊆ +R +| +R \U endlich +}∪{∅} +heißt Zariski-Topologie Beobachtungen: -U T f R[X], sodass R U = V(f) = x R f(x) = 0 -Z -• ∈ ⇔ ∃ ∈ \ { ∈ | } -Es gibt keine disjunkten offenen Mengen in T . +• +U +∈ +T Z +⇔ +∃f +∈ +R[X], sodass R \U = V(f) = {x +∈ +R +| +f(x) = 0 +} • +Es gibt keine disjunkten offenen Mengen in T Z. 4 1.1.TOPOLOGISCHERÄUME -5) X := Rn,T = U Rn Es gibt Polynome f ,...,f R[X ,...,X ] sodass -Z 1 r 1 n -{ ⊆ | ∈ -Rn U = V(f ,...,f ) -1 r -\ } -6) X := 0,1 ,T = , 0,1 , 0 heißt Sierpińskiraum. 
-{ } {∅ { } { }} -, 0,1 , 1 sind dort alle abgeschlossenen Mengen. -∅ { } { } +5) X := Rn,T +Z += {U +⊆ +Rn |Es gibt Polynome f 1,...,f +r +∈ +R[X 1,...,X n] sodass +Rn \U = V(f 1,...,f r) +} +6) X := {0,1 },T = {∅, {0,1 }, {0 +}} +heißt Sierpińskiraum. +∅, {0,1 }, {1 +} +sind dort alle abgeschlossenen Mengen. Definition 2 -Sei (X,T) ein topologischer Raum und x X. +Sei (X,T) ein topologischer Raum und x +∈ +X. +Eine Teilmenge U +⊆ +X heißt Umgebung von x, wenn es ein U +0 +∈ +T gibt mit x ∈ -Eine Teilmenge U X heißt Umgebung von x, wenn es ein U T gibt mit x U und -0 0 -⊆ ∈ ∈ -U U. +U +0 +und +U 0 ⊆ +U. Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. Definition 3 -Sei (X,T) ein topologischer Raum und M X eine Teilmenge. +Sei (X,T) ein topologischer Raum und M ⊆ +X eine Teilmenge. +a) M◦ := {x +∈ +M +| +M ist Umgebung von x +} += (cid:91) -a) M◦ := x M M ist Umgebung von x = U heißt Inneres oder offener -{ ∈ | } U⊆M -U∈T +U∈TU heißt Inneres oder offener Kern von M. +b) M := (cid:92) -b) M := A heißt abgeschlossene Hülle oder Abschluss von M. M⊆A Aabgeschlossen -c) ∂M := M M◦ heißt Rand von M. -\ +A heißt abgeschlossene Hülle oder Abschluss von M. +c) ∂M := M \M◦ heißt Rand von M. d) M heißt dicht in X, wenn M = X ist. Beispiel 2 1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅ 2) Sei X = R und M = (a,b). Dann gilt: M = [a,b] -3) Sei X = R,T = T und M = (a,b). Dann gilt: M = R +3) Sei X = R,T = T Z +und M = (a,b). Dann gilt: M = R Definition 4 Sei (X,T) ein topologischer Raum. -a) B T heißt Basis der Topologie T, wenn jedes U T Vereinigung von Elementen -⊆ ∈ +a) B +⊆ +T heißt Basis der Topologie T, wenn jedes U +∈ +T Vereinigung von Elementen aus B ist. -b) T heißt Subbasis der Topologie T, wenn jedes U T Vereinigung von endlichen -S ⊆ ∈ -Durchschnitten von Elementen aus ist. 
+b) +S ⊆ +T heißt Subbasis der Topologie T, wenn jedes U +∈ +T Vereinigung von endlichen +Durchschnitten von Elementen aus S +ist. Beispiel 3 (Basis und Subbasis) 1) Jede Basis ist auch eine Subbasis, z.B. -S = (a,b) a,b R,a < b ist für R mit der Standardtopologie sowohl Basis als -{ | ∈ } +S = {(a,b) +| +a,b +∈ +R,a < b +} +ist für R mit der Standardtopologie sowohl Basis als auch Subbasis. 2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist -B = B (x) r Q ,x Qn -r >0 -{ | ∈ ∈ } +B = {B r(x) +| +r +∈ +Q >0,x +∈ +Qn +} ist eine abzählbare Basis von T. -3) Sei(X,T)eintopologischerRaummitX = 0,1,2 undT = , 0 , 0,1 , 0,2 ,X . -{ } {∅ { } { } { } } -Dann ist = , 0,1 , 0,2 eine Subbasis von T, da gilt: -S {∅ { } { }} +3) Sei(X,T)eintopologischerRaummitX = {0,1,2 }undT = {∅, {0 }, {0,1 }, {0,2 },X }. +Dann ist +S += {∅, {0,1 }, {0,2 +}} +eine Subbasis von T, da gilt: 5 1.1.TOPOLOGISCHERÄUME -T • S ⊆ -, 0,1 und 0,2 -• ∅ { } { } ∈ S -0 = 0,1 0,2 -• { } { }∩{ } -X = 0,1 0,2 -• { }∪{ } -Allerings ist keine Basis von (X,T), da 0 nicht als Vereinigung von Elementen -S { } -aus erzeugt werden kann. +T +• +∅, {0,1 +} +und {0,2 +} ∈ S +• +{0 +} += {0,1 }∩{0,2 +} +• +X = {0,1 }∪{0,2 +} +Allerings ist S +keine Basis von (X,T), da {0 +} +nicht als Vereinigung von Elementen +aus +S +erzeugt werden kann. Bemerkung 2 -Sei X eine Menge und (X). Dann gibt es genau eine Topologie T auf X, für die -S ⊆ P S +Sei X eine Menge und +S ⊆ +P(X). Dann gibt es genau eine Topologie T auf X, für die +S Subbasis ist. Definition 5 -Sei (X,T) ein topologischer Raum und Y X. +Sei (X,T) ein topologischer Raum und Y ⊆ -T := U Y U T ist eine Topologie auf Y. +X. +T +Y +:= {U ∩Y +| +U +∈ +T +} +ist eine Topologie auf Y. +T Y -{ ∩ | ∈ } -T heißt Teilraumtopologie und (Y,T ) heißt ein Teilraum von (X,T). -Y Y +heißt Teilraumtopologie und (Y,T Y) heißt ein Teilraum von (X,T). Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. Definition 6 -Seien X ,X topologische Räume. 
-1 2 -U X X sei offen, wenn es zu jedem x = (x ,x ) U Umgebungen U um x mit -1 2 1 2 i i -⊆ × ∈ -i = 1,2 gibt, sodass U U U gilt. -1 2 -× ⊆ -T = U X X U offen isteineTopologieaufX X .SieheißtProdukttopologie. -1 2 1 2 -{ ⊆ × | } × -B = U U U offen in X ,i = 1,2 ist eine Basis von T. -1 2 i i -{ × | } -X +Seien X 1,X 2 +topologische Räume. U -x -U x -2 2 +⊆ X 1 +×X +2 +sei offen, wenn es zu jedem x = (x 1,x 2) +∈ +U Umgebungen U +i +um x +i +mit +i = 1,2 gibt, sodass U +1 +×U +2 +⊆ +U gilt. +T = {U +⊆ +X +1 +×X +2 +| +U offen }isteineTopologieaufX +1 +×X 2.SieheißtProdukttopologie. +B = {U +1 +×U +2 +| +U +i +offen in X i,i = 1,2 +} +ist eine Basis von T. +U +x +x +2 x 1 U +2 +U +1 +X +1 +X +2 +Abbildung 1.1: Zu x = (x 1,x 2) gibt es Umgebungen U 1,U +2 +mit U 1 -Abbildung 1.1: Zu x = (x ,x ) gibt es Umgebungen U ,U mit U U U -1 2 1 2 1 2 -× ⊆ +×U +2 +⊆ +U Beispiel 4 (Produkttopologien) -1) X = X = R mit euklidischer Topologie. -1 2 -Die Produkttopologie auf R R = R2 stimmt mit der euklidischen Topologie auf -⇒ × +1) X +1 += X +2 += R mit euklidischer Topologie. +⇒ +Die Produkttopologie auf R ×R = R2 stimmt mit der euklidischen Topologie auf R2 überein. -2) X = X = R mit Zariski-Topologie. T Produkttopologie auf R2: U U -1 2 1 2 -× +2) X +1 += X +2 += R mit Zariski-Topologie. T Produkttopologie auf R2: U +1 +×U +2 (Siehe Abbildung 1.2) 6 1.1.TOPOLOGISCHERÄUME +U +1 += R \N U2 = -R -\ +R\ N -U = R N -1 -\ Abbildung 1.2: Zariski-Topologie auf R2 Definition 7 -Sei X ein topologischer Raum, eine Äquivalenzrelation auf X, X = X/ sei die Menge +Sei X ein topologischer Raum, ∼ +eine Äquivalenzrelation auf X, X = X/ ∼ -der Äquivalenzklassen, π : X X, x [x] . -∼ -→ (cid:55)→ +sei die Menge +der Äquivalenzklassen, π : X +→ +X, x +(cid:55)→ +[x] ∼. T X := (cid:8) U ⊆ X (cid:12) (cid:12) π−1(U) ∈ T X (cid:9) -(X,T ) heißt Quotiententopologie. -X +(X,T X) heißt Quotiententopologie. 
Beispiel 5 -X = R,a b : a b Z -∼ ⇔ − ∈ -π−1(u) -a +X = R,a +∼ +b : +⇔ +a −b +∈ +Z R -1 0 1 2 3 4 5 +0 a U +a +π−1(u) 0 -0 1, d. h. [0] = [1] ∼ +1, d. h. [0] = [1] Beispiel 6 -Sei X = R2 und (x ,y ) (x ,y ) x x Z und y y Z. Dann ist X/ ein Torus. -1 1 2 2 1 2 1 2 ∼ -∼ ⇔ − ∈ − ∈ +Sei X = R2 und (x 1,y 1) +∼ +(x 2,y 2) +⇔ +x +1 +−x +2 +∈ +Z und y +1 +−y +2 +∈ +Z. Dann ist X/ +∼ +ein Torus. Beispiel 7 (Projektiver Raum) -X = Rn+1 0 , x y λ R× mit y = λx -\{ } ∼ ⇔ ∃ ∈ -x und y liegen auf der gleichen +X = Rn+1 \{0 }, x +∼ +y ⇔ +∃λ +∈ +R× mit y = λx +⇔ +x und y liegen auf der gleichen Ursprungsgerade -X = n(R) -P +X = Pn(R) 7 1.2.METRISCHERÄUME Also für n = 1: -4 -2 -4 2 2 4 6 8 -− − -2 -− -4 -− +−4 −2 2 4 6 8 +−4 +−2 +24 1.2 Metrische Räume Definition 8 -Sei X eine Menge. Eine Abbildung d : X X R+ heißt Metrik, wenn gilt: -× → 0 -(i) Definitheit: d(x,y) = 0 x = y x,y X -⇔ ∀ ∈ -(ii) Symmetrie: d(x,y) = d(y,x) x,y X -∀ ∈ -(iii) Dreiecksungleichung: d(x,z) d(x,y)+d(y,z) x,y,z X -≤ ∀ ∈ +Sei X eine Menge. Eine Abbildung d : X ×X +→ +R+ +0 +heißt Metrik, wenn gilt: +(i) Definitheit: d(x,y) = 0 +⇔ +x = y ∀x,y +∈ +X +(ii) Symmetrie: d(x,y) = d(y,x) ∀x,y +∈ +X +(iii) Dreiecksungleichung: d(x,z) +≤ +d(x,y)+d(y,z) ∀x,y,z +∈ +X Das Paar (X,d) heißt ein metrischer Raum. Bemerkung 3 Sei (X,d) ein metrischer Raum und -B (x) := y X d(x,y) < r für x X,r R+ -r -{ ∈ | } ∈ ∈ -B = B (x) (X) x X,r R+ ist Basis einer Topologie auf X. -r -{ ⊆ P | ∈ ∈ } +B r(x) := {y +∈ +X +| +d(x,y) < r +} +für x +∈ +X,r +∈ +R+ +B = {B r(x) +⊆ +P(X) +| +x +∈ +X,r +∈ +R+ +} +ist Basis einer Topologie auf X. Definition 9 -Seien (X,d ) und (Y,d ) metrische Räume und ϕ : X Y eine Abbildung mit -X Y +Seien (X,d X) und (Y,d Y) metrische Räume und ϕ : X → -x ,x X : d (x ,x ) = d (ϕ(x ),ϕ(x )) -1 2 X 1 2 Y 1 2 -∀ ∈ +Y eine Abbildung mit +∀x 1,x +2 +∈ +X : d X(x 1,x 2) = d Y(ϕ(x 1),ϕ(x 2)) Dann heißt ϕ eine Isometrie von X nach Y. 
Beispiel 8 (Skalarprodukt erzeugt Metrik) -Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt , . Dann wird V -(cid:112) (cid:104)· ·(cid:105) -durch d(x,y) := x y,x y zum metrischen Raum. -(cid:104) − − (cid:105) +Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt (cid:104)·, ·(cid:105). Dann wird V +durch d(x,y) := +(cid:112) +(cid:104)x −y,x −y +(cid:105) +zum metrischen Raum. Beispiel 9 (diskrete Metrik) Sei X eine Menge. Dann heißt +d(x,y) = (cid:40) 0 falls x = y -d(x,y) = -1 falls x = y -(cid:54) +1 falls x (cid:54)= y die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. 8 1.2.METRISCHERÄUME Beispiel 10 -X = R2 und d((x ,y ),(x ,y )) := max( x x , y y ) ist Metrik. -1 1 2 2 1 2 1 2 -(cid:107) − (cid:107) (cid:107) − (cid:107) +X = R2 und d((x 1,y 1),(x 2,y 2)) := max( (cid:107)x +1 +−x +2 +(cid:107), (cid:107)y +1 +−y +2 +(cid:107)) ist Metrik. Beobachtung: d erzeugt die euklidische Topologie. +B r(0) = r r -B (0) = r r r -(a) B (0) (b) Euklidische Topologie -r +(a) B r(0) (b) Euklidische Topologie Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10 9 1.2.METRISCHERÄUME Beispiel 11 (SNCF-Metrik1) X = R2 -4 -2 -4 2 2 4 6 8 -− − -2 -− -4 -− +−4 −2 2 4 6 8 +−4 +−2 +24 Definition 10 -Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x = y in X -(cid:54) -Umgebungen U um x und U um y gibt, sodass U U = . -x y x y -∩ ∅ +Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x (cid:54)= y in X +Umgebungen U +x +um x und U +y +um y gibt, sodass U +x +∩U +y += ∅. Bemerkung 4 (Trennungseigenschaft) Metrische Räume sind hausdorffsch, wegen -d(x,y) > 0 ε > 0 : B (x) B (y) = -ε ε -⇒ ∃ ∩ ∅ +d(x,y) > 0 +⇒ +∃ε > 0 : B ε(x) ∩B ε(y) = +∅ Beispiel 12 (Topologische Räume und Hausdorff-Räume) -1) (R,T ) ist ein topologischer Raum, der nicht hausdorffsch ist. -Z -2) (R,T ) ist ein topologischer Hausdorff-Raum. 
-Euklid +1) (R,T Z) ist ein topologischer Raum, der nicht hausdorffsch ist. +2) (R,T Euklid) ist ein topologischer Hausdorff-Raum. Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) -Seien X,X ,X Hausdorff-Räume. -1 2 +Seien X,X 1,X +2 +Hausdorff-Räume. a) Jeder Teilraum von X ist hausdorffsch. -b) X X ist hausdorffsch (vgl. Abbildung 1.4). -1 2 -× +b) X +1 +×X +2 +ist hausdorffsch (vgl. Abbildung 1.4). Definition 11 -Sei X ein topologischer Raum und (x) n∈N eine Folge in X. x X heißt Grenzwert oder +Sei X ein topologischer Raum und (x) n∈N eine Folge in X. x ∈ -Limes von (x ), wenn es für jede Umgebung U von x ein n gibt, sodass x U für alle -n 0 n -∈ -n n . +X heißt Grenzwert oder +Limes von (x n), wenn es für jede Umgebung U von x ein n 0 -≥ +gibt, sodass x +n +∈ +U für alle +n +≥ +n 0. Bemerkung 6 Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. -Beweis: Sei (x ) eine konvergierende Folge und x und y Grenzwerte der Folge. +Beweis: Sei (x n) eine konvergierende Folge und x und y Grenzwerte der Folge. +Da X hausdorffsch ist, gibt es Umgebungen U +x +von x und U +y +von y mit U +x +∩U +y += +∅ +falls +x (cid:54)= y. Da (x n) gegen x und y konvergiert, existiert ein n +0 +mit x n -Da X hausdorffsch ist, gibt es Umgebungen U von x und U von y mit U U = falls -x y x y -∩ ∅ -x = y. Da (x ) gegen x und y konvergiert, existiert ein n mit x U U für alle n n -n 0 n x y 0 -(cid:54) ∈ ∩ ≥ -x = y (cid:4) +∈ +U +x +∩U +y +für alle n +≥ +n +0 ⇒ +x = y (cid:4) 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. 10 1.3.STETIGKEIT -X +(x 1,y 1) (x 2,y 2) +x +1 +x 2 -(x ,y ) (x ,y ) -1 1 2 2 +U +1 +×X +2 +U +2 +×X +2X +1 X +2 +Abbildung 1.4: Wenn X 1,X +2 +hausdorffsch sind, dann auch X 1 -x x -1 2 -U X U X -1 2 2 2 -× × -Abbildung 1.4: Wenn X ,X hausdorffsch sind, dann auch X X -1 2 1 2 -× +×X +2 1.3 Stetigkeit Definition 12 -Seien (X,T ),(Y,T ) topologische Räume und f : X Y eine Abbildung. 
-X Y +Seien (X,T X),(Y,T Y) topologische Räume und f : X → -a) f heißt stetig : U T : f−1(U) T . -Y X -⇔ ∀ ∈ ∈ +Y eine Abbildung. +a) f heißt stetig : +⇔ +∀U +∈ +T +Y +: f−1(U) +∈ +T X. b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : -Y X gibt, sodass g f = id und f g = id . -X Y -→ ◦ ◦ +Y +→ +X gibt, sodass g ◦f = id +X +und f ◦g = id Y. Bemerkung 72 -Seien X,Y metrische Räume und f: X Y eine Abbildung. +Seien X,Y metrische Räume und f: X → -Dann gilt: f ist stetig zu jedem x X und jedem ε > 0 gibt es δ(x,ε) > 0, sodass für -⇔ ∈ -alle y X mit d(x,y) < δ gilt d (f(x),f(y)) < ε. -Y +Y eine Abbildung. +Dann gilt: f ist stetig +⇔ +zu jedem x +∈ +X und jedem ε > 0 gibt es δ(x,ε) > 0, sodass für +alle y ∈ -Beweis: „ “: Sei x X,ε > 0 gegeben und U := B (f(x)). -ε -⇒ ∈ +X mit d(x,y) < δ gilt d Y(f(x),f(y)) < ε. +Beweis: „ ⇒“: Sei x +∈ +X,ε > 0 gegeben und U := B ε(f(x)). Dann ist U offen in Y. -=D =e =f. =1 =2.a f−1(U) ist offen in X. Dann ist x f−1(U). -⇒ ∈ -δ > 0, sodass B (x) f−1(U) -δ -⇒ ∃ ⊆ -f(B (x)) U -δ -⇒ ⊆ -y X d (x,y) < δ Beh. +Def. 12.a ===== +⇒ +f−1(U) ist offen in X. Dann ist x +∈ +f−1(U). +⇒ +∃δ > 0, sodass B δ(x) +⊆ +f−1(U) +⇒ +f(B δ(x)) +⊆ +U +⇒ +{y +∈ X -⇒ { ∈ | } ⇒ -„ “: Sei U Y offen, X f−1(U). -⇐ ⊆ ∈ -Dann gibt es ε > 0, sodass B (f(x)) U -ε +| +d X(x,y) < δ +} ⇒ +Beh. +„ ⇐“: Sei U +⊆ +Y offen, X +∈ +f−1(U). +Dann gibt es ε > 0, sodass B ε(f(x)) ⊆ +U Vor. -== Es gibt δ > 0, sodass f(B (x)) B (f(x))) -δ ε -⇒ ⊆ -B (x) f−1(B (f(x))) f−1(U) (cid:4) -δ ε -⇒ ⊆ ⊆ +== +⇒ +Es gibt δ > 0, sodass f(B δ(x)) +⊆ +B ε(f(x))) +⇒ +B δ(x) +⊆ +f−1(B ε(f(x))) +⊆ +f−1(U) (cid:4) Bemerkung 8 -Seien X,Y topologische Räume und f : X Y eine Abbildung. Dann gilt: +Seien X,Y topologische Räume und f : X → +Y eine Abbildung. Dann gilt: f ist stetig -für jede abgeschlossene Teilmenge A Y gilt : f−1(A) X ist abgeschlossen. -⇔ ⊆ ⊆ +⇔ +für jede abgeschlossene Teilmenge A +⊆ +Y gilt : f−1(A) +⊆ +X ist abgeschlossen. 
Beispiel 13 (Stetige Abbildungen und Homöomorphismen) -1) Für jeden topologischen Raum X gilt: id : X X ist Homöomorphismus. +1) Für jeden topologischen Raum X gilt: id X +: X → +X ist Homöomorphismus. 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. 11 1.3.STETIGKEIT -2) Ist (Y,T ) trivialer topologischer Raum, d. h. T = T , so ist jede Abbildung -Y Y triv -f : X Y stetig. +2) Ist (Y,T Y) trivialer topologischer Raum, d. h. T +Y += T triv, so ist jede Abbildung +f : X → -3) Ist X diskreter topologischer Raum, so ist f : X Y stetig für jeden topologischen +Y stetig. +3) Ist X diskreter topologischer Raum, so ist f : X → +Y stetig für jeden topologischen Raum Y und jede Abbildung f. -4) Sei X = [0,1),Y = S1 = z C z = 1 und f(t) = e2πit. -{ ∈ | (cid:107) (cid:107) } -f -0 +4) Sei X = [0,1),Y = S1 = {z +∈ +C +| +(cid:107)z +(cid:107) += 1 +} +und f(t) = e2πit. R 0 1 +0 +f g Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist. Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht offen ist (vgl. Abbildung 1.5). Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) -Seien X,Y,Z topologische Räume, f : X Y und g : Y Z stetige Abbildungen. -→ → -Dann ist g f : X Z stetig. -◦ → +Seien X,Y,Z topologische Räume, f : X +→ +Y und g : Y +→ +Z stetige Abbildungen. +Dann ist g ◦f : X +→ +Z stetig. +X f (cid:47)(cid:47) -X Y -g◦f (cid:32)(cid:32) (cid:127)(cid:127) g +g◦f (cid:32)(cid:32) +Y +g (cid:127)(cid:127) Z -Beweis: Sei U Z offen (g f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig -⊆ ⇒ ◦ +Beweis: Sei U +⊆ +Z offen +⇒ +(g ◦f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. (cid:4) Bemerkung 10 a) Für jeden topologischen Raum X ist -Homöo(X) := f : X X f ist Homöomorphismus -{ → | } +Homöo(X) := {f : X +→ +X +| +f ist Homöomorphismus +} eine Gruppe. 
-b) Jede Isometrie f : X Y zwischen metrischen Räumen ist ein Homöomorphismus. +b) Jede Isometrie f : X +→ +Y zwischen metrischen Räumen ist ein Homöomorphismus. +c) Iso(X) := {f : X → -c) Iso(X) := f : X X f ist Isometrie ist eine Untergruppe von Homöo(X) für -{ → | } +X +| +f ist Isometrie +} +ist eine Untergruppe von Homöo(X) für jeden metrischen Raum X. Bemerkung 11 (Projektionen sind stetig) -Seien X,Y topologische Räume. π : X Y X und π : X Y Y die Projektionen -X Y -× → × → -π : (x,y) x und π : (x,y) y -X Y -(cid:55)→ (cid:55)→ -Wird X Y mit der Produkttopologie versehen, so sind π und π stetig. -X Y -× -Beweis: Sei U X offen -π−1(U) =⊆ U Y ist offen in X Y. (cid:4) -⇒ X × × +Seien X,Y topologische Räume. π +X +: X ×Y +→ +X und π +Y +: X ×Y +→ +Y die Projektionen +π +X +: (x,y) +(cid:55)→ +x und π +Y +: (x,y) +(cid:55)→ +y +Wird X ×Y mit der Produkttopologie versehen, so sind π +X +und π +Y +stetig. +Beweis: Sei U +⊆ +X offen +⇒ +π−1 +X +(U) = U ×Y ist offen in X ×Y. (cid:4) Bemerkung 12 -Sei X ein topologischer Raum, eine Äquivalenzrelation auf X, X = X/ der Bahnenraum +Sei X ein topologischer Raum, ∼ +eine Äquivalenzrelation auf X, X = X/ ∼ -versehen mit der Quotiententopologie, π : X X, x [x] . -∼ -→ (cid:55)→ +der Bahnenraum +versehen mit der Quotiententopologie, π : X +→ +X, x +(cid:55)→ +[x] ∼. Dann ist π stetig. 12 1.4.ZUSAMMENHANG -Beweis: Nach Definition ist U X offen π−1(U) X offen. (cid:4) -⊆ ⇔ ⊆ +Beweis: Nach Definition ist U +⊆ +X offen +⇔ +π−1(U) +⊆ +X offen. (cid:4) Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. Beispiel 14 (Stereographische Projektion) -Rn und Sn N sind homöomorph für beliebiges N Sn. Es gilt: -\{ } ∈ -Sn = (cid:8) x Rn+1 (cid:12) (cid:12) x = 1(cid:9) -∈ (cid:107) (cid:107) -(cid:40) (cid:12) (cid:12) n (cid:88)+1 (cid:41) -= x Rn+1 (cid:12) x2 = 1 -(cid:12) i +Rn und Sn \{N +} +sind homöomorph für beliebiges N +∈ +Sn. 
Es gilt: +Sn = (cid:8) x +∈ +Rn+1 (cid:12) (cid:12) (cid:107)x +(cid:107) += 1(cid:9) += +(cid:40) +x ∈ +Rn+1 (cid:12) -i=1 -  -0 +(cid:12) +(cid:12) +(cid:12) +(cid:12) +n+1 +(cid:88) +i=1x2 +i += +1(cid:41) +O. B. d. A. sei N = + + + + +0 . -. -.. -O. B. d. A. sei N = Die Gerade durch N und P schneidet die Ebene H in genau -  -0 -1 -einem Punkt Pˆ. P wird auf Pˆ abgebildet. -f :Sn N Rn -\{ } → +. +. +0 +1 + + + +. Die Gerade durch N und P schneidet die Ebene H in genau +einem Punkt ˆ P. P wird auf ˆ P abgebildet. +f :Sn \{N +} → +Rn +P +(cid:55)→ genaueinPunkt (cid:122) (cid:125)(cid:124) (cid:123) -P L H +L P -(cid:55)→ ∩ -  (cid:12)  - x 1 (cid:12)  - . (cid:12)  -wobei Rn = H =   . .   ∈ Rn+1 (cid:12) (cid:12) x n+1 = 0 und L P die Gerade in Rn+1 durch N - (cid:12)  - x n+1 (cid:12)  +∩H +wobei Rn = H = + + + + + +  +x 1 +. +. . +x +n+1 +  ∈ Rn+1 +(cid:12) +(cid:12) +(cid:12) +(cid:12) (cid:12) +(cid:12) +(cid:12) +x n+1 = +0 + + + + +und L P die Gerade in Rn+1 durch N und P ist. -  +Sei P = + +  x 1 . -Sei P =   . .  , so ist x n+1 < 1, also ist L P nicht parallel zu H. Also schneiden sich L P +. . x -n+1 -und H in genau einem Punkt Pˆ. +n+1 + , so ist x n+1 < 1, also ist L P nicht parallel zu H. Also schneiden sich L P +und H in genau einem Punkt ˆ P. Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. 1.4 Zusammenhang Definition 13 a) EinRaumX heißtzusammenhängend,wenneskeineoffenen,nichtleerenTeilmengen -U ,U von X gibt mit U U = und U U = X. -1 2 1 2 1 2 -∩ ∅ ∪ -b) Eine Teilmenge Y X heißt zusammenhängend, wenn Y als topologischer Raum mit +U 1,U +2 +von X gibt mit U +1 +∩U +2 += +∅ +und U +1 +∪U +2 += X. +b) Eine Teilmenge Y ⊆ +X heißt zusammenhängend, wenn Y als topologischer Raum mit der Teilraumtopologie zusammenhängend ist. 
13 1.4.ZUSAMMENHANG +x +y z N -P +ˆ P 0 -y -Pˆ -x +P Abbildung 1.6: Visualisierung der stereographischen Projektion Bemerkung 13 -X ist zusammenhängend Es gibt keine abgeschlossenen, nichtleeren Teilmengen A ,A -1 2 +X ist zusammenhängend ⇔ -mit A A = und A A = X. -1 2 1 2 -∩ ∅ ∪ +Es gibt keine abgeschlossenen, nichtleeren Teilmengen A 1,A +2 +mit A +1 +∩A +2 += +∅ +und A +1 +∪A +2 += X. Beispiel 15 (Zusammenhang von Räumen) -1) (Rn,T ) ist zusammenhängend, denn: +1) (Rn,T Euklid) ist zusammenhängend, denn: +Annahme: Rn = U +1 +˙ ∪U +2 +mit +∅ (cid:54) += U 1,U +2 +∈ +T Euklid -Annahme: Rn = U ˙ U mit = U ,U T existieren. -1 2 1 2 Euklid -∪ ∅ (cid:54) ∈ -Sei x U ,y U und [x,y] die Strecke zwischen x und y. Sei V = [x,y]. Nun -1 2 -∈ ∈ -betrachten wir V (cid:40) Rn als (metrischen) Teilraum mit der Teilraumtopologie T . +existieren. +Sei x +∈ +U 1,y +∈ +U +2 +und [x,y] die Strecke zwischen x und y. Sei V = [x,y]. Nun +betrachten wir V (cid:40) Rn als (metrischen) Teilraum mit der Teilraumtopologie T V. +Somit gilt U +1 +∩[x,y] +∈ +T V -Somit gilt U [x,y] T wegen der Definition der Teilraumtopologie. -1 V -∩ ∈ -Dann gibt es z [x,y] mit z ∂(U [x,y]), aber z / U z U . In jeder -1 1 2 -∈ ∈ ∩ ∈ ⇒ ∈ -Umgebung von z liegt ein Punkt von U Widerspruch zu U offen. -1 2 +wegen der Definition der Teilraumtopologie. +Dann gibt es z +∈ +[x,y] mit z +∈ +∂(U +1 +∩ +[x,y]), aber z / +∈ +U +1 +⇒ +z +∈ +U 2. In jeder +Umgebung von z liegt ein Punkt von U +1 ⇒ -2) R 0 ist nicht zusammenhängend, denn R 0 = R R -<0 >0 -\{ } \{ } ∪ -3) R2 0 ist zusammenhängend. -\{ } -4) Q (cid:40) R ist nicht zusammenhängend, da (Q R √ ) (Q R √ ) = Q -∩ < 2 ∪ ∩ > 2 -5) x ist zusammenhängend für jedes x X, wobei X ein topologischer Raum ist. -{ } ∈ +Widerspruch zu U +2 +offen. +2) R \{0 +} +ist nicht zusammenhängend, denn R \{0 +} += R +<0 +∪R +>0 +3) R2 \{0 +} +ist zusammenhängend. 
+4) Q (cid:40) R ist nicht zusammenhängend, da (Q ∩R <√ 2) ∪(Q ∩R >√ 2) = Q +5) {x +} +ist zusammenhängend für jedes x +∈ +X, wobei X ein topologischer Raum ist. 6) R mit Zariski-Topologie ist zusammenhängend. Bemerkung 14 -Sei X ein topologischer Raum und A X zusammenhängend. Dann ist auch A zusammen- +Sei X ein topologischer Raum und A ⊆ +X zusammenhängend. Dann ist auch A zusammen- hängend. 14 1.4.ZUSAMMENHANG Beweis: durch Widerspruch -Annahme: A = A A , A abgeschlossen, A = , A A = -1 2 i i 1 2 -∪ (cid:54) ∅ ∩ ∅ -A = (A A ) ˙ (A A ) -1 2 -⇒ ∩ ∪ ∩ -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -abgeschlossen abgeschlossen +Annahme: A = A +1 +∪A 2, A +i +abgeschlossen, A +i +(cid:54)= ∅, A +1 +∩A +2 += +∅ +⇒ +A = (A ∩A 1) +(cid:124) (cid:123)(cid:122) (cid:125) +abgeschlossen +˙ +∪ +(A ∩A 2) +(cid:124) (cid:123)(cid:122) (cid:125) +abgeschlossen (cid:124) (cid:123)(cid:122) (cid:125) disjunkt -Wäre A A = +Wäre A ∩A 1 -∩ ∅ -A A = A ˙ A -1 2 -⇒ ⊆ ∪ -A A A A -2 2 -⇒ ⊆ ⇒ ⊆ -A = += +∅ +⇒ +A +⊆ +A = A +1 +˙ ∪A +2 +⇒ +A +⊆ +A +2 +⇒ +A +⊆ +A +2 +⇒ +A +1 += +∅ +⇒ +Widerspruch zu A 1 -⇒ ∅ -Widerspruch zu A = +(cid:54)= +∅ +⇒ +A ∩A 1 -⇒ (cid:54) ∅ -A A = und analog A A = -1 2 -⇒ ∩ (cid:54) ∅ ∩ (cid:54) ∅ -Widerspruch zu A ist zusammenhängend. (cid:4) +(cid:54)= +∅ +und analog A ∩A +2 +(cid:54)= +∅ ⇒ +Widerspruch zu A ist zusammenhängend. (cid:4) Bemerkung 15 -Sei X ein topologischer Raum und A,B X zusammenhängend. +Sei X ein topologischer Raum und A,B ⊆ -Ist A B = , dann ist A B zusammenhängend. -∩ (cid:54) ∅ ∪ -Beweis: Sei A B = U ˙ U ,U = offen -1 2 i -∪ ∪ (cid:54) ∅ +X zusammenhängend. +Ist A ∩B (cid:54)= ∅, dann ist A ∪B zusammenhängend. +Beweis: Sei A ∪B = U +1 +˙ ∪U 2,U +i +(cid:54)= +∅ +offen o.B.d.A. -====== A = (A U ) ˙ (A U ) offen -1 2 -⇒ ∩ ∪ ∩ +====== +⇒ +A = (A ∩U 1) ˙ ∪(A ∩U 2) offen Azhgd. 
-==== A U = +==== +⇒ +A ∩U 1 -⇒ ∩ ∅ -A∩B(cid:54)=∅ -==== U B += +∅ +A∩B(cid:54)=∅ +==== +⇒ +U 1 -⇒ ⊆ -B = (B U ) (B U ) ist unerlaubte Zerlegung. -1 2 -∩ ∪ ∩ -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -=U1 =∅ +⊆ +B +B = (B ∩U 1) +(cid:124) (cid:123)(cid:122) (cid:125) +=U1 +∪(B ∩U 2) +(cid:124) (cid:123)(cid:122) (cid:125) +=∅ +ist unerlaubte Zerlegung. (cid:4) Definition 14 Sei X ein topologischer Raum. -Für x X sei Z(x) X definiert durch -∈ ⊆ +Für x +∈ +X sei Z(x) +⊆ +X definiert durch +Z(x) := (cid:91) -Z(x) := A A⊆Xzhgd. -x∈A +x∈AA Z(x) heißt Zusammenhangskomponente. Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) Sei X ein topologischer Raum. Dann gilt: @@ -742,355 +1235,630 @@ b) Z(x) ist abgeschlossen. c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. Beweis: 15 1.5.KOMPAKTHEIT -a) Sei Z(x) = A ˙ A mit A = abgeschlossen. -1 2 i -∪ (cid:54) ∅ -O. B. d. A. sei x A und y A . y liegt in einer zusammehängenden Teilmenge A, -1 2 -∈ ∈ -die auch x enthält. A = (A A ) (A A ) ist unerlaubte Zerlegung. -1 2 -⇒ ∩ ∪ ∩ -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -(cid:51)x (cid:51)y -b) Nach Bemerkung 14 ist Z(x) zusammenhängend Z(x) Z(x) Z(x) = Z(x) -⇒ ⊆ ⇒ +a) Sei Z(x) = A +1 +˙ ∪A +2 +mit A +i +(cid:54)= +∅ +abgeschlossen. +O. B. d. A. sei x +∈ +A +1 +und y +∈ +A 2. y liegt in einer zusammehängenden Teilmenge A, +die auch x enthält. +⇒ +A = (A ∩A 1) +(cid:124) (cid:123)(cid:122) (cid:125) +(cid:51)x +∪(A ∩A 2) +(cid:124) (cid:123)(cid:122) (cid:125) +(cid:51)y +ist unerlaubte Zerlegung. +b) Nach Bemerkung 14 ist Z(x) zusammenhängend +⇒ +Z(x) +⊆ +Z(x) +⇒ +Z(x) = Z(x) +c) Ist Z(y) ∩Z(x) (cid:54)= +∅ Bem. 15 -c) Ist Z(y) Z(x) = ===== Z(y) Z(x) ist zusammenhängend. -∩ (cid:54) ∅ ⇒ ∪ -Z(x) Z(y) Z(x) Z(y) Z(x) -⇒ ∪ ⊆ ⇒ ⊆ -Z(y) Z(x) Z(y) -⊆ ⇒ ⊆ +===== +⇒ +Z(y) ∪Z(x) ist zusammenhängend. 
+⇒ +Z(x) ∪Z(y) +⊆ +Z(x) +⇒ +Z(y) +⊆ +Z(x) +⊆ +Z(y) +⇒ +Z(x) +⊆ +Z(y) (cid:4) Bemerkung 17 -Sei f : X Y stetig. Ist A X zusammenhängend, so ist f(A) Y zusammenhängend. -→ ⊆ ⊆ -Beweis: Sei f(A) = U U ,U = , offen, disjunkt. -1 2 i -∪ (cid:54) ∅ -f−1(f(A)) = f−1(U ) f−1(U ) -1 2 -⇒ ∪ -A = (A f−1(U )) (A f−1(U )) (cid:4) -1 2 -⇒ ∩ ∪ ∩ -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -(cid:54)=∅ (cid:54)=∅ +Sei f : X +→ +Y stetig. Ist A +⊆ +X zusammenhängend, so ist f(A) +⊆ +Y zusammenhängend. +Beweis: Sei f(A) = U +1 +∪U 2,U +i +(cid:54)= ∅, offen, disjunkt. +⇒ +f−1(f(A)) = f−1(U 1) ∪f−1(U 2) +⇒ +A = (A ∩f−1(U 1)) +(cid:124) (cid:123)(cid:122) (cid:125) +(cid:54)=∅ +∪(A ∩f−1(U 2)) +(cid:124) (cid:123)(cid:122) (cid:125) +(cid:54)=∅ +(cid:4) 1.5 Kompaktheit Definition 15 -Sei X eine Menge und U (X). -⊆ P +Sei X eine Menge und U +⊆ +P(X). U heißt eine Überdeckung von X, wenn gilt: -x X : M U : x M -∀ ∈ ∃ ∈ ∈ +∀x +∈ +X : ∃M +∈ +U : x +∈ +M Definition 16 Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X -U = U mit U offen in X -{ i }i∈I i +U = {U +i }i∈I +mit U +i +offen in X eine endliche Teilüberdeckung (cid:91) -U = X mit J N -i -| | ∈ i∈J⊆I +U +i += X mit |J +| ∈ +N besitzt. Bemerkung 18 Das Einheitsintervall I := [0,1] ist kompakt bezüglich der euklidischen Topologie. -Beweis: Sei (U ) eine offene Überdeckung von I. -i i∈J +Beweis: Sei (U i) +i∈J +eine offene Überdeckung von I. Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in -einem der U enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle +einem der U i +enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle 16 1.5.KOMPAKTHEIT -der Länge δ unterteilen und alle U in die endliche Überdeckung aufnehmen, die Teilintervalle +der Länge δ unterteilen und alle U i +in die endliche Überdeckung aufnehmen, die Teilintervalle enthalten. -Angenommen, es gibt kein solches δ. 
Dann gibt es für jedes n N ein Intervall I [0,1] +Angenommen, es gibt kein solches δ. Dann gibt es für jedes n +∈ +N ein Intervall I n -∈ ⊆ -der Länge 1/n sodass I (cid:40) U für alle i J. -n i -∈ -Sei x der Mittelpunkt von I . Die Folge (x ) hat einen Häufungspunkt x [0,1]. Dann -n n n -∈ -gibt es i J mit x U . Da U offen ist, gibt es ein ε > 0, sodass (x ε,x+ε) U . -i i i -∈ ∈ − ⊆ -Dann gibt es n 0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n n : x x < ε/2, also -0 n -≥ | − | -I (x ε,x+ε) U für mindestens ein n N.4 -n i -⊆ − ⊆ ∈ -Widerspruch +⊆ +[0,1] +der Länge 1/n sodass I +n +(cid:40) U +i +für alle i +∈ +J. +Sei x +n +der Mittelpunkt von I n. Die Folge (x n) hat einen Häufungspunkt x +∈ +[0,1]. Dann +gibt es i +∈ +J mit x +∈ +U i. Da U +i +offen ist, gibt es ein ε > 0, sodass (x −ε,x+ε) +⊆ +U i. +Dann gibt es n 0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n +≥ +n +0 +: |x −x +n +| +< ε/2, also +I +n +⊆ +(x −ε,x+ε) +⊆ +U +i +für mindestens ein n +∈ +N.4 ⇒ -Dann überdecke [0,1] mit endlich vielen Intervallen I ,...,I der Länge δ. Jedes I ist in -1 d j -U enthalten. +Widerspruch +Dann überdecke [0,1] mit endlich vielen Intervallen I 1,...,I +d +der Länge δ. Jedes I +j +ist in +U ij -U ,...,U ist endliche Teilüberdeckung von U. (cid:4) -j1 j -⇒ d +enthalten. +⇒ +U j1,...,U +j +d +ist endliche Teilüberdeckung von U. (cid:4) Beispiel 16 (Kompakte Räume) 1) R ist nicht kompakt. 2) (0,1) ist nicht kompakt. -(cid:83) -U = (1/n,1 −1/n) n∈NU = (0,1) -n n +U +n += (1/n,1 −1/n) ⇒ +(cid:83) +n∈NU +n += (0,1) 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. Bemerkung 19 -Sei X kompakter Raum, A X abgeschlossen. Dann ist A kompakt. +Sei X kompakter Raum, A +⊆ +X abgeschlossen. Dann ist A kompakt. +Beweis: Sei (V i) +i∈I +offene Überdeckung von A. +Dann gibt es für jedes i +∈ +I eine offene Teilmenge U +i +⊆ +X mit V +i += U +i +∩A. +⇒ +A ⊆ -Beweis: Sei (V ) offene Überdeckung von A. 
-i i∈I -Dann gibt es für jedes i I eine offene Teilmenge U X mit V = U A. -i i i -∈ ⊆ ∩ (cid:91) -A U +i∈IU i -⇒ ⊆ -i∈I -U = U i I X A ist offene Überdeckung von X +⇒ +U = {U +i +| i -⇒ { | ∈ }∪{ \ } +∈ +I }∪{X \A +} +ist offene Überdeckung von X +X kompakt +======= +⇒ +es gibt i 1,...,i n -X kompakt (cid:91) -======= es gibt i ,...,i I, sodass U (X A) = X -1 n ij -⇒ ∈ ∪ \ -j=1 -  +∈ +I, sodass +n +(cid:91) +j=1U +ij +∪(X \A) = X +⇒ + + n (cid:91) - U ij ∪(X \A) ∩A = A +j=1U ij ∪(X +\A) + ∩A = A ⇒ -j=1 n (cid:91) -(U A) ((X A) A) = A +j=1(U ij -⇒ ∩ ∪ \ ∩ -j=1(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -=Vij =∅ -V ,...,V überdecken A. -i1 in +∩A) +(cid:124) (cid:123)(cid:122) (cid:125) +=Vij +∪((X \A) ∩A) +(cid:124) (cid:123)(cid:122) (cid:125) +=∅ += A ⇒ +V i1,...,V +in +überdecken A. (cid:4) Bemerkung 20 -Seien X,Y kompakte topologische Räume. Dann ist X Y mit der Produkttopologie +Seien X,Y kompakte topologische Räume. Dann ist X × +Y mit der Produkttopologie kompakt. -Beweis: Sei (W ) eine offene Überdeckung von X Y. Für jedes (x,y) X Y gibt es -i i∈I -× ∈ × -offene Teilmengen U von X und V von Y sowie ein i I, sodass U V W . -x,y x,y x,y x,y i -∈ × ⊆ -3Dies gilt nicht für alle n≥n , da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. -0 +Beweis: Sei (W i) +i∈I +eine offene Überdeckung von X ×Y. Für jedes (x,y) +∈ +X ×Y gibt es +offene Teilmengen U +x,y +von X und V +x,y +von Y sowie ein i +∈ +I, sodass U +x,y +×V +x,y +⊆ +W i. +3Dies gilt nicht für alle n≥n 0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4Sogar für unendlich viele. 17 1.5.KOMPAKTHEIT -X W i x -V y -x,y -Y +y x +V +x,y U x,y +Y +X Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen -Die offenen Mengen U V für festes x und alle y Y überdecken x y. Da Y -x0,y x0,y 0 0 -× ∈ { }× +Die offenen Mengen U +x0,y +×V +x0,y +für festes x +0 +und alle y +∈ +Y überdecken {x +0 +}×y. 
Da Y +kompakt ist, ist auch {x +0 +}×Y kompakt. Also gibt es y 1,...,y +m(x0) +mit (cid:83)m(x0) -kompakt ist, ist auch x Y kompakt. Also gibt es y ,...,y mit U -{ 0 }× 1 m(x0) i=1 x0,yi × -V x Y. -x0,yi 0 -⊇ { }× -(cid:84)m(x) (cid:83)n -Sei U := U . Da X kompakt ist, gibt es x ,...,x X mit U = X -x0 i=1 x0,yi 1 n ∈ j=1 xj -(cid:83)k (cid:83)m(xj)(cid:0) (cid:1) -U V X Y -⇒ j=1 i=1 xj,yi × xj,yi ⊇ × +i=1 +U +x0,yi × +V +x0,yi +⊇ +{x +0 +}×Y. +Sei U +x0 +:= +(cid:84)m(x) +i=1 +U x0,yi. Da X kompakt ist, gibt es x 1,...,x +n ∈ +X mit +(cid:83)n +j=1U +xj += X +⇒ +(cid:83)k j=1(cid:83)m(xj) +i=1 +(cid:0) +U +xj,yi +×V +xj,yi(cid:1) (cid:124) (cid:123)(cid:122) (cid:125) Eingrün-orangesKästchen -(cid:83) (cid:83) W (x ,y ) = X Y (cid:4) -⇒ j i i j i × +⊇ +X ×Y +⇒ +(cid:83) j(cid:83) iW i(x j,y i) = X ×Y (cid:4) Bemerkung 21 -Sei X ein Hausdorffraum und K X kompakt. Dann ist K abgeschlossen. +Sei X ein Hausdorffraum und K ⊆ +X kompakt. Dann ist K abgeschlossen. Beweis: z. Z.: Komplement ist offen -Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y X K. Für jedes x K seien -∈ \ ∈ -U bzw. V Umgebungen von x bzw. von y, sodass U V = . -x y x y -∩ ∅ +Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y +∈ +X \K. Für jedes x +∈ +K seien +U +x +bzw. V +y +Umgebungen von x bzw. von y, sodass U +x +∩V +y += ∅. X i -x K +x y +Da K kompakt ist, gibt es endlich viele x 1,...,x +n +∈ +K, sodass (cid:83)m -Da K kompakt ist, gibt es endlich viele x ,...,x K, sodass U K. -1 n ∈ i=1 xi ⊇ +i=1U +xi +⊇ +K. +Sei V := n (cid:92) -Sei V := V +i=1V xi -i=1 18 1.6.WEGEUNDKNOTEN -(cid:32) n (cid:33) +⇒ +V +∩(cid:32) n (cid:91) -V U = -xi -⇒ ∩ ∅ -i=1 -V K = -⇒ ∩ ∅ -V ist Überdeckung von y, die ganz in X K enthalten ist. -⇒ \ -X K ist offen -⇒ \ +i=1U +xi(cid:33) += +∅ +⇒ +V ∩K = +∅ +⇒ +V ist Überdeckung von y, die ganz in X \K enthalten ist. +⇒ +X \K ist offen Damit ist K abgeschlossen. (cid:4) Bemerkung 22 -Seien X,Y topologische Räume, f : X Y stetig. 
+Seien X,Y topologische Räume, f : X → -Ist K X kompakt, so ist f(K) Y kompakt. -⊆ ⊆ -Beweis: Sei (V ) offene Überdeckung von f(K) -i i∈I -=f =s =te =tig (f−1(V )) ist offene Überdeckung von K -i i∈I +Y stetig. +Ist K +⊆ +X kompakt, so ist f(K) +⊆ +Y kompakt. +Beweis: Sei (V i) +i∈I +offene Überdeckung von f(K) +f stetig ==== ⇒ -=K =o =m =p =akt es gibt i ,...,i , sodass f−1(V ),...,f−1(V ) Überdeckung von K ist. -1 n i1 in +(f−1(V i)) +i∈I +ist offene Überdeckung von K +Kompakt ===== ⇒ -f(f−1(V )),...,f(f−1(V )) überdecken f(K). -i1 in +es gibt i 1,...,i n, sodass f−1(V i1),...,f−1(V in) Überdeckung von K ist. ⇒ -Es gilt: f(f−1(V)) = V f(X) (cid:4) -∩ +f(f−1(V i1)),...,f(f−1(V in)) überdecken f(K). +Es gilt: f(f−1(V)) = V ∩f(X) (cid:4) Satz 1.1 (Heine-Borel) Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und abgeschlossen ist. -Beweis: „ “: Sei K Rn (oder Cn) kompakt. -⇒ ⊆ +Beweis: „ ⇒“: Sei K +⊆ +Rn (oder Cn) kompakt. Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset- -zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden K ist +zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ +K ist beschränkt. -„ “ Sei A Rn (oder Cn) beschränkt und abgeschlossen. -⇐ ⊆ -Dann gibt es einen Würfel W = [ N,N] [ N,N] mit A W bzw. „Polyzylinder“ -− ×···× − ⊆ +„ ⇐“ Sei A +⊆ +Rn (oder Cn) beschränkt und abgeschlossen. +Dann gibt es einen Würfel W = [ −N,N] ×···×[ −N,N] (cid:124) (cid:123)(cid:122) (cid:125) nmal -Z = (z ,...,z ) Cn z N für i = 1,...,n -1 n i -{ ∈ | ≤ } +mit A +⊆ +W bzw. „Polyzylinder“ +Z = {(z 1,...,z n) +∈ +Cn +| +z +i +≤ +N für i = 1,...,n +} Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch kompakt. 
Genauso ist Z kompakt, weil -z C z 1 -{ ∈ (cid:107) | ≤ } +{z +∈ +C +(cid:107) +z +| ≤ +1 +} homöomorph zu -(cid:8) (x,y) R2 (cid:12) (cid:12) (x,y) 1(cid:9) -∈ (cid:107) (cid:107) ≤ +(cid:8) (x,y) +∈ +R2 (cid:12) (cid:12) (cid:107)(x,y) +(cid:107) ≤ +1(cid:9) ist. (cid:4) 1.6 Wege und Knoten Definition 17 Sei X ein topologischer Raum. 19 1.6.WEGEUNDKNOTEN -a) Ein Weg in X ist eine stetige Abbildung γ : [0,1] X. +a) Ein Weg in X ist eine stetige Abbildung γ : [0,1] → +X. b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. -c) γ heißt einfach, wenn γ injektiv ist. -[0,1) -| +c) γ heißt einfach, wenn γ +|[0,1) +injektiv ist. Beispiel 17 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form -x [0,1] : γ(x) = c, c X -∀ ∈ ∈ +∀x +∈ +[0,1] : γ(x) = c, c +∈ +X Denn γ([0,1]) ist zusammenhängend für jeden Weg γ. Definition 18 Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten -x,y X einen Weg γ : [0,1] X gibt mit γ(0) = x und γ(1) = y. -∈ → +x,y +∈ +X einen Weg γ : [0,1] +→ +X gibt mit γ(0) = x und γ(1) = y. Bemerkung 23 Sei X ein topologischer Raum. -a) X ist wegzusammenhängend X ist zusammenhängend +a) X ist wegzusammenhängend ⇒ -b) X ist wegzusammenhängend X ist zusammenhängend +X ist zusammenhängend +b) X ist wegzusammenhängend (cid:54)⇐ +X ist zusammenhängend Beweis: -a) Sei X ein wegzusammenhängender topologischer Raum, A ,A nichtleere, disjunkte, -1 2 -abgeschlossene Teilmengen von X mit A A = X. Sei x A ,y A ,γ : [0,1] X -1 2 1 2 -∪ ∈ ∈ → +a) Sei X ein wegzusammenhängender topologischer Raum, A 1,A +2 +nichtleere, disjunkte, +abgeschlossene Teilmengen von X mit A +1 +∪A +2 += X. Sei x +∈ +A 1,y +∈ +A 2,γ : [0,1] +→ +X ein Weg von x nach y. -Dann ist C := γ([0,1]) X zusammenhängend, weil γ stetig ist. 
+Dann ist C := γ([0,1]) ⊆ -C = (C A ) (C A ) -1 2 -∩ ∪ ∩ -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -(cid:51)x (cid:51)y -ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen Widerspruch +X zusammenhängend, weil γ stetig ist. +C = (C ∩A 1) +(cid:124) (cid:123)(cid:122) (cid:125) +(cid:51)x +∪(C ∩A 2) +(cid:124) (cid:123)(cid:122) (cid:125) +(cid:51)y +ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ -(cid:110) (cid:12) (cid:111) -b) Sei X = (x,y) R2 (cid:12) x2+y2 = 1 y = 1+2 e− 11 0x . +Widerspruch +b) Sei X = +(cid:110) +(x,y) +∈ +R2 +(cid:12) +(cid:12) (cid:12) -∈ ∨ · +x2+y2 = 1 ∨y = 1+2 ·e− 1 +10x(cid:111) +. Abbildung 1.8a veranschaulicht diesen Raum. -Sei U U = X,U = U = ,U offen. X = C S. Dann ist C U oder C U , -1 2 1 2 i 1 2 -∪ (cid:54) ∅ ∪ ⊆ ⊆ +Sei U +1 +∪U +2 += X,U +1 +(cid:54)= U +2 += ∅,U +i +offen. X = C ∪S. Dann ist C +⊆ +U +1 +oder C +⊆ +U 2, weil C und S zusammenhängend sind. -Also ist C = U und S = U (oder umgekehrt). -1 2 -Sei y C = U ,ε > 0 und B (y) U eine Umgebung von y, die in U enthalten ist. -1 ε 1 1 -∈ ⊆ -Aber: B (y) S = Widerspruch X S ist zusammenhängend, aber nicht -ε -∩ (cid:54) ∅ ⇒ ⇒ ∪ +Also ist C = U +1 +und S = U +2 +(oder umgekehrt). +Sei y +∈ +C = U 1,ε > 0 und B ε(y) +⊆ +U +1 +eine Umgebung von y, die in U +1 +enthalten ist. +Aber: B ε(y) ∩S (cid:54)= +∅ ⇒ +Widerspruch +⇒ +X ∪S ist zusammenhängend, aber nicht wegzusammenhängend. (cid:4) Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0,1] [0,1] [0,1]. Ein Beispiel ist die in Abbil- -→ × +Es gibt stetige, surjektive Abbildungen [0,1] +→ +[0,1] ×[0,1]. Ein Beispiel ist die in Abbil- dung 1.9 dargestellte Hilbert-Kurve. Definition 19 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : -[0,1] C X bzw. γ : S1 C X, wobei C := Bildγ. -→ ⊆ → ⊆ +[0,1] +→ +C +⊆ +X bzw. γ : S1 +→ +C +⊆ +X, wobei C := Bildγ. 
20 1.6.WEGEUNDKNOTEN -1 -(x,sin(1)) X Y -{ x ∈ × } -( 1,1) Y -− ⊆ -X -0 Y +(a) Spirale S mit Kreis C 0.1 1 -1 -− -(a) Spirale S mit Kreis C (b) Sinus -Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend -sind. -(a) n=1 (b) n=2 (c) n=3 (d) n=4 (e) n=5 -Abbildung 1.9: Hilbert-Kurve -Jede Jordankurve ist also ein einfacher Weg. +−101 +X +Y +{(x,sin(1 x)) +∈ +X ×Y +} +( −1,1) +⊆ +Y +(b) Sinus +Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend +sind. +(a) n=1 (b) n=2 (c) n=3 (d) n=4 (e) n=5 +Abbildung 1.9: Hilbert-Kurve +Jede Jordankurve ist also ein einfacher Weg. Satz 1.2 (Jordanscher Kurvensatz) -Ist C = γ([0,1]) eine geschlossene Jordankurve in R2, so hat R2 C genau zwei +Ist C = γ([0,1]) eine geschlossene Jordankurve in R2, so hat R2 \ +C genau zwei Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. außen innen @@ -1105,48 +1873,68 @@ Idee: Ersetze Weg C durch Polygonzug. Definition 20 Eine geschlossene Jordankurve in R3 heißt Knoten. Beispiel 19 (Knoten) -(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6 -Knoten -2 +(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6 2-Knoten Abbildung 1.11: Beispiele für verschiedene Knoten Definition 21 -Zwei Knoten γ ,γ : S1 R3 heißen äquivalent, wenn es eine stetige Abbildung -1 2 +Zwei Knoten γ 1,γ +2 +: S1 → -H : S1 [0,1] R3 -× → +R3 heißen äquivalent, wenn es eine stetige Abbildung +H : S1 ×[0,1] +→ +R3 gibt mit -H(z,0) = γ (z) z S1 -1 -∀ ∈ -H(z,1) = γ (z) z S1 -2 -∀ ∈ -und für jedes feste t [0,1] ist +H(z,0) = γ 1(z) ∀z +∈ +S1 +H(z,1) = γ 2(z) ∀z ∈ -H : S1 R3,z H(z,t) +S1 +und für jedes feste t +∈ +[0,1] ist +H z -→ (cid:55)→ -ein Knoten. Die Abbildung H heißt Isotopie zwischen γ und γ . -1 2 +: S1 +→ +R3,z +(cid:55)→ +H(z,t) +ein Knoten. Die Abbildung H heißt Isotopie zwischen γ +1 +und γ 2. Definition 22 -Sei γ : [0,1] R3 ein Knoten, E eine Ebene und π : R3 E eine Projektion auf E. 
-→ → +Sei γ : [0,1] +→ +R3 ein Knoten, E eine Ebene und π : R3 +→ +E eine Projektion auf E. π heißt Knotendiagramm von γ, wenn gilt: (cid:12) (cid:12)π−1(x)(cid:12) -(cid:12) 2 x π(γ) -≤ ∀ ∈ -Ist (π )−1(x) = y ,y , so liegt y über y , wenn gilt: -γ([0,1]) 1 2 1 2 -| { } -λ > 1 : (y x) = λ(y x) -1 2 -∃ − − +(cid:12) +≤ +2 ∀x +∈ +π(γ) +Ist (π |γ([0,1]))−1(x) = {y 1,y +2 +}, so liegt y +1 +über y 2, wenn gilt: +∃λ > 1 : (y +1 +−x) = λ(y +2 +−x) Satz 1.3 (Satz von Reidemeister) Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. 22 1.6.WEGEUNDKNOTEN -(a) Ω (b) Ω -1 2 +(a) Ω +1 +(b) Ω +2 (c) Ω 3 Abbildung 1.12: Reidemeister-Züge @@ -1160,41 +1948,59 @@ Abbildung 1.13: Ein 3-gefärber Kleeblattknoten 23 1.6.WEGEUNDKNOTEN Übungsaufgaben Aufgabe 1 (Sierpińskiraum) -Es sei X := 0,1 und T := , 0 ,X . Dies ist der sogenannte Sierpińskiraum. -X -{ } {∅ { } } -(a) Beweisen Sie, dass (X,T ) ein topologischer Raum ist. -X -(b) Ist (X,T ) hausdorffsch? +Es sei X := {0,1 +} +und T X -(c) Ist T von einer Metrik erzeugt? +:= {∅, {0 },X }. Dies ist der sogenannte Sierpińskiraum. +(a) Beweisen Sie, dass (X,T X) ein topologischer Raum ist. +(b) Ist (X,T X) hausdorffsch? +(c) Ist T X +von einer Metrik erzeugt? Aufgabe 2 -Es sei Z mit der von den Mengen U := a+bZ(a Z,b Z 0 ) erzeugten Topologie +Es sei Z mit der von den Mengen U a,b -∈ ∈ \{ } +:= a+bZ(a +∈ +Z,b +∈ +Z \{0 }) erzeugten Topologie versehen. Zeigen Sie: -(a) Jedes U und jede einelementige Teilmenge von Z ist abgeschlossen. +(a) Jedes U a,b -(b) 1,1 ist nicht offen. -{− } +und jede einelementige Teilmenge von Z ist abgeschlossen. +(b) {−1,1 +} +ist nicht offen. (c) Es gibt unendlich viele Primzahlen. Aufgabe 3 (Cantorsches Diskontinuum) -Für jedes i N sei P := 0,1 mit der diskreten Topologie. Weiter Sei P := (cid:81) P . -i i∈N i -∈ { } +Für jedes i +∈ +N sei P +i +:= {0,1 +} +mit der diskreten Topologie. 
Weiter Sei P := (cid:81) i∈NP i. (a) Wie sehen die offenen Mengen von P aus? (b) Was können Sie über den Zusammenhang von P sagen? Aufgabe 4 (Kompaktheit) -(a) Ist GL (R) = A Rn×n det(A) = 0 kompakt? -n -{ ∈ | (cid:54) } -(b) Ist SL (R) = A Rn×n det(A) = 1 kompakt? -n -{ ∈ | } -(c) Ist (R) kompakt? -P +(a) Ist GL n(R) = {A +∈ +Rn×n +| +det(A) (cid:54)= 0 +} +kompakt? +(b) Ist SL n(R) = {A +∈ +Rn×n +| +det(A) = 1 +} +kompakt? +(c) Ist P(R) kompakt? Aufgabe 5 (Begriffe) Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, @@ -1209,25 +2015,41 @@ Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie Simplizialkomplexe 2.1 Topologische Mannigfaltigkeiten Definition 24 -Sei (X,T) ein topologischer Raum und n N. +Sei (X,T) ein topologischer Raum und n ∈ -a) Eine n-dimensionale Karte auf X ist ein Paar (U,ϕ), wobei U T und ϕ : U V -∈ → -Homöomorphismus von U auf eine offene Teilmenge V Rn. +N. +a) Eine n-dimensionale Karte auf X ist ein Paar (U,ϕ), wobei U +∈ +T und ϕ : U +→ +V +Homöomorphismus von U auf eine offene Teilmenge V ⊆ -b) Ein n-dimensionaler Atlas auf X ist eine Familie (U ,ϕ ) von Karten auf X, -i i i∈I -(cid:83) A -sodass U = X. -i∈I i +Rn. +b) Ein n-dimensionaler Atlas +A +auf X ist eine Familie (U i,ϕ i) +i∈I +von Karten auf X, +sodass +(cid:83) +i∈IU +i += X. c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn ähnlich. Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) -Jede n-dimensionale Mannigfaltigkeit mit n 1 ist mindestens so mächtig wie R. +Jede n-dimensionale Mannigfaltigkeit mit n ≥ -Beweis: Sei (X,T) ein topologischer Raum und (U,ϕ) mit U T und ϕ : U V Rn, wobei -∈ → ⊆ +1 ist mindestens so mächtig wie R. 
+Beweis: Sei (X,T) ein topologischer Raum und (U,ϕ) mit U +∈ +T und ϕ : U +→ +V +⊆ +Rn, wobei V offen und ϕ ein Homöomorphismus ist, eine Karte auf X. Da jede offene Teilmenge des Rn genauso mächtig ist wie der Rn, ϕ als Homöomorphismus insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig @@ -1236,137 +2058,276 @@ hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn. (cid:4 Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können beliebig viele Elemente haben. Bemerkung 25 -a) Es gibt surjektive, stetige Abbildungen [0,1] [0,1] [0,1] -→ × -b) Für n = m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz -(cid:54) +a) Es gibt surjektive, stetige Abbildungen [0,1] +→ +[0,1] ×[0,1] +b) Für n (cid:54)= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz von der Gebietstreue“ (Brouwer): -Ist U Rn offen und f : U Rn stetig und injektiv, so ist f(U) offen. -⊆ → +Ist U +⊆ +Rn offen und f : U +→ +Rn stetig und injektiv, so ist f(U) offen. Ist n < m und Rm homöomorph zu Rn, so wäre -f : Rn Rm Rn, (x ,...,x ) (x ,x ,...,x ,0,...,0) -1 n 1 2 n -→ → (cid:55)→ -eine stetige injektive Abbildung. Also müsste f(Rn) offen sein Widerspruch +f : Rn +→ +Rm +→ +Rn, (x 1,...,x n) +(cid:55)→ +(x 1,x 2,...,x n,0,...,0) +eine stetige injektive Abbildung. Also müsste f(Rn) offen sein ⇒ +Widerspruch 26 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN Beispiel 20 (Mannigfaltigkeiten) -1) Jede offene Teilmenge U Rn ist eine n-dimensionale Mannigfaltigkeit mit einem +1) Jede offene Teilmenge U ⊆ +Rn ist eine n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte. 
2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: -(z ,...,z ) ( (z ), (z ),..., (z ), (z )) -1 n 1 1 n n -(cid:55)→ (cid:60) (cid:61) (cid:60) (cid:61) -3) n(R) = (Rn+1 0 )/ = Sn/ und n(C) sind Mannigfaltigkeiten der Dimension -∼ ∼ -P \{ } P +(z 1,...,z n) +(cid:55)→ +( (cid:60)(z 1), (cid:61)(z 1),..., (cid:60)(z n), (cid:61)(z n)) +3) Pn(R) = (Rn+1 \{0 })/ +∼ += Sn/ +∼ +und Pn(C) sind Mannigfaltigkeiten der Dimension n bzw. 2n, da gilt: -Sei U := (x : : x ) n(R) x = 0 i 0,...,n. Dann ist n(R) = (cid:83)n U -i { 0 ··· n ∈ P | i (cid:54) } ∀ ∈ P i=0 i +Sei U +i +:= {(x +0 +: +··· +: x n) +∈ +Pn(R) +| +x +i +(cid:54)= 0 +} +∀i +∈ +0,...,n. Dann ist Pn(R) = (cid:83)n i=0U +i und die Abbildung -U Rn +U i → -(cid:18) (cid:19) -x 0 x (cid:1)(cid:1)i x n -(x : : x ) ,..., ,..., -0 n -··· (cid:55)→ x (cid:1)x x -i i i -(y : : y : 1 : y : : y ) (y ,...,y ) -1 i−1 i n 1 n -··· ··· → (cid:55) +Rn +(x +0 +: +··· +: x n) +(cid:55)→ +(cid:18) +x 0 +x +i,..., +(cid:1)(cid:1)(cid:1) x i +x +i,..., +x n +x +i(cid:19) +(y +1 +: +··· +: y +i−1 +: 1 : y +i +: +··· +: y n) +(cid:55)→ +(y 1,...,y n) ist bijektiv. -Die U mit i = 0,...,n bilden einen n-dimensionalen Atlas: +Die U i -x = (1 : 0 : 0) U R2 x (0,0) +mit i = 0,...,n bilden einen n-dimensionalen Atlas: +x = (1 : 0 : 0) +∈ +U 0 -∈ → (cid:55)→ -y = (0 : 1 : 1) U R2 y (0,1) +→ +R2 x +(cid:55)→ +(0,0) +y = (0 : 1 : 1) +∈ +U 2 -∈ → (cid:55)→ -Umgebung: B (0,1) (1 : u : v) (u,v) < 1 = V -1 1 -→ (cid:8){ |(cid:12)(cid:107) (cid:107) }(cid:9) -Umgebung: B 1(0,1) (w : z : 1) (cid:12) w2+z2 < 1 = V 2 → -V V = ? -1 2 -∩ ∅ -(a : b : c) V V -1 2 -∈ ∩ -a = 0 und (b)2+(c)2 < 1 c < 1 -⇒ (cid:54) a a ⇒ a -c = 0 und (a)2+(b)2 < 1 a < 1 -⇒ (cid:54) c c ⇒ c -Widerspruch +R2 y +(cid:55)→ +(0,1) +Umgebung: B 1(0,1) +→ +{(1 : u : v) +| +(cid:107)(u,v) +(cid:107) +< 1 +} += V +1 +Umgebung: B 1(0,1) +→ +(cid:8) +(w : z : 1) +(cid:12) +(cid:12) w2+z2 < +1(cid:9) += V 2 +V +1 +∩V +2 += ∅? 
+(a : b : c) +∈ +V +1 +∩V +2 +⇒ +a (cid:54)= 0 und (b a)2+(c a)2 < 1 +⇒ +c +a +< 1 +⇒ +c (cid:54)= 0 und (a c)2+(b c)2 < 1 +⇒ +a +c +< 1 ⇒ -4) Sn = (cid:8) x Rn+1 (cid:12) (cid:12) x = 1(cid:9) ist n-dimensionale Mannigfaltigkeit. -∈ (cid:107) (cid:107) +Widerspruch +4) Sn = (cid:8) x +∈ +Rn+1 (cid:12) (cid:12) (cid:107)x +(cid:107) += 1(cid:9) ist n-dimensionale Mannigfaltigkeit. Karten: -D := (x ,...,x ) Sn x > 0 B (0,...,0) -i 1 n+1 i 1 -{ ∈ | } → (cid:124) (cid:123)(cid:122) (cid:125) +D +i +:= {(x 1,...,x n+1) +∈ +Sn |x +i +> 0 +} → +B 1(0,...,0 +(cid:124) (cid:123)(cid:122) (cid:125) ∈Rn -C := (x ,...,x ) Sn x < 0 B (0,...,0) -i 1 n+1 i 1 -{ ∈ | } → -(x 1,...,x n+1) (x 1,...,(cid:26)x(cid:26) i,...,x n+1)1 -(cid:55)→ (cid:113) (cid:113) -(x ,...,x ) (x ,...,x , 1 (cid:80)n x2,x ,...,x ), oder 1 (cid:80)n x2 für C -1 n (cid:55)→ 1 i−1 − k=1 k i n − − k=1 k i -Sn = (cid:83)n+1(C D ) -i=1 i ∪ i +) +C +i +:= {(x 1,...,x n+1) +∈ +Sn |x +i +< 0 +} → +B 1(0,...,0) +(x 1,...,x n+1) +(cid:55)→ +(x 1,...,(cid:26)(cid:26) x i,...,x n+1)1 +(x 1,...,x n) +(cid:55)→ +(x 1,...,x +i−1,(cid:113) +1 −(cid:80)n k=1x2 k,x i,...,x n), oder +−(cid:113) +1 −(cid:80)n k=1x2 +k +für C +i +Sn = (cid:83)n+1 i=1(C +i +∪D i) Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt. 5) [0,1] ist keine Mannigfaltigkeit, denn: Es gibt keine Umgebung von 0 in [0,1], die homöomorph zu einem offenem Intervall ist. -1x wird rausgenommen +1x i +wird rausgenommen 27 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN -6) V 1 = (cid:8) (x,y) R2 (cid:12) (cid:12) x y = 0(cid:9) ist keine Mannigfaltigkeit. -∈ · +6) V 1 = (cid:8) (x,y) +∈ +R2 (cid:12) (cid:12) x ·y = 0(cid:9) ist keine Mannigfaltigkeit. Das Problem ist (0,0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen- hangskomponenten, wenn man einen Punkt entfernt. -7) V 2 = (cid:8) (x,y) R2 (cid:12) (cid:12) x3 = y2(cid:9) ist eine Mannigfaltigkeit. 
+7) V 2 = (cid:8) (x,y) ∈ -8) X = (R 0 ) (0 ,0 ) -1 2 -\{ } ∪ +R2 (cid:12) (cid:12) x3 = y2(cid:9) ist eine Mannigfaltigkeit. +8) X = (R \{0 }) ∪(0 1,0 2) +U +⊆ +X offen +⇔ (cid:40) -U offen in R 0 , falls 0 / U,0 U -1 2 -U X offen \{ } ∈ ∈ -⊆ ⇔ ε > 0 : ( ε,ε) U falls 0 U,0 U -1 2 -∃ − ⊆ ∈ ∈ -Insbesondere sind (R 0 ) 0 und (R 0 ) 0 offen und homöomorph -1 2 -\{ } ∪{ } \{ } ∪{ } +U offen in R \{0 }, falls 0 +1 +/ +∈ +U,0 +2 +∈ +U +∃ε > 0 : ( −ε,ε) +⊆ +U falls 0 +1 +∈ +U,0 +2 +∈ +U +Insbesondere sind (R \{0 }) ∪{0 +1 +} +und (R \{0 }) ∪{0 +2 +} +offen und homöomorph zu R. Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0 1 -und 0 . -2 -9) GL (R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 -n +und 0 2. +9) GL n(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 eine Mannigfaltigkeit bilden. Definition 25 -Seien X,Y n-dimensionale Mannigfaltigkeiten, U X und V Y offen, Φ : U V ein Ho- -⊆ ⊆ → -möomorphismus Z = (X ˙ Y)/ mit der von u Φ(u) u U erzeugten Äquivalenzrelation +Seien X,Y n-dimensionale Mannigfaltigkeiten, U +⊆ +X und V +⊆ +Y offen, Φ : U +→ +V ein Ho- +möomorphismus Z = (X ˙ ∪Y)/ +∼ +mit der von u ∼ -∪ ∼ ∀ ∈ -und der von induzierten Quotiententopologie. +Φ(u) ∀u +∈ +U erzeugten Äquivalenzrelation +und der von ∼ +induzierten Quotiententopologie. Z heißtVerklebungvonX undY längsU undV.Z besitzteinenAtlasausn-dimensionalen Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. Bemerkung 26 -Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X Y eine Mannigfaltigkeit -× +Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X ×Y eine Mannigfaltigkeit der Dimension n+m. Beweis: Produkte von Karten sind Karten. (cid:4) Beispiel 21 @@ -1379,208 +2340,320 @@ Mannigfaltigkeiten mit Dimension 2: 3) T2 (1 Henkel) 4) oder mehr Henkel, wie z.B. 
der Zweifachtorus in Abbildung 2.1 Bemerkung 27 -Sei n N,F : Rn R stetig differenzierbar und X = V(F) := x Rn F(x) = 0 das -∈ → { ∈ | } +Sei n +∈ +N,F : Rn +→ +R stetig differenzierbar und X = V(F) := {x +∈ +Rn +| +F(x) = 0 +} +das „vanishing set“. Dann gilt: 28 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. a) X ist abgeschlossen in Rn -b) Ist grad(F)(X) = 0 x X, so ist X eine Mannigfaltigkeit der Dimension n 1. -(cid:54) ∀ ∈ − +b) Ist grad(F)(X) (cid:54)= 0 ∀x +∈ +X, so ist X eine Mannigfaltigkeit der Dimension n −1. Beweis: -a) Sei y Rn V(F). Weil F stetig ist, gibt es δ > 0, sodass F(B (y)) B (F(y)) mit -δ ε -∈ \ ⊆ -ε = 1 F(y) . Folgt B (y) V(F) = Rn V(F) ist offen. -2(cid:107) (cid:107) δ ∩ ∅ ⇒ \ -b) Sei x X mit grad(F)(x) = 0, also o. B. d. A. ∂F (x) = 0, x = (x ,...,x ), -∈ (cid:54) ∂X1 (cid:54) 1 n -x(cid:48) := (x ,...,x ) Rn−1. Der Satz von der impliziten Funktion liefert nun: Es -2 n -∈ -gibt Umgebungen U von x(cid:48) und differenzierbare Funktionen g : U R, sodass -→ -G : U Rn, u (g(u),u) eine stetige Abbildung auf eine offene Umgebung V von x -→ (cid:55)→ +a) Sei y +∈ +Rn \V(F). Weil F stetig ist, gibt es δ > 0, sodass F(B δ(y)) +⊆ +B ε(F(y)) mit +ε = 1 2(cid:107)F(y) (cid:107). Folgt B δ(y) ∩V(F) = +∅ ⇒ +Rn \V(F) ist offen. +b) Sei x +∈ +X mit grad(F)(x) (cid:54)= 0, also o. B. d. A. ∂F ∂X1(x) (cid:54)= 0, x = (x 1,...,x n), +x(cid:48) := (x 2,...,x n) +∈ +Rn−1. Der Satz von der impliziten Funktion liefert nun: Es +gibt Umgebungen U von x(cid:48) und differenzierbare Funktionen g : U +→ +R, sodass +G : U +→ +Rn, u +(cid:55)→ +(g(u),u) eine stetige Abbildung auf eine offene Umgebung V von x in X ist. (cid:4) Beispiel 22 -1) F : R3 R, (x,y,z) x2+y2+z2 1,V(F) = S2,grad(F) = (2x,2y,2z) =B =e =m =. =2 =7.b -→ (cid:55)→ − ⇒ +1) F : R3 +→ +R, (x,y,z) +(cid:55)→ +x2+y2+z2 −1,V(F) = S2,grad(F) = (2x,2y,2z) Bem. 
27.b ====== +⇒ Sn ist n-dimensionale Mannigfaltigkeit in Rn+1 -2) F : R2 R, (x,y) y2 x3 Es gilt: grad(F) = ( 3x2,2y). Also: grad(0,0) = (0,0). -→ (cid:55)→ − − -y -10 -100 5 -z +2) F : R2 +→ +R, (x,y) +(cid:55)→ +y2 −x3 Es gilt: grad(F) = ( −3x2,2y). Also: grad(0,0) = (0,0). +−5 −4 −3 −2 −1 0 1 2 3 4 5 +−4 +−2 +0 2 4 +−100 0 -x -−100 2 4 6 8 10 12 -f(x,y) −4 5 -−2 − a= 31 100 -−0 100 y 0 2 4 5 4 3 2 1 0 x −1 −2 −3 −4 −5 −10 a a= =1 2 -(a) F(x,y)=y2−x3 (b) y2−ax3 =0 +x y +z +−100 +0100 +f(x,y) +(a) F(x,y)=y2−x3 +2 4 6 8 10 12 +−10 +−55 +10 +x +y +a= 1 +3 +a=1 a=2 +(b) y2−ax3 =0 Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. DaheristBemerkung27.bnichtanwendbar,aberV(F)isttrotzdemeine1-dimensionale topologische Mannigfaltigkeit. 29 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN Definition 26 Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale -Mannigfaltigkeit mit Rand, wenn es einen Atlas (U ,ϕ ) gibt, wobei U X offen und -i i i i +Mannigfaltigkeit mit Rand, wenn es einen Atlas (U i,ϕ i) gibt, wobei U +i ⊆ -ϕ ein Homöomorphismus auf eine offene Teilmenge von +X +i +offen und +ϕ i -Rn := (x ,...,x ) Rn x 0 -+,0 1 n n -{ ∈ | ≥ } +ein Homöomorphismus auf eine offene Teilmenge von +Rn ++,0 +:= {(x 1,...,x n) +∈ +Rn +| +x +n +≥ +0 +} ist. -Rn ist ein „Halbraum“. +Rn +,0 +ist ein „Halbraum“. Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. ∼ = (a) Halbraum ∼ = +(b) Pair of pants ∼ = -(b) Pair of pants (c) Sphäre mit einem Loch +(c) Sphäre mit einem Loch Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand Definition 27 -Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas . Dann heißt -A +Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt +∂X := (cid:91) -∂X := x U ϕ(x) = 0 -{ ∈ | } -(U,ϕ)∈A +(U,ϕ)∈A{x +∈ +U +| +ϕ(x) = 0 +} Rand von X. -∂X ist eine Mannigfaltigkeit der Dimension n 1. -− +∂X ist eine Mannigfaltigkeit der Dimension n −1. 
Definition 28 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U ,ϕ ) -i i i∈I -Für i,j I mit U U = heißt -i j -∈ ∩ (cid:54) ∅ -ϕ := ϕ ϕ−1 -ij j ◦ i -ϕ (U U ) ϕ (U U ) -i i j j i j -∩ → ∩ +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U i,ϕ i) +i∈I +Für i,j +∈ +I mit U +i +∩U +j +(cid:54)= +∅ +heißt +ϕ +ij +:= ϕ +j +◦ϕ−1 +i +ϕ i(U +i +∩U j) +→ +ϕ j(U +i +∩U j) Kartenwechsel oder Übergangsfunktion. 30 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN -X +Rn Rn U i U j -ϕ ϕ -i j V i V j -Rn Rn +X +ϕ +i +ϕ +j Abbildung 2.4: Kartenwechsel 2.2 Differenzierbare Mannigfaltigkeiten Definition 29 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U ,ϕ ) . -i i i∈I +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U i,ϕ i) i∈I. a) X heißt differenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten- -wechselabbildung ϕ , i,j I k-mal stetig differenzierbar ist. -ij +wechselabbildung ϕ ij, i,j ∈ +I k-mal stetig differenzierbar ist. b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig- faltigkeit der Klasse C∞ ist. Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt. Definition 30 -Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k N ) mit Atlas -∈ ∪ {∞} -= (U ,ϕ ) . -i i i∈I +Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k +∈ +N +∪ +{∞}) mit Atlas A -a) Eine Karte (U,ϕ) auf X heißt verträglich mit , wenn alle Kartenwechsel ϕ ϕ−1 -A ◦ i -und ϕ ϕ−1 (i I mit U U = ) differenzierbar von Klasse Ck sind. -i i -◦ ∈ ∩ (cid:54) ∅ -b) Die Menge aller mit verträglichen Karten auf X bildet einen maximalen Atlas der += (U i,ϕ i) i∈I. +a) Eine Karte (U,ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ϕ−1 +i +und ϕ +i +◦ϕ−1 (i +∈ +I mit U +i +∩U (cid:54)= ∅) differenzierbar von Klasse Ck sind. +b) Die Menge aller mit A +verträglichen Karten auf X bildet einen maximalen Atlas der Klasse Ck. Er heißt Ck-Struktur auf X. Eine C∞-Struktur heißt auch differenzierbare Struktur auf X. 
Bemerkung 28 -Für n 4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten +Für n ≥ +4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten „exotische Sphären“. Definition 31 -Seien X,Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x X. +Seien X,Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ -a) Eine stetige Abbildung f : X Y heißt differenzierbar in x (von Klasse Ck), wenn +X. +a) Eine stetige Abbildung f : X → -es Karten (U,ϕ) von X mit x U und (V,ψ) von Y mit f(U) V gibt, sodass -∈ ⊆ -ψ f ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. -◦ ◦ -b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x X differenzierbar ist. +Y heißt differenzierbar in x (von Klasse Ck), wenn +es Karten (U,ϕ) von X mit x +∈ +U und (V,ψ) von Y mit f(U) +⊆ +V gibt, sodass +ψ ◦f ◦ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. +b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x ∈ +X differenzierbar ist. c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine -differenzierbare Abbildung g : Y X von Klasse C∞ gibt mit g f = id und -X -→ ◦ -f g = id . -Y +differenzierbare Abbildung g : Y +→ +X von Klasse C∞ gibt mit g ◦ +f = id +X +und +f ◦g = id Y. 31 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Bemerkung 29 Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. -Beweis: Seien (U(cid:48),ϕ(cid:48)) und (V(cid:48),ψ(cid:48)) Karten von X bzw. Y um x bzw. f(x) mit f(U(cid:48)) V(cid:48). -⊆ -ψ(cid:48) f (ϕ(cid:48))−1 -⇒ ◦ ◦ -= ψ(cid:48) (ψ−1 ψ) f (ϕ−1 ϕ) (ϕ(cid:48))−1 -◦ ◦ ◦ ◦ ◦ ◦ -ist genau dann differenzierbar, wenn ψ f ϕ−1 differenzierbar ist. -◦ ◦ +Beweis: Seien (U(cid:48),ϕ(cid:48)) und (V(cid:48),ψ(cid:48)) Karten von X bzw. Y um x bzw. f(x) mit f(U(cid:48)) +⊆ +V(cid:48). +⇒ +ψ(cid:48) ◦f ◦(ϕ(cid:48))−1 += ψ(cid:48) ◦(ψ−1 ◦ψ) ◦f ◦(ϕ−1 ◦ϕ) ◦(ϕ(cid:48))−1 +ist genau dann differenzierbar, wenn ψ ◦f ◦ϕ−1 differenzierbar ist. 
Beispiel 23 -f : R R, x x3 istkeinDiffeomorphismus,aberHomöomorphismus,damitg(x) := √3 x -→ (cid:55)→ -gilt: f g = idR, g f = idR -◦ ◦ -Bemerkung 30 +f : R +→ +R, x +(cid:55)→ +x3 istkeinDiffeomorphismus,aberHomöomorphismus,damitg(x) := 3 √x +gilt: f ◦g = idR, g ◦f = idR +Bemerkung 30 Sei X eine glatte Mannigfaltigkeit. Dann ist -Diffeo(X) := f : X X f ist Diffeomorphismus -{ → | } +Diffeo(X) := {f : X +→ +X +| +f ist Diffeomorphismus +} eine Untergruppe von Homöo(X). Definition 32 -S R3 heißt reguläre Fläche : s S Umgebung V(s) R3 U R2 offen: -⊆ ⇔ ∀ ∈ ∃ ⊆ ∃ ⊆ -differenzierbare Abbildung F : U V S: Rg(J (u)) = 2 u U. -F -∃ → ∩ ∀ ∈ +S +⊆ +R3 heißt reguläre Fläche : +⇔ +∀s +∈ +S +∃ +Umgebung V(s) +⊆ +R3 ∃U +⊆ +R2 offen: +∃ +differenzierbare Abbildung F : U +→ +V ∩S: Rg(J F(u)) = 2 ∀u +∈ +U. F heißt (lokale) reguläre Parametrisierung von S. F(u,v) = (x(u,v),y(u,v),z(u,v)) -∂x(p) ∂x(p) -∂u ∂v -J F(u,v) =  ∂∂ uy (p) ∂ ∂y v(p) -∂z(p) ∂z(p) -∂u ∂v +J F(u,v) = + ∂x ∂u(p) ∂x ∂v(p) +∂y ∂u(p) ∂y ∂v(p) +∂z ∂u(p) ∂z +∂v(p) + Beispiel 24 -1) Rotationsflächen: Sei r : R R eine differenzierbare Funktion. +1) Rotationsflächen: Sei r : R +→ +R >0 +eine differenzierbare Funktion. +F : R2 → -F : R2 R3 (u,v) (r(u)cos(u),r(v)sin(u),v) -→ (cid:55)→ - r(cid:48)(v)cosu -r(v)sinu -− -J F(u,v) =  r(v)cosu r(cid:48)(v)sinu +R3 (u,v) +(cid:55)→ +(r(u)cos(u),r(v)sin(u),v) +J F(u,v) = + −r(v)sinu r(cid:48)(v)cosu +r(v)cosu r(cid:48)(v)sinu 0 1 -hat Rang 2 für alle (u,v) R2. + + +hat Rang 2 für alle (u,v) ∈ -2) Kugelkoordinaten: F : R2 R3, +R2. 
+2) Kugelkoordinaten: F : R2 → -(u,v) (Rcosvcosu,Rcosvsinu,Rsinv) +R3, +(u,v) (cid:55)→ -Es gilt: F(u,v) S2, denn -∈ R +(Rcosvcosu,Rcosvsinu,Rsinv) +Es gilt: F(u,v) +∈ +S2 R, denn R2cos2(v)cos2(u)+R2cos2(v)sin2(u)+R2sin2(v) =R2(cos2(v)cos2(u)+cos2(v)sin2(u)+sin2(v)) =R2(cid:0) cos2(v)(cos2(u)+sin2(u))+sin2(v)(cid:1) @@ -1588,312 +2661,475 @@ R2cos2(v)cos2(u)+R2cos2(v)sin2(u)+R2sin2(v) =R2 32 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN N +S +v u +(a) Kugelkoordinaten +−1 +0 1 -0.8 -u v +2 −2 +−1 +0 +1 2 0.6 +0.8 1 -− 0 1 2 -1 0 -1 -S 2 2 − -− -(a) Kugelkoordinaten (b) Rotationskörper -y -1 +(b) Rotationskörper +π +2 +π 3π +2 +2π +−1 +−0.5 0.5 +1 x -π π 3π 2π -2 2 +y sinx -0.5 cosx -− -1 -− (c) Sinus und Kosinus haben keine gemeinsame Nullstelle 33 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Die Jacobi-Matrix -  -Rcosvsinu Rsinvcosu -− − -J F(u,v) =  Rcosvcosu Rsinvsinu -− +J F(u,v) = + +−Rcosvsinu −Rsinvcosu +Rcosvcosu −Rsinvsinu 0 Rcosv -hat Rang 2 für cosv = 0. In N und S ist cosv = 0. -(cid:54) + + +hat Rang 2 für cosv (cid:54)= 0. In N und S ist cosv = 0. Bemerkung 31 -Jede reguläre Fläche S R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. +Jede reguläre Fläche S ⊆ +R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. Beweis: -S R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -⊆ -regulären Flächen folgt direkt, dass Karten (U ,F ) und (U R2,F : R2 R3) von S mit -i i j j -⊆ → -U U = existieren, wobei F und F nach Definition differenzierbare Abbildungen sind. -i j i j -∩ (cid:54) ∅ -z.Z.: F−1 F ist ein Diffeomorphismus. -j ◦ i S -s -F F -i j +⊆ +R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von +regulären Flächen folgt direkt, dass Karten (U i,F i) und (U +j +⊆ +R2,F +j +: R2 +→ +R3) von S mit +U +i +∩U +j +(cid:54)= +∅ +existieren, wobei F +i +und F +j +nach Definition differenzierbare Abbildungen sind. +z.Z.: F−1 +j +◦F +i +ist ein Diffeomorphismus. 
U i U j -F j−1◦Fi +S +s +F +i +F +j +F−1 +j +◦Fi Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 -Idee: Finde differenzierbare Funktion F(cid:103)−1 in Umgebung W von s, sodass F(cid:103)−1 = F−1. -j j |S∩W j -Ausführung: Sei u U , v U mit F (u ) = s = F (v ). -0 i 0 j i 0 j 0 -∈ ∈ -Da Rg(J (v )) = 2 ist, ist o. B. d. A. -Fj 0 -(cid:18)∂x ∂x(cid:19) -det ∂u ∂v (v ) = 0 -∂y ∂y 0 -(cid:54) -∂u ∂v -und F (u,v) = (x(u,v),y(u,v),z(u,v)). +Idee: Finde differenzierbare Funktion (cid:103) F−1 j -Definiere F(cid:102)j : U R R3 durch +in Umgebung W von s, sodass (cid:103) F−1 +j |S∩W += F−1 j -× → -F(cid:102)j(u,v,t) := (x(u,v),y(u,v),z(u,v)+t) -Offensichtlich: F(cid:102)j = F -|Uj×{0} j -∂x ∂x 0 -∂u ∂v -J =  ∂∂ uy ∂ ∂y 0 detJ F(cid:102)j(v 0,0) (cid:54)= 0 -F(cid:102)j v ⇒ -∂z ∂z 1 -∂u ∂v -AnalysisII -====== Es gibt Umgebungen W von F von F(cid:102)j(v 0,0) = F j(v 0) = s, sodass F(cid:102)j auf W eine +. +Ausführung: Sei u +0 +∈ +U i, v +0 +∈ +U j -differen⇒ zierbar Inverse F−1 hat. +mit F i(u 0) = s = F j(v 0). +Da Rg(J Fj(v 0)) = 2 ist, ist o. B. d. A. +det(cid:18)∂x +∂u +∂x +∂v +∂y +∂u +∂y +∂v(cid:19) +(v 0) (cid:54)= 0 +und F j(u,v) = (x(u,v),y(u,v),z(u,v)). +Definiere (cid:102) F j : U j ×R +→ +R3 durch +(cid:102) F j(u,v,t) := (x(u,v),y(u,v),z(u,v)+t) +Offensichtlich: (cid:102) F j |Uj×{0} = F j +J +(cid:102) Fj += + ∂x +∂u +∂x +∂v +0 +∂y +∂u +∂y +∂v +0 +∂z +∂u +∂z +∂v +1 + +⇒ +detJ +(cid:102) +Fj(v 0,0) (cid:54)= 0 +AnalysisII +====== +⇒ +Es gibt Umgebungen W von F j von (cid:102) F j(v 0,0) = F j(v 0) = s, sodass (cid:102) F j auf W eine +differenzierbar Inverse F−1 j +hat. 34 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Weiter gilt: -F(cid:102)j−1 -= F j−1 -|W∩S |W∩S -F−1 F = F−1 F -⇒ j ◦ i |F i−1(W∩S) j ◦ i |F i−1(W∩S) +(cid:102) F +j−1 +|W∩S = F−1 j |W∩S +⇒ +F−1 +j +◦F +i |F−1 i (W∩S) += F−1 +j +◦F +i |F−1 i (W∩S) ist differenzierbar. Definition 33 -Sei G eine Mannigfaltigkeit und (G, ) eine Gruppe. 
+Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. +a) G heißt topologische Gruppe, wenn die Abbildungen ◦ -a) G heißt topologische Gruppe, wenn die Abbildungen : G G G und ι : G G -◦ × → → +: G ×G +→ +G und ι : G +→ +G definiert durch -g h := g h und ι(g) := g−1 -◦ · +g ◦h := g ·h und ι(g) := g−1 stetig sind. -b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ) und -◦ +b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und (G,ι) differenzierbar sind. Beispiel 25 (Lie-Gruppen) 1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. -2) GL (R) -n -3) (R×, ) -· -4) (R , ) ->0 -· -5) (Rn,+), denn A B(i,j) = (cid:80)n a b ist nach allen Variablen differenzierbar -· k=1 ik kj +2) GL n(R) +3) (R×, ·) +4) (R >0, ·) +5) (Rn,+), denn A ·B(i,j) = (cid:80)n k=1a ikb +kj +ist nach allen Variablen differenzierbar (A−1)(i,j) = det(Aij) detA -  -a ... a -i1 in -A ij =   . . . ... . . .   ∈ R(n−1)×(n−1) -a ... a -n1 nn +A ij = + + +a +i1 +... a +in +. . . ... . . . +a +n1 +... a +nn +  ∈ R(n−1)×(n−1) ist differenzierbar. -detA kann 0 werden, da: +detA ij -(cid:18) (cid:19) +kann 0 werden, da: +(cid:18) 1 1 -1 0 -− -6) SL (R) = A GL (R) det(A) = 1 -n n -{ ∈ | } +−1 +0(cid:19) +6) SL n(R) = {A +∈ +GL n(R) +| +det(A) = 1 +} Bemerkung 32 -Ist G eine Lie-Gruppe und g G, so ist die Abbildung +Ist G eine Lie-Gruppe und g ∈ -l : G G +G, so ist die Abbildung +l g +: G → -h g h -(cid:55)→ · +G +h +(cid:55)→ +g ·h ein Diffeomorphismus. 35 2.3.SIMPLIZIALKOMPLEX 2.3 Simplizialkomplex Definition 34 -Seien v ,...,v Rn Punkte. -0 k -∈ -a) v ,...,v sind in allgemeiner Lage -0 k -esgibtkeinen(k 1)-dimensionalenaffinenUntervektorraum,derv ,...,v enthält -0 k -⇔ − -v v ,...,v v sind linear unabhängig. -1 0 k 0 -⇔ − − -(cid:110) (cid:12) (cid:111) -b) conv(v ,...,v ) := (cid:80)k λ v (cid:12) λ 0,(cid:80)k λ = 1 heißt die konvexe Hülle von -0 k i=0 i i (cid:12) i ≥ i=0 i -v ,...,v . -0 k +Seien v 0,...,v +k +∈ +Rn Punkte. 
+a) v 0,...,v +k +sind in allgemeiner Lage +⇔esgibtkeinen(k −1)-dimensionalenaffinenUntervektorraum,derv 0,...,v +k +enthält +⇔ +v +1 +−v 0,...,v +k +−v +0 +sind linear unabhängig. +b) conv(v 0,...,v k) := +(cid:110) +(cid:80)k i=0λ iv +i +(cid:12) +(cid:12) +(cid:12) +λ +i +≥ +0,(cid:80)k i=0λ +i += +1(cid:111) +heißt die konvexe Hülle von +v 0,...,v k. Definition 35 -a) Sei ∆n = conv(e ,...,e ) Rn+1 die konvexe Hülle der Standard-Basisvektoren -0 n +a) Sei ∆n = conv(e 0,...,e n) ⊆ -e ,...,e . -0 n +Rn+1 die konvexe Hülle der Standard-Basisvektoren +e 0,...,e n. Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. -b) FürPunktev ,...,v imRn inallgemeinerLageheißt∆(v ,...,v ) = conv(v ,...,v ) -0 k 0 k 0 k +b) FürPunktev 0,...,v +k +imRn inallgemeinerLageheißt∆(v 0,...,v k) = conv(v 0,...,v k) ein k-Simplex in Rn. -c) Ist ∆(v ,...,v ) ein k-Simplex und I = i ,...,i 0,...,k , so ist s := -0 k 0 r i0,...,ir -{ } ⊆ { } -conv(v ,...,v ) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. -i0 ir +c) Ist ∆(v 0,...,v k) ein k-Simplex und I = {i 0,...,i +r +} ⊆ +{0,...,k }, so ist s +i0,...,ir +:= +conv(v i0,...,v ir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. (a) 0-Simplex ∆0 -3 3 +1 2 3 +123 +e 0 e 1 +(b) 1-Simplex ∆1 +1 2 3 +123 +e 0 e 1 -2 2 e 2 e 2 -1 1 -e 0 e 0 e 3 -1 2 3 1 2 3 e 0 e 1 -(b) 1-Simplex ∆1 (c) 2-Simplex ∆2 (d) 3-Simplex ∆3 +(c) 2-Simplex ∆2 +e 0 e 1 +e +2 +e 3 +(d) 3-Simplex ∆3 Abbildung 2.6: Beispiele für k-Simplexe Definition 36 a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex, wenn gilt: -(i) Für ∆ K und S ∆ Teilsimplex ist S K. -∈ ⊆ ∈ -(ii) Für ∆ ,∆ K ist ∆ ∆ leer oder ein Teilsimplex von ∆ und von ∆ . -1 2 1 2 1 2 -∈ ∩ +(i) Für ∆ +∈ +K und S +⊆ +∆ Teilsimplex ist S +∈ +K. +(ii) Für ∆ 1,∆ +2 +∈ +K ist ∆ +1 +∩∆ +2 +leer oder ein Teilsimplex von ∆ +1 +und von ∆ 2. +b) |K +| +:= (cid:83) -b) K := ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. 
-| | ∆∈K -c) Ist d = max k N K enthält k-Simplex , so heißt d die Dimension von K. +∆∈K∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. +c) Ist d = max {k +∈ +N 0 -{ ∈ | } +| +K enthält k-Simplex }, so heißt d die Dimension von K. 36 2.3.SIMPLIZIALKOMPLEX -(a) 1D Simplizialkomplex(b) 2D Simplizialkomplex (c) 2D Simplizialkomplex +(a) 1D Simplizialkomplex(b) 2D Simplizialkomplex (ohne untere Fläche!) +(c) 2D Simplizialkomplex (d) 1D Simplizialkomplex (e) 2D Simplizialkomplex -P P -(f) P ist kein Teilsimplex, da Eigen- (g) Simplizialkomplex +P +(f) P ist kein Teilsimplex, da Eigen- schaft Punkt b.ii verletzt ist +P +(g) Simplizialkomplex Abbildung 2.7: Beispiele für Simplizialkomplexe Definition 37 Seien K,L Simplizialkomplexe. Eine stetige Abbildung -f : K L -| | → | | -heißt simplizial, wenn für jedes ∆ K gilt: +f : |K +| → +|L +| +heißt simplizial, wenn für jedes ∆ ∈ -a) f(∆) L +K gilt: +a) f(∆) ∈ -b) f : ∆ f(∆) ist eine affine Abbildung. -∆ -| → +L +b) f +|∆ +: ∆ +→ +f(∆) ist eine affine Abbildung. Beispiel 26 (Simpliziale Abbildungen) -1) ϕ(e ) := b , ϕ(e ) := b -1 1 2 2 +1) ϕ(e 1) := b 1, ϕ(e 2) := b +2 ϕ ist eine eindeutig bestimmte lineare Abbildung 37 2.3.SIMPLIZIALKOMPLEX -b -2 +0 e 2 e 1 +0 b 1 +b +2 ϕ -0 e 2 0 b 1 -2) Folgende Abbildung ϕ : ∆n ∆n−1 ist simplizial: +2) Folgende Abbildung ϕ : ∆n → +∆n−1 ist simplizial: ϕ 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) -b a a -b b b b b a b c -b bc bc b -d M M M -b b b b b b b b -c d d b d -b b b b +M M +a +a +a +b +b +b +c +c +c +d +d +d +M a -b b b b b b b b +b +c +d +b b b +b b b +b b b +b +b +b +b +b +b +b +bb +b b +b b +b b +b +b +b +b Abbildung 2.8: Abbildung eines Torus auf eine Sphäre Definition 38 -Sei K ein endlicher Simplizialkomplex. Für n 0 sei a (K) die Anzahl der n-Simplizes in -n +Sei K ein endlicher Simplizialkomplex. Für n ≥ +0 sei a n(K) die Anzahl der n-Simplizes in K. 
Dann heißt +χ(K) := dimK (cid:88) -χ(K) := ( 1)na (K) -n -− n=0 +( −1)na n(K) Eulerzahl (oder Euler-Charakteristik) von K. Beispiel 27 -1) χ(∆1) = 2 1 = 1 -− -χ(∆2) = 3 3+1 = 1 -− -χ(∆3) = 4 6+4 1 = 1 -− − -2) χ(Oktaeder-Oberfläche) = 6 12+8 = 2 -− +1) χ(∆1) = 2 −1 = 1 +χ(∆2) = 3 −3+1 = 1 +χ(∆3) = 4 −6+4 −1 = 1 +2) χ(Oktaeder-Oberfläche) = 6 −12+8 = 2 χ(Rand des Tetraeders) = 2 -χ(Ikosaeder) = 12 30+20 = 2 -− -3) χ(Würfel) = 8 12+6 = 2 -− -χ(Würfel, unterteilt in Dreiecksflächen) = 8 (12+6)+(6 2) = 2 -− · +χ(Ikosaeder) = 12 −30+20 = 2 +3) χ(Würfel) = 8 −12+6 = 2 +χ(Würfel, unterteilt in Dreiecksflächen) = 8 −(12+6)+(6 ·2) = 2 Bemerkung 33 -χ(∆n) = 1 für jedes n N -0 +χ(∆n) = 1 für jedes n ∈ +N +0 38 2.3.SIMPLIZIALKOMPLEX -Beweis: ∆n ist die konvexe Hülle von (e ,...,e ) in Rn+1. Jede (k+1)-elementige Teilmenge -0 n -von e ,...,e definiert ein k-Simplex. -0 n -a{ (∆n) = (cid:0)n+} 1(cid:1) , k = 0,...,n -⇒ χk (∆n) = (cid:80)k n+1 ( 1)k(cid:0)n+1(cid:1) -⇒ k=0 − k+1 +Beweis: ∆n ist die konvexe Hülle von (e 0,...,e n) in Rn+1. Jede (k+1)-elementige Teilmenge +von {e 0,...,e +n +} +definiert ein k-Simplex. +⇒ +a k(∆n) = (cid:0)n+1 k+1(cid:1) , k = 0,...,n +⇒ +χ(∆n) = (cid:80)n k=0( −1)k(cid:0)n+1 k+1(cid:1) +f(x) = (x+1)n+1 Binomischer -f(x) = (x+1)n+1 Leh=rsatz (cid:80)n+1(cid:0)n+1(cid:1) xk -k=0 k -0 = (cid:80)n+1(cid:0)n+1(cid:1) ( 1)k = χ(∆n) 1 -⇒ k=0 k − − -χ(∆n) = 1 (cid:4) +Lehrsatz = (cid:80)n+1 k=0(cid:0)n+1 +k +(cid:1) xk +⇒ +0 = (cid:80)n+1 k=0(cid:0)n+1 +k +(cid:1) ( −1)k = χ(∆n) −1 ⇒ +χ(∆n) = 1 (cid:4) Definition 39 a) Ein 1D-Simplizialkomplex heißt Graph. b) Ein Graph, der homöomorph zu S1 ist, heißt Kreis. c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. -(a) Dies wird häufig auch als(b) PlanareEinbettungdesTe- -Multigraph bezeichnet. traeders -(c) K (d) K -5 3,3 +(a) Dies wird häufig auch als +Multigraph bezeichnet. 
+(b) PlanareEinbettungdesTe- +traeders +(c) K +5 +(d) K +3,3 Abbildung 2.9: Beispiele für Graphen Bemerkung 34 Für jeden Baum T gilt χ(T) = 1. @@ -1901,48 +3137,49 @@ Beweis: Induktion über die Anzahl der Ecken. Bemerkung 35 a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ enthält.2 -b) Ist n = a (Γ) a (T), so ist χ(Γ) = 1 n. -1 1 -− − +b) Ist n = a 1(Γ) −a 1(T), so ist χ(Γ) = 1 −n. Beweis: a) Siehe „Algorithmus von Kruskal“. 2T wird „Spannbaum“ genannt. 39 2.3.SIMPLIZIALKOMPLEX -b) χ(Γ) = a (Γ) a (Γ) -0 1 -− -= a (Γ) (n+a (T)) -0 1 -− -= a (T) a (T) n -0 1 -− − -= χ(T) n -− -= 1 n -− +b) χ(Γ) = a 0(Γ) −a 1(Γ) += a 0(Γ) −(n+a 1(T)) += a 0(T) −a 1(T) −n += χ(T) −n += 1 −n Bemerkung 36 -Sei ∆ ein n-Simplex und x ∆◦ Rn. Sei K der Simplizialkomplex, der aus ∆ durch -∈ ⊆ +Sei ∆ ein n-Simplex und x +∈ +∆◦ +⊆ +Rn. Sei K der Simplizialkomplex, der aus ∆ durch „Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1. (a) K (b) ∆, das aus K durch Unter- teilung entsteht Abbildung 2.10: Beispiel für Bemerkung 36. -n (cid:18) (cid:19) -(cid:88) n+1 -Beweis: χ(K) = χ(∆) ( 1)n + ( 1)k = χ(∆) (cid:4) -− − − k +Beweis: χ(K) = χ(∆) +− +( −1)n (cid:124) (cid:123)(cid:122) (cid:125) -k=0 -n-Simplex +n-Simplex+ +n +(cid:88) +k=0( +−1)k(cid:18) +n+1 +k +(cid:19) (cid:124) (cid:123)(cid:122) (cid:125) (1+(−1))n+1 += χ(∆) (cid:4) Definition 40 Sei X ein topologischer Raum, K ein Simplizialkomplex und -h : K X -| | → -ein Homöomorphismus von der geometrischen Realisierung K auf X. Dann heißt h eine -| | +h : |K +| → +X +ein Homöomorphismus von der geometrischen Realisierung |K +| +auf X. Dann heißt h eine Triangulierung von X. Beispiel 28 (Triangulierung des Torus) Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für @@ -1951,1328 +3188,2251 @@ in Beispiel 28. Satz 2.1 (Eulersche Polyederformel) Sei P ein konvexes Polyeder in R3, d. h. 
∂P ist ein 2-dimensionaler Simplizialkomplex, sodass gilt: -x,y ∂P : [x,y] P -∀ ∈ ⊆ +∀x,y +∈ +∂P : [x,y] +⊆ +P Dann ist χ(∂P) = 2. Beweis: 1) Die Aussage ist richtig für den Tetraeder. -2) O. B. d. A. sei 0 P und P B (0). Projeziere ∂P von 0 aus auf ∂B (0) = S2. -1 1 -∈ ⊆ +2) O. B. d. A. sei 0 +∈ +P und P +⊆ +B 1(0). Projeziere ∂P von 0 aus auf ∂B 1(0) = S2. Erhalte Triangulierung von S2. 40 2.3.SIMPLIZIALKOMPLEX -(a) DiebeidenmarkiertenDreieckeschneidensichim(b) DiebeidenmarkiertenDreieckeschneidensichim -Mittelpunkt und in einer Seite. Mittelpunkt und außen. +(a) DiebeidenmarkiertenDreieckeschneidensichim +Mittelpunkt und in einer Seite. +(b) DiebeidenmarkiertenDreieckeschneidensichim +Mittelpunkt und außen. Abbildung 2.11: Fehlerhafte Triangulierungen (a) Einfache Triangulierung (b) Minimale Triangulierung Abbildung 2.12: Triangulierungen des Torus 41 2.3.SIMPLIZIALKOMPLEX -3) Sind P und P konvexe Polygone und T ,T die zugehörigen Triangulierungen von -1 2 1 2 -S2, so gibt es eine Triangulierung T, die sowohl um T als auch um T Verfeinerung -1 2 +3) Sind P +1 +und P +2 +konvexe Polygone und T 1,T +2 +die zugehörigen Triangulierungen von +S2, so gibt es eine Triangulierung T, die sowohl um T +1 +als auch um T +2 +Verfeinerung ist (vgl. Abbildung 2.13). T 1 T 2 T -Abbildung 2.13: T ist eine Triangulierung, die für T und T eine Verfeinerung ist. -1 2 -NachBemerkung 36istχ(∂P ) = χ(T ) = χ(T) = χ(T ) = χ(∂P ) = 2,weilo.B.d.A. -1 1 2 2 -P ein Tetraeder ist. +Abbildung 2.13: T ist eine Triangulierung, die für T +1 +und T +2 +eine Verfeinerung ist. +NachBemerkung 36istχ(∂P 1) = χ(T 1) = χ(T) = χ(T 2) = χ(∂P 2) = 2,weilo.B.d.A. +P 2 +ein Tetraeder ist. Bemerkung 37 (Der Rand vom Rand ist 0) Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V. -Sei A die Menge der n-Simplizes in K, d. h. -n -A (K) := σ K dim(σ) = n für n = 0,...,d = dim(K) +Sei A n -{ ∈ | } -und C (K) der R-Vektorraum mit Basis A (K), d. h. 
-n n - (cid:12)  +die Menge der n-Simplizes in K, d. h. +A n(K) := {σ +∈ +K +| +dim(σ) = n +} +für n = 0,...,d = dim(K) +und C n(K) der R-Vektorraum mit Basis A n(K), d. h. +C n(K) = + + + +(cid:88) +σ∈An(K)c σ ·σ (cid:12) - (cid:88) (cid:12) R -C n(K) = c σ σ (cid:12) c σ -· (cid:12) ∈ -  -σ∈An(K) (cid:12) -Sei σ = ∆(x ,...,x ) A (K), sodass x < x < < x . -0 n n 0 1 n -∈ ··· -Für i = 0,...,n sei ∂ σ := ∆(x ,...,xˆ,...,x ) die i-te Seite von σ und d = d σ := -i 0 i n σ n -(cid:80) ( 1)i∂ σ C (K) und d : C (K) C (K) die dadurch definierte lineare -i=0 − i ∈ n−1 n n → n−1 -Abbildung. -Dann gilt: d d = 0 -n−1 n -◦ -c -e e -2 1 +(cid:12) +(cid:12) +(cid:12) +(cid:12) +(cid:12) +c σ +∈ +R + + +Sei σ = ∆(x 0,...,x n) +∈ +A n(K), sodass x +0 +< x +1 +< +··· +< x n. +Für i = 0,...,n sei ∂ iσ := ∆(x 0,..., ˆ x i,...,x n) die i-te Seite von σ und d σ -a -e b -3 -Abbildung 2.14: Simplizialkomplex mit Totalordnung -Beispiel 29 += d nσ := +(cid:80) i=0( −1)i∂ iσ +∈ +C n−1(K) und d +n +: C n(K) +→ +C n−1(K) die dadurch definierte lineare +Abbildung. +Dann gilt: d +n−1 +◦d +n += 0 +a +b +c +σ +e +3 +e +1 +e +2 +Abbildung 2.14: Simplizialkomplex mit Totalordnung +Beispiel 29 Sei a < b < c. Dann gilt: -d σ = e e +e -2 1 2 3 -− -d (e e +e ) = (c b) (c a)+(b a) -1 1 2 3 -− − − − − +d 2σ = e +1 +−e 2+e +3 +d 1(e +1 +−e 2+e 3) = (c −b) −(c −a)+(b −a) 42 2.3.SIMPLIZIALKOMPLEX = 0 Sei a < b < c < d. Dann gilt für Tetraeder: -d (∆(a,b,c,d)) = ∆(b,c,d) ∆(a,c,d)+∆(a,b,d) ∆(a,b,c),wobei: -3 -− − -d ( ∆(b,c,d)) = ∆(c,d) ∆(b,d)+∆(b,c) -2 -− -d ( ∆(a,c,d)) = ∆(c,d)+∆(a,d) ∆(a,c) -2 -− − − -d ( ∆(a,b,d)) = ∆(b,d) ∆(a,d)+∆(a,b) -2 -− -d ( ∆(a,b,c)) = ∆(b,c)+∆(a,c) ∆(a,b) -2 -− − − -d (d (∆(a,b,c,d))) = 0 -2 3 +d 3(∆(a,b,c,d)) = ∆(b,c,d) −∆(a,c,d)+∆(a,b,d) −∆(a,b,c),wobei: +d 2( ∆(b,c,d)) = ∆(c,d) −∆(b,d)+∆(b,c) +d 2( −∆(a,c,d)) = −∆(c,d)+∆(a,d) −∆(a,c) +d 2( ∆(a,b,d)) = ∆(b,d) −∆(a,d)+∆(a,b) +d 2( −∆(a,b,c)) = −∆(b,c)+∆(a,c) −∆(a,b) ⇒ -Beweis: Sei σ A . 
Dann gilt: -n +d 2(d 3(∆(a,b,c,d))) = 0 +Beweis: Sei σ ∈ +A n. Dann gilt: +d n−1(d nσ) = d n−1( n (cid:88) -d (d σ) = d ( ( 1)i∂ σ) -n−1 n n−1 i -− -i=0 +i=0( −1)i∂ iσ) += n (cid:88) -= ( 1)id (∂ σ) -n−1 i -− -i=0 -n n−1 -(cid:88) (cid:88) -= ( 1)i ∂ (∂ σ)( 1)j -i j -− − -i=0 j=0 -(cid:88) (cid:88) -= ( 1)i+j∂ (∂ (σ))+ ( 1)i+j∂ (∂ σ) -j i i−1 j -− − -0≤i≤j≤n−1 0≤j d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C) ⇒ -d(A,C) > d(B,C) Widerspruch zu Punkt (i) +d(A,C) > d(B,C) ⇒ +Widerspruch zu Punkt (i) b) C liegt zwischen P und B d(P,C)+d(C,A) > d(P,A) = d(P,B) = d(P,C)+d(C,B) +⇒ d(C,A) > d(C,B) ⇒ Widerspruch zu Punkt (i) -⇒ 2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. PA. Dann liegen A und Q in derselben Halbebene bzgl. PB. -Tausche A und B Fall 1 (cid:4) +Tausche A und B ⇒ +Fall 1 (cid:4) Bemerkung 63 -Sei (X,d,G) eine Geometrie, die §1 - §3 erfüllt, P,Q X mit P = Q und ϕ eine Isometrie -∈ (cid:54) +Sei (X,d,G) eine Geometrie, die §1 - §3 erfüllt, P,Q +∈ +X mit P (cid:54)= Q und ϕ eine Isometrie mit ϕ(P) = P und ϕ(Q) = Q. -Dann gilt ϕ(S) = S S PQ. -∀ ∈ +Dann gilt ϕ(S) = S ∀S +∈ +PQ. Beweis: +O. B. d. A. sei S +∈ +PQ 2 -O. B. d. A. sei S PQ d(P,Q) = d(P,S)+d(S,Q) -∈ ⇔ +⇔ +d(P,Q) = d(P,S)+d(S,Q) ϕ∈Iso(X) -d(ϕ(P),ϕ(Q)) = d(ϕ(P),ϕ(S))+d(ϕ(S),ϕ(Q)) ⇒ +d(ϕ(P),ϕ(Q)) = d(ϕ(P),ϕ(S))+d(ϕ(S),ϕ(Q)) P,Q∈Fix(ϕ) +⇒ d(P,Q) = d(P,ϕ(S))+d(ϕ(S),Q) ⇒ ϕ(S) liegt zwischen P und Q ⇒ d(P,S) = d(ϕ(P),ϕ(S)) = d(P,ϕ(S)) -⇒ 3(i) -ϕ(S) = S ⇒ +ϕ(S) = S (cid:4) Proposition 4.2 In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P,P(cid:48),Q,Q(cid:48) mit d(P,Q) = d(P(cid:48),Q(cid:48)) höchstens zwei Isometrien mit ϕ(P) = P(cid:48) und ϕ(Q) = Q(cid:48) 70 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit -ϕ (P) = P(cid:48) und ϕ (Q) = Q(cid:48) gibt. -i i -Beweis: Seien ϕ ,ϕ ,ϕ Isometrien mit ϕ (P) = P(cid:48), ϕ (Q) = Q(cid:48) mit i = 1,2,3. -1 2 3 i i +ϕ i(P) = P(cid:48) und ϕ i(Q) = Q(cid:48) gibt. 
+Beweis: Seien ϕ 1,ϕ 2,ϕ +3 +Isometrien mit ϕ i(P) = P(cid:48), ϕ i(Q) = Q(cid:48) mit i = 1,2,3. Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: -(Teil i) R X PQ mit ϕ (R) = ϕ (R). -1 2 -∃ ∈ \ -(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id . -X -Aus (Teil i) und (Teil ii) folgt, dass ϕ−1 ϕ = id , also ϕ = ϕ , da P, Q und R in diesem -2 ◦ 1 X 2 1 +(Teil i) ∃R +∈ +X \PQ mit ϕ 1(R) = ϕ 2(R). +(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id X. +Aus (Teil i) und (Teil ii) folgt, dass ϕ−1 +2 +◦ϕ +1 += id X, also ϕ +2 += ϕ 1, da P, Q und R in diesem Fall Fixpunkte sind. Nun zu den Beweisen der Teilaussagen: -(Teil i) Sei R X PQ. Von den drei Punkten ϕ (R),ϕ (R),ϕ (R) liegen zwei in der selben -1 2 3 -∈ \ -Halbebene bzgl. P(cid:48)Q(cid:48) = ϕ (PQ). -i -O. B. d. A. seien ϕ (R) und ϕ (R) in der selben Halbebene. -1 2 -Es gilt: d(P(cid:48),ϕ (R)) = d(ϕ (P),ϕ (R)) -1 1 1 +(Teil i) Sei R +∈ +X \PQ. Von den drei Punkten ϕ 1(R),ϕ 2(R),ϕ 3(R) liegen zwei in der selben +Halbebene bzgl. P(cid:48)Q(cid:48) = ϕ i(PQ). +O. B. d. A. seien ϕ 1(R) und ϕ 2(R) in der selben Halbebene. +Es gilt: d(P(cid:48),ϕ 1(R)) = d(ϕ 1(P),ϕ 1(R)) = d(P,R) -= d(ϕ (P),ϕ (R)) -2 2 -= d(P(cid:48),ϕ (R)) -2 -und analog d(Q(cid:48),ϕ (R)) = d(Q(cid:48),ϕ (R)) -1 2 -(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R / PQ und A / PQ PR QR. Sei B -∈ ∈ ∪ ∪ ∈ -PQ P,Q . Dann ist ϕ(B) = B wegen Bemerkung 63. -\{ } += d(ϕ 2(P),ϕ 2(R)) += d(P(cid:48),ϕ 2(R)) +und analog d(Q(cid:48),ϕ 1(R)) = d(Q(cid:48),ϕ 2(R)) +(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R / +∈ +PQ und A / +∈ +PQ ∪PR ∪QR. Sei B +∈ +PQ \{P,Q }. Dann ist ϕ(B) = B wegen Bemerkung 63. +Ist R +∈ +AB, so enthält AB 2 Fixpunkte von ϕ Bem. 63 -Ist R AB, so enthält AB 2 Fixpunkte von ϕ ===== ϕ(A) = A. -∈ ⇒ -A -R -C +===== +⇒ +ϕ(A) = A. P B Q -Abbildung 4.5: P,Q,R sind Fixpunkte, B PQ P,Q , A / PQ PR QR -∈ \{ } ∈ ∪ ∪ -Ist R / AB, so ist AB PR = oder AB RQ = nach Satz 4.1. 
Der Schnittpunkt -∈ ∩ (cid:54) ∅ ∈ (cid:54) ∅ -C ist dann Fixpunkt von ϕ(cid:48) nach Bemerkung 63 ϕ(A) = A. +C +R +A +Abbildung 4.5: P,Q,R sind Fixpunkte, B +∈ +PQ \{P,Q }, A / +∈ +PQ ∪PR ∪QR +Ist R / +∈ +AB, so ist AB ∩PR (cid:54)= +∅ +oder AB +∈ +RQ (cid:54)= +∅ +nach Satz 4.1. Der Schnittpunkt +C ist dann Fixpunkt von ϕ(cid:48) nach Bemerkung 63 ⇒ +ϕ(A) = A. Bemerkung 64 (SWS-Kongruenzsatz) -Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem ABC und A(cid:48)B(cid:48)C(cid:48) -(cid:52) (cid:52) +Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem (cid:52)ABC und (cid:52)A(cid:48)B(cid:48)C(cid:48) Dreiecke, für die gilt: (i) d(A,B) = d(A(cid:48),B(cid:48)) -(ii) ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48) -∼ +(ii) ∠CAB ∼= ∠C(cid:48)A(cid:48)B(cid:48) 71 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE (iii) d(A,C) = d(A(cid:48),C(cid:48)) -Dann ist ABC kongruent zu A(cid:48)B(cid:48)C(cid:48) . -(cid:52) (cid:52) +Dann ist (cid:52)ABC kongruent zu (cid:52)A(cid:48)B(cid:48)C(cid:48) . Beweis: Sei ϕ die Isometrie mit ϕ(A(cid:48)) = A, ϕ(A(cid:48)C(cid:48)+) = AC+ und ϕ(A(cid:48)B(cid:48)+) = AB+. Diese Isometrie existiert wegen Punkt §4. -C ϕ(A(cid:48)C(cid:48)+) und B ϕ(A(cid:48)B(cid:48)+). -⇒ ∈ ∈ -d(A(cid:48),C(cid:48)) = d(ϕ(A(cid:48)),ϕ(C(cid:48))) = d(A,ϕ(C(cid:48))) =3 =(i) ϕ(C(cid:48)) = C ⇒ -d(A(cid:48),B(cid:48)) = d(ϕ(A(cid:48)),ϕ(B(cid:48))) = d(A,ϕ(B(cid:48))) =3 =(i) ϕ(B(cid:48)) = B +C +∈ +ϕ(A(cid:48)C(cid:48)+) und B +∈ +ϕ(A(cid:48)B(cid:48)+). +d(A(cid:48),C(cid:48)) = d(ϕ(A(cid:48)),ϕ(C(cid:48))) = d(A,ϕ(C(cid:48))) +3(i) +== +⇒ +ϕ(C(cid:48)) = C +d(A(cid:48),B(cid:48)) = d(ϕ(A(cid:48)),ϕ(B(cid:48))) = d(A,ϕ(B(cid:48))) +3(i) +== ⇒ -Also gilt insbesondere ϕ( A(cid:48)B(cid:48)C(cid:48)) = ABC. (cid:4) -(cid:52) (cid:52) +ϕ(B(cid:48)) = B +Also gilt insbesondere ϕ( (cid:52)A(cid:48)B(cid:48)C(cid:48)) = (cid:52)ABC. (cid:4) Bemerkung 65 (WSW-Kongruenzsatz) -Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. 
Seien außerdem ABC und A(cid:48)B(cid:48)C(cid:48) -(cid:52) (cid:52) +Sei (X,d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem (cid:52)ABC und (cid:52)A(cid:48)B(cid:48)C(cid:48) Dreiecke, für die gilt: (i) d(A,B) = d(A(cid:48),B(cid:48)) -(ii) ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48) -∼ -(iii) ∠ABC = ∠A(cid:48)B(cid:48)C(cid:48) -∼ -Dann ist ABC kongruent zu A(cid:48)B(cid:48)C(cid:48) . -(cid:52) (cid:52) +(ii) ∠CAB ∼= ∠C(cid:48)A(cid:48)B(cid:48) +(iii) ∠ABC ∼= ∠A(cid:48)B(cid:48)C(cid:48) +Dann ist (cid:52)ABC kongruent zu (cid:52)A(cid:48)B(cid:48)C(cid:48) . Beweis: Sei ϕ die Isometrie mit ϕ(A(cid:48)) = A, ϕ(B(cid:48)) = B und ϕ(C(cid:48)) liegt in der selben Halbebene bzgl. AB wie C. Diese Isometrie existiert wegen §4. -Aus ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48) = ∠ϕ(C(cid:48))ϕ(A(cid:48))ϕ(B(cid:48)) = ∠ϕ(C(cid:48))AB folgt, dass ϕ(C(cid:48)) AC+. +Aus ∠CAB = ∠C(cid:48)A(cid:48)B(cid:48) = ∠ϕ(C(cid:48))ϕ(A(cid:48))ϕ(B(cid:48)) = ∠ϕ(C(cid:48))AB folgt, dass ϕ(C(cid:48)) ∈ +AC+. Analog folgt aus ∠ABC = ∠A(cid:48)B(cid:48)C(cid:48) = ∠ϕ(A(cid:48))ϕ(B(cid:48))ϕ(C(cid:48)) = ∠ABϕ(C(cid:48)), dass ϕ(C(cid:48)) ∈ BC+. -Dann gilt ϕ(C(cid:48)) AC BC = C ϕ(C(cid:48)) = C. -∈ ∩ { } ⇒ -Es gilt also ϕ( A(cid:48)B(cid:48)C(cid:48)) = ABC. (cid:4) -(cid:52) (cid:52) +Dann gilt ϕ(C(cid:48)) +∈ +AC ∩BC = {C +} ⇒ +ϕ(C(cid:48)) = C. +Es gilt also ϕ( (cid:52)A(cid:48)B(cid:48)C(cid:48)) = (cid:52)ABC. (cid:4) Definition 61 -a) Ein Winkel ist ein Punkt P X zusammen mit 2 Halbgeraden mit Anfangspunkt P. +a) Ein Winkel ist ein Punkt P ∈ -Man schreibt: ∠R PR bzw. ∠R PR 2 -1 2 2 1 +X zusammen mit 2 Halbgeraden mit Anfangspunkt P. +Man schreibt: ∠R 1PR +2 +bzw. ∠R 2PR 12 b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den anderen abbildet. -c) ∠R(cid:48)P(cid:48)R(cid:48) heißt kleiner als ∠R PR , wenn es eine Isometrie ϕ gibt, mit ϕ(P(cid:48)) = P, -1 2 1 2 -ϕ(P(cid:48)R(cid:48)+) = PR+ und ϕ(R(cid:48)) liegt in der gleichen Halbebene bzgl. 
PR wie R und in -1 1 2 1 2 -der gleichen Halbebene bzgl. PR wie R -2 1 -d) Im Dreieck PQR gibt es Innenwinkel und Außenwinkel. -(cid:52) +c) ∠R(cid:48) 1P(cid:48)R(cid:48) +2 +heißt kleiner als ∠R 1PR 2, wenn es eine Isometrie ϕ gibt, mit ϕ(P(cid:48)) = P, +ϕ(P(cid:48)R(cid:48)+ +1 +) = PR+ +1 +und ϕ(R(cid:48) 2) liegt in der gleichen Halbebene bzgl. PR +1 +wie R +2 +und in +der gleichen Halbebene bzgl. PR +2 +wie R +1 +d) Im Dreieck (cid:52)PQR gibt es Innenwinkel und Außenwinkel. Bemerkung 66 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. Beweis: Zeige ∠PRQ < ∠RQP(cid:48). -Sei M der Mittelpunkt der Strecke QR und P(cid:48) PQ+ PQ. Sei A MP− mit d(P,M) = -∈ \ ∈ +Sei M der Mittelpunkt der Strecke QR und P(cid:48) +∈ +PQ+ \PQ. Sei A +∈ +MP− mit d(P,M) = d(M,A). -2Für dieses Skript gilt: ∠R PR =∠R PR . Also sind insbesondere alle Winkel ≤180◦. -1 2 2 1 +2Für dieses Skript gilt: ∠R 1PR +2 +=∠R 2PR 1. Also sind insbesondere alle Winkel ≤180◦. 72 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE -P +P R(cid:48) +1 R -2 +1 R(cid:48) 2 +R +2 +(a) ∠R(cid:48) 1P(cid:48)R(cid:48) 2istkleinerals∠R 1PR 2, +vgl. Definition 61.c +P Q R -P R 1(cid:48) R 1 -(a) ∠R(cid:48)P(cid:48)R(cid:48) istkleinerals∠R PR ,(b) Innenwinkel und Außenwin- -1 2 1 2 -vgl. Definition 61.c kel in (cid:52)PQR, vgl. Definiti- +(b) Innenwinkel und Außenwin- +kel in (cid:52)PQR, vgl. 
Definiti- on 61.d Abbildung 4.6: Situation aus Definition 61 -P -R -R Q M -β A -Qα P -(a) Parallelogramm AQPR(b) Innen- und Außenwin- +P +R +(a) Parallelogramm AQPR +α +βR +Q P +(b) Innen- und Außenwin- kel von (cid:52)PQR Abbildung 4.7: Situation aus Bemerkung 66 -Es gilt: d(Q,M) = d(M,R) und d(P,M) = d(M,A) sowie ∠PMR = ∠AMQ MRQ -⇒ (cid:52) -ist kongruent zu AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet, -(cid:52) +Es gilt: d(Q,M) = d(M,R) und d(P,M) = d(M,A) sowie ∠PMR = ∠AMQ +⇒ +(cid:52)MRQ +ist kongruent zu (cid:52)AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet, bildet R auf Q und P auf A ab. -∠MQA = ∠MRP = ∠QRP = ∠PRQ. ⇒ +∠MQA = ∠MRP = ∠QRP = ∠PRQ. Noch zu zeigen: ∠MQA < ∠RQP(cid:48), denn A liegt in der selben Halbebene bzgl. PQ wie M. Proposition 4.3 (Existenz der Parallelen) Sei (X,d,G) eine Geometrie mit den Axiomen §1 - §4. -Dann gibt es zu jeder Geraden g G und jedem Punkt P X g mindestens eine -∈ ∈ \ -Parallele h G mit P h und g h = . -∈ ∈ ∩ ∅ -Beweis: Seien P,Q f G und ϕ die Isometrie, die Q auf P und P auf P(cid:48) f mit -∈ ∈ ∈ +Dann gibt es zu jeder Geraden g +∈ +G und jedem Punkt P +∈ +X \g mindestens eine +Parallele h +∈ +G mit P +∈ +h und g ∩h = ∅. +Beweis: Seien P,Q +∈ +f +∈ +G und ϕ die Isometrie, die Q auf P und P auf P(cid:48) +∈ +f mit d(P,P(cid:48)) = d(P,Q) abbildet und die Halbebenen bzgl. f erhält. 73 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE -f +Q h -P +f g -Q +P Abbildung 4.8: Situation aus Proposition 4.3 -Annahme: ϕ(g) g = -∩ (cid:54) ∅ -Es gibt einen Schnittpunkt R = ϕ(g) g. -⇒ { } ∩ +Annahme: ϕ(g) ∩g (cid:54)= +∅ +⇒ +Es gibt einen Schnittpunkt {R +} += ϕ(g) ∩g. Dann ist ∠RQP = ∠RQP(cid:48) < ∠RPP(cid:48) nach Bemerkung 66 und ∠RQP = ∠RPP(cid:48), weil ϕ(∠RQP) = ∠RPP(cid:48). +⇒ Widerspruch ⇒ -ϕ(g) g = (cid:4) -⇒ ∩ ∅ +ϕ(g) ∩g = +∅ +(cid:4) Folgerung 4.4 Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. D. h. 
es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodass ϕ(R) in der gleichen @@ -3806,89 +6211,119 @@ Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidisc Dreiecke mit drei 90◦-Winkeln. Proposition 4.5 In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der -Innenwinkel π. +Innenwinkel ≤ +π. 74 4.1.AXIOMEFÜRDIEEUKLIDISCHEEBENE Sei im Folgenden „IWS“ die „Innenwinkelsumme“. -Beweis: Sei ein Dreieck mit IWS( ) = π+ε -(cid:52) (cid:52) -C A(cid:48) -γ +Beweis: Sei +(cid:52) +ein Dreieck mit IWS( (cid:52)) = π+ε +α β -M -γ α +γ +P +(a) Summe der Winkel α, β und γ α 1 -P α α 2 β +α 2 β +γ +M A B -(a) Summe der Winkel α, β und γ (b) Situation aus Proposition 4.5 +C A(cid:48) +α +(b) Situation aus Proposition 4.5 Abbildung 4.10: Situation aus Proposition 4.5 -Sei α ein Innenwinkel von . -(cid:52) -Beh.: Es gibt ein Dreieck (cid:48) mit IWS( (cid:48)) = IWS( ) und einem Innenwinkel α(cid:48) α. -(cid:52) (cid:52) (cid:52) ≤ 2 -Dann gibt es für jedes n ein mit IWS( ) = IWS( ) und Innenwinkel α(cid:48) α . Für -(cid:52)n (cid:52)n (cid:52) ≤ 2n -α < ε ist dann die Summe der beiden Innenwinkel um größer als π Widerspruch -2n (cid:52)n ⇒ +Sei α ein Innenwinkel von (cid:52). +Beh.: Es gibt ein Dreieck (cid:52)(cid:48) mit IWS( (cid:52)(cid:48)) = IWS( (cid:52)) und einem Innenwinkel α(cid:48) +≤ +α 2. +Dann gibt es für jedes n ein +(cid:52)n +mit IWS( (cid:52)n) = IWS( (cid:52)) und Innenwinkel α(cid:48) +≤ +α 2n. Für +α +2n +< ε ist dann die Summe der beiden Innenwinkel um +(cid:52)n +größer als π +⇒ +Widerspruch zu Folgerung 4.4. -Beweis: Es seien A,B,C X und das Dreieck mit den Eckpunkten A,B,C und α sei -∈ (cid:52) +Beweis: Es seien A,B,C +∈ +X und +(cid:52) +das Dreieck mit den Eckpunkten A,B,C und α sei der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C. -Sei M der Mittelpunkt der Strecke BC. Sei außerdem α = ∠CAM und α = ∠BAM. -1 2 -Sei weiter A(cid:48) MA− mit d(A(cid:48),M) = d(A,M). 
+Sei M der Mittelpunkt der Strecke BC. Sei außerdem α +1 += ∠CAM und α +2 += ∠BAM. +Sei weiter A(cid:48) ∈ +MA− mit d(A(cid:48),M) = d(A,M). Die Situation ist in Abbildung 4.10b skizziert. -(MA(cid:48)C) und (MAB) sind kongruent. ∠ABM = ∠A(cid:48)CM und ∠MA(cid:48)C = -⇒ (cid:52) (cid:52) ⇒ -∠MAB. α+β+γ = IWS( ABC) = IWS( AA(cid:48)C)undα +α = α,alsoo.B.d.A. -1 2 -⇒ (cid:52) (cid:52) -α α -1 ≤ 2 +⇒ +(cid:52)(MA(cid:48)C) und (cid:52)(MAB) sind kongruent. +⇒ +∠ABM = ∠A(cid:48)CM und ∠MA(cid:48)C = +∠MAB. +⇒ +α+β+γ = IWS( (cid:52)ABC) = IWS( (cid:52)AA(cid:48)C)undα 1+α +2 += α,alsoo.B.d.A. +α +1 ≤ +α +2 Bemerkung 67 In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. -β(cid:48)C α(cid:48) -α(cid:48)(cid:48) g -γ +α(cid:48) +α(cid:48)(cid:48) α β +β(cid:48) +γ A B +C +g Abbildung 4.11: Situation aus Bemerkung 67 Beweis: Sei g eine Parallele von AB durch C. +• Es gilt α(cid:48) = α wegen Proposition 4.3. • Es gilt β(cid:48) = β wegen Proposition 4.3. • Es gilt α(cid:48)(cid:48) = α(cid:48) wegen Aufgabe 8. -• 75 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE -IWS( ABC) = γ +α(cid:48)(cid:48)+β(cid:48) = π -⇒ (cid:52) +⇒ +IWS( (cid:52)ABC) = γ +α(cid:48)(cid:48)+β(cid:48) = π Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. 4.2 Weitere Eigenschaften einer euklidischen Ebene Satz 4.6 (Strahlensatz) In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. +x y -3 λ2z -2 z -1 -x λ2x -0 +−1 0 1 2 3 4 +0123 +z x -1 0 1 2 3 4 -− +λ2z +λ2x Abbildung 4.12: Strahlensatz Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. +A B(cid:48) C(cid:48) -b(cid:48) +B C +cb a +c(cid:48) +b(cid:48) a(cid:48) -b a -A c B c(cid:48) B(cid:48) -Abbildung 4.13: Die Dreiecke ABC und AB(cid:48)C(cid:48) sind ähnlich. 
-(cid:52) (cid:52) +Abbildung 4.13: Die Dreiecke (cid:52)ABC und (cid:52)AB(cid:48)C(cid:48) sind ähnlich. 4.2.1 Flächeninhalt Definition 62 „Simplizialkomplexe“ in euklidischer Ebene (X,d) heißen flächengleich, wenn sie sich in @@ -3897,77 +6332,110 @@ kongruente Dreiecke zerlegen lassen. (a) Zwei kongruente Dreiecke (b) ZweiweiterekongruenteDrei- ecke Abbildung 4.14: Flächengleichheit -Der Flächeninhalt eines Dreiecks ist 1/2 Grundseite Höhe. -· · -C +Der Flächeninhalt eines Dreiecks ist 1/2 ·Grundseite ·Höhe. +A B C +L C +h c -L -h A c +(a) 1/2·|AB|·|h c| · +A B +C +L +A h -a -c -L C A B A B -(a) 1/2·|AB|·|h c| (b) 1/2·|BC|·|h a| +ac +(b) 1/2·|BC|·|h a| Abbildung 4.15: Flächenberechnung im Dreieck Zu zeigen: Unabhängigkeit von der gewählten Grundseite. -C α -γ L A -γ α -A L B +γ +γ +A B +C +L A +L C -Abbildung 4.16: ABL und CL B sind ähnlich, weil IWS = π -a C -(cid:52) (cid:52) -=S =tr =a =hl =en =s =atz a = c a h = c h -⇒ hc ha → · a · c +Abbildung 4.16: (cid:52)ABL +a +und (cid:52)CL CB sind ähnlich, weil IWS = π +Strahlensatz ======= +⇒ +a +hc += c +ha → +a ·h +a += c ·h +c Satz 4.7 (Satz des Pythagoras) Im rechtwinkligen Dreieck gilt a2+b2 = c2, wobei c die Hypotenuse und a,b die beiden Katheten sind. -Beweis: (a+b) (a+b) = a2+2ab+b2 = c2+4 (1 a b) -· · 2 · · +Beweis: (a+b) ·(a+b) = a2+2ab+b2 = c2+4 ·(1 +2 +·a ·b) 77 4.2.WEITEREEIGENSCHAFTENEINEREUKLIDISCHENEBENE -a b -b · · -γ +c +b a +A B C -a · -b a a -b -· · -A c B b a -(a) a,b sind Katheten und c ist die Hypo- (b) Beweisskizze +(a) a,b sind Katheten und c ist die Hypo- tenuse +b a +ba +b a +b +a +· +· +·· +γ +(b) Beweisskizze Abbildung 4.17: Satz des Pythagoras Satz 4.8 Bis auf Isometrie gibt es genau eine euklidische Ebene (X,d,G), nämlich X = R2, d = euklidischer Abstand, G = Menge der üblichen Geraden. Beweis: -(i) (R2,d ) ist offensichtlich eine euklidische Ebene. 
-Euklid -(ii) Sei (X,d) eine euklidische Ebene und g ,g Geraden in X, die sich in einem Punkt 0 -1 2 +(i) (R2,d Euklid) ist offensichtlich eine euklidische Ebene. +(ii) Sei (X,d) eine euklidische Ebene und g 1,g +2 +Geraden in X, die sich in einem Punkt 0 im rechten Winkel schneiden. -Sei P X (g g ) ein Punkt und P der Fußpunkt des Lots von P auf g (vgl. -1 2 X 1 -∈ \ ∪ -Aufgabe 9 (c)) und P der Fußpunkt des Lots von P auf g . -Y 2 -Sei x := d(P ,0) und y := d(P ,0). -P X P Y +Sei P +∈ +X \(g +1 +∪g 2) ein Punkt und P +X +der Fußpunkt des Lots von P auf g +1 +(vgl. +Aufgabe 9 (c)) und P +Y +der Fußpunkt des Lots von P auf g 2. +Sei x +P +:= d(P X,0) und y +P +:= d(P Y,0). In Abbildung 4.19 wurde die Situation skizziert. -Sei h : X R2 eine Abbildung mit h(P) := (x ,y ) Dadurch wird h auf dem -P P +Sei h : X → +R2 eine Abbildung mit h(P) := (x P,y P) Dadurch wird h auf dem Quadranten definiert, in dem P liegt, d. h. -Q X mit PQ g = = PQ g -1 2 -∀ ∈ ∩ ∅ ∩ +∀Q +∈ +X mit PQ ∩g +1 += +∅ += PQ ∩g +2 Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. Im Folgenden werden zwei Aussagen gezeigt: (i) h ist surjektiv @@ -3975,54 +6443,116 @@ Im Folgenden werden zwei Aussagen gezeigt: Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist. Nun zu den Beweisen der Teilaussagen: 78 4.3.HYPERBOLISCHEGEOMETRIE -g g -2 2 -X X +· g 1 +g +2 P +X +(a) Schritt 1 +· g 1 +g +2 +x P +y P -Y P -y +0 P X P -· g 1 0 · x P P X g 1 -(a) Schritt 1 (b) Schritt 2 +Y +X +(b) Schritt 2 Abbildung 4.18: Beweis zu Satz 4.8 -(i) Sei (x,y) R2, z. B. x 0,y 0. Sei P(cid:48) g mit d(0,P(cid:48)) = x und P(cid:48) auf der +(i) Sei (x,y) +∈ +R2, z. B. x +≥ +0,y +≥ +0. Sei P(cid:48) +∈ +g 1 -∈ ≥ ≥ ∈ -gleichen Seite von g wie P. +mit d(0,P(cid:48)) = x und P(cid:48) auf der +gleichen Seite von g 2 -R Q +wie P. 
+g 1 g 2 -X -P +x P y P -0 x P g 1 +P +Q +0 +R +X Abbildung 4.19: Beweis zu Satz 4.8 (ii) Zu Zeigen: d(P,Q) = d(h(P),h(Q)) +d(P,Q)2 Pythagoras -d(P,Q)2 = d(P,R)2+d(R,Q)2 = (y y )2+(x x )2. -Q P Q P -− − -h(Q) = (x ,y ) -Q Q += d(P,R)2+d(R,Q)2 = (y +Q +−y P)2+(x +Q +−x P)2. +h(Q) = (x Q,y Q) 4.3 Hyperbolische Geometrie Definition 63 Sei -H := z C (z) > 0 = (cid:8) (x,y) R2 (cid:12) (cid:12) y > 0(cid:9) -{ ∈ | (cid:61) } ∈ +H := {z +∈ +C +| +(cid:61)(z) > 0 +} += (cid:8) (x,y) +∈ +R2 (cid:12) (cid:12) y > 0(cid:9) 79 4.3.HYPERBOLISCHEGEOMETRIE -die obere Halbebene bzw. Poincaré-Halbebene und G = G G mit -1 2 -∪ -G = g H m R,r R : g = z H : z m = r -1 1 >0 1 -{ ⊆ | ∃ ∈ ∈ { ∈ | − | }} -G = g H x R : g = z H : (z) = x -2 2 2 -{ ⊆ | ∃ ∈ { ∈ (cid:60) }} +die obere Halbebene bzw. Poincaré-Halbebene und G = G +1 +∪G +2 +mit +G +1 += {g +1 +⊆ +H +| +∃m +∈ +R,r +∈ +R +>0 +: g +1 += {z +∈ +H : +| +z −m +| += r +}} +G +2 += {g +2 +⊆ +H +| +∃x +∈ +R : g +2 += {z +∈ +H : (cid:60)(z) = x +}} Die Elemente aus G heißen hyperbolische Geraden. Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) Die hyperbolischen Geraden erfüllen... @@ -4031,521 +6561,903 @@ b) ...das Anordnungsaxiom §3 (ii) c) ...nicht das Parallelenaxiom §5 Beweis: a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt: -Gegeben z ,z H -1 2 +Gegeben z 1,z +2 ∈ +H Existenz: -Fall 1 (z ) = (z ) -1 2 -(cid:60) (cid:60) -z und z liegen auf -1 2 +Fall 1 (cid:60)(z 1) = (cid:60)(z 2) ⇒ -g = z C (z) = (z ) H +z 1 -{ ∈ | (cid:60) (cid:60) ∧ } +und z +2 +liegen auf +g = {z +∈ +C +| +(cid:60)(z) = (cid:60)(z 1) ∧H +} Siehe Abbildung 4.20a. 
-Fall 2 (z ) = (z ) -1 2 -(cid:60) (cid:54) (cid:60) -Betrachtenunz undz alsPunkteindereuklidischenEbene.DieMittelsenkrech- -1 2 +Fall 2 (cid:60)(z 1) (cid:54)= (cid:60)(z 2) +Betrachtenunz +1 +undz +2 +alsPunkteindereuklidischenEbene.DieMittelsenkrech- tezudiesenPunktenschneidetdiex-Achse.AllePunkteaufderMittelsenkrechten -zuz undz sindgleichweitvonz undz entfernt.DaheristderSchnittpunktmit -1 2 1 2 -der x-Achse der Mittelpunkt eines Kreises durch z und z (vgl. Abbildung 4.20b) -1 2 -y y -4 4 -3 Z 3 -2 -2 Z 2 -1 Z -2 -1 1 Z 1 -(Z ) -1 0 0 1 2(cid:60) 1 3 4 5 x 1 0 0 1 2 3 4 5 x -− − -(a) Fall 1 (b) Fall 2 +zuz +1 +undz +2 +sindgleichweitvonz +1 +undz +2 +entfernt.DaheristderSchnittpunktmit +der x-Achse der Mittelpunkt eines Kreises durch z +1 +und z +2 +(vgl. Abbildung 4.20b) +x +y +−1 0 1 2 3 4 5 +01234 +Z +1 +Z +2 +(cid:60)(Z 1) +(a) Fall 1 +x +y +−1 0 1 2 3 4 5 +01234 +Z +1 +Z +2 +(b) Fall 2 Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer Geraden -b) Sei g G ˙ G eine hyperbolische Gerade. -1 2 -∈ ∪ +b) Sei g +∈ +G +1 +˙ ∪G +2 +eine hyperbolische Gerade. 80 4.3.HYPERBOLISCHEGEOMETRIE -Es existieren disjunkte Zerlegungen von H g: -\ -Fall 1: g = z H z m = r G +Es existieren disjunkte Zerlegungen von H \g: +Fall 1: g = {z +∈ +H +(cid:107) +z −m +| += r +} ∈ +G 1 -{ ∈ (cid:107) − | } ∈ Dann gilt: -H = z H z m < r ˙ z H z m > r -{ ∈ (cid:107) − | }∪{ ∈ (cid:107) − | } -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -=:H1 (Kreisinneres) =:H2 (Kreisäußeres) -Da r > 0 ist H nicht leer, da r R ist H nicht leer. -1 2 +H = {z +∈ +H +(cid:107) +z −m +| +< r +} +(cid:124) (cid:123)(cid:122) (cid:125) +=:H1 (Kreisinneres) +˙ ∪{z +∈ +H +(cid:107) +z −m +| +> r +} +(cid:124) (cid:123)(cid:122) (cid:125) +=:H2 (Kreisäußeres) +Da r > 0 ist H +1 +nicht leer, da r +∈ +R ist H +2 +nicht leer. 
+Fall 2: g = {z ∈ -Fall 2: g = z H z = x G +H +| +(cid:60)z = x +} ∈ +G 2 -{ ∈ | (cid:60) } ∈ Die disjunkte Zerlegung ist: -H = z H (z) < x ˙ z H (z) > x -{ ∈ | (cid:60) }∪{ ∈ | (cid:60) } -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -=:H1 (Links) =:H2 (Rechts) -Zu zeigen: A H , B H mit i,j 1,2 gilt: AB g = i = j -i j -∀ ∈ ∈ ∈ { } ∩ (cid:54) ∅ ⇔ (cid:54) -„ “: A H ,B H : AB g = -1 2 -⇐ ∈ ∈ ∩ (cid:54) ∅ -Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H haben einen Abstand +H = {z +∈ +H +| +(cid:60)(z) < x +} +(cid:124) (cid:123)(cid:122) (cid:125) +=:H1 (Links) +˙ ∪{z +∈ +H +| +(cid:60)(z) > x +} +(cid:124) (cid:123)(cid:122) (cid:125) +=:H2 (Rechts) +Zu zeigen: ∀A +∈ +H i, B +∈ +H +j +mit i,j +∈ +{1,2 +} +gilt: AB ∩g (cid:54)= +∅ ⇔ +i (cid:54)= j +„ ⇐“: A +∈ +H 1,B +∈ +H +2 +: AB ∩g (cid:54)= +∅ +Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H 1 -von m der kleiner ist als r und alle Punkte in H haben einen Abstand von m der +haben einen Abstand +von m der kleiner ist als r und alle Punkte in H 2 +haben einen Abstand von m der größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige -Abbildung f : R R auffassen kann, greift der Zwischenwertsatz AB g = +Abbildung f : R +→ +R >0 -→ ⇒ ∩ (cid:54) ∅ -„ “: A H ,B H mit i,j 1,2 : AB g = i = j -i j -⇒ ∈ ∈ ∈ { } ∩ (cid:54) ∅ ⇒ (cid:54) +auffassen kann, greift der Zwischenwertsatz +⇒ +AB ∩g (cid:54)= +∅ +„ ⇒“: A +∈ +H i,B +∈ +H +j +mit i,j +∈ +{1,2 +} +: AB ∩g (cid:54)= +∅ ⇒ +i (cid:54)= j Sei h die Gerade, die durch A und B geht. -Da A,B / g, aber A,B h gilt, haben g und h insbesondere mindestens einen -∈ ∈ +Da A,B / +∈ +g, aber A,B +∈ +h gilt, haben g und h insbesondere mindestens einen unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt schneiden. Sei C dieser Punkt. -Aus A,B / g folgt: C = A und C = B. Also liegt C zwischen A und B. 
Daraus folgt, -∈ (cid:54) (cid:54) +Aus A,B / +∈ +g folgt: C (cid:54)= A und C (cid:54)= B. Also liegt C zwischen A und B. Daraus folgt, dass A und B bzgl. g in verschiedenen Halbebenen liegen. c) Siehe Abbildung 4.21. -y -5 -4 -3 -2 -1 -0 x -5 4 3 2 1 0 1 2 3 4 5 6 -− − − − − +y +−5 −4 −3 −2 −1 0 1 2 3 4 5 6 +012345 Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 81 4.3.HYPERBOLISCHEGEOMETRIE Definition 64 -Es seien a,b,c,d R mit ad bc = 0 und σ : C C eine Abbildung definiert durch -∈ − (cid:54) → -az+b +Es seien a,b,c,d +∈ +R mit ad −bc (cid:54)= 0 und σ : C +→ +C eine Abbildung definiert durch σ(z) := +az+b cz+d σ heißt Möbiustransformation. Proposition 4.9 -a) Die Gruppe SL (R) operiert auf H durch die Möbiustransformation -2 -(cid:18) (cid:19) -a b az+b -σ(z) := z := -c d ◦ cz+d -b) Die Gruppe PSL (R) = SL (R)/ operiert durch σ auf H. -2 2 (±I) -c) PSL (R) operiert auf R . Diese Gruppenoperation ist 3-fach transitiv, d. h. -2 -∪{∞} -zu x < x < x R gibt es genau ein σ PSL (R) mit σ(x ) = 0, σ(x ) = 1, -0 1 ∞ 2 0 1 -∈ ∈ -σ(x ) = . -∞ +a) Die Gruppe SL 2(R) operiert auf H durch die Möbiustransformation +σ(z) := +(cid:18) +a b +c +d(cid:19) +◦z := +az+b +cz+d +b) Die Gruppe PSL 2(R) = SL 2(R)/ +(±I) +operiert durch σ auf H. +c) PSL 2(R) operiert auf R ∪{∞}. Diese Gruppenoperation ist 3-fach transitiv, d. h. +zu x +0 +< x +1 +< x ∞ -d) SL (R) wird von den Matrizen -2 -(cid:18) (cid:19) (cid:18) (cid:19) (cid:18) (cid:19) -λ 0 1 t 0 1 -, und mit t,λ R× -0 λ−1 0 1 1 0 ∈ -− -(cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) (cid:124) (cid:123)(cid:122) (cid:125) -=:A =:Bt =:C +∈ +R gibt es genau ein σ +∈ +PSL 2(R) mit σ(x 0) = 0, σ(x 1) = 1, +σ(x ∞) = ∞. 
+d) SL 2(R) wird von den Matrizen +(cid:18) +λ 0 +0 +λ−1(cid:19) +(cid:124) (cid:123)(cid:122) (cid:125) +=:A λ +,(cid:18) +1 t +0 +1(cid:19) +(cid:124) (cid:123)(cid:122) (cid:125) +=:Bt +und +(cid:18) +0 1 +−1 +0(cid:19) +(cid:124) (cid:123)(cid:122) (cid:125) +=:C +mit t,λ +∈ +R× erzeugt. -e) PSL (R) operiert auf G. -2 +e) PSL 2(R) operiert auf G. Beweis: -(cid:18) (cid:19) +a) Sei z = x+iy +∈ +H, d. h. y > 0 und σ = +(cid:18) a b -a) Sei z = x+iy H, d. h. y > 0 und σ = SL (R) -∈ c d ∈ 2 -a(x+iy)+b +c +d(cid:19) +∈ +SL 2(R) +⇒ σ(z) = -⇒ c(x+iy)+d -(ax+b)+iay (cx+d) icy -= − -(cx+d)+icy · (cx+d) icy -− -(ax+b)(cx+d)+aycy ay(cx+d) (ax+b)cy -= +i − -(cx+d)2+(cy)2 (cx+d)2+(cy)2 -axcx+axd+bcx+bd+aycy (ad bc)y -= +i − -(cx+d)2+(cy)2 (cx+d)2+(cy)2 -SL =2(R) ac(x2+y2)+adx+bcx+bd y +a(x+iy)+b +c(x+iy)+d += +(ax+b)+iay +(cx+d)+icy · +(cx+d) −icy +(cx+d) −icy += +(ax+b)(cx+d)+aycy +(cx+d)2+(cy)2 ++iay(cx+d) −(ax+b)cy +(cx+d)2+(cy)2 += +axcx+axd+bcx+bd+aycy +(cx+d)2+(cy)2 +i -(cx+d)2+(cy)2 (cx+d)2+(cy)2 +(ad −bc)y +(cx+d)2+(cy)2 +SL2(R) += +ac(x2+y2)+adx+bcx+bd +(cx+d)2+(cy)2 ++i +y +(cx+d)2+(cy)2 +⇒ +(cid:61)(σ(z)) = y -(σ(z)) = > 0 -⇒ (cid:61) (cx+d)2+(cy)2 +(cx+d)2+(cy)2 +> 0 Die Abbildung bildet also nach H ab. 
Außerdem gilt: -(cid:18) (cid:19) -1 0 x+iy -z = = x+iy = z -0 1 ◦ 1 +(cid:18) +1 0 +0 +1(cid:19) +◦z = +x+iy +1 += x+iy = z 82 4.3.HYPERBOLISCHEGEOMETRIE und -(cid:18) a b(cid:19) (cid:18)(cid:18) a(cid:48) b(cid:48)(cid:19) (cid:19) (cid:18) a b(cid:19) a(cid:48)z+b(cid:48) -z = -c d ◦ c(cid:48) d(cid:48) ◦ c d ◦ c(cid:48)z+d(cid:48) -aa(cid:48)z+b(cid:48) -+b +(cid:18) a b +c +d(cid:19) ◦(cid:18)(cid:18) a(cid:48) b(cid:48) +c(cid:48) +d(cid:48)(cid:19) ◦z(cid:19) += +(cid:18) a b +c +d(cid:19) +◦ +a(cid:48)z+b(cid:48) c(cid:48)z+d(cid:48) = +aa(cid:48)z+b(cid:48) +c(cid:48)z+d(cid:48) ++b ca(cid:48)z+b(cid:48) -+d c(cid:48)z+d(cid:48) ++d += a(a(cid:48)z+b(cid:48))+b(c(cid:48)z+d(cid:48)) c(cid:48)z+d(cid:48) -= c(a(cid:48)z+b(cid:48))+d(c(cid:48)z+d(cid:48)) c(cid:48)z+d(cid:48) -a(a(cid:48)z+b(cid:48))+b(c(cid:48)z+d(cid:48)) = +a(a(cid:48)z+b(cid:48))+b(c(cid:48)z+d(cid:48)) c(a(cid:48)z+b(cid:48))+d(c(cid:48)z+d(cid:48)) -(aa(cid:48)+bc(cid:48))z+ab(cid:48)+bd(cid:48) = +(aa(cid:48)+bc(cid:48))z+ab(cid:48)+bd(cid:48) (ca(cid:48)+db(cid:48))z+cb(cid:48)+dd(cid:48) -(cid:18) aa(cid:48)+bc(cid:48) ab(cid:48)+bd(cid:48)(cid:19) -= z -ca(cid:48)+db(cid:48) cb(cid:48)+dd(cid:48) -◦ -(cid:18)(cid:18) a b(cid:19) (cid:18) a(cid:48) b(cid:48)(cid:19)(cid:19) -= z -c d · c(cid:48) d(cid:48) ◦ -b) Es gilt σ(z) = ( σ)(z) für alle σ SL (R) und z H. -2 -− ∈ ∈ -(cid:18) (cid:19) -c) Ansatz: σ = a b σ(x ) = ax0+b =! 
0 ax +b = 0 b = ax -c d 0 cx0+d ⇒ 0 ⇒ − 0 -σ(x ) = cx +d = 0 d = cx -∞ ∞ ∞ -∞ ⇒ ⇒ − -σ(x ) = 1 ax +b = cx +d -1 1 1 -⇒ -a(x x ) = c(x x ) c = a x1−x0 -1 − 0 1 − ∞ ⇒ x1−x∞ -a2 x x1−x0 +a2x x1−x0 = 1 -∞x1−x∞ 0x1−x∞ -⇒ − · -a2 x1−x0 (x x ) = 1 a2 = x1−x∞ -⇒ x0−x∞ 0 − ∞ ⇒ (x1−x∞)(x1−x0) += +(cid:18) aa(cid:48)+bc(cid:48) ab(cid:48)+bd(cid:48) +ca(cid:48)+db(cid:48) +cb(cid:48)+dd(cid:48)(cid:19) +◦z += +(cid:18)(cid:18) a b +c +d(cid:19) ·(cid:18) a(cid:48) b(cid:48) +c(cid:48) +d(cid:48)(cid:19)(cid:19) +◦z +b) Es gilt σ(z) = ( −σ)(z) für alle σ +∈ +SL 2(R) und z +∈ +H. +c) Ansatz: σ = +(cid:18) +a b +c +d(cid:19) +σ(x 0) = ax0+b +cx0+d +! = 0 +⇒ +ax 0+b = 0 +⇒ +b = −ax +0 +σ(x ∞) = +∞ ⇒ +cx ∞+d = 0 +⇒ +d = −cx +∞ +σ(x 1) = 1 +⇒ +ax 1+b = cx 1+d +a(x +1 +−x 0) = c(x +1 +−x ∞) +⇒ +c = a x1−x0 +x1−x∞ +⇒ +−a2 ·x +∞ +x1−x0 +x1−x∞ ++a2x +0 +x1−x0 +x1−x∞ += 1 +⇒ +a2 x1−x0 x0−x∞(x +0 +−x ∞) = 1 +⇒ +a2 = x1−x∞ +(x1−x∞)(x1−x0) d) Es gilt: -A−1 = A -λ 1 +A−1 +λ += A +1 λ -B−1 = B -t −t +B−1 +t += B +−t C−1 = C3 -Daher genügt es zu zeigen, dass man mit A , B und C alle Matrizen aus SL (R) -λ t 2 +Daher genügt es zu zeigen, dass man mit A λ, B +t +und C alle Matrizen aus SL 2(R) erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit -Matrizen der Form A , B und C die Einheitsmatrix zu generieren. -λ t +Matrizen der Form A λ, B +t +und C die Einheitsmatrix zu generieren. Sei also -(cid:18) (cid:19) +M = +(cid:18) a b -M = SL (R) -c d ∈ 2 +c +d(cid:19) +∈ +SL 2(R) beliebig. Fall 1: a = 0 -Da M SL (R) ist, gilt detM = 1 = ad bc = bc. Daher ist insbesondere c = 0. Es -2 -∈ − − (cid:54) +Da M +∈ +SL 2(R) ist, gilt detM = 1 = ad −bc = −bc. Daher ist insbesondere c (cid:54)= 0. Es folgt: -(cid:18) (cid:19) (cid:18) (cid:19) (cid:18) (cid:19) -0 1 a b c d +(cid:18) +0 1 +−1 +0(cid:19) ·(cid:18) +a b +c +d(cid:19) = -1 0 · c d a b -− − − +(cid:18) +c d +−a +−b(cid:19) 83 4.3.HYPERBOLISCHEGEOMETRIE Gehe zu Fall 2. 
-Fall 2: a = 0 -(cid:54) -Nun wird in M durch M A an der Stelle von a eine 1 erzeugt: +Fall 2: a (cid:54)= 0 +Nun wird in M durch M ·A 1 -· a -(cid:18) b(cid:19) (cid:18)1 0(cid:19) (cid:18) ab(cid:19) -a 1 -a = -c d · 0 a c ad a +an der Stelle von a eine 1 erzeugt: +(cid:18) +a b +c +d(cid:19) ·(cid:18)1 +a +0 +0 +a(cid:19) += +(cid:18) +1 ab +c +a +ad(cid:19) Gehe zu Fall 3. Fall 3: a = 1 -(cid:18) (cid:19) (cid:18) (cid:19) (cid:18) (cid:19) -1 b 1 b 1 0 -− = -c d · 0 1 c d bc -− -Da wir detM = 1 = ad bc = d bc wissen, gilt sogar M = 1. +(cid:18) +1 b +c +d(cid:19) ·(cid:18) +1 −b +0 1 +(cid:19) += +(cid:18) +1 0 +c d +−bc(cid:19) +Da wir detM = 1 = ad −bc = d −bc wissen, gilt sogar M 2,2 -− − += 1. Gehe zu Fall 4. Fall 4: a = 1, b = 0, d = 1 -(cid:18) (cid:19) (cid:18) (cid:19) -1 0 1 0 -A CB C = -−1 c -c 1 0 1 -Daher erzeugen Matrizen der Form A , B und C die Gruppe SL R. (cid:4) -λ t 2 +A −1CB +cC(cid:18) +1 0 +c +1(cid:19) += +(cid:18) +1 0 +0 +1(cid:19) +Daher erzeugen Matrizen der Form A λ, B +t +und C die Gruppe SL 2R. (cid:4) e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. -(cid:18) (cid:19) +• +σ = +(cid:18) λ 0 -σ = , also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in -• 0 λ−1 +0 +λ−1(cid:19) +, also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in Abbildung 4.22a und Abbildung 4.22b dargestellt sind. 
+x y -3 λ2z +−1 0 1 2 3 4 5 6 7 +0123 +m λ2m +m+irλ2m+iλ2r +m+1 +(a) Fall 1 +x y -2 z -3 -λ2m+iλ2r -2 1 -m+ir -1 x λ2x -0 -m λ2m x -0 1 0 1 2 3 4 -1 0 1 2 3m+1 4 5 6 7 x − -− -(a) Fall 1 (b) Fall 2 (Strahlensatz) +−1 0 1 2 3 4 +0123 +z +x +λ2z +λ2x +(b) Fall 2 (Strahlensatz) Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix -(cid:18) (cid:19) -1 a +• Offensichtlich gilt die Aussage für σ = -• 0 1 -(cid:18) (cid:19) +(cid:18) +1 a +0 +1(cid:19) +• +Sei nun σ = +(cid:18) 0 1 -Sei nun σ = , also σ(z) = 1 -• 1 0 −z -− +−1 +0(cid:19) +, also σ(z) = −1 +z Bemerkung 69 -Zu hyperbolischen Geraden g ,g gibt es σ PSL (R) mit σ(g ) = g . -1 2 2 1 2 +Zu hyperbolischen Geraden g 1,g +2 +gibt es σ ∈ +PSL 2(R) mit σ(g 1) = g 2. 84 4.3.HYPERBOLISCHEGEOMETRIE -z = r eiϕ -y · -1 · -1 = 1 eiϕ -0 z r · x -1 0 1 -− +y +−1 0 1 +01 +z = r ·eiϕ +1 +z += 1 +r +·eiϕ Abbildung 4.23: Inversion am Kreis -Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a ) = b und σ(a ) = b . Dann existiert -1 1 2 2 -σ(g ) := g wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. -1 2 +Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a 1) = b +1 +und σ(a 2) = b 2. Dann existiert +σ(g 1) := g +2 +wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. Definition 65 -Seien z ,z ,z ,z C paarweise verschieden. -1 2 3 4 +Seien z 1,z 2,z 3,z +4 ∈ +C paarweise verschieden. Dann heißt +DV(z 1,z 2,z 3,z 4) := z1−z4 -(z z ) (z z ) -z1−z2 1 4 3 2 -DV(z ,z ,z ,z ) := = − · − -1 2 3 4 z3−z4 (z z ) (z z ) -z3−z2 1 − 2 · 3 − 4 -Doppelverhältnis von z ,...,z . -1 4 +z1−z2 +z3−z4 +z3−z2 += +(z +1 +−z 4) ·(z +3 +−z 2) +(z +1 +−z 2) ·(z +3 +−z 4) +Doppelverhältnis von z 1,...,z 4. Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a) DV(z ,...,z ) C 0,1 -1 4 -∈ \{ } -b) DV(z ,z ,z ,z ) = 1 -1 4 3 2 DV(z1,z2,z3,z4) -c) DV(z ,z ,z ,z ) = 1 -3 2 1 4 DV(z1,z2,z3,z4) -d) DV ist auch wohldefiniert, wenn eines der z = oder wenn zwei der z gleich sind. 
-i i +a) DV(z 1,...,z 4) +∈ +C \{0,1 +} +b) DV(z 1,z 4,z 3,z 2) = 1 +DV(z1,z2,z3,z4) +c) DV(z 3,z 2,z 1,z 4) = 1 +DV(z1,z2,z3,z4) +d) DV ist auch wohldefiniert, wenn eines der z +i += ∞ -e) DV(0,1, ,z ) = z (Der Fall z 0,1, ist zugelassen). -4 4 4 -∞ ∈ { ∞} -f) Für σ PSL (C) und z ,...,z C ist -2 1 4 -∈ ∈ ∪{∞} -DV(σ(z ),σ(z ),σ(z ),σ(z )) = DV(z ,z ,z ,z ) -1 2 3 4 1 2 3 4 -und für σ(z) = 1 gilt +oder wenn zwei der z +i +gleich sind. +e) DV(0,1, ∞,z 4) = z +4 +(Der Fall z +4 +∈ +{0,1, +∞} +ist zugelassen). +f) Für σ +∈ +PSL 2(C) und z 1,...,z +4 +∈ +C +∪{∞} +ist +DV(σ(z 1),σ(z 2),σ(z 3),σ(z 4)) = DV(z 1,z 2,z 3,z 4) +und für σ(z) = 1 z -DV(σ(z ),σ(z ),σ(z ),σ(z )) = DV(z ,z ,z ,z ) -1 2 3 4 1 2 3 4 -g) DV(z ,z ,z ,z ) R z ,...,z liegen auf einer hyperbolischen Geraden. -1 2 3 4 1 4 -∈ ∪{∞} ⇔ +gilt +DV(σ(z 1),σ(z 2),σ(z 3),σ(z 4)) = DV(z 1,z 2,z 3,z 4) +g) DV(z 1,z 2,z 3,z 4) +∈ +R +∪{∞} ⇔ +z 1,...,z +4 +liegen auf einer hyperbolischen Geraden. Beweis: -a) DV(z ,...,z ) = 0, da z paarweise verschieden -1 4 i -(cid:54) -DV(z ,...,z ) = 1, da: -1 4 -(cid:54) -Annahme: DV(z ,...,z ) = 1 -1 4 -(z z )(z z ) = (z z )(z z ) -1 2 3 4 1 4 3 2 -⇔ − − − − +a) DV(z 1,...,z 4) (cid:54)= 0, da z +i +paarweise verschieden +DV(z 1,...,z 4) (cid:54)= 1, da: +Annahme: DV(z 1,...,z 4) = 1 +⇔ +(z +1 +−z 2)(z +3 +−z 4) = (z +1 +−z 4)(z +3 +−z 2) 85 4.3.HYPERBOLISCHEGEOMETRIE -z z z z z z +z z = z z z z z z +z z -1 3 2 3 1 4 2 4 1 3 3 4 1 2 2 4 -⇔ − − − − -z z +z z = z z +z z -2 3 1 4 3 4 1 2 ⇔ -z z z z = z z z z -2 3 3 4 1 2 1 4 -⇔ − − -z (z z ) = z (z z ) -3 2 4 1 2 4 -⇔ − − -z = z oder z = z -3 1 2 4 +z 1z +3 +−z 2z +3 +−z 1z 4+z 2z +4 += z 1z +3 +−z 3z +4 +−z 1z 2+z 2z +4 +⇔ +z 2z 3+z 1z +4 += z 3z 4+z 1z +2 +⇔ +z 2z +3 +−z 3z +4 += z 1z +2 +−z 1z +4 +⇔ +z 3(z +2 +−z 4) = z 1(z +2 +−z 4) ⇔ -Alle z sind paarweise verschieden Widerspruch (cid:4) +z +3 += z +1 +oder z +2 += z +4 +Alle z i +sind paarweise verschieden ⇒ -b) DV(z ,z ,z ,z ) = (z1−z2)·(z3−z4) = 1 -1 4 3 2 (z1−z4)·(z3−z2) 
DV(z1,z2,z3,z4) -c) DV(z ,z ,z ,z ) = (z3−z4)·(z1−z2) = 1 -3 2 1 4 (z3−z2)·(z1−z4) DV(z1,z2,z3,z4) -d) Zwei der z dürfen gleich sein, da: +Widerspruch (cid:4) +b) DV(z 1,z 4,z 3,z 2) = (z1−z2)·(z3−z4) +(z1−z4)·(z3−z2) += 1 +DV(z1,z2,z3,z4) +c) DV(z 3,z 2,z 1,z 4) = (z3−z4)·(z1−z2) +(z3−z2)·(z1−z4) += 1 +DV(z1,z2,z3,z4) +d) Zwei der z +i +dürfen gleich sein, da: +Fall 1 z +1 += z +4 +oder z +3 += z +2 +In diesem Fall ist DV(z 1,...,z 4) = 0 +Fall 2 z +1 += z +2 +oder z +3 += z +4 +Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z 1,...,z 4) = +∞ +gilt. +Fall 3 z +1 += z +3 +oder z +2 += z +4 +Durch Einsetzen ergibt sich DV(z 1,...,z 4) = 1. +Im Fall, dass ein z i -Fall 1 z = z oder z = z -1 4 3 2 -In diesem Fall ist DV(z ,...,z ) = 0 -1 4 -Fall 2 z = z oder z = z -1 2 3 4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z ,...,z ) = gilt. -1 4 += ∞ -Fall 3 z = z oder z = z -1 3 2 4 -Durch Einsetzen ergibt sich DV(z ,...,z ) = 1. -1 4 -Im Fall, dass ein z = ist, ist entweder DV(0,1, ,z ) = 0 oder DV(0,1, ,z ) -i 4 4 -∞ ∞ ∞ ±∞ -(0−z4)·(∞−1) z4·(∞−1) -e) DV(0,1, ,z ) = = = z -∞ 4 (0−1)·(∞−z4) ∞−z4 4 +ist, ist entweder DV(0,1, ∞,z 4) = 0 oder DV(0,1, ∞,z 4) +±∞ +e) DV(0,1, ∞,z 4) = +(0−z4)·(∞−1) +(0−1)·(∞−z4) += +z4·(∞−1) +∞−z4 += z +4 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -g) Sei σ PSL (C) mit σ(z ) = 0, σ(z ) = 1, σ(z ) = . Ein solches σ existiert, da man -2 1 2 3 -∈ ∞ +g) Sei σ +∈ +PSL 2(C) mit σ(z 1) = 0, σ(z 2) = 1, σ(z 3) = ∞. Ein solches σ existiert, da man drei Parameter von σ wählen darf. Bem. 70.f -DV(z ,...,z ) = DV(0,1, ,σ(z )) -1 4 4 -⇒ ∞ -DV(z ,...,z ) R -1 4 -⇒ ∈ ∪{∞} -σ(z ) R -4 -⇔ ∈ ∪{∞} -Behauptung folgt, weil σ−1(R ) ein Kreis oder eine Gerade in C ist. -∪∞ +⇒ +DV(z 1,...,z 4) = DV(0,1, ∞,σ(z 4)) +⇒ +DV(z 1,...,z 4) +∈ +R +∪{∞} +⇔ +σ(z 4) +∈ +R +∪{∞} +Behauptung folgt, weil σ−1(R ∪∞) ein Kreis oder eine Gerade in C ist. 
Definition 66 -Für z ,z H sei g die eindeutige hyperbolische Gerade durch z und z und a ,a die -1 2 z1,z2 1 2 1 2 +Für z 1,z +2 ∈ -„Schnittpunkte“ von g mit R . +H sei g z1,z2 -∪{∞} -Dann sei dH(z 1,z 2) := 21 |lnDV(a 1,z 1,a 2,z 2) und heiße hyperbolische Metrik. +die eindeutige hyperbolische Gerade durch z +1 +und z +2 +und a 1,a +2 +die +„Schnittpunkte“ von g +z1,z2 +mit R ∪{∞}. +Dann sei dH(z 1,z 2) := 1 2|lnDV(a 1,z 1,a 2,z 2) | -Beh.: Für z ,z H sei g die eindeutige hyperbolische Gerade durch z und z und a ,a -1 2 z1,z2 1 2 1 2 +und heiße hyperbolische Metrik. +Beh.: Für z 1,z +2 ∈ -die „Schnittpunkte“ von g mit R . +H sei g z1,z2 -∪{∞} +die eindeutige hyperbolische Gerade durch z +1 +und z +2 +und a 1,a +2 +die „Schnittpunkte“ von g +z1,z2 +mit R ∪{∞}. Dann gilt: -1 1 -lnDV(a ,z ,a ,z ) = lnDV(a ,z ,a ,z ) -1 1 2 2 2 1 1 2 -2| | 2| | +1 +2|lnDV(a 1,z 1,a 2,z 2) +| += +1 +2|lnDV(a 2,z 1,a 1,z 2) +| Beweis: Wegen Bemerkung 70.c gilt: +DV(a 1,z 1,a 2,z 2) = 1 -DV(a ,z ,a ,z ) = -1 1 2 2 -DV(a ,z ,a ,z ) -2 1 1 2 +DV(a 2,z 1,a 1,z 2) Außerdem gilt: +ln 1 -ln = lnx−1 = ( 1) lnx = lnx -x − · − +x += lnx−1 = ( −1) ·lnx = −lnx 86 4.3.HYPERBOLISCHEGEOMETRIE Da der ln im Betrag steht, folgt direkt: -1 1 -lnDV(a ,z ,a ,z ) = lnDV(a ,z ,a ,z ) -1 1 2 2 2 1 1 2 -2| | 2| | +1 +2|lnDV(a 1,z 1,a 2,z 2) +| += +1 +2|lnDV(a 2,z 1,a 1,z 2) +| Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver- hältnis genutzt werden. (cid:4) Beh.: Die hyperbolische Metrik ist eine Metrik auf H. Beweis: Wegen Bemerkung 70.f ist -d(z ,z ) := d(σ(z ),σ(z )) mit σ(a ) = 0, σ(a ) = -1 2 1 2 1 2 +d(z 1,z 2) := d(σ(z 1),σ(z 2)) mit σ(a 1) = 0, σ(a 2) = ∞ -d. h. σ(g ) = iR (imaginäre Achse). -z1,z2 -also gilt o. B. d. A. z = ia und z = ib mit a,b R und a < b. -1 2 +d. h. σ(g z1,z2) = iR (imaginäre Achse). +also gilt o. B. d. A. z +1 += ia und z +2 += ib mit a,b ∈ -2d(ia,ib) = lnDV(0,ia, ,ib) -| ∞ | -(0 ib)( ia) -= ln − ∞− -| (0 ia)( ib) | -− ∞− +R und a < b. 
+2d(ia,ib) = +| +lnDV(0,ia, ∞,ib) +| += +| +ln +(0 −ib)( ∞−ia) +(0 −ia)( ∞−ib) | += +| +ln b -= ln -| a | -= lnb lna -− -Also: d(z ,z ) 0, d(z ,z ) = 0 z = z -1 2 1 2 1 2 -≥ ⇔ -2d(z ,z ) = lnDV(a ,z ,a ,z ) -2 1 2 2 1 1 -| | -= lnDV( ,ib,0,ia) -| ∞ | +a | += lnb −lna +Also: d(z 1,z 2) +≥ +0, d(z 1,z 2) = 0 +⇔ +z +1 += z +2 +2d(z 2,z 1) = +| +lnDV(a 2,z 2,a 1,z 1) +| += +| +lnDV( ∞,ib,0,ia) +| Bem. 70.b -= lnDV(0,ib, ,ia) -| ∞ | -= 2d(z ,z ) -1 2 -Liegen drei Punkte z ,z ,z C auf einer hyperbolischen Geraden, so gilt d(z ,z ) = -1 2 3 1 3 += +| +lnDV(0,ib, ∞,ia) +| += 2d(z 1,z 2) +Liegen drei Punkte z 1,z 2,z +3 ∈ -d(z ,z )+d(z ,z ) (wenn z zwischen z und z liegt). -1 2 2 3 2 1 3 +C auf einer hyperbolischen Geraden, so gilt d(z 1,z 3) = +d(z 1,z 2)+d(z 2,z 3) (wenn z +2 +zwischen z +1 +und z +3 +liegt). Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die Vorlesung „Hyperbolische Geometrie“ verwiesen. Satz 4.10 @@ -4555,8 +7467,9 @@ aber Axiom §5 ist verletzt. 87 4.3.HYPERBOLISCHEGEOMETRIE Übungsaufgaben Aufgabe 8 -Seien (X,d) eine absolute Ebene und P,Q,R X Punkte. Der Scheitelwinkel des Winkels +Seien (X,d) eine absolute Ebene und P,Q,R ∈ +X Punkte. Der Scheitelwinkel des Winkels ∠PQR ist der Winkel, der aus den Halbgeraden QP− und QR− gebildet wird. Die Nebenwinkel von ∠PQR sind die von QP+ und QR− bzw. QP− und QR+ gebildeten Winkel. @@ -4564,150 +7477,246 @@ Zeigen Sie: (a) Die beiden Nebenwinkel von ∠PQR sind gleich. (b) Der Winkel ∠PQR ist gleich seinem Scheitelwinkel. Aufgabe 9 -Sei (X,d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y X von +Sei (X,d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ -Punkten ist definiert durch d(P,Y) := infd(P,y) y Y. -| ∈ +X von +Punkten ist definiert durch d(P,Y) := infd(P,y) |y +∈ +Y. 
Zeigen Sie: -(a) Ist ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die -(cid:52) +(a) Ist (cid:52)ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die Winkel ∠ABC und ∠BCA gleich. -(b) Ist ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel -(cid:52) +(b) Ist (cid:52)ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel gegenüber und umgekehrt. -(c) Sind g eine Gerade und P / g ein Punkt, so gibt es eine eindeutige Gerade h mit +(c) Sind g eine Gerade und P / ∈ -P h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g +g ein Punkt, so gibt es eine eindeutige Gerade h mit +P ∈ +h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g und der Schnittpunkt des Lots mit g heißt Lotfußpunkt. Aufgabe 10 -Seien f,g,h G und paarweise verschieden. +Seien f,g,h ∈ -Zeigen Sie: f g g h f h -(cid:107) ∧ (cid:107) ⇒ (cid:107) +G und paarweise verschieden. +Zeigen Sie: f +(cid:107) +g ∧g +(cid:107) +h +⇒ +f +(cid:107) +h Aufgabe 11 Beweise den Kongruenzsatz SSS. 5 Krümmung Definition 67 -Sei f : [a,b] Rn eine eine Funktion aus C∞. Dann heißt f Kurve. +Sei f : [a,b] → +Rn eine eine Funktion aus C∞. Dann heißt f Kurve. 5.1 Krümmung von Kurven Definition 68 -Sei γ : I = [a,b] Rn eine Kurve. +Sei γ : I = [a,b] → +Rn eine Kurve. a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: -γ(cid:48)(t) = 1 t I -2 -(cid:107) (cid:107) ∀ ∈ -Dabei ist γ(cid:48)(t) = (γ(cid:48)(t),γ(cid:48)(t),...,γ(cid:48)(t)). -1 2 n -b) l(γ) = (cid:82)b γ(cid:48)(t) dt heißt Länge von γ. -a (cid:107) (cid:107) +(cid:107)γ(cid:48)(t) +(cid:107)2 += 1 ∀t +∈ +I +Dabei ist γ(cid:48)(t) = (γ(cid:48) 1(t),γ(cid:48) 2(t),...,γ(cid:48) n(t)). +b) l(γ) = (cid:82)b +a +(cid:107)γ(cid:48)(t) (cid:107)dt heißt Länge von γ. Bemerkung 71 (Eigenschaften von Kurven I) -Sei γ : I = [a,b] Rn eine C∞-Funktion. +Sei γ : I = [a,b] → -a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b a. 
-− -b) Ist γ durch Bogenlänge parametrisiert, so ist γ(cid:48)(t) orthogonal zu γ(cid:48)(cid:48)(t) für alle t I. +Rn eine C∞-Funktion. +a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b −a. +b) Ist γ durch Bogenlänge parametrisiert, so ist γ(cid:48)(t) orthogonal zu γ(cid:48)(cid:48)(t) für alle t ∈ +I. Beweis: -a) l(γ) = (cid:82)b γ(cid:48)(t) dt = (cid:82)b 1dt = b a. -a (cid:107) (cid:107) a − -b) ImFolgendenwirddieAussagenurfürγ : [a,b] R2 bewiesen.Allerdingsfunktioniert +a) l(γ) = (cid:82)b +a +(cid:107)γ(cid:48)(t) (cid:107)dt = (cid:82)b +a +1dt = b −a. +b) ImFolgendenwirddieAussagenurfürγ : [a,b] → +R2 bewiesen.Allerdingsfunktioniert der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. -1 = γ(cid:48)(t) = γ(cid:48)(t) 2 = γ(cid:48)(t),γ(cid:48)(t) -(cid:107) (cid:107) (cid:107) (cid:107) (cid:104) (cid:105) +1 = (cid:107)γ(cid:48)(t) +(cid:107) += (cid:107)γ(cid:48)(t) (cid:107)2 = (cid:104)γ(cid:48)(t),γ(cid:48)(t) +(cid:105) +⇒ +0 = d -0 = γ(cid:48)(t),γ(cid:48)(t) -⇒ dt(cid:104) (cid:105) +dt(cid:104)γ(cid:48)(t),γ(cid:48)(t) +(cid:105) += d -= (γ(cid:48)(t)γ(cid:48)(t)+γ(cid:48)(t)γ(cid:48)(t)) -dt 1 1 2 2 -= 2 (γ(cid:48)(cid:48)(t) γ(cid:48)(t)+γ(cid:48)(cid:48)(t) γ(cid:48)(t)) -· 1 · 1 2 · 2 -= 2 γ(cid:48)(cid:48)(t),γ(cid:48)(t) -·(cid:104) (cid:105) +dt(γ(cid:48) 1(t)γ(cid:48) 1(t)+γ(cid:48) 2(t)γ(cid:48) 2(t)) += 2 ·(γ(cid:48)(cid:48) 1(t) ·γ(cid:48) 1(t)+γ(cid:48)(cid:48) 2(t) ·γ(cid:48) 2(t)) += 2 ·(cid:104)γ(cid:48)(cid:48)(t),γ(cid:48)(t) +(cid:105) Definition 69 -Sei γ : I R2 eine durch Bogenlänge parametrisierte Kurve. +Sei γ : I → -a) Für t I sei n(t) Normalenvektor an γ in t wenn gilt: +R2 eine durch Bogenlänge parametrisierte Kurve. 
+a) Für t ∈ -n(t),γ(cid:48)(t) = 0, n(t) = 1 und det((γ(cid:48)(t),n(t))) = +1 -(cid:104) (cid:105) (cid:107) (cid:107) +I sei n(t) Normalenvektor an γ in t wenn gilt: +(cid:104)n(t),γ(cid:48)(t) +(cid:105) += 0, (cid:107)n(t) +(cid:107) += 1 und det((γ(cid:48)(t),n(t))) = +1 89 5.1.KRÜMMUNGVONKURVEN -b) Seit κ : I R so, dass gilt: +b) Seit κ : I → -γ(cid:48)(cid:48)(t) = κ(t) n(t) -· +R so, dass gilt: +γ(cid:48)(cid:48)(t) = κ(t) ·n(t) Dann heißt κ(t) Krümmung von γ in t. Da n(t) und γ(cid:48)(cid:48)(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). Beispiel 45 Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt: -(cid:18) (cid:19) -t t -γ(t) = r cos ,r sin für t [0,2πr] -· r · r ∈ +γ(t) = +(cid:18) +r ·cos +t +r,r ·sin +t +r(cid:19) +für t +∈ +[0,2πr] ist parametrisiert durch Bogenlänge, da gilt: -(cid:18) (cid:19) -1 t 1 t -γ(cid:48)(t) = (r )( sin ),r cos -· r − r r r -(cid:18) (cid:19) -t t -= sin ,cos -− r r +γ(cid:48)(t) = +(cid:18) +(r +· +1 +r)( −sin +t r),r1 +r +cos +t +r(cid:19) += +(cid:18) +−sin +t +r,cos +t +r(cid:19) Der Normalenvektor von γ in t ist -(cid:18) (cid:19) -t t -n(t) = cos , sin -− r − r +n(t) = +(cid:18) +−cos +t +r, −sin +t +r(cid:19) da gilt: -(cid:28)(cid:18) t(cid:19) (cid:18) t(cid:19)(cid:29) -cos sin -n(t),γ(cid:48)(t) = − r , − r -(cid:104) (cid:105) sin t cos t -− r r -t t t t -= ( cos ) ( sin )+( sin ) (cos ) -− r · − r − r · r +(cid:104)n(t),γ(cid:48)(t) +(cid:105) += +(cid:28)(cid:18) −cos t +r +−sin t +r(cid:19) ,(cid:18) −sin t +r +cos t +r +(cid:19)(cid:29) += ( −cos +t +r) ·( −sin +t +r)+( −sin +t +r) ·(cos +t +r) = 0 -(cid:13) (cid:13) -(cid:13) t t (cid:13) -n(t) = (cid:13)( cos , sin )(cid:13) -(cid:107) (cid:107) (cid:13) − r − r (cid:13) -t t -= ( cos )2+( sin )2 -− r − r +(cid:107)n(t) +(cid:107) += +(cid:13) +(cid:13) +(cid:13) (cid:13)( −cos +t +r, −sin +t +r)(cid:13) +(cid:13) +(cid:13) +(cid:13) += ( −cos +t +r)2+( −sin +t +r)2 = 1 -det(γ(cid:48)(t),n(t)) = (cid:13) (cid:13) 
(cid:13)(cid:18) −sin rt −cos rt(cid:19)(cid:13) (cid:13) (cid:13) -1 (cid:13) cos t sin t (cid:13) -r − r -t t t -= ( sin )2 ( cos ) cos -− r − − r · r +det(γ(cid:48) 1(t),n(t)) = (cid:13) (cid:13) (cid:13) (cid:13)(cid:18) −sin t r −cos t r +cos t +r +−sin t +r(cid:19)(cid:13) (cid:13) (cid:13) +(cid:13) += ( −sin +t +r)2 −( −cos +t +r) ·cos +t +r = 1 -Die Krümmung ist für jedes t konstant 1, da gilt: +Die Krümmung ist für jedes t konstant 1 r, da gilt: +γ(cid:48)(cid:48)(t) = +(cid:18) +−1 r -(cid:18) (cid:19) -1 t 1 t -γ(cid:48)(cid:48)(t) = cos , sin -−r r −r r -(cid:18) (cid:19) -1 t t -= cos , sin -r · − r − r +cos +t +r, +−1 +r +sin +t +r(cid:19) += 1 +r +·(cid:18) +−cos +t +r, −sin +t +r(cid:19) +⇒ κ(t) = -⇒ r +1 +r 90 5.2.TANGENTIALEBENE Definition 70 -Sei γ : I R3 eine durch Bogenlänge parametrisierte Kurve. +Sei γ : I → -a) Für t I heißt κ(t) := γ(cid:48)(cid:48)(t) die Krümmung von γ in t. -∈ (cid:107) (cid:107) -b) Ist für t I die Ableitung γ(cid:48)(cid:48)(t) = 0, so heißt γ(cid:48)(cid:48)(t) Normalenvektor an γ in t. -∈ (cid:54) (cid:107)γ(cid:48)(cid:48)(t)(cid:107) +R3 eine durch Bogenlänge parametrisierte Kurve. +a) Für t +∈ +I heißt κ(t) := (cid:107)γ(cid:48)(cid:48)(t) +(cid:107) +die Krümmung von γ in t. +b) Ist für t +∈ +I die Ableitung γ(cid:48)(cid:48)(t) (cid:54)= 0, so heißt γ(cid:48)(cid:48)(t) +(cid:107)γ(cid:48)(cid:48)(t)(cid:107) +Normalenvektor an γ in t. c) b(t)seieinVektor,derγ(cid:48)(t),n(t)zueinerorientiertenOrthonormalbasisvonR3 ergänzt. Also gilt: det(γ(cid:48)(t),n(t),b(t)) = 1 @@ -4715,723 +7724,1075 @@ b(t) heißt Binormalenvektor, die Orthonormalbasis (cid:8) γ(cid:48)(t),n(t),b(t)(cid:9) heißt begleitendes Dreibein. Bemerkung 72 (Eigenschaften von Kurven II) -Sei γ : I R3 durch Bogenlänge parametrisierte Kurve. +Sei γ : I → +R3 durch Bogenlänge parametrisierte Kurve. a) n(t) ist orthogonal zu γ(cid:48)(t). b) b(t) aus Definition 70.c ist eindeutig. 
5.2 Tangentialebene Erinnerung Sie sich an Definition 32 „reguläre Fläche“. Äquivalent dazu ist: S ist lokal von der Form -V(f) = (cid:8) x R3 (cid:12) (cid:12) f(x) = 0(cid:9) +V(f) = (cid:8) x ∈ -für eine C∞-Funktion f : R3 R. +R3 (cid:12) (cid:12) f(x) = 0(cid:9) +für eine C∞-Funktion f : R3 → +R. Definition 71 -Sei S R3 eine reguläre Fläche, s S, F : U V S eine lokale Parametrisierung um -⊆ ∈ → ∩ -s V: +Sei S +⊆ +R3 eine reguläre Fläche, s +∈ +S, F : U +→ +V ∩S eine lokale Parametrisierung um +s ∈ -(u,v) (x(u,v),y(u,v),z(u,v)) +V: +(u,v) (cid:55)→ -Für p = F−1(s) U sei -∈ ∂x(p) ∂x(p) -∂u ∂v -J F(p) =  ∂∂ uy (p) ∂ ∂y v(p) -∂z(p) ∂z(p) -∂u ∂v -und D F : R2 R3 die durch J (p) definierte lineare Abbildung. -p F -→ -Dann heißt T S := Bild(D F) die Tangentialebene an s S. -s p +(x(u,v),y(u,v),z(u,v)) +Für p = F−1(s) +∈ +U sei +J F(p) = + ∂x ∂u(p) ∂x ∂v(p) +∂y ∂u(p) ∂y ∂v(p) +∂z ∂u(p) ∂z +∂v(p) + +und D pF : R2 +→ +R3 die durch J F(p) definierte lineare Abbildung. +Dann heißt T sS := Bild(D pF) die Tangentialebene an s ∈ +S. Bemerkung 73 (Eigenschaften der Tangentialebene) -a) T S ist 2-dimensionaler Untervektorraum von R3. -s -b) T S = u˜,v˜ , wobei u˜,v˜ die Spaltenvektoren der Jacobi-Matrix J (p) sind. -s F -(cid:104) (cid:105) -c) T S hängt nicht von der gewählten Parametrisierung ab. -s +a) T sS ist 2-dimensionaler Untervektorraum von R3. +b) T sS = (cid:104)˜ u,˜ v (cid:105), wobei ˜ u,˜ v die Spaltenvektoren der Jacobi-Matrix J F(p) sind. +c) T sS hängt nicht von der gewählten Parametrisierung ab. 91 5.2.TANGENTIALEBENE -d) Sei S = V(f) eine reguläre Fläche in R3, also f : V R eine C∞-Funktion, V R3 -→ ⊆ -offen, grad(f)(x) = 0 für alle x S. -(cid:54) ∈ -Dann ist T S = (grad(f)(s))⊥ für jedes s S. -s +d) Sei S = V(f) eine reguläre Fläche in R3, also f : V +→ +R eine C∞-Funktion, V +⊆ +R3 +offen, grad(f)(x) (cid:54)= 0 für alle x +∈ +S. +Dann ist T sS = (grad(f)(s))⊥ für jedes s ∈ +S. 
Beweis: -a) J ist eine 3 2-Matrix, die mit einem 2 1-Vektor multipliziert wird. Das ist +a) J F -× × +ist eine 3 +× +2-Matrix, die mit einem 2 +× +1-Vektor multipliziert wird. Das ist eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein -Vektorraum ist. Da Rg(J ) = 2, ist auch dim(T S) = 2. -F s +Vektorraum ist. Da Rg(J F) = 2, ist auch dim(T sS) = 2. b) Hier kann man wie in Punkt a) argumentieren -c) T S = x R3 parametrisierte Kurve γ : [ ε,+ε] S für ein ε > 0 mit γ(0) = -s -{ ∈ |∃ − → +c) T sS = {x +∈ +R3 |∃parametrisierte Kurve γ : [ −ε,+ε] +→ +S für ein ε > 0 mit γ(0) = s und γ(cid:48)(0) = x } Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -d) Sei x T S,γ : [ ε,+ε] S eine parametrisierte Kurve mit ε > 0 und γ(cid:48)(0) = s, -s -∈ − → -sodass γ(cid:48)(0) = x gilt. Da γ(t) S für alle t [ ε,ε], ist f γ = 0 -∈ ∈ − ◦ -0 = (f γ)(cid:48)(0) = grad(f)(γ(0)),γ(cid:48)(0) -⇒ ◦ (cid:104) (cid:105) -T S grad(f)(s)⊥ -s -⇒ ⊆ -=d =i =m ==2 T S = (grad(f)(s))⊥ -s +d) Sei x +∈ +T sS,γ : [ −ε,+ε] +→ +S eine parametrisierte Kurve mit ε > 0 und γ(cid:48)(0) = s, +sodass γ(cid:48)(0) = x gilt. Da γ(t) +∈ +S für alle t +∈ +[ −ε,ε], ist f ◦γ = 0 +⇒ +0 = (f ◦γ)(cid:48)(0) = (cid:104)grad(f)(γ(0)),γ(cid:48)(0) +(cid:105) +⇒ +T sS +⊆ +grad(f)(s)⊥ +dim=2 ==== ⇒ +T sS = (grad(f)(s))⊥ Definition 72 -a) Ein Normalenfeld auf der regulären Fläche S R3 ist eine Abbildung n : S S2 -⊆ → ⊆ -R3 mit n(s) T S⊥ für jedes s S. -s -∈ ∈ +a) Ein Normalenfeld auf der regulären Fläche S +⊆ +R3 ist eine Abbildung n : S +→ +S2 +⊆ +R3 mit n(s) +∈ +T sS⊥ für jedes s +∈ +S. b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. Im Folgenden werden diese Begriffe jedoch synonym benutzt. Bemerkung 74 (Eigenschaften von Normalenfeldern) a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞). 
-b) Zu jedem s S gibt es eine Umgebung V R3 von s und eine lokale Parametrisierung -∈ ⊆ -F : U V von S um s, sodass auf F(U) = V S ein stetiges Normalenfeld existiert. -→ ∩ +b) Zu jedem s +∈ +S gibt es eine Umgebung V +⊆ +R3 von s und eine lokale Parametrisierung +F : U +→ +V von S um s, sodass auf F(U) = V ∩S ein stetiges Normalenfeld existiert. c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen -Parametrisierungen F : U V , i I gibt, sodass für alle i,j F und alle -i i i -→ ∈ ∈ -s V V S gilt: -i j -∈ ∩ ∩ +Parametrisierungen F +i +: U +i +→ +V i, i +∈ +I gibt, sodass für alle i,j +∈ +F und alle +s +∈ +V +i +∩V +j +∩S gilt: +det(D +s Vi→Vj (cid:122) (cid:125)(cid:124) (cid:123) -det(D F F−1) > 0 -s j ◦ i +F +j +◦F−1 +i (cid:124) (cid:123)(cid:122) (cid:125) ∈R3×3 +) > 0 Beweis: Wird hier nicht geführt. Beispiel 46 (Normalenfelder) -1) S = S2, n = id ist ein stetiges Normalenfeld. -1 S2 -Auch n = id ist ein stetiges Normalenfeld. -2 S2 -− +1) S = S2, n +1 += id +S2 +ist ein stetiges Normalenfeld. +Auch n +2 += −id +S2 +ist ein stetiges Normalenfeld. 2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma- lenfeld, aber kein stetiges Normalenfeld. 92 5.3.GAUSS-KRÜMMUNG Abbildung 5.1: Möbiusband 5.3 Gauß-Krümmung Bemerkung 75 -Sei S eine reguläre Fläche, s S, n(s) ist ein Normalenvektor in s, x T S, x = 1. -s -∈ ∈ (cid:107) (cid:107) +Sei S eine reguläre Fläche, s +∈ +S, n(s) ist ein Normalenvektor in s, x +∈ +T sS, (cid:107)x +(cid:107) += 1. Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3. -Dann gibt es eine Umgebung V R3 von s, sodass +Dann gibt es eine Umgebung V ⊆ -C := (s+E) S V -∩ ∩ -das Bild einer durch Bogenlänge parametrisierten Kurve γ : [ ε,ε] S enthält mit γ(0) = s -− → +R3 von s, sodass +C := (s+E) ∩S ∩V +das Bild einer durch Bogenlänge parametrisierten Kurve γ : [ −ε,ε] +→ +S enthält mit γ(0) = s und γ(cid:48)(0) = x. 
Beweis: „Satz über implizite Funktionen“1 Definition 73 -In der Situation aus Bemerkung 75 heißt die Krümmung κ (0) der Kurve γ in der Ebene -γ +In der Situation aus Bemerkung 75 heißt die Krümmung κ γ(0) der Kurve γ in der Ebene (s+E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ(cid:48)(0). -Man schreibt: κ (s,x) := κ (0) -Nor γ +Man schreibt: κ Nor(s,x) := κ γ(0) Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. Beispiel 47 (Gauß-Krümmung) -1) S = S2 = V(X2+Y2+Z2 1) ist die Kugel um den Ursprung mit Radius 1, n = id, -− +1) S = S2 = V(X2+Y2+Z2 −1) ist die Kugel um den Ursprung mit Radius 1, n = id, s = (0,0,1), x = (1,0,0) -E = R x+R n(s) (x,z-Ebene) -⇒ · · -C = E S ist Kreislinie -∩ -κ (s,x) = 1 = 1 -Nor r -2) S = V(X2+Z2 1) R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1,0,0) -− ⊆ -x = (0,1,0) E = R e +R e (x,y-Ebene) -1 1 1 2 -⇒ · · -S E = V(X2+Y2 1) E, Kreislinie in E -1 -∩ − ∩ -κ (s,x ) = 1 -Nor 1 -⇒ ± -x = (0,0,1),E = R e +R e (x,z-Ebene) -2 2 1 3 -· · -1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II -93 5.3.GAUSS-KRÜMMUNG -V E 2 S = (cid:8) (1,0,z) R3 (cid:12) (cid:12) z R(cid:9) ist eine Gerade -∩ ∩ ∈ ∈ -κ (s,x ) = 0 -Nor 2 ⇒ -3) S = V(X2 Y2 Z), s = (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) -− − -x = (1,0,0), n(s) = (0,0,1) +E = R ·x+R ·n(s) (x,z-Ebene) +C = E ∩S ist Kreislinie +κ Nor(s,x) = 1 +r += 1 +2) S = V(X2+Z2 −1) +⊆ +R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1,0,0) +x +1 += (0,1,0) +⇒ +E 1 -x = (0,1,0) += R ·e 1+R ·e 2 -κ (s,x ) = 2 -Nor 1 -κ (s,x ) = 2 -Nor 2 -− -5 -4 +(x,y-Ebene) +S ∩E +1 += V(X2+Y2 −1) ∩E, Kreislinie in E +⇒ +κ Nor(s,x 1) = ±1 +x 2 += (0,0,1),E +2 += R ·e 1+R ·e 3 -0 z z +(x,z-Ebene) +1Siehe z. B. 
https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II +93 5.3.GAUSS-KRÜMMUNG +V ∩E 2 ∩S = (cid:8) (1,0,z) +∈ +R3 (cid:12) (cid:12) z +∈ +R(cid:9) ist eine Gerade +⇒ +κ Nor(s,x 2) = 0 +3) S = V(X2 −Y2 −Z), s = (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) +x +1 += (1,0,0), n(s) = (0,0,1) +x 2 -1 −2 -0 f(x,y) -42 += (0,1,0) +κ Nor(s,x 1) = 2 +κ Nor(s,x 2) = −2 +−1.5 −1 −0.5 0 0.5 1 1.5 −1 +0 +1 +012345 +x y +z +(a) S =V(X2+Z2−1) +−2 −1.5 −1 −0.5 0 0.5 1 1.5 2 −2 −1 +0 1 -2 1 -0 0 0 -y −1 −1.5 −1 −0.5 0 x 0.5 1 1.5 −− 42 y −1 −2−2 −1.5 −1 −0.5 0 x 0.5 1 1.5 2 -(a) S =V(X2+Z2−1) (b) S =V(X2−Y2−Z) +2 +−2 +02 +x y +z +−4 −2 +024 +f(x,y) +(b) S =V(X2−Y2−Z) Abbildung 5.2: Beispiele für reguläre Flächen Definition 74 -Sei S R3 eine reguläre Fläche, s S und n ein stetiges Normalenfeld auf S. -⊆ ∈ -γ : [ ε,ε] S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und -− → -γ(cid:48)(cid:48)(0) = 0. -(cid:54) +Sei S +⊆ +R3 eine reguläre Fläche, s +∈ +S und n ein stetiges Normalenfeld auf S. +γ : [ −ε,ε] +→ +S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und +γ(cid:48)(cid:48)(0) (cid:54)= 0. +Sei n(0) := γ(cid:48)(cid:48)(0) -Sei n(0) := . Zerlege -(cid:107)γ(cid:48)(cid:48)(0)(cid:107) -n(0) = n(0)t+n(0)⊥ mit n(0)t T S und n(0)⊥ (T S)⊥ -s s -∈ ∈ -Dann ist n(0)⊥ = n(0),n(s) n(s) -(cid:104) (cid:105)· -κ (s,γ) := γ(cid:48)(cid:48)(0),n(s) die Normalkrümmung. -Nor -(cid:104) (cid:105) +(cid:107)γ(cid:48)(cid:48)(0)(cid:107). Zerlege +n(0) = n(0)t+n(0)⊥ mit n(0)t +∈ +T sS und n(0)⊥ +∈ +(T sS)⊥ +Dann ist n(0)⊥ = (cid:104)n(0),n(s) (cid:105)·n(s) +κ Nor(s,γ) := (cid:104)γ(cid:48)(cid:48)(0),n(s) +(cid:105) +die Normalkrümmung. Bemerkung 76 -Sei γ(t) = γ( t), t [ ε,ε]. Dann ist κ (s,γ) = κ (s,γ). -Nor Nor -− ∈ − -Beweis: γ(cid:48)(cid:48)(0) = γ(cid:48)(cid:48)(0), da γ(cid:48)(0) = γ(cid:48)(0). -− -Es gilt: κ (s,γ) hängt nur von γ(cid:48)(0) ab und ist gleich κ (s,γ(cid:48)(0)). 
-Nor Nor -| | +Sei γ(t) = γ( −t), t +∈ +[ −ε,ε]. Dann ist κ Nor(s,γ) = κ Nor(s,γ). +Beweis: γ(cid:48)(cid:48)(0) = γ(cid:48)(cid:48)(0), da γ(cid:48)(0) = −γ(cid:48)(0). +Es gilt: κ Nor(s,γ) hängt nur von |γ(cid:48)(0) +| +ab und ist gleich κ Nor(s,γ(cid:48)(0)). Bemerkung 77 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. -Sei T s1S = x T sS x = 1 ∼= S1. Dann ist -{ ∈ | (cid:107) (cid:107) } -κn (s) : T1S R, x κ (s,x) -Nor s Nor -→ (cid:55)→ -eine glatte Funktion und Bildκn (s) ist ein abgeschlossenes Intervall. -Nor +Sei T1 sS = {x +∈ +T sS +| +(cid:107)x +(cid:107) += 1 +} +∼= S1. Dann ist +κn Nor(s) : T1 sS +→ +R, x +(cid:55)→ +κ Nor(s,x) +eine glatte Funktion und Bildκn Nor(s) ist ein abgeschlossenes Intervall. Definition 75 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. 94 5.3.GAUSS-KRÜMMUNG -a) κn(s) : = min(cid:8) κn (s,x) (cid:12) (cid:12) x T1S (cid:9) und heißen Hauptkrümmungen von S in s. -1 Nor ∈ s -κn(s) : = max(cid:8) κn (s,x) (cid:12) (cid:12) x T1S (cid:9) -2 Nor ∈ s -b) K(s) := κn(s) κn(s) heißt Gauß-Krümmung von S in s. -1 · 2 +a) κn 1(s) : = min(cid:8) κn Nor(s,x) (cid:12) (cid:12) x +∈ +T1 sS (cid:9) und +κn 2(s) : = max(cid:8) κn Nor(s,x) (cid:12) (cid:12) x +∈ +T1 sS (cid:9) +heißen Hauptkrümmungen von S in s. +b) K(s) := κn 1(s) ·κn 2(s) heißt Gauß-Krümmung von S in s. Bemerkung 78 -Ersetzt man n durch n, so gilt: -− -κ−n(s,x) = κn (x) x T1S -Nor − Nor ∀ ∈ s -κ−n(s) = κn(s) -⇒ 1 − 2 -κ−n(s) = κn(s) -2 − 1 +Ersetzt man n durch −n, so gilt: +κ−n Nor(s,x) = −κn Nor(x) ∀x +∈ +T1 sS +⇒ +κ−n +1 +(s) = −κn 2(s) +κ−n +2 +(s) = −κn 1(s) und K−n(s) = Kn(s) =: K(s) Beispiel 48 -1) S = S2. Dann ist κ (s) = κ (s) = 1 s S2 -1 2 -± ∀ ∈ -K(s) = 1 +1) S = S2. 
Dann ist κ 1(s) = κ 2(s) = ±1 ∀s +∈ +S2 ⇒ +K(s) = 1 2) Zylinder: -κ (s) = 0,κ (s) = 1 K(s) = 0 -1 2 +κ 1(s) = 0,κ 2(s) = 1 ⇒ +K(s) = 0 3) Sattelpunkt auf hyperbolischem Paraboloid: -κ (s) < 0,κ (s) = 0 K(s) < 0 -1 2 +κ 1(s) < 0,κ 2(s) = 0 → +K(s) < 0 4) S = Torus. Siehe Abbildung 5.3 s +1 +s 2 -s s -3 1 -Abbildung 5.3: K(s ) > 0, K(s ) = 0, K(s ) < 0 -1 2 3 +s +3 +Abbildung 5.3: K(s 1) > 0, K(s 2) = 0, K(s 3) < 0 Bemerkung 79 -Sei S eine reguläre Fläche, s S ein Punkt. +Sei S eine reguläre Fläche, s ∈ +S ein Punkt. 95 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T S +s. -s -b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T S +s. -s +a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T sS +s. +b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T sS +s. 5.4 Erste und zweite Fundamentalform -Sei S R3 eine reguläre Fläche, s S, T S die Tangentialebene an S in s und F : U V eine -s -⊆ ∈ → +Sei S +⊆ +R3 eine reguläre Fläche, s +∈ +S, T sS die Tangentialebene an S in s und F : U +→ +V eine lokale Parametrisierung von S um s. Weiter sei p := F−1(s). Definition 76 -Sei I R2×2 definiert als +Sei I S ∈ -(cid:18) (cid:19) (cid:18) (cid:19) -g (s) g (s) E(s) F(s) -1,1 1,2 -I : = = +R2×2 definiert als +I S -g (s) g (s) F(s) G(s) -1,2 2,2 -mit g = g (D F(e ),D F(e )) -i,j s p i p j -∂F ∂F -= (p), (p) i,j 1,2 -(cid:104)∂u ∂u (cid:105) ∈ { } -i j -Die Matrix I heißt erste Fundamentalform von S bzgl. der Parametrisierung F. +: = +(cid:18) +g 1,1(s) g 1,2(s) +g 1,2(s) g +2,2(s)(cid:19) += +(cid:18) +E(s) F(s) +F(s) +G(s)(cid:19) +mit g +i,j += g s(D pF(e i),D pF(e j)) += +(cid:104)∂F +∂u +i(p), +∂F +∂u +j(p) +(cid:105) +i,j +∈ +{1,2 +} +Die Matrix I S +heißt erste Fundamentalform von S bzgl. der Parametrisierung F. 
Bemerkung 80 -a) Die Einschränkung des Standardskalarproduktes des R3 auf T S macht T S zu einem -s s +a) Die Einschränkung des Standardskalarproduktes des R3 auf T sS macht T sS zu einem euklidischen Vektorraum. -b) D F(e ),D F(e ) ist eine Basis von T S. -p 1 p 2 s -{ } -c) Bzgl. der Basis D F(e ),D F(e ) hat das Standardskalarprodukt aus Bemer- -p 1 p 2 -{ } -kung 80.a die Darstellungsmatrix I . -S -d) g (s) ist eine differenzierbare Funktion von s. -i,j +b) {D pF(e 1),D pF(e 2) +} +ist eine Basis von T sS. +c) Bzgl. der Basis {D pF(e 1),D pF(e 2) +} +hat das Standardskalarprodukt aus Bemer- +kung 80.a die Darstellungsmatrix I S. +d) g i,j(s) ist eine differenzierbare Funktion von s. Bemerkung 81 -(cid:13) (cid:13)2 -(cid:13)∂F ∂F (cid:13) -det(I S) = (cid:13) (p) (p)(cid:13) -(cid:13)∂u × ∂u (cid:13) -1 2 -    -x y -1 1 -Beweis: Sei ∂∂ uF 1(p) = x 2, ∂∂ uF 2(p) = y 2 -x y3 +det(I S) = +(cid:13) +(cid:13) +(cid:13) +(cid:13)∂F +∂u +1(p) +× +∂F +∂u +2(p)(cid:13) +(cid:13) +(cid:13) +(cid:13)2 +Beweis: Sei ∂F ∂u1(p) = + +x +1 +x 2 +x +3 +, ∂F ∂u2(p) = + +y +1 +y 2 +y3 + +Dann ist ∂F ∂u1(p) +× +∂F ∂u2(p) = + +z +1 +z 2 +z +3 + mit +z +1 += x 2y 3 -  +−x 3y +2 z +2 += x 3y 1 -Dann ist ∂∂ uF 1(p) ∂∂ uF 2(p) = z 2 mit -× +−x 1y +3 z 3 -z = x y x y -1 2 3 3 2 -− -z = x y x y -2 3 1 1 3 -− -z = x y x y -3 1 2 2 1 -− -∂F ∂F -(p) (p) = z2+z2+z2 -⇒ (cid:107)∂u × ∂u (cid:107) 1 2 3 -1 2 += x 1y +2 +−x 2y +1 +⇒ +(cid:107)∂F +∂u +1(p) +× +∂F +∂u +2(p) +(cid:107) += z2 +1 ++z2 +2 ++z2 +3 96 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -det(I ) = g g g2 -S 1,1 2,2 1,2 -− -(cid:42) x   x  (cid:43)(cid:42) y   y  (cid:43) (cid:42) x   y  (cid:43)2 -1 1 1 1 1 1 -= x 2,x 2 y 2,y 2 x 2,y 2 -− -x x y y x y -3 3 3 3 3 3 -= (x2+x2+x2)(y2+y2+y2) (x y +x y +x y )2 -1 2 3 1 2 3 − 1 1 2 2 3 3 +det(I S) = g 1,1g +2,2 +−g2 +1,2 += +(cid:42) x +1 +x +2 +x +3 , x +1 +x +2 +x +3 (cid:43)(cid:42) y +1 +y +2 +y +3 , y +1 +y +2 +y +3 (cid:43) 
−(cid:42) x +1 +x +2 +x +3 , y +1 +y +2 +y +3 (cid:43)2 += (x2 1+x2 2+x2 3)(y2 +1 ++y2 +2 ++y2 3) −(x 1y 1+x 2y 2+x 3y 3)2 Definition 77 +a) Das Differential dA = (cid:112) -a) Das Differential dA = det(I)du du heißt Flächenelement von S bzgl. der Para- -1 2 +det(I)du 1du +2 +heißt Flächenelement von S bzgl. der Para- metrisierung F. -b) Für eine Funktion f : V R heißt +b) Für eine Funktion f : V → -(cid:90) (cid:90) -(cid:112) -fdA := f(F(u ,u )) detI(s)du du -1 2 1 2 -V U (cid:124) (cid:123)(cid:122) (cid:125) +R heißt +(cid:90) +V +fdA := +(cid:90) +U +f(F(u 1,u 2) +(cid:124) (cid:123)(cid:122) (cid:125) =:s +)(cid:112) +detI(s)du 1du +2 der Wert des Integrals von f über V, falls das Integral rechts existiert. Bemerkung 82 +a) (cid:82) -a) fdA ist unabhängig von der gewählten Parametrisierung. V -b) Sei f : S R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. +fdA ist unabhängig von der gewählten Parametrisierung. +b) Sei f : S → +R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. +Dann ist (cid:82) -Dann ist fdA wohldefiniert, falls (z. B.) S kompakt ist. -S +SfdA wohldefiniert, falls (z. B.) S kompakt ist. Etwa: -(cid:90) n (cid:90) -(cid:88) -fdA = fdA -S Vi -i=1 (cid:90) +SfdA = +n (cid:88) +i=1(cid:90) +Vi fdA -− -Vi∩Vj +−(cid:88) i(cid:54)=j (cid:90) -(cid:88) -+ fdA +Vi∩Vj +fdA ++(cid:88) +i,j,k(cid:90) Vi∩Vj∩V -i,j,k k -... -− +k +fdA +−... Beweis: a) Mit Transformationsformel. b) Ist dem Leser überlassen. Proposition 5.1 -Sei S R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S S2. -⊆ → +Sei S +⊆ +R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S +→ +S2. 
Dann gilt: -a) n induziert für jedes s S eine lineare Abbildung d n : T S T S2 durch -s s n(s) -∈ → -d (cid:12) -d n(x) = n(s„+“tx)(cid:12) -s (cid:12) -dt (cid:124) (cid:123)(cid:122) (cid:125) t=0 +a) n induziert für jedes s +∈ +S eine lineare Abbildung d sn : T sS +→ +T n(s)S2 durch +d sn(x) = +d +dtn(s„+“tx +(cid:124) (cid:123)(cid:122) (cid:125) SollaufFlächeS bleiben -Die Abbildung d n heißt Weingarten-Abbildung -s +)(cid:12) +(cid:12) +(cid:12) +t=0 +Die Abbildung d sn heißt Weingarten-Abbildung 97 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -b) T S2 = T S. -n(s) s -c) d n ist ein Endomorphismus von T S. -s s -d) d n ist selbstadjungiert bzgl. des Skalarproduktes I . -s S +b) T n(s)S2 = T sS. +c) d sn ist ein Endomorphismus von T sS. +d) d sn ist selbstadjungiert bzgl. des Skalarproduktes I S. Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. 98 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM Beweis: a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -b) T S2 = n(s) ⊥ = T S -n(S) s -(cid:104) (cid:105) -c) Wegen Proposition 5.1 (a) ist d n ein Homomorphismus. -s -d) Zu zeigen: x,y I S : x,d n(y) = d n(x),y -s s s -∀ ∈ (cid:104) (cid:105) (cid:104) (cid:105) +b) T n(S)S2 = (cid:104)n(s) (cid:105)⊥ = T sS +c) Wegen Proposition 5.1 (a) ist d sn ein Homomorphismus. +d) Zu zeigen: ∀x,y +∈ +I sS : (cid:104)x,d sn(y) +(cid:105) += (cid:104)d sn(x),y +(cid:105) Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. 
-Sei x = D F(e ) = ∂F (p) i = 1,2 -i p i ∂ui +Sei x +i += D pF(e i) = ∂F ∂ui(p) i = 1,2 +Beh.: (cid:104)x i,d sn(x j) +(cid:105) += +(cid:104) ∂2F -Beh.: x ,d n(x ) = (p),d n(x ) -(cid:104) i s j (cid:105) (cid:104)∂ui∂uj s i (cid:105) +∂ui∂uj(p),d sn(x i) +(cid:105) +⇒ (cid:104) ∂2F -(p),d n(x ) = x ,d n(x ) -⇒ (cid:104)∂ui∂uj s i (cid:105) (cid:104) j s i (cid:105) -∂F -Bew.: 0 = (p+te ),n(p+te ) -j j -(cid:104)∂u (cid:105) -(cid:18) (cid:19)(cid:12) -d ∂F -0 = (p+te ),n(p+te ) (cid:12) -j j (cid:12) -⇒ dt (cid:104)∂u (cid:105) t=0 -d ∂F (cid:12) -= (p+te )(cid:12) ,n(s) + x ,d nD F(e ) -j (cid:12) i s p j -(cid:104)dt∂u i t=0 (cid:105) (cid:104) (cid:124) (cid:123)(cid:122) (cid:125)(cid:105) -(cid:124) (cid:123)(cid:122) (cid:125) xj +∂ui∂uj(p),d sn(x i) +(cid:105) += (cid:104)x j,d sn(x i) +(cid:105) +Bew.: 0 = +(cid:104)∂F +∂u(p+te j),n(p+te j) +(cid:105) +⇒ +0 = +d +dt +(cid:18) +(cid:104)∂F +∂u(p+te j),n(p+te j) +(cid:105)(cid:19)(cid:12) +(cid:12) +(cid:12) +t=0 += +(cid:104) +d dt∂F +∂u +i(p+te j) +(cid:124) (cid:123)(cid:122) (cid:125) ∂2F -(p) -∂uj∂ui +∂uj∂ui(p) +(cid:12) +(cid:12) +(cid:12) +t=0,n(s) (cid:105)+ (cid:104)x i,d snD pF(e j) +(cid:124) (cid:123)(cid:122) (cid:125) +xj +(cid:105) Definition 78 -Die durch d n definierte symmetrische Bilinearform auf T S heißt zweite Fundamental- -s s -− +Die durch −d sn definierte symmetrische Bilinearform auf T sS heißt zweite Fundamental- form von S in s bzgl. F. 
-Man schreibt: II (x,y) = d n(x),y = I ( d n(x),y) -s s s s -(cid:104)− (cid:105) − +Man schreibt: II s(x,y) = (cid:104)−d sn(x),y +(cid:105) += I s( −d sn(x),y) Bemerkung 83 -Bezüglich der Basis x ,x von T S hat II die Darstellungsmatrix -1 2 s s -{ } -∂2F +Bezüglich der Basis {x 1,x +2 +} +von T sS hat II +s +die Darstellungsmatrix (h(s) -) mit h (s) = (p),n(s) -i,j i,j=1,2 i,j (cid:104)∂u ∂u (cid:105) -i j +i,j) +i,j=1,2 +mit h i,j(s) = +(cid:104) +∂2F +∂u i∂u +j(p),n(s) +(cid:105) Proposition 5.2 -Sei γ : [ ε,ε] S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: -− → -κ (s,γ) = II (γ(cid:48)(0),γ(cid:48)(0)) -Nor s -Beweis: Nach Definition 74 ist κ (s,γ) = γ(cid:48)(cid:48)(0),n(s) . Nach Voraussetzung gilt -Nor -(cid:104) (cid:105) -n(γ(t)) γ(cid:48)(t) γ(cid:48)(cid:48)(0),n(s) = 0 -⊥ ⇔ (cid:104) (cid:105) +Sei γ : [ −ε,ε] +→ +S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: +κ Nor(s,γ) = II s(γ(cid:48)(0),γ(cid:48)(0)) +Beweis: Nach Definition 74 ist κ Nor(s,γ) = (cid:104)γ(cid:48)(cid:48)(0),n(s) (cid:105). 
Nach Voraussetzung gilt +n(γ(t)) +⊥ +γ(cid:48)(t) +⇔ +(cid:104)γ(cid:48)(cid:48)(0),n(s) +(cid:105) += 0 Die Ableitung nach t ergibt +0 = d -0 = ( n(γ(t)),γ(cid:48)(t)) -dt (cid:104) -(cid:28) d (cid:12) (cid:29) -= n(γ(t))(cid:12) ,γ(cid:48)(0) + n(s),γ(cid:48)(cid:48)(0) +dt( (cid:104)n(γ(t)),γ(cid:48)(t)) += +(cid:28) d dtn(γ(t))(cid:12) +(cid:12) (cid:12) -dt t=0 (cid:104) (cid:105) +t=0,γ(cid:48)(0)(cid:29) ++ (cid:104)n(s),γ(cid:48)(cid:48)(0) +(cid:105) 99 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -= d n(γ(cid:48)(0)),γ(cid:48)(0) +κ (s,γ) -s Nor -(cid:104) (cid:105) -= II (γ(cid:48)(0),γ(cid:48)(0))+κ (s,γ) -s Nor -− += (cid:104)d sn(γ(cid:48)(0)),γ(cid:48)(0) (cid:105)+κ Nor(s,γ) += −II s(γ(cid:48)(0),γ(cid:48)(0))+κ Nor(s,γ) Folgerung 5.3 Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: -κ (s,γ) = κ (s,γ(cid:48)(0)) -Nor Nor +κ Nor(s,γ) = κ Nor(s,γ(cid:48)(0)) Satz 5.4 -Sei S R3 eine reguläre, orientierbare Fläche und s S. -⊆ ∈ -a) Die Hauptkrümmungen κ (s),κ (s) sind die Eigenwerte von II . -1 2 s -b) Für die Gauß-Krümmung gilt: K(s) = det(II ) -s +Sei S +⊆ +R3 eine reguläre, orientierbare Fläche und s +∈ +S. +a) Die Hauptkrümmungen κ 1(s),κ 2(s) sind die Eigenwerte von II s. +b) Für die Gauß-Krümmung gilt: K(s) = det(II s) Beweis: -a) II ist symmetrisch, I S hat also eine Orthonormalbasis aus Eigenvektoren y ,y von -s s 1 2 -II . Ist x T S, x = 1, so gibt es ϕ [0,2π) mit x = cosϕ y +sinϕ y . -s s 1 2 -∈ (cid:107) (cid:107) ∈ · · -Seien λ ,λ die Eigenwerte von II , also II (y ,y ) = λ . Dann gilt: -1 2 s s i i i -II (x,x) = cos2ϕλ +sin2ϕλ -s 1 2 -= (1 sin2ϕ)λ +sin2ϕλ -1 2 -− -= λ +sin2ϕ(λ λ ) λ -1 2 1 1 -− ≥ -= cos2ϕ+(1 cos2ϕ)λ +a) II +s +ist symmetrisch, I sS hat also eine Orthonormalbasis aus Eigenvektoren y 1,y 2 -− -= λ cos2ϕ(λ λ ) λ -2 2 1 2 -− − ≤ -=P =r =op =. =5.2 λ 1 = min(cid:8) κ Nor(s,x) (cid:12) (cid:12) x T s1S (cid:9) -⇒ ∈ -λ 2 = max(cid:8) κ Nor(s,x) (cid:12) (cid:12) x T s1S (cid:9) +von +II s. 
Ist x +∈ +T sS, (cid:107)x +(cid:107) += 1, so gibt es ϕ +∈ +[0,2π) mit x = cosϕ ·y 1+sinϕ ·y 2. +Seien λ 1,λ +2 +die Eigenwerte von II s, also II s(y i,y i) = λ i. Dann gilt: +II s(x,x) = cos2ϕλ 1+sin2ϕλ +2 += (1 −sin2ϕ)λ 1+sin2ϕλ +2 += λ 1+sin2ϕ(λ +2 +−λ 1) +≥ +λ +1 += cos2ϕ+(1 −cos2ϕ)λ +2 += λ +2 +−cos2ϕ(λ +2 +−λ 1) +≤ +λ +2 +Prop. 5.2 ===== +⇒ +λ 1 = min(cid:8) κ Nor(s,x) (cid:12) (cid:12) x ∈ +T1 sS (cid:9) +λ 2 = max(cid:8) κ Nor(s,x) (cid:12) (cid:12) x +∈ +T1 sS (cid:9) Satz 5.5 (Satz von Gauß-Bonnet) -Sei S R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: +Sei S ⊆ +R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: (cid:90) -K(s)dA = 2πχ(S) -S +SK(s)dA = 2πχ(S) Dabei ist χ(S) die Euler-Charakteristik von S. Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. Lösungen der Übungsaufgaben Lösung zu Aufgabe 1 Teilaufgabe a) Es gilt: -(i) ,X T . -X -∅ ∈ -(ii) T ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U ,U -X 1 2 +(i) ∅,X ∈ -T : U U T . -X 1 2 X -∩ ∈ -(iii) Auch unter beliebigen Vereinigungen ist T abgeschlossen, d. h. es gilt für eine +T X. +(ii) T X -(cid:83) -beliebige Indexmenge I und alle U T für alle i I : U T -i ∈ X ∈ i∈I i ∈ X -Also ist (X,T ) ein topologischer Raum. +ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U 1,U +2 +∈ +T X -Teilaufgabe b) Wähle x = 1,y = 0. Dann gilt x = y und die einzige Umgebung von x -(cid:54) -ist X. Da y = 0 X können also x und y nicht durch offene Mengen getrennt werden. +: U +1 +∩U +2 ∈ -(X,T ) ist also nicht hausdorffsch. +T X. +(iii) Auch unter beliebigen Vereinigungen ist T X -Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,T ) nach +abgeschlossen, d. h. 
es gilt für eine +beliebige Indexmenge I und alle U +i +∈ +T X -(b)nichthausdorffschist, liefertdieKontrapositionder Trennungseigenschaft, dass(X,T ) +für alle i +∈ +I : +(cid:83) +i∈IU +i +∈ +T X +Also ist (X,T X) ein topologischer Raum. +Teilaufgabe b) Wähle x = 1,y = 0. Dann gilt x (cid:54)= y und die einzige Umgebung von x +ist X. Da y = 0 +∈ +X können also x und y nicht durch offene Mengen getrennt werden. +(X,T X) ist also nicht hausdorffsch. +Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,T X) nach +(b)nichthausdorffschist, liefertdieKontrapositionder Trennungseigenschaft, dass(X,T X) kein metrischer Raum sein kann. Lösung zu Aufgabe 2 Teilaufgabe a) -Beh.: a Z : a ist abgeschlossen. -∀ ∈ { } -Sei a Z beliebig. Dann gilt: +Beh.: ∀a +∈ +Z : {a +} +ist abgeschlossen. +Sei a ∈ +Z beliebig. Dann gilt: Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. Teilaufgabe b) -Beh.: 1,1 ist nicht offen -{− } +Beh.: {−1,1 +} +ist nicht offen Bew.: durch Widerspruch -Annahme: 1,1 ist offen. -{− } +Annahme: {−1,1 +} +ist offen. +Dann gibt es T +⊆ +B, sodass (cid:83) -Dann gibt es T B, sodass M = 1,1 . Aber alle U B haben unendlich viele -⊆ M∈T {− } ∈ +M∈T +M = {−1,1 }. Aber alle U +∈ +B haben unendlich viele Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente -keine endliche nicht-leere Menge kann in dieser Topologie offen sein 1,1 ist -⇒ ⇒ {− } +⇒ +keine endliche nicht-leere Menge kann in dieser Topologie offen sein +⇒ +{−1,1 +} +ist nicht offen. (cid:4) Teilaufgabe c) Beh.: Es gibt unendlich viele Primzahlen. 101 LösungenderÜbungsaufgaben Bew.: durch Widerspruch -Annahme: Es gibt nur endlich viele Primzahlen p P +Annahme: Es gibt nur endlich viele Primzahlen p ∈ +P Dann ist -Z 1,+1 FSd.A =rithmetik (cid:91) U +Z \{−1,+1 +} +FSd.Arithmetik = (cid:91) p∈PU 0,p -\{− } -p∈P -endlich. Das ist ein Widerspruch zu Z ist unendlich und 1,1 ist endlich. (cid:4) -| | |{− }| +endlich. 
Das ist ein Widerspruch zu |Z +| +ist unendlich und |{−1,1 +}| +ist endlich. (cid:4) Lösung zu Aufgabe 3 (a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form -(cid:89) (cid:89) -U P -j i +(cid:89) +j∈JU +j × -j∈J i∈N,i(cid:54)=j -wobei J N endlich und U P offen ist. -j j -⊆ ⊆ +(cid:89) +i∈N,i(cid:54)=jP +i +wobei J +⊆ +N endlich und U +j +⊆ +P +j +offen ist. Beweis: Nach Definition der Produkttopologie bilden Mengen der Form -(cid:89) (cid:89) -U P -j i +(cid:89) +i∈JU +j × -i∈J i∈N\J -wobei J N endlich und U P offen j J eine Basis der Topologie. -j j -⊆ ⊆ ∀ ∈ +(cid:89) +i∈N\JP +i +wobei J +⊆ +N endlich und U +j +⊆ +P +j +offen ∀j +∈ +J eine Basis der Topologie. Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen Form. (cid:4) (b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. -Beweis: Es seinen x,y P und x sowie y liegen in der gleichen Zusammenhangs- -∈ -komponente Z P. Da Z zusammenhängend ist und i I : p : P P ist -i i -⊆ ∀ ∈ → -stetig, ist p (Z) P zusammenhängend für alle i N. Die zusammenhängenden -i i -⊆ ∈ -Mengen von P sind genau 0 und 1 , d. h. für alle i N gilt entweder +Beweis: Es seinen x,y +∈ +P und x sowie y liegen in der gleichen Zusammenhangs- +komponente Z +⊆ +P. Da Z zusammenhängend ist und ∀i +∈ +I : p +i +: P +→ +P +i +ist +stetig, ist p i(Z) +⊆ +P +i +zusammenhängend für alle i +∈ +N. Die zusammenhängenden +Mengen von P +i +sind genau {0 +} +und {1 }, d. h. für alle i +∈ +N gilt entweder +p i(Z) +⊆ +{0 +} +oder p i(Z) +⊆ +{1 }. Es sei z +i +∈ +{0,1 +} +so, dass p i(Z) +⊆ +{z +i +} +für +alle i +∈ +N. Dann gilt also: +p i(x) +(cid:124)(cid:123)(cid:122)(cid:125) +=xi += z i -{ } { } ∈ -p (Z) 0 oder p (Z) 1 . Es sei z 0,1 so, dass p (Z) z für -i i i i i -⊆ { } ⊆ { } ∈ { } ⊆ { } -alle i N. 
Dann gilt also: -∈ -p (x) = z = p (y) i N -i i i -∀ ∈ -(cid:124)(cid:123)(cid:122)(cid:125) (cid:124)(cid:123)(cid:122)(cid:125) -=xi =yi += p i(y) +(cid:124)(cid:123)(cid:122)(cid:125) +=yi +∀i +∈ +N Somit folgt: x = y (cid:4) Lösung zu Aufgabe 4 -(a) Beh.: GL (R) ist nicht kompakt. -n -Bew.: det : GL (R) R 0 ist stetig. Außerdem ist det(GL (R)) = R 0 -n n -→ \{ } \{ } -nicht kompakt. 22 GL (R) ist nicht kompakt. (cid:4) -n +(a) Beh.: GL n(R) ist nicht kompakt. +Bew.: det : GL n(R) +→ +R \{0 +} +ist stetig. Außerdem ist det(GL n(R)) = R \{0 +} +nicht kompakt. 22 +⇒ +GL n(R) ist nicht kompakt. (cid:4) +(b) Beh.: SL 1(R) ist nicht kompakt, für n > 1 ist SL n(R) kompakt. +Bew.: Für SL 1(R) gilt: SL 1(R) = (cid:8) A +∈ +R1×1 (cid:12) (cid:12) detA = 1(cid:9) = (cid:0) 1(cid:1) ∼= {1 }. 22 ⇒ -(b) Beh.: SL (R) ist nicht kompakt, für n > 1 ist SL (R) kompakt. -1 n -Bew.: Für SL 1(R) gilt: SL 1(R) = (cid:8) A R1×1 (cid:12) (cid:12) detA = 1(cid:9) = (cid:0) 1(cid:1) ∼= 1 . 22 SL 1(R) -∈ { } ⇒ +SL 1(R) ist kompakt. 102 LösungenderÜbungsaufgaben -SL (R) GL (R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1 -n n +SL n(R) ⊆ +GL n(R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1 sinddiesegenaudannkompakt,wennsiebeschränktundabgeschlossensind.Definiere -nun für für n N ,m N: -≥2 -∈ ∈ +nun für für n +∈ +N ≥2,m +∈ +N: +A +m += diag n(m, 1 -A = diag (m, ,...,1) -m n +m,...,1) +Dann gilt: detA m -Dann gilt: detA = 1, d. h. A SL (R), und A ist unbeschränkt, da A = -m m n m m ∞ -∈ (cid:107) (cid:107) -m . (cid:4) -−m−−→−∞→ ∞ -(c) Beh.: (R) ist kompakt. -P -Bew.: (R) ∼= Sn/ x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil- -P += 1, d. h. A +m +∈ +SL n(R), und A +m +ist unbeschränkt, da (cid:107)A +m (cid:107)∞ += +m +−−−−→ m→∞ +∞. (cid:4) +(c) Beh.: P(R) ist kompakt. +Bew.: P(R) ∼= Sn/ x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil- dung stetig. 
Da Sn als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt -ist 22 (R) ist kompakt. (cid:4) -⇒ P +ist 22 +⇒ +P(R) ist kompakt. (cid:4) Lösung zu Aufgabe 5 Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. Definition 79 -Seien (G, ) und (H, ) Gruppen und ϕ : G H eine Abbildung. -∗ ◦ → +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G +→ +H eine Abbildung. ϕ heißt Homomorphismus, wenn -g ,g G : ϕ(g g ) = ϕ(g ) ϕ(g ) -1 2 1 2 1 2 -∀ ∈ ∗ ◦ +∀g 1,g +2 +∈ +G : ϕ(g +1 +∗g 2) = ϕ(g 1) ◦ϕ(g 2) gilt. Es folgt direkt: -1) Sei X = R mit der Standarttopologie und ϕ : idR und R = (R,+). Dann ist ϕ ein -1 1 +1) Sei X = R mit der Standarttopologie und ϕ +1 +: idR und R = (R,+). Dann ist ϕ +1 +ein Gruppenhomomorphismus und ein Homöomorphismus. -2) Sei G = (Z,+) und H = (Z/3Z,+). Dann ist ϕ : G H,x x mod 3 ein +2) Sei G = (Z,+) und H = (Z/3Z,+). Dann ist ϕ 2 -→ (cid:55)→ -Gruppenhomomorphismus. Jedoch ist ϕ nicht injektiv, also sicher kein Homöomor- +: G +→ +H,x +(cid:55)→ +x mod 3 ein +Gruppenhomomorphismus. Jedoch ist ϕ 2 +nicht injektiv, also sicher kein Homöomor- phismus. -3) Sei X ein topologischer Raum. Dann ist id ein Homöomorphismus. Da keine +3) Sei X ein topologischer Raum. Dann ist id X +ein Homöomorphismus. Da keine Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup- penhomomorphismus. Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten @@ -5440,146 +8801,219 @@ Lösung zu Aufgabe 6 Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf Seite 6. Definition 80 -Seien (G, ) und (H, ) Gruppen und ϕ : G H eine Abbildung. -∗ ◦ → +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G +→ +H eine Abbildung. ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. 
103 LösungenderÜbungsaufgaben Lösung zu Aufgabe 7 (a) Vor.: Sei M eine topologische Mannigfaltigkeit. -Beh.: M ist wegzusammehängend M ist zusammenhängend +Beh.: M ist wegzusammehängend ⇔ -Beweis: „ “: Da M insbesondere ein topologischer Raum ist folgt diese Richtung -⇒ +M ist zusammenhängend +Beweis: „ ⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung direkt aus Bemerkung 23. -„ “: Seien x,y M und -⇐ ∈ -Z := z M Weg von x nach z -{ ∈ | ∃ } +„ ⇐“: Seien x,y +∈ +M und +Z := {z +∈ +M +| +∃Weg von x nach z +} Es gilt: -(i) Z = , da M lokal wegzusammenhängend ist -(cid:54) ∅ +(i) Z (cid:54)= ∅, da M lokal wegzusammenhängend ist (ii) Z ist offen, da M lokal wegzusammenhängend ist -(iii) ZC := z˜ M (cid:64)Weg von x nach z˜ ist offen -{ ∈ | } -Da M eine Mannigfaltigkeit ist, existiert zu jedem z˜ ZC eine offene und -∈ -wegzusammenhängende Umgebung U M. -z˜ -⊆ -Es gilt sogar U ZC, denn gäbe es ein U z Z, so gäbe es Wege γ : -z˜ z˜ 2 -⊆ (cid:51) ∈ -[0,1] M,γ (0) = z,γ (1) = x und γ : [0,1] M,γ (0) = z˜,γ (1) = z. -2 2 1 1 1 -→ → +(iii) ZC := +{ +˜ z +∈ +M +| +(cid:64)Weg von x nach ˜ z +} +ist offen +Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜ z +∈ +ZC eine offene und +wegzusammenhängende Umgebung U +˜ z +⊆ +M. +Es gilt sogar U +˜ z +⊆ +ZC, denn gäbe es ein U +˜ z +(cid:51) +z +∈ +Z, so gäbe es Wege γ +2 +: +[0,1] +→ +M,γ 2(0) = z,γ 2(1) = x und γ +1 +: [0,1] +→ +M,γ 1(0) = ˜ z,γ 1(1) = z. Dann wäre aber -γ : [0,1] M, +γ : [0,1] → +M, +γ(x) = (cid:40) -γ (2x) falls 0 x 1 -γ(x) = 1 ≤ ≤ 2 -γ (2x 1) falls 1 < x 1 -2 − 2 ≤ -ein stetiger Weg von z˜ nach x Widerspruch. -⇒ -DaM zusammenhängendistundM = Z ZC ,sowieZ = folgtZC = . -(cid:124)(cid:123)(cid:122)(cid:125)∪(cid:124)(cid:123)(cid:122)(cid:125) (cid:54) ∅ ∅ -offen offen +γ 1(2x) falls 0 +≤ +x +≤ +1 +2 +γ 2(2x −1) falls 1 +2 +< x +≤ +1 +ein stetiger Weg von ˜ z nach x +⇒ +Widerspruch. 
+DaM zusammenhängendistundM = Z +(cid:124)(cid:123)(cid:122)(cid:125) offen∪ +ZC +(cid:124)(cid:123)(cid:122)(cid:125) +offen,sowieZ (cid:54)= ∅folgtZC = ∅. Also ist M = Z wegzusammenhängend. (cid:4) (b) Beh.: X ist wegzusammenhängend. -Beweis: X := (R 0 ) 0 ,0 und (R 0 ) 0 sind homöomorph zu R. -1 2 2 -\{ } ∪{ } \{ } ∪{ } +Beweis: X := (R \{0 }) ∪{0 1,0 +2 +} +und (R \{0 }) ∪{0 +2 +} +sind homöomorph zu R. Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte -0 und 0 . -1 2 -Da (R 0 ) 0 homöomorph zu R ist, exisitert ein Weg γ von 0 zu einem -1 1 1 -\{ } ∪{ } -beliebigen Punkt a R 0 . -∈ \{ } -Da (R 0 ) 0 ebenfalls homöomorph zu R ist, existiert außerdem ein -2 -\{ } ∪{ } -Weg γ von a nach 0 . Damit existiert ein (nicht einfacher) Weg γ von 0 nach -2 2 1 -0 . (cid:4) +0 +1 +und 0 2. +Da (R \{0 }) ∪{0 +1 +} +homöomorph zu R ist, exisitert ein Weg γ +1 +von 0 +1 +zu einem +beliebigen Punkt a +∈ +R \{0 }. +Da (R \{0 }) ∪{0 +2 +} +ebenfalls homöomorph zu R ist, existiert außerdem ein +Weg γ 2 +von a nach 0 2. Damit existiert ein (nicht einfacher) Weg γ von 0 +1 +nach +0 2. (cid:4) Lösung zu Aufgabe 9 -Vor.: Sei (X,d) eine absolute Ebene, A,B,C X und ABC ein Dreieck. -∈ (cid:52) +Vor.: Sei (X,d) eine absolute Ebene, A,B,C +∈ +X und (cid:52)ABC ein Dreieck. 104 LösungenderÜbungsaufgaben -(a) Beh.: AB = AC ∠ABC = ∠ACB -∼ ∼ +(a) Beh.: AB ∼= AC ⇒ -Bew.: Sei AB = AC. -∼ -Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. +∠ABC ∼= ∠ACB +Bew.: Sei AB ∼= AC. ⇒ ∃ -ϕ(∠ABC) = ∠ACB +Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. ⇒ -∠ABC ∠ACB (cid:4) -= -∼ +ϕ(∠ABC) = ∠ACB ⇒ -(b) Beh.: Der längeren Seite von ABC liegt der größere Winkel gegenüber und umge- -(cid:52) +∠ABC +∼= +∠ACB (cid:4) +(b) Beh.: Der längeren Seite von (cid:52)ABC liegt der größere Winkel gegenüber und umge- kehrt. -Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C(cid:48) AC+ mit d(A,C(cid:48)) = d(A,B) +Bew.: Sei d(A,C) > d(A,B). 
Nach §3 (i) gibt es C(cid:48) ∈ -C(cid:48) liegt zwischen A und C. +AC+ mit d(A,C(cid:48)) = d(A,B) ⇒ +C(cid:48) liegt zwischen A und C. Es gilt (cid:93)ABC(cid:48) < (cid:93)ABC und aus Aufgabe 9 (a) folgt: (cid:93)ABC(cid:48) = (cid:93)AC(cid:48)B. -∠BC(cid:48)A ist ein nicht anliegender Außenwinkel zu ∠BCA =B =e =m =. =66 (cid:93)BC(cid:48)A > (cid:93)BCA +∠BC(cid:48)A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66 ===== ⇒ -(cid:93)BCA < (cid:93)BC(cid:48)A = (cid:93)ABC(cid:48) < (cid:93)ABC Sei umgekehrt (cid:93)ABC > (cid:93)BCA, kann +(cid:93)BC(cid:48)A > (cid:93)BCA ⇒ +(cid:93)BCA < (cid:93)BC(cid:48)A = (cid:93)ABC(cid:48) < (cid:93)ABC Sei umgekehrt (cid:93)ABC > (cid:93)BCA, kann wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B) > d(A,C) gelten. Wegen Aufgabe 9 (a) kann nicht d(A,B) = d(A,C) gelten. -d(A,B) < d(A,C) (cid:4) ⇒ -(c) Vor.: Sei g eine Gerade, P X und P / g -∈ ∈ -Beh.: ! Lot -∃ +d(A,B) < d(A,C) (cid:4) +(c) Vor.: Sei g eine Gerade, P +∈ +X und P / +∈ +g +Beh.: ∃! Lot Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g. -ϕ(P)P schneidet g in F. ⇒ +ϕ(P)P schneidet g in F. Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g +⇒ ϕ(P)P schneidet g in F. +SeiA +∈ +g \{F }. Danngilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒ -SeiA g F . Danngilt ϕ(∠AFP) = ∠AFϕ(P) = π ∠AFP istrechterWinkel. -∈ \{ } ⇒ -Gäbe es nun G g F , so dass PG weiteres Lot von P auf g ist, wäre PFG -∈ \{ } (cid:52) -ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). -P -A -F +∠AFP istrechterWinkel. +Gäbe es nun G +∈ +g \{F }, so dass PG weiteres Lot von P auf g ist, wäre (cid:52)PFG +ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). · · -G +A +GP +F g Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π -G gibt es nicht. (cid:4) ⇒ +G gibt es nicht. (cid:4) Lösung zu Aufgabe 10 -Sei f h und o. B. d. A. f g. 
-(cid:107) (cid:107) -f ∦ h f h = , sei also x f h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele -⇒ ∩ (cid:54) ∅ ∈ ∩ -zu g durch x, da x / g. Diese ist f, da x f und f g. Da aber x h, kann h nicht -∈ ∈ (cid:107) ∈ +Sei f +(cid:107) +h und o. B. d. A. f +(cid:107) +g. +f ∦ h +⇒ +f ∩h (cid:54)= ∅, sei also x +∈ +f ∩h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele +zu g durch x, da x / +∈ +g. Diese ist f, da x +∈ +f und f +(cid:107) +g. Da aber x +∈ +h, kann h nicht 105 LösungenderÜbungsaufgaben -parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f = h). g ∦ h (cid:4) -(cid:54) ⇒ +parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f +(cid:54) += h). +⇒ +g ∦ h (cid:4) Lösung zu Aufgabe 11 -Sei (X,d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem ABC und A(cid:48)B(cid:48)C(cid:48) -(cid:52) (cid:52) +Sei (X,d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem (cid:52)ABC und (cid:52)A(cid:48)B(cid:48)C(cid:48) Dreiecke, für die gilt: d(A,B) = d(A(cid:48),B(cid:48)) d(A,C) = d(A(cid:48),C(cid:48)) @@ -5589,10 +9023,10 @@ bzgl. AB wie C. Diese Isometrie existiert wegen §4. Es gilt d(A,C) = d(A(cid:48),C(cid:48)) = d(ϕ(A(cid:48)),ϕ(C(cid:48))) = d(A,ϕ(C(cid:48))) und d(B,C) = d(B(cid:48),C(cid:48)) = d(ϕ(B(cid:48)),ϕ(C(cid:48))) = d(B,ϕ(C(cid:48))). Bem. 62 -===== C = ϕ(C). +===== ⇒ -Es gilt also ϕ( A(cid:48)B(cid:48)C(cid:48)) = ABC. (cid:4) -(cid:52) (cid:52) +C = ϕ(C). +Es gilt also ϕ( (cid:52)A(cid:48)B(cid:48)C(cid:48)) = (cid:52)ABC. (cid:4) Bildquellen Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. @@ -5604,8 +9038,7 @@ Abb. 
1.11 Knoten von Jim.belk aus der „Blue knots“-Serie: – Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png – Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png – Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png -– 6 -Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png -2 +– 6 2-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3) Abb. 1.13 Kleeblattknoten,3-Färbung:Jim.belk,commons.wikimedia.org/wiki/File:Tricoloring. png @@ -5643,352 +9076,512 @@ benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurde aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. Definition 81 -Sei D R und x R. x heißt ein Häufungspunkt von D : Folge x in D x -0 0 n 0 -⊆ ∈ ⇔ ∃ \{ } -mit x x . -n 0 +Sei D +⊆ +R und x +0 +∈ +R. x +0 +heißt ein Häufungspunkt von D : +⇔ ∃ +Folge x +n +in D \{x +0 +} +mit x +n → +x 0. Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra entnommen: Definition 82 Es seien V und W K-Vektorräume und A(V) und A(W) die zugehörigen affinen Räume. -Eine Abbildung f : V W heißt affin, falls für alle a,b V und alle λ,µ K mit λ+µ = 1 -→ ∈ ∈ +Eine Abbildung f : V +→ +W heißt affin, falls für alle a,b +∈ +V und alle λ,µ +∈ +K mit λ+µ = 1 gilt: f(λa+µb) = λf(a)+µf(b) Definition 83 -Sei V ein Vektorraum und S V eine Teilmenge. +Sei V ein Vektorraum und S ⊆ +V eine Teilmenge. 
S heißt eine Orthonormalbasis von V, wenn gilt: (i) S ist eine Basis von V -(ii) v S : v = 1 -∀ ∈ (cid:107) (cid:107) -(iii) v ,v S : v = v v ,v = 0 -1 2 1 2 1 2 -∀ ∈ (cid:54) ⇒ (cid:104) (cid:105) +(ii) ∀v +∈ +S : (cid:107)v +(cid:107) += 1 +(iii) ∀v 1,v +2 +∈ +S : v +1 +(cid:54)= v +2 +⇒ +(cid:104)v 1,v +2 +(cid:105) += 0 Satz (Zwischenwertsatz) -Sei a < b und f C[a,b] := C([a,b]), weiter sei y R und f(a) < y < f(b) oder -0 0 -∈ ∈ -f(b) < y < f(a). Dann existiert ein x [a,b] mit f(x ) = y . -0 0 0 0 +Sei a < b und f +∈ +C[a,b] := C([a,b]), weiter sei y +0 +∈ +R und f(a) < y +0 +< f(b) oder +f(b) < y +0 +< f(a). Dann existiert ein x +0 ∈ +[a,b] mit f(x 0) = y 0. Definition 84 -Sei V ein Vektorraum über einem Körper K und f : V V eine lineare Abbildung. +Sei V ein Vektorraum über einem Körper K und f : V → -v V 0 heißt Eigenvektor : λ K : f(v) = λv. -∈ \{ } ⇔ ∃ ∈ -Wenn ein solches λ K existiert, heißt es Eigenwert von f. +V eine lineare Abbildung. +v ∈ +V \{0 +} +heißt Eigenvektor : +⇔ +∃λ +∈ +K : f(v) = λv. +Wenn ein solches λ +∈ +K existiert, heißt es Eigenwert von f. Satz (Binomischer Lehrsatz) -Sei x,y R. Dann gilt: +Sei x,y +∈ +R. Dann gilt: +(x+y)n = +n +(cid:88) +k=0(cid:18) +n +k(cid:19) +xn−kyk ∀n ∈ -n (cid:18) (cid:19) -(cid:88) n -(x+y)n = xn−kyk n N +N 0 -k ∀ ∈ -k=0 Definition 85 -Seien a,b R3 Vektoren. +Seien a,b ∈ -      -a a a b a b -1 1 2 3 3 2 -− -a b := b 3 b 3 = a 3b 1 a 1b 3 -× × − -a a a b a b -3 3 1 2 2 1 -− +R3 Vektoren. +a ×b := + +a +1 +b 3 +a +3 + +× +a +1 +b 3 +a +3 + = + +a 2b +3 +−a 3b +2 +a 3b 1 −a 1b 3 +a 1b +2 +−a 2b +1 + Symbolverzeichnis -Mengenoperationen Perm(X) Permutationsgruppe -Sym(X) Symmetrische Gruppe +Mengenoperationen Seien A,B und M Mengen. -AC Komplement von A Wege -(M) Potenzmenge von M -P -M Abschluss von M Sei γ : I X ein Weg. 
-∂M Rand der Menge M → -M◦ Inneres der Menge M [γ] Homotopieklasse von γ -γ γ Zusammenhängen von Wegen -A B Kreuzprodukt 1 2 -∗ -× γ γ Homotopie von Wegen -A B Teilmengenbeziehung 1 2 +AC Komplement von A +P(M) Potenzmenge von M +M Abschluss von M +∂M Rand der Menge M +M◦ Inneres der Menge M +A ×B Kreuzprodukt +A +⊆ +B Teilmengenbeziehung +A (cid:40) B echte Teilmengenbeziehung +A \B Differenzmenge +A ∪B Vereinigung +A ˙ ∪B Disjunkte Vereinigung +A ∩B Schnitt +Geometrie +AB Gerade durch die Punkte A und +B +AB Strecke mit Endpunkten A und B +(cid:52)ABC Dreieck mit Eckpunkten A,B,C +AB ∼= CD Die Strecken AB und CD sind +isometrisch +|K +| +Geometrische Realisierung des +Simplizialkomplexes K +Gruppen +Sei X ein topologischer Raum und K ein Kör- +per. +Homöo(X) Homöomorphismengruppe +Iso(X) Isometriengruppe +GL n(K) Allgemeine lineare Gruppe (von +General Linear Group) +SL n(K) Spezielle lineare Gruppe +PSL n(K) Projektive lineare Gruppe +Perm(X) Permutationsgruppe +Sym(X) Symmetrische Gruppe +Wege +Sei γ : I +→ +X ein Weg. +[γ] Homotopieklasse von γ +γ +1 +∗γ +2 +Zusammenhängen von Wegen +γ +1 ∼ -A ⊆ (cid:40) B echte Teilmengenbeziehung γ(x) Inverser Weg, also γ(x) := γ(1 x) -− +γ +2 +Homotopie von Wegen +γ(x) Inverser Weg, also γ(x) := γ(1 −x) C Bild eines Weges γ, also C := -A B Differenzmenge -\ γ([0,1]) -A B Vereinigung -∪ -A ˙ B Disjunkte Vereinigung -∪ -A B Schnitt +γ([0,1]) Weiteres -∩ -Geometrie B Basis einer Topologie -B (x) δ-Kugel um x -δ -Subbasis einer Topologie -AB Gerade durch die Punkte A und +B Basis einer Topologie +B δ(x) δ-Kugel um x S +Subbasis einer Topologie T Topologie -B -AB Strecke mit Endpunkten A und B Atlas -ABC Dreieck mit Eckpunkten A,B,C A Projektiver Raum -(cid:52) -AB ∼= CD Die Strecken AB und CD sind P , Skalarprodukt -isometrisch (cid:104)· ·(cid:105) -X/ X modulo +A +Atlas +P +Projektiver Raum +(cid:104)·, +·(cid:105) +Skalarprodukt +X/ ∼ -K Geometrische Realisierung des [x] Äquivalenzk∼ lassen von x bzgl. 
-| | ∼ -Simplizialkomplexes K x Norm von x ∼ -(cid:107) (cid:107) -x Betrag von x -| | -a Erzeugnis von a -Gruppen -(cid:104) (cid:105) +X modulo +∼ [x] +∼ +Äquivalenzklassen von x bzgl. +∼ (cid:107)x +(cid:107) +Norm von x +|x +| +Betrag von x +(cid:104)a +(cid:105) +Erzeugnis von a Sn Sphäre Tn Torus -Sei X ein topologischer Raum und K ein Kör- -per. -f g Verkettung von f und g -Homöo(X) Homöomorphismengruppe ◦ -π Projektion auf X +f ◦g Verkettung von f und g +π X -Iso(X) Isometriengruppe -f f eingeschränkt auf U -U -GL (K) Allgemeine lineare Gruppe (von | -n f−1(M) Urbild von M -General Linear Group) +Projektion auf X +f +|U +f eingeschränkt auf U +f−1(M) Urbild von M Rg(M) Rang von M -SL (K) Spezielle lineare Gruppe -n χ(K) Euler-Charakteristik von K -PSL (K) Projektive lineare Gruppe -n +χ(K) Euler-Charakteristik von K 110 Symbolverzeichnis ∆k Standard-Simplex X#Y Verklebung von X und Y -d Lineare Abbildung aus Bemer- +d n +Lineare Abbildung aus Bemer- kung 37 -A = B A ist isometrisch zu B -∼ -f Abbildung zwischen Fundamental- +A ∼= B A ist isometrisch zu B +f ∗ +Abbildung zwischen Fundamental- gruppen (vgl. Seite 49) 111 Symbolverzeichnis Zahlenmengen -N = 1,2,3,... Natürliche Zahlen -{ } -Z = N 0, 1, 2,... Ganze Zahlen -Q = Z∪{(cid:8) 1,− 1, 2−(cid:9) = (cid:8)} z mit z Z und n Z 0 (cid:9) Rationale Zahlen -R = Q∪ (cid:8) √2 23 , 3 √3 3,...n (cid:9) Reel∈ e Zahlen ∈ \{ } -∪ − -R Echt positive reele Zahlen +N = {1,2,3,... +} +Natürliche Zahlen +Z = N ∪{0, −1, −2,... +} +Ganze Zahlen +Q = Z ∪(cid:8) 1 2, 1 3, 2 +3 +(cid:9) = (cid:8) z +n +mit z +∈ +Z und n +∈ +Z \{0 }(cid:9) Rationale Zahlen +R = Q ∪(cid:8) √2, +− +3 √3,...(cid:9) Reele Zahlen +R + -Rn := (x ,...,x ) Rn x 0 Halbraum -+,0 1 n n -{ ∈ | ≥ } -R× = R 0 Einheitengruppe von R -\{ } -C = a+ib a,b R Komplexe Zahlen -{ | ∈ } -P = 2,3,5,7,... 
Primzahlen -{ } -H = z C z > 0 obere Halbebene -{ ∈ | (cid:61) } +Echt positive reele Zahlen +Rn ++,0 +:= {(x 1,...,x n) +∈ +Rn +| +x +n +≥ +0 +} +Halbraum +R× = R \{0 +} +Einheitengruppe von R +C = {a+ib +| +a,b +∈ +R +} +Komplexe Zahlen +P = {2,3,5,7,... +} +Primzahlen +H = {z +∈ +C +| +(cid:61)z > 0 +} +obere Halbebene I = [0,1] (cid:40) R Einheitsintervall -f : S1 (cid:44) R2 Einbettung der Kreislinie in die Ebene +f : S1 (cid:44) → -π (X,x) Fundamentalgruppe im topologischen Raum X um x X -1 +R2 Einbettung der Kreislinie in die Ebene +π 1(X,x) Fundamentalgruppe im topologischen Raum X um x ∈ +X Fix(f) Menge der Fixpunkte der Abbildung f +(cid:107)·(cid:107)2 2-Norm; Euklidische Norm -2 -(cid:107)·(cid:107) κ Krümmung -κ Normalenkrümmung +κ Nor +Normalenkrümmung V(f) Nullstellenmenge von f2 Krümmung -D F : R2 R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) -p +D pF : R2 → -T S Tangentialebene an S R3 durch s S -s -⊆ ∈ -d n(x) Weingarten-Abbildung -s +R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) +T sS Tangentialebene an S +⊆ +R3 durch s +∈ +S +d sn(x) Weingarten-Abbildung 2von Vanishing Set Stichwortverzeichnis -Abbildung einfach zusammenhängend, 49 -affine, 107 Einheitsnormalenfeld, 90 -differenzierbare, 29 Euler-Charakteristik, siehe Eulerzahl -homotope, 50 Eulersche Polyederformel, 38 -offene, 53 Eulerzahl, 36 +Abbildung +affine, 107 +differenzierbare, 29 +homotope, 50 +offene, 53 simpliziale, 35 -Färbbarkeit, 21 stetige, 9 -Faser, siehe Urbild Abschluss, 3 -Fläche Abstand, 86 -orientierbare, 90 Abstandsaxiom, 65 -reguläre, 30 Achterknoten, 20 -Flächenelement, 95 Aktion, siehe Gruppenoperation -Formoperator, siehe Weingarten-Abbildung Anordnungsaxiome, 66 -Fundamentalform Atlas, 24 -erste, 94 Außenwinkel, 70 -zweite, 97 Axiom, 64 -Fundamentalgruppe, 47 Axiomensystem, 64 -Gauß-Krümmung, 92, 91–94 Basis, 3 -Geometrie, 64 Baum, 37 -Gerade, 64 Betti-Zahl, 41 -hyperbolische, 77 Bewegungsaxiom, 66 -Graph, 37 Binormalenvektor, 89 
+Cantorsches Diskontinuum, 22 +Ck-Struktur, 29 +Decktransformation, 59 +Decktransformationsgruppe, 59 +Deformationsretrakt, 47 +dicht, 3 +Diffeomorphismus, 29 +Dimension, 34 +diskret, 53 +Doppelverhältnis, 83 +Dreibein +begleitendes, 89 +Ebene +euklidische, 64 +Eigenvektor, 107 +Eigenwert, 107 +einfach zusammenhängend, 49 +Einheitsnormalenfeld, 90 +Euler-Charakteristik, siehe Eulerzahl +Eulersche Polyederformel, 38 +Eulerzahl, 36 +Färbbarkeit, 21 +Faser, siehe Urbild +Fläche +orientierbare, 90 +reguläre, 30 +Flächenelement, 95 +Formoperator, siehe Weingarten-Abbildung +Fundamentalform +erste, 94 +zweite, 97 +Fundamentalgruppe, 47 +Gauß-Krümmung, 92, 91–94 +Geometrie, 64 +Gerade, 64 +hyperbolische, 77 +Graph, 37 Grenzwert, 8 -Cantorsches Diskontinuum, 22 Gruppe -Ck-Struktur, 29 allgemeine lineare, 22, 26 +Gruppe +allgemeine lineare, 22, 26 spezielle lineare, 22 -Decktransformation, 59 topologische, 33 -Decktransformationsgruppe, 59 Gruppe operiert durch Homöomorphismen, -Deformationsretrakt, 47 61 -dicht, 3 Gruppenaktion, siehe Gruppenoperation -Diffeomorphismus, 29 Gruppenoperation, 60, 60–63 -Dimension, 34 stetige, 61 -diskret, 53 -Doppelverhältnis, 83 Häufungspunkt, 107 -Dreibein Hülle -begleitendes, 89 konvexe, 34 +topologische, 33 +Gruppe operiert durch Homöomorphismen, +61 +Gruppenaktion, siehe Gruppenoperation +Gruppenoperation, 60, 60–63 +stetige, 61 +Häufungspunkt, 107 +Hülle +konvexe, 34 Halbebene, 66 -Ebene Halbgerade, 65 -euklidische, 64 Halbraum, 28 -Eigenvektor, 107 Hauptkrümmung, 92 -Eigenwert, 107 Hilbert-Kurve, 19, 19 +Halbgerade, 65 +Halbraum, 28 +Hauptkrümmung, 92 +Hilbert-Kurve, 19, 19 113 Stichwortverzeichnis -Homöomorphismengruppe, 10 lokal, 3 -Homöomorphismus, 9 Lot, 86 -Homologiegruppe, 41 Lotfußpunkt, 86 +Homöomorphismengruppe, 10 +Homöomorphismus, 9 +Homologiegruppe, 41 Homomorphismus, 101 -Möbiusband, 91 Homotopie, 44 -Möbiustransformation, 80 Homotopieklasse, 47 -Mannigfaltigkeit, 24 -Inklusionsabbildung, 47 differenzierbare, 29 
-Innenwinkel, 70 geschlossene, 25 -Inneres, 3 glatte, 29 -Inzidenzaxiome, 64 mit Rand, 28 -Isometrie, 6, 10 Menge -Isometriegruppe, 10 abgeschlossene, 2 -Isomorphismus, 101 offene, 2 -Isotopie, 20 zusammenhängende, 11 -Metrik, 6 +Inklusionsabbildung, 47 +Innenwinkel, 70 +Inneres, 3 +Inzidenzaxiome, 64 +Isometrie, 6, 10 +Isometriegruppe, 10 +Isomorphismus, 101 +Isotopie, 20 Jordankurve, 19 -diskrete, 6 geschlossene, 19 -hyperbolische, 84 -SNCF, 8 Karte, 24 Kartenwechsel, 28 -Nebenwinkel, 86 Kern -Neilsche Parabel, 27 offener, 3 -Normalenfeld, 90 Kleeblattknoten, 20 -Normalenvektor, 87, 89 Klumpentopologie, siehe triviale Topologie -Normalkrümmung, 91, 92, 98 Knoten, 20, 17–21 -äquivalente, 20 Oktaeder, 34 -trivialer, 20 Orthonormalbasis, 107 +äquivalente, 20 +trivialer, 20 Knotendiagramm, 20 -Paraboloid kollinear, 65 -hyperbolisches, 92 kongruent, siehe isometrisch -Parallele, 66 Kongruenz, siehe Isometrie -Parallelenaxiom, 64 Kongruenzsatz -parametrisiert SSS, 104 -durch Bogenlänge, 87 SWS, 69 -Parametrisierung SWW, 74 -reguläre, 30 WSW, 70 -Polyzylinder, 17 Krümmung, 88, 89 -Produkttopologie, 4 Kreis, 37 -Projektion Kreuzprodukt, 107 -stereographische, 11 Kurve, 87 -Punkt, 34 Länge einer, 87 -Quotiententopologie, 5, 10, 11 Lage allgemeine, 34 -Rand, 3, 28 Lehrsatz -Raum Binomischer, 107 -hausdorffscher, 8 Lie-Gruppe, 33 -kompakter, 14 liegt zwischen, 65 -metrischer, 6 Liftung, 54 -projektiver, 5, 22, 25, 52 Limes, 8 +lokal, 3 +Lot, 86 +Lotfußpunkt, 86 +Möbiusband, 91 +Möbiustransformation, 80 +Mannigfaltigkeit, 24 +differenzierbare, 29 +geschlossene, 25 +glatte, 29 +mit Rand, 28 +Menge +abgeschlossene, 2 +offene, 2 +zusammenhängende, 11 +Metrik, 6 +diskrete, 6 +hyperbolische, 84 +SNCF, 8 +Nebenwinkel, 86 +Neilsche Parabel, 27 +Normalenfeld, 90 +Normalenvektor, 87, 89 +Normalkrümmung, 91, 92, 98 +Oktaeder, 34 +Orthonormalbasis, 107 +Paraboloid +hyperbolisches, 92 +Parallele, 66 +Parallelenaxiom, 64 +parametrisiert +durch Bogenlänge, 87 +Parametrisierung +reguläre, 
30 +Polyzylinder, 17 +Produkttopologie, 4 +Projektion +stereographische, 11 +Punkt, 34 +Quotiententopologie, 5, 10, 11 +Rand, 3, 28 +Raum +hausdorffscher, 8 +kompakter, 14 +metrischer, 6 +projektiver, 5, 22, 25, 52 114 Stichwortverzeichnis -topologischer, 2 verträglich, 29 +topologischer, 2 zusammenhängender, 11 -Würfel, 34 Realisierung -Weg, 17 geometrische, 34 -einfacher, 17 Retraktion, 47 -geschlossener, 17 -Satz von homotope, 44 -Gauß-Bonnet, 98 inverser, 48 -Scheitelwinkel, 86 zusammengesetzter, 46 -Seite, 34 Wegzusammenhang, 18 -Sierpińskiraum, 3, 22 Weingarten-Abbildung, 95 -Simplex, 34 Winkel, 70 +Satz von +Gauß-Bonnet, 98 +Scheitelwinkel, 86 +Seite, 34 +Sierpińskiraum, 3, 22 +Simplex, 34 Simplizialkomplex, 34 -Zusammenhang, 11–14 Simplizialkomplexe -Zusammenhangskomponente, 13 flächengleiche, 74 -Zwischenwertsatz, 107 Sphäre exotische, 29 Standard-Simplex, 34 @@ -6022,3 +9615,17 @@ Umgebungsbasis, 58 vanishing set, 26 Vektorprodukt, siehe Kreuzprodukt Verklebung, 26 +verträglich, 29 +Würfel, 34 +Weg, 17 +einfacher, 17 +geschlossener, 17 +homotope, 44 +inverser, 48 +zusammengesetzter, 46 +Wegzusammenhang, 18 +Weingarten-Abbildung, 95 +Winkel, 70 +Zusammenhang, 11–14 +Zusammenhangskomponente, 13 +Zwischenwertsatz, 107 diff --git a/read/results/pdftotext/1601.03642.txt b/read/results/pdftotext/1601.03642.txt index b6b8e05..85909a9 100644 --- a/read/results/pdftotext/1601.03642.txt +++ b/read/results/pdftotext/1601.03642.txt @@ -39,17 +39,14 @@ w3 wn -(a) Example of an artificial neuron unit.(b) -xi are the input signals and wi are -weights which have to get learned. +(a) Example of an artificial neuron unit.(b) A visualization of a simple feedxi are the input signals and wi are +forward neural network. The 5 inweights which have to get learned. put nodes are red, the 2 bias nodes Each input signal gets multiplied -with its weight, everything gets -summed up and the activation function ϕ is applied. - -A visualization of a simple feedforward neural network. 
The 5 input nodes are red, the 2 bias nodes are gray, the 3 hidden units are +with its weight, everything gets green and the single output node -is blue. +summed up and the activation func- is blue. +tion ϕ is applied. Fig. 1: Neural networks are based on simple units which get combined to complex networks. @@ -694,35 +691,21 @@ along with this program; if not, write to the Free Software Foundation, * * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include -#include -#include -#include -#include - - - - - - +#include +#include +#include +#include +#include 8 #include -#include -#include -#include -#include -#include -#include - - - - - - - - +#include +#include +#include +#include +#include +#include #define REG_PG vesa_slot_addr_pack #define PFM_NOCOMP AFSR(0, load) diff --git a/read/results/pdftotext/1602.06541.txt b/read/results/pdftotext/1602.06541.txt index a643151..fad0f1c 100644 --- a/read/results/pdftotext/1602.06541.txt +++ b/read/results/pdftotext/1602.06541.txt @@ -100,21 +100,16 @@ used layered models [YHRF12]. C. Input Data The available data which can be used for the inference of a segmentation varies by application. -• - -• - -• - Grayscale vs colored: Grayscale images are commonly used in medical imaging such as magnetic resonance (MR) imaging or ultrasonography whereas colored photographs are obviously widespread. -Excluding or including depth data: RGB-D, +• Excluding or including depth data: RGB-D, sometimes also called range [HJBJ+ 96] is available in robotics, autonomous cars and recently also in consumer electronics such as Microsoft Kinect [Zha12]. -Single image vs stereo images vs cosegmentation: Single image segmentation is the +• Single +image vs stereo images vs cosegmentation: Single image segmentation is the most wide-spread kind of segmentation, but using stereo images was already tried in [BVZ01]. 
It can be seen as a more natural way of segmentation as @@ -130,10 +125,11 @@ after the first can be used as an additional source of information to find a meaningful segmentation. This idea can be extended to time series such as videos. -2D vs 3D: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel. +• 2D vs 3D: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel. In 3D data, such as volumetric X-ray CT images as they were used in [HHR01], the smallest unit is called a voxel. +• (a) Example Scene @@ -159,7 +155,6 @@ of the obtained segmentations is by showing examples such as Figure 1. However, this can only support the explanation of particular problems or showcase special situation. For -• meaningful information about the overall accuracy, there are a couple of metrics how accuracy can be defined. For this section, let k ∈ N be the number of classes, @@ -1851,6 +1846,9 @@ Classes 5 Channels + +Data source + 3 3 3 @@ -1860,7 +1858,6 @@ Channels 3 3 -Data source [CRSS] [KKV+ 14] [FKG13] diff --git a/read/results/pdftotext/1707.09725.txt b/read/results/pdftotext/1707.09725.txt index 938a193..718ee7a 100644 --- a/read/results/pdftotext/1707.09725.txt +++ b/read/results/pdftotext/1707.09725.txt @@ -381,30 +381,26 @@ channels. The filter F is convolved with the image I ∈ Rw×h×d to produce a n The output image I 0 has only one channel. Each pixel I 0 (x, y) of the output image gets calculated by point-wise multiplication of one filter element with one element of the original image I: -b k2w c I 0 (x, y) = -ix =1−d k2w +k -kh +b k2w c -c -2 -X +b 2h c -b +X X -e d X I(x + ix , y + iy , ic ) · F (ix , iy , ic ) -k -iy =1−d 2h e ic =1 +ix =1−d k2w e iy =1−d kh e ic =1 +2 This procedure is explained by Figure 2.1. It is essentially a discrete convolution. 
@@ -656,7 +652,6 @@ wij · xj [2.1] j=1 -b k2w c o @@ -664,18 +659,15 @@ o (I) = b + -b +k -ix =1−d k2w +b k2w c -kh +b 2h c -c -2 X X -e d X @@ -684,8 +676,8 @@ Fz (ix , iy , ic ) · I(x + ix , y + iy , ic ) [2.2] -k -iy =1−d 2h e ic =1 +ix =1−d k2w e iy =1−d kh e ic =1 +2 with a bias b ∈ R, x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } @@ -816,14 +808,7 @@ picked with probability pi = P ai aj . This assumes the activations ai are non-n aj ∈A Pooling is applied for three reasons: To get local translational invariance, to get invariance -against minor local changes and, most important, for data reduction to - -1 -th -s2 - -of the data by - +against minor local changes and, most important, for data reduction to s12 th of the data by using strides of s > 1. See Figure 2.3 for a visualization of max pooling. 2 @@ -896,9 +881,7 @@ Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with str Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If the input of the pooling layer are d(i−1) feature maps, the convolutional layer has to have d(i−1) filters of size p × p and stride s. The ith filter has the values - - -1 +1 p2 . @@ -967,11 +950,8 @@ B)i,j := (A)i,j (B)i,j Hence every value of the input gets set to zero with a dropout probability of p. Typically, Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the -output of a dropout layer is multiplied with - 1 -1−p - +output of a dropout layer is multiplied with 1−p when dropout is enabled [Las17, tf-16b]. At inference time, dropout is disabled. 
@@ -1001,28 +981,19 @@ point-wise to x(k) − x̄(k) x̂(k) = p s0 [x(k) ]2 + ε -with x̄(k) = - 1 -m +with x̄(k) = m (k) -i=1 xi - -Pm - -being the sample mean and s0 [x(k) ]2 = - -1 -m - (k) -i=1 (xi +1 Pm +(k) +being the sample mean and s0 [x(k) ]2 = m +i=1 xi +i=1 (xi − x̄ ) the Pm -− x̄(k) ) the - sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0 (k) @@ -1144,13 +1115,9 @@ aspect to using the group network without an aggregation block. Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The idea is to connect each convolutional layer directly to subsequent convolutional layers. Traditional CNNs with L layers and one input layer have L connections between layers, -but dense blocks have - -L(L+1) -2 - +but dense blocks have L(L+1) connections between layers. The input feature maps are - +2 concatenated in depth. According to the authors, this prevents features from being relearned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors used only on the order of 12 feature maps per layer. @@ -1239,25 +1206,25 @@ P PK diagonal cii and all wrong classifications are of the diagonal. The sum K i=1 j=1 cij is the -total number of samples which were evaluated and - P -cii -PK -PK i=1 -i=1 -The sums r(i) = +c -PK +i=1 j=1 cij -j=1 cij +ii +PK +total number of samples which were evaluated and PK i=1 + +The sums r(i) = is the accuracy. -of each class i are worth being investigated as they show if the +PK + +j=1 cij of each class i are worth being investigated as they show if the classes are skewed. If the number of samples of one class dominates the data set, then the classifier can get a high accuracy by simply always prediction the most common class. If @@ -1450,9 +1417,10 @@ One problem of accuracy as a quality criterion are skewed classes. 
If one class more common than all other classes, then the simplest way to achieve a high score is to always classify everything as the most common class. In order to fix this problem, one can use the mean accuracy: +k + mean-accuracy(c) = -k 1 X cii · ∈ [0, 1] @@ -1646,9 +1614,11 @@ VGG-16 (see Appendix D.3) have many filters which are highly correlated. They fo this by comparing the averaged maximum k-translational correlation of the networks with Gaussian-distributed initialized filters. The averaged maximum k-translational correlation is defined as -ρ̄k (W) = N + +ρ̄k (W) = + 1 X N max ρk (Wi , Wj ) j=1,j6=i @@ -2132,17 +2102,6 @@ Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsamp 15 16 -Input -Convolution -BN + ELU -Convolution -BN + ELU -Max pooling - -Filters @ -Patch size / stride -32 @ 3 × 3 × 3 - Parameters FLOPs @@ -2163,80 +2122,12 @@ Output size 163 904 40 960 -3 @ 32 × -32 @ 32 × -32 @ 32 × -32 @ 32 × -32 @ 32 × -32 @ 16 × - -32 -32 -32 -32 -32 -16 - -9 420 800 -82 048 -18 857 984 -82 048 -20 480 -4 714 496 -20 608 -5 120 -1 048 064 -3 584 -0 -523 776 -3 584 -0 -1024 · k -k -7k - -64 @ 16 × -64 @ 16 × -64 @ 16 × -64 @ 16 × -64 @ 8 × -64 @ 8 × -64 @ 8 × -64 @ 4 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -k @ 1× -k @ 1× -k @ 1× - -16 -16 -16 -16 -8 -8 -8 -4 -1 -1 -1 -1 -1 -1 -1 -1 -1 - -/1 - -32 @ 3 × 3 × 32 / 1 -2×2 - -/2 +3 @ 32 × 32 +32 @ 32 × 32 +32 @ 32 × 32 +32 @ 32 × 32 +32 @ 32 × 32 +32 @ 16 × 16 Convolution 64 @ 3 × 3 × 32 / 1 @@ -2280,16 +2171,71 @@ Global avg Pooling 0 BN + Softmax 2k + +9 420 800 +82 048 +18 857 984 +82 048 +20 480 +4 714 496 +20 608 +5 120 +1 048 064 +3 584 +0 +523 776 +3 584 +0 +1024 · k +k +7k + +64 @ 16 × 16 +64 @ 16 × 16 +64 @ 16 × 16 +64 @ 16 × 16 +64 @ 8 × 8 +64 @ 8 × 8 +64 @ 8 × 8 +64 @ 4 × 4 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +k @ 1× 1 +k @ 1× 1 +k @ 1× 1 + 515k +892 512 -P - 1032k +55 729 664 103 424+2k +Input 
+Convolution +BN + ELU +Convolution +BN + ELU +Max pooling + +Filters @ +Patch size / stride +32 @ 3 × 3 × 3 + +/1 + +32 @ 3 × 3 × 32 / 1 +2×2 + +/2 + +P + Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers use SAME padding, except for layer 11 which used VALID padding in order to decrease the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for @@ -2303,9 +2249,11 @@ max pooling 2 × 2/2 16 × 16 max pooling 2 × 2/2 -8×8 max pooling 2 × 2/2 + +8×8 + 4×4 C 512@1 × 1/1 @@ -2378,23 +2326,14 @@ Test Set 94.12 % 99.02 % -σ -σ -σ -σ -σ -σ -σ -σ - -= 3.49 -= 1.10 -= 1.48 -= 0.00 -= 0.42 -= 0.07 -= 0.87 -= 0.07 +σ = 3.49 +σ = 1.10 +σ = 1.48 +σ = 0.00 +σ = 0.42 +σ = 0.07 +σ = 0.87 +σ = 0.07 94.37 % 85.84 % @@ -2405,23 +2344,14 @@ Test Set 75.67 % 96.28 % -σ -σ -σ -σ -σ -σ -σ -σ - -= 3.47 -= 0.87 -= 0.55 -= 0.11 -= 0.10 -= 0.06 -= 0.34 -= 0.10 +σ = 3.47 +σ = 0.87 +σ = 0.55 +σ = 0.11 +σ = 0.10 +σ = 0.06 +σ = 0.34 +σ = 0.10 Ensemble of 10 Training Set Test Set @@ -2612,13 +2542,9 @@ training. The image might lead to the wrong conclusion that models which are bet the start are also better at the end. In order to check this hypothesis, the relative order of validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering stays approximately the same, then it can be considered to run the first few epochs many -times and only train the best models to the end. For 10 models, there can be - -102 −10 2 -= 45 - +times and only train the best models to the end. For 10 models, there can be 10 2−10 = 45 pair-wise changes in the ordering at maximum if the relative order of validation accuracies is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred in average for each pair of epochs (i, i + 1). 
This means if one knows only the relative order @@ -3196,11 +3122,8 @@ Layer Filter count Baseline New -9 -9 -11 -11 -13 +Total +parameters 64 64 @@ -3208,20 +3131,24 @@ Baseline New 512 512 -638 -974 -3786 -1024 -8704 - -Total -parameters 5 978 566 8 925 622 5 982 698 1 731 980 5 982 092 +9 +9 +11 +11 +13 + +638 +974 +3786 +1024 +8704 + Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer was increased. @@ -3240,6 +3167,9 @@ Model Parameters Single Model +Mean + +std Training @@ -3249,9 +3179,7 @@ Mean Epochs Mean Time -Mean - -std +baseline 944 012 @@ -3335,8 +3263,6 @@ m13 4485 s -baseline - Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9 , m11 , m13 as well as their accuracies. 54 @@ -3518,13 +3444,14 @@ Mean total Single model +Ensemble + training time Accuracy std -Ensemble Accuracy 8 @@ -3544,7 +3471,14 @@ s 16 -62 +s +62 epoch +s +35 epoch +s +25 epoch +s +18 epoch 103 – 173 @@ -3556,10 +3490,6 @@ s 66.98 % -32 - -35 - 119 – 179 5171 s @@ -3570,10 +3500,6 @@ s 65.89 % -64 - -25 - 133 – 195 2892 s @@ -3584,19 +3510,6 @@ s 64.70 % -128 - -18 - -s -epoch -s -epoch -s -epoch -s -epoch - 145 – 239 3126 s @@ -3607,6 +3520,10 @@ epoch 63.55 % +32 +64 +128 + Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on CIFAR-100. @@ -3792,6 +3709,8 @@ Yes Yes +ReLU + Yes1 No @@ -3830,8 +3749,6 @@ Yes No -ReLU - Table 5.10.: Properties of activation functions. 
1 @@ -4483,19 +4400,12 @@ Output size 353 418 40 960 -3 @ 32 × -69 @ 32 × -69 @ 32 × -69 @ 32 × -69 @ 32 × -32 @ 16 × - -32 -32 -32 -32 -32 -16 +3 @ 32 × 32 +69 @ 32 × 32 +69 @ 32 × 32 +69 @ 32 × 32 +69 @ 32 × 32 +32 @ 16 × 16 39 808 128 @@ -4530,41 +4440,23 @@ Output size k 7k -64 @ 16 × -64 @ 16 × -64 @ 16 × -64 @ 16 × -64 @ 8 × -64 @ 8 × -64 @ 8 × -64 @ 4 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -512 @ 1 × -k @ 1× -k @ 1× -k @ 1× - -16 -16 -16 -16 -8 -8 -8 -4 -1 -1 -1 -1 -1 -1 -1 -1 -1 +64 @ 16 × 16 +64 @ 16 × 16 +64 @ 16 × 16 +64 @ 16 × 16 +64 @ 8 × 8 +64 @ 8 × 8 +64 @ 8 × 8 +64 @ 4 × 4 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +512 @ 1 × 1 +k @ 1× 1 +k @ 1× 1 +k @ 1× 1 514k +947 654 @@ -4572,11 +4464,11 @@ k @ 1× 520k +87 870 996 +179 200+2k + 36 928 128 -179 200+2k - Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers use SAME padding, except for layer 11 which used VALID padding in order to decrease the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each @@ -4663,23 +4555,14 @@ Test Set 95.43 % 99.08 % -σ -σ -σ -σ -σ -σ -σ -σ - -= 4.70 -= 0.70 -= 2.18 -= 0.00 -= 0.45 -= 0.10 -= 3.57 -= 0.07 +σ = 4.70 +σ = 0.70 +σ = 2.18 +σ = 0.00 +σ = 0.45 +σ = 0.10 +σ = 3.57 +σ = 0.07 90.75 % 87.92 % @@ -4690,23 +4573,14 @@ Test Set 75.09 % 96.37 % -σ -σ -σ -σ -σ -σ -σ -σ - -= 4.73 -= 0.46 -= 0.73 -= 0.10 -= 0.15 -= 0.13 -= 2.39 -= 0.12 +σ = 4.73 +σ = 0.46 +σ = 0.73 +σ = 0.10 +σ = 0.15 +σ = 0.13 +σ = 2.39 +σ = 0.12 Ensemble of 10 Training Set Test Set @@ -4905,6 +4779,8 @@ Dataset Early Stopping val. 
acc train loss +Fixed epochs + Asirra CIFAR-10 CIFAR-100 @@ -4919,8 +4795,6 @@ STL-10 99.67 % 78.66 % -Fixed epochs - 96.01 %3 91.75 % 71.01 % @@ -5137,41 +5011,23 @@ Layer 99-percentile interval filter bias -[-0.50, -[-0.21, -[-0.20, -[-0.15, -[-0.14, -[-0.08, -[-0.08, -[-0.10, - -0.48] -0.19] -0.17] -0.14] -0.15] -0.08] -0.08] -0.11] - -[-0.06, -[-0.07, -[-0.07, -[-0.05, -[-0.04, -[-0.00, -[-0.00, -[-0.01, - -0.07] -0.07] -0.05] -0.06] -0.03] -0.00] -0.00] -0.01] +[-0.50, 0.48] +[-0.21, 0.19] +[-0.20, 0.17] +[-0.15, 0.14] +[-0.14, 0.15] +[-0.08, 0.08] +[-0.08, 0.08] +[-0.10, 0.11] + +[-0.06, 0.07] +[-0.07, 0.07] +[-0.07, 0.05] +[-0.05, 0.06] +[-0.04, 0.03] +[-0.00, 0.00] +[-0.00, 0.00] +[-0.01, 0.01] Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model trained on CIFAR-100. @@ -5657,8 +5513,6 @@ He α=0 -β= - γ=0 [HZRS15b] @@ -5667,6 +5521,8 @@ Orthogonal — +β = n2in + — γ=0 @@ -5683,9 +5539,6 @@ LSUV [MM15] -2 -nin - Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N (0, 1) + γ. nin , nout are the number of units in the previous layer and the next layer. Typically, biases are initialized with constant 0 and weights by one of the other schemes to prevent @@ -5769,10 +5622,9 @@ t • Exponential Decay Learning Rate [SHY+ 13]: η(t) = η(0) · 10− k where t ∈ N0 is the training step, η(0) is the initial learning rate, k ∈ N≥1 is the number of training steps -until the learning rate is decreased by - 1 -10 th. +th. +until the learning rate is decreased by 10 • Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential Decay Scheduling. 
@@ -5808,9 +5660,8 @@ CNNs have the following hyperparameters: Name Sign function† Heaviside -function† +step function† -step Logistic function Function ϕ(x) @@ -5854,8 +5705,11 @@ Tanh 1+e−x x e −e−x += tanh(x) ex +e−x +[−1, 1] + ReLU† max(0, x) @@ -5879,32 +5733,28 @@ log(e Softplus ELU -= tanh(x) - if x > 0 α(ex − 1) if x ≤ 0 +xj + +(−∞, +∞) Softmax‡ -o(x)j = +o(x)j = PKe -xj -PKe -k=1 +[0, 1]K Maxout‡ -exk - o(x) = maxx∈x x -[−1, 1] - -(−∞, +∞) -[0, 1]K (−∞, +∞) +xk +k=1 e + sech  (x) 1 if x > 0 @@ -5959,18 +5809,17 @@ as it produces a probability distribution. See Figure B.1 for a plot of some of α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. 2.0 -1 -1+e−x - -ϕ1 (x) = +ϕ1 (x) = 1+e1−x ϕ2 (x) = tanh(x) -ϕ3 (x) = max(0, x) -ϕ4 (x) = log(ex + 1) -ϕ5 (x) = max(x, ex − 1) y 1.5 + +ϕ3 (x) = max(0, x) +ϕ4 (x) = log(ex + 1) +ϕ5 (x) = max(x, ex − 1) + 1.0 0.5 x @@ -6055,14 +5904,9 @@ FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ . • As Dropout is only calculated during training, the number of FLOPs was set to 0. • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. For a feature map of size w × h a max pooling -filter with stride s gets applied - -w·h -. +filter with stride s gets applied w·h +. The number of FLOPs per application depends s2 - -The number of FLOPs per application depends - on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs. • The number of FLOPs for Batch Normalization is the same as the number of its parameters. @@ -6153,6 +5997,8 @@ Parameters FLOPs +Output size + 0 156 2 @@ -6171,11 +6017,6 @@ FLOPs 20 580 1 730 -61 710 - -15 144 446 - -Output size 1 @ 32 × 32 6 @ 28 × 28 6 @ 14 × 14 @@ -6184,6 +6025,11 @@ Output size 120 84 10 + +61 710 + +15 144 446 + 9118 Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied. 
@@ -6243,30 +6089,6 @@ Parameters FLOPs Output size - -0 -885 120 -663 936 -442 624 -0 -37 752 832 -16 781 312 -4 097 000 - -211 M -12 M -301 k -448 M -3M -50 k -299 M -224 M -150 M -50 k -75 M -34 M -8M - 3 @ 224 × 224 96 @ 55 × 55 96 @ 55 × 55 @@ -6281,11 +6103,6 @@ Output size 4096 4096 1000 - -60 965 224 - -3300 M - 1 122 568 96 @ 11 × 11 × 3 / 4 @@ -6300,19 +6117,42 @@ Output size 307 456 3×3 -384 @ 3 × 3 × 256 -384 @ 3 × 3 × 192 -256 @ 3 × 3 × 192 +/2 +384 @ 3 × 3 × 256 / 1 +384 @ 3 × 3 × 192 / 1 +256 @ 3 × 3 × 192 / 1 3×3 +/2 4096 neurons 4096 neurons 1000 neurons -/2 -/1 -/1 -/1 -/2 +0 +885 120 +663 936 +442 624 +0 +37 752 832 +16 781 312 +4 097 000 + +211 M +12 M +301 k +448 M +3M +50 k +299 M +224 M +150 M +50 k +75 M +34 M +8M + +60 965 224 + +3300 M Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to computational restrictions at the time of its development. This also reduces the number @@ -6343,15 +6183,19 @@ Input 224 × 224 max pooling 2 × 2/1 -112 × 112 max pooling 2 × 2/1 -56 × 56 max pooling 2 × 2/1 -28 × 28 max pooling 2 × 2/1 + +112 × 112 + +56 × 56 + +28 × 28 + 14 × 14 max pooling 2 × 2/1 @@ -6444,46 +6288,6 @@ FC Filters @ Patch size / stride -64 @ 3 × 3 × 3 -64 @ 3 × 3 × 64 -2×2 -128 @ 3 × 3 × 64 -128 @ 3 × 3 × 128 -2×2 -256 @ 3 × 3 × 128 -256 @ 3 × 3 × 256 -256 @ 3 × 3 × 256 -2×2 -512 @ 3 × 3 × 256 -512 @ 3 × 3 × 512 -512 @ 3 × 3 × 512 -2×2 -512 @ 3 × 3 × 512 -512 @ 3 × 3 × 512 -512 @ 3 × 3 × 512 -2×2 -4096 neurons -4096 neurons -1000 neurons - -/1 -/1 -/2 -/1 -/1 -/2 -/1 -/1 -/1 -/2 -/1 -/1 -/1 -/2 -/1 -/1 -/1 -/2 Parameters @@ -6491,6 +6295,31 @@ FLOPs Output size +64 @ 3 × 3 × 3 / 1 +64 @ 3 × 3 × 64 / 1 +2×2 +/2 +128 @ 3 × 3 × 64 / 1 +128 @ 3 × 3 × 128 / 1 +2×2 +/2 +256 @ 3 × 3 × 128 / 1 +256 @ 3 × 3 × 256 / 1 +256 @ 3 × 3 × 256 / 1 +2×2 +/2 +512 @ 3 × 3 × 256 / 1 +512 @ 3 × 3 × 512 / 1 +512 @ 3 × 3 × 512 / 1 +2×2 +/2 +512 @ 3 × 3 × 512 / 1 +512 @ 3 × 3 × 512 / 1 +512 @ 3 × 3 × 512 / 1 +2×2 +/2 
+4096 neurons + 1 792 36 928 0 @@ -6570,6 +6399,9 @@ Output size 15 245 800 +4096 neurons +1000 neurons + Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have learnable parameters. All convolutions are zero padded to prevent size changes and use ReLU activation functions. The channels mean is subtracted from each pixel as @@ -6626,9 +6458,6 @@ P 3× Type - -Parameters - Input Stem Inception A @@ -6640,6 +6469,10 @@ Global Average Pooling Dropout (p=0.8) Softmax +Parameters + +Output size + 605 728 317 632 2 306 112 @@ -6650,36 +6483,15 @@ Softmax 0 1 537 000 -Output size -3 -384 -384 -1024 -1024 -1536 -1536 -1536 -1536 - -@ -@ -@ -@ -@ -@ -@ -@ -@ - -299 × 299 -35 × 35 -35 × 35 -17 × 17 -17 × 17 -8× 8 -8× 8 -1× 1 -1× 1 +3 @ 299 × 299 +384 @ 35 × 35 +384 @ 35 × 35 +1024 @ 17 × 17 +1024 @ 17 × 17 +1536 @ 8 × 8 +1536 @ 8 × 8 +1536 @ 1 × 1 +1536 @ 1 × 1 1000 42 679 816 @@ -6849,6 +6661,13 @@ GTSRB [SSSI, SSSI12] +Asirra3 + +(4 px − 500 px) +×(4 px − 500 px) +480 px × 640 px +and 640 px × 480 px + 25 000 2 @@ -6865,14 +6684,8 @@ GTSRB [Mar08, MS07] -Asirra3 Graz-02 -(4 px − 500 px) -×(4 px − 500 px) -480 px × 640 px -and 640 px × 480 px - Table E.1.: An overview over publicly available image databases for classification. The number of images row gives the sum of the training and the test images. Some datasets, like SVHN, have additional unlabeled data which is not given in this table. 
@@ -6886,55 +6699,58 @@ Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” 97 - Dataset - -Model type / name - -MNIST - -— + Result -HASYv2 +Score -TF-CNN +Achieved / -SVHN +Dataset -DenseNet (k = 24) +Model type / name -CIFAR-10 +MNIST -DenseNet-BC (k = 40) +— -CIFAR-100 +0.21 % -Result +error -Score +[WZZ+ 13] -Achieved / -Claimed by +HASYv2 -error +TF-CNN -[WZZ+ 13] +81.00 % accuracy [Tho17a] +SVHN + +DenseNet (k = 24) + 1.59 % error [HLW16] +CIFAR-10 + +DenseNet-BC (k = 40) + 3.46 % error [HLW16] +CIFAR-100 + WRN-28-10 16.21 % @@ -7013,8 +6829,7 @@ accuracy [BMDP10] -0.21 % -81.00 % +Claimed by Table E.2.: An overview over state of the art results achieved in computer vision datasets. @@ -7537,13 +7352,14 @@ Available: https://arxiv.org/abs/1202.2745v1 D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate deep network learning by exponential linear units (ELUs),” -preprint arXiv:1511.07289, + +arXiv Nov. 2015. [Online]. Available: -arXiv https: +preprint arXiv:1511.07289, //arxiv.org/abs/1511.07289 [CWV+ 14] diff --git a/read/results/pdftotext/2201.00021.txt b/read/results/pdftotext/2201.00021.txt index ac44757..ad6241a 100644 --- a/read/results/pdftotext/2201.00021.txt +++ b/read/results/pdftotext/2201.00021.txt @@ -7,27 +7,26 @@ Discovery of ammonia (9,6) masers in two high-mass star-forming regions Y. T. Yan (闫耀庭)1,? , C. Henkel1, 2, 3 , K. M. Menten1 , Y. Gong (龚龑)1 , J. Ott4 , T. L. Wilson1 , A. Wootten4 , A. Brunthaler1 , J. S. Zhang (张江水)5 , J. L. Chen (陈家梁)5 , and K. Yang (杨楷)6, 7 -1 -2 -3 arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 -4 -5 -6 -7 +1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany e-mail: yyan@mpifr-bonn.mpg.de +2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. 
Box 80203, Jeddah 21589, Saudi Arabia +3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China +4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA +5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China +6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China +7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s Republic of China - Received 13 December 2021 / Accepted 30 December 2021 ABSTRACT Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact diff --git a/read/results/pdftotext/2201.00022.txt b/read/results/pdftotext/2201.00022.txt index 078cc41..4c7eda3 100644 --- a/read/results/pdftotext/2201.00022.txt +++ b/read/results/pdftotext/2201.00022.txt @@ -1,28 +1,33 @@ -Draft version January 4, 2022 +Draft version July 7, 2022 Typeset using LATEX twocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 -1 Department +1 Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA +2 Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA -arXiv:2201.00022v1 [astro-ph.GA] 31 Dec 2021 +arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 -2 Mani - -of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA -L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA 3 Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50 − 70 M . 
However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and -above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M . Our -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This formation channel also has implications for observations. Collisions between stars and BHs can produce -electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally, -formed through this channel, both black holes in the mass gap and IMBHs can merge with the supermassive black hole at the center of a galactic nucleus through gravitational waves. These gravitational -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). +50 − 70 M , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M . This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. 
Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). 1. INTRODUCTION The recently detected gravitational wave source @@ -37,19 +42,18 @@ more than < GW170104, and GW170814 fall within the mass gap (e.g., Abbott et al. 2016, 2017a,b). BH mergers that form second generation BHs and, in some cases, intermediate mass BHs (IMBHs), these gravitational wave -(GW) events can occur in globular clusters, young stellar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al. -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. -2021; Arca Sedda et al. 2021). However, IMBHs are -not limited to these locations and may reside in galacCorresponding author: Sanaea C. Rose +(GW) events can occur in globular clusters, young stelCorresponding author: Sanaea C. Rose srose@astro.ucla.edu -1 +1 Note that the exact lower and upper limits may be sensitive to -Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski et al. 2020a; Renzo et al. 2020; Vink et al. 2021). -tic nuclei as well. Several studies propose that our +lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). 
However, IMBHs are +not limited to these locations and may reside in galactic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc (e.g., Hansen & Milosavljević 2003; Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen @@ -63,26 +67,27 @@ as a result of the very first stars (e.g., Madau & Rees Valiante et al. 2016) or from direct collapse of accumulated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to survive galaxy evolution and mergers to present day (e.g., + + 2 + +Rose et al. + Rashkov & Madau 2014), with significant effects on their stellar and even dark matter surroundings (e.g., Bertone et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another popular formation channel relies on the coalescence of -many stellar-mass black holes. For example, IMBHs +many stellar-mass black holes, which may seed objects +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs may form in the centers of globular clusters, where fewbody interactions lead to the merger of stellar-mass BHs (e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha -et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- - - 2 - -Rose et al. - -driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. +et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; -Freitag et al. 2006; Kremer et al. 2020; González et al. -2021; Di Carlo et al. 2021). +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 
2020; González et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). The main obstacle to sequential BH mergers in clusters is that the merger recoil velocity kick often exceeds the escape velocity from the cluster (e.g., Schnittman & Buonanno 2007; Centrella et al. 2010; O’Leary et al. @@ -93,29 +98,36 @@ clusters without a SMBH. They considered BH binarysingle interactions, binary BH merger recoil kicks. The post-kick merger product sinks back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that 103 − 104 M IMBHs can form efficiently over the lifetime of a cluster. -However, as discussed in Section 2.2, direct star-BH +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision -in galactic nuclei, making the former a promising channel for BH growth. We propose that IMBHs can form -naturally within the central pc of a SMBH in a galactic -center. Specifically, these IMBHs form through repeated -collisions with main sequence stars, accreting some or -all of the star’s mass depending on the details of the -collision. We demonstrate that this channel can create -IMBHs with masses as large as 104 M , depending on -the density profile of the surrounding stars. +in galactic nuclei, making the former a promising channel for BH growth. In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star collisions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). 
We propose that IMBHs can form naturally +within the central pc of a galactic center through repeated collisions between BHs and main sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow appreciably in size. We demonstrate that this channel can + +create IMBHs with masses as large as 104 M , an upper +limit that depends on the density profile of the surrounding stars and the efficiency of the accretion. The paper is structured as follows: we describe relevant physical processes and our approach in Section 2. In particular, we provide an overview of collisions in Section 2.2 and present our statistical approach in Section 2.3. Section 2.4 discusses our treatment of the mass growth with each collision and presents analytic solutions to our equations in two different regimes, efficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections 2.5 -and 2.7 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in -Section 2.8. Finally, we discuss and summarize our findings in Section 3. +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in +Section 2.9. Finally, we discuss and summarize our findings in Section 3. 2. METHODOLOGY We consider a population of stellar mass BHs embedded in a cluster of 1 M stars. When stars and BHs collide, the BHs can accrete mass. The growth rate depends on the physical processes outlined below. We use - a statistical approach to estimate the stellar encounters and final IMBH masses. 2.1. Physical Picture @@ -140,9 +152,26 @@ build a comprehensive physical picture of BH growth at all distances from the SMBH, including within 0.01 pc. 
Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other -observationally motivated distributions in Section 2.8, -but reserve a more detailed examination of the distribution’s impact for future work. -2.2. Direct Collisions + + 3 + +IMBH Formation in Galactic Nuclei + +in Figure 1.2 As this timescale depends on the density +of surrounding stars, we adopt a density profile of the +form: + +ρ(r• ) = ρ0 + +Figure 1. We plot the relevant timescales, including collision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark +blue line represents the time for a 105 M BH to merge with +the SMBH through GW emission. + BHs in the GN can undergo direct collisions with other objects. The timescale for this process, tcoll , can be estimated using a simple rate calculation: t−1 coll = nσA, @@ -153,8 +182,7 @@ coll = πn(a• )σ(a• )   2G(mBH + m? ) -2 -× f1 (e• )rc + f2 (e• )rc +× f1 (e• )rc2 + f2 (e• )rc . (1) σ(a• )2 where G is the gravitational constant and rc is the sum @@ -164,41 +192,11 @@ et al. (2020), f1 (e• ) and f2 (e• ) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and σ are simply evaluated at the semimajor axis of the orbit (see below). Note - - IMBH Formation in Galactic Nuclei - -3 - -The collision timescale also depends on the velocity dispersion, which we express as: -s -GM• -σ(r• ) = -, -(4) -r• (1 + α) - -Figure 1. We plot the relevant timescales, including collision (green), relaxation (gold), and BH-BH GW capture -(purple), for a single BH in the GN as a function of distance -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. 
The timescales depend on the -density, so we adopt a range of density profiles, bounded by -α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark -blue line represents the time for a 105 M BH to merge with -the SMBH through GW emission. - that this timescale equation includes the effects of gravitational focusing, which enhances the cross-section of interaction. Assuming a circular orbit for simplicity, we plot the timescale for a BH orbiting in the GN to collide with a 1 M star as a function of distance from the SMBH -in Figure 1.2 As this timescale depends on the density -of surrounding stars, we adopt a density profile of the -form: - -ρ(r• ) = ρ0 - -r• -r0 −α , @@ -212,9 +210,7 @@ Genzel et al. 2003). In this case, the normalization in Eq. (2) is ρ0 = 1.35 × 106 M /pc3 at r0 = 0.25 pc (Genzel et al. 2010). Additionally, in Eq. (2), α gives the slope of the power law. We assume that a uniform population of solar mass stars account for most of the mass in the GN, making the stellar number density: - n(r• ) = -2 ρ(r• ) . @@ -222,8 +218,23 @@ n(r• ) = (3) -We note that the eccentricity has a very minor effect on the -collision timescale (Rose et al. 2020). +The collision timescale also depends on the velocity dispersion, which we express as: +s + +observationally motivated distributions in Section 2.9, +but reserve a more detailed examination of the distribution’s impact for future work. +2.2. Direct Collisions + +r• +r0 + +σ(r• ) = + +GM• +, +r• (1 + α) + +(4) where α is the slope of the density profile and M• denotes the mass of the SMBH (Alexander 1999; Alexander & Pfuhl 2014). As mentioned above, Eq. (1) depends on the sum of the radii of the colliding objects, rc . We @@ -241,7 +252,16 @@ of density profiles is many orders of magnitude shorter than the BH-BH GW collision timescale (for the relevant equations, see O’Leary et al. 2009; Gondán et al. 2018, for example). 
Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the GN. +2 We + +note that the eccentricity has a very minor effect on the +collision timescale (Rose et al. 2020). + + 4 + +Rose et al. 2.3. Statistical Approach to Collisions + We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an increment ∆t of 106 yr, we calculate the probability of @@ -257,18 +277,7 @@ expected to accrete in a single collision (see Section 2.4 for details). We recalculate the collision timescale using the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3 . -3 - -Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio tcoll /∆t and rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. - - 4 - -Rose et al. 2.4. Mass Growth - When a BH collides with a star, it may accrete material and grow in mass. The details of the accretion depend on the relative velocity between the BH and star. For simplicity, this calculation assumes that the @@ -276,15 +285,18 @@ two objects experience a head on collision, with the BH passing through the star’s center. We begin by considering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R . Qualitatively, one -might expect that the BH could accrete the entire star +might expect that the BH could capture the entire star (i.e., ∆m ∼ 1 M ) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH -accretes a “tunnel” of material through the star. 
This +captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1 R . +approximately 1 R . For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our results imply. We discuss accretion in Section 2.5. To estimate ∆m, we begin with the Bondi-Hoyle accretion rate, ṁ, given by: ṁ = @@ -297,6 +309,25 @@ ṁ = (5) +3 Closer to the SMBH, ∆t may exceed the collision timescale by + +a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio tcoll /∆t and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. + +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the inital mass equal to 10 M for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. + where cs is the speed of sound in the star and ρstar is its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). We @@ -319,21 +350,12 @@ start with identical populations of 10 M BHs (grey) and simulate growth through collisions using a statistical approach. As the BHs grow, the collision timescale, which depends on mBH , decreases. Simultaneously, ∆m, which also depends on mBH , increases. 
The result is exponential growth (see discussion and details -surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the simulation time, 10 Gyr. Therefore, the BHs grow slowly, +surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the sim- -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. + 5 +IMBH Formation in Galactic Nuclei +ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: mfinal (tcoll → const.) = minitial + ∆m @@ -363,10 +385,6 @@ star’s mass. Eq. 7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger index α in the density profile (see Figure 1), the growth - - 5 - -IMBH Formation in Galactic Nuclei is very efficient and ∆m quickly approaches 1 M . Consequently, while we can now assume ∆m = 1 M , we can no longer assume the collision timescale is constant. The final mass grows exponentially as a result. For @@ -377,42 +395,97 @@ mfinal (∆m → 1 M ) = −A + (minitial + A) eCT (8) where A = σ 2 Rstar /G and C = 2πGnstar Rstar /σ. 
As an example, we plot this curve in purple for the α = 2 case, in Figure 3, which agrees with the simulated masses. -2.5. GW Inspiral +2.5. Uncertainties in Accretion +We note that the ∆M calculated in this proof-ofconcept study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challenging; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heating, and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev + +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star results in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity remain uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accumulate significant amounts of mass over many collisions +even if it accretes very little in a single one. We explore the viability of our channel using a physically motivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vesc /(cη), where vesc is +the escape velocity from the BH at 1 R and η is the +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fraction accreted is consistent with Kremer et al. 
(2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radiation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumenshchev 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral When a BH is close to the SMBH, GW emission can circularize and shrink its orbit. We implement the effects of GW emission on the BH’s semimajor axis and eccentricity following Peters & Mathews (1963a). The characteristic timescale to merge a BH with an SMBH is given by: +tGW ≈ 2.9 × 10  -−1  -−1 -M• -mBH +× + 12 -tGW ≈ 2.9 × 10 yr -106 M -106 M +  + +M• +yr +106 M −1  -4 + M• + mBH -a• -× 2 × 106 M -10−4 pc + × f (e• )(1 − e2• )7/2 , +−1  + +a• +−2 +10 pc + +mBH +106 M +4 + +−1 + (9) where f (e• ) is a function of e• . For all values of e• , f (e• ) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 × 105 M BH in Figure 1 in blue. + + 6 + +Rose et al. + +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. + In our simulations, we assume a BH has merged with the SMBH when the condition tGW < telapsed is met. When this condition is satisfied, we terminate mass growth through collisions for that BH.4 -2.6. IMBH growth +2.7. IMBH growth As detailed above, BH-stellar collisions can increase the BH masses as a function of time. 
Here, we examine the sensitivity of the BH growth to the density power @@ -420,25 +493,24 @@ law. From Eq. (1), it is clear that the growth rate depends on the stellar densi profiles, will result in more efficient mass growth. In Figure 1, larger values of α lead to collision timescales in the GN’s inner region, inwards of 0.25 pc, that are -4 - -For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking. - much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of a uniform distribution of BHs with initial conditions detailed in Section 2.1 for five α values, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards of 0.25 pc for the α = 2 case. -2.7. Gravitational Wave Mergers and Intermediate +2.8. Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. -Following the method detailed in Section 2.5, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion tGW < telapsed , we mark +4 For comparison, we also incrementally changed the semimajor + +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. + it as merged with the SMBH. 
We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit until it undergoes an extreme or intermediate mass ratio @@ -447,7 +519,7 @@ plot in Figure 3 shows the BH masses versus time of merger. It is interesting to note that even in the absence of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. -2.8. Two Body Relaxation Processes +2.9. Two Body Relaxation Processes A BH orbiting the SMBH experiences weak gravitational interactions with other objects in the GN. Over a relaxation time, these interactions alter its orbit about the SMBH. The two-body relaxation timescale for a @@ -455,9 +527,10 @@ single-mass system is: trelax = 0.34 σ3 -, G2 ρhM∗ i ln Λrlx +, + (10) where ln Λrlx is the Coulomb logarithm and hM∗ i is the @@ -469,24 +542,24 @@ its orbital energy and angular momentum by order of themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). In -Figure 1, we plot the relaxation timescale in gold for a -range of α. We note that the Bahcall & Wolf (1976) profile, α = 7/4, corresponds to zero net flux and therefore -does not preferentially migrate objects inward. -Additionally, because they are more massive on -average than the surrounding objects, BHs are expected to segregate inwards in the GN (e.g., Shapiro -& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; -Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). - - 6 - -Rose et al. - -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. 
Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and -merger times of these BHs. +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star + IMBH Formation in Galactic Nuclei +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Metzger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. We note that the Bahcall & Wolf (1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on average than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, tseg ≈ hM∗ i/mBH × trelax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an @@ -501,8 +574,8 @@ of zero andpa standard deviation of ∆vrlx / 3, where ∆vrlx = v• P• /trlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. in prep for full set -of equations. +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. We account for the effects of relaxation processes, including mass-segregation, using a multi-faceted approach. 
We begin by migrating each BH towards the center over its mass-segregation timescale, shifting it incrementally inward such that its orbital energy changes @@ -514,11 +587,13 @@ scattering for both black holes and stars. Within this radius, BH self-interacti BHs will then settle onto a Bahcall-Wolf profile, while the stars may follow a shallower profile, with approximately n? ∝ r−1.5 , inwards of the transition radius (Linial & Sari in prep.). - Therefore, after the initial mass segregation, we allow the BHs to begin diffusing over a relaxation timescale, their orbital parameters changing slowly through a random process. In this random process, some of the BHs may migrate closer to the SMBH. We terminate mass + +7 + growth when the BH enters the inner 200 au of the GN, within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest known star to the SMBH (e.g., Ghez et al. 2005). @@ -535,27 +610,12 @@ between the BHs. As mentioned above, as the BHs sink towards the SMBH, their concentration in the inner region of the GN increases, allowing them to dominate the scattering. We reserve the inclusion of these interactions for future study. -2.9. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure 4, two-body relaxation processes result in more EMRIs and IMRIs events. These processes allow BHs that begin further from the SMBH to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are -initially closer to the SMBH by allowing them to dif- - - IMBH Formation in Galactic Nuclei - -7 - -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) -for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. 
As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more -BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. -The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. - -fuse out of the inner region where collisions are efficient. +initially closer to the SMBH by allowing them to diffuse out of the inner region where collisions are efficient. As can be seen in Figure 4, the net result is that more BHs grow, but the maximum mass is lower compared to the scenario that ignores two-body relaxation. The @@ -573,15 +633,29 @@ We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as -103−4 M and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspiral, IMRIs) and EMRIs. +103−4 M and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. + + 8 + +Rose et al. + +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. 
Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. + As the stellar mass BH collides with a star, the BH will grow in mass. The increase may equal star’s entire mass if the relative velocity is smaller than the escape velocity from the BH at 1 R . However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this -limit, the BH accretes a “tunnel” of material through +limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accretion. In our statistical analysis, we account for BondiHoyle-Lyttleton accretion and find that BHs outside of - -10−2 pc from the SMBH can accrete the entire star (see +10−2 pc from the SMBH can capture the entire star (see Figure 2). The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to @@ -594,6 +668,24 @@ profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller (∼ 104 M ). Additionally, the final masses have no apparent dependence on distance from the SMBH (see Figure 4). +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and represents an active field of study (e.g., Blandford & Begelman 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). 
To assess the limits of our model, + +we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Figure 4. Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M . Furthermore, if we increase this ∆M estimate by a factor of 2 (i.e., use η = 0.05), the simulation produces a 3.5 × 103 M IMBH for the same initial +conditions. Our proof-of-concept demonstrates that collisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. Mass growth through BH-main-sequence star collisions may act in concert with other IMBH formation channels, such as compact object binary mergers (e.g., Hoang et al. 2018; Stephan et al. 2019; Fragione et al. @@ -607,18 +699,78 @@ as highlighted in previous studies, a substantial fraction of these binaries may Kozai Lidov mechanism, leaving behind a single star or a single compact object (e.g., Stephan et al. 2016, 2019; Hoang et al. 2018). Additionally, to be susceptible to -evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound that +evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects +and will only harden through weak gravitational inter- - 8 + IMBH Formation in Galactic Nuclei +actions with neighboring stars (see for example Figure +6 in Rose et al. 2020). +We note that we assume a steady-state and treat the +stars as a reservoir in this model. Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. 
Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star +formation episodes can occur as often as every ∼ 5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within . 0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5 −10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this picture within ∼ 0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar population to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, . 0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disruption of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g., Balberg et al. 2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. +An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt +2008). Löckmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density profile to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replenished on 100 Myr timescales (Baumgardt et al. 2006a). 
+Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. +While there are many competing dynamical processes +that shape the stellar density profile, we stress that α +5 In fact, the star-star collision timescale is greater than 10 Myr + +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. -Rose et al. +9 -the average kinetic energy of the surrounding objects, -and will only harden through weak gravitational interactions with neighboring stars (see for example Figure -6 in Rose et al. 2020). -Not included in this study, collisions between the BH -and other compact objects will increase the BH growth -rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fragione et al. 2021) and even neutron star BH mergers +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observations must already reflect ongoing processes like starstar collisions and replenishment. Schödel et al. (2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M . This estimate is consistent to order of magnitude with our α = 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α = 1.25 +leads to a maximum IMBH mass of 140 M . Furthermore, while the stellar mass within 0.01 pc may be a +few hundred M , Do et al. (2019) and GRAVITY Collaboration et al. (2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thousand M , or 0.1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. In that case, the 180 M is what remains of the +stars, while BHs and IMBHs make up the ∼ 1000 M +in the innermost region. 
+Also not included in this study, collisions between the +BH and other compact objects will increase the BH +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers (e.g., Hoang et al. 2020) become more likely as the BHs increase in mass through stellar collisions. As a result, the BH-BH collision timescale, discussed in Section 2.2, @@ -626,38 +778,48 @@ will become relevant to our simulations, allowing the BHs to grow through this channel in addition to stellar collisions. Additionally, this compact object mergers result in GW recoil, which may have a large impact on the dynamics (e.g., Baibhav et al. 2020; Fragione et al. -2021) +2021). The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation -processes, GW emission causes BHs to sink towards the -SMBH and eventually undergo a merger. As a result, -the GN environment is conducive to the formation of -EMRIs and IMRIs. The GW emission from EMRIs and -IMRIs is expected to be at mHz frequencies, making -them promising candidates for LISA to observe. While -the exact rate calculation is beyond the scope of this -study, the mechanism outlined here seems very promising. - -Our results also suggest that IMBHs are likely to exists in many galactic nuclei, as well as within our own -galactic center. This implication seems to be consistent with recent observational and theoretical studies -(e.g., Hansen & Milosavljević 2003; Maillard et al. 2004; -Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen -& Liu 2013; Generozov & Madigan 2020; Fragione et al. -2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -Collaboration et al. 2020). +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. 
The GW emission from EMRIs and IMRIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. +Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljević 2003; +Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madigan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). + + 10 + +Rose et al. + Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. -2018; Zhu et al. 2018; Cheng et al. 2018)5 . These interactions, in particular grazing collisions, may also result -in tidal disruption events (e.g., Perets et al. 2016; Samsing et al. 2019; Kremer et al. 2021). Thus, the process -outlined here may produce electromagnetic signatures -in addition to GW mergers. -SR thanks the Charles E Young fellowship, the Nina +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer et al. (2022) for a discussion of electromagnetic signatures from BH-star collisions)6 . These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful discussion. 
+SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. Jura Memorial Graduate Award for support. SR and SN acknowledge the partial support from NASA ATP 80NSSC20K0505. SN thanks Howard and Astrid Preston for their generous support. IL thanks support from the Adams Fellowship. SN and RS thank the Bhaumik Institute visitor -program. +program. This work was performed in part at the Aspen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. REFERENCES Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, @@ -666,60 +828,72 @@ doi: 10.1103/PhysRevLett.116.241102 —. 2017a, PhRvL, 118, 221101, doi: 10.1103/PhysRevLett.118.221101 —. 2017b, PhRvL, 119, 141101, +doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi: 10.3847/2041-8205/830/1/L1 +Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 +Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, +doi: 10.1088/0004-637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi: 10.1007/s41114-018-0013-8 +Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. +2021, arXiv e-prints, arXiv:2109.12119. +https://arxiv.org/abs/2109.12119 +Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, +doi: 10.1086/154711 Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, 043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, +doi: 10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. +2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, 613, 1143, doi: 10.1086/423299 -Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, -MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x -Alexander, T. 
1999, ApJ, 527, 835, doi: 10.1086/308129 +6 The connection between the observed X-ray sources at the Galac- -Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, -890, 113, doi: 10.3847/1538-4357/ab6d77 +tic Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. -Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, +Begelman, M. C. 1979, MNRAS, 187, 237, +doi: 10.1093/mnras/187.2.237 +—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 + IMBH Formation in Galactic Nuclei +—. 2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x +Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, +MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x +Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, +890, 113, doi: 10.3847/1538-4357/ab6d77 —. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 - -doi: 10.1103/PhysRevLett.119.141101 - -2021, arXiv e-prints, arXiv:2109.12119. - Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. 2009, New Journal of Physics, 11, 105016, doi: 10.1088/1367-2630/11/10/105016 - -https://arxiv.org/abs/2109.12119 - Binney, J., & Tremaine, S. 1987, Galactic dynamics - -doi: 10.1088/0004-637X/780/2/148 -Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. - -Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, -doi: 10.1086/154711 - -5 - -The connection between the observed X-ray sources at the Galactic Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels. - —. 2008, Galactic Dynamics: Second Edition Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi: 10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, +doi: 10.1093/mnras/179.3.433 Blecha, L., Ivanova, N., Kalogera, V., et al. 
2006, ApJ, 642, 427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, doi: 10.1093/mnras/112.2.195 - - IMBH Formation in Galactic Nuclei Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, doi: 10.1093/mnras/104.5.273 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, 2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi: 10.3847/1538-4357/aac2c4 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, C. 2012, JCAP, 2012, 054, doi: 10.1088/1475-7516/2012/07/054 @@ -737,16 +911,38 @@ et al. 1996, Science, 272, 1286, doi: 10.1126/science.272.5266.1286 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, doi: 10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi: 10.1111/j.1365-2966.2005.09937.x +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. +2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.x + +11 + Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi: 10.1086/427142 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi: 10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi: 10.1086/338118 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 110, 221101, doi: 10.1103/PhysRevLett.110.221101 Edgar, R. 2004, NewAR, 48, 843, doi: 10.1016/j.newar.2004.06.001 +Escala, A. 
2021, ApJ, 908, 57, +doi: 10.3847/1538-4357/abd93c Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Monthly Notices of the Royal Astronomical Society, 443, 2410, doi: 10.1093/mnras/stu1280 @@ -766,9 +962,6 @@ Rasio, F. A. 2004, MNRAS, 352, 1, doi: 10.1111/j.1365-2966.2004.07914.x Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 - -9 - Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, 649, 91, doi: 10.1086/506193 Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, @@ -776,6 +969,11 @@ doi: 10.3847/1538-4357/ab94bc Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, J. P. 2018, MNRAS, 478, 4030, doi: 10.1093/mnras/sty1262 + + 12 + +Rose et al. + Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, doi: 10.1103/RevModPhys.82.3121 @@ -807,27 +1005,42 @@ Dosopoulou, F. 2018, ApJ, 856, 140, doi: 10.3847/1538-4357/aaafce Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi: 10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi: 10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. +2003, ApJ, 592, 1042, doi: 10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi: 10.1088/0004-637X/796/2/106 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the Royal Astronomical Society, 374, 1557, doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, doi: 10.3847/1538-4357/abeb14 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, 45, doi: 10.3847/1538-4357/abb945 + +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 
2003, ApJL, 590, L33, +doi: 10.1086/376675 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi: 10.1111/j.1365-2966.2007.12699.x Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, doi: 10.1093/mnras/stz036 - - 10 - -Rose et al. - Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, 690, 1463, doi: 10.1088/0004-637X/690/2/1463 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi: 10.1046/j.1365-8711.1999.02853.x Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, @@ -835,6 +1048,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints, arXiv:2109.06222. https://arxiv.org/abs/2109.06222 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, +doi: 10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, doi: 10.1088/0034-4885/69/9/R01 Miralda-Escudé, J., & Gould, A. 2000, ApJ, 545, 847, @@ -844,17 +1066,37 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, 622, L113, doi: 10.1086/429721 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi: 10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi: 10.3847/2041-8213/ac574b Naoz, S., & Silk, J. 
2014, ApJ, 795, 102, doi: 10.1088/0004-637X/795/2/102 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, L35, doi: 10.3847/2041-8213/ab4fed + + IMBH Formation in Galactic Nuclei Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, 888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, +ApJ, 628, 368, doi: 10.1086/430728 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, 2127, doi: 10.1111/j.1365-2966.2009.14653.x O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., & O’Shaughnessy, R. 2006, ApJ, 637, 937, doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi: 10.1086/319042 +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, +1011, doi: 10.1086/503273 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Stephen R., J. 2016, ApJ, 823, 113, doi: 10.3847/0004-637X/823/2/113 @@ -872,7 +1114,8 @@ Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, doi: 10.1088/0004-637X/780/2/187 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, A56, doi: 10.1051/0004-6361/202037710 - +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi: 10.1093/mnras/stac231 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & Rasio, F. A. 2018, PhRvL, 120, 151101, doi: 10.1103/PhysRevLett.120.151101 @@ -883,9 +1126,14 @@ Phys. Rev. D, 100, 043027, doi: 10.1103/PhysRevD.100.043027 Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, 113, doi: 10.3847/1538-4357/abc557 + +13 + Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. 
https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, 100, 043009, doi: 10.1103/PhysRevD.100.043009 Sari, R., & Fragione, G. 2019, ApJ, 885, 24, @@ -895,6 +1143,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. doi: 10.1086/339917 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, doi: 10.1086/519309 +Schödel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi: 10.1051/0004-6361/201730452 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, doi: 10.1086/156521 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, @@ -910,6 +1160,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv e-prints. https://arxiv.org/abs/1603.02709 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d +Stone, N. C., Küpper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi: 10.1093/mnras/stv2281 The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, arXiv:2009.01075. https://arxiv.org/abs/2009.01075 @@ -924,15 +1178,20 @@ Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, G. N. 2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 - IMBH Formation in Galactic Nuclei + 14 Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi: 10.1086/379767 Woosley, S. E. 2017, ApJ, 836, 244, doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi: 10.1046/j.1365-8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi: 10.1088/0004-637X/761/2/129 -11 - +Rose et al. Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 
2014, Monthly Notices of the Royal Astronomical Society, 440, 1263, doi: 10.1093/mnras/stu351 diff --git a/read/results/pdftotext/2201.00037.txt b/read/results/pdftotext/2201.00037.txt index ddf92ba..3c351c1 100644 --- a/read/results/pdftotext/2201.00037.txt +++ b/read/results/pdftotext/2201.00037.txt @@ -2,28 +2,19 @@ Confidential manuscript submitted to JGR-Planets The influence of a fluid core and a solid inner core on the Cassini sate of Mercury -Mathieu Dumberry +Mathieu Dumberry 1 arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021 -1 Department - -1 - -of Physics, University of Alberta, Edmonton, Alberta, Canada. +1 Department of Physics, University of Alberta, Edmonton, Alberta, Canada. Key Points: -• - -• - -• - The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. -For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid +• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid cores into a common precession motion. -The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet. +• The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet. +• Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca @@ -281,7 +272,6 @@ Stark et al. [2015b] Baland et al. [2017] Table 1. -3 Baland et al. [2017] Baland et al. [2017] @@ -294,15 +284,12 @@ Perry et al. [2015] Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 -2 - -m /s taken from Genova et al. [2019]. The mean density is calculated from - -4π -ρ̄R3 +ρ̄R3 = M . The numerical +m /s taken from Genova et al. [2019]. The mean density is calculated from 4π +3 3 -= M . 
The numerical +2 values of r and ξr are calculated from r = (ā − c)/R and ξr = (a − b)/R, where ā = 12 (a + b) and where a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor @@ -350,17 +337,17 @@ ef = Cf − Āf Āf -B−A -Ā - -γs = - es = Cs − Ās , Ās +B−A +Ā + +γs = + B s − As . Ās @@ -626,7 +613,8 @@ Ā Ā Ā iΩo Ā -ω m̃ + (1 + ω + ef ) m̃f − ωα1 es + +(12a)  1  @@ -636,6 +624,10 @@ ñs = 2 Āf iΩo Āf +(12b) + +ω m̃ + (1 + ω + ef ) m̃f − ωα1 es + (ω − α3 es )m̃ + α1 es m̃f + (1 + ω) m̃s + (1 + ω − α2 ) es ñs = 1 @@ -647,10 +639,6 @@ iΩ2o Ās  Γ̃ssun + Γ̃icb , -(12a) - -(12b) - (12c) and a fourth equation consists of a kinematic relation that expresses the change in the orientation of the inner core figure as a result of its own rotation, @@ -732,18 +720,14 @@ and where G210 and G201 are functions of the orbital eccentricity ec , (1 − e2c )3/2 7 123 3 489 5 -= ec − +G201 = ec − e + e . 2 16 c 128 c - G210 = (16a) - -G201 - (16b) The gravitational torque by the Sun acting on the inner core alone, Γ̃ssun , is @@ -755,13 +739,11 @@ The gravitational torque by the Sun acting on the inner core alone, Γ̃ssun , i CMB and on the inner core at the ICB, respectively. These torques can be parameterized in terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angular velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002], Γ̃icb = iΩ2o Ās Kicb (m̃f − m̃s ) , -Γ̃cmb = -iΩ2o Āf Kcmb +(18a) -m̃f . +Γ̃cmb = iΩ2o Āf Kcmb m̃f . 
-(18a) (18b) Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the @@ -827,20 +809,21 @@ M = ω − α3 es Ā (1 + ω) Āf -1 + ω + ef + Kcmb + -α1 es − Kicb -0 -0 - Ās Kicb -Āf +1 + ω + ef + Kcmb + Ā +f (1 + ω) ĀĀs Ās − Ā Kicb f + +α1 es − Kicb +0 +0 + 1 + ω + Kicb 1 0 @@ -1301,16 +1284,13 @@ are given by  -ωf cn -ωf icn -   Ā e f φm e f + φm + Ω o , -≈ −Ωo +ωf cn ≈ −Ωo (ef + φm ) Ām + Ās  @@ -1318,7 +1298,7 @@ Ām + Ās  Ā + Ās es α1 − es α3 αg − α3 φs . -≈ Ωo +ωf icn ≈ Ωo Ā − Ās (38a) @@ -1586,6 +1566,8 @@ been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester , 1976] these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based on them are given in Mathews and Guo [2005], +πρf rf4 + r  @@ -1597,15 +1579,12 @@ Āf 4r πρf rs ν  -= +Kicb = 0.195 − 1.976i , 2Ωo Ās Kcmb = -Kicb - -πρf rf4 (44a) (44b) diff --git a/read/results/pdftotext/2201.00069.txt b/read/results/pdftotext/2201.00069.txt index 773e5c6..8f003d7 100644 --- a/read/results/pdftotext/2201.00069.txt +++ b/read/results/pdftotext/2201.00069.txt @@ -126,9 +126,8 @@ angular scales of a few arcseconds, but resolved out to scales of 2021). Localisations of four one-off FRBs through imaging of -★ +★ james.chibueze@nwu.ac.za -james.chibueze@nwu.ac.za † manisha.caleb@manchester.ac.uk 1 https://www.wis-tns.org/ @@ -349,15 +348,16 @@ CT1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter https://github.com/e-merlin/eMERLIN_CASA_pipeline 1 +√︃ +𝜂𝑐 𝑛 + SEFD . -√︃ -𝜂 𝑐 𝑛 × 𝑁 (𝑁 − 1) × Δ𝜈 × 𝑡 -pol -int (1) +pol × 𝑁 (𝑁 − 1) × Δ𝜈 × 𝑡 int + The system equivalent flux density (SEFD) of MeerKAT at the 1.28 GHz is 443 Jy and 𝜂 𝑐 is the correlator efficiency. We used 𝑛pol = 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 = @@ -592,6 +592,8 @@ limits while the magenta region indicates the background region used. The green et al. (2019). Table 1. Details of the FRB fields observed with MeerKAT. 
+Field name + Observation date Synthesized beam @@ -644,13 +646,8 @@ FRB 20190714A 54.4 𝜇Jy beam−1 52.0 𝜇Jy beam−1 -Field name - Table 2. Details of the radio continuum source associated with FRB 20190714A. Field name -FRB 20190714A -FRB 20190714A -FRB 20190714A Observation date @@ -668,6 +665,10 @@ Pos. angle Int. flux density +FRB 20190714A +FRB 20190714A +FRB 20190714A + 28 September 2019 18 October 2019 13 January 2021 @@ -978,9 +979,8 @@ Zhang B., 2018, ApJ, 854, L21 de Naurois M., Rolland L., 2009, Astroparticle Physics, 32, 231 APPENDIX A: AUTHOR AFFILIATIONS -1 Centre +1 Centre for Space Research, North-West University, Potchefstroom -for Space Research, North-West University, Potchefstroom 2531, South Africa 2 Department of Physics and Astronomy, Faculty of Physical Sciences, University of Nigeria, Carver Building, 1 University Road, Nsukka 410001, Nigeria @@ -1027,9 +1027,8 @@ Avenue, Braamfontein, Johannesburg, 2050 South Africa MNRAS 000, 1–15 (2021) MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs -25 Sorbonne +25 Sorbonne Université, Université Paris Diderot, Sorbonne Paris -Université, Université Paris Diderot, Sorbonne Paris Cité, CNRS/IN2P3, Laboratoire de Physique Nucléaire et de Hautes Energies, LPNHE, 4 Place Jussieu, F-75252 Paris, France diff --git a/read/results/pdftotext/2201.00151.txt b/read/results/pdftotext/2201.00151.txt index 23b6b9c..bd76df4 100644 --- a/read/results/pdftotext/2201.00151.txt +++ b/read/results/pdftotext/2201.00151.txt @@ -272,6 +272,10 @@ intermediate 80 +log(Σ) [M⊙/kpc2] + +major + 6.5 0 @@ -280,7 +284,6 @@ intermediate -40 5.3 -160 40 @@ -298,24 +301,14 @@ intermediate 0 -V [km/s] +-40 -80 -[kpc] - -160 - -V [km/s] +-40 -80 -[kpc] - -log(Σ) [M⊙/kpc2] - -major - -80 -160 @@ -352,7 +345,6 @@ major -80 --80 -80 -40 @@ -387,19 +379,29 @@ major σ [km/s] --80 +[kpc] + +σ [km/s] [kpc] --40 +-80 -σ [km/s] +V [km/s] + +160 + +[kpc] -80 +V [km/s] + +160 + [kpc] --40 +-80 -40 @@ -890,26 +892,25 @@ aim 
was to recover the profiles of the total mass and the velocity anisotropy. where -ρ0 = +3.1. Overview of the method I0 πRc [1 + (Rt /Rc )2 ]3/2 -3.1. Overview of the method - (3) +s + +(4) + +ρ0 = and z= -s - r2 + R2c . R2c + R2t -(4) - We follow the approach introduced in Kowalczyk et al. (2018), namely we model the total mass profile with the mass-to-light ratio Υ varying with radius: diff --git a/read/results/pdftotext/2201.00178.txt b/read/results/pdftotext/2201.00178.txt index 794e2ab..c4a1ed3 100644 --- a/read/results/pdftotext/2201.00178.txt +++ b/read/results/pdftotext/2201.00178.txt @@ -10,11 +10,8 @@ Prasad Mani 1, 2 -1 Department -2 Center - -of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India -for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE +1 Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India +2 Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE arXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 @@ -149,14 +146,11 @@ and toroidal flow sensitivity kernels respectively, that allow us to relate the and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k = C−qj,−k and Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj (z). 1.2. Least-squares of cross-correlation -φω∗ -k - -φω -k+q +ω +Even though φω∗ +k φk+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- -Even though -isolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the +surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the ω dimension of the problem. 
A least-squares fit to the cross-correlation φω∗ k φk+q (see Woodard 2006, 2014, 2016) results @@ -174,14 +168,10 @@ Bk,q = ω ω∗ -Hkk -0 nn0 +Multiplying eq 4 on both sides by Hkk +0 nn0 and substituting by eq 5 on the left-hand-side results in a concisely defined -Multiplying eq 4 on both sides by forward problem (compare with eq 4) - -and substituting by eq 5 on the left-hand-side results in a concisely defined - Bk,q = X @@ -915,10 +905,8 @@ Nk = Q k -where the - 1 -Q +where the Q Q P @@ -1050,12 +1038,8 @@ Figure 8. Left: Kernel Kk,q (z) (eq B14) shown vs depth z for the three radial o (eq B17) using SOLA, for qR = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). Integral of the averaging kernel over z is 0.89. -Setting - -∂X -∂α - -→ 0 gives us the matrix problem to be solved +Setting ∂X +∂α → 0 gives us the matrix problem to be solved A{α} = v, h i−1 diff --git a/read/results/pdftotext/2201.00200.txt b/read/results/pdftotext/2201.00200.txt index 9c1ccdf..252161b 100644 --- a/read/results/pdftotext/2201.00200.txt +++ b/read/results/pdftotext/2201.00200.txt @@ -7,14 +7,14 @@ Local heating due to convective overshooting and the solar modelling problem I. Baraffe1,2 , T. Constantino1 , J. Clarke1 , A. Le Saux1,2 , T. Goffrey4 , T. Guillet1 , J. Pratt3 , D. G. 
Vlaykov1 1 -2 -3 -4 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk) École Normale Supérieure, Lyon, CRAL (UMR CNRS 5574), Université de Lyon, France +3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA +4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK +2 arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022 diff --git a/read/results/pdftotext/2201.00214.txt b/read/results/pdftotext/2201.00214.txt index 798aa9e..a60c98d 100644 --- a/read/results/pdftotext/2201.00214.txt +++ b/read/results/pdftotext/2201.00214.txt @@ -175,9 +175,7 @@ to each point of the loop’s direction. Then by using these data, we straighten considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available empty area around each loop and the distance to the neighbor loop). The area around the loop is needed for calculations of background subtraction. The selected loop segment is cut in -1 Based - -on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/ +1 Based on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/ all wavelengths and at the same considered box from the images set. These loop images are necessary entrances for our thermal analysis process. Then the loop is divided into different diff --git a/read/results/pdftotext/GeoTopo-book.txt b/read/results/pdftotext/GeoTopo-book.txt index 6dfe023..94f64fb 100644 --- a/read/results/pdftotext/GeoTopo-book.txt +++ b/read/results/pdftotext/GeoTopo-book.txt @@ -60,205 +60,13 @@ Zahlentheorie“ gehört zu haben. Inhaltsverzeichnis 1 Topologische Grundbegriffe -1.1 Topologische Räume . . -1.2 Metrische Räume . . . . -1.3 Stetigkeit . . . . . . . . -1.4 Zusammenhang . . . . . -1.5 Kompaktheit . . . . . . -1.6 Wege und Knoten . . . . -Übungsaufgaben . . . . . . . - -. -. -. -. -. -. -. - -. -. -. -. -. -. 
-. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. +1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 2 @@ -270,125 +78,10 @@ Zahlentheorie“ gehört zu haben. 22 2 Mannigfaltigkeiten und Simplizialkomplexe -2.1 Topologische Mannigfaltigkeiten . . . . . -2.2 Differenzierbare Mannigfaltigkeiten . . . -2.3 Simplizialkomplex . . . . . . . . . . . . -Übungsaufgaben . . . . . . . . . . . . . . . . - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. +2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . +2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . +2.3 Simplizialkomplex . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24 24 @@ -396,410 +89,37 @@ Zahlentheorie“ gehört zu haben. 34 43 -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -. -. -. -. -. -. -. - -3 Fundamentalgruppe und Überlagerungen -3.1 Homotopie von Wegen . . . . . . . . . -3.2 Fundamentalgruppe . . . . . . . . . . -3.3 Überlagerungen . . . . . . . . . . . . . -3.4 Gruppenoperationen . . . . . . . . . . - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -44 -44 -47 -51 -61 - -4 Euklidische und nichteuklidische Geometrie -4.1 Axiome für die euklidische Ebene . . . . . . . . -4.2 Weitere Eigenschaften einer euklidischen Ebene -4.2.1 Flächeninhalt . . . . . . . . . . . . . . . -4.3 Hyperbolische Geometrie . . . . . . . . . . . . . -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -. -. -. -. -. - -64 -64 -74 -74 -77 -86 - -5 Krümmung -5.1 Krümmung von Kurven . . . . . . -5.2 Tangentialebene . . . . . . . . . . . -5.3 Gauß-Krümmung . . . . . . . . . . -5.4 Erste und zweite Fundamentalform - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. 
+3 Fundamentalgruppe und Überlagerungen +3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . -. -. -. -. +44 +44 +47 +51 +61 -. -. -. -. +4 Euklidische und nichteuklidische Geometrie +4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . +4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . +4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . -. -. -. -. +64 +64 +74 +74 +77 +86 -. -. -. -. +5 Krümmung +5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 87 87 @@ -809,61 +129,6 @@ Zahlentheorie“ gehört zu haben. Lösungen der Übungsaufgaben -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - -. -. -. -. - 99 Bildquellen @@ -1712,9 +977,7 @@ V xi Sm -i=1 Uxi - -⊇ K. +i=1 Uxi ⊇ K. 18 @@ -2050,9 +1313,6 @@ n bzw. 2n, da gilt: S Sei Ui := { (x0 : · · · : xn ) ∈ P n (R) | xi 6= 0 } ∀i ∈ 0, . . . , n. Dann ist P n (R) = ni=0 Ui und die Abbildung - -(y1 : · · · : yi−1 - Ui → Rn   @@ -2064,16 +1324,15 @@ x0 xi xi xi -: 1 : yi : · · · : yn ) →7 (y1 , . . . 
, yn ) - +(y1 : · · · : yi−1 : 1 : yi : · · · : yn ) →7 (y1 , . . . , yn ) ist bijektiv. Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: x = (1 : 0 : 0) ∈ U0 → R2 -x 7→ (0, 0) - y = (0 : 1 : 1) ∈ U2 → R2 +x 7→ (0, 0) + y 7→ (0, 1) Umgebung: B1 (0, 1) →  @@ -2089,26 +1348,22 @@ V1 ∩ V2 = ∅? Karten: Di := {(x1 , . . . , xn+1 ) ∈ S n |xi > 0} → B1 (0, . . . , 0) | {z } -S n |xi ∈Rn - -Ci := {(x1 , . . . , xn+1 ) ∈ -< 0} → B1 (0, . . . , 0) -1 +n +Ci := {(x1 , . . . , xn+1 ) ∈ S |xi < 0} → B1 (0, . . . , 0) (x1 , . . . , xn+1 ) 7→ (x1 , . . . ,  x -, -. -. -i q. , xn+1 ) +. . . , xn+1 )1 +i , q q -Pn P -(x1 , . . . , xn ) 7→ (x1 , . . . , xi−1 , 1 − k=1 x2k , xi , . . . , xn ), oder − 1 − nk=1 x2k für Ci +P +(x1 , . . . , xn ) 7→ (x1 , . . . , xi−1 , 1 − nk=1 x2k , xi , . . . , xn ), oder − 1 − nk=1 x2k für Ci S S n = n+1 i=1 (Ci ∪ Di ) + Als kompakte Mannigfaltigkeit wird S n auch „geschlossene Mannigfaltigkeit“ genannt. 5) [0, 1] ist keine Mannigfaltigkeit, denn: Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall @@ -2558,11 +1813,9 @@ det ∂x  ∂v -∂y (v0 ) +∂y (v0 ) 6= 0 ∂v -6= 0 - und Fj (u, v) = (x(u, v), y(u, v), z(u, v)). fj : Uj × R → R3 durch Definiere F @@ -2572,11 +1825,10 @@ fj |U ×{ 0 } = Fj Offensichtlich: F j  ∂x -JFfj = ∂u - ∂y -∂u +∂y +JFfj =  ∂u ∂z ∂u @@ -2632,9 +1884,7 @@ Beispiel 25 (Lie-Gruppen) Pn -k=1 aik bkj - -ist nach allen Variablen differenzierbar +k=1 aik bkj ist nach allen Variablen differenzierbar det(Aij ) det A @@ -2760,10 +2010,10 @@ c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension vo (a) 1D Simplizialkomplex (b) 2D Simplizialkomplex (ohne untere Fläche!) -(d) 1D Simplizialkomplex - (c) 2D Simplizialkomplex +(d) 1D Simplizialkomplex + (e) 2D Simplizialkomplex P @@ -4612,13 +3862,9 @@ O. B. d. A. seien ϕ1 (R) und ϕ2 (R) in der selben Halbebene. 
Es gilt: d(P 0 , ϕ1 (R)) = d(ϕ1 (P ), ϕ1 (R)) = d(P, R) = d(ϕ2 (P ), ϕ2 (R)) -und analog - = d(P 0 , ϕ2 (R)) -= d(Q0 , ϕ2 (R)) - -d(Q0 , ϕ1 (R)) - +0 +und analog d(Q , ϕ1 (R)) = d(Q0 , ϕ2 (R)) (Teil ii) Seien P , Q und R Fixpunkte von ϕ, R ∈ / P Q und A ∈ / P Q ∪ P R ∪ QR. Sei B ∈ @@ -4955,29 +4201,22 @@ hc ha -c - LC +c A A B -(a) - -1/2 - B -· |AB| · |hc | +(a) 1/2 · |AB| · |hc | -(b) +(b) 1/2 · |BC| · |h -1/2 - -· |BC| · |ha | +a| Abbildung 4.15: Flächenberechnung im Dreieck Zu zeigen: Unabhängigkeit von der gewählten Grundseite. @@ -4998,18 +4237,7 @@ B Abbildung 4.16: 4ABLa und 4CLC B sind ähnlich, weil IWS = π Strahlensatz -=======⇒ - -a -hc - -= - -c -ha - -→ a · ha = c · hc - +=======⇒ hac = hca → a · ha = c · hc Satz 4.7 (Satz des Pythagoras) Im rechtwinkligen Dreieck gilt a2 + b2 = c2 , wobei c die Hypotenuse und a, b die beiden Katheten sind. @@ -5429,13 +4657,7 @@ SL2 (R) ac(x + y ) + adx + bcx + bd ⇒ σ(z) = -⇒ =(σ(z)) = - -y -(cx+d)2 +(cy)2 - ->0 - +⇒ =(σ(z)) = (cx+d)y2 +(cy)2 > 0 Die Abbildung bildet also nach H ab. Außerdem gilt:   @@ -5468,7 +4690,6 @@ c0 d0 c d c0 z + d0 = -= 0 @@ -5484,14 +4705,16 @@ a ac0 z+d z+b c ac0 z+d 0 + d + a(a0 z+b0 )+b(c0 z+d0 ) +0 z+d0 += c(a0 z+bc0 )+d(c +0 z+d0 ) c0 z+d0 -c(a0 z+b0 )+d(c0 z+d0 ) -c0 z+d0 -0 -a(a z + b0 ) + b(c0 z +a(a0 z + b0 ) + b(c0 z + d0 ) + += -+ d0 ) c(a0 z + b0 ) + d(c0 z + d0 ) (aa0 + bc0 )z + ab0 + bd0 = @@ -5511,8 +4734,6 @@ a b ◦z c d c d0 -= - b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2 (R) und z ∈ H.   @@ -5546,12 +4767,12 @@ A−1 λ -Bt−1 +Bt−1 = B−t −1 +3 C -= B−t -= C3 +=C Daher genügt es zu zeigen, dass man mit Aλ , Bt und C alle Matrizen aus SL2 (R) erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit @@ -5672,14 +4893,16 @@ m + ir 0 -m 0 1 -2 +m λ2 m + +2 + 3m + 14 (a) Fall 1 @@ -5741,14 +4964,9 @@ y −1 1 -z - -= - 1 -r - -· eiϕ +iϕ +z = r ·e 0 @@ -5762,11 +4980,11 @@ Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1 ) = b1 und σ(a2 ) = b2 . 
Definition 65 Seien z1 , z2 , z3 , z4 ∈ C paarweise verschieden. Dann heißt -DV(z1 , z2 , z3 , z4 ) := z1 −z4 -z1 −z2 -z3 −z4 +2 +DV(z1 , z2 , z3 , z4 ) := zz13 −z +−z4 z3 −z2 = @@ -5777,28 +4995,14 @@ z3 −z2 Doppelverhältnis von z1 , . . . , z4 . Bemerkung 70 (Eigenschaften des Doppelverhältnisses) a) DV(z1 , . . . , z4 ) ∈ C \ { 0, 1 } -b) DV(z1 , z4 , z3 , z2 ) = - -1 -DV(z1 ,z2 ,z3 ,z4 ) - -c) DV(z3 , z2 , z1 , z4 ) = - -1 -DV(z1 ,z2 ,z3 ,z4 ) - +b) DV(z1 , z4 , z3 , z2 ) = DV(z1 ,z12 ,z3 ,z4 ) +c) DV(z3 , z2 , z1 , z4 ) = DV(z1 ,z12 ,z3 ,z4 ) d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind. e) DV(0, 1, ∞, z4 ) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen). f) Für σ ∈ PSL2 (C) und z1 , . . . , z4 ∈ C ∪ { ∞ } ist DV(σ(z1 ), σ(z2 ), σ(z3 ), σ(z4 )) = DV(z1 , z2 , z3 , z4 ) -und für σ(z) = - -1 -z - -gilt +und für σ(z) = z1 gilt DV(σ(z1 ), σ(z2 ), σ(z3 ), σ(z4 )) = DV(z1 , z2 , z3 , z4 ) - g) DV(z1 , z2 , z3 , z4 ) ∈ R ∪ { ∞ } ⇔ z1 , . . . , z4 liegen auf einer hyperbolischen Geraden. Beweis: a) DV(z1 , . . . , z4 ) 6= 0, da zi paarweise verschieden @@ -5819,27 +5023,17 @@ Annahme: DV(z1 , . . . , z4 ) = 1 ⇔ z3 = z1 oder z2 = z4 Alle zi sind paarweise verschieden ⇒ Widerspruch -b) DV(z1 , z4 , z3 , z2 ) = - -(z1 −z2 )·(z3 −z4 ) -(z1 −z4 )·(z3 −z2 ) -= + 1 -DV(z1 ,z2 ,z3 ,z4 ) - -c) DV(z3 , z2 , z1 , z4 ) = - +1 −z2 )·(z3 −z4 ) +b) DV(z1 , z4 , z3 , z2 ) = (z +(z1 −z4 )·(z3 −z2 ) = DV(z1 ,z2 ,z3 ,z4 ) (z3 −z4 )·(z1 −z2 ) -(z3 −z2 )·(z1 −z4 ) - -= - -1 -DV(z1 ,z2 ,z3 ,z4 ) - - +c) DV(z3 , z2 , z1 , z4 ) = (z += DV(z1 ,z12 ,z3 ,z4 ) +3 −z2 )·(z1 −z4 ) d) Zwei der zi dürfen gleich sein, da: Fall 1 z1 = z4 oder z3 = z2 @@ -5849,17 +5043,13 @@ Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1 , . . . , z4 ) = Fall 3 z1 = z3 oder z2 = z4 Durch Einsetzen ergibt sich DV(z1 , . . . , z4 ) = 1. 
Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4 ) = 0 oder DV(0, 1, ∞, z4 ) ± ∞ -e) DV(0, 1, ∞, z4 ) = - +·(∞−1) (0−z4 )·(∞−1) -(0−1)·(∞−z4 ) - -= - -z4 ·(∞−1) -∞−z4 - += z4∞−z +e) DV(0, 1, ∞, z4 ) = (0−1)·(∞−z = z4 +4 +4) f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. g) Sei σ ∈ PSL2 (C) mit σ(z1 ) = 0, σ(z2 ) = 1, σ(z3 ) = ∞. Ein solches σ existiert, da man @@ -6002,14 +5192,13 @@ Beweis: a) l(γ) = Rb -a -kγ 0 (t)kdt = +0 +a kγ (t)kdt = Rb -a -1dt = b − a. +a 1dt = b − a. b) Im Folgenden wird die Aussage nur für γ : [a, b] → R2 bewiesen. Allerdings funktioniert der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. @@ -6157,11 +5346,10 @@ r Definition 70 Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve. a) Für t ∈ I heißt κ(t) := kγ 00 (t)k die Krümmung von γ in t. -b) Ist für t ∈ I die Ableitung γ 00 (t) 6= 0, so heißt - -γ 00 (t) -kγ 00 (t)k +00 +(t) +b) Ist für t ∈ I die Ableitung γ 00 (t) 6= 0, so heißt kγγ 00 (t)k Normalenvektor an γ in t. c) b(t) sei ein Vektor, der γ 0 (t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt. @@ -6190,11 +5378,12 @@ s∈V: Für p = F −1 (s) ∈ U sei  ∂x -JF (p) = ∂u (p) - ∂y (p) -∂u +∂y + +JF (p) = ∂u +(p) ∂z ∂u (p) @@ -6402,13 +5591,10 @@ Definition 74 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und γ 00 (0) 6= 0. -Sei n(0) := - -γ 00 (0) -kγ 00 (0)k . - -Zerlege +00 +Sei n(0) := kγγ 00 (0) +(0)k . Zerlege n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ Ts S und n(0)⊥ ∈ (Ts S)⊥ Dann ist n(0)⊥ = hn(0), n(s)i · n(s) κNor (s, γ) := hγ 00 (0), n(s)i die Normalkrümmung. @@ -6516,7 +5702,7 @@ b) { Dp F (e1 ), Dp F (e2 ) } ist eine Basis von Ts S. c) Bzgl. der Basis { Dp F (e1 ), Dp F (e2 ) } hat das Standardskalarprodukt aus Bemerkung 80.a die Darstellungsmatrix IS . d) gi,j (s) ist eine differenzierbare Funktion von s. 
Bemerkung 81 - +2 ∂F ∂F (p) × @@ -6550,8 +5736,6 @@ Dann ist ∂u 1 z3 -2 - det(IS ) = z1 = x2 y3 − x3 y2 @@ -6658,9 +5842,8 @@ ds n(x) = d n(s„+“tx) -dt | {z } +dt | {z } t=0 -t=0 Soll auf Fläche S bleiben Die Abbildung ds n heißt Weingarten-Abbildung @@ -6686,13 +5869,10 @@ c) Wegen Proposition 5.1 (a) ist ds n ein Homomorphismus. d) Zu zeigen: ∀x, y ∈ Is S : hx, ds n(y)i = hds n(x), yi Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. -Sei xi = Dp F (ei ) = - ∂F -∂ui (p) - -i = 1, 2 - +Sei xi = Dp F (ei ) = ∂u +(p) i = 1, 2 +i 2 F @@ -6943,12 +6123,11 @@ nun für für n ∈ N≥2 , m ∈ N: 1 , . . . , 1) m -∈ SLn (R), und Am ist unbeschränkt, da kAm k∞ = +Dann gilt: det Am = 1, d. h. Am ∈ SLn (R), und Am ist unbeschränkt, da kAm k∞ = +m −−−−→ ∞.  - Am = diagn (m, -Dann gilt: det Am = 1, d. h. Am -m −−−−→ ∞. + m→∞ (c) Beh.: P(R) ist kompakt. @@ -7041,8 +6220,8 @@ Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈ X und 4ABC ein Dreieck. Lösungen der Übungsaufgaben -∼ AC ⇒ ∠ABC ∼ -(a) Beh.: AB = +(a) Beh.: AB ∼ += AC ⇒ ∠ABC ∼ = ∠ACB ∼ Bew.: Sei AB = AC. @@ -7230,30 +6409,21 @@ Symmetrische Gruppe Seien A, B und M Mengen. AC -P(M ) -M -∂M -M◦ -A×B -A⊆B -A(B -A\B -A∪B -A ∪˙ B -A∩B - Komplement von A -Potenzmenge von M +P(M ) Potenzmenge von M +M Abschluss von M +∂M Rand der Menge M +M◦ Inneres der Menge M -Kreuzprodukt -Teilmengenbeziehung -echte Teilmengenbeziehung -Differenzmenge -Vereinigung -Disjunkte Vereinigung -Schnitt +A × B Kreuzprodukt +A ⊆ B Teilmengenbeziehung +A ( B echte Teilmengenbeziehung +A \ B Differenzmenge +A ∪ B Vereinigung +A ∪˙ B Disjunkte Vereinigung +A ∩ B Schnitt Geometrie AB @@ -7286,62 +6456,48 @@ PSLn (K) Projektive lineare Gruppe Wege Sei γ : I → X ein Weg. 
[γ] -γ1 ∗ γ2 -γ1 ∼ γ2 -γ(x) -C - Homotopieklasse von γ -Zusammenhängen von Wegen -Homotopie von Wegen +γ1 ∗ γ2 Zusammenhängen von Wegen +γ1 ∼ γ2 Homotopie von Wegen +γ(x) Inverser Weg, also γ(x) := γ(1 − x) Bild eines Weges γ, also C := +C γ([0, 1]) Weiteres B -Bδ (x) +Basis einer Topologie +Bδ (x) δ-Kugel um x S +Subbasis einer Topologie T +Topologie A +Atlas P -h·, ·i -X/∼ -[x]∼ -kxk -|x| -hai +Projektiver Raum +h·, ·i Skalarprodukt +X/∼ X modulo ∼ +[x]∼ Äquivalenzklassen von x bzgl. ∼ +kxk Norm von x +|x| Betrag von x +hai Erzeugnis von a Sn Tn -Basis einer Topologie -δ-Kugel um x -Subbasis einer Topologie -Topologie - -Atlas -Projektiver Raum -Skalarprodukt -X modulo ∼ -Äquivalenzklassen von x bzgl. ∼ -Norm von x -Betrag von x -Erzeugnis von a Sphäre Torus f ◦g -πX -f |U f -f −1 (M ) -Rg(M ) -χ(K) - Verkettung von f und g +πX Projektion auf X +f |U f eingeschränkt auf U -Urbild von M -Rang von M +f −1 (M ) Urbild von M +Rg(M ) Rang von M +χ(K) Euler-Charakteristik von K 110 @@ -7381,22 +6537,21 @@ C = { a + ib | a, b ∈ R } Komplexe Zahlen P = { 2, 3, 5, 7, . . . } Primzahlen H = { z ∈ C | =z > 0 } obere Halbebene I = [0, 1] ( R Einheitsintervall -f : S 1 ,→ R2 -π1 (X, x) -Fix(f ) -k · k2 -κ -κNor -V (f ) Rationale Zahlen -Einbettung der Kreislinie in die Ebene +f : S 1 ,→ R2 Einbettung der Kreislinie in die Ebene +π1 (X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X +Fix(f ) Menge der Fixpunkte der Abbildung f +k · k2 2-Norm; Euklidische Norm +κ Krümmung +κNor Normalenkrümmung +V (f ) Nullstellenmenge von f 2 Krümmung diff --git a/read/results/pymupdf/2201.00022.txt b/read/results/pymupdf/2201.00022.txt index d97554f..ce51fa4 100644 --- a/read/results/pymupdf/2201.00022.txt +++ b/read/results/pymupdf/2201.00022.txt @@ -1,4 +1,4 @@ -Draft version January 4, 2022 +Draft version July 7, 2022 Typeset using LATEX twocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei Sanaea C. 
Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 @@ -7,17 +7,23 @@ Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70 M⊙. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and -above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax- -ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M⊙. Our -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for- -mation channel also has implications for observations. Collisions between stars and BHs can produce -electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally, -formed through this channel, both black holes in the mass gap and IMBHs can merge with the super- -massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). +50 − 70 M⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. +We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M⊙. 
This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). 1. INTRODUCTION The recently @@ -39,18 +45,18 @@ BH mergers that form second generation BHs and, in some cases, inter- mediate mass BHs (IMBHs), these gravitational wave (GW) events can occur in globular clusters, young stel- -lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- -driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. -2021; Arca Sedda et al. 2021). -However, IMBHs are -not limited to these locations and may reside in galac- Corresponding author: Sanaea C. Rose srose@astro.ucla.edu 1 Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski et al. 2020a; Renzo et al. 2020; Vink et al. 2021). +lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- +driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 
2021). +However, IMBHs are +not limited to these locations and may reside in galac- tic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc @@ -69,27 +75,28 @@ lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to sur- vive galaxy evolution and mergers to present day (e.g., +arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 + +2 +Rose et al. Rashkov & Madau 2014), with significant effects on their stellar and even dark matter surroundings (e.g., Bertone et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another popular formation channel relies on the coalescence of -many stellar-mass black holes. -For example, IMBHs +many stellar-mass black holes, which may seed objects +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs may form in the centers of globular clusters, where few- body interactions lead to the merger of stellar-mass BHs (e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- -arXiv:2201.00022v1 [astro-ph.GA] 31 Dec 2021 - -2 -Rose et al. driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; -Freitag et al. 2006; Kremer et al. 2020; Gonz´alez et al. -2021; Di Carlo et al. 2021). +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). 
The main obstacle to sequential BH mergers in clus- ters is that the merger recoil velocity kick often exceeds the escape velocity from the cluster (e.g., Schnittman @@ -105,17 +112,28 @@ back towards the cluster center over a dynamical fric- tion timescale. Using this approach, they showed that 103 − 104 M⊙ IMBHs can form efficiently over the life- time of a cluster. -However, as discussed in Section 2.2, direct star-BH +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision in galactic nuclei, making the former a promising chan- -nel for BH growth. We propose that IMBHs can form -naturally within the central pc of a SMBH in a galactic -center. Specifically, these IMBHs form through repeated -collisions with main sequence stars, accreting some or -all of the star’s mass depending on the details of the -collision. We demonstrate that this channel can create -IMBHs with masses as large as 104 M⊙, depending on -the density profile of the surrounding stars. +nel for BH growth. In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star colli- +sions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). We propose that IMBHs can form naturally +within the central pc of a galactic center through re- +peated collisions between BHs and main sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow ap- +preciably in size. 
We demonstrate that this channel can +create IMBHs with masses as large as 104 M⊙, an upper +limit that depends on the density profile of the surround- +ing stars and the efficiency of the accretion. The paper is structured as follows: we describe rele- vant physical processes and our approach in Section 2. In particular, we provide an overview of collisions in @@ -126,11 +144,11 @@ mass growth with each collision and presents analytic solutions to our equations in two different regimes, ef- ficient collisions and inefficient collisions We compare these solutions to our statistical results. -Sections 2.5 -and 2.7 discuss implications for GW merger events be- +Sections 2.6 +and 2.8 discuss implications for GW merger events be- tween IMBHs and the SMBH. We then incorporate re- laxation processes and discuss the subsequent results in -Section 2.8. Finally, we discuss and summarize our find- +Section 2.9. Finally, we discuss and summarize our find- ings in Section 3. 2. METHODOLOGY We consider a population of stellar mass BHs embed- @@ -166,7 +184,20 @@ all distances from the SMBH, including within 0.01 pc. Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other -observationally motivated distributions in Section 2.8, + +IMBH Formation in Galactic Nuclei +3 +Figure 1. We plot the relevant timescales, including col- +lision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. +The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark +blue line represents the time for a 105 M⊙ BH to merge with +the SMBH through GW emission. +observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribu- tion’s impact for future work. 2.2. 
Direct Collisions @@ -195,19 +226,6 @@ the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and σ are simply evaluated at the semimajor axis of the orbit (see below). Note - -IMBH Formation in Galactic Nuclei -3 -Figure 1. We plot the relevant timescales, including col- -lision (green), relaxation (gold), and BH-BH GW capture -(purple), for a single BH in the GN as a function of distance -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. -The timescales depend on the -density, so we adopt a range of density profiles, bounded by -α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark -blue line represents the time for a 105 M⊙ BH to merge with -the SMBH through GW emission. that this timescale equation includes the effects of grav- itational focusing, which enhances the cross-section of interaction. @@ -236,8 +254,6 @@ n(r•) = ρ(r•) 1 M⊙ . (3) -2 We note that the eccentricity has a very minor effect on the -collision timescale (Rose et al. 2020). The collision timescale also depends on the velocity dis- persion, which we express as: σ(r•) = @@ -268,6 +284,11 @@ vant equations, see O’Leary et al. 2009; Gond´an et al. 2018, for example). Thus, we expect that star-BH col- lisions will be the main driver of IMBH growth in the GN. +2 We note that the eccentricity has a very minor effect on the +collision timescale (Rose et al. 2020). + +4 +Rose et al. 2.3. Statistical Approach to Collisions We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an @@ -285,14 +306,6 @@ expected to accrete in a single collision (see Section 2.4 for details). We recalculate the collision timescale using the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3. -3 Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. 
We include a safe- -guard in our code which takes the ratio tcoll/∆t and rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. - -4 -Rose et al. 2.4. Mass Growth When a BH collides with a star, it may accrete ma- terial and grow in mass. The details of the accretion @@ -304,15 +317,19 @@ We begin by con- sidering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R⊙. Qualitatively, one -might expect that the BH could accrete the entire star +might expect that the BH could capture the entire star (i.e., ∆m ∼ 1 M⊙) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH -accretes a “tunnel” of material through the star. This +captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1 R⊙. +approximately 1 R⊙. For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our re- +sults imply. We discuss accretion in Section 2.5. To estimate ∆m, we begin with the Bondi-Hoyle ac- cretion rate, ˙m, given by: ˙m = 4πG2m2 @@ -320,6 +337,24 @@ BHρstar (c2s + σ2)3/2 , (5) +3 Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe- +guard in our code which takes the ratio tcoll/∆t and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. 
Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the inital mass equal to 10 M⊙ for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. +The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. where cs is the speed of sound in the star and ρstar is its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). @@ -352,20 +387,10 @@ sult is exponential growth (see discussion and details surrounding Eq. (8)). In Figure 2, however, the simula- tions assume α = 1 for the stellar density profile, ensur- ing the collision timescale is long compared to the sim- + +IMBH Formation in Galactic Nuclei +5 ulation time, 10 Gyr. Therefore, the BHs grow slowly, -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M⊙ for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. -The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. and their final masses can be approximated using the following equation: mfinal(tcoll → const.) = minitial + ∆m T @@ -396,9 +421,6 @@ star’s mass. Eq. 
7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger index α in the density profile (see Figure 1), the growth - -IMBH Formation in Galactic Nuclei -5 is very efficient and ∆m quickly approaches 1 M⊙. Con- sequently, while we can now assume ∆m = 1 M⊙, we can no longer assume the collision timescale is constant. @@ -412,7 +434,53 @@ mfinal(∆m → 1 M⊙) = −A + (minitial + A) eCT where A = σ2Rstar/G and C = 2πGnstarRstar/σ. As an example, we plot this curve in purple for the α = 2 case, in Figure 3, which agrees with the simulated masses. -2.5. GW Inspiral +2.5. Uncertainties in Accretion +We note that the ∆M calculated in this proof-of- +concept study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challeng- +ing; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heat- +ing, and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star re- +sults in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity re- +main uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accu- +mulate significant amounts of mass over many collisions +even if it accretes very little in a single one. +We ex- +plore the viability of our channel using a physically mo- +tivated inefficient accretion model. 
Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vesc/(cη), where vesc is +the escape velocity from the BH at 1 R⊙ and η is the +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). +This expression for the +fraction accreted is consistent with Kremer et al. (2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. +We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radi- +ation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumen- +shchev 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral When a BH is close to the SMBH, GW emission can circularize and shrink its orbit. We implement the ef- fects of GW emission on the BH’s semimajor axis and @@ -431,7 +499,7 @@ M• 2 × 106 M⊙ �−1 � a• -10−4 pc +10−2 pc �4 × f(e•)(1 − e2 •)7/2 , @@ -441,11 +509,18 @@ For all values of e•, f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 × 105 M⊙ BH in Figure 1 in blue. + +6 +Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. In our simulations, we assume a BH has merged with the SMBH when the condition tGW < telapsed is met. 
When this condition is satisfied, we terminate mass growth through collisions for that BH.4 -2.6. IMBH growth +2.7. IMBH growth As detailed above, BH-stellar collisions can increase the BH masses as a function of time. Here, we examine the sensitivity of the BH growth to the density power @@ -456,24 +531,24 @@ profiles, will result in more efficient mass growth. In Figure 1, larger values of α lead to collision timescales in the GN’s inner region, inwards of 0.25 pc, that are -4 For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). -This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking. much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of a uniform distribution of BHs with initial conditions de- tailed in Section 2.1 for five α values, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards of 0.25 pc for the α = 2 case. -2.7. Gravitational Wave Mergers and Intermediate +2.8. Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. -Following the method detailed in Section 2.5, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion tGW < telapsed, we mark +4 For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). +This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. it as merged with the SMBH. 
We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit un- @@ -484,7 +559,7 @@ merger. It is interesting to note that even in the ab- sence of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. -2.8. Two Body Relaxation Processes +2.9. Two Body Relaxation Processes A BH orbiting the SMBH experiences weak gravita- tional interactions with other objects in the GN. Over a relaxation time, these interactions alter its orbit about @@ -504,24 +579,28 @@ its orbital energy and angular momentum by order of themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). In -Figure 1, we plot the relaxation timescale in gold for a -range of α. We note that the Bahcall & Wolf (1976) pro- -file, α = 7/4, corresponds to zero net flux and therefore -does not preferentially migrate objects inward. -Additionally, -because they are more massive on -average than the surrounding objects, BHs are ex- -pected to segregate inwards in the GN (e.g., Shapiro -& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; -Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star -6 -Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. Faded stars indicate BHs that merged with the SMBH through GWs. 
On the left, we plot the masses and -merger times of these BHs. +IMBH Formation in Galactic Nuclei +7 +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Met- +zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. We note that the Bah- +call & Wolf (1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on av- +erage than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, tseg ≈ ⟨M∗⟩/mBH × trelax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an @@ -540,8 +619,8 @@ of zero and a standard deviation of ∆vrlx/ P•/trlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. in prep for full set -of equations. +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. We account for the effects of relaxation processes, including mass-segregation, using a multi-faceted ap- proach. We begin by migrating each BH towards the @@ -584,7 +663,7 @@ towards the SMBH, their concentration in the inner re- gion of the GN increases, allowing them to dominate the scattering. We reserve the inclusion of these interactions for future study. -2.9. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure 4, two-body relaxation processes result in more EMRIs and IMRIs events. 
These pro- @@ -592,17 +671,6 @@ cesses allow BHs that begin further from the SMBH to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are initially closer to the SMBH by allowing them to dif- - -IMBH Formation in Galactic Nuclei -7 -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) -for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more -BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. -The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. fuse out of the inner region where collisions are efficient. As can be seen in Figure 4, the net result is that more BHs grow, but the maximum mass is lower compared @@ -626,19 +694,30 @@ Taking both a statisti- cal and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as 103−4 M⊙ and may result in many IMBH-SMBH merg- -ers (intermediate-mass ratio inspiral, IMRIs) and EM- -RIs. +ers (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. + +8 +Rose et al. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. 
+We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH will grow in mass. The increase may equal star’s en- tire mass if the relative velocity is smaller than the es- cape velocity from the BH at 1 R⊙. However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this -limit, the BH accretes a “tunnel” of material through +limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accre- tion. In our statistical analysis, we account for Bondi- Hoyle-Lyttleton accretion and find that BHs outside of -10−2 pc from the SMBH can accrete the entire star (see +10−2 pc from the SMBH can capture the entire star (see Figure 2). The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to @@ -654,6 +733,29 @@ or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller (∼ 104 M⊙). Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. 
We note that +the accretion is a highly uncertain process and repre- +sents an active field of study (e.g., Blandford & Begel- +man 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). To assess the limits of our model, +we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Figure 4. +Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M⊙. Furthermore, if we increase this ∆M esti- +mate by a factor of 2 (i.e., use η = 0.05), the simula- +tion produces a 3.5 × 103 M⊙ IMBH for the same initial +conditions. Our proof-of-concept demonstrates that col- +lisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. Mass growth through BH-main-sequence star colli- sions may act in concert with other IMBH formation channels, such as compact object binary mergers (e.g., @@ -673,18 +775,87 @@ Kozai Lidov mechanism, leaving behind a single star or a single compact object (e.g., Stephan et al. 2016, 2019; Hoang et al. 2018). Additionally, to be susceptible to evaporation, BH binaries must have a wider configura- -tion. Otherwise, they will be more tightly bound that - -8 -Rose et al. -the average kinetic energy of the surrounding objects, +tion. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects and will only harden through weak gravitational inter- + +IMBH Formation in Galactic Nuclei +9 actions with neighboring stars (see for example Figure 6 in Rose et al. 2020). -Not included in this study, collisions between the BH -and other compact objects will increase the BH growth -rate. BH-BH mergers (e.g., O’Leary et al. 
2009; Fra- -gione et al. 2021) and even neutron star BH mergers +We note that we assume a steady-state and treat the +stars as a reservoir in this model. Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star +formation episodes can occur as often as every ∼ 5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within ≲ 0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5−10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this pic- +ture within ∼ 0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar popula- +tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M⊙ stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, ≲ 0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disrup- +tion of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g., Balberg et al. 2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. +An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 
2006a; L¨ockmann & Baumgardt +2008). +L¨ockmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density pro- +file to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replen- +ished on 100 Myr timescales (Baumgardt et al. 2006a). +Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. +While there are many competing dynamical processes +that shape the stellar density profile, we stress that α +5 In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observa- +tions must already reflect ongoing processes like star- +star collisions and replenishment. Sch¨odel et al. (2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M⊙. This estimate is consistent to order of magni- +tude with our α = 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α = 1.25 +leads to a maximum IMBH mass of 140 M⊙. Further- +more, while the stellar mass within 0.01 pc may be a +few hundred M⊙, Do et al. (2019) and GRAVITY Col- +laboration et al. (2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thou- +sand M⊙, or 0.1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. In that case, the 180 M⊙ is what remains of the +stars, while BHs and IMBHs make up the ∼ 1000 M⊙ +in the innermost region. +Also not included in this study, collisions between the +BH and other compact objects will increase the BH +growth rate. 
BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers (e.g., Hoang et al. 2020) become more likely as the BHs increase in mass through stellar collisions. As a result, the BH-BH collision timescale, discussed in Section 2.2, @@ -693,45 +864,56 @@ BHs to grow through this channel in addition to stel- lar collisions. Additionally, this compact object mergers result in GW recoil, which may have a large impact on the dynamics (e.g., Baibhav et al. 2020; Fragione et al. -2021) +2021). The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation -processes, GW emission causes BHs to sink towards the -SMBH and eventually undergo a merger. As a result, -the GN environment is conducive to the formation of -EMRIs and IMRIs. The GW emission from EMRIs and -IMRIs is expected to be at mHz frequencies, making -them promising candidates for LISA to observe. While -the exact rate calculation is beyond the scope of this -study, the mechanism outlined here seems very promis- -ing. -Our results also suggest that IMBHs are likely to ex- -ists in many galactic nuclei, as well as within our own -galactic center. -This implication seems to be consis- -tent with recent observational and theoretical studies -(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; -G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen -& Liu 2013; Generozov & Madigan 2020; Fragione et al. -2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -Collaboration et al. 2020). +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. +The GW emission from EMRIs and IM- +RIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. 
While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. +Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljevi´c 2003; +Maillard et al. 2004; G¨urkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madi- +gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). + +10 +Rose et al. Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. -2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter- -actions, in particular grazing collisions, may also result -in tidal disruption events (e.g., Perets et al. 2016; Sam- -sing et al. 2019; Kremer et al. 2021). Thus, the process -outlined here may produce electromagnetic signatures -in addition to GW mergers. -SR thanks the Charles E Young fellowship, the Nina +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre- +mer et al. (2022) for a discussion of electromagnetic sig- +natures from BH-star collisions)6. These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre- +mer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use- +ful discussion. +SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. 
Jura Memorial Graduate Award for support. SR and SN acknowledge the partial support from NASA ATP 80NSSC20K0505. SN thanks Howard and Astrid Preston for their gener- ous support. IL thanks support from the Adams Fellow- ship. SN and RS thank the Bhaumik Institute visitor -program. +program. This work was performed in part at the As- +pen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. REFERENCES Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, PhRvL, 116, 241102, @@ -740,22 +922,42 @@ doi: 10.1103/PhysRevLett.116.241102 doi: 10.1103/PhysRevLett.118.221101 —. 2017b, PhRvL, 119, 141101, doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi: 10.3847/2041-8205/830/1/L1 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, doi: 10.1088/0004-637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi: 10.1007/s41114-018-0013-8 +6 The connection between the observed X-ray sources at the Galac- +tic Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. https://arxiv.org/abs/2109.12119 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, doi: 10.1086/154711 -5 The connection between the observed X-ray sources at the Galac- -tic Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels. Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, 043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, +doi: 10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. 
+2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, 613, 1143, doi: 10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, +doi: 10.1093/mnras/187.2.237 +—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 + +IMBH Formation in Galactic Nuclei +11 +—. 2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, @@ -768,17 +970,20 @@ Binney, J., & Tremaine, S. 1987, Galactic dynamics —. 2008, Galactic Dynamics: Second Edition Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi: 10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, +doi: 10.1093/mnras/179.3.433 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, 427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, doi: 10.1093/mnras/112.2.195 - -IMBH Formation in Galactic Nuclei -9 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, doi: 10.1093/mnras/104.5.273 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, 2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi: 10.3847/1538-4357/aac2c4 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, C. 2012, JCAP, 2012, 054, doi: 10.1088/1475-7516/2012/07/054 @@ -796,16 +1001,35 @@ et al. 1996, Science, 272, 1286, doi: 10.1126/science.272.5266.1286 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, doi: 10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi: 10.1111/j.1365-2966.2005.09937.x +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. 
+2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.x Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi: 10.1086/427142 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi: 10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi: 10.1086/338118 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 110, 221101, doi: 10.1103/PhysRevLett.110.221101 Edgar, R. 2004, NewAR, 48, 843, doi: 10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, +doi: 10.3847/1538-4357/abd93c Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Monthly Notices of the Royal Astronomical Society, 443, 2410, doi: 10.1093/mnras/stu1280 @@ -832,6 +1056,9 @@ doi: 10.3847/1538-4357/ab94bc Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, J. P. 2018, MNRAS, 478, 4030, doi: 10.1093/mnras/sty1262 + +12 +Rose et al. Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, doi: 10.1103/RevModPhys.82.3121 @@ -863,25 +1090,41 @@ Dosopoulou, F. 2018, ApJ, 856, 140, doi: 10.3847/1538-4357/aaafce Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi: 10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi: 10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. 
+2003, ApJ, 592, 1042, doi: 10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi: 10.1088/0004-637X/796/2/106 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the Royal Astronomical Society, 374, 1557, doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, doi: 10.3847/1538-4357/abeb14 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, 45, doi: 10.3847/1538-4357/abb945 +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, +doi: 10.1086/376675 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi: 10.1111/j.1365-2966.2007.12699.x Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, doi: 10.1093/mnras/stz036 - -10 -Rose et al. Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, 690, 1463, doi: 10.1088/0004-637X/690/2/1463 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi: 10.1046/j.1365-8711.1999.02853.x Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, @@ -889,6 +1132,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints, arXiv:2109.06222. https://arxiv.org/abs/2109.06222 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x +McKinney, J. C., & Gammie, C. F. 
2004, ApJ, 611, 977, +doi: 10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, doi: 10.1088/0034-4885/69/9/R01 Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, @@ -898,17 +1150,38 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, 622, L113, doi: 10.1086/429721 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi: 10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi: 10.3847/2041-8213/ac574b Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1088/0004-637X/795/2/102 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, L35, doi: 10.3847/2041-8213/ab4fed + +IMBH Formation in Galactic Nuclei +13 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, 888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, +ApJ, 628, 368, doi: 10.1086/430728 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, 2127, doi: 10.1111/j.1365-2966.2009.14653.x O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., & O’Shaughnessy, R. 2006, ApJ, 637, 937, doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi: 10.1086/319042 +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, +1011, doi: 10.1086/503273 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Stephen R., J. 
2016, ApJ, 823, 113, doi: 10.3847/0004-637X/823/2/113 @@ -926,6 +1199,8 @@ Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, doi: 10.1088/0004-637X/780/2/187 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, A56, doi: 10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi: 10.1093/mnras/stac231 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & Rasio, F. A. 2018, PhRvL, 120, 151101, doi: 10.1103/PhysRevLett.120.151101 @@ -939,6 +1214,8 @@ Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, 100, 043009, doi: 10.1103/PhysRevD.100.043009 Sari, R., & Fragione, G. 2019, ApJ, 885, 24, @@ -948,6 +1225,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. doi: 10.1086/339917 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, doi: 10.1086/519309 +Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi: 10.1051/0004-6361/201730452 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, doi: 10.1086/156521 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, @@ -963,6 +1242,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv e-prints. https://arxiv.org/abs/1603.02709 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d +Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi: 10.1093/mnras/stv2281 The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, arXiv:2009.01075. https://arxiv.org/abs/2009.01075 @@ -977,13 +1260,19 @@ Vink, J. S., Higgins, E. R., Sander, A. 
A. C., & Sabhahit, G. N. 2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 -IMBH Formation in Galactic Nuclei -11 +14 +Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi: 10.1086/379767 Woosley, S. E. 2017, ApJ, 836, 244, doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi: 10.1046/j.1365-8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi: 10.1088/0004-637X/761/2/129 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 2014, Monthly Notices of the Royal Astronomical Society, 440, 1263, doi: 10.1093/mnras/stu351 diff --git a/read/results/pypdf/2201.00022.txt b/read/results/pypdf/2201.00022.txt index 08cbfa5..069fe3c 100644 --- a/read/results/pypdf/2201.00022.txt +++ b/read/results/pypdf/2201.00022.txt @@ -1,4 +1,4 @@ -Draft version January 4, 2022 +Draft version July 7, 2022 Typeset using L ATEXtwocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei Sanaea C. Rose,1, 2Smadar Naoz,1, 2Re’em Sari,3and Itai Linial3 @@ -7,17 +7,22 @@ Sanaea C. Rose,1, 2Smadar Naoz,1, 2Re’em Sari,3and Itai Linial3 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70 M ⊙. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and -above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relaxation, - we find that this channel can be quite efficient, forming IMBHs as massive as 104M⊙. 
Our -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This formation - channel also has implications for observations. Collisions between stars and BHs can produce -electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally, -formed through this channel, both black holes in the mass gap and IMBHs can merge with the supermassive - black hole at the center of a galactic nucleus through gravitational waves. These gravitational -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). +50−70 M ⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104M⊙. This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. 
+Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). 1.INTRODUCTION The recently detected gravitational wave source GW190521 (The LIGO Scientific Collaboration et al. @@ -31,18 +36,18 @@ GW170104, and GW170814 fall within the mass gap (e.g., Abbott et al. 2016, 2017a,b). BH mergers that form second generation BHs and, in some cases, intermediate mass BHs (IMBHs), these gravitational wave -(GW) events can occur in globular clusters, young stellar - clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez - et al. 2019; Fishbach et al. 2020; Mapelli et al. -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. -2021; Arca Sedda et al. 2021). However, IMBHs are -not limited to these locations and may reside in galacCorresponding +(GW) events can occur in globular clusters, young stelCorresponding author: Sanaea C. Rose srose@astro.ucla.edu 1Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski -et al. 2020a; Renzo et al. 2020; Vink et al. 2021).tic nuclei as well. Several studies propose that our +et al. 2020a; Renzo et al. 2020; Vink et al. 2021).lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez + et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). However, IMBHs are +not limited to these locations and may reside in galactic + nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc (e.g., Hansen & Milosavljevi´ c 2003; Maillard et al. 
2004; G¨ urkan & Rasio 2005; Gualandris & Merritt 2009; Chen @@ -58,24 +63,26 @@ Valiante et al. 2016) or from direct collapse of accumulated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to survive - galaxy evolution and mergers to present day (e.g., + galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 + Rose et al. Rashkov & Madau 2014), with significant effects on their stellar and even dark matter surroundings (e.g., Bertone et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another popular formation channel relies on the coalescence of -many stellar-mass black holes. For example, IMBHs +many stellar-mass black holes, which may seed objects +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs may form in the centers of globular clusters, where fewbody interactions lead to the merger of stellar-mass BHs (e.g., O’Leary et al. 2006; G¨ urkan et al. 2006; Blecha -et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro-arXiv:2201.00022v1 [astro-ph.GA] 31 Dec 202 - Rose et al. -driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. +et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez + et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; -Freitag et al. 2006; Kremer et al. 2020; Gonz´ alez et al. -2021; Di Carlo et al. 2021). +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 2020; Gonz´ alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). 
The main obstacle to sequential BH mergers in clusters is that the merger recoil velocity kick often exceeds the escape velocity from the cluster (e.g., Schnittman @@ -91,17 +98,27 @@ back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that 103−104M⊙IMBHs can form efficiently over the lifetime of a cluster. -However, as discussed in Section 2.2, direct star-BH +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision in galactic nuclei, making the former a promising channel - for BH growth. We propose that IMBHs can form -naturally within the central pc of a SMBH in a galactic -center. Specifically, these IMBHs form through repeated -collisions with main sequence stars , accreting some or -all of the star’s mass depending on the details of the -collision. We demonstrate that this channel can create -IMBHs with masses as large as 104M⊙, depending on -the density profile of the surrounding stars. + for BH growth. In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star collisions + are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). We propose that IMBHs can form naturally +within the central pc of a galactic center through repeated + collisions between BHs and main sequence stars . +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow appreciably + in size. 
We demonstrate that this channel cancreate IMBHs with masses as large as 104M⊙, an upper +limit that depends on the density profile of the surrounding + stars and the efficiency of the accretion. The paper is structured as follows: we describe relevant physical processes and our approach in Section 2. In particular, we provide an overview of collisions in @@ -110,17 +127,18 @@ Section 2.2 and present our statistical approach in Section mass growth with each collision and presents analytic solutions to our equations in two different regimes, efficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections 2.5 -and 2.7 discuss implications for GW merger events between +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in -Section 2.8. Finally, we discuss and summarize our findings +Section 2.9. Finally, we discuss and summarize our findings in Section 3. 2.METHODOLOGY We consider a population of stellar mass BHs embedded in a cluster of 1 M ⊙stars. When stars and BHs collide, the BHs can accrete mass. The growth rate depends - on the physical processes outlined below. We usea statistical approach to estimate the stellar encounters + on the physical processes outlined below. We use +a statistical approach to estimate the stellar encounters and final IMBH masses. 2.1. Physical Picture We consider a population of BHs within the inner few @@ -148,7 +166,17 @@ build a comprehensive physical picture of BH growth at all distances from the SMBH, including within 0 .01 pc. Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other -observationally motivated distributions in Section 2.8, +IMBH Formation in Galactic Nuclei 3 +Figure 1. 
We plot the relevant timescales, including collision + (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α= 1 (dashed curve) to α= 2 (dark, solid curve). The dark +blue line represents the time for a 105M⊙BH to merge with +the SMBH through GW emission. +observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribution’s impact for future work. 2.2. Direct Collisions @@ -173,23 +201,12 @@ et al. (2020), f1(e•) andf2(e•) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while nandσare simply evaluated at the semimajor axis of the orbit (see below). Note -IMBH Formation in Galactic Nuclei 3 -Figure 1. We plot the relevant timescales, including collision - (green), relaxation (gold), and BH-BH GW capture -(purple), for a single BH in the GN as a function of distance -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. The timescales depend on the -density, so we adopt a range of density profiles, bounded by -α= 1 (dashed curve) to α= 2 (dark, solid curve). The dark -blue line represents the time for a 105M⊙BH to merge with -the SMBH through GW emission. that this timescale equation includes the effects of gravitational focusing, which enhances the cross-section of interaction. 
Assuming a circular orbit for simplicity, we plot the timescale for a BH orbiting in the GN to collide with -a 1M⊙star as a function of distance from the SMBH -in Figure 1.2As this timescale depends on the density +a 1M⊙star as a function of distance from the SMBHin Figure 1.2As this timescale depends on the density of surrounding stars, we adopt a density profile of the form: ρ(r•) =ρ0(r• @@ -206,8 +223,7 @@ slope of the power law. We assume that a uniform population in the GN, making the stellar number density: n(r•) =ρ(r•) 1M⊙. (3) -2We note that the eccentricity has a very minor effect on the -collision timescale (Rose et al. 2020).The collision timescale also depends on the velocity dispersion, +The collision timescale also depends on the velocity dispersion, which we express as: σ(r•) =√ GM• @@ -235,6 +251,9 @@ than the BH-BH GW collision timescale (for the relevant 2018, for example). Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the GN. +2We note that the eccentricity has a very minor effect on the +collision timescale (Rose et al. 2020). + Rose et al. 2.3. Statistical Approach to Collisions We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an @@ -252,12 +271,6 @@ expected to accrete in a single collision (see Section 2.4 for details). We recalculate the collision timescale using the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3. -3Closer to the SMBH, ∆ tmay exceed the collision timescale by -a factor of a few for steep density profiles. We include a safeguard - in our code which takes the ratio tcoll/∆tand rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. - Rose et al. 2.4. Mass Growth When a BH collides with a star, it may accrete material and grow in mass. 
The details of the accretion @@ -268,20 +281,41 @@ passing through the star’s center. We begin by considering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R ⊙. Qualitatively, one -might expect that the BH could accrete the entire star +might expect that the BH could capture the entire star (i.e., ∆m∼1 M⊙) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH -accretes a “tunnel” of material through the star. This +captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1 R⊙. +approximately 1 R⊙. For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our results + imply. We discuss accretion in Section 2.5. To estimate ∆ m, we begin with the Bondi-Hoyle accretion rate, ˙ m, given by: ˙m=4πG2m2 BHρstar (c2s+σ2)3/2, (5) +3Closer to the SMBH, ∆ tmay exceed the collision timescale by +a factor of a few for steep density profiles. We include a safeguard + in our code which takes the ratio tcoll/∆tand rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the inital mass equal to 10 M⊙for all of the BHs. 
Assuming +the density profile of stars has α= 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. wherecsis the speed of sound in the star and ρstaris its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). We @@ -309,20 +343,9 @@ which depends on mBH, decreases. Simultaneously, is exponential growth (see discussion and details surrounding Eq. (8)). In Figure 2, however, the simulations assume α= 1 for the stellar density profile, ensuring - the collision timescale is long compared to the simulation - time, 10 Gyr. Therefore, the BHs grow slowly, -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M⊙for all of the BHs. Assuming -the density profile of stars has α= 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. + the collision timescale is long compared to the sim- +IMBH Formation in Galactic Nuclei 5 +ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: mfinal(tcoll→const.) =minitial + ∆mT @@ -350,7 +373,6 @@ star’s mass. Eq. 7 does not apply for other values of α. 
When the collision timescale is shorter, corresponding to a larger indexαin the density profile (see Figure 1), the growth -IMBH Formation in Galactic Nuclei 5 is very efficient and ∆ mquickly approaches 1 M ⊙. Consequently, while we can now assume ∆ m= 1 M ⊙, we can no longer assume the collision timescale is constant. @@ -362,7 +384,49 @@ mfinal(∆m→1 M⊙) =−A+ (minitial +A)eCT(8) whereA=σ2Rstar/GandC= 2πGn starRstar/σ. As an example, we plot this curve in purple for the α= 2 case, in Figure 3, which agrees with the simulated masses. -2.5. GW Inspiral +2.5. Uncertainties in Accretion +We note that the ∆ Mcalculated in this proof-ofconcept + study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challenging; + this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heating, + and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchevet al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star results + in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity remain + uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accumulate + significant amounts of mass over many collisions +even if it accretes very little in a single one. We explore + the viability of our channel using a physically motivated + inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 
2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vesc/(cη), wherevescis +the escape velocity from the BH at 1 R ⊙andηis the +accretion efficiency at the ISCO. We take ηto be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fraction accreted is consistent with Kremer et al. (2022) +equation 19 for s= 0.5, which is a reasonable value for +s, a free parameter between 0 .2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radiation + away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumenshchev + 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral When a BH is close to the SMBH, GW emission can circularize and shrink its orbit. We implement the effects of GW emission on the BH’s semimajor axis and @@ -374,18 +438,23 @@ tGW≈2.9×1012yr(M• 106M⊙)−1 ×(M•+mBH 2×106M⊙)−1(a• -10−4pc)4 +10−2pc)4 ×f(e•)(1−e2 •)7/2, (9) wheref(e•) is a function of e•. For all values of e•, f(e•) is between 0 .979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 ×105M⊙BH in Figure 1 in blue. + Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of αin the density profile, shallow ( α= 1) to +cuspy (α= 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. In our simulations, we assume a BH has merged with the SMBH when the condition tGW< telapsed is met. When this condition is satisfied, we terminate mass growth through collisions for that BH.4 -2.6. 
IMBH growth +2.7. IMBH growth As detailed above, BH-stellar collisions can increase the BH masses as a function of time. Here, we examine the sensitivity of the BH growth to the density power @@ -395,23 +464,23 @@ law. From Eq. (1), it is clear that the growth rate depends profiles, will result in more efficient mass growth. In Figure 1, larger values of αlead to collision timescales in the GN’s inner region, inwards of 0 .25 pc, that are -4For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking.much smaller that the 10 Gyr simulation time. Figure 3 +much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of a uniform distribution of BHs with initial conditions detailed in Section 2.1 for five αvalues, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards of 0.25 pc for the α= 2 case. -2.7. Gravitational Wave Mergers and Intermediate +2.8. Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. -Following the method detailed in Section 2.5, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion tGW< telapsed , we mark -it as merged with the SMBH. We assume that at this +4For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking.it as merged with the SMBH. 
We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit until it undergoes an extreme or intermediate mass ratio @@ -421,7 +490,7 @@ merger. It is interesting to note that even in the absence of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. -2.8. Two Body Relaxation Processes +2.9. Two Body Relaxation Processes A BH orbiting the SMBH experiences weak gravitational interactions with other objects in the GN. Over a relaxation time, these interactions alter its orbit about @@ -438,21 +507,26 @@ its orbital energy and angular momentum by order of themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). In -Figure 1, we plot the relaxation timescale in gold for a -range ofα. We note that the Bahcall & Wolf (1976) profile,α= - 7/4, corresponds to zero net flux and therefore -does not preferentially migrate objects inward. -Additionally, because they are more massive on -average than the surrounding objects, BHs are expected - to segregate inwards in the GN (e.g., Shapiro -& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star +IMBH Formation in Galactic Nuclei 7 +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Metzger + 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. 
We note that the Bahcall + & Wolf (1976) profile, α= 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on average + than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; Miralda-Escud´ e & Gould 2000; Baumgardt et al. 2004). - Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of αin the density profile, shallow ( α= 1) to -cuspy (α= 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and -merger times of these BHs. They sink toward the SMBH on the mass segregation timescale,tseg≈⟨M∗⟩/mBH×trelax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an @@ -469,8 +543,8 @@ of zero and a standard deviation of ∆ vrlx/√ P•/trlx(see Bradnick et al. 2017, for an approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. in prep for full set -of equations. +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. We account for the effects of relaxation processes, including mass-segregation, using a multi-faceted approach. We begin by migrating each BH towards the @@ -486,12 +560,12 @@ scattering for both black holes and stars. Within this radius, BHs will then settle onto a Bahcall-Wolf profile, while the stars may follow a shallower profile, with approximatelyn⋆∝r−1.5, inwards of the transition radius -(Linial & Sari in prep.).Therefore, after the initial mass segregation, we allow +(Linial & Sari in prep.). 
+Therefore, after the initial mass segregation, we allow the BHs to begin diffusing over a relaxation timescale, their orbital parameters changing slowly through a random process. In this random process, some of the BHs -may migrate closer to the SMBH. We terminate mass -growth when the BH enters the inner 200 au of the GN, +may migrate closer to the SMBH. We terminate massgrowth when the BH enters the inner 200 au of the GN, within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest known star to the SMBH (e.g., Ghez et al. 2005). @@ -511,23 +585,14 @@ towards the SMBH, their concentration in the inner region of the GN increases, allowing them to dominate the scattering. We reserve the inclusion of these interactions for future study. -2.9. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure 4, two-body relaxation processes result in more EMRIs and IMRIs events. These processes allow BHs that begin further from the SMBH to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are -initially closer to the SMBH by allowing them to dif- -IMBH Formation in Galactic Nuclei 7 -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) -for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assumeα= 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more -BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. 
-The dashed, faded lines represent the corresponding initial histograms. We assume α= 1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. -fuse out of the inner region where collisions are efficient. +initially closer to the SMBH by allowing them to diffuse + out of the inner region where collisions are efficient. As can be seen in Figure 4, the net result is that more BHs grow, but the maximum mass is lower compared to the scenario that ignores two-body relaxation. The @@ -548,18 +613,28 @@ BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as 103−4M⊙and may result in many IMBH-SMBH mergers - (intermediate-mass ratio inspiral, IMRIs) and EMRIs. - + (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. + Rose et al. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α= 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α= 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH will grow in mass. 
The increase may equal star’s entire mass if the relative velocity is smaller than the escape velocity from the BH at 1 R ⊙. However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this -limit, the BH accretes a “tunnel” of material through +limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accretion. In our statistical analysis, we account for BondiHoyle-Lyttleton - accretion and find that BHs outside of10−2pc from the SMBH can accrete the entire star (see + accretion and find that BHs outside of +10−2pc from the SMBH can capture the entire star (see Figure 2). The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to @@ -575,6 +650,27 @@ or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller ( ∼104M⊙). Additionally, the final masses have no apparent dependence on distance from the SMBH (see Figure 4). +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and represents + an active field of study (e.g., Blandford & Begelman + 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). To assess the limits of our model,we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Figure 4. Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M ⊙. 
Furthermore, if we increase this ∆ Mestimate + by a factor of 2 (i.e., use η= 0.05), the simulation + produces a 3 .5×103M⊙IMBH for the same initial +conditions. Our proof-of-concept demonstrates that collisions + between BH and stars are an important process +that should be taken into account in dense places such +as a GN. Mass growth through BH-main-sequence star collisions may act in concert with other IMBH formation channels, such as compact object binary mergers (e.g., @@ -593,16 +689,83 @@ Kozai Lidov mechanism, leaving behind a single star or a single compact object (e.g., Stephan et al. 2016, 2019; Hoang et al. 2018). Additionally, to be susceptible to evaporation, BH binaries must have a wider configuration. - Otherwise, they will be more tightly bound that - Rose et al. -the average kinetic energy of the surrounding objects, -and will only harden through weak gravitational interactions - with neighboring stars (see for example Figure + Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects +and will only harden through weak gravitational inter- +IMBH Formation in Galactic Nuclei 9 +actions with neighboring stars (see for example Figure 6 in Rose et al. 2020). -Not included in this study, collisions between the BH -and other compact objects will increase the BH growth -rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fragione - et al. 2021) and even neutron star BH mergers +We note that we assume a steady-state and treat the +stars as a reservoir in this model. Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 
2006), and star +formation episodes can occur as often as every ∼5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within ≲0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5−10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this picture + within∼0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar population + to interact with outside of ∼0.01 pc.5At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M ⊙stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii,≲0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disruption + of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g., Balberg et al. 2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. +An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 2006a; L¨ ockmann & Baumgardt +2008). L¨ ockmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density profile + to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replenished + on 100 Myr timescales (Baumgardt et al. 2006a). +Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. 
+While there are many competing dynamical processes +that shape the stellar density profile, we stress that α +5In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0 .001 pc for larger values +ofα; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale.can simply be chosen to encapsulate all of the relevant +physics. A value for αthat is constrained by observations + must already reflect ongoing processes like starstar + collisions and replenishment. Sch¨ odel et al. (2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M ⊙. This estimate is consistent to order of magnitude + with our α= 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α= 1.25 +leads to a maximum IMBH mass of 140 M ⊙. Furthermore, + while the stellar mass within 0.01 pc may be a +few hundred M ⊙, Do et al. (2019) and GRAVITY Collaboration + et al. (2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thousand + M ⊙, or 0.1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. In that case, the 180 M ⊙is what remains of the +stars, while BHs and IMBHs make up the ∼1000 M ⊙ +in the innermost region. +Also not included in this study, collisions between the +BH and other compact objects will increase the BH +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers (e.g., Hoang et al. 2020) become more likely as the BHs increase in mass through stellar collisions. As a result, the BH-BH collision timescale, discussed in Section 2.2, @@ -611,43 +774,53 @@ BHs to grow through this channel in addition to stellar collisions. 
Additionally, this compact object mergers result in GW recoil, which may have a large impact on the dynamics (e.g., Baibhav et al. 2020; Fragione et al. -2021) +2021). The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation -processes, GW emission causes BHs to sink towards the -SMBH and eventually undergo a merger. As a result, -the GN environment is conducive to the formation of -EMRIs and IMRIs. The GW emission from EMRIs and -IMRIs is expected to be at mHz frequencies, making -them promising candidates for LISA to observe. While -the exact rate calculation is beyond the scope of this -study, the mechanism outlined here seems very promising.Our - results also suggest that IMBHs are likely to exists - in many galactic nuclei, as well as within our own -galactic center. This implication seems to be consistent - with recent observational and theoretical studies -(e.g., Hansen & Milosavljevi´ c 2003; Maillard et al. 2004; -G¨ urkan & Rasio 2005; Gualandris & Merritt 2009; Chen -& Liu 2013; Generozov & Madigan 2020; Fragione et al. -2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -Collaboration et al. 2020). +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. The GW emission from EMRIs and IMRIs + is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. +Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljevi´ c 2003; +Maillard et al. 
2004; G¨ urkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madigan + 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). + Rose et al. Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. -2018; Zhu et al. 2018; Cheng et al. 2018)5. These interactions, - in particular grazing collisions, may also result -in tidal disruption events (e.g., Perets et al. 2016; Samsing - et al. 2019; Kremer et al. 2021). Thus, the process -outlined here may produce electromagnetic signatures -in addition to GW mergers. -SR thanks the Charles E Young fellowship, the Nina +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer + et al. (2022) for a discussion of electromagnetic signatures + from BH-star collisions)6. These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer + et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful + discussion. +SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. Jura Memorial Graduate Award for support. SR and SN acknowledge the partial support from NASA ATP 80NSSC20K0505. SN thanks Howard and Astrid Preston for their generous support. IL thanks support from the Adams Fellowship. SN and RS thank the Bhaumik Institute visitor -program. +program. This work was performed in part at the Aspen + Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. REFERENCES Abbott, B. 
P., Abbott, R., Abbott, T. D., et al. 2016, PhRvL, 116, 241102, @@ -656,21 +829,39 @@ doi: 10.1103/PhysRevLett.116.241102 doi: 10.1103/PhysRevLett.118.221101 —. 2017b, PhRvL, 119, 141101, doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi: 10.3847/2041-8205/830/1/L1 Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, doi: 10.1088/0004-637X/780/2/148 -Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi: 10.1007/s41114-018-0013-8 +6The connection between the observed X-ray sources at the Galactic + Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels.Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. https://arxiv.org/abs/2109.12119 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, doi: 10.1086/154711 -5The connection between the observed X-ray sources at the Galactic - Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels.Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, +Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, 043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, +doi: 10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. +2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, 613, 1143, doi: 10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, +doi: 10.1093/mnras/187.2.237 +—. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 +IMBH Formation in Galactic Nuclei 11 +—. 
2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, @@ -683,15 +874,20 @@ Binney, J., & Tremaine, S. 1987, Galactic dynamics —. 2008, Galactic Dynamics: Second Edition Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi: 10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, +doi: 10.1093/mnras/179.3.433 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, 427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, doi: 10.1093/mnras/112.2.195 -IMBH Formation in Galactic Nuclei 9 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, doi: 10.1093/mnras/104.5.273 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, 2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi: 10.3847/1538-4357/aac2c4 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, C. 2012, JCAP, 2012, 054, doi: 10.1088/1475-7516/2012/07/054 @@ -709,16 +905,34 @@ et al. 1996, Science, 272, 1286, doi: 10.1126/science.272.5266.1286 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, doi: 10.1086/156685 -Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi: 10.1111/j.1365-2966.2005.09937.x +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. +2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.xDall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. 
F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi: 10.1086/427142 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi: 10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi: 10.1086/338118 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 110, 221101, doi: 10.1103/PhysRevLett.110.221101 Edgar, R. 2004, NewAR, 48, 843, doi: 10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, +doi: 10.3847/1538-4357/abd93c Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Monthly Notices of the Royal Astronomical Society, 443, 2410, doi: 10.1093/mnras/stu1280 @@ -737,13 +951,15 @@ Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & Rasio, F. A. 2004, MNRAS, 352, 1, doi: 10.1111/j.1365-2966.2004.07914.x Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & -Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, +Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 +Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, 649, 91, doi: 10.1086/506193 Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, doi: 10.3847/1538-4357/ab94bc Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, J. P. 2018, MNRAS, 478, 4030, doi: 10.1093/mnras/sty1262 + Rose et al. Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, doi: 10.1103/RevModPhys.82.3121 @@ -775,23 +991,40 @@ Dosopoulou, F. 2018, ApJ, 856, 140, doi: 10.3847/1538-4357/aaafce Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi: 10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi: 10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. 
+2003, ApJ, 592, 1042, doi: 10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi: 10.1088/0004-637X/796/2/106 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the Royal Astronomical Society, 374, 1557, doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, doi: 10.3847/1538-4357/abeb14 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, -45, doi: 10.3847/1538-4357/abb945 +45, doi: 10.3847/1538-4357/abb945Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, +doi: 10.1086/376675 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 —. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +L¨ ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi: 10.1111/j.1365-2966.2007.12699.x Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, doi: 10.1093/mnras/stz036 - Rose et al. Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, 690, 1463, doi: 10.1088/0004-637X/690/2/1463 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi: 10.1046/j.1365-8711.1999.02853.x Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, @@ -799,6 +1032,15 @@ M., & Artale, M. C. 2021a, arXiv e-prints, arXiv:2109.06222. https://arxiv.org/abs/2109.06222 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x +McKinney, J. 
C., & Gammie, C. F. 2004, ApJ, 611, 977, +doi: 10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, doi: 10.1088/0034-4885/69/9/R01 Miralda-Escud´ e, J., & Gould, A. 2000, ApJ, 545, 847, @@ -808,17 +1050,36 @@ Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, 622, L113, doi: 10.1086/429721 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi: 10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi: 10.3847/2041-8213/ac574b Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1088/0004-637X/795/2/102 Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, L35, doi: 10.3847/2041-8213/ab4fed +IMBH Formation in Galactic Nuclei 13 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, 888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, +ApJ, 628, 368, doi: 10.1086/430728 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, 2127, doi: 10.1111/j.1365-2966.2009.14653.x O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., & O’Shaughnessy, R. 2006, ApJ, 637, 937, doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi: 10.1086/319042 +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, +1011, doi: 10.1086/503273 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Stephen R., J. 
2016, ApJ, 823, 113, doi: 10.3847/0004-637X/823/2/113 @@ -835,7 +1096,10 @@ Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, doi: 10.1088/0004-637X/780/2/187 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, -A56, doi: 10.1051/0004-6361/202037710Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & +A56, doi: 10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi: 10.1093/mnras/stac231 +Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & Rasio, F. A. 2018, PhRvL, 120, 151101, doi: 10.1103/PhysRevLett.120.151101 Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, @@ -844,10 +1108,11 @@ Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, Phys. Rev. D, 100, 043027, doi: 10.1103/PhysRevD.100.043027 Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, -113, doi: 10.3847/1538-4357/abc557 -Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., +113, doi: 10.3847/1538-4357/abc557Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, 100, 043009, doi: 10.1103/PhysRevD.100.043009 Sari, R., & Fragione, G. 2019, ApJ, 885, 24, @@ -857,6 +1122,8 @@ Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. doi: 10.1086/339917 Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, doi: 10.1086/519309 +Sch¨ odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi: 10.1051/0004-6361/201730452 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, doi: 10.1086/156521 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, @@ -872,6 +1139,10 @@ Spitzer, L. 1987, Dynamical evolution of globular clusters Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv e-prints. 
https://arxiv.org/abs/1603.02709 —. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d +Stone, N. C., K¨ upper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi: 10.1093/mnras/stv2281 The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, arXiv:2009.01075. https://arxiv.org/abs/2009.01075 @@ -885,12 +1156,18 @@ Society, 457, 3356, doi: 10.1093/mnras/stw225 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, G. N. 2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 -IMBH Formation in Galactic Nuclei 11 + Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi: 10.1086/379767 Woosley, S. E. 2017, ApJ, 836, 244, -doi: 10.3847/1538-4357/836/2/244Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi: 10.1046/j.1365-8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi: 10.1088/0004-637X/761/2/129Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 2014, Monthly Notices of the Royal Astronomical Society, 440, 1263, doi: 10.1093/mnras/stu351 Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, diff --git a/read/results/tika/1601.03642.txt b/read/results/tika/1601.03642.txt index 711312f..182436f 100644 --- a/read/results/tika/1601.03642.txt +++ b/read/results/tika/1601.03642.txt @@ -25,6 +25,19 @@ + + + + + + + + + + + + + @@ -106,19 +119,14 @@ in T , as measured by P , improves with experience E. Σ ϕ x0 - x1 - x2 - x3 xn w0 - w1 - w2 w3 @@ -178,9 +186,7 @@ functions called artificial neurons which take n ∈ N num- bers x1, . . . , xn ∈ R as input, multiply them with weights w1, . . . , wn ∈ R, add them and apply a so called activation function ϕ as visualized in Figure 1(a). 
One example of such -an activation function is the sigmoid function ϕ(x) = 1 - -1+e−x . +an activation function is the sigmoid function ϕ(x) = 11+e−x . Those functions act as building blocks for more complex systems as they can be chained and grouped in layers as visualized in Figure 1(b). The interesting question is how @@ -523,7 +529,6 @@ music. Instead of taking notes directly or MIDI files, Nayebi and Vitelli took raw audio waveforms as input. Those audio waveforms are feature vectors given for time steps 0, 1, . . . , t− 1, t. The network is given those feature vectors X1, . . . , Xt - and has to predict the following feature vector Xt+1. This means it continues the music. As the input is continuous, the problem was modeled as a regression task. Discrete Fourier diff --git a/read/results/tika/1602.06541.txt b/read/results/tika/1602.06541.txt index 5778993..3cf5654 100644 --- a/read/results/tika/1602.06541.txt +++ b/read/results/tika/1602.06541.txt @@ -25,6 +25,19 @@ + + + + + + + + + + + + + @@ -324,9 +337,8 @@ car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: -• mean accuracy: 1 +• mean accuracy: 1k · -k · ∑k i=1 @@ -348,8 +360,8 @@ ti−nii+ j=1 nji ∈ [0, 1] - • frequency weighted intersection over union: + ( ∑k i=1 ti) @@ -364,8 +376,8 @@ ti−nii+ j=1 nji ∈ [0, 1] - Another problem might be pixels which cannot be + assigned to one of the known classes. For this reason, [SWRC06] makes use of a void class. This class gets completely ignored for all quality measures. Hence the @@ -383,11 +395,10 @@ benchmark [FKG13] or crypt segmentation as done by [CRSS14]. It is calculated as “the harmonic mean of the precision and recall” [PH05]: -Fβ = (1 + β)2 -tp +Fβ = (1 + β) +2 tp (1 + β2) · tp + β2 · fn + fp - where β = 1 is chosen in most cases and tp means true positive, fn means false negative and fp means false positive. 
@@ -938,9 +949,7 @@ duction of slack variables to relax the requirement of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter -C ∈ R+ - -0 . The bigger C, the more errors are +C ∈ R+0 . The bigger C, the more errors are accepted. The new optimization problem is: minimize @@ -1058,9 +1067,11 @@ binary classifiers to multi-class classification is the one-vs-all and the one-vs-one strategy. In the one-vs-all strategy n classifiers have to be trained which can distinguish one of the n classes against -all other classes. In the one-vs-one strategy n2−n +all other classes. In the one-vs-one strategy n +2−n 2 + classifiers are trained; one classifier for each pair of classes. @@ -1174,7 +1185,6 @@ cliques of size two x,y = (x1, x2) is [KP06] { +w if x1 6= x2 - −w if x1 = x2 According to [Mur12], the most common way of @@ -1211,9 +1221,7 @@ P (x,y) and joint probability distribution -P (y|x) = -1 - +P (y|x) = 1 Z(x) ∏ diff --git a/read/results/tika/1707.09725.txt b/read/results/tika/1707.09725.txt index 194789f..123c7a6 100644 --- a/read/results/tika/1707.09725.txt +++ b/read/results/tika/1707.09725.txt @@ -25,6 +25,19 @@ + + + + + + + + + + + + + @@ -443,15 +456,14 @@ b kw 2 c∑ -ix=1−d kw -2 -e +ix=1−d kw2 e b kh 2 c∑ -iy=1−d kh +iy=1−d +kh 2 e @@ -684,9 +696,7 @@ smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five Please note that the result of a filtering operation is again an image. This means filters can be applied successively. While each pixel after one filtering operation with a 3 × 3 - filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3× 3 - filters increase the area of the original image which influenced the output. The output is then influenced by 25 pixel. This is called the receptive field. The kind of pattern which is detected by a filter is called a feature. 
The bigger the receptive field is, the more complex @@ -785,15 +795,14 @@ b kw 2 c∑ -ix=1−d kw -2 -e +ix=1−d kw2 e b kh 2 c∑ -iy=1−d kh +iy=1−d +kh 2 e @@ -917,12 +926,12 @@ functions as introduced in [LGT16]. Name Definition Used by Max pooling max { a ∈ A } [BPL10, KSH12] -Average / mean pooling 1 +Average / mean pooling 1|A| -|A| ∑ +a∈A a LeNet-5 [LBBH98] and [KSlB -a∈A a LeNet-5 [LBBH98] and [KSlB+10] ++10] `2 pooling √∑ @@ -936,7 +945,8 @@ Table 2.1.: Pooling types for a set A of activations a ∈ R. (*) For stochastic pooling, each of the p×p activation values ai in the pooling region gets picked with probability pi = ai∑ -aj∈A aj +aj∈A +aj . This assumes the activations ai are non-negative. Pooling is applied for three reasons: To get local translational invariance, to get invariance @@ -1028,9 +1038,7 @@ where � is the Hadamard product Hence every value of the input gets set to zero with a dropout probability of p. Typically, Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob- ability than later layers. In order to keep the expected output at the same value, the -output of a dropout layer is multiplied with 1 - -1−p when dropout is enabled [Las17, tf-16b]. +output of a dropout layer is multiplied with 11−p when dropout is enabled [Las17, tf-16b]. At inference time, dropout is disabled. Dropout is usually only applied after fully connected layers, but not after convolutional @@ -1068,28 +1076,22 @@ x̂(k) = x(k) − x̄(k)√ s′[x(k)]2 + ε -with x̄(k) = 1 -m - +with x̄(k) = 1m ∑m -i=1 x +i=1 x (k) -i being the sample mean and s′[x(k)]2 = 1 - -m +i being the sample mean and s +′[x(k)]2 = 1m ∑m -i=1(x +i=1(x (k) i − x̄(k)) the sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0 - -being a small constant to prevent division by zero and x(k) -i is the activation of neuron k for - +being a small constant to prevent division by zero and x(k)i is the activation of neuron k for training sample i. 
Additionally, for each activation x(k) two parameters γ(k), β(k) are introduced which scale @@ -1234,9 +1236,7 @@ aspect to using the group network without an aggregation block. Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The idea is to connect each convolutional layer directly to subsequent convolutional layers. Traditional CNNs with L layers and one input layer have L connections between layers, -but dense blocks have L(L+1) - -2 connections between layers. The input feature maps are +but dense blocks have L(L+1)2 connections between layers. The input feature maps are concatenated in depth. According to the authors, this prevents features from being re- learned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors @@ -1488,7 +1488,10 @@ x∈X K∑ k=1 -[txk log(oxk) + (1− txk) log(1− oxk)]︸ ︷︷ ︸ +[txk log(o +x +k) + (1− txk) log(1− oxk)]︸ ︷︷ ︸ + cross-entropy data loss +λ1 · @@ -1501,13 +1504,15 @@ w∈W `2︷ ︸︸ ︷∑ w∈W -w2 - -︸ ︷︷ ︸ +w2︸ ︷︷ ︸ model complexity loss where W are the weights, X is the training data set, K ∈ N≥0 is the number of classes and -txk indicates if the training example x is of class k. oxk is the output of the classification +txk indicates if the training example x is of class k. o + +x +k is the output of the classification + algorithm which depends on the weights. λ1, λ2 ∈ [0,∞) weights the regularization and is typically smaller than 0.1. @@ -1563,8 +1568,8 @@ j=1 cij be the number of training samples for class i. The most common quality criterion is accuracy: accuracy(c) = - ∑k + i=1 cii∑k i=1 ti @@ -1975,9 +1980,7 @@ Meiosis Networks are introduced in [Han89]. 
In contrast to most MLPs and CNNs, w weights are deterministic and fixed at prediction time, each weight wij in Meiosis networks follows a normal distribution: -wij ∼ N (µij , σ -2 -ij) +wij ∼ N (µij , σ2ij) 28 @@ -1985,8 +1988,7 @@ ij) 3.2. Pruning approaches -Hence every connection has two learned parameters: µij and σ2 -ij . +Hence every connection has two learned parameters: µij and σ2ij . The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled @@ -2013,7 +2015,6 @@ layers or add skip connections. Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on- line handwriting recognition. It makes use of the confusion matrix C = (cij) ∈ Nk×k≥0 - (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix S with sij = sji = cij · cji. The maximum of S defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. @@ -2058,10 +2059,7 @@ A much simpler and computationally cheaper pruning criterion is the weight magni w ← -w if w ≥ θ - -0 otherwise - +w if w ≥ θ0 otherwise 3.3. Genetic approaches The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which @@ -2154,7 +2152,6 @@ Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualiz The root classifier C0 has to distinguish six coarse classes (pedestrian, four+-wheelers, traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C0 predicts a pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C0 - predicts traffic sign, then another classifier has to predict if it is a speed limit, a sign indicating danger or something else. If C0, however, predicts road, then no other classifier will become active. @@ -2264,7 +2261,6 @@ but besides that it is also chosen uniformly random. 
Simple row-swapping can exploit local improvements. For example, in the context of ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier - and both dog classes Dalmatian and Greyhound next to each other. Both the two clusters of dog breeds could be separated by car and bus due to random chance. Moving any single class increases the score, but moving either one of the dog breed clusters or the vehicle @@ -2288,7 +2284,11 @@ Those will be moved to the corners of the confusion matrix by optimizing Equatio Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. With such a permutation, only n− 1 binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions C ′i,i+1 + C ′i+1,i for +classes has to be read. Alternatively, one can calculate the confusions C ′i,i+1 + C + +′ +i+1,i for + each pair of classes which are neighbors in the confusion matrix. The higher this value, the more similar are the classes according to the classifier. Hence a threshold θ can be applied. θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) @@ -2453,7 +2453,6 @@ each power of two there are two Convolution + BN + ELU blocks and one Max poolin block added. This is the framed part in the table. 32× 32 - Input C 32@3× 3/1 @@ -2465,7 +2464,6 @@ C 32@3× 3/1 BN + ELU 16× 16 - max pooling 2× 2/2 C 64@3× 3/1 @@ -2477,7 +2475,6 @@ C 64@3× 3/1 BN + ELU 8× 8 - max pooling 2× 2/2 C 64@3× 3/1 @@ -2485,7 +2482,6 @@ C 64@3× 3/1 BN + ELU 4× 4 - max pooling 2× 2/2 C 512@4× 4/1 (V) @@ -2495,7 +2491,6 @@ BN + ELU Dropout, p = 0.5 1× 1 - C 512@1× 1/1 BN + ELU @@ -2522,8 +2517,7 @@ of kernel size 3× 3 with stride 1. The results for the baseline model evaluated on eight datasets are given in Table 5.2. 
The speed for inference for different GPUs is given in Table 5.3. -Dataset Single Model Accuracy Ensemble of 10 -Training Set Test Set Training Set Test Set +Dataset Single Model Accuracy Ensemble of 10Training Set Test Set Training Set Test Set Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 % CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 % @@ -2540,8 +2534,7 @@ CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For HASYv2 no test time transformations are used. -Network GPU Tensorflow Inference per Training -1 Image 128 images time / epoch +Network GPU Tensorflow Inference per Training1 Image 128 images time / epoch Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s @@ -2667,8 +2660,9 @@ training. The image might lead to the wrong conclusion that models which are bet the start are also better at the end. In order to check this hypothesis, the relative order of validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering stays approximately the same, then it can be considered to run the first few epochs many -times and only train the best models to the end. For 10 models, there can be 102−10 +times and only train the best models to the end. 
For 10 models, there can be 10 +2−10 2 = 45 pair-wise changes in the ordering at maximum if the relative order of validation accuracies @@ -2974,31 +2968,19 @@ root classifier leaf classifier cluster identified class identified | cluster class identified | cluster 1 3 69.67 % 84.27 % 72.98 % - 2 5 46.60 % 58.54 % 43.47 % - 3 2 58.50 % 92.13 % 83.46 % - 4 2 50.50 % 87.83 % 81.74 % - 5 3 44.67 % 79.29 % 71.01 % - 6 2 29.50 % 78.67 % 72.00 % - 7 2 52.50 % 92.11 % 87.72 % - 8 2 59.50 % 86.23 % 81.88 % - 9 2 59.00 % 90.08 % 87.79 % 10 2 62.00 % 85.52 % 73.10 % - 11 2 67.00 % 87.01 % 75.32 % - 12 2 72.50 % 94.77 % 76.77 % - 13 2 64.00 % 82.58 % 86.27 % - 14 2 79.67 % 89.85 % 89.10 % Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on @@ -3023,8 +3005,7 @@ trained number of epochs. As more filters can lead to different results dependin layer where they are added, five models are trained. The details about those models are given in Table 5.7 -Name Layer Filter count Total -Baseline New parameters +Name Layer Filter count TotalBaseline New parameters m9 9 64 638 5 978 566 m′9 9 64 974 8 925 622 @@ -3053,19 +3034,13 @@ Single Model Ensemble Mean Epochs Mean Time Mean std baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s - m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s - m′9 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s - m11 5 982 698 65.73% 0.77 67.38% 149.2 5450 s - m′11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s - m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13 - as well as their accuracies. 54 @@ -3191,12 +3166,10 @@ removed. The first convolutional layer was increased from 32 filters to 59 filte convolutional layer was increased from 32 filter s to 58 filters in order to keep the amount of parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test accuracy with a standard deviation of σ = 0.84 (+0.29). 
The ensemble achieved 63.88 % - test accuracy (-0.66). Even more extreme, if both convolutional layers are removed from the 16× 16 feature map scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51 - (-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important to have at least one convolutional layer at this feature map scale. @@ -3214,7 +3187,6 @@ Hence the effect of removing Batch Normalization from the baseline is investigat experiment. As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn - are identical to the baseline model m, except that in mno-bn the Batch Normalization layers are removed. @@ -3281,20 +3253,11 @@ Mean total Single model Ensemble time training time Accuracy std Accuracy -8 118 s -epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % - -16 62 s -epoch 103 – 173 8349 s 64.16% σ = 0.81 66.98% - -32 35 s -epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % - -64 25 s -epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % - -128 18 s -epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % +8 118 sepoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % +16 62 sepoch 103 – 173 8349 s 64.16% σ = 0.81 66.98% +32 35 sepoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % +64 25 sepoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % +128 18 sepoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on @@ -3308,7 +3271,6 @@ layers 11, 13 and 15 is removed. The mean test accuracy of 10 trained mno-bias is 63.74 % which is an improvement of 0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 % - which is 0.43 percentage points better than the baseline. Hence the bias can safely be removed. @@ -3325,7 +3287,6 @@ of the Batch Normalization layers did not noticeably change. 5.11. 
Learned Color Space Transformation In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1× 1 - directly after the input and then another convolutional layer with 3 filters of size 1× 1 acts as a learned transformation in another color space and boosts the accuracy. @@ -3349,7 +3310,6 @@ stride 2. This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the 3× 3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 % - (−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test accuracy (+0.45). @@ -3445,12 +3405,9 @@ x + 1) =  -−x - -2 + 1 if x ≤ −2 +−x2 + 1 if x ≤ −2 x if − 2 ≤ x ≤ 2 - x 2 + 1 if x > −2 @@ -3495,29 +3452,17 @@ Single model Ensemble of 10 Training set Test set Training set Test set Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 % - Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 % - Logistic− 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 % - Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 % - Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 % - Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 % - ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 % - ReLU− 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 % - Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 % - S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 % - LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 % - PReLU 80.01% σ = 2.03 62.16 % σ = 0.73 83.50% 64.79% - ELU 76.64 % σ = 1.48 63.38% σ = 0.55 78.30 % 64.70 % Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation @@ -3531,43 +3476,22 @@ Mean total 1 Image 128 time training time -Identity 8 ms 42 ms 31 s -epoch 108 – 148 3629 s - -Logistic 6 ms 31ms 24 s -epoch 101 – 167 2234 s - +Identity 8 ms 42 ms 31 sepoch 108 – 148 3629 s +Logistic 6 ms 31ms 24 sepoch 101 – 167 2234 s Logistic− 6 ms 31ms 22 s -epoch +epoch 
133 – 255 3421 s -Softmax 7 ms 37 ms 33 s -epoch 127 – 248 5250 s - -Tanh 6 ms 31ms 23 s -epoch 125 – 211 3141 s - -Softsign 6 ms 31ms 23 s -epoch 122 – 205 3505 s - -ReLU 6 ms 31ms 23 s -epoch 118 – 192 3449 s - -Softplus 6 ms 31ms 24 s -epoch 101 – 165 2718 s - -S2ReLU 5ms 32 ms 26 s -epoch 108 – 209 3231 s - -LReLU 7 ms 34 ms 25 s -epoch 109 – 198 3388 s - -PReLU 7 ms 34 ms 28 s -epoch 131 – 215 3970 s - -ELU 6 ms 31ms 23 s -epoch 146 – 232 3692 s +Softmax 7 ms 37 ms 33 sepoch 127 – 248 5250 s +Tanh 6 ms 31ms 23 sepoch 125 – 211 3141 s +Softsign 6 ms 31ms 23 sepoch 122 – 205 3505 s +ReLU 6 ms 31ms 23 sepoch 118 – 192 3449 s +Softplus 6 ms 31ms 24 sepoch 101 – 165 2718 s +S2ReLU 5ms 32 ms 26 sepoch 108 – 209 3231 s +LReLU 7 ms 34 ms 25 sepoch 109 – 198 3388 s +PReLU 7 ms 34 ms 28 sepoch 131 – 215 3970 s +ELU 6 ms 31ms 23 sepoch 146 – 232 3692 s Table 5.12.: Training time and inference time of adjusted baseline models trained with different activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the @@ -3642,7 +3566,6 @@ could be relaxed. 10 models msmooth are trained with the α = 0.5 smoothed labels from the prediction of an ensemble of 10 baseline models. The mean accuracy of the models trained on the smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72 - (+0.17 %). The ensemble of 10 msmooth models achieved 64.79 % accuracy (+0.09 %). Hence the effect of this kind of label smoothing on the final accuracy is questionable. @@ -3730,7 +3653,6 @@ block added. This is the framed part in the table. 5.15. 
Optimized Classifier 32× 32 - Input C 69@3× 3/1 @@ -3742,7 +3664,6 @@ C 69@3× 3/1 BN + ELU 16× 16 - max pooling 3× 3/2 C 64@3× 3/1 @@ -3754,7 +3675,6 @@ C 64@3× 3/1 BN + ELU 8× 8 - max pooling 3× 3/2 C 64@3× 3/1 @@ -3762,7 +3682,6 @@ C 64@3× 3/1 BN + ELU 4× 4 - max pooling 3× 3/2 C* 512@4× 4/1 (V) @@ -3772,7 +3691,6 @@ BN + ELU Dropout, p = 0.5 1× 1 - C* 512@1× 1/1 BN + ELU @@ -3788,8 +3706,7 @@ BN + Softmax Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with 32 filters of kernel size 3× 3 with stride 1. The * indicates that no bias is used. -Dataset Single Model Accuracy Ensemble of 10 -Training Set Test Set Training Set Test Set +Dataset Single Model Accuracy Ensemble of 10Training Set Test Set Training Set Test Set Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 % CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 % @@ -3806,8 +3723,7 @@ CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN and HASY, no test time transformations are used. -Network GPU Tensorflow Inference per Training -1 Image 128 images time / epoch +Network GPU Tensorflow Inference per Training1 Image 128 images time / epoch Optimized Default Intel i7-4930K 5 ms 432 ms 386 s Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s @@ -3880,8 +3796,7 @@ pattern, the number of epochs increases with lower model regularization (see Tab 5.17. Regularization -Dataset Early Stopping Fixed epochs -val. acc train loss +Dataset Early Stopping Fixed epochsval. acc train loss Asirra 93.09 % 96.01 %3 96.01 % CIFAR-10 89.86 % 91.75 % 88.88 % @@ -4011,7 +3926,6 @@ from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved fr This was mainly achieved by the combination of ELU, Dropout, ensembles, training data augmentation and test-time transformations. 
The removal of the bias of layers close to the output and re-usage of those parameters in layers close to the input as well as using 3× 3 - pooling instead of 2× 2 pooling improved the baseline. While writing this masters thesis, several related questions could not be answered: @@ -4061,8 +3975,7 @@ A. Figures, Tables and Algorithms Figure A.1.: Examples of image filters. Best viewed in electronic form. -Layer 99-percentile interval -filter bias +Layer 99-percentile intervalfilter bias 1 [-0.50, 0.48] [-0.06, 0.07] 3 [-0.21, 0.19] [-0.07, 0.07] @@ -4085,33 +3998,26 @@ CIFAR-100. Algorithm 1 Simulated Annealing for minimizing Equation (4.1). Require: C ∈ Nn×n, steps ∈ N, T ∈ R+, c ∈ (0, 1) - procedure SimulatedAnnealing(C, steps, T , c) -bestScore← accuracy(C) +bestScore← accuracy(C) bestC← C - for i = 0; i < steps; i← i+ 1 do -p← randomFloat(0, 1) +p← randomFloat(0, 1) if p < 0.5 then . Swap rows -i← randomInteger(1, . . . , n) +i← randomInteger(1, . . . , n) j ← randomInteger(1, . . . , n) \ { i } p← randomUniform(0, 1) - C ′ ← swap(C, i, j) - s← accuracy(C ′) - -if p < exp( s−bestScore -T ) then +if p < exp( s−bestScoreT ) then C ← C ′ if s > bestScore then bestScore← s - bestC← C T ← T · c @@ -4284,13 +4190,12 @@ same idea, that unit-variance is desired for each layer as the training converge Name α β γ Reference Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] - Xavier/Glorot uniform α = -√ +√ 6 -nin+nout +nin+nout β = 0 γ = 0 [GB10] Xavier/Glorot normal α = 0 β = @@ -4302,10 +4207,7 @@ Xavier/Glorot normal α = 0 β = )2 γ = 0 [GB10] -He α = 0 β = 2 -nin - -γ = 0 [HZRS15b] +He α = 0 β = 2nin γ = 0 [HZRS15b] Orthogonal — — γ = 0 [SMG13] LSUV — — γ = 0 [MM15] @@ -4327,7 +4229,9 @@ x∈X K∑ k=1 -[txk log(oxk) + (1− txk) log(1− oxk)] +[txk log(o +x +k) + (1− txk) log(1− oxk)] is by far the most commonly used objective function (e.g., used by [ZF14]). 
In this equation, X is the set of training examples, K is the number of classes, txk ∈ { 0, 1 } indicates if the @@ -4351,7 +4255,8 @@ B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule -wji ← wji + ∆wji with ∆wji = −η ∂Ex +wji ← wji + ∆wji with ∆wji = −η +∂Ex ∂wji where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. @@ -4362,7 +4267,8 @@ lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch s lead to longer training times due to computational overhead and to more training steps due to gradient noise. -wji ← wji + ∆wji with ∆wji = −η∂EB +wji ← wji + ∆wji with ∆wji = −η +∂EB ∂wji Nine variations which adjust the learning rate during training are: @@ -4380,11 +4286,12 @@ ji + ∆w ji with ∆w (t+1) -ji = −η∂EB +ji = −η +∂EB ∂wji -+ α∆w ++ α∆w (t) ji @@ -4407,9 +4314,7 @@ t k where t ∈ N0 is the training step, η(0) is the initial learning rate, k ∈ N≥1 is the number of training steps -until the learning rate is decreased by 1 - -10th. +until the learning rate is decreased by 110th. • Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential Decay Scheduling. 
@@ -4467,85 +4372,65 @@ Name Function ϕ(x) Range of Values ϕ′(x) Used by Sign function† -+1 if x ≥ 0 - -−1 if x < 0 -{ −1, 1 } 0 [KS02] - ++1 if x ≥ 0−1 if x < 0 { −1, 1 } 0 [KS02] Heaviside step function† -+1 if x > 0 - -0 if x < 0 -{ 0, 1 } 0 [MP43] - ++1 if x > 00 if x < 0 { 0, 1 } 0 [MP43] Logistic function 1 -1+e−x [0, 1] ex + +1+e−x [0, 1] +ex (ex+1)2 [DJ99] -Tanh ex−e−x +Tanh e +x−e−x +ex+e−x = tanh(x) [−1, 1] sech -ex+e−x = tanh(x) [−1, 1] sech2(x) [LBBH98, Tho14a] +2(x) [LBBH98, Tho14a] ReLU† max(0, x) [0,+∞) -1 if x > 0 - -0 if x < 0 -[KSH12] - +1 if x > 00 if x < 0 [KSH12] LReLU†2 (PReLU) ϕ(x) = max(αx, x) (−∞,+∞) -1 if x > 0 - -α if x < 0 -[MHN13, HZRS15b] +1 if x > 0α if x < 0 [MHN13, HZRS15b] +Softplus log(ex + 1) (0,+∞) exex+1 [DBB -Softplus log(ex + 1) (0,+∞) ex - -ex+1 [DBB+01, GBB11] ++01, GBB11] ELU -x if x > 0 - -α(ex − 1) if x ≤ 0 -(−∞,+∞) - -1 if x > 0 +x if x > 0α(ex − 1) if x ≤ 0 (−∞,+∞) +1 if x > 0αex otherwise [CUH15] -αex otherwise -[CUH15] +Softmax‡ o(x)j = e +xj∑K -Softmax‡ o(x)j = exj∑K k=1 e - xk -[0, 1]K o(x)j · +[0, 1]K o(x)j · ∑K -k=1 e -xk−exj∑K k=1 e +xk−exj∑K +k=1 e xk + [KSH12, Tho14a] Maxout‡ o(x) = maxx∈x x (−∞,+∞) -1 if xi = maxx - -0 otherwise -[GWFM+13] - +1 if xi = maxx0 otherwise [GWFM+13] Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 + and functions marked with ‡ operate on all elements of a layer simultaneously. The hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other activation function like randomized leaky ReLUs exist [XWCL15], but are far less @@ -4578,17 +4463,20 @@ as it produces a probability distribution. See Figure B.1 for a plot of some of x y -ϕ1(x) = 1 +ϕ1(x) = +1 1+e−x ϕ2(x) = tanh(x) ϕ3(x) = max(0, x) -ϕ4(x) = log(ex + 1) +ϕ4(x) = log(e +x + 1) -ϕ5(x) = max(x, ex − 1) +ϕ5(x) = max(x, e +x − 1) Figure B.1.: Activation functions plotted in [−2,+2]. 
tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, @@ -4638,11 +4526,12 @@ has ki · ki−1(n ·m+ 1) parameters. The +1 is due to the bias. n · (k ·m1 ·m2 + 1) parameters. • A dense block with a depth of L, a growth rate of n and 3× 3 filters has L+ n · 32 + - 32 · n2 + ∑L +i=0(L− i) = L+ 9n+ 9n2 -i=0(L− i) = L+ 9n+ 9n2L2−L +L2−L 2 parameters. According to [HPTD15], AlexNet has 60 million parameters which is roughly the number @@ -4658,7 +4547,6 @@ simplicity, nϕ = 5 was chosen. • A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x+ b) with W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n · (k + (k − 1) + 1) = 2nk - additions / multiplications before the non-linearity ϕ is calculated. The total number of FLOPs is 2 · n · k + n · nϕ. @@ -4666,7 +4554,6 @@ of FLOPs is 2 · n · k + n · nϕ. being applied to ki−1 filter maps of size w× h results in ki filter maps of size w× h if padding is applied. For each element of each filter map, n ·m ·ki−1 multiplications and (n ·m · ki−1 − 1) additions have to be made. This results in (2nmki−1 − 1) · (ki ·w · h) - operations. The total number of FLOPs is (2 ·n ·m ·ki−1−1) · (ki ·w ·h)+ki ·w ·h ·nϕ. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. @@ -4676,8 +4563,8 @@ calculating convolutions [LG16]. • A fully connected layer with n nodes after k feature maps of size w×h needs 2n(k ·w ·h) - FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ. + • As Dropout is only calculated during training, the number of FLOPs was set to 0. • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. For a feature map of size w × h a max pooling @@ -4790,9 +4677,7 @@ D.2. AlexNet The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12]. 
Its architecture is shown in Figure D.2 and described in Table D.2. It has about 60·106 param- eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/g̃uerzhoy/tf_alexnet. -Note that the uncompressed size is at least 60 965 224 floats · 32 bit - -float ≈ 244 MB. +Note that the uncompressed size is at least 60 965 224 floats · 32 bitfloat ≈ 244 MB. Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. @@ -4841,14 +4726,11 @@ D.3. VGG-16 D Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual Geometry Group in Oxford which developed this architecture. It has 16 layers which can learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3× 3 - filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a detailed textual description is given in Table D.3. A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/ -tensorflow-vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bit - -float ≈ +tensorflow-vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bitfloat ≈ 520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and 514 MB with compression. 
@@ -5067,38 +4949,21 @@ CIFAR-10 32 px× 32 px 60 000 10 3 [Kri, KH09] CIFAR-100 32 px× 32 px 60 000 100 3 [Kri, KH09] STL-10 96 px× 96 px 13 000 10 3 [CLN11, CLN10] -Caltech-101 (80 px− 3481 px) -×(92 px− 3999 px) - -9144 102 3 [FFP03, FFFP06] +Caltech-101 (80 px− 3481 px)×(92 px− 3999 px) 9144 102 3 [FFP03, FFFP06] -Caltech-256 (75 px− 7913 px) -×(75 px− 7913 px) +Caltech-256 (75 px− 7913 px)×(75 px− 7913 px) 30 607 257 3 [Gri06, GG07] -30 607 257 3 [Gri06, GG07] +ILSVRC 20121 (8 px− 9331 px)×(10 px− 6530 px) 1.2 · 10 +6 1000 3 [Ima12, RDS+14] -ILSVRC 20121 (8 px− 9331 px) -×(10 px− 6530 px) +Places3652 (290px− 3158px)×(225px− 2630px) 1.8 · 10 +6 365 3 [Zho16, ZKL+16] -1.2 · 106 1000 3 [Ima12, RDS+14] +GTSRB (25 px− 266 px)×(25 px− 232 px) 51 839 43 3 [SSSI, SSSI12] -Places3652 (290px− 3158px) -×(225px− 2630px) +Asirra3 (4 px− 500 px)×(4 px− 500 px) 25 000 2 3 [Asi17, EDHS07] -1.8 · 106 365 3 [Zho16, ZKL+16] - -GTSRB (25 px− 266 px) -×(25 px− 232 px) - -51 839 43 3 [SSSI, SSSI12] - -Asirra3 (4 px− 500 px) -×(4 px− 500 px) - -25 000 2 3 [Asi17, EDHS07] - -Graz-02 480 px× 640 px -and 640 px× 480 px 1096 3 3 [Mar08, MS07] +Graz-02 480 px× 640 pxand 640 px× 480 px 1096 3 3 [Mar08, MS07] Table E.1.: An overview over publicly available image databases for classification. The number of images row gives the sum of the training and the test images. 
Some datasets, like @@ -5136,20 +5001,16 @@ Require: Semantic segmentation dataset (DS) procedure CreateDataset(Annotated dataset DS) DC ← List - w ← desired image width h← desired image height for Image and associated label (x, y) in DS do i← randint(0, L.width− w) - j ← randint(0, L.height− h) - cL ← crop(y, (i, j), (i+ w, j + h)) - if at least 50% of s are of one class then -cI ← crop(x, (i, j), (i+ w, j + h)) +cI ← crop(x, (i, j), (i+ w, j + h)) D.append((cI , cL)) return (DC) diff --git a/read/results/tika/2201.00021.txt b/read/results/tika/2201.00021.txt index 43b8728..7fd9f6b 100644 --- a/read/results/tika/2201.00021.txt +++ b/read/results/tika/2201.00021.txt @@ -18,6 +18,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -233,7 +254,6 @@ position switching mode, and the off position was 10′ in azimuth away from the source. For observations made before 2021 Au- gust, we used a spectrometer that covered 2 GHz wide backends with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1 - at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar 1975). A high spectral resolution backend with 65536 channels and a bandwidth of 300 MHz was employed in 2021 August, @@ -540,7 +560,6 @@ NH3 column density of ∼5×1015 cm−2 was estimated for a region of 3′′ around HW2 (Torrelles et al. 1999). This high NH3 abun- dance could provide a suitable environment for maser species. Large line widths (∆V1/2 '7.0 km s−1) with VLSR ∼ −10 km s−1 - in both (1,1) and (2,2) lines were found toward HW2 (Torrelles et al. 1993). The velocity is similar to the cloud’s systemic lo- cal standard of rest (LSR) velocity of −11.2 km s−1, which @@ -562,7 +581,6 @@ lines with the kinetic temperature reveals the size of the hot, ammonia-emitting core to be only ∼2.5′′. All those measured NH3 lines were quasi-thermal and had LSR velocities of ∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1 - obtained from C17O observations (Wyrowski et al. 2012). 
Their line widths (∆V1/2 ≥3.6 km s−1) are larger than what we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1) for each (9,6) @@ -621,7 +639,6 @@ is powered by continuum source C or by an outflow. Near com- ponent B, there are some OH and CH3OH masers but no H2O or NH3 masers. A group of H2O masers, well-known tracers of outflows, with a large velocity distribution of 43 km s−1 ≤ - VLSR ≤54 km s−1, was found to the west of the centimeter- continuum source A and close to the peak of the millimeter- continuum emission (see details in our Fig. A.2 and also in Fig. 5 @@ -931,10 +948,9 @@ detected NH3 (9,6) emissions. Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. Source R.A. Dec. Size P.A. S ν - (h m s) (◦ ′ ′′) (arcsec) (deg) (mJy) -Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 +Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36 HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7 diff --git a/read/results/tika/2201.00022.txt b/read/results/tika/2201.00022.txt index ed7049d..f41aedf 100644 --- a/read/results/tika/2201.00022.txt +++ b/read/results/tika/2201.00022.txt @@ -34,7 +34,28 @@ -Draft version January 4, 2022 + + + + + + + + + + + + + + + + + + + + + +Draft version July 7, 2022 Typeset using LATEX twocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei @@ -50,27 +71,36 @@ ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70 M�. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and +50− 70 M�, the lower limit of the pair-instability mass gap. 
However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive + +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions + +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical + +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite -above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), +efficient, forming IMBHs as massive as 104 M�. This upper limit assumes that (1) the BHs accrete a -can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which -main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relax- +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar -ation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M�. Our +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our -results suggest that massive black holes and IMBHs may be ubiquitous in galactic centres. This for- +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic -mation channel also has implications for observations. Collisions between stars and BHs can produce +centers. This formation channel has implications for observations. Collisions between stars and BHs -electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. Additionally, +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. 
-formed through this channel, both black holes in the mass gap and IMBHs can merge with the super- +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge -massive black hole at the center of a galactic nucleus through gravitational waves. These gravitational +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. -wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, + +respectively). 1. INTRODUCTION @@ -81,16 +111,15 @@ GW190521 (The LIGO Scientific Collaboration et al. 2020a,b) produced an intermediate mass black hole of approximately 142 M�. This event may have also had a - 85 M� progenitor, which falls within the pair-instability mass gap that limits stellar black holes (BHs) to no more than ∼< 50 M� (e.g., Heger et al. 2003; Woosley - 2017)1. Similarly, the merger products of GW150914, GW170104, and GW170814 fall within the mass gap + (e.g., Abbott et al. 2016, 2017a,b). BH mergers that form second generation BHs and, in some cases, inter- @@ -99,16 +128,6 @@ mediate mass BHs (IMBHs), these gravitational wave (GW) events can occur in globular clusters, young stel- -lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- - -driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. - -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. - -2021; Arca Sedda et al. 2021). However, IMBHs are - -not limited to these locations and may reside in galac- - Corresponding author: Sanaea C. Rose srose@astro.ucla.edu @@ -118,6 +137,15 @@ metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski et al. 2020a; Renzo et al. 2020; Vink et al. 2021). +lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro- + +driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. 
+ +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. + +2021; Arca Sedda et al. 2021). However, IMBHs are + +not limited to these locations and may reside in galac- tic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc @@ -152,26 +180,6 @@ Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. vive galaxy evolution and mergers to present day (e.g., -Rashkov & Madau 2014), with significant effects on their - -stellar and even dark matter surroundings (e.g., Bertone - -et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda - -et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another - -popular formation channel relies on the coalescence of - -many stellar-mass black holes. For example, IMBHs - -may form in the centers of globular clusters, where few- - -body interactions lead to the merger of stellar-mass BHs - -(e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha - -et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- - ar X @@ -185,7 +193,7 @@ iv 02 2v -1 +2 [ as @@ -199,33 +207,55 @@ ph A ] - 3 -1 - -D -ec + 6 + J +ul 2 -02 -1 +02 +2 mailto: srose@astro.ucla.edu 2 Rose et al. +Rashkov & Madau 2014), with significant effects on their + +stellar and even dark matter surroundings (e.g., Bertone + +et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda + +et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another + +popular formation channel relies on the coalescence of + +many stellar-mass black holes, which may seed objects + +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs + +may form in the centers of globular clusters, where few- + +body interactions lead to the merger of stellar-mass BHs + +(e.g., O’Leary et al. 2006; Gürkan et al. 2006; Blecha + +et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro- + driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). 
Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Portegies +collisions and mergers of massive stars (e.g., Ebisuzaki + +et al. 2001; Portegies Zwart & McMillan 2002; Portegies -Zwart & McMillan 2002; Portegies Zwart et al. 2004; +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; -Freitag et al. 2006; Kremer et al. 2020; González et al. +Kremer et al. 2020; González et al. 2021; Di Carlo et al. -2021; Di Carlo et al. 2021). +2021; Das et al. 2021a,b; Escala 2021). The main obstacle to sequential BH mergers in clus- @@ -254,30 +284,51 @@ back towards the cluster center over a dynamical fric- tion timescale. Using this approach, they showed that 103 − 104 M� IMBHs can form efficiently over the life- - time of a cluster. -However, as discussed in Section 2.2, direct star-BH +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision in galactic nuclei, making the former a promising chan- -nel for BH growth. We propose that IMBHs can form +nel for BH growth. In an N-body study of young star + +clusters, Rizzuto et al. (2022) find that BH-star colli- + +sions are a main contributor to the formation of BHs + +in the mass gap and IMBHs. In a similar vein, Stone + +et al. (2017) demonstrate that massive BHs can form + +from repeated tidal encounters between stars and BHs. -naturally within the central pc of a SMBH in a galactic +More generally, several studies have explored the role of -center. Specifically, these IMBHs form through repeated +collisions in a GN, with implications for the stellar and -collisions with main sequence stars, accreting some or +red giant populations (e.g., Dale & Davies 2006; Dale -all of the star’s mass depending on the details of the +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti -collision. We demonstrate that this channel can create +et al. 2021). 
We propose that IMBHs can form naturally -IMBHs with masses as large as 104 M�, depending on +within the central pc of a galactic center through re- -the density profile of the surrounding stars. +peated collisions between BHs and main sequence stars. + +During a collision, the BH can accrete some portion of + +the star’s mass. Over many collisions, it can grow ap- + +preciably in size. We demonstrate that this channel can + +create IMBHs with masses as large as 104 M�, an upper + +limit that depends on the density profile of the surround- + +ing stars and the efficiency of the accretion. The paper is structured as follows: we describe rele- @@ -295,15 +346,15 @@ solutions to our equations in two different regimes, ef- ficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections 2.5 +these solutions to our statistical results. Sections 2.6 -and 2.7 discuss implications for GW merger events be- +and 2.8 discuss implications for GW merger events be- tween IMBHs and the SMBH. We then incorporate re- laxation processes and discuss the subsequent results in -Section 2.8. Finally, we discuss and summarize our find- +Section 2.9. Finally, we discuss and summarize our find- ings in Section 3. @@ -332,7 +383,6 @@ sume that the BH mass distribution follows that of the stars from which they originate, a Kroupa initial mass function dN/dm ∝ m−2.35. While this choice represents - a gross oversimplification, it has very little bearing on our final results. Future work may address the particu- @@ -372,7 +422,21 @@ Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other -observationally motivated distributions in Section 2.8, + + +IMBH Formation in Galactic Nuclei 3 + +Figure 1. We plot the relevant timescales, including col- +lision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. 
For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark +blue line represents the time for a 105 M� BH to merge with +the SMBH through GW emission. + +observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribu- @@ -384,8 +448,7 @@ BHs in the GN can undergo direct collisions with other objects. The timescale for this process, tcoll, can be es- -timated using a simple rate calculation: t−1 -coll = nσA, +timated using a simple rate calculation: t−1coll = nσA, where n is the number density of objects, σ is the ve- @@ -393,14 +456,15 @@ locity dispersion, and A is the cross-section. We use the collision timescale from Rose et al. (2020): -t−1 -coll =πn(a•)σ(a•) +t−1coll =πn(a•)σ(a•) × ( -f1(e•)r2 +f1(e•)r +2 c + f2(e•)rc + 2G(mBH +m?) σ(a•)2 @@ -422,20 +486,6 @@ the collision rate, while n and σ are simply evaluated at the semimajor axis of the orbit (see below). Note - - -IMBH Formation in Galactic Nuclei 3 - -Figure 1. We plot the relevant timescales, including col- -lision (green), relaxation (gold), and BH-BH GW capture -(purple), for a single BH in the GN as a function of distance -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. The timescales depend on the -density, so we adopt a range of density profiles, bounded by -α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark -blue line represents the time for a 105 M� BH to merge with -the SMBH through GW emission. - that this timescale equation includes the effects of grav- itational focusing, which enhances the cross-section of @@ -461,19 +511,16 @@ r• r0 )−α - , (2) where r• denotes the distance from the SMBH. We adopt a SMBH mass of 4× 106 M� such that our fiducial GN - matches our own galactic center (e.g., Ghez et al. 2005; Genzel et al. 2003). 
In this case, the normalization in Eq. (2) is ρ0 = 1.35× 106M�/pc3 at r0 = 0.25 pc (Gen- - zel et al. 2010). Additionally, in Eq. (2), α gives the slope of the power law. We assume that a uniform pop- @@ -488,9 +535,6 @@ n(r•) = 1M� . (3) -2 We note that the eccentricity has a very minor effect on the -collision timescale (Rose et al. 2020). - The collision timescale also depends on the velocity dis- persion, which we express as: @@ -518,7 +562,6 @@ and a star, and the former has a much smaller physi- cal cross-section. For example, the Schwarzschild radius of a 10M� BH is only 30 km, or 4.31 × 10−5R�. For - this reason, direct collisions between compact objects are very rare and not included in our model. @@ -549,6 +592,13 @@ lisions will be the main driver of IMBH growth in the GN. +2 We note that the eccentricity has a very minor effect on the +collision timescale (Rose et al. 2020). + + + +4 Rose et al. + 2.3. Statistical Approach to Collisions We simulate the mass growth of a population of BHs @@ -583,16 +633,6 @@ the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3. -3 Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. We include a safe- -guard in our code which takes the ratio tcoll/∆t and rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. - - - -4 Rose et al. - 2.4. Mass Growth When a BH collides with a star, it may accrete ma- @@ -613,10 +653,9 @@ outermost point, its surface, which corresponds to the maximum impact parameter 1 R�. Qualitatively, one -might expect that the BH could accrete the entire star +might expect that the BH could capture the entire star (i.e., ∆m ∼ 1 M�) if the relative velocity is smaller than - the escape velocity from the BH at this point. 
However, in the vicinity of the SMBH, the dispersion velocity of @@ -625,26 +664,52 @@ the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH -accretes a “tunnel” of material through the star. This +captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1R�. +approximately 1R�. For the purposes of this study, we + +assume that the BH accretes all of the material that + +it captures. The details of the accretion are uncertain, + +however, and it may be much less efficient than our re- + +sults imply. We discuss accretion in Section 2.5. To estimate ∆m, we begin with the Bondi-Hoyle ac- cretion rate, ṁ, given by: ṁ = -4πG2m2 +4πG2m2BHρstar -BHρstar +(c2s + σ +2) -(c2s + σ2) 3/2 - , (5) +3 Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe- +guard in our code which takes the ratio tcoll/∆t and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. + +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the initial mass equal to 10M� for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4.
+ where cs is the speed of sound in the star and ρstar is its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima @@ -655,7 +720,8 @@ approximate the density as 1M�/(4πR 3 �/3) and take -the conservative value of cs = 500 km s−1, which is +the conservative value of cs = 500 km s +−1, which is consistent with the sound speed inside a 1 M� star @@ -668,9 +734,7 @@ have: ∆m = min(ṁ× t?,cross, 1 M�) , (6) where t?,cross ∼ R�/σ is the crossing time of the BH in - the star. We take the minimum between ṁ× t?,cross and - 1 M� because the BH cannot accrete more mass than one star at each collision. @@ -701,20 +765,11 @@ tions assume α = 1 for the stellar density profile, ensur- ing the collision timescale is long compared to the sim- -ulation time, 10 Gyr. Therefore, the BHs grow slowly, -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10M� for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. + +IMBH Formation in Galactic Nuclei 5 + +ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the @@ -772,10 +827,6 @@ collision timescale is shorter, corresponding to a larger index α in the density profile (see Figure 1), the growth - - -IMBH Formation in Galactic Nuclei 5 - is very efficient and ∆m quickly approaches 1 M�. 
Con- sequently, while we can now assume ∆m = 1 M�, we @@ -798,116 +849,205 @@ example, we plot this curve in purple for the α = 2 case, in Figure 3, which agrees with the simulated masses. -2.5. GW Inspiral +2.5. Uncertainties in Accretion -When a BH is close to the SMBH, GW emission can +We note that the ∆M calculated in this proof-of- -circularize and shrink its orbit. We implement the ef- +concept study assumes that the BH accretes all of the -fects of GW emission on the BH’s semimajor axis and +material that it captures. Estimating the true fraction -eccentricity following Peters & Mathews (1963a). The +of the material accreted by the BH is very challeng- -characteristic timescale to merge a BH with an SMBH +ing; this complex problem requires numerically solving -is given by: +the generalized GR fluid equations with cooling, heat- -tGW ≈2.9× 1012 yr +ing, and radiative transfer, etc. and remains an active -( -M• +field of research (e.g., Blandford & Begelman 1999; Park -106 M� +& Ostriker 2001; Narayan et al. 2003; Igumenshchev -)−1( -mBH +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang -106 M� +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). -)−1 +Heuristically, if a collision between a BH and a star re- -× -( -M• +mBH +sults in an accretion disk, the disk’s viscous timescale -2× 106 M� +may be as low as days. The resultant luminosity can -)−1( -a• +unbind most of the captured material, though details -10−4 pc +such as the amount accreted and peak luminosity re- -)4 +main uncertain (e.g., Yuan et al. (2012); Jiang et al. -× f(e•)(1− e2 -•)7/2 , (9) +(2014), see also the discussion in Stone et al. (2017), -where f(e•) is a function of e•. For all values of e•, +Rizzuto et al. (2022), and Kremer et al. (2022)). The -f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We +question becomes whether or not a BH can still accu- -plot this timescale for a 1 × 105M� BH in Figure 1 in +mulate significant amounts of mass over many collisions -blue. 
+even if it accretes very little in a single one. We ex- -In our simulations, we assume a BH has merged with +plore the viability of our channel using a physically mo- -the SMBH when the condition tGW < telapsed is met. +tivated inefficient accretion model. Several studies have -When this condition is satisfied, we terminate mass +invoked momentum-driven winds in BH accretion (e.g., -growth through collisions for that BH.4 +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. -2.6. IMBH growth +2018). We thus estimate the fraction of captured mass -As detailed above, BH-stellar collisions can increase +accreted to be approximately vesc/(cη), where vesc is -the BH masses as a function of time. Here, we examine +the escape velocity from the BH at 1 R� and η is the -the sensitivity of the BH growth to the density power +accretion efficiency at the ISCO. We take η to be 0.1 -law. From Eq. (1), it is clear that the growth rate de- +(e.g., Yu & Tremaine 2002). This expression for the -pends on the stellar density profile, governed by the in- +fraction accreted is consistent with Kremer et al. (2022) -dex α. We expect that higher values of α, or steeper +equation 19 for s = 0.5, which is a reasonable value for -profiles, will result in more efficient mass growth. In +s, a free parameter between 0.2 and 0.8. We discuss -Figure 1, larger values of α lead to collision timescales +the results of the momentum-driven winds estimate in -in the GN’s inner region, inwards of 0.25 pc, that are +Section 3. We note that the accretion process may be -4 For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking. +more efficient than this estimate implies if, for example, -much smaller that the 10 Gyr simulation time. 
Figure 3 +jets or other instabilities result in the beaming of radi- -confirms this expectation. It depicts the mass growth of +ation away from the captured material (e.g., Blandford -a uniform distribution of BHs with initial conditions de- +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; -tailed in Section 2.1 for five α values, spanning 1 (green) +McKinney & Gammie 2004; McKinney 2006; Igumen- -to 2 (purple). The most massive IMBHs form inwards +shchev 2008; Begelman 2012a,b; McKinney et al. 2014). -of 0.25 pc for the α = 2 case. +2.6. GW Inspiral -2.7. Gravitational Wave Mergers and Intermediate +When a BH is close to the SMBH, GW emission can -and Extreme Mass Ratio Inspiral Candidates +circularize and shrink its orbit. We implement the ef- + +fects of GW emission on the BH’s semimajor axis and + +eccentricity following Peters & Mathews (1963a). The + +characteristic timescale to merge a BH with an SMBH + +is given by: + +tGW ≈2.9× 1012 yr +( + +M• +106 M� + +)−1( +mBH + +106 M� + +)−1 +× +( +M• +mBH +2× 106 M� + +)−1( +a• + +10−2 pc + +)4 +× f(e•)(1− e2•)7/2 , (9) + +where f(e•) is a function of e•. For all values of e•, + +f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We + +plot this timescale for a 1 × 105M� BH in Figure 1 in +blue. + + + +6 Rose et al. + +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. + +In our simulations, we assume a BH has merged with + +the SMBH when the condition tGW < telapsed is met. + +When this condition is satisfied, we terminate mass + +growth through collisions for that BH.4 + +2.7. 
IMBH growth + +As detailed above, BH-stellar collisions can increase + +the BH masses as a function of time. Here, we examine + +the sensitivity of the BH growth to the density power + +law. From Eq. (1), it is clear that the growth rate de- + +pends on the stellar density profile, governed by the in- + +dex α. We expect that higher values of α, or steeper + +profiles, will result in more efficient mass growth. In + +Figure 1, larger values of α lead to collision timescales + +in the GN’s inner region, inwards of 0.25 pc, that are + +much smaller than the 10 Gyr simulation time. Figure 3 + +confirms this expectation. It depicts the mass growth of + +a uniform distribution of BHs with initial conditions de- + +tailed in Section 2.1 for five α values, spanning 1 (green) + +to 2 (purple). The most massive IMBHs form inwards + +of 0.25 pc for the α = 2 case. + +2.8. Gravitational Wave Mergers and Intermediate + +and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. -Following the method detailed in Section 2.5, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion tGW < telapsed, we mark +4 For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. + it as merged with the SMBH. We assume that at this point the dynamics of the BH will be determined by GW @@ -928,7 +1068,7 @@ to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. -2.8. Two Body Relaxation Processes +2.9.
Two Body Relaxation Processes A BH orbiting the SMBH experiences weak gravita- @@ -947,7 +1087,6 @@ G2ρ〈M∗〉 ln Λrlx , (10) where ln Λrlx is the Coulomb logarithm and 〈M∗〉 is the - average mass of the surrounding objects, here assumed to be 1M� (Spitzer 1987; Binney & Tremaine 2008, @@ -964,39 +1103,51 @@ momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). In +efficient Fragione & Sari 2018; Sari & Fragione 2019). -Figure 1, we plot the relaxation timescale in gold for a +Relaxation can cause the orbit of an object in a GN to -range of α. We note that the Bahcall & Wolf (1976) pro- +reach high eccentricities. If the object is a BH, it can -file, α = 7/4, corresponds to zero net flux and therefore +spiral into the SMBH and form an EMRI, while a star -does not preferentially migrate objects inward. -Additionally, because they are more massive on -average than the surrounding objects, BHs are ex- +IMBH Formation in Galactic Nuclei 7 -pected to segregate inwards in the GN (e.g., Shapiro +can be tidally disrupted by the SMBH (e.g. Magorrian -& Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +& Tremaine 1999; Wang & Merritt 2004; Hopman & -Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). +Alexander 2005; Aharon & Perets 2016; Stone & Met- +zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore -6 Rose et al. +crucial to our study. In Figure 1, we plot the relaxation -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and -merger times of these BHs. 
+timescale in gold for a range of α. We note that the Bah- + +call & Wolf (1976) profile, α = 7/4, corresponds to zero + +net flux and therefore does not preferentially migrate + +objects inward. + +Additionally, because BHs are more massive on av- + +erage than the surrounding objects, they are expected + +to segregate inwards in the GN (e.g., Shapiro & + +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; + +Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, tseg ≈ 〈M∗〉/mBH × trelax (e.g., Spitzer 1987; - Fregeau et al. 2002; Merritt 2006), which is typically an order of magnitude smaller than the relaxation timescale @@ -1026,11 +1177,12 @@ approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. in prep for full set +& Naoz (2019), and see Naoz et al. (2022) for the full -of equations. +set of equations. We account for the effects of relaxation processes, + including mass-segregation, using a multi-faceted ap- proach. We begin by migrating each BH towards the @@ -1058,7 +1210,6 @@ BHs will then settle onto a Bahcall-Wolf profile, while the stars may follow a shallower profile, with approx- imately n? ∝ r−1.5, inwards of the transition radius - (Linial & Sari in prep.). Therefore, after the initial mass segregation, we allow @@ -1111,7 +1262,7 @@ scattering. We reserve the inclusion of these interactions for future study. -2.9. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure 4, two-body relaxation processes @@ -1125,19 +1276,6 @@ However, it also impedes the growth of BHs that are initially closer to the SMBH by allowing them to dif- - - -IMBH Formation in Galactic Nuclei 7 - -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) -for 500 BHs. 
This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. We -assume α = 1.75 for the GN density profile. Faded stars represent BHs that merged with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more -BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses for two -different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. -The dashed, faded lines represent the corresponding initial histograms. We assume α = 1.75 for the GN density profile. Faded -stars represent BHs that merged with the SMBH. - fuse out of the inner region where collisions are efficient. As can be seen in Figure 4, the net result is that more @@ -1163,12 +1301,12 @@ distributions were drawn from the same sample for the IMF with an average of 10 M� leads to a final distri- bution with an average of ∼ 200 M� and a median of - ∼ 45 M�, which lies within the mass gap. 3. DISCUSSION AND PREDICTIONS We explore the feasibility of forming IMBHs in a + GN through successive collisions between a stellar-mass BH and main-sequence stars. Taking both a statisti- @@ -1179,9 +1317,22 @@ can produce IMBHs efficiently with masses as high as 103−4 M� and may result in many IMBH-SMBH merg- -ers (intermediate-mass ratio inspiral, IMRIs) and EM- +ers (intermediate-mass ratio inspirals, or IMRIs) and + +EMRIs. + + + +8 Rose et al. -RIs. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. 
As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. In the third panel, we show a histogram of the simulated IMBH masses +for three different values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH @@ -1195,7 +1346,7 @@ SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this -limit, the BH accretes a “tunnel” of material through +limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accre- @@ -1203,7 +1354,7 @@ tion. In our statistical analysis, we account for Bondi- Hoyle-Lyttleton accretion and find that BHs outside of -10−2 pc from the SMBH can accrete the entire star (see +10−2 pc from the SMBH can capture the entire star (see Figure 2). @@ -1230,11 +1381,53 @@ profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller (∼ 104 M�). - Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). +Most simulations in our study assume that the BHs + +accrete all of the mass that they capture. The final BH + +masses can be taken as an upper limit. We note that + +the accretion is a highly uncertain process and repre- + +sents an active field of study (e.g., Blandford & Begel- + +man 1999; Park & Ostriker 2001; Narayan et al. 2003; + +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan + +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; + +Narayan et al. 2022).
To assess the limits of our model, + +we also consider a physically motivated accretion model, + +momentum-driven winds (Section 2.5). We present the + +final mass distribution for momentum-driven winds in + +Figure 4. Importantly, we find that BHs within the + +mass gap still form naturally despite the substantially + +reduced accretion. About 5% of the BHs grow by 10 + +to 100 M�. Furthermore, if we increase this ∆M esti- + +mate by a factor of 2 (i.e., use η = 0.05), the simula- + +tion produces a 3.5× 103 M� IMBH for the same initial +conditions. Our proof-of-concept demonstrates that col- + +lisions between BHs and stars are an important process + +that should be taken into account in dense places such + +as a GN. + Mass growth through BH-main-sequence star colli- sions may act in concert with other IMBH formation @@ -1271,27 +1464,153 @@ Hoang et al. 2018). Additionally, to be susceptible to evaporation, BH binaries must have a wider configura- -tion. Otherwise, they will be more tightly bound that +tion. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects +and will only harden through weak gravitational inter- -8 Rose et al. -the average kinetic energy of the surrounding objects, -and will only harden through weak gravitational inter- +IMBH Formation in Galactic Nuclei 9 actions with neighboring stars (see for example Figure 6 in Rose et al. 2020). -Not included in this study, collisions between the BH +We note that we assume a steady-state and treat the + +stars as a reservoir in this model. Future work will take a + +more nuanced approach to the background stars, whose + +density as a function of time can be influenced by several + +factors. Firstly, the relaxation of the stellar population + +occurs on Gyr timescales. Some studies have suggested + +that in situ star formation can occur in the Galactic + +Center as close as 0.04 pc from the SMBH (e.g., Levin + +& Beloborodov 2003; Paumard et al.
2006), and star + +formation episodes can occur as often as every ∼ 5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after + +the first Gyr, stars within ≲ 0.01 pc will be replenished +at intervals consistent with the star formation episodes; + +the infalling populations of stars are separated by ∼ +5−10 Myr, which is shorter than the collision timescale. + +However, star-star collisions may complicate this pic- + +ture within ∼ 0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar popula- + +tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between + +two 1 M� stars is larger than their binding energies. + +Collisions can therefore thin out the stellar populations + +during the time it takes them to diffuse to these small + +radii, ≲ 0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star + +collisions for future work. We also note that the disrup- + +tion of binary stars by the SMBH may help replenish + +the stellar population even as collisions work to deplete + +it (e.g., Balberg et al. 2013); when a binary is disrupted, + +one of the stars is captured on a tightly bound orbit + +about the SMBH. + +An IMBH may also affect the stellar density profile. + +As it spirals into the SMBH, it can perturb stellar orbits, + +and these interactions can lead to hypervelocity stars + +(e.g., Baumgardt et al. 2006a; Löckmann & Baumgardt + +2008). Löckmann & Baumgardt (2008) show that an + +IMBH can modify an initially steep stellar density pro- + +file to become consistent with the flatter cusp observed + +in the Galactic Center. The stars may then be replen- + +ished on 100 Myr timescales (Baumgardt et al. 2006a). + +Therefore, after the formation of the first few IMBHs, + +subsequent BH growth may occur in bursts, coinciding + +with replenishment of the stars.
-and other compact objects will increase the BH growth +While there are many competing dynamical processes -rate. BH-BH mergers (e.g., O’Leary et al. 2009; Fra- +that shape the stellar density profile, we stress that α -gione et al. 2021) and even neutron star BH mergers +5 In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. + +can simply be chosen to encapsulate all of the relevant + +physics. A value for α that is constrained by observa- + +tions must already reflect ongoing processes like star- + +star collisions and replenishment. Schödel et al. (2018) + +find the observed stellar mass enclosed within 0.01 pc of + +the Milky Way’s Galactic Center to be approximately + +180 M�. This estimate is consistent to order of magni- + +tude with our α = 1.25 case. In a simulation like those + +depicted in Figure 4, which include relaxation, α = 1.25 + +leads to a maximum IMBH mass of 140 M�. Further- + +more, while the stellar mass within 0.01 pc may be a + +few hundred M�, Do et al. (2019) and GRAVITY Col- + +laboration et al. (2020) set an upper limit on the mass + +enclosed within the orbit of S0-2 to be about a few thou- + +sand M�, or 0.1% of the central mass. This upper limit + +can include mass that was previously in stars but is now + +in BHs. In that case, the 180 M� is what remains of the + +stars, while BHs and IMBHs make up the ∼ 1000 M� +in the innermost region. + +Also not included in this study, collisions between the + +BH and other compact objects will increase the BH + +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; + +Fragione et al. 2021) and even neutron star BH mergers (e.g., Hoang et al. 2020) become more likely as the BHs @@ -1309,67 +1628,82 @@ result in GW recoil, which may have a large impact on the dynamics (e.g., Baibhav et al. 
2020; Fragione et al. -2021) +2021). The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation +dissipates energy from the orbit. Along with relaxation, -processes, GW emission causes BHs to sink towards the +GW emission causes BHs to sink towards the SMBH -SMBH and eventually undergo a merger. As a result, +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs -the GN environment is conducive to the formation of +and IMRIs. The GW emission from EMRIs and IM- -EMRIs and IMRIs. The GW emission from EMRIs and +RIs is expected to be at mHz frequencies, making them -IMRIs is expected to be at mHz frequencies, making +promising candidates for LISA to observe. While the -them promising candidates for LISA to observe. While +exact rate calculation is beyond the scope of this study, -the exact rate calculation is beyond the scope of this +the mechanism outlined here seems very promising. -study, the mechanism outlined here seems very promis- +Our results also suggest that BHs within the mass gap -ing. +as well as IMBHs likely exist in many galactic nuclei, as -Our results also suggest that IMBHs are likely to ex- +well as within our own galactic center. This implication -ists in many galactic nuclei, as well as within our own +seems to be consistent with recent observational and -galactic center. This implication seems to be consis- +theoretical studies (e.g., Hansen & Milosavljević 2003; -tent with recent observational and theoretical studies +Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris -(e.g., Hansen & Milosavljević 2003; Maillard et al. 2004; +& Merritt 2009; Chen & Liu 2013; Generozov & Madi- -Gürkan & Rasio 2005; Gualandris & Merritt 2009; Chen +gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz -& Liu 2013; Generozov & Madigan 2020; Fragione et al. +et al. 2020; GRAVITY Collaboration et al. 2020). -2020a; Zheng et al. 2020; Naoz et al. 
2020; GRAVITY -Collaboration et al. 2020). + +10 Rose et al. Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. +galactic centre (e.g., Muno et al. 2005, 2009; Hailey + +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre- + +mer et al. (2022) for a discussion of electromagnetic sig- + +natures from BH-star collisions)6. These interactions, -2018; Zhu et al. 2018; Cheng et al. 2018)5. These inter- +in particular grazing collisions, may also result in tidal -actions, in particular grazing collisions, may also result +disruption events (e.g., Baumgardt et al. 2006b; Perets -in tidal disruption events (e.g., Perets et al. 2016; Sam- +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre- -sing et al. 2019; Kremer et al. 2021). Thus, the process +mer et al. 2021). Thus, the process outlined here may -outlined here may produce electromagnetic signatures +produce electromagnetic signatures in addition to GW -in addition to GW mergers. +mergers. -SR thanks the Charles E Young fellowship, the Nina +We thank the anonymous referee for useful comments. + +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, + +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use- + +ful discussion. + +SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. Jura Memorial @@ -1383,7 +1717,11 @@ ous support. IL thanks support from the Adams Fellow- ship. SN and RS thank the Bhaumik Institute visitor -program. +program. This work was performed in part at the As- + +pen Center for Physics, which is supported by National + +Science Foundation grant PHY-1607611. REFERENCES @@ -1401,12 +1739,25 @@ doi: 10.1103/PhysRevLett.118.221101 doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, + +doi: 10.3847/2041-8205/830/1/L1 + Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 Alexander, T., & Pfuhl, O. 
2014, ApJ, 780, 148, doi: 10.1088/0004-637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, + +doi: 10.1007/s41114-018-0013-8 + +6 The connection between the observed X-ray sources at the Galac- +tic Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. + Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. @@ -1417,19 +1768,60 @@ Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, doi: 10.1086/154711 -5 The connection between the observed X-ray sources at the Galac- -tic Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels. - Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, 043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, + +doi: 10.1093/mnrasl/slt071 + +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. + +2006a, MNRAS, 372, 174, + +doi: 10.1111/j.1365-2966.2006.10818.x + +Baumgardt, H., Hopman, C., Portegies Zwart, S., & + +Makino, J. 2006b, MNRAS, 372, 467, + +doi: 10.1111/j.1365-2966.2006.10885.x + Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, 613, 1143, doi: 10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, + +doi: 10.1093/mnras/187.2.237 + +—. 
2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 + +http://doi.org/10.1103/PhysRevLett.116.241102 +http://doi.org/10.1103/PhysRevLett.118.221101 +http://doi.org/10.1103/PhysRevLett.119.141101 +http://doi.org/10.3847/2041-8205/830/1/L1 +http://doi.org/10.1086/308129 +http://doi.org/10.1088/0004-637X/780/2/148 +http://doi.org/10.1007/s41114-018-0013-8 +https://arxiv.org/abs/2109.12119 +http://doi.org/10.1086/154711 +http://doi.org/10.1103/PhysRevD.102.043002 +http://doi.org/10.1093/mnrasl/slt071 +http://doi.org/10.1111/j.1365-2966.2006.10818.x +http://doi.org/10.1111/j.1365-2966.2006.10885.x +http://doi.org/10.1086/423299 +http://doi.org/10.1093/mnras/187.2.237 +http://doi.org/10.1088/2041-8205/749/1/L3 + + +IMBH Formation in Galactic Nuclei 11 + +—. 2012b, MNRAS, 420, 2912, + +doi: 10.1111/j.1365-2966.2011.20071.x + Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x @@ -1454,6 +1846,14 @@ Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, + +L1, doi: 10.1046/j.1365-8711.1999.02358.x + +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, + +doi: 10.1093/mnras/179.3.433 + Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, 427, doi: 10.1086/500727 @@ -1462,26 +1862,6 @@ Bondi, H. 
1952, MNRAS, 112, 195, doi: 10.1093/mnras/112.2.195 -http://doi.org/10.1103/PhysRevLett.116.241102 -http://doi.org/10.1103/PhysRevLett.118.221101 -http://doi.org/10.1103/PhysRevLett.119.141101 -http://doi.org/10.1086/308129 -http://doi.org/10.1088/0004-637X/780/2/148 -https://arxiv.org/abs/2109.12119 -http://doi.org/10.1086/154711 -http://doi.org/10.1103/PhysRevD.102.043002 -http://doi.org/10.1086/423299 -http://doi.org/10.1111/j.1365-2966.2006.10467.x -http://doi.org/10.3847/1538-4357/ab6d77 -http://doi.org/10.3847/1538-4357/ab6d77 -http://doi.org/10.1088/1367-2630/11/10/105016 -http://doi.org/10.1086/342655 -http://doi.org/10.1086/500727 -http://doi.org/10.1093/mnras/112.2.195 - - -IMBH Formation in Galactic Nuclei 9 - Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, doi: 10.1093/mnras/104.5.273 @@ -1490,6 +1870,10 @@ Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, 2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, + +860, 14, doi: 10.3847/1538-4357/aac2c4 + Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, C. 2012, JCAP, 2012, 054, @@ -1524,10 +1908,36 @@ Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, doi: 10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, + +doi: 10.1111/j.1365-2966.2005.09937.x + +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. + +2009, MNRAS, 393, 1016, + +doi: 10.1111/j.1365-2966.2008.14254.x + Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. + +C. N. 2021a, MNRAS, 505, 2186, + +doi: 10.1093/mnras/stab1428 + +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, + +T. C. N. 2021b, MNRAS, 503, 1051, + +doi: 10.1093/mnras/stab402 + +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. + +2005, ApJ, 620, 878, doi: 10.1086/427142 + Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 
2019, MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 @@ -1536,6 +1946,14 @@ Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, + +doi: 10.1126/science.aav8137 + +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, + +562, L19, doi: 10.1086/338118 + Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, 110, 221101, doi: 10.1103/PhysRevLett.110.221101 @@ -1544,6 +1962,10 @@ Edgar, R. 2004, NewAR, 48, 843, doi: 10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, + +doi: 10.3847/1538-4357/abd93c + Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Monthly Notices of the Royal Astronomical Society, 443, @@ -1596,6 +2018,54 @@ J. P. 2018, MNRAS, 478, 4030, doi: 10.1093/mnras/sty1262 +http://doi.org/10.1111/j.1365-2966.2011.20071.x +http://doi.org/10.1111/j.1365-2966.2006.10467.x +http://doi.org/10.3847/1538-4357/ab6d77 +http://doi.org/10.3847/1538-4357/ab6d77 +http://doi.org/10.1088/1367-2630/11/10/105016 +http://doi.org/10.1086/342655 +http://doi.org/10.1046/j.1365-8711.1999.02358.x +http://doi.org/10.1093/mnras/179.3.433 +http://doi.org/10.1086/500727 +http://doi.org/10.1093/mnras/112.2.195 +http://doi.org/10.1093/mnras/104.5.273 +http://doi.org/10.1093/mnras/stx1007 +http://doi.org/10.3847/1538-4357/aac2c4 +http://doi.org/10.1088/1475-7516/2012/07/054 +http://doi.org/10.1103/RevModPhys.82.3069 +http://doi.org/10.1088/0004-637X/762/2/95 +http://doi.org/10.3847/1538-4357/aaba16 +http://doi.org/10.1093/mnras/stv694 +http://doi.org/10.1126/science.272.5266.1286 +http://doi.org/10.1086/156685 +http://doi.org/10.1111/j.1365-2966.2005.09937.x +http://doi.org/10.1111/j.1365-2966.2008.14254.x +http://doi.org/10.1093/mnras/stab2783 +http://doi.org/10.1093/mnras/stab1428 +http://doi.org/10.1093/mnras/stab402 +http://doi.org/10.1086/427142 +http://doi.org/10.1093/mnras/stz1453 +http://doi.org/10.1093/mnras/stab2390 
+http://doi.org/10.1126/science.aav8137 +http://doi.org/10.1086/338118 +http://doi.org/10.1103/PhysRevLett.110.221101 +http://doi.org/10.1016/j.newar.2004.06.001 +http://doi.org/10.3847/1538-4357/abd93c +http://doi.org/10.1093/mnras/stu1280 +http://doi.org/10.3847/2041-8213/ab77c9 +https://arxiv.org/abs/2107.04639 +http://doi.org/10.3847/1538-4357/ab94b2 +http://doi.org/10.3847/2041-8213/abbc0a +http://doi.org/10.3847/1538-4357/aaa0d7 +http://doi.org/10.1111/j.1365-2966.2004.07914.x +http://doi.org/10.1086/339576 +http://doi.org/10.1086/506193 +http://doi.org/10.3847/1538-4357/ab94bc +http://doi.org/10.1093/mnras/sty1262 + + +12 Rose et al. + Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, @@ -1658,12 +2128,34 @@ Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, + +doi: 10.1086/431475 + +Igumenshchev, I. V. 2008, ApJ, 677, 317, + +doi: 10.1086/529025 + +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. + +2003, ApJ, 592, 1042, doi: 10.1086/375769 + +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, + +106, doi: 10.1088/0004-637X/796/2/106 + Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the Royal Astronomical Society, 374, 1557, doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & + +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. + +https://arxiv.org/abs/2201.12368 + Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, doi: 10.3847/1538-4357/abeb14 @@ -1672,64 +2164,27 @@ Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, 45, doi: 10.3847/1538-4357/abb945 +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, + +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 + +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, + +doi: 10.1086/376675 + Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 —. 
2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 -Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, - -doi: 10.1093/mnras/stz036 +Löckmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, -http://doi.org/10.1093/mnras/104.5.273 -http://doi.org/10.1093/mnras/stx1007 -http://doi.org/10.1088/1475-7516/2012/07/054 -http://doi.org/10.1103/RevModPhys.82.3069 -http://doi.org/10.1088/0004-637X/762/2/95 -http://doi.org/10.3847/1538-4357/aaba16 -http://doi.org/10.1093/mnras/stv694 -http://doi.org/10.1126/science.272.5266.1286 -http://doi.org/10.1086/156685 -http://doi.org/10.1093/mnras/stab2783 -http://doi.org/10.1093/mnras/stz1453 -http://doi.org/10.1093/mnras/stab2390 -http://doi.org/10.1103/PhysRevLett.110.221101 -http://doi.org/10.1016/j.newar.2004.06.001 -http://doi.org/10.1093/mnras/stu1280 -http://doi.org/10.3847/2041-8213/ab77c9 -https://arxiv.org/abs/2107.04639 -http://doi.org/10.3847/1538-4357/ab94b2 -http://doi.org/10.3847/2041-8213/abbc0a -http://doi.org/10.3847/1538-4357/aaa0d7 -http://doi.org/10.1111/j.1365-2966.2004.07914.x -http://doi.org/10.1086/339576 -http://doi.org/10.1086/506193 -http://doi.org/10.3847/1538-4357/ab94bc -http://doi.org/10.1093/mnras/sty1262 -http://doi.org/10.1103/RevModPhys.82.3121 -http://doi.org/10.1086/377127 -http://doi.org/10.1086/427175 -http://doi.org/10.3847/1538-4357/aabfee -http://doi.org/10.3847/2041-8213/abdf5b -http://doi.org/10.1051/0004-6361/202037813 -http://doi.org/10.1088/0004-637X/705/1/361 -http://doi.org/10.1086/503295 -http://doi.org/10.1086/430694 -http://doi.org/10.1038/nature25029 -http://doi.org/10.1086/378182 -http://doi.org/10.1086/375341 -http://doi.org/10.3847/1538-4357/aaafce -http://doi.org/10.3847/1538-4357/abb66a -http://doi.org/10.1111/j.1365-2966.2006.11275.x -http://doi.org/10.3847/1538-4357/abeb14 -http://doi.org/10.3847/1538-4357/abb945 -http://doi.org/10.3847/1538-4365/aacb24 -http://doi.org/10.3847/1538-4365/aacb24 -http://doi.org/10.1093/mnras/stz036 +doi: 10.1111/j.1365-2966.2007.12699.x +Lu, C. 
X., & Naoz, S. 2019, MNRAS, 484, 1506, -10 Rose et al. +doi: 10.1093/mnras/stz036 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, @@ -1739,6 +2194,10 @@ Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, + +doi: 10.1046/j.1365-8711.1999.02853.x + Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 @@ -1753,6 +2212,24 @@ Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. + +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 + +McKinney, J. C. 2006, MNRAS, 368, 1561, + +doi: 10.1111/j.1365-2966.2006.10256.x + +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, + +doi: 10.1086/422244 + +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & + +Narayan, R. 2014, MNRAS, 441, 3177, + +doi: 10.1093/mnras/stu762 + Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, doi: 10.1088/0034-4885/69/9/R01 @@ -1771,6 +2248,14 @@ Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, + +618, 569, doi: 10.1086/426067 + +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, + +L18, doi: 10.3847/2041-8213/ac574b + Naoz, S., & Silk, J. 2014, ApJ, 795, 102, doi: 10.1088/0004-637X/795/2/102 @@ -1779,10 +2264,75 @@ Naoz, S., Silk, J., & Schnittman, J. D. 
2019, ApJL, 885, L35, doi: 10.3847/2041-8213/ab4fed +http://doi.org/10.1103/RevModPhys.82.3121 +http://doi.org/10.1086/377127 +http://doi.org/10.1086/427175 +http://doi.org/10.3847/1538-4357/aabfee +http://doi.org/10.3847/2041-8213/abdf5b +http://doi.org/10.1051/0004-6361/202037813 +http://doi.org/10.1088/0004-637X/705/1/361 +http://doi.org/10.1086/503295 +http://doi.org/10.1086/430694 +http://doi.org/10.1038/nature25029 +http://doi.org/10.1086/378182 +http://doi.org/10.1086/375341 +http://doi.org/10.3847/1538-4357/aaafce +http://doi.org/10.3847/1538-4357/abb66a +http://doi.org/10.1086/431475 +http://doi.org/10.1086/529025 +http://doi.org/10.1086/375769 +http://doi.org/10.1088/0004-637X/796/2/106 +http://doi.org/10.1111/j.1365-2966.2006.11275.x +https://arxiv.org/abs/2201.12368 +http://doi.org/10.3847/1538-4357/abeb14 +http://doi.org/10.3847/1538-4357/abb945 +http://doi.org/10.1093/mnras/staa2276 +http://doi.org/10.1086/376675 +http://doi.org/10.3847/1538-4365/aacb24 +http://doi.org/10.3847/1538-4365/aacb24 +http://doi.org/10.1111/j.1365-2966.2007.12699.x +http://doi.org/10.1093/mnras/stz036 +http://doi.org/10.1088/0004-637X/690/2/1463 +http://doi.org/10.1086/319848 +http://doi.org/10.1046/j.1365-8711.1999.02853.x +http://doi.org/10.1051/0004-6361:20034147 +https://arxiv.org/abs/2109.06222 +http://doi.org/10.1093/mnras/stab1334 +http://doi.org/10.1093/mnras/stab1409 +http://doi.org/10.1111/j.1365-2966.2006.10256.x +http://doi.org/10.1086/422244 +http://doi.org/10.1093/mnras/stu762 +http://doi.org/10.1088/0034-4885/69/9/R01 +http://doi.org/10.1086/317837 +http://doi.org/10.1086/172607 +http://doi.org/10.1086/429721 +http://doi.org/10.1088/0067-0049/181/1/110 +http://doi.org/10.1086/426067 +http://doi.org/10.3847/2041-8213/ac574b +http://doi.org/10.1088/0004-637X/795/2/102 +http://doi.org/10.3847/2041-8213/ab4fed + + +IMBH Formation in Galactic Nuclei 13 + Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 
2020, ApJL, 888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & + +Curd, B. 2022, MNRAS, 511, 3795, + +doi: 10.1093/mnras/stac285 + +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. + +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 + +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, + +ApJ, 628, 368, doi: 10.1086/430728 + O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, 2127, doi: 10.1111/j.1365-2966.2009.14653.x @@ -1793,6 +2343,20 @@ O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, + +D. 2010, ApJ, 722, 642, + +doi: 10.1088/0004-637X/722/1/642 + +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, + +doi: 10.1086/319042 + +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, + +1011, doi: 10.1086/503273 + Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Stephen R., J. 2016, ApJ, 823, 113, @@ -1827,6 +2391,10 @@ Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, A56, doi: 10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, + +MNRAS, doi: 10.1093/mnras/stac231 + Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & Rasio, F. A. 2018, PhRvL, 120, 151101, @@ -1853,6 +2421,10 @@ Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, + +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 + Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, 100, 043009, doi: 10.1103/PhysRevD.100.043009 @@ -1871,6 +2443,10 @@ Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, doi: 10.1086/519309 +Schödel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, + +609, A27, doi: 10.1051/0004-6361/201730452 + Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, doi: 10.1086/156521 @@ -1901,6 +2477,14 @@ e-prints. https://arxiv.org/abs/1603.02709 —. 
2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d +Stone, N. C., Küpper, A. H. W., & Ostriker, J. P. 2017, + +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 + +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, + +doi: 10.1093/mnras/stv2281 + The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, @@ -1927,21 +2511,15 @@ G. N. 2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 -http://doi.org/10.1088/0004-637X/690/2/1463 -http://doi.org/10.1086/319848 -http://doi.org/10.1051/0004-6361:20034147 -https://arxiv.org/abs/2109.06222 -http://doi.org/10.1093/mnras/stab1334 -http://doi.org/10.1088/0034-4885/69/9/R01 -http://doi.org/10.1086/317837 -http://doi.org/10.1086/172607 -http://doi.org/10.1086/429721 -http://doi.org/10.1088/0067-0049/181/1/110 -http://doi.org/10.1088/0004-637X/795/2/102 -http://doi.org/10.3847/2041-8213/ab4fed http://doi.org/10.3847/2041-8213/ab5e3b +http://doi.org/10.1093/mnras/stac285 +http://doi.org/10.1093/pasj/55.6.L69 +http://doi.org/10.1086/430728 http://doi.org/10.1111/j.1365-2966.2009.14653.x http://doi.org/10.1086/498446 +http://doi.org/10.1088/0004-637X/722/1/642 +http://doi.org/10.1086/319042 +http://doi.org/10.1086/503273 http://doi.org/10.3847/0004-637X/823/2/113 http://doi.org/10.1103/PhysRev.131.435 http://doi.org/10.1103/PhysRev.131.435 @@ -1950,15 +2528,18 @@ http://doi.org/10.1086/312422 http://doi.org/10.1086/341798 http://doi.org/10.1088/0004-637X/780/2/187 http://doi.org/10.1051/0004-6361/202037710 +http://doi.org/10.1093/mnras/stac231 http://doi.org/10.1103/PhysRevLett.120.151101 http://doi.org/10.1103/PhysRevD.93.084029 http://doi.org/10.1103/PhysRevD.100.043027 http://doi.org/10.3847/1538-4357/abc557 https://arxiv.org/abs/2009.01213 +http://doi.org/10.1093/mnras/stx2044 http://doi.org/10.1103/PhysRevD.100.043009 http://doi.org/10.3847/1538-4357/ab43df http://doi.org/10.1086/339917 http://doi.org/10.1086/519309 +http://doi.org/10.1051/0004-6361/201730452 
http://doi.org/10.1086/156521 http://doi.org/10.1093/mnras/217.2.367 http://doi.org/10.1093/mnras/stv2700 @@ -1967,6 +2548,8 @@ http://doi.org/10.1093/mnras/stx1576 http://doi.org/10.1093/mnras/stx1576 https://arxiv.org/abs/1603.02709 http://doi.org/10.3847/1538-4357/ab1e4d +http://doi.org/10.1093/mnras/stx097 +http://doi.org/10.1093/mnras/stv2281 https://arxiv.org/abs/2009.01075 https://arxiv.org/abs/2009.01190 http://doi.org/10.1088/0004-637X/750/1/31 @@ -1974,7 +2557,7 @@ http://doi.org/10.1093/mnras/stw225 http://doi.org/10.1093/mnras/stab842 -IMBH Formation in Galactic Nuclei 11 +14 Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & @@ -1982,10 +2565,22 @@ Breivik, K. 2021, ApJ, 917, 76, doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, + +doi: 10.1086/379767 + Woosley, S. E. 2017, ApJ, 836, 244, doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, + +doi: 10.1046/j.1365-8711.2002.05532.x + +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, + +doi: 10.1088/0004-637X/761/2/129 + Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 2014, Monthly Notices of the Royal Astronomical @@ -2001,7 +2596,10 @@ Zhu, Z., Li, Z., & Morris, M. R. 
2018, ApJS, 235, 26, doi: 10.3847/1538-4365/aab14f http://doi.org/10.3847/1538-4357/ac088d +http://doi.org/10.1086/379767 http://doi.org/10.3847/1538-4357/836/2/244 +http://doi.org/10.1046/j.1365-8711.2002.05532.x +http://doi.org/10.1088/0004-637X/761/2/129 http://doi.org/10.1093/mnras/stu351 https://arxiv.org/abs/2011.04653 http://doi.org/10.3847/1538-4365/aab14f @@ -2012,11 +2610,12 @@ http://doi.org/10.3847/1538-4365/aab14f 2.2 Direct Collisions 2.3 Statistical Approach to Collisions 2.4 Mass Growth - 2.5 GW Inspiral - 2.6 IMBH growth - 2.7 Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates - 2.8 Two Body Relaxation Processes - 2.9 Effect of Relaxation Processes + 2.5 Uncertainties in Accretion + 2.6 GW Inspiral + 2.7 IMBH growth + 2.8 Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates + 2.9 Two Body Relaxation Processes + 2.10 Effect of Relaxation Processes 3 Discussion and Predictions diff --git a/read/results/tika/2201.00029.txt b/read/results/tika/2201.00029.txt index efed233..1ae811b 100644 --- a/read/results/tika/2201.00029.txt +++ b/read/results/tika/2201.00029.txt @@ -22,6 +22,12 @@ + + + + + + diff --git a/read/results/tika/2201.00037.txt b/read/results/tika/2201.00037.txt index 6c9f216..ff2f257 100644 --- a/read/results/tika/2201.00037.txt +++ b/read/results/tika/2201.00037.txt @@ -26,6 +26,13 @@ + + + + + + + @@ -202,17 +209,12 @@ descending node of orbit Ωp -ê3 - -I +ê3I I -ê3 -Lεm - -I ê3 -p +ê3Lεm +I ê3p ascending node of orbit @@ -229,10 +231,7 @@ direction S -ê3 -Iê3 - -L +ê3Iê3L M @@ -242,28 +241,19 @@ orbital plane Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded -rectangle) and the Cassini state of Mercury. The normal to the orbital plane (êI -3) is offset from the nor- - -mal to the Laplace plane (êL -3 ) by an angle I = 8.5330◦. The symmetry axis of the mantle êp - -3 is offset - -from êI -3 by εm ≈ 2 arcmin. 
êI +rectangle) and the Cassini state of Mercury. The normal to the orbital plane (êI3) is offset from the nor- -3 and êp -3 are coplanar with, and precess about, êL +mal to the Laplace plane (êL3 ) by an angle I = 8.5330 +◦. The symmetry axis of the mantle êp3 is offset -3 in a retrograde direction +from êI3 by εm ≈ 2 arcmin. êI3 and êp3 are coplanar with, and precess about, êL3 in a retrograde direction -at frequency Ωp = 2π/325, 513 yr−1. The blue (orange) shaded region indicates the portion of the orbit +at frequency Ωp = 2π/325, 513 yr +−1. The blue (orange) shaded region indicates the portion of the orbit when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Gen- - ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. The prediction of Mercury’s obliquity is based on the assumption that the whole planet @@ -289,7 +279,6 @@ its outermost part must be. A solid inner core may have nucleated at the centre size is not well constrained. Inner core growth leads to planetary contraction, and the inferred radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an - approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. @@ -445,7 +434,6 @@ strategy facilitates comparisons between our results. We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or a density contrast at the ICB), the crustal density ρc and crustal thickness h = R−rm. 
The - three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with the known mass M and chosen values of the moments of inertia of the whole planet C and that @@ -490,15 +478,9 @@ MR2 1 MR2 - [ -(ρs − ρf )r5 +(ρs − ρf )r5sεs + (ρf − ρm)r5f εf + (ρm − ρc)r5mεm + ρcR5εr -sεs + (ρf − ρm)r5 -f εf + (ρm − ρc)r5 - -mεm + ρcR -5εr ] , (1a) @@ -514,15 +496,9 @@ B −A 1 4MR2 - [ -(ρs − ρf )r5 +(ρs − ρf )r5sξs + (ρf − ρm)r5fξf + (ρm − ρc)r5mξm + ρcR5ξr -sξs + (ρf − ρm)r5 -fξf + (ρm − ρc)r5 - -mξm + ρcR -5ξr ] . (1b) @@ -540,12 +516,13 @@ Mercury Parameter Numerical value Reference mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] -rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b] +rotation rate, Ωo = 1.5n 2π/58.64623 day +−1 Stark et al. [2015b] -orbit precession rate, Ωp 2π/325, 513 yr−1 Baland et al. [2017] +orbit precession rate, Ωp 2π/325, 513 yr +−1 Baland et al. [2017] Poincaré number, δω = Ωp/Ωo 4.9327× 10−7 - orbital eccentricity, ec 0.20563 Baland et al. [2017] orbital inclination, I 8.5330◦ Baland et al. [2017] @@ -553,15 +530,11 @@ orbital inclination, I 8.5330◦ Baland et al. [2017] mean planetary radius, R 2439.360 km Perry et al. [2015] mass, M 3.3012× 1023 kg Genova et al. [2019] - mean density, ρ̄ 5429.5 kg m−3 J2 5.0291× 10−5 Genova et al. [2019] - C22 8.0415× 10−6 Genova et al. [2019] - polar surface flattening, εr 6.7436× 10−4 Perry et al. [2015] - equatorial surface flattening, ξr 5.1243× 10−4 Perry et al. [2015] Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 @@ -570,9 +543,7 @@ m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4π 3 ρ̄R3 = M . 
The numerical -values of εr and ξr are calculated from εr = (ā− c)/R and ξr = (a− b)/R, where ā = 1 -2 -(a+ b) and where +values of εr and ξr are calculated from εr = (ā− c)/R and ξr = (a− b)/R, where ā = 12 (a+ b) and where a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor @@ -585,7 +556,6 @@ and Wieczorek [2016] who adopted the same strategy in their interior modelling o Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog- raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ - and an equatorial offset of ∼ 15◦ [Perry et al., 2015]. Once the densities and flattenings of all interior regions are known, we can specify the mo- @@ -622,13 +592,12 @@ Ā ef = Cf − Āf - Āf -es = +es = Cs − Ās - Ās + , (3a) γ = @@ -637,8 +606,8 @@ Ā γs = Bs −As - Ās + . (3b) We further note that e and γ are connected to J2 and C22 by @@ -673,12 +642,9 @@ Confidential manuscript submitted to JGR-Planets Ωf -ê3 -p - -ê3 -sê3 +ê3p +ê3sê3 I I @@ -687,50 +653,39 @@ I θp -ê3 -L +ê3L -ê1 -p - -ê2 -p +ê1p ê2p Cassini plane ωΩot -ê3 -I +ê3I I εm -ê3 -p +ê3p ê1 -ê2 -p +ê2p -ê3 -L +ê3L a) b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) -in a frame attached to the rotating mantle. The orbit normal (êI -3) is tilted by an angle I = 8.533◦ from - -the Laplace normal (êL -3 ) and the symmetry axis of Mercury’s mantle (êp +in a frame attached to the rotating mantle. The orbit normal (êI3) is tilted by an angle I = 8.533 +◦ from +the Laplace normal (êL3 ) and the symmetry axis of Mercury’s mantle (ê +p 3) is tilted by an obliquity εm -with respect to êI -3. Shown in (a) are the orientations of the symmetry axis of the inner core (ês - +with respect to êI3. 
Shown in (a) are the orientations of the symmetry axis of the inner core (ê +s 3), the rotation rate vectors of the mantle (Ω), fluid core (Ωf ) and inner core (Ωf ) and angles θp, θn, θm, θf @@ -743,9 +698,8 @@ the mantle, fluid core and inner core, respectively; blue shaded parts show an e The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial -mantle axes êp -1 and êp - +mantle axes êp1 and ê +p 2 with respect to the Cassini plane. Viewed in the frame attached to the rotating mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωp cos I in the longitudinal direc- @@ -772,17 +726,37 @@ fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ωo = day−1, with Ωo = 1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby -the orientations of the orbit normal (êI3) and of the mantle symmetry axis (êp3) are both copla- +the orientations of the orbit normal (êI3) and of the mantle symmetry axis (ê +p +3) are both copla- nar with, and precess about, the normal to the Laplace plane (êL3 ). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present -purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between êL3 and êI3 -is the orbital inclination I = 8.5330◦ [Baland et al., 2017], the angle between êI3 and êp3 is the +purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between êL3 and ê +I +3 -obliquity εm and the angle between êL3 and êp3 is θp = I + εm. The precession of êI3 and êp3 -about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr−1 [Baland et al., 2017]. +is the orbital inclination I = 8.5330◦ [Baland et al., 2017], the angle between êI3 and ê +p +3 is the + +obliquity εm and the angle between ê +L +3 and ê + +p +3 is θp = I + εm. 
The precession of ê + +I +3 and ê + +p +3 + +about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr +−1 [Baland et al., 2017]. The mantle and crust are welded together and form a single rotating region which we re- @@ -793,8 +767,10 @@ of the mantle are expected to remain in close alignment, but they do not coincid define the rotation rate vector of the mantle by Ω, and its misalignment from êp3 by an angle θm. Note that θm � εm and it is often the spin axis of Mercury which is used to define the +obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ê -obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, êp3 and Ω would +p +3 and Ω would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and @@ -802,7 +778,9 @@ the angles I, εm and θm would completely describe the Cassini state. The prese outer core and solid inner core require three additional orientation vectors and angles. The sym- -metry axis of the inner core is defined by unit vector ês3 and its misalignment from êp3 by an +metry axis of the inner core is defined by unit vector ês3 and its misalignment from ê +p +3 by an angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, re- @@ -811,14 +789,20 @@ spectively, and their misalignment from the rotation vector of the mantle Ω ar gles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close alignment, so θn ≈ θs. To be formal in our definition of the different angles of misalignment, - for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. -At equilibrium in the Cassini state, the three orientation vectors (êI3, êp3, ês3) and three +At equilibrium in the Cassini state, the three orientation vectors (êI3, ê +p +3, ê -rotation vectors (Ω, Ωf , Ωs) are forced to precess about êL3 at the same frequency. 
If we ne- +s +3) and three + +rotation vectors (Ω, Ωf , Ωs) are forced to precess about ê +L +3 at the same frequency. If we ne- glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed @@ -833,7 +817,6 @@ per Mercury day, is equal to ω = −1− δω cos(θp) . (5) The factor δω = Ωp/Ωo = 4.933 × 10−7 is the Poincaré number, expressing the ratio of the - forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal as seen in the mantle frame is expressed as @@ -893,7 +876,12 @@ ing in the same direction as the vector connecting the Sun to the descending nod orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque -is periodic, rotating at frequency ωΩo. Setting the equatorial directions êp1 and êp2 to correspond +is periodic, rotating at frequency ωΩo. Setting the equatorial directions ê +p +1 and ê + +p +2 to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial @@ -947,7 +935,6 @@ corresponds to the observed obliquity of the mantle symmetry axis. It is thus eq though we keep the tilde notation in the presentation of our results to emphasize that it rep- resents the real part of the solution from our system. Furthermore, since m̃ � ε̃m, we often - refer to ε̃m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. 
@@ -968,17 +955,14 @@ equations are [ Āf - Ā m̃f + Ās - Ā m̃s + α3es Ās - Ā ñs @@ -987,8 +971,7 @@ ñs 1 -iΩ2 -oĀ +iΩ2oĀ ( Γ̃sun @@ -998,14 +981,12 @@ oĀ ωm̃+ (1 + ω + ef ) m̃f − ωα1es Ās - Āf -ñs = +ñs = 1 -iΩ2 -oĀf +iΩ2oĀf ( − Γ̃cmb − Γ̃icb @@ -1016,12 +997,10 @@ oĀf (ω − α3es)m̃+ α1esm̃f + (1 + ω) m̃s + (1 + ω − α2) esñs = 1 -iΩ2 -oĀs +iΩ2oĀs ( -Γ̃s -sun + Γ̃icb +Γ̃ssun + Γ̃icb ) , (12c) @@ -1048,9 +1027,7 @@ on the inner core, αg = 8πG -5Ω2 -o - +5Ω2o [ρc(εr − εm) + ρm(εm − εf ) + ρf εf ] , (13b) where G is the gravitational constant. @@ -1059,14 +1036,11 @@ where G is the gravitational constant. a small mantle obliquity ε̃m and a small inner core tilt ñs, it is given by -Γ̃sun = −iΩ2 -oĀ - +Γ̃sun = −iΩ2oĀ ( φmε̃m + Ās - Ā α3φsñs @@ -1088,8 +1062,7 @@ Confidential manuscript submitted to JGR-Planets n2 -Ω2 -o +Ω2o [ G210 e+ @@ -1109,8 +1082,7 @@ G201 γ n2 -Ω2 -o +Ω2o [ G210 es + @@ -1128,9 +1100,7 @@ and where G210 and G201 are functions of the orbital eccentricity ec, G210 = 1 -(1− e2 -c)3/2 - +(1− e2c)3/2 , (16a) G201 = @@ -1142,22 +1112,16 @@ ec − 123 16 -e3 -c + +e3c + 489 128 -e5 -c . (16b) - -The gravitational torque by the Sun acting on the inner core alone, Γ̃s -sun, is +e5c . (16b) -Γ̃s -sun = −iΩ2 +The gravitational torque by the Sun acting on the inner core alone, Γ̃ssun, is -oĀsα3φs(ε̃m + ñs) . (17) +Γ̃ssun = −iΩ2oĀsα3φs(ε̃m + ñs) . (17) Γ̃cmb and Γ̃icb are the torques from tangential stresses by the fluid core on the mantle at the @@ -1167,10 +1131,12 @@ terms of dimensionless complex coupling constants Kicb and Kcmb and the differen lar velocities at each boundary [e.g Buffett , 1992; Buffett et al., 2002], -Γ̃icb = iΩ2 +Γ̃icb = iΩ +2 oĀsKicb(m̃f − m̃s) , (18a) -Γ̃cmb = iΩ2 +Γ̃cmb = iΩ +2 oĀfKcmb m̃f . (18b) Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the @@ -1188,7 +1154,6 @@ m̃+ (1 + ω)p̃ = 0 . (19) For Mercury, it is more convenient to connect the internal model with ε̃m instead of p̃. 
This is because θp ≈ 8.567◦ whereas ε̃m ≈ 2 arcmin and thus the latter obeys more strictly the - condition of small angles assumed in our framework. Furthermore, the external torques act- ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ε̃m. Writ- @@ -1216,7 +1181,6 @@ quency ω. The system can be written in a matrix form as Confidential manuscript submitted to JGR-Planets M · x = y , (22a) - where the solution (x) and forcing (y) vectors are xT = [m̃, m̃f , m̃s, ñs, ε̃m] , (22b) @@ -1228,36 +1192,29 @@ and the elements of matrix M are M =  -ω − e (1 + ω) - -Āf +ω − e (1 + ω) Āf Ā (1 + ω) Ās Ā Ās - Ā α3 - ( (1 + ω)es + φs ) φm -ω 1 + ω + ef +Kcmb + Ās - -Āf -Kicb − Ās - +ω 1 + ω + ef +Kcmb + +Ās Āf -Kicb −ωesα1 +Kicb − ĀsĀf Kicb −ωesα1 Ās - Āf + 0 ω − α3es α1es −Kicb 1 + ω +Kicb (1 + ω − α2)es + α3φs α3φs @@ -1347,7 +1304,6 @@ For a rigid planet with no fluid and solid cores, our system of equations reduce tions (12a) and (20), (ω − e)m̃+ φm ε̃m = 0 , (23a) - m̃+ (1 + ω)ε̃m = −(1 + ω) tan I . (23b) Using Equation (21), δω � 1, and the approximation Ā(1 + e+ δω cos I) = C+ Āδω cos I ≈ @@ -1362,20 +1318,20 @@ sin I + cos I ε̃m ) . (24b) -Equation (24b) gives a direct relationship between m̃ and ε̃m. For I = 8.5330◦, δω = +Equation (24b) gives a direct relationship between m̃ and ε̃m. For I = 8.5330 +◦, δω = 4.9327×10−7 and taking ε̃m = 2.04 arcmin, this gives m̃ = 2.52×10−4 arcmin, much smaller - than ε̃m: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. Substituting Equation (24b) in Equation (24a) gives CΩp - ( -sin I + cos I ε̃m +sin I + cos I ε̃m ) + = ĀΩoφmε̃m , (25) and isolating for ε̃m, @@ -1386,7 +1342,8 @@ CΩp sin I −CΩp cos I + ĀΩoφm . 
(26) -Upon using Equations (4), (15a), and Ωo = 3 +Upon using Equations (4), (15a), and Ωo = +3 2n, we can write ε̃m = @@ -1398,7 +1355,6 @@ CΩp sin I This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 [see for instance Equation (1) of Baland et al., 2017, where their definition of Ω̇ is equal to −Ωp]. - Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo- @@ -1416,8 +1372,8 @@ n Ωp G210J2 + 2G201C22 - cos I + sin I/ε̃m + . (28) which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation @@ -1454,13 +1410,11 @@ which is equivalent to the prediction by Peale [2005] when neglecting its small ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid- body precession and hence C was replaced by Cm. Using C = 0.346 · MR2 [Margot et al., - 2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free preces- sion period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take Cm = 0.431 ·C = 0.431 ·0.346 ·MR2 [Margot et al., 2012], we obtain Tfp = 2π/ωfp = 560 yr. - These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid @@ -1486,13 +1440,11 @@ the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassin 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant amplification if Ωp ≈ ωfp. Since ωfp � Ωp, resonant amplification is minimal and the re- - sulting obliquity, ε̃m ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5◦. 
2.3.2 The misalignment of the fluid and solid cores With ω = −1− δω cos I and δω � 1, Equation (12d) gives ñs ≈ m̃s; as for the mantle, - the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between m̃ and ε̃m of Equation (24b) is independent of the interior structure, @@ -1502,11 +1454,11 @@ so it remains unchanged when a fluid and a solid cores are present. Substituting tion (12a), and setting ñs = m̃s, the angular momentum equation of the whole planet becomes CΩp - ( -sin I + cos I ε̃m +sin I + cos I ε̃m ) + + (Āf cos I Ωp)m̃f + Ās(cos I Ωp − Ωoα3φs)ñs = ĀΩoφmε̃m . (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- @@ -1515,14 +1467,12 @@ cation of the mantle obliquity ε̃m. Approximate analytical solutions of ñs a ñs ≈ Ωp - κλs ( 1 + Ωo(Kicb − α1es) - λf )( @@ -1536,7 +1486,6 @@ sin I + cos I ε̃m m̃f ≈ Ωp - λf ( @@ -1546,11 +1495,9 @@ sin I + cos I ε̃m + Ωo - λf Ās - Āf ( @@ -1562,12 +1509,9 @@ ñs , (32b) where κ = 1− Ās - Āf -Ω2 -o - +Ω2o ( Kicb − α1es @@ -1577,7 +1521,6 @@ Kicb − α1es , (33a) λf = σ̄f − Ωp cos I , (33b) - λs = σ̄s − Ωp cos I , (33c) –14– @@ -1594,8 +1537,8 @@ and where we have introduced the frequencies ef +Kcmb + Ās - Āf + Kicb ) @@ -1617,7 +1560,6 @@ vide useful predictions of ñs and m̃f . In the limit of a very strong coupling between the fluid core, solid core and mantle, σ̄s � Ωp and σ̄f � Ωp, so that ñs → 0, m̃f → 0 and Equation (31) reverts back to Equation (25) - for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM cou- @@ -1631,21 +1573,19 @@ Inserting these in Equation (31), and with the moment of inertia of the mantle e C − Āf − Ās, we obtain Cm Ωp - ( -sin I + cos I ε̃m +sin I + cos I ε̃m ) + = ĀΩoφmε̃m . (35) which describes, as expected, a forced precession of the mantle alone. 
If this was the case for Mercury, taking Cm/C = 0.431, the obliquity should be ε̃m ≈ 0.88 arcmin, substantially smaller - than the observed obliquity of ε̃m ≈ 2 arcmin. If σ̄f ≈ Ωp (and thus λf → 0) and/or σ̄s ≈ Ωp (and thus λs → 0) resonant amplifica- - tion leads to large amplitudes for m̃f , ñs and the mantle obliquity ε̃m. The frequencies σ̄f and σ̄s are closely related to the FCN and FICN frequencies ωfcn and ωficn, respectively. Hence, @@ -1663,9 +1603,7 @@ that for reasonable interior models of Mercury, the FCN and FICN periods are in a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- pect an important amplification effect. Furthermore, since ωfcn, ωficn � Ωp, then σ̄f � Ωp - and σ̄s � Ωp, and we are in the strong coupling limit. The mantle obliquity should be close - to that expected for a rigid planet, as observations suggest. Therefore, we expect that m̃f and ñs should be of the order of ε̃m or smaller. This further justifies the assumption of small an- @@ -1716,14 +1654,11 @@ Cm where -f(ec) = 1− 11e2 -c + - +f(ec) = 1− 11e2c + 959 48 -e4 -c , (37) +e4c , (37) and where ζ is a correction that takes into account the entrainment of the inner core in the li- @@ -1733,9 +1668,11 @@ rection is small and, to simplify, we neglect it here. Taking the observed libra to be 38.5 arcsec [Margot et al., 2012], Ĉ = C/MR2 = 0.3455 and C22 and ec from Table 1, -this corresponds to a ratio Cm/C = 0.4269, or equivalently Ĉm = Cm/MR2 = 0.1475. +this corresponds to a ratio Cm/C = 0.4269, or equivalently Ĉm = Cm/MR +2 = 0.1475. -For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3 [Sori , +For all results presented in our study, the crustal density is set at ρc = 2974 kg m +−3 [Sori , 2018]. Our standard choice for the crustal thickness is h = 26 km [Sori , 2018], although in @@ -1743,7 +1680,8 @@ section 3.2 we also present some results with other choices of h. 
We have consid sible prescriptions connected to the density of the inner core. First, for all the results presented -in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 ap- +in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m +−3 ap- proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure @@ -1777,11 +1715,18 @@ ner core radius rs for each of the two inner core density scenarios: a fixed ρs the inner core is small, its presence has a limited influence on the resulting density structure, -and we find ρm = 3197 kg m−3, ρf = 7263 kg m−3 and rf = 2000 km in each of the two +and we find ρm = 3197 kg m +−3, ρf = 7263 kg m + +−3 and rf = 2000 km in each of the two -scenarios. When ρs is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, rf in- +scenarios. When ρs is fixed to 8800 kg m +−3, as the inner core reaches 1500 km in size, rf in- -creases to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m−3. +creases to above 2100 km, ρm approaches 4000 kg m +−3 and ρf is reduced to below 5000 kg m + +−3. Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core @@ -1889,7 +1834,10 @@ m fluid core density -CMB ra +CM +B + +ra diu s @@ -1937,8 +1885,8 @@ imation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = are given by ωfcn ≈ −Ωo - ( + Ā Ām + Ās @@ -1955,10 +1903,8 @@ efφm , (38a) ωficn ≈ Ωo - ( Ā+ Ās - Ā− Ās )( @@ -1971,10 +1917,10 @@ The expression of the FICN frequency involves the inertial torque (term esα1) a itational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core. -For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 = +For both of our inner core density scenarios (and our choices of ρs = 8800 kg m +−3 and α3 = 0.1), the internal gravitational torque dominates that from the Sun. 
Furthermore, α3αg � α1; - the gravitational torque dominates the inertial torque, in large part because of the slow rota- tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion @@ -1992,7 +1938,6 @@ and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approxi Confidential manuscript submitted to JGR-Planets sion for the FICN differs by a factor (Ā+Ās)/(Ā−Ās) compared to that given in Dumberry - and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. First, it @@ -2002,8 +1947,8 @@ involves the external torque from the Sun captured by the parameter φm. If we s we obtain the FCN frequency for a decoupled model in which only interior torques contribute, ωfcn,int ≈ −Ωo - ( + Ā Ām + Ās @@ -2064,7 +2009,6 @@ at the largest rs. Recall that m̃f is measured with respect to the mantle rotat coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with respect to the orbit normal is ε̃m+m̃f ≈ 6 arcmin. The reason why the obliquity of the spin - axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), which shows that m̃f is determined by the resonant amplification of the FCN mode at the forc- @@ -2230,12 +2174,10 @@ Āf + Ās - (σ̄s − Ωp cos I) ) − Ās - Āc Ωoα3φs @@ -2252,9 +2194,7 @@ how the core is entrained to precess with the mantle, with the coupling between pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit of σ̄f , σ̄s → 0, then χ = −1, C ′ = Cm, the core is fully decoupled from the mantle and we - retrieve Equation (35). If instead σ̄f , σ̄s → ∞, then χ = 0, C ′ = C and we retrieve the pre- - diction for a rigid planet. 
When both the FCN and FICN frequencies are much larger than Ωp, as is the case here, resonant amplification is weak, χ is small and positive, C ′ > C and this @@ -2272,7 +2212,6 @@ misalignment of the fluid core. In Equation (41), σ̄s � σ̄f , so to a good Confidential manuscript submitted to JGR-Planets χ ≈ Āf - Āc Ωo cos I @@ -2281,8 +2220,9 @@ Āc . (42) For a small inner core, χ ≈ 7.55×10−3. As the inner core grows, Āf decreases, and the com- +bination Ācχ also decreases. This implies that C -bination Ācχ also decreases. This implies that C ′ decreases with inner core size and, consequently, +′ decreases with inner core size and, consequently, ε̃m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the @@ -2303,7 +2243,6 @@ terior boundary in order to match J2 and C22, and thus different predictions for ñs. To illustrate this, we show on Figure 4 two additional predictions computed with crustal thicknesses changed to h = 16 and 36 km. The change in ε̃m remains modest, ∼ 0.025%, but - the changes in m̃f and ñs are more substantial, ∼ 5% and ∼ 10%, respectively. We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment @@ -2315,7 +2254,6 @@ if the inner core is misaligned with the mantle. As seen in the mantle frame, a (with ñs assumed small) leads to an off-diagonal component of the moment of inertia tensor of (Cs−Ās)α3ñs = Āsesα3ñs. The angle by which the mantle frame must be rotated so that - the moment of inertia of the whole planet is purely diagonal is (Āsesα3ñs)/(Āe), and hence a good approximation of ε̃g is @@ -2407,9 +2345,7 @@ ary layer remains laminar. Whether this is reasonable can be assessed by evaluat number Re = rf∆uf/ν, associated with the differential velocity ∆uf = rfΩom̃f at the CMB. For rf = 2000 km, and taking m̃f = 4 arcmin ≈ 0.001 rad from the results in the previous - section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109. 
Such a large Reynolds number indicates - that the viscous friction between the fluid core and mantle should induce turbulent flows, as is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; Cébron et al., @@ -2421,12 +2357,7 @@ pendent of the fluid viscosity and proportional to the square of the differentia coupling constant Kcmb should be in the form Kcmb = fcmb - -∣∣m̃f - -∣∣(0.195− 1.976i -) -, (45) +∣∣m̃f ∣∣(0.195− 1.976i) , (45) where fcmb is a numerical factor that depends among other things on surface roughness. In- @@ -2449,18 +2380,13 @@ by fitting a rotation model to the librations of the Moon observed by Lunar Lase [Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 [Williams - and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K and Kcmb is -∣∣∣Im[Kcmb] -∣∣∣ = - -K +∣∣∣Im[Kcmb]∣∣∣ = K CL CL - CfL 1 @@ -2469,22 +2395,17 @@ CfL , (46) where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6 s−1 the lunar - rotation rate. With CfL/CL ∼ 7× 10−4 [e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼ 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and as- - suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 - s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb - is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- cury should be smaller. Thus, ν ≈ 5×10−4 m2 s−1 gives a conservative upper bound for the - possible effective turbulent viscosity that can be expected for Mercury. 
Figure 5 shows how ε̃m, m̃f and ñs vary as functions of inner core radius for different choices @@ -2513,7 +2434,6 @@ cosity that we have identified above (i.e ν ≈ 5 × 10−4 m2 s−1), the infl Confidential manuscript submitted to JGR-Planets εm - εg mf @@ -2623,7 +2543,6 @@ ner core with the fluid core spin axis. The viscous coupling strength is inverse to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4 - m2 s−1, Figure 5 indicates that ñs may be 1 arcmin or larger only if the inner core radius is smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi- @@ -2637,7 +2556,6 @@ The larger inner core tilt observed with increasing effective viscosity results offset between the obliquity of the principal moment of inertia ε̃g and that of the mantle ε̃m, though it remains limited. For the upper bound of ν = 5 × 10−4 m2 s−1, and for rs = 1500 - km, the difference between ε̃g and ε̃m is limited to 0.0013 arcmin. The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller @@ -2649,7 +2567,6 @@ implies that the larger the inner core is, the more we approach a planet precess body, although the misalignment of the spin axis of the fluid core remains important, approx- imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ε̃m, m̃f - and ñs change with inner core size would certainly be different for a turbulent model of viscous coupling. But the general conclusion remains that the addition of viscous coupling at the CMB @@ -2683,7 +2600,6 @@ of the radial magnetic field Br and the electrical conductivity σ on either sid ary [Rochester , 1960, 1962, 1968]. The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb - has been developed in a few studies [e.g. Buffett , 1992; Buffett et al., 2002; Dumberry and Koot , 2012]. 
Assuming a dominating axial dipole field, with a radial component at the CMB given @@ -2693,30 +2609,21 @@ by Br = 3 〈 -Bd - -r - +Bdr 〉 -cos θ, where +cos θ, where 〈 -Bd - -r - +Bdr 〉 + is the r.m.s. strength of the field, the coupling constant Kcmb can be written is the form Kcmb = 3(1− i)Fcmb - 〈 -Bd - -r - +Bdr 〉2 , (47) @@ -2738,7 +2645,6 @@ Fcmb = σfδf )−1 - , (48) and where σm, δm = @@ -2750,27 +2656,21 @@ and where σm, δm = 2/(σfµΩo) are the electrical conductivi- ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π×10−7 - N A−2 the magnetic permeability of free space. The r.m.s. field strength -〈 -Bd - -r +〈 +Bdr 〉 + is connected to -the Gauss coefficient g0 -1 of the surface magnetic field by +the Gauss coefficient g01 of the surface magnetic field by 〈 -Bd - -r - +Bdr 〉 -= += 2√ 3 @@ -2779,34 +2679,27 @@ R rf -)3 ∣∣g0 -1 - -∣∣ . (49) - +)3 ∣∣g01∣∣ . (49) We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1 [Constable, 2015]. In con- - trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S +m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm) -m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)−1 � (σfδf )−1. Tak- +−1 � (σfδf )−1. Tak- +ing σm = 1 S m -ing σm = 1 S m−1, -∣∣g0 +−1, +∣∣g01∣∣ = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = -1 - -∣∣ = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = - -2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈ (3.1× 10−11) · (1− i). To put this amplitude +2000 km, ρf = 7000 kg m +−3, this gives Kcmb ≈ (3.1× 10−11) · (1− i). 
To put this amplitude in perspective, taking a molecular viscosity of ν = 10−6 m2 s−1 in Equation (44a) gives a vis- cous coupling constant of Kcmb ≈ (6.0× 10−7) · (0.195− 1.976i). Hence, EM coupling at the - CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- ponents of the radial magnetic field. @@ -2821,7 +2714,8 @@ at the bottom of Mercury’s mantle, for instance by the upward sedimentation an of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even -in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains +in the extreme case of σm = σf = 10 +6 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains –23– @@ -2830,7 +2724,6 @@ in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 Confidential manuscript submitted to JGR-Planets smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces - dominate the tangential stress on the CMB of Mercury. At the ICB, because we can expect the electrical conductivity in both the solid inner core @@ -2844,7 +2737,6 @@ morphology at the ICB is dominantly comprised of small spatial scales for exampl by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in terms of an equivalent uniform radial magnetic field 〈Br〉 capturing its r.m.s. strength [Buf- - fett et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the fluid and solid core, the coupling constant Kicb can be written in the form @@ -2867,11 +2759,9 @@ and where δ = √ 2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to - rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based on the r.m.s. strength 〈Br〉 rather than a true field morphology tends to overestimate the strength - of the coupling [Koot and Dumberry , 2013]. 
However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are @@ -2883,44 +2773,32 @@ The parametrization of Equation (50) is only valid in a ’weak field’ regime 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. When 〈Br〉 is sufficiently large, this is no longer the case. EM coupling then enters a ’strong - field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which Kicb increases linearly with 〈Br〉 instead of quadratically. A good approximation of Kicb cal- - culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], -KE -icb = (0.175− i0.138) 〈Br〉 , (52) +KEicb = (0.175− i0.138) 〈Br〉 , (52) where 〈Br〉 is in units of Tesla. The superscript E emphasizes that the numerical factors are - appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. To adapt these numerical factors to Mercury, we write, Kicb = (0.175− i0.138) Ficb - -FE -icb +FEicb 〈Br〉 , (53) -where FE -icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- - +where FEicb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- erry and Koot [2012]. These are Ωo = 7.292 × 10−5 s−1, ρs = 12846 kg m−3, rs = 1221.5 - -km, σ = 5× 105 S m−1, which gives FE -icb = 90.36 T−2. +km, σ = 5× 105 S m−1, which gives FEicb = 90.36 T−2. To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1 in the core of - Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and strong field regime occurs when 〈Br〉 ≈ 1.53 mT for the real part of Kicb. 〈Br〉 at the ICB - of Mercury is unknown. 
The dynamo model of Christensen [2006] showed that the field geom- etry inside the core could be dominated by small length scales, yet only the weaker lower har- @@ -2936,9 +2814,7 @@ Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g An- - derson et al., 2012], 〈Br〉 at the ICB could be as large as 0.3 mT, corresponding to approxi- - mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of @@ -2948,7 +2824,6 @@ Mercury remains in the weak field regime. Figure 6 shows how ε̃m, m̃f and ñs vary as functions of inner core radius for different choices of 〈Br〉. The larger 〈Br〉 is, the stronger is the EM coupling at the ICB, and the smaller is the - differential rotation between the fluid core and inner core. The inner core and fluid core are vir- tually locked into a common precession motion when 〈Br〉 > 0.3 mT. Further increasing 〈Br〉 @@ -2956,13 +2831,10 @@ above 1 mT does not change the solution as EM coupling already dominates all oth on the inner core. This is the case even when EM coupling transitions into the strong field regime. -EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and -∣∣g0 - -1 - -∣∣ = +EM coupling at the CMB is included in these calculations, with σm = 1 S m +−1 and +∣∣g01∣∣ = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core we retrieved the solutions of ε̃m and m̃f shown in Figure 4. @@ -3004,7 +2876,6 @@ with inner core size, χ gets smaller, and so do C ′ and ε̃m. The mantle obl arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 arcmin. 
For an inner core larger than ≈ 1000 km, ĀcΩp cos I < ĀsΩoα3φs, so χ becomes neg- - ative, C ′ becomes smaller than the moment of inertia of a rigid Mercury C, and ε̃m becomes smaller than the prediction based on a rigid planet. @@ -3149,7 +3020,6 @@ We choose an effective viscosity at the CMB of ν = 10−4 m2 s−1, which we be representative value given the comparison with the Moon (see section 3.3). We take a radial field strength at the ICB of 〈Br〉 = 0.3 mT, approximately the field strength expected under - the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa- tive’ coupling model, although the uncertainty on ν and 〈Br〉 obviously remains high. @@ -3296,13 +3166,24 @@ Figure 7. a) Obliquity of the mantle (ε̃m, solid lines) and gravity field (ε 8800 kg m−3 (black lines) and for different choices of α3 (coloured lines). -i′m, i′f and i′s; these represent the obliquities with respect to the orbital plane and are connected +i′m, i +′ +f and i -to our variables by: i′m = ε̃m, i′f = ε̃m+m̃+m̃f ≈ ε̃m+m̃f and i′s = ε̃m+ ñs. To summarize +′ +s; these represent the obliquities with respect to the orbital plane and are connected -their results, i′f and i′s vary substantially for different inner core sizes, are always of compara- +to our variables by: i′m = ε̃m, i +′ +f = ε̃m+m̃+m̃f ≈ ε̃m+m̃f and i′s = ε̃m+ ñs. To summarize -ble amplitude, and i′s is always larger than i′f . Furthermore, they find that as the inner core +their results, i′f and i +′ +s vary substantially for different inner core sizes, are always of compara- + +ble amplitude, and i′s is always larger than i +′ +f . Furthermore, they find that as the inner core size is increased, the mantle obliquity i′m gets progressively larger and is displaced further away @@ -3311,9 +3192,7 @@ from its expected orientation based of a rigid planet (see their Figure 6). 
The obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan- etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), - is approximately an increase of 5 × 10−5 rad = 0.17 arcmin. This also corresponds approxi- - mately to the deviation of the obliquity with respect to that of a rigid planet. When only viscous stress is included in our model (section 3.3), our results are substan- @@ -3394,7 +3273,6 @@ est changes of the mantle obliquity εm compared to the obliquity predicted on t entirely rigid planet (εrm). Let us denote this difference as ∆εm = εm−εrm. The largest ∆εm occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This difference is decreased - as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value @@ -3420,15 +3298,12 @@ Nevertheless, our results show that the presence of a fluid core and inner core resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006 - arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliq- - uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 - arcmin) [Cicalò et al., 2016]. Thus, in addition to including tidal deformation and the preces- sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be @@ -3456,20 +3331,15 @@ tle. 
Since gravitational coupling prevents a large inner core tilt with respect Confidential manuscript submitted to JGR-Planets find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain - is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the - light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968± 0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm = - 2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent - with one another within their error estimates. In their interpretation, Genova et al. [2019] sug- gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar- - cmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid in- ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg @@ -3494,7 +3364,6 @@ lar to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). 
- Although this offset is smaller than the measurement errors, so that the observed obliquity is still consistent with no deviation away from the Cassini plane, some amount of dissipation in- diff --git a/read/results/tika/2201.00069.txt b/read/results/tika/2201.00069.txt index 3e3d064..edb4cfb 100644 --- a/read/results/tika/2201.00069.txt +++ b/read/results/tika/2201.00069.txt @@ -18,6 +18,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -726,11 +747,9 @@ FRB 20171019A 28 September 2019 – No (calibration failure) FRB 20171019A 18 October 2019 6.′′8 × 5.′′0 5.2 < 15`Jy beam−1 FRB 20190711A 23 August 2019 11.′′7 × 4.′′9 4.9 < 15`Jy beam−1 - FRB 20190711A 09 September 2019 12.′′5 × 4.′′9 4.6 < 15`Jy beam−1 FRB 20190714A 14 September 2019 7.′′1 × 6.′′2 4.2 54.4 `Jy beam−1 - FRB 20190714A 28 September 2019 6.′′5 × 5.′′1 5.8 52.0 `Jy beam−1 Table 2. Details of the radio continuum source associated with FRB 20190714A. diff --git a/read/results/tika/2201.00151.txt b/read/results/tika/2201.00151.txt index 128b28f..a157736 100644 --- a/read/results/tika/2201.00151.txt +++ b/read/results/tika/2201.00151.txt @@ -18,6 +18,22 @@ + + + + + + + + + + + + + + + + @@ -186,8 +202,12 @@ Subhalo ID 16960 Number of stellar particles (N⋆) 70446 Number of dark matter particles (NDM) 78448 -Stellar mass (M⋆) 5.74 × 1010 M⊙ -Dark matter mass (MDM) 4.91 × 1011 M⊙ +Stellar mass (M⋆) 5.74 × 10 +10 M⊙ + +Dark matter mass (MDM) 4.91 × 10 +11 M⊙ + Mean mass of stellar particles 815808 M⊙ Stellar half-mass radius 9.99 kpc Stellar half-number radius (r1/2) 9.6 kpc @@ -255,9 +275,9 @@ M ⊙ y -r-1 +r- -] +1 ] t [Gyr] @@ -742,11 +762,7 @@ All the relevant properties of the galaxy are given in Table 1, including numbers of particles and total masses for both compo- nents, and details on the shape of the stellar component: the axis ratios minor to major (shortest to longest) c/a, intermediate to -major b/a, and the triaxiality parameter T = (a2 - -− b2)/(a2 -− c2). 
- +major b/a, and the triaxiality parameter T = (a2 − b2)/(a2 − c2). We distinguish between the half-mass radius provided in the Il- lustris database and the half-number radius r1/2, which we use @@ -847,9 +863,7 @@ The velocity anisotropy parameter β(r) = 1 − (σ2 θ + -σ2 -φ)/(2σ - +σ2φ)/(2σ 2 r ), whereσi are velocity dispersions in spherical coordi- @@ -858,12 +872,13 @@ of galaxies. It is one of the most important dynamical properties of bound systems which cannot be inferred directly from ob- servations and has to be recovered by dynamical modeling. The profiles of the anisotropy parameter β as well as the radial σr +and tangential σt = [(σ -and tangential σt = [(σ2 +2 θ -+σ2 ++σ2φ)/2] -φ)/2]1/2 velocity dispersions for our +1/2 velocity dispersions for our simulated galaxy are presented in the consecutive columns of @@ -1005,13 +1020,15 @@ R )[ 10 -3 (k +3 ( -m +km s -1 -)2 ] +)2 + +] R [kpc] 0 10 20 30 40 @@ -1040,13 +1057,15 @@ R )[ 10 -4 (k +4 ( -m +km s -1 -)3 ] +)3 + +] R [kpc] 0 10 20 30 40 @@ -1075,13 +1094,15 @@ R )[ 10 -8 (k +8 ( -m +km s -1 -)4 ] +)4 + +] R [kpc] 0 10 20 30 40 @@ -1119,7 +1140,6 @@ arccos(z) − √ 1 − z2 - ] , (2) @@ -1138,12 +1158,10 @@ z = √ -r2 + R2 -c - -R2 -c + R2 +r2 + R2c +R2c + R +2 t . (4) @@ -1167,8 +1185,9 @@ logΥ(r) = { log(Υ0) r ≤ r0 +a(log r − log r0) -a(log r − log r0)c + log(Υ0) r > r0 +c + log(Υ0) r > r0 (5) Article number, page 5 of 12 @@ -1331,7 +1350,6 @@ der to smooth out the numerical artifacts, the three-dimensional imums (identified as the best-fitting models) and 1, 2, 3σ con- fidence levels which for three parameters correspond to ∆χ2 = - 3.53, 8.02, 14.2 (Press et al. 1992). 3.2. 
Application to mock data @@ -1447,9 +1465,7 @@ ALL r [kpc] 1010 - 1011 - 1012 10 100 @@ -1594,9 +1610,7 @@ POPULATIONS r [kpc] 1010 - 1011 - 1012 10 100 @@ -1907,7 +1921,9 @@ Number of stars (Nphot) 65 797 14 882 49 205 Number of stars (Nspec) 3286 1136 1151 Stars within 1.8 kpc 3268 1134 1130 -Fitted normalization (N0) [×104] 6.95 1.81 5.45 +Fitted normalization (N0) [×10 +4] 6.95 1.81 5.45 + Sérsic radius (RS) [kpc] 0.454 0.429 0.420 Sérsic parameter (m) 0.808 0.807 0.898 @@ -2027,7 +2043,9 @@ m s -1 -)2 ] +)2 +] + R [kpc] -16 @@ -2049,13 +2067,15 @@ R )[ 10 -2 (k +2 ( -m +km s -1 -)3 ] +)3 + +] R [kpc] @@ -2078,13 +2098,15 @@ R )[ 10 -4 (k +4 ( -m +km s -1 -)4 ] +)4 + +] R [kpc] @@ -2185,9 +2207,10 @@ c 12 -χ2 -χ -2 m +χ2 +-χ +2 m in Fig. 13. Values of χ2 relative to the fitted minimum within the range of 3σ confidence level for all stars (left panel) and for the populations (right @@ -2195,18 +2218,21 @@ panel) for the Fornax dSph. (Kowalczyk et al. 2019), we obtained higher estimates of the en- closed total mass at larger radii. In particular, for the mass en- -closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48 +closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87 ++1.48 −1.56 × 108 -M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87 +M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71 ++0.87 −1.13 × 108 M⊙ from the fit of populations, while previously we had -Mold(< 1.8 kpc) = 3.7+1.4 +Mold(< 1.8 kpc) = 3.7 ++1.4 −1.3 × 108 M⊙. 
diff --git a/read/results/tika/2201.00178.txt b/read/results/tika/2201.00178.txt index 015befd..1aed9f1 100644 --- a/read/results/tika/2201.00178.txt +++ b/read/results/tika/2201.00178.txt @@ -18,6 +18,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -57,9 +78,7 @@ Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on measurements to show that the resulting divergence and radial vorticity maps at supergranular length scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre- - lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, - while ≥ 0.8 is obtained for the radial vorticity. Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) @@ -205,9 +224,7 @@ solar surface, respectively, and ez points outwards. This approximation is valid that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the - horizontal wavenumber qR� ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal - wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon @@ -221,7 +238,6 @@ uσ =∇×[∇×(P ez)] +∇×(T ez), (1) where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying - perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 
2020; Mandal & Hanasoge 2020, for example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period @@ -235,7 +251,6 @@ vector calculus results in u = −∇2Pez +∇(∂zP ) +∇hT×ez, (2) where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the - Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and @@ -258,7 +273,12 @@ The flow coefficients Pqj and Tqj , represented by the discrete indices q and j, where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be -exploited to expedite inversions. Note that Pqj = P ∗−qj and Tqj = T ∗−qj for the flow field to be real in the spatio- +exploited to expedite inversions. Note that Pqj = P +∗ +−qj and Tqj = T + +∗ +−qj for the flow field to be real in the spatio- temporal domain. @@ -268,19 +288,25 @@ To infer flows from wavefields φ scattered by a perturbation of length scale q, Imaging near-surface flows using mode-coupling analysis 3 -φω∗k φωk+q, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φω∗k φωk+q thus +φω∗k φ +ω +k+q, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φ + +ω∗ +k φ + +ω +k+q thus to the flow coefficients Pqj and Tqj (see eq A7) 〈φω∗k φωk+q〉 = Hωkk′nn′ - ∑ j Cqj,kPqj +Dqj,kTqj . (4) The weight factorHω (see eq A8) is a function of frequency, capturing information about the extent of coupling between - the two modes [n, k] and [n′, k′], where n and n′ are the radial orders of the modes, and k = |k| and k′ = |k′| = |k+q|. The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the @@ -294,11 +320,15 @@ Dqj,k = D−qj,−k (see eq A6). 
The kernels, as flows, are expressed on the bas 1.2. Least-squares of cross-correlation -Even though φω∗k φωk+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- +Even though φω∗k φ +ω +k+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the -dimension of the problem. A least-squares fit to the cross-correlation φω∗k φωk+q (see Woodard 2006, 2014, 2016) results +dimension of the problem. A least-squares fit to the cross-correlation φω∗k φ +ω +k+q (see Woodard 2006, 2014, 2016) results in the B-coefficients Bk,q, according to @@ -313,7 +343,6 @@ Hω∗kk′nn′φω∗k φωk+q∑ . (5) Multiplying eq 4 on both sides by Hω∗kk′nn′ and substituting by eq 5 on the left-hand-side results in a concisely defined - forward problem (compare with eq 4) Bk,q = @@ -341,11 +370,9 @@ or . (7) Summing over ±ω guarantees that the parity Bk,q = B∗−k,−q (see Appendix A for derivation) is obeyed, thereby - ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and - −k, B∗−k,−q = @@ -362,7 +389,6 @@ C−qj,−kP Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a - by-product. 1.3. Noise model @@ -397,7 +423,6 @@ as in H21, which was motivated by the above discussion, Gk,q ≡ 〈|Bk,q|2〉, (9) where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A - for explanation). 2. 
DATA ANALYSIS @@ -409,12 +434,16 @@ Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Sch is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4× 194.4 Mm2 in size, tracked for 24 hours - and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number 2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x, y; t). The Fourier-space -wavefield φωk (and subsequently, the cross-correlation φω∗k φωk+q) is obtained by computing the 3D spatial and temporal +wavefield φωk (and subsequently, the cross-correlation φ +ω∗ +k φ + +ω +k+q) is obtained by computing the 3D spatial and temporal Fourier transform of the Dopplercube. @@ -431,11 +460,8 @@ Maximum signal can be extracted from the weighted summation of the cross correla profiles of the two modes [n, k] and [n′, k′] closely align in ω space. This implies that their mode frequencies should be sufficiently close (|ωnk − ωn′k′ | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over - ±ω is significant only over a few linewidths (ε, the summation parameter; see eq 7). We have empirically found and - tabulated δ in Table 1 for the radial order couplings n-n′ ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only - weakly on ε; we set it to 3 line widths). Figure 1 shows that for any two adjacent ridges (adjacent n and n′), mode frequencies ωnk and ωn′k become spaced @@ -451,7 +477,6 @@ affecting the quality of the seismic measurements. Owing to these factors, to ma inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR� at fixed radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR� ≤ 2000 and qR� ≤ 300. 
Our - frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). @@ -492,7 +517,6 @@ complement each other (see Sekii 1997), where RLS tries to minimize the misfit b SOLA gives better localization. For total number of modes M , RLS scales as MxJ where J is the number of basis functions fj(z) (J � M ; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000, - computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While @@ -530,15 +554,12 @@ k ||KU−B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed by the sensitivity kernels: {Cqj,k,Dqj,k}. U is a vector composed of the flow coefficients: {Pqj , Tqj} and B is a vector - composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and - toroidal flow. We use B-spline basis functions as our fj(z), comprising 11 knots spaced uniformly in acoustic radius, for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis functions for each poloidal and toroidal, the dimensions of K, U and B are thus M×22, 22×1, and M×1 respectively. - Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension M ×M) and pre-multiplying by Kᵀ, @@ -574,7 +595,6 @@ U = (KᵀΛ−1K + λI)−1KᵀΛ−1B, (13) where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed by plotting ||U||2 vs ||KU − B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the - regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal flow Pq are shown in Figure 3. 
@@ -586,9 +606,7 @@ To improve confidence in the imaged near-surface flows through mode-coupling, we from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by examining the advection of convective granules (1.2 Mm, qR� ≈ 3500; Hathaway et al. 2015) by underlying larger- - scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm), - LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 @@ -598,10 +616,9 @@ Time series of intensity images from HMI, with the same properties of the Dopple tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between consecutive intensity images, which we denote as I1, I2. The LCT method selects a patch in two images each - (I1 = I1e(x−xij) -2/2 sigma2 , I2 = I2e(x−xij) +2/2 sigma2 , I2 = I2e(x−xij) 2/2 sigma2) that observe the same granule at the grid point xij = (xi, yj). A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance @@ -622,7 +639,6 @@ Provided that the time difference ∆t, here 45 seconds, between the images is l min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the images I1, I2 and for each consecutive pair of images in the cube. - In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. 
FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the @@ -646,12 +662,13 @@ For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (h substituting P and T from eq 3 into eq 2 as below - uuu(q, z) = −∇2Pez +∇(∂zP ) +∇hT×ez, -= −(0, 0, ∂2xP + ∂2yP + ∂2zP ) + (∂x∂zP, ∂y∂zP, ∂ += −(0, 0, ∂2xP + ∂2yP + ∂2zP ) + (∂x∂zP, ∂y∂zP, ∂2zP ) + (∂yT, −∂xT, 0). (15) +Setting ∂2x + ∂ 2 -zP ) + (∂yT, −∂xT, 0). (15) +y = q -Setting ∂2x + ∂2y = q2, div is given by, +2, div is given by, ∇h · uuu(q, z) = q2∂zP, (16) @@ -709,14 +726,12 @@ M-C : φ(x, y; t) =====⇒ φωk , Bk,q inversion -======⇒ P, T +======⇒ P, T ∇h·===⇒ -∇h·===⇒ ∇× - eqns 16, 17 -Filter, +Filter, =====⇒ 2D FFT @@ -750,7 +765,6 @@ Table 2 summarizes the results of the comparison between flows obtained from mod where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from the two methods near supergranular scale (qR� ≈ 100). Near-surface flows are imaged most faithfully when all the - couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between @@ -817,7 +831,6 @@ Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR�, we first fix the coupling(s) - and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and @@ -931,26 +944,25 @@ uσq(z) = j { -q2 fjez + iq f ′j +q2 fjez + iq f + +′ +j } Pσjq + iq×ez fjTσjq. 
(A1) For flows in the anelastic limit (u � speed of sound), we can denote the flow perturbation operator as δLσ = - −2iωρuσ ·∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, δLσq = −2iω ρ (iuσq · k + uσq · ez∂z), (A2) - =−2iωρ + ∑ j { -−k · q f ′jPσjq − k · (q×ez) fjTσjq + q2 fjP - -σ -jq ∂z +−k · q f ′jPσjq − k · (q×ez) fjTσjq + q2 fjPσjq ∂z } . (A3) @@ -965,16 +977,17 @@ Express the mode eigenfunction describing oscillations in the Cartesian domain b where H and V are real-valued functions; n and n′ are dropped for compactness of notation. Then the coupling of -two modes ξk and ξk′ (k′ = k + q), by the flow perturbation operator δLσq , denoted by coupling integral Λk -k′(σ), is +two modes ξk and ξk′ (k +′ = k + q), by the flow perturbation operator δLσq , denoted by coupling integral Λkk′(σ), is given by -Λk -k′(σ) ≡ - +Λkk′(σ) ≡ ∫ -dx (δLσqξk) · ξ∗k′ = + +dx (δLσqξk) · ξ +∗ +k′ = ∫ dx @@ -995,8 +1008,10 @@ jq (k̂ · k̂ H ′kH ∗ -k′ + V ′kV +k′ + V +′ +kV ∗ k′) @@ -1035,8 +1050,10 @@ q2 fj (k̂ · k̂ H ′kH ∗ -k′ + V ′kV +k′ + V +′ +kV ∗ k′) @@ -1066,40 +1083,48 @@ k′ + VkV k′). (A6) Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. This coupling integral contributes to the cross-spectral - measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on wavefield cross-correlation as -〈φω∗k φω+σk+q 〉 = Hω -kk′σΛk +〈φω∗k φω+σk+q 〉 = H +ω +kk′σΛ +k k′(σ), (A7) where the function H is given by -Hωkk′σ = −2iω(Nk|Rωk |2Rω+σk′ +Nk′ |Rω+σk′ | +Hωkk′σ = −2iω(Nk|Rωk |2Rω+σk′ +Nk′ |R +ω+σ +k′ | + 2Rω∗k ). (A8) We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. - The mode spectral profile R is a Lorentzian, given by Rωk = 1 -ω2 -nk − ω2 − iωγnk/2 - +ω2nk − ω2 − iωγnk/2 , (A9) where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. 
Eq A9 can be derived by introducing mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq - 5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. -Also, the parity Hωkk′σ = H−ω∗kk′−σ and Rωk = R−ω∗k are established. Mode normalization N is given by +Also, the parity Hωkk′σ = H +−ω∗ +kk′−σ and R + +ω +k = R + +−ω∗ +k are established. Mode normalization N is given by Nk = 1 @@ -1118,8 +1143,7 @@ Rωk , (A10) -where the 1 -Q +where the 1Q Q∑ k @@ -1131,12 +1155,22 @@ This forces N to be isotropic, i.e., to only depend on k, and not k. The sum ove Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve +to establish the parity Bσk,q = B + +∗−σ +−k,−q. This allows for obtaining P + +σ +q = P -to establish the parity Bσk,q = B∗−σ−k,−q. This allows for obtaining Pσq = P ∗−σ−q , and subsequently, purely real flow in +∗−σ +−q , and subsequently, purely real flow in the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into -the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσk,q = G−σ−k,−q. +the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσk,q = G +−σ +−k,−q. @@ -1164,7 +1198,6 @@ Since the kernels in eq A6 are manifest as coefficients on a basis fj(z), we fir as a function of depth z (see Figure 8). It is convenient to derive in matrix form. 
Thus, with the following definitions: P ≡ Pq(z), p ≡ Pqj , F ≡ fj(z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for - simplicity, the same derivations hold true for toroidal flow as well) P = Fp (B11) @@ -1218,8 +1251,8 @@ exp This can be achieved by solving the optimization problem minimize X = - ∫ + dz [ T (z, zo)−Θq(z, zo) @@ -1248,8 +1281,7 @@ Figure 8. Left : Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial o (eq B17) using SOLA, for qR� = [−112,−45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). Integral of the averaging kernel over z is 0.89. -Setting ∂X -∂α → 0 gives us the matrix problem to be solved +Setting ∂X∂α → 0 gives us the matrix problem to be solved A{α} = v, @@ -1267,9 +1299,7 @@ dz Kk,q(z)Kk′,q(z) and v = ∫ dz Kk,q(z)T (z, zo). Here, k′ is just a dummy index for - denoting elements in the matrix A, (k′ 6= k+q). In the last line of eq B18, we introduce regularization using an Identity - matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α @@ -1293,7 +1323,10 @@ k αk,zo ∫ -dz Kk,q(z)Pσq (z), +dz Kk,q(z)P + +σ +q (z), = diff --git a/read/results/tika/2201.00200.txt b/read/results/tika/2201.00200.txt index 83244e5..cfe723b 100644 --- a/read/results/tika/2201.00200.txt +++ b/read/results/tika/2201.00200.txt @@ -18,6 +18,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -332,15 +353,11 @@ versions of the Ledoux discriminant allowed them to obtain a model-independent profile for this quantity. Their reconstruction method also gives solar structures that are in excellent agree- ment with other structural inversions, namely the entropy, S , the -square of the speed of sound, c2 - -s , and the density, ρ. To illustrate +square of the speed of sound, c2s , and the density, ρ. 
To illustrate the convergence of their reconstruction procedure, they show (right panels of their Figs. 3-6) the successive iterations that con- verge to an excellent level of agreement for the four structural -inversions (A, S , c2 - -s , ρ) starting from the initial reference model +inversions (A, S , c2s , ρ) starting from the initial reference model adopted in their work. The differences found between the recon- structed model and the reference model are useful as they indi- cate the modifications of the reference model that are required to @@ -356,9 +373,9 @@ the quantity (ASun - Aref). The second concerns the speed of sound. The same positive bump at the same location as for the Ledoux discriminant, A, is -observed for the quantity (c2 +observed for the quantity (c2s,Sun − c -s,Sun − c2 +2 s,ref)/c 2 @@ -380,7 +397,6 @@ ancy is negative in the convective zone. The corrections applied to A help reduce these entropy discrepancies in both regions. The fourth concerns the density. The quantity (ρSun − - ρref)/ρref has a negative peak in the radiative region, at ∼ 35% of the stellar radius, and is positive in the convective zone. @@ -537,15 +553,11 @@ solid line. The vertical dashed line in each panel is located at a distance dov below the convective boundary. The impact on the whole stellar structure was quantified by -comparing the four structural quantities (A, S , c2 - -s , ρ) between the +comparing the four structural quantities (A, S , c2s , ρ) between the modified and the reference model. The results are displayed in Fig. 3, with ∆X defined as (X−Xref) for any structural quantity X. The forced local heating in the overshooting layer produces sim- -ilar positive peaks for ∆A, ∆S , and ∆c2 - -s , as found for the temper- +ilar positive peaks for ∆A, ∆S , and ∆c2s , as found for the temper- ature. The modification thus provides the correction required to improve the discrepancy for the Ledoux discriminant described in the first of the trends outlined in Sect. 
3.1. Unsurprisingly, @@ -580,21 +592,20 @@ These trends are insensitive to the depth over which the tem- perature gradient is modified. Increasing the depth increases the magnitude of the differences but has no impact on their sign. We find that the maximum variation in the model properties, such as -the speed of sound, ∆c2 +the speed of sound, ∆c2s/c -s/c 2 -s,ref , roughly scales with d2 +s,ref , roughly scales with d +2 ov. This scal- + ing is linked to the integrated area between the modified temper- ature gradient curve and the one for the reference (non-modified) temperature gradient, which roughly decreases linearly with r. This area is proportional to the square of the overshooting depth, and consequently, the maximum variation in the model proper- -ties is also proportional to d2 - -ov. The qualitative trends also remain +ties is also proportional to d2ov. The qualitative trends also remain the same whether overshooting mixing in the reference model is ignored or included using a step function (with instantaneous mixing) or an exponential decay for the diffusion coefficient (e.g. @@ -629,9 +640,7 @@ mixing and whether microscopic diffusion is included or not. In the convective zone, all models give a positive difference for the density between the model with a modified temperature gra- dient and the relevant reference model. For the other quantities -(S , c2 - -s ), the differences in the convective zone are very sensitive +(S , c2s ), the differences in the convective zone are very sensitive Fig. 3. Difference of various structural quantities between a model with a modified temperature gradient in the overshoot- @@ -685,9 +694,7 @@ this problem, as mentioned in Sect. 1. However, the details of the physical process responsible for this local heating have been lacking, whereas we can now suggest an explanation based on the B21 results. 
The trends that we find for the four structural -quantities (A, S , c2 - -s , ρ) are robust below the convective bound- +quantities (A, S , c2s , ρ) are robust below the convective bound- ary and in a large fraction of the radiative core, independently of the treatment of mixing and diffusion and of the method for con- structing the models in Sects. 3.2.1 and 3.2.2. Our experiments diff --git a/read/results/tika/2201.00201.txt b/read/results/tika/2201.00201.txt index fc0a70e..3da09c0 100644 --- a/read/results/tika/2201.00201.txt +++ b/read/results/tika/2201.00201.txt @@ -18,6 +18,27 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/read/results/tika/2201.00214.txt b/read/results/tika/2201.00214.txt index 759766b..95a891e 100644 --- a/read/results/tika/2201.00214.txt +++ b/read/results/tika/2201.00214.txt @@ -18,6 +18,24 @@ + + + + + + + + + + + + + + + + + + @@ -79,17 +97,10 @@ Temperature Analysis of Flaring Coronal Loops -N. Fathalian -1, S. S. Hosseini Rad - -2, N. Alipour -2, H. Safari - -2 - +N. Fathalian1, S. S. Hosseini Rad2, N. Alipour2, H. Safari2 1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran. -2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran. +2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran. e-mail: narges_fathalian@alum.sharif.edu January 4, 2022 @@ -293,7 +304,6 @@ has ten different wavelength channels, three in white light and UV, and the othe channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper- atures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015). - Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 ). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK. @@ -431,9 +441,7 @@ dT [log (T)− log (Tp,i) -2σ2 -T,i - +2σ2T,i ). 
(1) In which, Tp,i is the DEM peak temperature, EMp,i is the peak EM function, and σT,i is the @@ -454,7 +462,6 @@ k EM(Tk)Rλ(Tk). (2) Here, Rλ(T) is the instrumental temperature response function of each wavelength filter λ, which - is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA response functions calibration has partly changed. Here, we use the updated calibration of the @@ -465,8 +472,7 @@ DEM to the background-subtracted observed fluxes in multiple wavelengths, the th rameters, temperature width (σT,i), peak of temperature (Tp,i), and peak emission measure (EMp,i) -are found by minimizing χ2 -i . +are found by minimizing χ2i . Our data sample is uneven because of omitting some damaged images in between. There- fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is @@ -742,7 +748,6 @@ with the non-flaring ones. And figure 9 shows that the increasing and decreasing range, or the difference between maximum and minimum of the temperature value (max(log(T))- min(log(T))), is much higher on average for the loops’ strips of the flaring AR in comparison with - the loops’ strips of the non-flaring one. V. Summery @@ -849,7 +854,6 @@ region are also hot loops with the mean temperature above this range. They also oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring loops. The temperature of the non-flaring loops are lower (log(T) < 6) and as discussed above, - we believe that the observed oscillation-like periods in non-flaring loops should be more probably related to the high amplitude fluctuations. 
@@ -1002,11 +1006,8 @@ view of the area, marked by a box in the left, the loops are distinguished in re 6.6 6.8 -L -o - -g -T +Lo +gT F−LoopA @@ -1018,11 +1019,8 @@ F−LoopA 6.6 6.8 -L -o - -g -T +Lo +gT 22:10 22:20 22:30 22:40 22:50 23:00 5.8 @@ -1035,11 +1033,8 @@ T time -L -o - -g -T +Lo +gT @@ -1055,11 +1050,8 @@ T 6.8 -L -o - -g -T +Lo +gT F−LoopB1 @@ -1079,11 +1071,8 @@ F−LoopB1 time -L -o - -g -T +Lo +gT Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to @@ -1115,10 +1104,10 @@ op Le ng -th -(M +th( +Mm -m) +) 5.8 @@ -1206,10 +1195,10 @@ op Le ng -th -(M +th( +Mm -m) +) 5.8 @@ -1245,10 +1234,10 @@ op Le ng -th -(M +th( +Mm -m) +) 5.6 @@ -1561,13 +1550,13 @@ ali ze d I -nt +nte -en -sit +ns +ity -y F -e X + Fe + X VI II @@ -1603,11 +1592,8 @@ work. 6.8 -L -o - -g -T +Lo +gT NonF−LoopA @@ -1627,11 +1613,8 @@ NonF−LoopA time -L -o - -g -T +Lo +gT @@ -1647,11 +1630,8 @@ T 6.8 -L -o - -g -T +Lo +gT NonF−LoopB @@ -1671,11 +1651,8 @@ NonF−LoopB time -L -o - -g -T +Lo +gT Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non- @@ -1699,19 +1676,16 @@ Time -L -o - -o -p +Lo +op L en -g -th +gt +h( -(M +M m ) @@ -1744,19 +1718,16 @@ Time -L -o - -o -p +Lo +op L en -g -th +gt +h( -(M +M m ) @@ -1789,19 +1760,16 @@ Time -L -o - -o -p +Lo +op L en -g -th +gt +h( -(M +M m ) @@ -1851,27 +1819,22 @@ P er ce -n - -ta -g +nt +ag e -o -f -T +of + T em -p +p. -. P + P er io -d - -s +ds Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non- @@ -1895,12 +1858,10 @@ flaring (red bars) ARs. 
The horizontal axis shows the temperature periods in min max(log(T))−min(log(T)) N -u +um -m -b - -er +be +r Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) diff --git a/read/results/tika/GeoTopo-book.txt b/read/results/tika/GeoTopo-book.txt index ba3c3e7..4f88346 100644 --- a/read/results/tika/GeoTopo-book.txt +++ b/read/results/tika/GeoTopo-book.txt @@ -21,6 +21,21 @@ + + + + + + + + + + + + + + + @@ -175,7 +190,6 @@ Ein topologischer Raum ist ein Paar (X,T) bestehend aus einer Menge X und T ⊆ mit folgenden Eigenschaften (i) ∅, X ∈ T - (ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T (iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist @@ -228,7 +242,6 @@ Rn \ U = V (f1, . . . , fr)} Definition 2 Sei (X,T) ein topologischer Raum und x ∈ X. - Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und U0 ⊆ U . @@ -294,7 +307,6 @@ Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt: 5 1.1. TOPOLOGISCHE RÄUME • S ⊆ T - • ∅, { 0, 1 } und { 0, 2 } ∈ S • { 0 } = { 0, 1 } ∩ { 0, 2 } • X = { 0, 1 } ∪ { 0, 2 } @@ -309,7 +321,6 @@ Subbasis ist. Definition 5 Sei (X,T) ein topologischer Raum und Y ⊆ X. TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . - TY heißt Teilraumtopologie und (Y,TY ) heißt ein Teilraum von (X,T). Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. @@ -318,7 +329,6 @@ Definition 6 Seien X1, X2 topologische Räume. U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt. - T = { U ⊆ X1 ×X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie. B = { U1 × U2 | Ui offen in Xi, i = 1, 2 } ist eine Basis von T. @@ -344,7 +354,6 @@ Beispiel 4 (Produkttopologien) R2 überein. 2) X1 = X2 = R mit Zariski-Topologie. 
T Produkttopologie auf R2: U1 × U2 - (Siehe Abbildung 1.2) @@ -371,9 +380,7 @@ TX := { U ⊆ X -∣∣ π−1(U) ∈ TX -} - +∣∣ π−1(U) ∈ TX } (X,TX) heißt Quotiententopologie. Beispiel 5 @@ -387,17 +394,17 @@ a U -aπ−1(u) +aπ +−1(u) 0 ∼ 1, d. h. [0] = [1] - Beispiel 6 + Sei X = R2 und (x1, y1) ∼ (x2, y2)⇔ x1− x2 ∈ Z und y1− y2 ∈ Z. Dann ist X/∼ ein Torus. Beispiel 7 (Projektiver Raum) X = Rn+1 \ { 0 } , x ∼ y ⇔ ∃λ ∈ R× mit y = λx - ⇔ x und y liegen auf der gleichen Ursprungsgerade @@ -422,10 +429,7 @@ Also für n = 1: 1.2 Metrische Räume Definition 8 -Sei X eine Menge. Eine Abbildung d : X ×X → R+ - -0 heißt Metrik, wenn gilt: - +Sei X eine Menge. Eine Abbildung d : X ×X → R+0 heißt Metrik, wenn gilt: (i) Definitheit: d(x, y) = 0⇔ x = y ∀x, y ∈ X (ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X (iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X @@ -437,8 +441,8 @@ Sei (X, d) ein metrischer Raum und Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+ B = {Br(x) ⊆ P(X) | x ∈ X, r ∈ R+ } ist Basis einer Topologie auf X. - Definition 9 + Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit ∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2)) @@ -459,7 +463,6 @@ d(x, y) = { 0 falls x = y - 1 falls x 6= y die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. @@ -470,7 +473,6 @@ die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. Beispiel 10 X = R2 und d ((x1, y1), (x2, y2)) := max(‖x1 − x2‖, ‖y1 − y2‖) ist Metrik. - Beobachtung: d erzeugt die euklidische Topologie. Br(0) = @@ -522,8 +524,8 @@ Seien X,X1, X2 Hausdorff-Räume. a) Jeder Teilraum von X ist hausdorffsch. b) X1 ×X2 ist hausdorffsch (vgl. Abbildung 1.4). - Definition 11 + Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle n ≥ n0. @@ -535,7 +537,6 @@ Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge. 
Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls x 6= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0 - ⇒ x = y � 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. @@ -569,7 +570,6 @@ Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY . Bemerkung 72 Seien X,Y metrische Räume und f : X → Y eine Abbildung. - Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε. @@ -580,7 +580,6 @@ Def. 12.a ⇒ ∃δ > 0, sodass Bδ(x) ⊆ f−1(U) ⇒ f(Bδ(x)) ⊆ U ⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh. - „⇐“: Sei U ⊆ Y offen, X ∈ f−1(U). Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U Vor. @@ -589,7 +588,6 @@ Vor. Bemerkung 8 Seien X,Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: - f ist stetig ⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f−1(A) ⊆ X ist abgeschlossen. @@ -623,7 +621,6 @@ Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht offen ist (vgl. Abbild Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) Seien X,Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. - Dann ist g ◦ f : X → Z stetig. X @@ -659,14 +656,11 @@ Seien X,Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Pro Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig. Beweis: Sei U ⊆ X offen -⇒ π−1 - -X (U) = U × Y ist offen in X × Y . � +⇒ π−1X (U) = U × Y ist offen in X × Y . � Bemerkung 12 Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum versehen mit der Quotiententopologie, π : X → X, x 7→ [x]∼. - Dann ist π stetig. @@ -684,9 +678,7 @@ Sn = { x ∈ Rn+1 -∣∣ ‖x‖ = 1 -} - +∣∣ ‖x‖ = 1 } = { @@ -696,8 +688,7 @@ x ∈ Rn+1 n+1∑ i=1 -x2 -i = 1 +x2i = 1 } @@ -710,7 +701,6 @@ O. B. d. A. sei N = 1 . Die Gerade durch N und P schneidet die Ebene H in genau - einem Punkt P̂ . P wird auf P̂ abgebildet. 
f :Sn \ {N } → Rn @@ -722,28 +712,21 @@ LP ∩H wobei Rn = H =  - x1 - -... + x1... xn+1  ∈ Rn+1 - ∣∣∣∣∣∣∣ xn+1 = 0  und LP die Gerade in Rn+1 durch N - und P ist. Sei P = - x1 -... - + x1... xn+1 , so ist xn+1 < 1, also ist LP nicht parallel zu H. Also schneiden sich LP - und H in genau einem Punkt P̂ . Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. @@ -780,27 +763,21 @@ Abbildung 1.6: Visualisierung der stereographischen Projektion Bemerkung 13 X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2 - mit A1 ∩A2 = ∅ und A1 ∪A2 = X. Beispiel 15 (Zusammenhang von Räumen) 1) (Rn,TEuklid) ist zusammenhängend, denn: Annahme: Rn = U1 ∪̇ U2 mit ∅ 6= U1, U2 ∈ TEuklid existieren. - Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun betrachten wir V ( Rn als (metrischen) Teilraum mit der Teilraumtopologie TV . Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie. - Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen. 2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0 - 3) R2 \ { 0 } ist zusammenhängend. - 4) Q ( R ist nicht zusammenhängend, da (Q ∩ R<√2) ∪ (Q ∩ R>√2) = Q - 5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist. 6) R mit Zariski-Topologie ist zusammenhängend. @@ -826,9 +803,7 @@ disjunkt Wäre A ∩A1 = ∅ ⇒ A ⊆ A = A1 ∪̇A2 - ⇒ A ⊆ A2 ⇒ A ⊆ A2 - ⇒ A1 = ∅ ⇒ Widerspruch zu A1 6= ∅ ⇒ A ∩A1 6= ∅ und analog A ∩A2 6= ∅ @@ -836,7 +811,6 @@ Wäre A ∩A1 = ∅ Bemerkung 15 Sei X ein topologischer Raum und A,B ⊆ X zusammenhängend. - Ist A ∩B 6= ∅, dann ist A ∪B zusammenhängend. Beweis: Sei A ∪B = U1 ∪̇ U2, Ui 6= ∅ offen @@ -900,11 +874,9 @@ ist unerlaubte Zerlegung. b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x) -c) Ist Z(y) ∩ Z(x) 6= ∅ Bem. 15 -=====⇒ Z(y) ∪ Z(x) ist zusammenhängend. 
+c) Ist Z(y) ∩ Z(x) 6= ∅ Bem. 15=====⇒ Z(y) ∪ Z(x) ist zusammenhängend. ⇒ Z(x) ∪ Z(y) ⊆ Z(x)⇒ Z(y) ⊆ Z(x) - ⊆ Z(y)⇒ Z(x) ⊆ Z(y) � @@ -914,8 +886,8 @@ Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammen Beweis: Sei f(A) = U1 ∪ U2, Ui 6= ∅, offen, disjunkt. ⇒ f−1(f(A)) = f−1(U1) ∪ f−1(U2) - ⇒ A = (A ∩ f−1(U1))︸ ︷︷ ︸ + 6=∅ ∪ (A ∩ f−1(U2))︸ ︷︷ ︸ @@ -967,23 +939,23 @@ Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : | In ⊆ (x− ε, x+ ε) ⊆ Ui für mindestens ein n ∈ N.4 ⇒ Widerspruch - Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in Uij enthalten. ⇒ Uj1 , . . . , Ujd ist endliche Teilüberdeckung von U . � - Beispiel 16 (Kompakte Räume) + 1) R ist nicht kompakt. 2) (0, 1) ist nicht kompakt. -Un = (1/n, 1− 1/n)⇒ ⋃ +Un = (1/n, 1− 1/n)⇒ +⋃ n∈N Un = (0, 1) 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. - Bemerkung 19 + Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. Beweis: Sei (Vi)i∈I offene Überdeckung von A. @@ -996,7 +968,6 @@ i∈I Ui ⇒ U = { Ui | i ∈ I } ∪ {X \A } ist offene Überdeckung von X - X kompakt =======⇒ es gibt i1, . . . , in ∈ I, sodass @@ -1013,8 +984,8 @@ j=1 Uij ∪ (X \A)  ∩A = A - ⇒ + n⋃ j=1 @@ -1075,27 +1046,14 @@ i=1 Ux0,yi . Da X kompakt ist, gibt es x1, . . . , xn ∈ X mit ⋃n j=1 Uxj = X -⇒ ⋃k -j=1 - -⋃m(xj) -i=1 - -( -Uxj ,yi × Vxj ,yi - -)︸ ︷︷ ︸ +⇒ ⋃kj=1⋃m(xj)i=1 (Uxj ,yi × Vxj ,yi)︸ ︷︷ ︸ Ein grün-oranges Kästchen ⊇ X × Y -⇒ ⋃ -j - -⋃ -iWi(xj , yi) = X × Y � - +⇒ ⋃j ⋃iWi(xj , yi) = X × Y � Bemerkung 21 + Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. Beweis: z. Z.: Komplement ist offen @@ -1116,7 +1074,6 @@ Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass i=1 Uxi ⊇ K. Sei V := - n⋂ i=1 @@ -1153,7 +1110,6 @@ f stetig Kompakt =====⇒ es gibt i1, . . . , in, sodass f−1(Vi1), . . . , f−1(Vin) Überdeckung von K ist. ⇒ f(f−1(Vi1)), . . . , f(f−1(Vin)) überdecken f(K). 
- Es gilt: f(f−1(V )) = V ∩ f(X) � Satz 1.1 (Heine-Borel) @@ -1161,7 +1117,6 @@ Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und abgeschlossen ist. Beweis: „⇒“: Sei K ⊆ Rn (oder Cn) kompakt. - Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset- zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist beschränkt. @@ -1183,9 +1138,7 @@ kompakt. Genauso ist Z kompakt, weil homöomorph zu { (x, y) ∈ R2 -∣∣ ‖(x, y)‖ ≤ 1 -} - +∣∣ ‖(x, y)‖ ≤ 1 } ist. � 1.6 Wege und Knoten @@ -1198,7 +1151,6 @@ Sei X ein topologischer Raum. 19 1.6. WEGE UND KNOTEN a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1]→ X. - b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. c) γ heißt einfach, wenn γ|[0,1) injektiv ist. @@ -1218,7 +1170,6 @@ Bemerkung 23 Sei X ein topologischer Raum. a) X ist wegzusammenhängend ⇒ X ist zusammenhängend - b) X ist wegzusammenhängend 6⇐ X ist zusammenhängend Beweis: @@ -1241,12 +1192,7 @@ b) Sei X = { (x, y) ∈ R2 -∣∣∣ x2 + y2 = 1 ∨ y = 1 + 2 · e− 1 - -10 -x -} -. +∣∣∣ x2 + y2 = 1 ∨ y = 1 + 2 · e− 110x }. Abbildung 1.8a veranschaulicht diesen Raum. @@ -1256,7 +1202,6 @@ weil C und S zusammenhängend sind. Also ist C = U1 und S = U2 (oder umgekehrt). Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist. - Aber: Bε(y) ∩ S 6= ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht wegzusammenhängend. � @@ -1286,9 +1231,7 @@ X Y -{(x, sin( 1 -x)) ∈ X × Y } - +{(x, sin( 1x)) ∈ X × Y } (−1, 1) ⊆ Y (b) Sinus @@ -1346,18 +1289,15 @@ H(z, 1) = γ2(z) ∀z ∈ S1 und für jedes feste t ∈ [0, 1] ist -Hz : S1 → R3, z 7→ H(z, t) +Hz : S +1 → R3, z 7→ H(z, t) ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2. Definition 22 Sei γ : [0, 1]→ R3 ein Knoten, E eine Ebene und π : R3 → E eine Projektion auf E. 
- -π heißt Knotendiagramm von γ, wenn gilt:∣∣π−1(x) -∣∣ ≤ 2 ∀x ∈ π(γ) - -Ist (π|γ([0,1])) -−1(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt: +π heißt Knotendiagramm von γ, wenn gilt:∣∣π−1(x)∣∣ ≤ 2 ∀x ∈ π(γ) +Ist (π|γ([0,1]))−1(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt: ∃λ > 1 : (y1 − x) = λ(y2 − x) @@ -1395,7 +1335,6 @@ Abbildung 1.13: Ein 3-gefärber Kleeblattknoten Aufgabe 1 (Sierpińskiraum) Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. - (a) Beweisen Sie, dass (X,TX) ein topologischer Raum ist. (b) Ist (X,TX) hausdorffsch? @@ -1493,12 +1432,11 @@ beliebig viele Elemente haben. Bemerkung 25 a) Es gibt surjektive, stetige Abbildungen [0, 1]→ [0, 1]× [0, 1] - b) Für n 6= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz + von der Gebietstreue“ (Brouwer): Ist U ⊆ Rn offen und f : U → Rn stetig und injektiv, so ist f(U) offen. - Ist n < m und Rm homöomorph zu Rn, so wäre f : Rn → Rm → Rn, (x1, . . . , xn) 7→ (x1, x2, . . . , xn, 0, . . . , 0) @@ -1532,7 +1470,6 @@ Ui → Rn (x0 : · · · : xn) 7→ ( x0 - xi , . . . , @@ -1553,40 +1490,26 @@ ist bijektiv. Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: x = (1 : 0 : 0) ∈ U0 → R2 x 7→ (0, 0) - y = (0 : 1 : 1) ∈ U2 → R2 y 7→ (0, 1) Umgebung: B1(0, 1)→ { (1 : u : v) | ‖(u, v)‖ < 1 } = V1 - Umgebung: B1(0, 1)→ -{ +{ (w : z : 1) -∣∣ w2 + z2 < 1 - -} -= V2 +∣∣ w2 + z2 < 1 } = V2 V1 ∩ V2 = ∅? (a : b : c) ∈ V1 ∩ V2 - -⇒ a 6= 0 und ( ba)2 + ( ca)2 < 1⇒ c -a < 1 - -⇒ c 6= 0 und (ac )2 + ( bc) -2 < 1⇒ a - -c < 1 +⇒ a 6= 0 und ( ba)2 + ( ca)2 < 1⇒ ca < 1 +⇒ c 6= 0 und (ac )2 + ( bc)2 < 1⇒ ac < 1 ⇒ Widerspruch 4) Sn = { x ∈ Rn+1 -∣∣ ‖x‖ = 1 -} -ist n-dimensionale Mannigfaltigkeit. - +∣∣ ‖x‖ = 1 } ist n-dimensionale Mannigfaltigkeit. Karten: Di := {(x1, . . . , xn+1) ∈ Sn|xi > 0} → B1(0, . . . , 0︸ ︷︷ ︸ @@ -1596,22 +1519,10 @@ Di := {(x1, . . . , xn+1) ∈ Sn|xi > 0} → B1(0, . . . , 0︸ ︷︷ ︸ Ci := {(x1, . . . , xn+1) ∈ Sn|xi < 0} → B1(0, . . . , 0) (x1, . . 
. , xn+1) 7→ (x1, . . . ,��xi, . . . , xn+1)1 - (x1, . . . , xn) 7→ (x1, . . . , xi−1, -√ - -1−∑n -k=1 x - -2 -k, xi, . . . , xn), oder − √ -1−∑n - -k=1 x -2 -k für Ci +1−∑nk=1 x2k, xi, . . . , xn), oder −√1−∑nk=1 x2k für Ci Sn = ⋃n+1 @@ -1633,10 +1544,7 @@ ist. { (x, y) ∈ R2 -∣∣ x · y = 0 - -} -ist keine Mannigfaltigkeit. +∣∣ x · y = 0 } ist keine Mannigfaltigkeit. Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen- @@ -1646,10 +1554,7 @@ hangskomponenten, wenn man einen Punkt entfernt. { (x, y) ∈ R2 -∣∣ x3 = y2 - -} -ist eine Mannigfaltigkeit. +∣∣ x3 = y2 } ist eine Mannigfaltigkeit. 8) X = (R \ { 0 }) ∪ (01, 02) @@ -1662,10 +1567,10 @@ Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und ho zu R. Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01 - und 02. -9) GLn(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 +9) GLn(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn +2 eine Mannigfaltigkeit bilden. @@ -1673,7 +1578,6 @@ Definition 25 Seien X,Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho- möomorphismus Z = (X ∪̇Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation und der von ∼ induzierten Quotiententopologie. - Z heißtVerklebung vonX und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. @@ -1719,14 +1623,9 @@ b) Ist grad(F )(X) 6= 0 ∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension Beweis: a) Sei y ∈ Rn \ V (F ). Weil F stetig ist, gibt es δ > 0, sodass F (Bδ(y)) ⊆ Bε(F (y)) mit -ε = 1 +ε = 12‖F (y)‖. Folgt Bδ(y) ∩ V (F ) = ∅ ⇒ Rn \ V (F ) ist offen. -2‖F (y)‖. Folgt Bδ(y) ∩ V (F ) = ∅ ⇒ Rn \ V (F ) ist offen. - -b) Sei x ∈ X mit grad(F )(x) 6= 0, also o. B. d. A. ∂F -∂X1 - -(x) 6= 0, x = (x1, . . . 
, xn), +b) Sei x ∈ X mit grad(F )(x) 6= 0, also o. B. d. A. ∂F∂X1 (x) 6= 0, x = (x1, . . . , xn), x′ := (x2, . . . , xn) ∈ Rn−1. Der Satz von der impliziten Funktion liefert nun: Es gibt Umgebungen U von x′ und differenzierbare Funktionen g : U → R, sodass G : U → Rn, u 7→ (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x @@ -1736,10 +1635,7 @@ in X ist. Beispiel 22 -1) F : R3 → R, (x, y, z) 7→ x2+y2+z2−1, V (F ) = S2, grad(F ) = (2x, 2y, 2z) -Bem. 27.b -======⇒ - +1) F : R3 → R, (x, y, z) 7→ x2+y2+z2−1, V (F ) = S2, grad(F ) = (2x, 2y, 2z) Bem. 27.b======⇒ Sn ist n-dimensionale Mannigfaltigkeit in Rn+1 2) F : R2 → R, (x, y) 7→ y2−x3 Es gilt: grad(F ) = (−3x2, 2y). Also: grad(0, 0) = (0, 0). @@ -1799,9 +1695,7 @@ x y -a = 1 -3 - +a = 13 a = 1 a = 2 @@ -1859,15 +1753,13 @@ Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt Rand von X. ∂X ist eine Mannigfaltigkeit der Dimension n− 1. - Definition 28 + Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I Für i, j ∈ I mit Ui ∩ Uj 6= ∅ heißt -ϕij := ϕj ◦ ϕ−1 -i - +ϕij := ϕj ◦ ϕ−1i ϕi(Ui ∩ Uj)→ ϕj(Ui ∩ Uj) Kartenwechsel oder Übergangsfunktion. @@ -1905,20 +1797,16 @@ Definition 30 Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k ∈ N ∪ {∞ }) mit Atlas A = (Ui, ϕi)i∈I . -a) Eine Karte (U,ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ−1 -i - +a) Eine Karte (U,ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ−1i und ϕi ◦ ϕ−1 (i ∈ I mit Ui ∩ U 6= ∅) differenzierbar von Klasse Ck sind. b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der - Klasse Ck. Er heißt Ck-Struktur auf X. Eine C∞-Struktur heißt auch differenzierbare Struktur auf X. Bemerkung 28 Für n ≥ 4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten - „exotische Sphären“. Definition 31 @@ -1943,17 +1831,12 @@ Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. 
Beweis: Seien (U ′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U ′) ⊆ V ′. ⇒ ψ′ ◦ f ◦ (ϕ′)−1 - = ψ′ ◦ (ψ−1 ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ′)−1 ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 differenzierbar ist. - Beispiel 23 -f : R→ R, x 7→ x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := 3 - -√ -x +f : R→ R, x 7→ x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := 3√x gilt: f ◦ g = idR, g ◦ f = idR Bemerkung 30 @@ -1966,38 +1849,33 @@ eine Untergruppe von Homöo(X). Definition 32 S ⊆ R3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 ∃U ⊆ R2 offen: ∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 ∀u ∈ U . - F heißt (lokale) reguläre Parametrisierung von S. F (u, v) = (x(u, v), y(u, v), z(u, v)) JF (u, v) = -∂x -∂u(p) ∂x +∂x∂u(p) ∂x∂v (p)∂y +∂u(p) -∂v (p) ∂y -∂u(p) ∂y - ∂v (p) + ∂z -∂u(p) ∂z +∂u(p) +∂z ∂v (p)  Beispiel 24 1) Rotationsflächen: Sei r : R→ R>0 eine differenzierbare Funktion. - F : R2 → R3 (u, v) 7→ (r(u) cos(u), r(v) sin(u), v) JF (u, v) = -−r(v) sinu r′(v) cosu -r(v) cosu r′(v) sinu - +−r(v) sinu r′(v) cosur(v) cosu r′(v) sinu 0 1  @@ -2005,9 +1883,7 @@ hat Rang 2 für alle (u, v) ∈ R2. 2) Kugelkoordinaten: F : R2 → R3, (u, v) 7→ (R cos v cosu,R cos v sinu,R sin v) -Es gilt: F (u, v) ∈ S2 - -R, denn +Es gilt: F (u, v) ∈ S2R, denn R2 cos2(v) cos2(u) +R2 cos2(v) sin2(u) +R2 sin2(v) @@ -2042,12 +1918,11 @@ vu 0 1 -2−2 +2−2 −1 -−1 0 - 1 + 2 0.6 @@ -2091,9 +1966,7 @@ Die Jacobi-Matrix JF (u, v) = -−R cos v sinu −R sin v cosu -R cos v cosu −R sin v sinu - +−R cos v sinu −R sin v cosuR cos v cosu −R sin v sinu 0 R cos v  @@ -2107,9 +1980,7 @@ Beweis: S ⊆ R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆ R2, Fj : R2 → R3) von S mit Ui ∩ Uj 6= ∅ existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind. - -z.Z.: F−1 -j ◦ Fi ist ein Diffeomorphismus. 
+z.Z.: F−1j ◦ Fi ist ein Diffeomorphismus. Ui Uj @@ -2119,19 +1990,15 @@ s Fi Fj -F−1 -j ◦Fi +F−1j ◦Fi Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 -Idee: Finde differenzierbare Funktion F̃−1 -j in Umgebung W von s, sodass F̃−1 - -j |S∩W = F−1 -j . +Idee: Finde differenzierbare Funktion F̃−1j in Umgebung W von s, sodass F̃ +−1 +j |S∩W = F−1j . Ausführung: Sei u0 ∈ Ui, v0 ∈ Uj mit Fi(u0) = s = Fj(v0). - Da Rg(JFj (v0)) = 2 ist, ist o. B. d. A. det @@ -2164,13 +2031,7 @@ F̃j = -∂x -∂u - -∂x -∂v 0 - -∂y +∂x∂u ∂x∂v 0∂y ∂u ∂y @@ -2189,9 +2050,7 @@ F̃j Analysis II ======⇒ Es gibt Umgebungen W von Fj von F̃j(v0, 0) = Fj(v0) = s, sodass F̃j auf W eine -differenzierbar Inverse F−1 - -j hat. +differenzierbar Inverse F−1j hat. @@ -2200,16 +2059,11 @@ j hat. Weiter gilt: F̃j -−1|W∩S = F−1 - -j |W∩S -⇒ F−1 - -j ◦ Fi|F−1 -i (W∩S) = F−1 +−1|W∩S = F−1j |W∩S -j ◦ Fi|F−1 -i (W∩S) +⇒ F−1j ◦ Fi|F−1i (W∩S) = F +−1 +j ◦ Fi|F−1i (W∩S) ist differenzierbar. @@ -2233,10 +2087,7 @@ Beispiel 25 (Lie-Gruppen) 3) (R×, ·) 4) (R>0, ·) -5) (Rn,+), denn A ·B(i, j) = - -∑n -k=1 aikbkj ist nach allen Variablen differenzierbar +5) (Rn,+), denn A ·B(i, j) = ∑nk=1 aikbkj ist nach allen Variablen differenzierbar (A−1)(i, j) = det(Aij) @@ -2245,12 +2096,7 @@ detA Aij = -ai1 . . . ain -... - -. . . -... - +ai1 . . . ain... . . . ... an1 . . . ann  ∈ R(n−1)×(n−1) @@ -2268,7 +2114,6 @@ Bemerkung 32 Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung lg : G→ G - h 7→ g · h ein Diffeomorphismus. @@ -2291,13 +2136,7 @@ b) conv(v0, . . . , vk) := i=0 λivi -∣∣∣ λi ≥ 0, -∑k - -i=0 λi = 1 -} -heißt die konvexe Hülle von - +∣∣∣ λi ≥ 0,∑ki=0 λi = 1 } heißt die konvexe Hülle von v0, . . . , vk. Definition 35 @@ -2308,11 +2147,9 @@ e0, . . . , en. Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. b) Für Punkte v0, . . . , vk im Rn in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk) - ein k-Simplex in Rn. c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . 
. , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir := - conv(vi0 , . . . , vir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. (a) 0-Simplex ∆0 @@ -2330,7 +2167,6 @@ e0 e1 (b) 1-Simplex ∆1 - 1 2 3 1 @@ -2346,7 +2182,6 @@ e1 e2 (c) 2-Simplex ∆2 - e0 e1 e2 @@ -2363,11 +2198,9 @@ a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomple wenn gilt: (i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K. - (ii) Für ∆1,∆2 ∈ K ist ∆1 ∩∆2 leer oder ein Teilsimplex von ∆1 und von ∆2. b) |K| := ⋃∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. - c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K. @@ -2398,7 +2231,6 @@ Seien K,L Simplizialkomplexe. Eine stetige Abbildung f : |K| → |L| heißt simplizial, wenn für jedes ∆ ∈ K gilt: - a) f(∆) ∈ L b) f |∆ : ∆→ f(∆) ist eine affine Abbildung. @@ -2422,7 +2254,6 @@ b2 ϕ 2) Folgende Abbildung ϕ : ∆n → ∆n−1 ist simplizial: - ϕ 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) @@ -2535,9 +2366,8 @@ Bemerkung 33 Beweis: ∆n ist die konvexe Hülle von (e0, . . . , en) in Rn+1. Jede (k + 1)-elementige Teilmenge von { e0, . . . , en } definiert ein k-Simplex. -⇒ ak(∆ +⇒ ak(∆n) = -n) = ( n+1 k+1 @@ -2545,15 +2375,7 @@ k+1 ) , k = 0, . . . , n -⇒ χ(∆n) = -∑n - -k=0(−1)k -( -n+1 -k+1 - -) +⇒ χ(∆n) = ∑nk=0(−1)k(n+1k+1) f(x) = (x+ 1)n+1 Binomischer @@ -2569,18 +2391,7 @@ k ) xk -⇒ 0 = -∑n+1 - -k=0 - -( -n+1 -k - -) -(−1)k = χ(∆n)− 1 - +⇒ 0 = ∑n+1k=0 (n+1k )(−1)k = χ(∆n)− 1 ⇒ χ(∆n) = 1 � Definition 39 @@ -2623,9 +2434,7 @@ a) Siehe „Algorithmus von Kruskal“. 39 2.3. SIMPLIZIALKOMPLEX b) χ(Γ) = a0(Γ)− a1(Γ) - = a0(Γ)− (n+ a1(T )) - = a0(T )− a1(T )− n = χ(T )− n = 1− n @@ -2712,9 +2521,7 @@ S2, so gibt es eine Triangulierung T , die sowohl um T1 als auch um T2 Verfeiner ist (vgl. Abbildung 2.13). T1 - T2 - T Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist. 
@@ -2739,13 +2546,12 @@ Cn(K) = cσ · σ ∣∣∣∣∣∣ cσ ∈ R -  -Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn. +Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn. Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , x̂i, . . . , xn) die i-te Seite von σ und dσ = dnσ :=∑ -i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare +i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare Abbildung. Dann gilt: dn−1 ◦ dn = 0 @@ -2766,7 +2572,6 @@ Beispiel 29 Sei a < b < c. Dann gilt: d2σ = e1 − e2 + e3 - d1(e1 − e2 + e3) = (c− b)− (c− a) + (b− a) @@ -2779,11 +2584,8 @@ Sei a < b < c < d. Dann gilt für Tetraeder: d3(∆(a, b, c, d)) = ∆(b, c, d)−∆(a, c, d) + ∆(a, b, d)−∆(a, b, c),wobei: d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c) - d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c) - d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b) - d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b) ⇒ d2(d3(∆(a, b, c, d))) = 0 @@ -2819,11 +2621,11 @@ j=0 ∑ 0≤i≤j≤n−1 - (−1)i+j∂j(∂i(σ)) + -∑ +∑ 0≤j 0 } = { (x, y) ∈ R2 -∣∣ y > 0 - -} +∣∣ y > 0 } @@ -5260,19 +4892,16 @@ b) Sei g ∈ G1 ∪̇G2 eine hyperbolische Gerade. Es existieren disjunkte Zerlegungen von H \ g: Fall 1: g = { z ∈ H ‖ z −m| = r } ∈ G1 - Dann gilt: -H = { z ∈ H ‖ z −m| < r }︸ ︷︷ ︸ +H = { z ∈ H ‖ z −m| < r }︸ ︷︷ ︸ =:H1 (Kreisinneres) ∪̇ { z ∈ H ‖ z −m| > r }︸ ︷︷ ︸ =:H2 (Kreisäußeres) Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer. - Fall 2: g = { z ∈ H | 0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g 6= ∅ „⇒“: A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g 6= ∅ ⇒ i 6= j - Sei h die Gerade, die durch A und B geht. Da A,B /∈ g, aber A,B ∈ h gilt, haben g und h insbesondere mindestens einen @@ -5344,9 +4971,7 @@ a b c d ) -◦ z := - -az + b +◦ z := az + b cz + d @@ -5396,7 +5021,6 @@ e) PSL2(R) operiert auf G. Beweis: a) Sei z = x+ iy ∈ H, d. h. 
y > 0 und σ = - ( a b c d @@ -5404,9 +5028,7 @@ c d ) ∈ SL2(R) -⇒ σ(z) = -a(x+ iy) + b - +⇒ σ(z) = a(x+ iy) + b c(x+ iy) + d = @@ -5424,7 +5046,6 @@ c(x+ iy) + d + i ay(cx+ d)− (ax+ b)cy - (cx+ d)2 + (cy)2 = @@ -5458,9 +5079,7 @@ Die Abbildung bildet also nach H ab. Außerdem gilt:( 0 1 ) -◦ z = - -x+ iy +◦ z = x+ iy 1 = x+ iy = z @@ -5555,44 +5174,36 @@ c′ d′ b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H. c) Ansatz: σ = - ( a b c d ) -σ(x0) = ax0+b +σ(x0) = +ax0+b cx0+d -! +! = 0 ⇒ ax0 + b = 0⇒ b = −ax0 σ(x∞) =∞⇒ cx∞ + d = 0⇒ d = −cx∞ σ(x1) = 1⇒ ax1 + b = cx1 + d -a(x1 − x0) = c(x1 − x∞)⇒ c = a x1−x0 - -x1−x∞ -⇒ −a2 · x∞ x1−x0 +a(x1 − x0) = c(x1 − x∞)⇒ c = a x1−x0x1−x∞ +⇒ −a2 · x∞ x1−x0x1−x∞ + a -x1−x∞ + a2x0 +2x0 x1−x0 x1−x∞ = 1 -⇒ a2 x1−x0 -x0−x∞ (x0 − x∞) = 1 ⇒ a2 = x1−x∞ - -(x1−x∞)(x1−x0) +⇒ a2 x1−x0x0−x∞ (x0 − x∞) = 1 ⇒ a +2 = x1−x∞(x1−x∞)(x1−x0) d) Es gilt: -A−1 -λ = A 1 - -λ +A−1λ = A 1λ -B−1 -t = B−t +B−1t = B−t C−1 = C3 @@ -5692,7 +5303,6 @@ c d− bc ) Da wir detM = 1 = ad− bc = d− bc wissen, gilt sogar M2,2 = 1. - Gehe zu Fall 4. Fall 4: a = 1, b = 0, d = 1 @@ -5716,7 +5326,6 @@ Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R. � e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. • σ = - ( λ 0 0 λ−1 @@ -5775,8 +5384,8 @@ x Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix • Offensichtlich gilt die Aussage für σ = - ( + 1 a 0 1 @@ -5788,9 +5397,7 @@ Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix −1 0 ) -, also σ(z) = −1 - -z +, also σ(z) = −1z Bemerkung 69 Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2. @@ -5813,8 +5420,9 @@ y z = r · eiϕ 1 -z = 1 +z = +1 r · eiϕ Abbildung 4.23: Inversion am Kreis @@ -5824,7 +5432,6 @@ Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dan Definition 65 Seien z1, z2, z3, z4 ∈ C paarweise verschieden. 
- Dann heißt DV(z1, z2, z3, z4) := @@ -5835,29 +5442,23 @@ z3−z2 = (z1 − z4) · (z3 − z2) - (z1 − z2) · (z3 − z4) Doppelverhältnis von z1, . . . , z4. Bemerkung 70 (Eigenschaften des Doppelverhältnisses) a) DV(z1, . . . , z4) ∈ C \ { 0, 1 } -b) DV(z1, z4, z3, z2) = 1 - -DV(z1,z2,z3,z4) +b) DV(z1, z4, z3, z2) = 1DV(z1,z2,z3,z4) -c) DV(z3, z2, z1, z4) = 1 -DV(z1,z2,z3,z4) +c) DV(z3, z2, z1, z4) = 1DV(z1,z2,z3,z4) d) DV ist auch wohldefiniert, wenn eines der zi =∞ oder wenn zwei der zi gleich sind. - e) DV(0, 1,∞, z4) = z4 (Der Fall z4 ∈ { 0, 1,∞} ist zugelassen). f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ {∞ } ist DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) -und für σ(z) = 1 -z gilt +und für σ(z) = 1z gilt DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) @@ -5877,46 +5478,43 @@ Annahme: DV(z1, . . . , z4) = 1 85 4.3. HYPERBOLISCHE GEOMETRIE ⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4 - ⇔ z2z3 + z1z4 = z3z4 + z1z2 - ⇔ z2z3 − z3z4 = z1z2 − z1z4 - ⇔ z3(z2 − z4) = z1(z2 − z4) ⇔ z3 = z1 oder z2 = z4 Alle zi sind paarweise verschieden ⇒ Widerspruch � +b) DV(z1, z4, z3, z2) = -b) DV(z1, z4, z3, z2) = (z1−z2)·(z3−z4) -(z1−z4)·(z3−z2) = 1 +(z1−z2)·(z3−z4) +(z1−z4)·(z3−z2) = +1 DV(z1,z2,z3,z4) -c) DV(z3, z2, z1, z4) = (z3−z4)·(z1−z2) -(z3−z2)·(z1−z4) = 1 +c) DV(z3, z2, z1, z4) = +(z3−z4)·(z1−z2) +(z3−z2)·(z1−z4) = +1 DV(z1,z2,z3,z4) d) Zwei der zi dürfen gleich sein, da: Fall 1 z1 = z4 oder z3 = z2 - In diesem Fall ist DV(z1, . . . , z4) = 0 Fall 2 z1 = z2 oder z3 = z4 - Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) =∞ gilt. Fall 3 z1 = z3 oder z2 = z4 - Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1. Im Fall, dass ein zi =∞ ist, ist entweder DV(0, 1,∞, z4) = 0 oder DV(0, 1,∞, z4)±∞ -e) DV(0, 1,∞, z4) = (0−z4)·(∞−1) -(0−1)·(∞−z4) = z4·(∞−1) - +e) DV(0, 1,∞, z4) = (0−z4)·(∞−1)(0−1)·(∞−z4) = +z4·(∞−1) ∞−z4 = z4 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. 
@@ -5932,19 +5530,18 @@ Behauptung folgt, weil σ−1(R ∪∞) ein Kreis oder eine Gerade in C ist. Definition 66 Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die „Schnittpunkte“ von gz1,z2 mit R ∪ {∞ }. -Dann sei dH(z1, z2) := 1 - -2 | ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik. +Dann sei dH(z1, z2) := 12 | ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik. Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 - die „Schnittpunkte“ von gz1,z2 mit R ∪ {∞ }. Dann gilt: 1 2 -| ln DV(a1, z1, a2, z2)| = 1 +| ln DV(a1, z1, a2, z2)| = + +1 2 | ln DV(a2, z1, a1, z2)| @@ -5973,7 +5570,9 @@ Da der ln im Betrag steht, folgt direkt: 1 2 -| ln DV(a1, z1, a2, z2)| = 1 +| ln DV(a1, z1, a2, z2)| = + +1 2 | ln DV(a2, z1, a1, z2)| @@ -5994,7 +5593,6 @@ also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b. 2d(ia, ib) =| ln DV(0, ia,∞, ib) | =| ln (0− ib)(∞− ia) - (0− ia)(∞− ib) | =| ln b @@ -6046,7 +5644,6 @@ Aufgabe 9 Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y . - Zeigen Sie: (a) Ist 4ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die @@ -6056,14 +5653,12 @@ Winkel ∠ABC und ∠BCA gleich. gegenüber und umgekehrt. (c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit - P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g und der Schnittpunkt des Lots mit g heißt Lotfußpunkt . Aufgabe 10 Seien f, g, h ∈ G und paarweise verschieden. - Zeigen Sie: f ‖ g ∧ g ‖ h⇒ f ‖ h Aufgabe 11 @@ -6112,9 +5707,7 @@ der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. 
1 = ‖γ′(t)‖ = ‖γ′(t)‖2 = 〈γ′(t), γ′(t)〉 -⇒ 0 = -d - +⇒ 0 = d dt 〈γ′(t), γ′(t)〉 @@ -6122,10 +5715,18 @@ dt d dt -(γ′1(t)γ′1(t) + γ′2(t)γ′2(t)) +(γ′1(t)γ -= 2 · (γ′′1 (t) · γ′1(t) + γ′′2 (t) · γ′2(t)) +′ +1(t) + γ + +′ +2(t)γ +′ +2(t)) + += 2 · (γ′′1 (t) · γ′1(t) + γ′′2 (t) · γ′2(t)) = 2 · 〈γ′′(t), γ′(t)〉 Definition 69 @@ -6152,9 +5753,7 @@ Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt: γ(t) = ( -r · cos - -t +r · cos t r , r · sin t @@ -6172,9 +5771,7 @@ ist parametrisiert durch Bogenlänge, da gilt: (r · 1 r -)(− sin - -t +)(− sin t r ), r @@ -6192,9 +5789,7 @@ r = ( -− sin - -t +− sin t r , cos @@ -6209,14 +5804,10 @@ Der Normalenvektor von γ in t ist n(t) = ( -− cos - -t +− cos t r -,− sin - -t +,− sin t r @@ -6224,45 +5815,28 @@ r da gilt: 〈n(t), γ′(t)〉 = - 〈( -− cos t - -r -− sin t - -r +− cos tr +− sin tr ) , ( -− sin t - -r -cos t - -r +− sin tr +cos tr )〉 -= (− cos - -t += (− cos t r -) · (− sin - -t +) · (− sin t r -) + (− sin - -t +) + (− sin t r -) · (cos - -t +) · (cos t r ) @@ -6270,27 +5844,13 @@ r = 0 ‖n(t)‖ = - -∥∥∥∥(− cos -t - -r -,− sin - -t - -r -) +∥∥∥∥(− cos tr ,− sin tr ) ∥∥∥∥ -= (− cos - -t += (− cos t r -)2 + (− sin - -t +)2 + (− sin t r )2 @@ -6299,44 +5859,29 @@ r det(γ′1(t), n(t)) = -∥∥∥∥(− sin t -r − cos t - -r -cos t - -r − sin t -r - +∥∥∥∥(− sin tr − cos trcos tr − sin tr )∥∥∥∥ -= (− sin - -t += (− sin t r -)2 − (− cos - -t +)2 − (− cos t r -) · cos - -t +) · cos t r + = 1 -Die Krümmung ist für jedes t konstant 1 -r , da gilt: +Die Krümmung ist für jedes t konstant 1r , da gilt: γ′′(t) = ( −1 - r -cos +cos t r @@ -6357,21 +5902,15 @@ r r · ( -− cos - -t +− cos t r -,− sin - -t +,− sin t r ) -⇒ κ(t) = - -1 +⇒ κ(t) = 1 r @@ -6384,8 +5923,7 @@ Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve. a) Für t ∈ I heißt κ(t) := ‖γ′′(t)‖ die Krümmung von γ in t. -b) Ist für t ∈ I die Ableitung γ′′(t) 6= 0, so heißt γ′′(t) -‖γ′′(t)‖ Normalenvektor an γ in t. 
+b) Ist für t ∈ I die Ableitung γ′′(t) 6= 0, so heißt γ′′(t)‖γ′′(t)‖ Normalenvektor an γ in t. c) b(t) sei ein Vektor, der γ′(t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt. Also gilt: @@ -6415,32 +5953,28 @@ V (f) = { x ∈ R3 -∣∣ f(x) = 0 -} - +∣∣ f(x) = 0 } für eine C∞-Funktion f : R3 → R. - Definition 71 + Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um s ∈ V : (u, v) 7→ (x(u, v), y(u, v), z(u, v)) - Für p = F−1(s) ∈ U sei JF (p) = -∂x -∂u(p) ∂x +∂x∂u(p) ∂x∂v (p)∂y +∂u(p) -∂v (p) ∂y -∂u(p) ∂y - ∂v (p) + ∂z -∂u(p) ∂z +∂u(p) +∂z ∂v (p)  @@ -6452,7 +5986,6 @@ Bemerkung 73 (Eigenschaften der Tangentialebene) a) TsS ist 2-dimensionaler Untervektorraum von R3. b) TsS = 〈ũ, ṽ〉, wobei ũ, ṽ die Spaltenvektoren der Jacobi-Matrix JF (p) sind. - c) TsS hängt nicht von der gewählten Parametrisierung ab. @@ -6460,7 +5993,6 @@ c) TsS hängt nicht von der gewählten Parametrisierung ab. 91 5.2. TANGENTIALEBENE d) Sei S = V (f) eine reguläre Fläche in R3, also f : V → R eine C∞-Funktion, V ⊆ R3 - offen, grad(f)(x) 6= 0 für alle x ∈ S. Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S. @@ -6480,7 +6012,6 @@ d) Sei x ∈ TsS, γ : [−ε,+ε] → S eine parametrisierte Kurve mit ε > 0 u sodass γ′(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 ⇒ 0 = (f ◦ γ)′(0) = 〈grad(f)(γ(0)), γ′(0)〉 ⇒ TsS ⊆ grad(f)(s)⊥ - dim=2 ====⇒ TsS = (grad(f)(s))⊥ @@ -6507,9 +6038,7 @@ s ∈ Vi ∩ Vj ∩ S gilt: det(Ds Vi→Vj︷ ︸︸ ︷ -Fj ◦ F−1 - -i︸ ︷︷ ︸ +Fj ◦ F−1i︸ ︷︷ ︸ ∈R3×3 ) > 0 @@ -6520,8 +6049,8 @@ Beispiel 46 (Normalenfelder) 1) S = S2, n1 = idS2 ist ein stetiges Normalenfeld. Auch n2 = −idS2 ist ein stetiges Normalenfeld. - 2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma- + lenfeld, aber kein stetiges Normalenfeld. @@ -6534,7 +6063,6 @@ Abbildung 5.1: Möbiusband Bemerkung 75 Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, ‖x‖ = 1. 
- Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3. Dann gibt es eine Umgebung V ⊆ R3 von s, sodass @@ -6559,10 +6087,10 @@ Beispiel 47 (Gauß-Krümmung) s = (0, 0, 1), x = (1, 0, 0) ⇒ E = R · x+ R · n(s) (x, z-Ebene) - C = E ∩ S ist Kreislinie -κNor(s, x) = 1 +κNor(s, x) = +1 r = 1 2) S = V (X2 + Z2 − 1) ⊆ R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0) @@ -6582,10 +6110,7 @@ V ∩ E2 ∩ S = { (1, 0, z) ∈ R3 -∣∣ z ∈ R - -} -ist eine Gerade +∣∣ z ∈ R } ist eine Gerade ⇒ κNor(s, x2) = 0 @@ -6595,10 +6120,10 @@ x2 = (0, 1, 0) κNor(s, x1) = 2 κNor(s, x2) = −2 -−1.5 -−1 −0.5 +−1.5 −1 +−0.5 0 -0 0.5 +0.5 1 1.5 −1 @@ -6626,8 +6151,12 @@ y z (a) S = V (X2 + Z2 − 1) +−2 −1.5 −1 + +−0.5 0 +0.5 1 -−2 −1.5 −1 −0.5 0 0.5 1 1.5 2 +1.5 2 −2 @@ -6652,9 +6181,7 @@ y z −4 - −2 - 0 2 @@ -6669,11 +6196,12 @@ Abbildung 5.2: Beispiele für reguläre Flächen Definition 74 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. - γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und γ′′(0) 6= 0. -Sei n(0) := γ′′(0) +Sei n(0) := γ +′′(0) + ‖γ′′(0)‖ . Zerlege n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS)⊥ @@ -6685,16 +6213,15 @@ Bemerkung 76 Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ). Beweis: γ′′(0) = γ′′(0), da γ′(0) = −γ′(0). - Es gilt: κNor(s, γ) hängt nur von |γ′(0)| ab und ist gleich κNor(s, γ′(0)). Bemerkung 77 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. -Sei T 1 -s S = { x ∈ TsS | ‖x‖ = 1 } ∼= S1. Dann ist +Sei T 1s S = { x ∈ TsS | ‖x‖ = 1 } ∼= S1. Dann ist -κnNor(s) : T 1 +κnNor(s) : T +1 s S → R, x 7→ κNor(s, x) eine glatte Funktion und BildκnNor(s) ist ein abgeschlossenes Intervall. @@ -6710,37 +6237,28 @@ a) κn1 (s) : = min { κnNor(s, x) -∣∣ x ∈ T 1 -s S -} - -und - +∣∣ x ∈ T 1s S } und κn2 (s) : = max + { κnNor(s, x) -∣∣ x ∈ T 1 -s S -} heißen Hauptkrümmungen von S in s. +∣∣ x ∈ T 1s S } +heißen Hauptkrümmungen von S in s. 
b) K(s) := κn1 (s) · κn2 (s) heißt Gauß-Krümmung von S in s. - Bemerkung 78 -Ersetzt man n durch −n, so gilt: -κ−nNor(s, x) = −κnNor(x) ∀x ∈ T 1 -s S +Ersetzt man n durch −n, so gilt: +κ−nNor(s, x) = −κnNor(x) ∀x ∈ T 1s S ⇒ κ−n1 (s) = −κn2 (s) - κ−n2 (s) = −κn1 (s) und K−n(s) = Kn(s) =: K(s) Beispiel 48 1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S2 - ⇒ K(s) = 1 2) Zylinder: @@ -6811,8 +6329,8 @@ a) Die Einschränkung des Standardskalarproduktes des R3 auf TsS macht TsS zu ei euklidischen Vektorraum. b) {DpF (e1), DpF (e2) } ist eine Basis von TsS. - c) Bzgl. der Basis {DpF (e1), DpF (e2) } hat das Standardskalarprodukt aus Bemer- + kung 80.a die Darstellungsmatrix IS . d) gi,j(s) ist eine differenzierbare Funktion von s. @@ -6821,135 +6339,88 @@ Bemerkung 81 det(IS) = -∥∥∥∥ ∂F∂u1 -(p)× ∂F - -∂u2 -(p) - +∥∥∥∥ ∂F∂u1 (p)× ∂F∂u2 (p) ∥∥∥∥2 -Beweis: Sei ∂F -∂u1 - -(p) = - -x1 - -x2 +Beweis: Sei ∂F∂u1 (p) = +x1x2 x3 - , ∂F -∂u2 - -(p) = - -y1 - -y2 - + , ∂F∂u2 (p) = +y1y2 y3  -Dann ist ∂F - -∂u1 -(p)× ∂F +Dann ist ∂F∂u1 (p)× +∂F ∂u2 -(p) = - -z1 -z2 +(p) = +z1z2 z3  mit - z1 = x2y3 − x3y2 - z2 = x3y1 − x1y3 - z3 = x1y2 − x2y1 ⇒ ‖ ∂F ∂u1 (p)× ∂F - ∂u2 -(p)‖ = z2 - -1 + z2 -2 + z2 -3 +(p)‖ = z21 + z22 + z23 96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -det(IS) = g1,1g2,2 − g2 -1,2 +det(IS) = g1,1g2,2 − g21,2 = -〈x1 - -x2 - +〈x1x2 x3  , - -x1 - -x2 - +x1x2 x3 -〉〈y1 - -y2 - +〉〈y1y2 y3  , - -y1 - -y2 - +y1y2 y3 〉−〈 -x1 - -x2 - +x1x2 x3  , - -y1 - -y2 - +y1y2 y3 〉2 -= (x2 -1 + x2 += (x21 + x +2 +2 + x -2 + x2 -3)(y2 +2 +3)(y -1 + y2 -2 + y2 +2 +1 + y +2 +2 + y + +2 3)− (x1y1 + x2y2 + x3y3)2 Definition 77 @@ -6983,8 +6454,8 @@ a) V fdA ist unabhängig von der gewählten Parametrisierung. b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. - Dann ist + ∫ S fdA wohldefiniert, falls (z. B.) S kompakt ist. 
@@ -7028,8 +6499,7 @@ Proposition 5.1 Sei S ⊆ R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S2. Dann gilt: -a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S -2 durch +a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S2 durch dsn(x) = d @@ -7049,8 +6519,7 @@ Die Abbildung dsn heißt Weingarten-Abbildung 97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -b) Tn(s)S -2 = TsS. +b) Tn(s)S2 = TsS. c) dsn ist ein Endomorphismus von TsS. @@ -7066,38 +6535,29 @@ Beweis: a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -b) Tn(S)S -2 = 〈n(s)〉⊥ = TsS - +b) Tn(S)S2 = 〈n(s)〉⊥ = TsS c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus. d) Zu zeigen: ∀x, y ∈ IsS : 〈x, dsn(y)〉 = 〈dsn(x), y〉 Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. -Sei xi = DpF (ei) = ∂F -∂ui +Sei xi = DpF (ei) = ∂F∂ui (p) i = 1, 2 -(p) i = 1, 2 +Beh.: 〈xi, dsn(xj)〉 = 〈 ∂ +2F -Beh.: 〈xi, dsn(xj)〉 = 〈 ∂2F ∂ui∂uj - (p), dsn(xi)〉 -⇒ 〈 ∂2F -∂ui∂uj - -(p), dsn(xi)〉 = 〈xj , dsn(xi)〉 +⇒ 〈 ∂2F∂ui∂uj (p), dsn(xi)〉 = 〈xj , dsn(xi)〉 Bew.: 0 = 〈∂F ∂u (p+ tej), n(p+ tej)〉 -⇒ 0 = -d - +⇒ 0 = d dt ( @@ -7110,7 +6570,6 @@ dt t=0 = 〈 d - dt ∂F @@ -7136,13 +6595,15 @@ Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fund form von S in s bzgl. F . Man schreibt: IIs(x, y) = 〈−dsn(x), y〉 = Is(−dsn(x), y) - Bemerkung 83 + Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix (h (s) -i,j )i,j=1,2 mit hi,j(s) = 〈 ∂2F +i,j )i,j=1,2 mit hi,j(s) = 〈 + +∂2F ∂ui∂uj (p), n(s)〉 @@ -7186,7 +6647,6 @@ t=0 99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM = 〈dsn(γ′(0)), γ′(0)〉+ κNor(s, γ) - = −IIs(γ′(0), γ′(0)) + κNor(s, γ) Folgerung 5.3 @@ -7206,17 +6666,18 @@ Beweis: a) IIs ist symmetrisch, IsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von IIs. Ist x ∈ TsS, ‖x‖ = 1, so gibt es ϕ ∈ [0, 2π) mit x = cosϕ · y1 + sinϕ · y2. 
- Seien λ1, λ2 die Eigenwerte von IIs, also IIs(yi, yi) = λi. Dann gilt: -IIs(x, x) = cos2 ϕλ1 + sin2 ϕλ2 +IIs(x, x) = cos +2 ϕλ1 + sin -= (1− sin2 ϕ)λ1 + sin2 ϕλ2 +2 ϕλ2 -= λ1 + sin2 ϕ(λ2 − λ1) ≥ λ1 += (1− sin2 ϕ)λ1 + sin2 ϕλ2 += λ1 + sin +2 ϕ(λ2 − λ1) ≥ λ1 = cos2 ϕ+ (1− cos2 ϕ)λ2 - = λ2 − cos2 ϕ(λ2 − λ1) ≤ λ2 Prop. 5.2 @@ -7225,17 +6686,13 @@ Prop. 5.2 { κNor(s, x) -∣∣ x ∈ T 1 -s S -} - +∣∣ x ∈ T 1s S } λ2 = max + { κNor(s, x) -∣∣ x ∈ T 1 -s S -} +∣∣ x ∈ T 1s S } Satz 5.5 (Satz von Gauß-Bonnet) Sei S ⊆ R3 eine kompakte orientierbare reguläre Fläche. Dann gilt:∫ @@ -7257,13 +6714,12 @@ Lösung zu Aufgabe 1 Teilaufgabe a) Es gilt: (i) ∅, X ∈ TX . - (ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈ -TX : U1 ∩ U2 ∈ TX . +TX : U1 ∩ U2 ∈ TX . (iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine -beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : +beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : ⋃ i∈I Ui ∈ TX @@ -7283,7 +6739,6 @@ Teilaufgabe a) Beh.: ∀a ∈ Z : { a } ist abgeschlossen. Sei a ∈ Z beliebig. Dann gilt: - Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. @@ -7293,11 +6748,7 @@ Beh.: { −1, 1 } ist nicht offen Bew.: durch Widerspruch Annahme: { −1, 1 } ist offen. -Dann gibt es T ⊆ B, sodass - -⋃ -M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele - +Dann gibt es T ⊆ B, sodass ⋃M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente ⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ {−1, 1 } ist nicht offen. � @@ -7313,11 +6764,9 @@ Beh.: Es gibt unendlich viele Primzahlen. Bew.: durch Widerspruch Annahme: Es gibt nur endlich viele Primzahlen p ∈ P - Dann ist -Z \ { −1,+1 } FS d. Arithmetik -= +Z \ { −1,+1 } FS d. 
Arithmetik= ⋃ p∈P @@ -7348,7 +6797,6 @@ i∈N\J Pi wobei J ⊆ N endlich und Uj ⊆ Pj offen ∀j ∈ J eine Basis der Topologie. - Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen Form. � @@ -7383,14 +6831,7 @@ Bew.: Für SL1(R) gilt: SL1(R) = { A ∈ R1×1 -∣∣ detA = 1 -} - -= -( -1 -) ∼= { 1 }. 22⇒ SL1(R) - +∣∣ detA = 1 } = (1) ∼= { 1 }. 22⇒ SL1(R) ist kompakt. @@ -7424,11 +6865,9 @@ Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. Definition 79 Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G→ H eine Abbildung. - ϕ heißt Homomorphismus, wenn ∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2) - gilt. Es folgt direkt: @@ -7454,7 +6893,6 @@ Seite 6. Definition 80 Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G→ H eine Abbildung. - ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen @@ -7479,13 +6917,11 @@ Z := { z ∈M | ∃Weg von x nach z } Es gilt: (i) Z 6= ∅, da M lokal wegzusammenhängend ist - (ii) Z ist offen, da M lokal wegzusammenhängend ist (iii) ZC := { z̃ ∈M | @Weg von x nach z̃ } ist offen Da M eine Mannigfaltigkeit ist, existiert zu jedem z̃ ∈ ZC eine offene und wegzusammenhängende Umgebung Uz̃ ⊆M . - Es gilt sogar Uz̃ ⊆ ZC , denn gäbe es ein Uz̃ 3 z ∈ Z, so gäbe es Wege γ2 : [0, 1] → M,γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M,γ1(0) = z̃, γ1(1) = z. Dann wäre aber @@ -7495,16 +6931,12 @@ Dann wäre aber γ(x) = { -γ1(2x) falls 0 ≤ x ≤ 1 - -2 - -γ2(2x− 1) falls 1 -2 < x ≤ 1 +γ1(2x) falls 0 ≤ x ≤ 12 +γ2(2x− 1) falls 12 < x ≤ 1 ein stetiger Weg von z̃ nach x ⇒ Widerspruch. - DaM zusammenhängend ist undM = Z︸︷︷︸ + offen ∪ ZC︸︷︷︸ @@ -7545,9 +6977,7 @@ kehrt. Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C ′ ∈ AC+ mit d(A,C ′) = d(A,B) ⇒ C ′ liegt zwischen A und C. Es gilt ]ABC ′ < ]ABC und aus Aufgabe 9 (a) folgt: ]ABC ′ = ]AC ′B. -∠BC ′A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 
66 - -=====⇒ ]BC ′A > ]BCA +∠BC ′A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66=====⇒ ]BC ′A > ]BCA ⇒ ]BCA < ]BC ′A = ]ABC ′ < ]ABC Sei umgekehrt ]ABC > ]BCA, kann wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B) > d(A,C) gelten. Wegen Aufgabe 9 (a) kann nicht d(A,B) = d(A,C) gelten. @@ -7558,12 +6988,9 @@ Beh.: ∃! Lot Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g. ⇒ ϕ(P )P schneidet g in F . - Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g ⇒ ϕ(P )P schneidet g in F . - Sei A ∈ g\{ F }. Dann gilt ϕ(∠AFP ) = ∠AFϕ(P ) = π ⇒ ∠AFP ist rechter Winkel. - Gäbe es nun G ∈ g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre 4PFG ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). @@ -7615,7 +7042,6 @@ Es gilt d(A,C) = d(A′, C ′) = d(ϕ(A′), ϕ(C ′)) = d(A,ϕ(C ′)) und d( d(ϕ(B′), ϕ(C ′)) = d(B,ϕ(C ′)). Bem. 62 =====⇒ C = ϕ(C). - Es gilt also ϕ(4A′B′C ′) = 4ABC. � @@ -7756,13 +7182,11 @@ f(λa+ µb) = λf(a) + µf(b) Definition 83 Sei V ein Vektorraum und S ⊆ V eine Teilmenge. - S heißt eine Orthonormalbasis von V , wenn gilt: (i) S ist eine Basis von V (ii) ∀v ∈ S : ‖v‖ = 1 - (iii) ∀v1, v2 ∈ S : v1 6= v2 ⇒ 〈v1, v2〉 = 0 Satz (Zwischenwertsatz) @@ -7771,12 +7195,10 @@ f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0. Definition 84 Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. - v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv. - Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f . -Satz (Binomischer Lehrsatz) +Satz (Binomischer Lehrsatz) Sei x, y ∈ R. Dann gilt: (x+ y)n = @@ -7797,21 +7219,15 @@ Seien a, b ∈ R3 Vektoren. a× b := -a1 - -b3 +a1b3 a3 × -a1 - -b3 +a1b3 a3  = - -a2b3 − a3b2 -a3b1 − a1b3 +a2b3 − a3b2a3b1 − a1b3 a1b2 − a2b1  @@ -7870,7 +7286,6 @@ Sym(X) Symmetrische Gruppe Wege Sei γ : I → X ein Weg. 
- [γ] Homotopieklasse von γ γ1 ∗ γ2 Zusammenhängen von Wegen γ1 ∼ γ2 Homotopie von Wegen @@ -7894,7 +7309,6 @@ X/∼ X modulo ∼ ‖x‖ Norm von x |x| Betrag von x 〈a〉 Erzeugnis von a - Sn Sphäre Tn Torus From a640d1efd7aac88b7201d4f44beb35cc46c3d662 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 21:15:49 -0500 Subject: [PATCH 05/18] feat: add PLAYA-PDF --- benchmark.py | 11 +++++++++++ pdf_benchmark/library_code.py | 20 ++++++++++++++++++++ requirements/main.in | 1 + 3 files changed, 32 insertions(+) diff --git a/benchmark.py b/benchmark.py index ae44b78..8e7e514 100644 --- a/benchmark.py +++ b/benchmark.py @@ -15,6 +15,7 @@ import pdfminer import pdfplumber import pdfrw +import playa import pypdf import pypdfium2 import tika @@ -30,6 +31,7 @@ pdfplubmer_get_text, pdfrw_watermarking, pdftotext_get_text, + playa_get_text, pymupdf_get_text, pymupdf_image_extraction, pymupdf_watermarking, @@ -251,5 +253,14 @@ def write_single_result( last_release_date="2017-09-18", dependencies="", ), + "playa": Library( + "playa", + "playa", + "https://pypi.org/project/playa-pdf/", + text_extraction_function=playa_get_text, + version=playa.__version__, + license="MIT", + last_release_date="2025-02-18", + ), } main(docs, libraries) diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py index ac0ce2b..73e353f 100644 --- a/pdf_benchmark/library_code.py +++ b/pdf_benchmark/library_code.py @@ -1,3 +1,4 @@ +import argparse import os import subprocess import tempfile @@ -6,16 +7,35 @@ import fitz as PyMuPDF import pdfminer import pdfplumber +import playa import pypdf import pypdfium2 as pdfium from borb.pdf.pdf import PDF from borb.toolkit.text.simple_text_extraction import SimpleTextExtraction +from playa.cli import extract_text as playa_extract_text from pdfminer.high_level import extract_pages from requests import ReadTimeout from .text_extraction_post_processing import postprocess, PDFIUM_ZERO_WIDTH_NO_BREAK_SPACE +def playa_get_text(data: 
bytes) -> str: + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, "pdf.pdf") + with open(path, "wb") as outfh: + outfh.write(data) + outpath = os.path.join(tempdir, "pdf.txt") + with open(outpath, "wt") as outfh: + args = argparse.Namespace(pages="all", outfile=outfh) + with playa.open(path) as pdf: + playa_extract_text(pdf, args) + page_labels = [page.label for page in pdf.pages] + with open(outpath) as infh: + text = infh.read() + text = postprocess(text, page_labels) + return text + + def pymupdf_get_text(data: bytes) -> str: with PyMuPDF.open(stream=data, filetype="pdf") as doc: text = "" diff --git a/requirements/main.in b/requirements/main.in index 8238a61..a378426 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -6,6 +6,7 @@ pypdf requests rich tika +playa-pdf python-Levenshtein pdftotext pydantic From 45928039ce6e18fdedf89647b9fa69b87fa9f622 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 21:17:39 -0500 Subject: [PATCH 06/18] chore: update all versions --- requirements/dev.txt | 32 +++++++---------- requirements/main.txt | 80 ++++++++++++++++++++----------------------- 2 files changed, 50 insertions(+), 62 deletions(-) diff --git a/requirements/dev.txt b/requirements/dev.txt index da4bbc0..cd48c0c 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,26 +1,20 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile requirements/dev.in -# -build==0.9.0 +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/dev.in +build==1.2.2.post1 # via pip-tools -click==8.1.3 +click==8.1.8 # via pip-tools -packaging==22.0 +packaging==24.2 # via build -pep517==0.13.0 - # via build -pip-tools==6.12.1 +pip==25.0.1 + # via pip-tools +pip-tools==7.4.1 # via -r requirements/dev.in -tomli==2.0.1 +pyproject-hooks==1.2.0 # via # build - # pep517 -wheel==0.38.4 + # pip-tools +setuptools==75.8.0 + # via 
pip-tools +wheel==0.45.1 # via pip-tools - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/requirements/main.txt b/requirements/main.txt index c740d56..67dc7a1 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1,32 +1,28 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile requirements/main.in -# -annotated-types==0.5.0 +# This file was autogenerated by uv via the following command: +# uv pip compile requirements/main.in +annotated-types==0.7.0 # via pydantic -borb==2.1.17 +borb==2.1.25 # via -r requirements/main.in -certifi==2023.7.22 +certifi==2025.1.31 # via requests -cffi==1.15.1 +cffi==1.17.1 # via cryptography -charset-normalizer==3.2.0 +charset-normalizer==3.4.1 # via # pdfminer-six # requests -cryptography==41.0.3 +cryptography==44.0.1 # via # borb # pdfminer-six -fonttools==4.42.1 +fonttools==4.56.0 # via borb -idna==3.4 +idna==3.10 # via requests -levenshtein==0.21.1 +levenshtein==0.26.1 # via python-levenshtein -lxml==4.9.3 +lxml==5.3.1 # via # -r requirements/main.in # borb @@ -34,67 +30,65 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -numpy==1.25.2 +numpy==2.2.3 # via -r requirements/main.in -pdfminer-six==20221105 +pdfminer-six==20231228 # via # -r requirements/main.in # pdfplumber -pdfplumber==0.10.2 +pdfplumber==0.11.5 # via -r requirements/main.in pdfrw==0.4 # via -r requirements/main.in -pdftotext==2.2.2 +pdftotext==3.0.0 # via -r requirements/main.in -pillow==10.0.0 +pillow==11.1.0 # via # borb # pdfplumber # qrcode -pycparser==2.21 +playa-pdf==0.2.10 + # via -r requirements/main.in +pycparser==2.22 # via cffi -pydantic==2.3.0 +pydantic==2.10.6 # via -r requirements/main.in -pydantic-core==2.6.3 +pydantic-core==2.27.2 # via pydantic -pygments==2.16.1 +pygments==2.19.1 # via rich -pymupdf==1.23.3 +pymupdf==1.25.3 # via -r requirements/main.in -pymupdfb==1.23.3 - # via pymupdf 
-pypdf==3.15.5 +pypdf==5.3.0 # via -r requirements/main.in -pypdfium2==4.19.0 +pypdfium2==4.30.1 # via # -r requirements/main.in # pdfplumber -pypng==0.20220715.0 - # via qrcode python-barcode==0.15.1 # via borb -python-levenshtein==0.21.1 +python-levenshtein==0.26.1 # via -r requirements/main.in -qrcode[pil]==7.4.2 +qrcode==8.0 # via borb -rapidfuzz==3.2.0 +rapidfuzz==3.12.1 # via levenshtein -requests==2.31.0 +requests==2.32.3 # via # -r requirements/main.in # borb # tika -rich==13.5.2 +rich==13.9.4 # via -r requirements/main.in +setuptools==75.8.0 + # via + # borb + # tika tika==2.6.0 # via -r requirements/main.in -typing-extensions==4.7.1 +typing-extensions==4.12.2 # via # pydantic # pydantic-core - # qrcode -urllib3==2.0.4 +urllib3==2.3.0 # via requests - -# The following packages are considered to be unsafe in a requirements file: -# setuptools From ff365e1c0de8912b46f045179c8cc91f68f40faa Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 21:31:13 -0500 Subject: [PATCH 07/18] fix: nope! 
postprocess not compatible with playa (yet) --- pdf_benchmark/library_code.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py index 73e353f..923529c 100644 --- a/pdf_benchmark/library_code.py +++ b/pdf_benchmark/library_code.py @@ -29,10 +29,8 @@ def playa_get_text(data: bytes) -> str: args = argparse.Namespace(pages="all", outfile=outfh) with playa.open(path) as pdf: playa_extract_text(pdf, args) - page_labels = [page.label for page in pdf.pages] with open(outpath) as infh: text = infh.read() - text = postprocess(text, page_labels) return text From 2c2da358def4e05f7955cca522622d564dbf0a5a Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 22:10:04 -0500 Subject: [PATCH 08/18] chore: rerun with playa --- README.md | 64 +- cache.json | 687 +- read/results/pdfium/1601.03642.txt | 1170 +- read/results/pdfium/1602.06541.txt | 3175 +++--- read/results/pdfium/1707.09725.txt | 8306 +++++++------- read/results/pdfium/2201.00021.txt | 1514 ++- read/results/pdfium/2201.00022.txt | 2174 ++-- read/results/pdfium/2201.00029.txt | 530 +- read/results/pdfium/2201.00037.txt | 4041 ++++--- read/results/pdfium/2201.00069.txt | 1751 ++- read/results/pdfium/2201.00151.txt | 2743 +++-- read/results/pdfium/2201.00178.txt | 2084 ++-- read/results/pdfium/2201.00200.txt | 1059 +- read/results/pdfium/2201.00201.txt | 1421 ++- read/results/pdfium/2201.00214.txt | 1857 ++-- read/results/pdfium/GeoTopo-book.txt | 12368 ++++++++++----------- read/results/pdfminer/1602.06541.txt | 8 +- read/results/pdfminer/1707.09725.txt | 22 +- read/results/pdfminer/GeoTopo-book.txt | 62 +- read/results/pdfplumber/1601.03642.txt | 39 +- read/results/pdfplumber/1602.06541.txt | 38 +- read/results/pdfplumber/1707.09725.txt | 71 +- read/results/pdfplumber/2201.00021.txt | 42 +- read/results/pdfplumber/2201.00022.txt | 42 +- read/results/pdfplumber/2201.00037.txt | 200 +- read/results/pdfplumber/2201.00069.txt | 44 +- 
read/results/pdfplumber/2201.00151.txt | 435 +- read/results/pdfplumber/2201.00178.txt | 41 +- read/results/pdfplumber/2201.00200.txt | 41 +- read/results/pdfplumber/2201.00201.txt | 45 +- read/results/pdfplumber/2201.00214.txt | 171 +- read/results/pdfplumber/GeoTopo-book.txt | 8 +- read/results/playa/1601.03642.txt | 632 ++ read/results/playa/1602.06541.txt | 1782 +++ read/results/playa/1707.09725.txt | 4293 +++++++ read/results/playa/2201.00021.txt | 1102 ++ read/results/playa/2201.00022.txt | 1383 +++ read/results/playa/2201.00029.txt | 12 + read/results/playa/2201.00037.txt | 2848 +++++ read/results/playa/2201.00069.txt | Bin 0 -> 55856 bytes read/results/playa/2201.00151.txt | 1146 ++ read/results/playa/2201.00178.txt | 1272 +++ read/results/playa/2201.00200.txt | 736 ++ read/results/playa/2201.00201.txt | 932 ++ read/results/playa/2201.00214.txt | 766 ++ read/results/playa/GeoTopo-book.txt | 7991 +++++++++++++ read/results/pymupdf/1601.03642.txt | 6 +- read/results/pymupdf/1602.06541.txt | 110 +- read/results/pymupdf/1707.09725.txt | 479 +- read/results/pymupdf/2201.00021.txt | 36 +- read/results/pymupdf/2201.00022.txt | 120 +- read/results/pymupdf/2201.00037.txt | 467 +- read/results/pymupdf/2201.00069.txt | 208 +- read/results/pymupdf/2201.00151.txt | 36 +- read/results/pymupdf/2201.00178.txt | 256 +- read/results/pymupdf/2201.00200.txt | 34 +- read/results/pymupdf/2201.00201.txt | 48 +- read/results/pymupdf/2201.00214.txt | 8 +- read/results/pymupdf/GeoTopo-book.txt | 3191 +++--- read/results/pypdf/1601.03642.txt | 113 +- read/results/pypdf/1602.06541.txt | 929 +- read/results/pypdf/1707.09725.txt | 3576 +++--- read/results/pypdf/2201.00021.txt | 606 +- read/results/pypdf/2201.00022.txt | 341 +- read/results/pypdf/2201.00029.txt | 372 +- read/results/pypdf/2201.00037.txt | 1735 +-- read/results/pypdf/2201.00069.txt | Bin 52966 -> 53482 bytes read/results/pypdf/2201.00151.txt | 1233 +- read/results/pypdf/2201.00178.txt | 781 +- 
read/results/pypdf/2201.00200.txt | 247 +- read/results/pypdf/2201.00201.txt | 248 +- read/results/pypdf/2201.00214.txt | 906 +- read/results/pypdf/GeoTopo-book.txt | 6019 +++++----- read/results/tika/1601.03642.txt | 23 +- read/results/tika/1602.06541.txt | 292 +- read/results/tika/1707.09725.txt | 814 +- read/results/tika/2201.00021.txt | 706 +- read/results/tika/2201.00022.txt | 458 +- read/results/tika/2201.00029.txt | 6 - read/results/tika/2201.00037.txt | 1059 +- read/results/tika/2201.00069.txt | 507 +- read/results/tika/2201.00151.txt | 124 +- read/results/tika/2201.00178.txt | 628 +- read/results/tika/2201.00200.txt | 61 +- read/results/tika/2201.00201.txt | 21 - read/results/tika/2201.00214.txt | 197 +- read/results/tika/GeoTopo-book.txt | 7754 +------------ 87 files changed, 63928 insertions(+), 41975 deletions(-) create mode 100644 read/results/playa/1601.03642.txt create mode 100644 read/results/playa/1602.06541.txt create mode 100644 read/results/playa/1707.09725.txt create mode 100644 read/results/playa/2201.00021.txt create mode 100644 read/results/playa/2201.00022.txt create mode 100644 read/results/playa/2201.00029.txt create mode 100644 read/results/playa/2201.00037.txt create mode 100644 read/results/playa/2201.00069.txt create mode 100644 read/results/playa/2201.00151.txt create mode 100644 read/results/playa/2201.00178.txt create mode 100644 read/results/playa/2201.00200.txt create mode 100644 read/results/playa/2201.00201.txt create mode 100644 read/results/playa/2201.00214.txt create mode 100644 read/results/playa/GeoTopo-book.txt diff --git a/README.md b/README.md index 9f95f4e..01fa509 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not documents that applied OCR. 
## Benchmarking machine - Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz + Intel(R) Core(TM) i7 CPU 860 @ 2.80GHz ## Input Documents | # | Name | File Size | Pages | @@ -26,13 +26,14 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | Name | Last PyPI Release | License | Version | Dependencies | | -----------: | :---------------- | ------------------------------: | -----------------------------------------: | :-------------------------------------------------------- | | Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.19.0 | PDFium (Foxit/Google) | -| pdfminer.six | 2022-11-05 | MIT/X | 20221105 | | +| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | +| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | | pdfplumber | 2023-07-29 | MIT | git+https://github.com/dhdaines/pdfplumber | pdfminer.six | | pdfrw | 2017-09-18 | MIT | 0.4 | | | pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.23.3 | MuPDF | -| pypdf | 2023-08-26 | BSD 3-Clause | 3.15.5 | | +| playa | 2025-02-18 | MIT | 0.2.10 | | +| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | +| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | | Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | @@ -40,51 +41,54 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 
](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.1s | 0.6s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | -| 2 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.2s | 0.7s | 0.4s | 0.2s | 0.3s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | -| 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 0.9s | 1.0s | 0.3s | 0.7s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | -| 4 | [Tika ](https://pypi.org/project/tika/) | 0.3s | 1.5s | 0.8s | 0.6s | 0.5s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | -| 5 | [pypdf ](https://pypi.org/project/pypdf/) | 3.6s | 26.0s | 6.6s | 7.5s | 3.2s | 0.9s | 1.5s | 0.7s | 0.7s | 0.4s | 0.7s | 0.7s | 0.6s | 0.6s | 0.4s | -| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 8.1s | 51.6s | 20.9s | 14.0s | 7.8s | 2.1s | 3.6s | 1.6s | 2.0s | 1.3s | 2.5s | 1.7s | 
1.8s | 1.0s | 0.9s | -| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 10.7s | 70.3s | 17.7s | 19.3s | 12.1s | 3.5s | 6.1s | 3.0s | 2.8s | 2.6s | 3.9s | 2.8s | 3.2s | 1.6s | 1.5s | -| 8 | [Borb ](https://pypi.org/project/borb/) | 38.6s | 170.2s | 130.2s | 1.8s | 77.9s | 17.4s | 10.5s | 66.3s | 14.2s | 15.5s | 7.3s | 5.1s | 16.7s | 4.4s | 2.5s | +| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.1s | 0.8s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | +| 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | +| 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | +| 4 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | +| 5 | [playa ](https://pypi.org/project/playa-pdf/) | 4.3s | 33.4s | 7.9s | 8.2s | 3.6s | 0.6s | 1.5s | 0.9s | 0.9s | 0.6s | 1.0s | 0.4s | 0.8s | 0.0s | 0.3s | +| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | +| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 12.6s | 84.0s | 20.8s | 23.3s | 13.6s | 3.8s | 6.8s | 3.4s | 3.1s | 2.8s | 4.4s | 3.1s | 3.5s | 1.8s | 1.7s | +| 8 | [Tika ](https://pypi.org/project/tika/) | 24.4s | 17.8s | 100.1s | 0.6s | 23.4s | 47.3s | 48.3s | 31.5s | 34.5s | 0.1s | 13.2s | 0.1s | 24.2s | 0.1s | 0.1s | +| 9 | [Borb ](https://pypi.org/project/borb/) | 50.5s | 188.4s | 149.1s | 2.3s | 113.6s | 28.4s | 11.7s | 112.3s | 23.7s | 27.1s | 8.4s | 5.7s | 27.7s | 4.9s | 2.9s | ## Image Extraction Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 
](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.5s | 0.3s | 0.6s | 0.0s | 1.9s | 0.5s | 0.0s | 3.0s | 0.4s | 0.4s | 0.1s | 0.0s | 0.3s | 0.2s | 0.0s | -| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 3.8s | 24.0s | 2.3s | 1.2s | 14.3s | 1.3s | 0.0s | 6.7s | 1.0s | 1.1s | 0.3s | 0.0s | 0.8s | 0.2s | 0.1s | -| 3 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 10.5s | 59.1s | 21.9s | 16.1s | 30.9s | 2.3s | 3.8s | 2.3s | 2.0s | 1.4s | 2.4s | 1.7s | 1.9s | 1.0s | 0.8s | +| 1 | [PyMuPDF 
](https://pypi.org/project/PyMuPDF/) | 0.6s | 0.3s | 0.7s | 0.0s | 2.2s | 0.6s | 0.0s | 3.3s | 0.5s | 0.5s | 0.1s | 0.0s | 0.4s | 0.3s | 0.0s | +| 2 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 1.3s | 1.5s | 2.3s | 0.0s | 4.3s | 1.2s | 0.2s | 5.7s | 0.9s | 0.9s | 0.3s | 0.1s | 0.7s | 0.3s | 0.0s | +| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 5.2s | 24.6s | 7.0s | 6.6s | 18.9s | 1.7s | 0.7s | 7.6s | 1.5s | 1.5s | 0.9s | 0.2s | 1.3s | 0.3s | 0.2s | +| 4 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 12.3s | 69.2s | 24.6s | 20.6s | 36.6s | 2.6s | 4.1s | 2.4s | 2.3s | 1.5s | 2.7s | 2.0s | 2.1s | 1.1s | 0.9s | ## Watermarking Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.0s | 0.0s | 0.2s | 0.0s | 0.1s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | 0.0s | -| 2 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | -| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.5s | 0.7s | 2.0s | 0.4s | 1.2s | 0.3s | 0.3s | 0.3s | 0.3s | 0.3s | 0.5s | 0.1s | 0.5s | 0.1s | 0.1s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.0s | +| 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | +| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | ## Watermarking File Size | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | 
:---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 3.5MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.2MB | 0.8MB | 1.0MB | -| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 3.5MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.2MB | 0.8MB | 1.0MB | -| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 3.7MB | 2.7MB | 6.8MB | 1.7MB | 8.5MB | 2.8MB | 3.4MB | 15.5MB | 2.5MB | 1.4MB | 3.2MB | 0.3MB | 1.3MB | 0.9MB | 1.1MB | +| 1 | [pypdf ](https://pypi.org/project/pypdf/) | 3.4MB | 2.5MB | 5.6MB | 1.6MB | 7.2MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.2MB | 0.8MB | 1.0MB | +| 2 | [pdfrw ](https://pypi.org/project/pdfrw/) | 3.5MB | 2.5MB | 5.7MB | 1.6MB | 7.3MB | 2.7MB | 3.1MB | 15.4MB | 2.4MB | 1.3MB | 3.0MB | 0.3MB | 1.2MB | 0.8MB | 1.0MB | +| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 3.7MB | 2.7MB | 6.9MB | 1.7MB | 8.5MB | 2.8MB | 3.4MB | 15.5MB | 2.5MB | 1.4MB | 3.2MB | 0.3MB | 1.3MB | 0.9MB | 1.1MB | ## Text Extraction Quality | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 
](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 97% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 98% | 99% | 99% | 98% | 78% | 99% | 99% | -| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 96% | 98% | 93% | 94% | 98% | 98% | 96% | 97% | 98% | 99% | 99% | 98% | 78% | 98% | 99% | -| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 97% | 98% | 96% | 98% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | -| 4 | [Tika ](https://pypi.org/project/tika/) | 95% | 99% | 99% | 92% | 97% | 98% | 96% | 93% | 97% | 98% | 93% | 98% | 73% | 98% | 96% | -| 5 | [pdfplumber 
](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 88% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | -| 6 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | -| 7 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | -| 8 | [Borb ](https://pypi.org/project/borb/) | 45% | 70% | 79% | 0% | 40% | 48% | 92% | 0% | 64% | 51% | 41% | 55% | 41% | 0% | 53% | +| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 97% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 99% | 99% | 99% | 98% | 78% | 99% | 99% | +| 2 | [pypdf ](https://pypi.org/project/pypdf/) | 96% | 99% | 95% | 93% | 98% | 99% | 96% | 97% | 99% | 99% | 99% | 99% | 78% | 100% | 99% | +| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 97% | 98% | 95% | 99% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | +| 4 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 89% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | +| 5 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | +| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | +| 7 | [playa ](https://pypi.org/project/playa-pdf/) | 88% | 98% | 93% | 92% | 98% | 97% | 95% | 97% | 96% | 98% | 98% | 97% | 77% | 0% | 99% | +| 8 | [Tika ](https://pypi.org/project/tika/) | 83% | 99% | 0% | 92% | 95% | 77% | 86% | 81% | 82% | 98% | 88% | 98% | 67% | 98% | 96% | +| 9 | [Borb ](https://pypi.org/project/borb/) | 45% | 70% | 79% | 0% | 40% | 48% | 92% | 0% | 64% | 51% | 41% | 55% | 41% | 0% | 53% | diff --git a/cache.json b/cache.json index 7164f81..e8bef14 100644 --- a/cache.json +++ b/cache.json @@ -1,505 +1,531 @@ { 
"benchmark_times": { - "pdfplumber": { + "borb": { "2201.00214": { - "read": 70.30025053024292 + "read": 188.4488205909729 }, "GeoTopo-book": { - "read": 17.71614122390747 + "read": 149.14154720306396 }, "2201.00151": { - "read": 19.277418613433838 + "read": 2.308486223220825 }, "1707.09725": { - "read": 12.056224346160889 + "read": 113.59780859947205 }, "2201.00021": { - "read": 3.5353875160217285 + "read": 28.409748315811157 }, "2201.00037": { - "read": 6.122551918029785 + "read": 11.665522575378418 }, "2201.00069": { - "read": 2.9618067741394043 + "read": 112.33900618553162 }, "2201.00178": { - "read": 2.80582332611084 + "read": 23.70449709892273 }, "2201.00201": { - "read": 2.5980660915374756 + "read": 27.119436264038086 }, "1602.06541": { - "read": 3.878690004348755 + "read": 8.40593433380127 }, "2201.00200": { - "read": 2.7724621295928955 + "read": 5.670783281326294 }, "2201.00022": { - "read": 3.221090793609619 + "read": 27.747946977615356 }, "2201.00029": { - "read": 1.5970120429992676 + "read": 4.8552985191345215 }, "1601.03642": { - "read": 1.4764699935913086 + "read": 2.92044734954834 } }, - "pypdf": { + "pdfium": { "2201.00214": { - "read": 26.020102977752686, - "watermark": 0.6621012687683105, - "image_extraction": 23.955000400543213 + "read": 0.7582509517669678, + "image_extraction": 1.5329885482788086 }, "GeoTopo-book": { - "read": 6.64851450920105, - "watermark": 2.0183393955230713, - "image_extraction": 2.250887632369995 + "read": 0.31653857231140137, + "image_extraction": 2.299833297729492 }, "2201.00151": { - "read": 7.473206520080566, - "watermark": 0.428668737411499, - "image_extraction": 1.1657159328460693 + "read": 0.22646355628967285, + "image_extraction": 0.027804136276245117 }, "1707.09725": { - "read": 3.212459087371826, - "watermark": 1.1811449527740479, - "image_extraction": 14.264930486679077 + "read": 0.22682547569274902, + "image_extraction": 4.282248497009277 }, "2201.00021": { - "read": 0.9077854156494141, - "watermark": 
0.3055300712585449, - "image_extraction": 1.2899971008300781 + "read": 0.04979705810546875, + "image_extraction": 1.1562087535858154 }, "2201.00037": { - "read": 1.5470695495605469, - "watermark": 0.270383358001709, - "image_extraction": 0.022456884384155273 + "read": 0.11386513710021973, + "image_extraction": 0.22445988655090332 }, "2201.00069": { - "read": 0.692096471786499, - "watermark": 0.2626011371612549, - "image_extraction": 6.7051780223846436 + "read": 0.05641007423400879, + "image_extraction": 5.704399824142456 }, "2201.00178": { - "read": 0.722989559173584, - "watermark": 0.2680470943450928, - "image_extraction": 1.019047498703003 + "read": 0.0558314323425293, + "image_extraction": 0.8747310638427734 }, "2201.00201": { - "read": 0.4151496887207031, - "watermark": 0.3354034423828125, - "image_extraction": 1.0872318744659424 + "read": 0.040203094482421875, + "image_extraction": 0.853954553604126 }, "1602.06541": { - "read": 0.6642146110534668, - "watermark": 0.5023202896118164, - "image_extraction": 0.3495500087738037 + "read": 0.06276416778564453, + "image_extraction": 0.33101916313171387 }, "2201.00200": { - "read": 0.743006706237793, - "watermark": 0.12680268287658691, - "image_extraction": 0.007048606872558594 + "read": 0.037505149841308594, + "image_extraction": 0.051642656326293945 }, "2201.00022": { - "read": 0.5628504753112793, - "watermark": 0.5410447120666504, - "image_extraction": 0.7783563137054443 + "read": 0.054618120193481445, + "image_extraction": 0.6855518817901611 }, "2201.00029": { - "read": 0.5693433284759521, - "watermark": 0.0550692081451416, - "image_extraction": 0.23301029205322266 + "read": 0.024730443954467773, + "image_extraction": 0.3229193687438965 }, "1601.03642": { - "read": 0.36649465560913086, - "watermark": 0.10128593444824219, - "image_extraction": 0.0654292106628418 + "read": 0.02708148956298828, + "image_extraction": 0.03443098068237305 } }, - "borb": { + "pdfminer": { "2201.00214": { - "read": 170.16125798225403 + 
"read": 55.93182134628296, + "image_extraction": 69.16859841346741 }, "GeoTopo-book": { - "read": 130.1867437362671 + "read": 23.7318594455719, + "image_extraction": 24.565200805664062 }, "2201.00151": { - "read": 1.7567353248596191 + "read": 16.848540544509888, + "image_extraction": 20.575064420700073 }, "1707.09725": { - "read": 77.86134815216064 + "read": 8.933068037033081, + "image_extraction": 36.61323118209839 }, "2201.00021": { - "read": 17.365882873535156 + "read": 2.2727904319763184, + "image_extraction": 2.606733798980713 }, "2201.00037": { - "read": 10.51969861984253 + "read": 3.968254566192627, + "image_extraction": 4.147768974304199 }, "2201.00069": { - "read": 66.33814978599548 + "read": 1.8122689723968506, + "image_extraction": 2.4438936710357666 }, "2201.00178": { - "read": 14.184422016143799 + "read": 2.186378002166748, + "image_extraction": 2.2748231887817383 }, "2201.00201": { - "read": 15.534729957580566 + "read": 1.4637563228607178, + "image_extraction": 1.5430595874786377 }, "1602.06541": { - "read": 7.256805181503296 + "read": 2.655424118041992, + "image_extraction": 2.690824270248413 }, "2201.00200": { - "read": 5.089946269989014 + "read": 1.847485065460205, + "image_extraction": 1.9874897003173828 }, "2201.00022": { - "read": 16.713526487350464 + "read": 1.9943366050720215, + "image_extraction": 2.0818636417388916 }, "2201.00029": { - "read": 4.427474021911621 + "read": 1.0527269840240479, + "image_extraction": 1.1150856018066406 }, "1601.03642": { - "read": 2.4990572929382324 + "read": 0.9015827178955078, + "image_extraction": 0.9357635974884033 } }, - "pdfium": { + "pdfplumber": { "2201.00214": { - "read": 0.7194085121154785 + "read": 83.96660041809082 }, "GeoTopo-book": { - "read": 0.35127973556518555 + "read": 20.792396068572998 }, "2201.00151": { - "read": 0.2381303310394287 + "read": 23.254782676696777 }, "1707.09725": { - "read": 0.2519388198852539 + "read": 13.614238023757935 }, "2201.00021": { - "read": 0.05813455581665039 + 
"read": 3.8416290283203125 }, "2201.00037": { - "read": 0.13362622261047363 + "read": 6.843621730804443 }, "2201.00069": { - "read": 0.08023524284362793 + "read": 3.3514842987060547 }, "2201.00178": { - "read": 0.06390500068664551 + "read": 3.1007227897644043 }, "2201.00201": { - "read": 0.05460667610168457 + "read": 2.8469505310058594 }, "1602.06541": { - "read": 0.0795130729675293 + "read": 4.3501877784729 }, "2201.00200": { - "read": 0.043161869049072266 + "read": 3.129256248474121 }, "2201.00022": { - "read": 0.06084847450256348 + "read": 3.5482122898101807 }, "2201.00029": { - "read": 0.028944969177246094 + "read": 1.827713966369629 }, "1601.03642": { - "read": 0.03023362159729004 + "read": 1.7083258628845215 } }, - "pdfminer": { + "pdfrw": { "2201.00214": { - "read": 51.59387469291687, - "image_extraction": 59.07294750213623 + "watermark": 0.06678032875061035 }, "GeoTopo-book": { - "read": 20.944347858428955, - "image_extraction": 21.889408588409424 + "watermark": 0.5417091846466064 }, "2201.00151": { - "read": 13.961804151535034, - "image_extraction": 16.127032041549683 + "watermark": 0.05851268768310547 }, "1707.09725": { - "read": 7.790618896484375, - "image_extraction": 30.880011081695557 + "watermark": 0.4045860767364502 }, "2201.00021": { - "read": 2.0765562057495117, - "image_extraction": 2.265204906463623 + "watermark": 0.11277961730957031 }, "2201.00037": { - "read": 3.551035165786743, - "image_extraction": 3.7663705348968506 + "watermark": 0.0764768123626709 }, "2201.00069": { - "read": 1.6111178398132324, - "image_extraction": 2.334484338760376 + "watermark": 0.13684296607971191 }, "2201.00178": { - "read": 1.9711239337921143, - "image_extraction": 2.026402711868286 + "watermark": 0.11200451850891113 }, "2201.00201": { - "read": 1.3471250534057617, - "image_extraction": 1.4003777503967285 + "watermark": 0.07941722869873047 }, "1602.06541": { - "read": 2.4590721130371094, - "image_extraction": 2.4072012901306152 + "watermark": 0.13727259635925293 }, 
"2201.00200": { - "read": 1.65625, - "image_extraction": 1.7444770336151123 + "watermark": 0.06008315086364746 }, "2201.00022": { - "read": 1.8396813869476318, - "image_extraction": 1.8976826667785645 + "watermark": 0.15844249725341797 }, "2201.00029": { - "read": 1.0377953052520752, - "image_extraction": 1.0113511085510254 + "watermark": 0.017334461212158203 }, "1601.03642": { - "read": 0.859734058380127, - "image_extraction": 0.8416221141815186 + "watermark": 0.04838132858276367 } }, - "pdfrw": { + "pdftotext": { "2201.00214": { - "watermark": 0.06189870834350586 + "read": 0.9697160720825195 }, "GeoTopo-book": { - "watermark": 0.4900834560394287 + "read": 1.0543584823608398 }, "2201.00151": { - "watermark": 0.05237531661987305 + "read": 0.3154182434082031 }, "1707.09725": { - "watermark": 0.37189579010009766 + "read": 0.7857511043548584 }, "2201.00021": { - "watermark": 0.0998694896697998 + "read": 0.0954442024230957 }, "2201.00037": { - "watermark": 0.07250571250915527 + "read": 0.25443029403686523 }, "2201.00069": { - "watermark": 0.1360166072845459 + "read": 0.21236205101013184 }, "2201.00178": { - "watermark": 0.10211992263793945 + "read": 0.14896798133850098 }, "2201.00201": { - "watermark": 0.07320046424865723 + "read": 0.06666803359985352 }, "1602.06541": { - "watermark": 0.12589788436889648 + "read": 0.1229856014251709 }, "2201.00200": { - "watermark": 0.05352663993835449 + "read": 0.07712578773498535 }, "2201.00022": { - "watermark": 0.1426396369934082 + "read": 0.11791729927062988 }, "2201.00029": { - "watermark": 0.016480207443237305 + "read": 0.04947781562805176 }, "1601.03642": { - "watermark": 0.04465961456298828 + "read": 0.05305290222167969 } }, - "pdftotext": { + "playa": { "2201.00214": { - "read": 0.9134228229522705 + "read": 33.35531687736511 }, "GeoTopo-book": { - "read": 0.9935319423675537 + "read": 7.86867094039917 }, "2201.00151": { - "read": 0.3033919334411621 + "read": 8.23330020904541 }, "1707.09725": { - "read": 0.7321336269378662 + 
"read": 3.6483688354492188 }, "2201.00021": { - "read": 0.08934545516967773 + "read": 0.6167638301849365 }, "2201.00037": { - "read": 0.2617814540863037 + "read": 1.4718003273010254 }, "2201.00069": { - "read": 0.20723533630371094 + "read": 0.8515617847442627 }, "2201.00178": { - "read": 0.13864588737487793 + "read": 0.8537464141845703 }, "2201.00201": { - "read": 0.06463861465454102 + "read": 0.5607175827026367 }, "1602.06541": { - "read": 0.11054682731628418 + "read": 0.9524991512298584 }, "2201.00200": { - "read": 0.07274961471557617 + "read": 0.39577698707580566 }, "2201.00022": { - "read": 0.11013388633728027 + "read": 0.7865602970123291 }, "2201.00029": { - "read": 0.046718597412109375 + "read": 0.028035879135131836 }, "1601.03642": { - "read": 0.0490870475769043 + "read": 0.32507753372192383 } }, "pymupdf": { "2201.00214": { - "read": 0.5514135360717773, - "watermark": 0.026504039764404297, - "image_extraction": 0.27562856674194336 + "read": 1.2650783061981201, + "watermark": 0.48745298385620117, + "image_extraction": 0.2923619747161865 + }, + "GeoTopo-book": { + "read": 0.4023463726043701, + "watermark": 0.6608211994171143, + "image_extraction": 0.6644651889801025 + }, + "2201.00151": { + "read": 0.6669011116027832, + "watermark": 0.2024550437927246, + "image_extraction": 0.0034143924713134766 + }, + "1707.09725": { + "read": 0.2984461784362793, + "watermark": 0.5301051139831543, + "image_extraction": 2.2000043392181396 + }, + "2201.00021": { + "read": 0.12205195426940918, + "watermark": 0.08205199241638184, + "image_extraction": 0.613532543182373 + }, + "2201.00037": { + "read": 0.16483521461486816, + "watermark": 0.13814139366149902, + "image_extraction": 0.0038840770721435547 + }, + "2201.00069": { + "read": 0.06914305686950684, + "watermark": 0.09702491760253906, + "image_extraction": 3.283721923828125 + }, + "2201.00178": { + "read": 0.0685114860534668, + "watermark": 0.1016242504119873, + "image_extraction": 0.4525175094604492 + }, + "2201.00201": { + 
"read": 0.05706334114074707, + "watermark": 0.06307482719421387, + "image_extraction": 0.4692685604095459 + }, + "1602.06541": { + "read": 0.0886693000793457, + "watermark": 0.11176776885986328, + "image_extraction": 0.08822441101074219 + }, + "2201.00200": { + "read": 0.0522770881652832, + "watermark": 0.04518556594848633, + "image_extraction": 0.003078937530517578 + }, + "2201.00022": { + "read": 0.07638287544250488, + "watermark": 0.10881543159484863, + "image_extraction": 0.38236427307128906 + }, + "2201.00029": { + "read": 0.0361933708190918, + "watermark": 0.03627157211303711, + "image_extraction": 0.2511467933654785 + }, + "1601.03642": { + "read": 0.03939104080200195, + "watermark": 0.04615473747253418, + "image_extraction": 0.003738880157470703 + } + }, + "pypdf": { + "2201.00214": { + "read": 28.712388277053833, + "watermark": 0.7213070392608643, + "image_extraction": 24.60779356956482 }, "GeoTopo-book": { - "read": 0.33759236335754395, - "watermark": 0.15517616271972656, - "image_extraction": 0.587421178817749 + "read": 8.068076133728027, + "watermark": 2.343616008758545, + "image_extraction": 6.996605157852173 }, "2201.00151": { - "read": 0.20269060134887695, - "watermark": 0.016669273376464844, - "image_extraction": 0.0031175613403320312 + "read": 8.089233875274658, + "watermark": 0.4587695598602295, + "image_extraction": 6.647898435592651 }, "1707.09725": { - "read": 0.19739532470703125, - "watermark": 0.14671063423156738, - "image_extraction": 1.8962862491607666 + "read": 3.891724109649658, + "watermark": 1.7476551532745361, + "image_extraction": 18.940555572509766 }, "2201.00021": { - "read": 0.04858541488647461, - "watermark": 0.0232088565826416, - "image_extraction": 0.5244166851043701 + "read": 1.1562883853912354, + "watermark": 0.30209898948669434, + "image_extraction": 1.6896071434020996 }, "2201.00037": { - "read": 0.10872936248779297, - "watermark": 0.03429841995239258, - "image_extraction": 0.002572774887084961 + "read": 2.0014760494232178, 
+ "watermark": 0.3821859359741211, + "image_extraction": 0.749286413192749 }, "2201.00069": { - "read": 0.048812150955200195, - "watermark": 0.041498422622680664, - "image_extraction": 2.9812166690826416 + "read": 0.7707874774932861, + "watermark": 0.46254944801330566, + "image_extraction": 7.584951162338257 }, "2201.00178": { - "read": 0.05117154121398926, - "watermark": 0.027867794036865234, - "image_extraction": 0.3875887393951416 + "read": 0.9634733200073242, + "watermark": 0.3837471008300781, + "image_extraction": 1.4831831455230713 }, "2201.00201": { - "read": 0.0379939079284668, - "watermark": 0.02063298225402832, - "image_extraction": 0.392880916595459 + "read": 0.7547926902770996, + "watermark": 0.21503210067749023, + "image_extraction": 1.460836410522461 }, "1602.06541": { - "read": 0.0582125186920166, - "watermark": 0.03419327735900879, - "image_extraction": 0.08664584159851074 + "read": 0.967684268951416, + "watermark": 0.5418281555175781, + "image_extraction": 0.8519599437713623 }, "2201.00200": { - "read": 0.04317927360534668, - "watermark": 0.0125732421875, - "image_extraction": 0.002869129180908203 + "read": 0.8903443813323975, + "watermark": 0.1604924201965332, + "image_extraction": 0.18567204475402832 }, "2201.00022": { - "read": 0.053061723709106445, - "watermark": 0.029779672622680664, - "image_extraction": 0.3130757808685303 + "read": 0.8163042068481445, + "watermark": 0.610785722732544, + "image_extraction": 1.2961516380310059 }, "2201.00029": { - "read": 0.030493736267089844, - "watermark": 0.010860204696655273, - "image_extraction": 0.22451543807983398 + "read": 0.6088814735412598, + "watermark": 0.07402253150939941, + "image_extraction": 0.26814889907836914 }, "1601.03642": { - "read": 0.026759624481201172, - "watermark": 0.013607501983642578, - "image_extraction": 0.003281831741333008 + "read": 0.35593676567077637, + "watermark": 0.12810969352722168, + "image_extraction": 0.21191930770874023 } }, "tika": { "2201.00214": { - "read": 
1.4826128482818604 + "read": 17.824857473373413 }, "GeoTopo-book": { - "read": 0.8197121620178223 + "read": 100.1345567703247 }, "2201.00151": { - "read": 0.5797288417816162 + "read": 0.6399288177490234 }, "1707.09725": { - "read": 0.510779619216919 + "read": 23.35584807395935 }, "2201.00021": { - "read": 0.11818695068359375 + "read": 47.338046073913574 }, "2201.00037": { - "read": 0.2835381031036377 + "read": 48.305400133132935 }, "2201.00069": { - "read": 0.15885210037231445 + "read": 31.513932704925537 }, "2201.00178": { - "read": 0.11153674125671387 + "read": 34.47823882102966 }, "2201.00201": { - "read": 0.10024166107177734 + "read": 0.11622309684753418 }, "1602.06541": { - "read": 0.15640830993652344 + "read": 13.186578750610352 }, "2201.00200": { - "read": 0.07609415054321289 + "read": 0.12495112419128418 }, "2201.00022": { - "read": 0.13745427131652832 + "read": 24.17951250076294 }, "2201.00029": { - "read": 0.058022499084472656 + "read": 0.08729672431945801 }, "1601.03642": { - "read": 0.08746814727783203 + "read": 0.07596778869628906 } } }, "read_quality": { - "pdfplumber": { - "2201.00214": 0.9599753755721704, - "GeoTopo-book": 0.8932105947573745, - "2201.00151": 0.8827261192677179, - "1707.09725": 0.9778392493054645, - "2201.00021": 0.9170581778265642, - "2201.00037": 0.9422577069826292, - "2201.00069": 0.9317511947117907, - "2201.00178": 0.9527037762830358, - "2201.00201": 0.9313577012811591, - "1602.06541": 0.9739415077617999, - "2201.00200": 0.937365010799136, - "2201.00022": 0.7643161565284773, - "2201.00029": 0.9927616243405717, - "1601.03642": 0.981982138212087 - }, - "pypdf": { - "2201.00214": 0.984773043075498, - "GeoTopo-book": 0.9267843483814432, - "2201.00151": 0.9366784193042167, - "1707.09725": 0.9799128437947946, - "2201.00021": 0.9806264058057124, - "2201.00037": 0.959331208757123, - "2201.00069": 0.9668886543437042, - "2201.00178": 0.9825861828182239, - "2201.00201": 0.9865737079024715, - "1602.06541": 0.9879715846375909, - "2201.00200": 
0.9839537609635827, - "2201.00022": 0.7783073130649137, - "2201.00029": 0.9798789064888997, - "1601.03642": 0.9927797833935018 - }, "borb": { "2201.00214": 0.7037028842821007, "GeoTopo-book": 0.7910254212656228, @@ -517,26 +543,26 @@ "1601.03642": 0.5295431890832847 }, "pdfium": { - "2201.00214": 0.9932975353472919, - "GeoTopo-book": 0.9656593310168123, - "2201.00151": 0.9371048049607478, - "1707.09725": 0.9869033794742829, - "2201.00021": 0.9825806792373105, - "2201.00037": 0.9617606084193095, - "2201.00069": 0.9894269749096088, - "2201.00178": 0.9849444987879046, - "2201.00201": 0.9860127582372564, - "1602.06541": 0.9919005142642908, - "2201.00200": 0.9836863694438841, - "2201.00022": 0.7771305119401257, - "2201.00029": 0.988813497157528, - "1601.03642": 0.9935736623251659 + "2201.00214": 0.9934491487758218, + "GeoTopo-book": 0.9662662046401999, + "2201.00151": 0.9371906885806807, + "1707.09725": 0.9868031113572143, + "2201.00021": 0.9834949300569906, + "2201.00037": 0.9621590520170431, + "2201.00069": 0.9897771809137501, + "2201.00178": 0.9850943175410412, + "2201.00201": 0.9860800858410964, + "1602.06541": 0.9920750604984221, + "2201.00200": 0.9838848533763788, + "2201.00022": 0.7772343606810399, + "2201.00029": 0.9934588336455487, + "1601.03642": 0.9936929660245063 }, "pdfminer": { "2201.00214": 0.9487280293804596, - "GeoTopo-book": 0.7883238686104862, + "GeoTopo-book": 0.7883106543377503, "2201.00151": 0.8602045202371076, - "1707.09725": 0.9189741613844499, + "1707.09725": 0.9190023540909966, "2201.00021": 0.8588197275011207, "2201.00037": 0.9301479087658201, "2201.00069": 0.9540472289854548, @@ -548,6 +574,22 @@ "2201.00029": 0.975523516322736, "1601.03642": 0.8623963054819123 }, + "pdfplumber": { + "2201.00214": 0.9624093076027349, + "GeoTopo-book": 0.8932082690274208, + "2201.00151": 0.8857353838250874, + "1707.09725": 0.977952891119146, + "2201.00021": 0.9174005666220104, + "2201.00037": 0.9432015121388418, + "2201.00069": 0.9320623652220378, + 
"2201.00178": 0.9530470165622914, + "2201.00201": 0.9316913879761284, + "1602.06541": 0.9741434157570039, + "2201.00200": 0.9378122018297131, + "2201.00022": 0.7645679514756893, + "2201.00029": 0.9927616243405717, + "1601.03642": 0.982476230133944 + }, "pdfrw": {}, "pdftotext": { "2201.00214": 0.9600762653108389, @@ -565,60 +607,76 @@ "2201.00029": 0.9649219467401285, "1601.03642": 0.7867700010287713 }, + "playa": { + "2201.00214": 0.9761700404077421, + "GeoTopo-book": 0.927598899820742, + "2201.00151": 0.9222810491856283, + "1707.09725": 0.9757090668337609, + "2201.00021": 0.9719382936299716, + "2201.00037": 0.9513322686391528, + "2201.00069": 0.9697201017811705, + "2201.00178": 0.960335879151019, + "2201.00201": 0.9768103792804297, + "1602.06541": 0.9822372862286228, + "2201.00200": 0.9697131992609057, + "2201.00022": 0.7673033675330817, + "2201.00029": 0.0014646649578908821, + "1601.03642": 0.9891916003293989 + }, "pymupdf": { - "2201.00214": 0.9780968228783716, - "GeoTopo-book": 0.9644376202326115, - "2201.00151": 0.9262640520751881, - "1707.09725": 0.9705185650275407, - "2201.00021": 0.9773729808638253, - "2201.00037": 0.9550639423053028, - "2201.00069": 0.9811348240949814, - "2201.00178": 0.9792454038818782, - "2201.00201": 0.9810750465567505, - "1602.06541": 0.9798295776242781, - "2201.00200": 0.9749010314275711, - "2201.00022": 0.7742949731877629, + "2201.00214": 0.9780473882293753, + "GeoTopo-book": 0.957868684569868, + "2201.00151": 0.9261222831606744, + "1707.09725": 0.9700781181218339, + "2201.00021": 0.9771989038544963, + "2201.00037": 0.9543154784114144, + "2201.00069": 0.9856916902090933, + "2201.00178": 0.9783809778252739, + "2201.00201": 0.980766604896128, + "1602.06541": 0.9796117742003992, + "2201.00200": 0.9745551529519525, + "2201.00022": 0.7739993052070868, "2201.00029": 0.9771271181366386, - "1601.03642": 0.988502191286414 + "1601.03642": 0.9884500360936372 + }, + "pypdf": { + "2201.00214": 0.9876438905903474, + "GeoTopo-book": 
0.9519678772970627, + "2201.00151": 0.9317966019515546, + "1707.09725": 0.9834021823012359, + "2201.00021": 0.9852542946602353, + "2201.00037": 0.9643816837117355, + "2201.00069": 0.9718067652608781, + "2201.00178": 0.9882277602860344, + "2201.00201": 0.9880860613748353, + "1602.06541": 0.9921834657310404, + "2201.00200": 0.9866232437960919, + "2201.00022": 0.779580889163322, + "2201.00029": 0.9953323774367735, + "1601.03642": 0.9937638509508839 }, "tika": { - "2201.00214": 0.9909526851172147, - "GeoTopo-book": 0.9853733000277547, - "2201.00151": 0.9216634911767934, - "1707.09725": 0.9695899981614268, - "2201.00021": 0.9805897449326979, - "2201.00037": 0.9643852585939919, - "2201.00069": 0.9325631793594185, - "2201.00178": 0.9675184507534486, - "2201.00201": 0.9815221704916128, - "1602.06541": 0.9286278537009243, - "2201.00200": 0.9774111438357077, - "2201.00022": 0.7298005166475316, - "2201.00029": 0.9827089337175793, - "1601.03642": 0.9550548853743231 + "2201.00214": 0.9905843784546182, + "GeoTopo-book": 0.00047713271306082383, + "2201.00151": 0.9216462958343726, + "1707.09725": 0.9520265054911324, + "2201.00021": 0.770705041657062, + "2201.00037": 0.8572065203619317, + "2201.00069": 0.8050052291240113, + "2201.00178": 0.8168859839727433, + "2201.00201": 0.981721720946443, + "1602.06541": 0.8827184830564161, + "2201.00200": 0.9774490203918432, + "2201.00022": 0.6698799418093457, + "2201.00029": 0.9828859664925239, + "1601.03642": 0.9551993153165015 } }, "watermarking_result_file_size": { - "pdfplumber": {}, - "pypdf": { - "2201.00214": 2511916.0, - "GeoTopo-book": 5732063.0, - "2201.00151": 1575105.0, - "1707.09725": 7273085.0, - "2201.00021": 2727836.0, - "2201.00037": 3113158.0, - "2201.00069": 15399764.0, - "2201.00178": 2398354.0, - "2201.00201": 1327004.0, - "1602.06541": 3024013.0, - "2201.00200": 285365.0, - "2201.00022": 1211522.0, - "2201.00029": 830633.0, - "1601.03642": 1021439.0 - }, "borb": {}, "pdfium": {}, "pdfminer": {}, + "pdfplumber": {}, 
"pdfrw": { "2201.00214": 2515466.0, "GeoTopo-book": 5738184.0, @@ -636,21 +694,38 @@ "1601.03642": 1026759.0 }, "pdftotext": {}, + "playa": {}, "pymupdf": { "2201.00214": 2716298.0, - "GeoTopo-book": 6838694.0, + "GeoTopo-book": 6857999.0, "2201.00151": 1682101.0, - "1707.09725": 8524289.0, - "2201.00021": 2802599.0, - "2201.00037": 3395981.0, - "2201.00069": 15520607.0, - "2201.00178": 2518436.0, - "2201.00201": 1400680.0, - "1602.06541": 3163793.0, - "2201.00200": 341709.0, - "2201.00022": 1299852.0, + "1707.09725": 8546399.0, + "2201.00021": 2804209.0, + "2201.00037": 3401294.0, + "2201.00069": 15523022.0, + "2201.00178": 2521012.0, + "2201.00201": 1402129.0, + "1602.06541": 3166433.0, + "2201.00200": 342826.0, + "2201.00022": 1302162.0, "2201.00029": 935908.0, - "1601.03642": 1091306.0 + "1601.03642": 1092594.0 + }, + "pypdf": { + "2201.00214": 2503373.0, + "GeoTopo-book": 5642629.0, + "2201.00151": 1568954.0, + "1707.09725": 7197331.0, + "2201.00021": 2713512.0, + "2201.00037": 3094398.0, + "2201.00069": 15384192.0, + "2201.00178": 2365349.0, + "2201.00201": 1315587.0, + "1602.06541": 3000282.0, + "2201.00200": 278934.0, + "2201.00022": 1170781.0, + "2201.00029": 830154.0, + "1601.03642": 1014378.0 }, "tika": {} } diff --git a/read/results/pdfium/1601.03642.txt b/read/results/pdfium/1601.03642.txt index 706658e..631c030 100644 --- a/read/results/pdfium/1601.03642.txt +++ b/read/results/pdfium/1601.03642.txt @@ -1,592 +1,592 @@ -1 -Creativity in Machine Learning -Martin Thoma -E-Mail: info@martin-thoma.de -Abstract—Recent machine learning techniques can be modified -to produce creative results. Those results did not exist before; it -is not a trivial combination of the data which was fed into the -machine learning system. The obtained results come in multiple -forms: As images, as text and as audio. -This paper gives a high level overview of how they are created -and gives some examples. 
It is meant to be a summary of the -current work and give people who are new to machine learning -some starting points. -I. INTRODUCTION -According to [Gad06] creativity is “the ability to use your -imagination to produce new ideas, make things etc.” and -imagination is “the ability to form pictures or ideas in your -mind”. -Recent advances in machine learning produce results which the -author would intuitively call creative. A high-level overview -over several of those algorithms are described in the following. -This paper is structured as follows: Section II introduces the -reader on a very simple and superficial level to machine -learning, Section III gives examples of creativity with images, -Section IV gives examples of machines producing textual -content, and Section V gives examples of machine learning -and music. A discussion follows in Section VI. -II. BASICS OF MACHINE LEARNING -The traditional approach of solving problems with software -is to program machines to do so. The task is divided in as -simple sub-tasks as possible, the subtasks are analyzed and the -machine is instructed to process the input with human-designed -algorithms to produce the desired output. However, for some -tasks like object recognition this approach is not feasible. There -are way to many different objects, different lighting situations, -variations in rotation and the arrangement of a scene for a -human to think of all of them and model them. But with the -internet, cheap computers, cameras, crowd-sourcing platforms -like Wikipedia and lots of Websites, services like Amazon -Mechanical Turk and several other changes in the past decades -a lot of data has become available. The idea of machine learning -is to make use of this data. 
-A formal definition of the field of Machine Learning is given -by Tom Mitchel [Mit97]: -A computer program is said to learn from experi￾ence E with respect to some class of tasks T and -performance measure P, if its performance at tasks -in T, as measured by P, improves with experience E. -Σ ϕ -x0 -x1 -x2 -x3 -xn -w0 -w1 -w2 -w3 -wn -. -. -. -(a) Example of an artificial neuron unit. -xi are the input signals and wi are -weights which have to get learned. -Each input signal gets multiplied -with its weight, everything gets -summed up and the activation func￾tion ϕ is applied. -(b) A visualization of a simple feed￾forward neural network. The 5 in￾put nodes are red, the 2 bias nodes -are gray, the 3 hidden units are -green and the single output node -is blue. -Fig. 1: Neural networks are based on simple units which get -combined to complex networks. -This means that machine learning programs adjust internal -parameters to fit the data they are given. Those computer -programs are still developed by software developers, but the -developer writes them in a way which makes it possible to -adjust them without having to re-program everything. Machine -learning programs should generally improve when they are fed -with more data. -The field of machine learning is related to statistics. Some -algorithms directly try to find models which are based on well￾known distribution assumptions of the developer, others are -more general. -A common misunderstanding of people who are not related -in this field is that the developers don’t understand what their -machine learning program is doing. It is understood very well -in the sense that the developer, given only a pen, lots of paper -and a calculator could calculate the same result as the machine -does when he gets the same data. And lots of time, of course. It -is not understood in the sense that it is hard to make predictions -how the algorithm behaves without actually trying it. 
However, -this is similar to expecting from an electrical engineer to -explain how a computer works. The electrical engineer could -probably get the knowledge he needs to do so, but the amount -of time required to understand such a complex system from -basic building blocks is a time-intensive and difficult task. -An important group of machine learning algorithms was -inspired by biological neurons and are thus called artificial -neural networks. Those networks are based on mathematical -functions called artificial neurons which take n ∈ N num￾bers x1, . . . , xn ∈ R as input, multiply them with weights -w1, . . . , wn ∈ R, add them and apply a so called activation -function ϕ as visualized in Figure 1(a). One example of such -an activation function is the sigmoid function ϕ(x) = 1 -1+e−x . -Those functions act as building blocks for more complex -systems as they can be chained and grouped in layers as -visualized in Figure 1(b). The interesting question is how -the parameters wi are learned. This is usually done by an -optimization technique called gradient descent. The gradient -descent algorithm takes a function which has to be derivable, -starts at any point of the surface of this error function and + +Creativity in Machine Learning +Martin Thoma +E-Mail: info@martin-thoma.de +Abstract—Recent machine learning techniques can be modified +to produce creative results. Those results did not exist before; it +is not a trivial combination of the data which was fed into the +machine learning system. The obtained results come in multiple +forms: As images, as text and as audio. +This paper gives a high level overview of how they are created +and gives some examples. It is meant to be a summary of the +current work and give people who are new to machine learning +some starting points. +I. 
INTRODUCTION +According to [Gad06] creativity is “the ability to use your +imagination to produce new ideas, make things etc.” and +imagination is “the ability to form pictures or ideas in your +mind”. +Recent advances in machine learning produce results which the +author would intuitively call creative. A high-level overview +over several of those algorithms are described in the following. +This paper is structured as follows: Section II introduces the +reader on a very simple and superficial level to machine +learning, Section III gives examples of creativity with images, +Section IV gives examples of machines producing textual +content, and Section V gives examples of machine learning +and music. A discussion follows in Section VI. +II. BASICS OF MACHINE LEARNING +The traditional approach of solving problems with software +is to program machines to do so. The task is divided in as +simple sub-tasks as possible, the subtasks are analyzed and the +machine is instructed to process the input with human-designed +algorithms to produce the desired output. However, for some +tasks like object recognition this approach is not feasible. There +are way to many different objects, different lighting situations, +variations in rotation and the arrangement of a scene for a +human to think of all of them and model them. But with the +internet, cheap computers, cameras, crowd-sourcing platforms +like Wikipedia and lots of Websites, services like Amazon +Mechanical Turk and several other changes in the past decades +a lot of data has become available. The idea of machine learning +is to make use of this data. +A formal definition of the field of Machine Learning is given +by Tom Mitchel [Mit97]: +A computer program is said to learn from experience E with respect to some class of tasks T and +performance measure P, if its performance at tasks +in T, as measured by P, improves with experience E. +Σ ϕ +x0 +x1 +x2 +x3 +xn +w0 +w1 +w2 +w3 +wn +. +. +. 
+(a) Example of an artificial neuron unit. +xi are the input signals and wi are +weights which have to get learned. +Each input signal gets multiplied +with its weight, everything gets +summed up and the activation function ϕ is applied. +(b) A visualization of a simple feedforward neural network. The 5 input nodes are red, the 2 bias nodes +are gray, the 3 hidden units are +green and the single output node +is blue. +Fig. 1: Neural networks are based on simple units which get +combined to complex networks. +This means that machine learning programs adjust internal +parameters to fit the data they are given. Those computer +programs are still developed by software developers, but the +developer writes them in a way which makes it possible to +adjust them without having to re-program everything. Machine +learning programs should generally improve when they are fed +with more data. +The field of machine learning is related to statistics. Some +algorithms directly try to find models which are based on wellknown distribution assumptions of the developer, others are +more general. +A common misunderstanding of people who are not related +in this field is that the developers don’t understand what their +machine learning program is doing. It is understood very well +in the sense that the developer, given only a pen, lots of paper +and a calculator could calculate the same result as the machine +does when he gets the same data. And lots of time, of course. It +is not understood in the sense that it is hard to make predictions +how the algorithm behaves without actually trying it. However, +this is similar to expecting from an electrical engineer to +explain how a computer works. The electrical engineer could +probably get the knowledge he needs to do so, but the amount +of time required to understand such a complex system from +basic building blocks is a time-intensive and difficult task. 
+An important group of machine learning algorithms was +inspired by biological neurons and are thus called artificial +neural networks. Those networks are based on mathematical +functions called artificial neurons which take n ∈ N numbers x1, . . . , xn ∈ R as input, multiply them with weights +w1, . . . , wn ∈ R, add them and apply a so called activation +function ϕ as visualized in Figure 1(a). One example of such +an activation function is the sigmoid function ϕ(x) = 1 +1+e−x . +Those functions act as building blocks for more complex +systems as they can be chained and grouped in layers as +visualized in Figure 1(b). The interesting question is how +the parameters wi are learned. This is usually done by an +optimization technique called gradient descent. The gradient +descent algorithm takes a function which has to be derivable, +starts at any point of the surface of this error function and arXiv:1601.03642v1 [cs.CV] 12 Jan 2016 -2 -makes a step in the direction which goes downwards. Hence -it tries to find a minimum of this high-dimensional function. -There is, of course, a lot more to say about machine learning. -The interested reader might want to read the introduction given -by Mitchell [Mit97]. -III. IMAGE DATA -Applying a simple neural network on image data directly can -work, but the number of parameters gets extraordinary large. -One would take one neuron per pixel and channel. This means -for 500 px×500 px RGB images one would get 750,000 input -signals. To approach this problem, so called Convolutional -Neural Networks (CNNs) were introduced. Instead of learning -the full connection between the input layer and the first -hidden layer, those networks make use of convolution layers. -Convolution layers learn a convolution; this means they learn -the weights of an image filter. An additional advantage is that -CNNs make use of spacial relationships of the pixels instead -of flattening the image to a stream of single numbers. 
-An excellent introduction into CNNs is given by [Nie15]. -A. Google DeepDream -The gradient descent algorithm which optimizes most of the -parameters in neural networks is well-understood. However, the -effect it has on the recognition system is difficult to estimate. -[MOT15] proposes a technique to analyze the weights learned -by such a network. A similar idea was applied by [VKMT13]. -For example, consider a neural network which was trained to -recognize various images like bananas. This technique turns -the network upside down and starts with random noise. To -analyze what the network considers bananas to look like, the -random noise image is gradually tweaked so that it generates -the output “banana”. Additionally, the changes can be restricted -in a way that the statistics of the input image have to be similar -to natural images. One example of this is that neighboring -pixels are correlated. -Another technique is to amplify the output of layers. This was -described in [MOT15]: -We ask the network: “Whatever you see there, I want -more of it!” This creates a feedback loop: if a cloud -looks a little bit like a bird, the network will make -it look more like a bird. This in turn will make the -network recognize the bird even more strongly on -the next pass and so forth, until a highly detailed -bird appears, seemingly out of nowhere. -The name “Inceptionism” in the title of [MOT15] comes from -the science-fiction movie “Inception” (2010). One reason it -might be chosen is because neural networks are structured -in layers. Recent publications tend to have more and more -layers [HZRS15]. The used jargon is to say they get “deeper”. -As this technique as published by Google engineers, the -technique is called Google DeepDream. -Fig. 2: Aurelia aurita -Fig. 3: DeepDream impression of Aurelia aurita -It has become famous in the internet [Red]. Usually, the images -are generated in iterations and in each iteration it is zoomed -into the image. 
-Images and videos published by the Google engineers can be -seen at [goo15]. Figure 2 shows the original image from which -Figure 3 was created with the deep dream algorithm. -B. Artistic Style Imitation -A key idea of neural networks is that they learn different -representations of the data in each layer. In the case of -CNNs, this can easily be visualized as it was done in various -papers [ZF14]. Usually, one finds that the network learned -to build edge detectors in the first layer and more complex -structures in the upper layers. -Gatys, Ecker and Bethge showed in [GEB15] that with a clever -choice of features it is possible to separate the general style of -an image in terms of local image appearance from the content -of an image. They support their claim by applying the style of + +makes a step in the direction which goes downwards. Hence +it tries to find a minimum of this high-dimensional function. +There is, of course, a lot more to say about machine learning. +The interested reader might want to read the introduction given +by Mitchell [Mit97]. +III. IMAGE DATA +Applying a simple neural network on image data directly can +work, but the number of parameters gets extraordinary large. +One would take one neuron per pixel and channel. This means +for 500 px×500 px RGB images one would get 750,000 input +signals. To approach this problem, so called Convolutional +Neural Networks (CNNs) were introduced. Instead of learning +the full connection between the input layer and the first +hidden layer, those networks make use of convolution layers. +Convolution layers learn a convolution; this means they learn +the weights of an image filter. An additional advantage is that +CNNs make use of spacial relationships of the pixels instead +of flattening the image to a stream of single numbers. +An excellent introduction into CNNs is given by [Nie15]. +A. 
Google DeepDream +The gradient descent algorithm which optimizes most of the +parameters in neural networks is well-understood. However, the +effect it has on the recognition system is difficult to estimate. +[MOT15] proposes a technique to analyze the weights learned +by such a network. A similar idea was applied by [VKMT13]. +For example, consider a neural network which was trained to +recognize various images like bananas. This technique turns +the network upside down and starts with random noise. To +analyze what the network considers bananas to look like, the +random noise image is gradually tweaked so that it generates +the output “banana”. Additionally, the changes can be restricted +in a way that the statistics of the input image have to be similar +to natural images. One example of this is that neighboring +pixels are correlated. +Another technique is to amplify the output of layers. This was +described in [MOT15]: +We ask the network: “Whatever you see there, I want +more of it!” This creates a feedback loop: if a cloud +looks a little bit like a bird, the network will make +it look more like a bird. This in turn will make the +network recognize the bird even more strongly on +the next pass and so forth, until a highly detailed +bird appears, seemingly out of nowhere. +The name “Inceptionism” in the title of [MOT15] comes from +the science-fiction movie “Inception” (2010). One reason it +might be chosen is because neural networks are structured +in layers. Recent publications tend to have more and more +layers [HZRS15]. The used jargon is to say they get “deeper”. +As this technique as published by Google engineers, the +technique is called Google DeepDream. +Fig. 2: Aurelia aurita +Fig. 3: DeepDream impression of Aurelia aurita +It has become famous in the internet [Red]. Usually, the images +are generated in iterations and in each iteration it is zoomed +into the image. +Images and videos published by the Google engineers can be +seen at [goo15]. 
Figure 2 shows the original image from which +Figure 3 was created with the deep dream algorithm. +B. Artistic Style Imitation +A key idea of neural networks is that they learn different +representations of the data in each layer. In the case of +CNNs, this can easily be visualized as it was done in various +papers [ZF14]. Usually, one finds that the network learned +to build edge detectors in the first layer and more complex +structures in the upper layers. +Gatys, Ecker and Bethge showed in [GEB15] that with a clever +choice of features it is possible to separate the general style of +an image in terms of local image appearance from the content +of an image. They support their claim by applying the style of different artists to an arbitrary image of their choice. -3 -(a) Original Image (b) Style image -(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph -of a Scottish Highland Cattle. -Fig. 4: The algorithm takes both, the original image and the -style image to produce the result. -This artistic style imitation can be seen itself as creative work. -An example is given by Figure 4. The code which created this -example is available under [Joh16]. -Something similar was done by [SPB+14], where the style of -a portrait photograph was transferred to another photograph. -A demo can be seen on [Shi14]. -C. Drawing Robots -Patrick Tresset and Frdric Fol Leymarie created a system called -AIKON (Automatic IKONic drawing) which can automatically -generated sketches for portraits [TL05]. AIKON takes a digital -photograph, detects faces on them and sketches them with a -pen-plotter. -Tresset and Leymaire use k-means clustering [KMN+02] to -segment regions of the photograph with similar color which, -in turn, will get a similar shading. -Such a drawing robot could apply machine learning techniques -known from computer vision for detecting the human. 
It -could apply self-learning techniques to draw results most -similar to the artists impression of the image. However, the -system described in [TL05] seems not to be a machine -learning computer program according to the definition by Tom -Mitchell [Mit97]. -IV. TEXT DATA -Digital text is the first form of natural communication which -involved computers. It is used in the form of chats, websites, -on collaborative projects like Wikipedia, in scientific literature. -Of course, it was used in pre-digital times, too: In newspaper, -in novels, in dramas, in religious texts like the bible, in books -for education, in notes from conversations. -This list could be continued and most of these kinds of texts -are now available in digital form. This digital form can be -used to teach machines to generate similar texts. -The most simple language model which is of use is an n-gram -model. This model makes use of sequences of the length n to -model language. It can be used to get the probability of a third -word, given the previous two words. This way, a complete text -can be generated word by word. Refinements and extensions -to this model are discussed in the field of Natural Language -Processing (NLP). -However, there are much more sophisticated models. One -of those are character predictors based on Recurrent Neural -Networks (RNNs). Those character predictors take a sequence -of characters as input and predict the next character. In that -sense they are similar to the n-gram model, but operate on -a lower level. Using such a predictor, one can generate texts -character by character. If the model is good, the text can have -the correct punctuation. This would not be possible with a -word predictor. -Character predictors can be implemented with RNNs. In con￾trast to standard feed-forward neural networks like multilayer -Perceptrons (MLPs) which was shown in Figure 1(b), those -networks are trained to take their output at some point as well as -the normal input. 
This means they can keep some information -over time. One of the most common variant to implement -RNNs is by using so called Long short-term memory (LSTM) -cells [HS97]. -Recurrent networks apply two main ideas in order to learn: The -first is called unrolling and means that an recurrent network -is imagined to be an infinite network over time. At each time -step the recurrent neurons get duplicated. The second idea is -weight sharing which means that those unrolled neurons share -the same weight. -A. Similar Texts Generation -Karpathy trained multiple character RNNs on different datasets -and gave an excellent introduction [Kar15b]. He trained it on -Paul Graham’s essays, all the works of Shakespeare, the Hutter -Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw -LATEX source file of a book about algebraic stacks and geometry -and Linux C code. -With that training data, the models can generate similar texts. -New works which look like Shakespeare plays, new Wikipedia -articles, new Linux code and new papers about algebraic + +(a) Original Image (b) Style image +(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph +of a Scottish Highland Cattle. +Fig. 4: The algorithm takes both, the original image and the +style image to produce the result. +This artistic style imitation can be seen itself as creative work. +An example is given by Figure 4. The code which created this +example is available under [Joh16]. +Something similar was done by [SPB+14], where the style of +a portrait photograph was transferred to another photograph. +A demo can be seen on [Shi14]. +C. Drawing Robots +Patrick Tresset and Frdric Fol Leymarie created a system called +AIKON (Automatic IKONic drawing) which can automatically +generated sketches for portraits [TL05]. AIKON takes a digital +photograph, detects faces on them and sketches them with a +pen-plotter. 
+Tresset and Leymaire use k-means clustering [KMN+02] to +segment regions of the photograph with similar color which, +in turn, will get a similar shading. +Such a drawing robot could apply machine learning techniques +known from computer vision for detecting the human. It +could apply self-learning techniques to draw results most +similar to the artists impression of the image. However, the +system described in [TL05] seems not to be a machine +learning computer program according to the definition by Tom +Mitchell [Mit97]. +IV. TEXT DATA +Digital text is the first form of natural communication which +involved computers. It is used in the form of chats, websites, +on collaborative projects like Wikipedia, in scientific literature. +Of course, it was used in pre-digital times, too: In newspaper, +in novels, in dramas, in religious texts like the bible, in books +for education, in notes from conversations. +This list could be continued and most of these kinds of texts +are now available in digital form. This digital form can be +used to teach machines to generate similar texts. +The most simple language model which is of use is an n-gram +model. This model makes use of sequences of the length n to +model language. It can be used to get the probability of a third +word, given the previous two words. This way, a complete text +can be generated word by word. Refinements and extensions +to this model are discussed in the field of Natural Language +Processing (NLP). +However, there are much more sophisticated models. One +of those are character predictors based on Recurrent Neural +Networks (RNNs). Those character predictors take a sequence +of characters as input and predict the next character. In that +sense they are similar to the n-gram model, but operate on +a lower level. Using such a predictor, one can generate texts +character by character. If the model is good, the text can have +the correct punctuation. This would not be possible with a +word predictor. 
+Character predictors can be implemented with RNNs. In contrast to standard feed-forward neural networks like multilayer +Perceptrons (MLPs) which was shown in Figure 1(b), those +networks are trained to take their output at some point as well as +the normal input. This means they can keep some information +over time. One of the most common variant to implement +RNNs is by using so called Long short-term memory (LSTM) +cells [HS97]. +Recurrent networks apply two main ideas in order to learn: The +first is called unrolling and means that an recurrent network +is imagined to be an infinite network over time. At each time +step the recurrent neurons get duplicated. The second idea is +weight sharing which means that those unrolled neurons share +the same weight. +A. Similar Texts Generation +Karpathy trained multiple character RNNs on different datasets +and gave an excellent introduction [Kar15b]. He trained it on +Paul Graham’s essays, all the works of Shakespeare, the Hutter +Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw +LATEX source file of a book about algebraic stacks and geometry +and Linux C code. +With that training data, the models can generate similar texts. +New works which look like Shakespeare plays, new Wikipedia +articles, new Linux code and new papers about algebraic geometry can thus automatically be generated. At a first -4 -glance, they do look authentic. The syntax was mostly used -correctly, the formatting looks as expected, the sentences are -grammatically correct. However, when one looks at the broader -context it is easy to recognize that the algorithm has no insight -in what it is doing. It does match patterns really well, but it -fails to follow a central theme. In the context of C code this -means that new variables are introduced, but not used. At the -same time, variables which were not declared are used. 
In -the context of Shakespear plays this means that a lot of new -characters are introduced, but they don’t speak with each other -or about each other. -The code used to generate these examples is available and -ready to use through [Kar15a]. A couple of examples are -in Section A. -B. Chatbots -Chatbots are computer programs which participate in chat -rooms as autonomous agents. This means they have similar -permissions and possibilities as usual human users have, but -users can trigger a set of commands to make the bot give them -valuable information or features. -A special category of chatbots are such bots which actively -participate in the conversation, which is usually not the case. -One of the earliest programs in this category is ELIZA, a bot -created by Joseph Weizenbaum in the 1960s [Wei76]. This -program had a set of patterns implemented to which it would -reply in a seemingly smart way in a psychologists fashion. -This means quite often the program would simply repeat the -last sentence and add something meaningless like “How do -you feel about it?”. According to [Cur14], Weizenbaum once -found his secretary — who was aware of the fact that this is -a computer program — chatting with the machine. When he -looked over her shoulder, she asked him “would you mind -leaving the room”. -Today, much more sophisticated chatbots exist. They make use -of the vast amount of data which is available by millions of -Wikipedia articles, chat protocols, websites, help desk protocols, -subtitles of movies as well as the astonishing increase in -computing power to train RNNs and language models similar -to the ones described before. -Interesting results like the following were obtained by [VL15]: -Human: what is the purpose of life ? -Machine: to serve the greater good . -Human: what is the purpose of living ? -Machine: to live forever . -V. 
AUDIO DATA -Common machine learning tasks which involve audio data -are speech recognition, speaker identification, identification of -songs. This leads to some less-common, but interesting topics: -The composition of music, the synthesizing of audio as art. -While the composition might be considered in Section IV, -we will now investigate the work which was done in audio -synthesization. -A. Emily Howell -David Cope created a project called “Experiments in Musical -Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He -introduces the idea of seeing music as a language which -can be analyzed with natural language processing (NLP) -methods. Cope mentions that EMI was more useful to him, -when he used the system to “create small phrase-size textures -as next possibilities using its syntactic dictionary and rule -base” [Cop87]. -In 2003, Cope started a new project which was based on EMI: -Emily Howell [Cop13]. This program is able to “creat[e] both -highly authentic replications and novel music compositions”. -The reader might want to listen to [Cop12] to get an impression -of the beauty of the created music. -According to Cope, an essential part of music is “a set of -instructions for creating different, but highly related self￾replications”. Emmy was programmed to find this set of -instructions. It tries to find the “signature” of a composer, -which Cope describes as “contiguous patterns that recur in two -or more works of the composer”. -The new feature of Emily Howell compared to Emmy is that -Emily Howell does not necessarily remain in a single, already -known style. -Emily Howell makes use of association network. Cope empha￾sizes that this is not a form of a neural network. However, it -is not clear from [Cop13] how exactly an association network -is trained. Cope mentions that Emily Howell is explained in -detail in [Cop05]. -B. 
GRUV -Recurrent neural networks — LSTM networks, to be exact -— are used in [NV15] together with Gated Recurrent Units -(GRU) to build a network which can be trained to generate -music. Instead of taking notes directly or MIDI files, Nayebi -and Vitelli took raw audio waveforms as input. Those audio -waveforms are feature vectors given for time steps 0, 1, . . . , t− -1, t. The network is given those feature vectors X1, . . . , Xt -and has to predict the following feature vector Xt+1. This -means it continues the music. As the input is continuous, the -problem was modeled as a regression task. Discrete Fourier -Transformation (DFT) was used on chunks of length N of the -music to obtain features in the frequency domain. -An implementation can be found at [VN15] and a demonstration -can be found at [Vit15]. -C. Audio Synthesization -Audio synthesization is generating new audio files. This can + +glance, they do look authentic. The syntax was mostly used +correctly, the formatting looks as expected, the sentences are +grammatically correct. However, when one looks at the broader +context it is easy to recognize that the algorithm has no insight +in what it is doing. It does match patterns really well, but it +fails to follow a central theme. In the context of C code this +means that new variables are introduced, but not used. At the +same time, variables which were not declared are used. In +the context of Shakespear plays this means that a lot of new +characters are introduced, but they don’t speak with each other +or about each other. +The code used to generate these examples is available and +ready to use through [Kar15a]. A couple of examples are +in Section A. +B. Chatbots +Chatbots are computer programs which participate in chat +rooms as autonomous agents. This means they have similar +permissions and possibilities as usual human users have, but +users can trigger a set of commands to make the bot give them +valuable information or features. 
+A special category of chatbots are such bots which actively +participate in the conversation, which is usually not the case. +One of the earliest programs in this category is ELIZA, a bot +created by Joseph Weizenbaum in the 1960s [Wei76]. This +program had a set of patterns implemented to which it would +reply in a seemingly smart way in a psychologists fashion. +This means quite often the program would simply repeat the +last sentence and add something meaningless like “How do +you feel about it?”. According to [Cur14], Weizenbaum once +found his secretary — who was aware of the fact that this is +a computer program — chatting with the machine. When he +looked over her shoulder, she asked him “would you mind +leaving the room”. +Today, much more sophisticated chatbots exist. They make use +of the vast amount of data which is available by millions of +Wikipedia articles, chat protocols, websites, help desk protocols, +subtitles of movies as well as the astonishing increase in +computing power to train RNNs and language models similar +to the ones described before. +Interesting results like the following were obtained by [VL15]: +Human: what is the purpose of life ? +Machine: to serve the greater good . +Human: what is the purpose of living ? +Machine: to live forever . +V. AUDIO DATA +Common machine learning tasks which involve audio data +are speech recognition, speaker identification, identification of +songs. This leads to some less-common, but interesting topics: +The composition of music, the synthesizing of audio as art. +While the composition might be considered in Section IV, +we will now investigate the work which was done in audio +synthesization. +A. Emily Howell +David Cope created a project called “Experiments in Musical +Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He +introduces the idea of seeing music as a language which +can be analyzed with natural language processing (NLP) +methods. 
Cope mentions that EMI was more useful to him, +when he used the system to “create small phrase-size textures +as next possibilities using its syntactic dictionary and rule +base” [Cop87]. +In 2003, Cope started a new project which was based on EMI: +Emily Howell [Cop13]. This program is able to “creat[e] both +highly authentic replications and novel music compositions”. +The reader might want to listen to [Cop12] to get an impression +of the beauty of the created music. +According to Cope, an essential part of music is “a set of +instructions for creating different, but highly related selfreplications”. Emmy was programmed to find this set of +instructions. It tries to find the “signature” of a composer, +which Cope describes as “contiguous patterns that recur in two +or more works of the composer”. +The new feature of Emily Howell compared to Emmy is that +Emily Howell does not necessarily remain in a single, already +known style. +Emily Howell makes use of association network. Cope emphasizes that this is not a form of a neural network. However, it +is not clear from [Cop13] how exactly an association network +is trained. Cope mentions that Emily Howell is explained in +detail in [Cop05]. +B. GRUV +Recurrent neural networks — LSTM networks, to be exact +— are used in [NV15] together with Gated Recurrent Units +(GRU) to build a network which can be trained to generate +music. Instead of taking notes directly or MIDI files, Nayebi +and Vitelli took raw audio waveforms as input. Those audio +waveforms are feature vectors given for time steps 0, 1, . . . , t− +1, t. The network is given those feature vectors X1, . . . , Xt +and has to predict the following feature vector Xt+1. This +means it continues the music. As the input is continuous, the +problem was modeled as a regression task. Discrete Fourier +Transformation (DFT) was used on chunks of length N of the +music to obtain features in the frequency domain. 
+An implementation can be found at [VN15] and a demonstration +can be found at [Vit15]. +C. Audio Synthesization +Audio synthesization is generating new audio files. This can either be music or speech. With the techniques described before, -5 -neural networks can be trained to generate music note by note. -However, it is desirable to allow multiple notes being played -at the same time. -This idea and some others were applied by Daniel Johnson. He -wrote a very good introduction into neural networks for music -composition which explains those ideas [Joh15b]. Example -compositions are available there, too. He also made the code for -his Biaxial Recurrent Neural Network available under [Joh15a]. -VI. DISCUSSION -What does these examples mean for our understanding of -creativity? Does it influence how much we value art? Could -we define art and creativity better after having those and similar -results? -I think we might readjust our understanding of creativity just -like we adjusted our understanding of algorithmically hard -problems after Deep Blue won against the reigning world -chess champion Garry Kasparov in 1997. -However, by now it is obvious that machine learning algorithms -cannot compete with human artists. Today’s state of the art -algorithms which are purely based on machine learning don’t -follow a central theme. They lack the ability to plan. Although -clever algorithms were implemented for composing music, it -seems as if there is still a lot of supervision involved. -REFERENCES -[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. -[Online]. Available: http://hdl.handle.net/2027/spo.bbp2372.1987. -025 -[Cop05] ——, Computer models of musical creativity. MIT Press -Cambridge, 2005. -[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. -Available: https://www.youtube.com/watch?v=jLR- c uCwI -[Cop13] ——, “The well-programmed clavier: Style in computer music -composition,” XRDS: Crossroads, The ACM Magazine for -Students, vol. 19, no. 
4, pp. 16–20, 2013. [Online]. Available: -http://dl.acm.org/citation.cfm?id=2460444 -[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On￾line]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ -78691781-c9b7-30a0-9a0a-3ff76e8bfe58 -[Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson -Education Limited, 2006. -[GEB15] L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of -artistic style,” arXiv preprint arXiv:1508.06576, 2015. [Online]. -Available: http://arxiv.org/abs/1508.06576 -[goo15] “Inceptionism: Going deeper into neural networks,” Google -Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw -[HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,” -Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997. -[Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs all.jsp? -arnumber=6795963 -[hut] “50’000 euro prize for compressing human knowledge.” [Online]. -Available: http://prize.hutter1.net/ -[HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning -for image recognition,” arXiv preprint arXiv:1512.03385, 2015. -[Online]. Available: http://arxiv.org/abs/1512.03385 -[Joh15a] D. Johnson, “Biaxial recurrent neural network for music -composition,” GitHub, Aug. 2015. [Online]. Available: https: -//github.com/hexahedria/biaxial-rnn-music-composition -[Joh15b] ——, “Composing music with recurrent neu￾ral networks,” Personal Blog, Aug. 2015. [On￾line]. Available: http://www.hexahedria.com/2015/08/03/ -composing-music-with-recurrent-neural-networks/ -[Joh16] J. Johnson, “neural-style,” GitHub, Jan. 2016. [Online]. Available: -https://github.com/jcjohnson/neural-style -[Kar15a] A. Karpathy, “char-rnn,” GitHub, Nov. 2015. [Online]. Available: -https://github.com/karpathy/char-rnn -[Kar15b] ——, “The unreasonable effectiveness of recurrent neural -networks,” Personal Blog, May 2015. [Online]. Available: -http://karpathy.github.io/2015/05/21/rnn-effectiveness/ -[KMN+02] T. Kanungo, D. Mount, N. 
Netanyahu, C. Piatko, R. Silverman, -and A. Wu, “An efficient k-means clustering algorithm: analysis -and implementation,” Pattern Analysis and Machine Intelligence, -IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002. -[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in -computer science. McGraw-Hill, 1997. -[MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going -deeper into neural networks,” googleresearch.blogspot.co.uk, -Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/ -2015/06/inceptionism-going-deeper-into-neural.html -[Nie15] M. A. Nielsen, Neural Networks and Deep Learn￾ing. Determination Press, 2015. [Online]. Avail￾able: http://neuralnetworksanddeeplearning.com/chap6.html# -introducing convolutional networks -[NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation -using recurrent neural networks,” 2015. [Online]. Available: -http://cs224d.stanford.edu/reports/NayebiAran.pdf -[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit. -com/r/deepdream/ -[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun. -2014. [Online]. Available: https://www.youtube.com/watch?v= -Hj5lGFzlubU -[SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, -“Style transfer for headshot portraits,” ACM Transactions on -Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available: -http://dl.acm.org/citation.cfm?id=2601137 -[TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in -Proceedings of VSMM, 2005, pp. 739–748. -[Vit15] M. Vitelli, “Algorithmic music generation with recurrent -neural networks,” YouTube, Jun. 2015. [Online]. Available: -https://youtu.be/0VTI1BBLydE -[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba, -“Hoggles: Visualizing object detection features,” in Computer -Vision (ICCV), 2013 IEEE International Conference on. IEEE, -2013, pp. 1–8. [Online]. 
Available: http://ieeexplore.ieee.org/ -xpls/abs all.jsp?arnumber=6751109 -[VL15] O. Vinyals and Q. Le, “A neural conversational model,” -arXiv preprint arXiv:1506.05869, Jul. 2015. [Online]. Available: -http://arxiv.org/abs/1506.05869v2 -[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online]. -Available: https://github.com/MattVitelli/GRUV -[Wei76] J. Weizenbaum, Computer Power and Human Reason: From -Judgement to Calculation. W.H.Freeman & Co Ltd, 1976. -[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding con￾volutional networks,” in Computer Vision–ECCV 2014. Springer, + +neural networks can be trained to generate music note by note. +However, it is desirable to allow multiple notes being played +at the same time. +This idea and some others were applied by Daniel Johnson. He +wrote a very good introduction into neural networks for music +composition which explains those ideas [Joh15b]. Example +compositions are available there, too. He also made the code for +his Biaxial Recurrent Neural Network available under [Joh15a]. +VI. DISCUSSION +What does these examples mean for our understanding of +creativity? Does it influence how much we value art? Could +we define art and creativity better after having those and similar +results? +I think we might readjust our understanding of creativity just +like we adjusted our understanding of algorithmically hard +problems after Deep Blue won against the reigning world +chess champion Garry Kasparov in 1997. +However, by now it is obvious that machine learning algorithms +cannot compete with human artists. Today’s state of the art +algorithms which are purely based on machine learning don’t +follow a central theme. They lack the ability to plan. Although +clever algorithms were implemented for composing music, it +seems as if there is still a lot of supervision involved. +REFERENCES +[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. +[Online]. 
Available: http://hdl.handle.net/2027/spo.bbp2372.1987. +025 +[Cop05] ——, Computer models of musical creativity. MIT Press +Cambridge, 2005. +[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. +Available: https://www.youtube.com/watch?v=jLR- c uCwI +[Cop13] ——, “The well-programmed clavier: Style in computer music +composition,” XRDS: Crossroads, The ACM Magazine for +Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: +http://dl.acm.org/citation.cfm?id=2460444 +[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ +78691781-c9b7-30a0-9a0a-3ff76e8bfe58 +[Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson +Education Limited, 2006. +[GEB15] L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of +artistic style,” arXiv preprint arXiv:1508.06576, 2015. [Online]. +Available: http://arxiv.org/abs/1508.06576 +[goo15] “Inceptionism: Going deeper into neural networks,” Google +Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw +[HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,” +Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997. +[Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs all.jsp? +arnumber=6795963 +[hut] “50’000 euro prize for compressing human knowledge.” [Online]. +Available: http://prize.hutter1.net/ +[HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning +for image recognition,” arXiv preprint arXiv:1512.03385, 2015. +[Online]. Available: http://arxiv.org/abs/1512.03385 +[Joh15a] D. Johnson, “Biaxial recurrent neural network for music +composition,” GitHub, Aug. 2015. [Online]. Available: https: +//github.com/hexahedria/biaxial-rnn-music-composition +[Joh15b] ——, “Composing music with recurrent neural networks,” Personal Blog, Aug. 2015. [Online]. Available: http://www.hexahedria.com/2015/08/03/ +composing-music-with-recurrent-neural-networks/ +[Joh16] J. Johnson, “neural-style,” GitHub, Jan. 
2016. [Online]. Available: +https://github.com/jcjohnson/neural-style +[Kar15a] A. Karpathy, “char-rnn,” GitHub, Nov. 2015. [Online]. Available: +https://github.com/karpathy/char-rnn +[Kar15b] ——, “The unreasonable effectiveness of recurrent neural +networks,” Personal Blog, May 2015. [Online]. Available: +http://karpathy.github.io/2015/05/21/rnn-effectiveness/ +[KMN+02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman, +and A. Wu, “An efficient k-means clustering algorithm: analysis +and implementation,” Pattern Analysis and Machine Intelligence, +IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002. +[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in +computer science. McGraw-Hill, 1997. +[MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going +deeper into neural networks,” googleresearch.blogspot.co.uk, +Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/ +2015/06/inceptionism-going-deeper-into-neural.html +[Nie15] M. A. Nielsen, Neural Networks and Deep Learning. Determination Press, 2015. [Online]. Available: http://neuralnetworksanddeeplearning.com/chap6.html# +introducing convolutional networks +[NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation +using recurrent neural networks,” 2015. [Online]. Available: +http://cs224d.stanford.edu/reports/NayebiAran.pdf +[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit. +com/r/deepdream/ +[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun. +2014. [Online]. Available: https://www.youtube.com/watch?v= +Hj5lGFzlubU +[SPB+14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, +“Style transfer for headshot portraits,” ACM Transactions on +Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available: +http://dl.acm.org/citation.cfm?id=2601137 +[TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in +Proceedings of VSMM, 2005, pp. 739–748. +[Vit15] M. 
Vitelli, “Algorithmic music generation with recurrent +neural networks,” YouTube, Jun. 2015. [Online]. Available: +https://youtu.be/0VTI1BBLydE +[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba, +“Hoggles: Visualizing object detection features,” in Computer +Vision (ICCV), 2013 IEEE International Conference on. IEEE, +2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs all.jsp?arnumber=6751109 +[VL15] O. Vinyals and Q. Le, “A neural conversational model,” +arXiv preprint arXiv:1506.05869, Jul. 2015. [Online]. Available: +http://arxiv.org/abs/1506.05869v2 +[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online]. +Available: https://github.com/MattVitelli/GRUV +[Wei76] J. Weizenbaum, Computer Power and Human Reason: From +Judgement to Calculation. W.H.Freeman & Co Ltd, 1976. +[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional networks,” in Computer Vision–ECCV 2014. Springer, 2014, pp. 818–833. -6 -APPENDIX A -AUTOMATICALLY GENERATED TEXTS -The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for -convenience and the case that the website is not reachable. -A. Shakespeare -PANDARUS: -Alas, I think he shall be come approached and the day -When little srain would be attain’d into being never fed, -And who is but a chain and subjects of his death, -I should not sleep. -Second Senator: -They are away this miseries, produced upon my soul, -Breaking and strongly should be buried, when I perish -The earth and thoughts of many states. -DUKE VINCENTIO: -Well, your wit is in the care of side and that. -Second Lord: -They would be ruled after this chamber, and -my fair nues begun out of the fact, to be conveyed, -Whose noble souls I’ll have the heart of the wars. -Clown: -Come, sir, I will make did behold your worship. -VIOLA: -I’ll drink it. -B. 
Wikipeda -Naturalism and decision for the majority of Arab countries’ capitalide was grounded -by the Irish language by [[John Clair]], [[An Imperial Japanese Revolt]], associated -with Guangzham’s sovereignty. His generals were the powerful ruler of the Portugal -in the [[Protestant Immineners]], which could be said to be directly in Cantonese -Communication, which followed a ceremony and set inspired prison, training. The -emperor travelled back to [[Antioch, Perth, October 25|21]] to note, the Kingdom -of Costa Rica, unsuccessful fashioned the [[Thrales]], [[Cynth’s Dajoard]], known -in western [[Scotland]], near Italy to the conquest of India with the conflict. -Copyright was the succession of independence in the slop of Syrian influence that -was a famous German movement based on a more popular servicious, non-doctrinal -and sexual power post. Many governments recognize the military housing of the -[[Civil Liberalization and Infantry Resolution 265 National Party in Hungary]], -that is sympathetic to be to the [[Punjab Resolution]] -(PJS)[http://www.humah.yahoo.com/guardian. -cfm/7754800786d17551963s89.htm Official economics Adjoint for the Nazism, Montgomery -was swear to advance to the resources for those Socialism’s rule, -was starting to signing a major tripad of aid exile.]] -C. Linux Code, 1 + +APPENDIX A +AUTOMATICALLY GENERATED TEXTS +The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for +convenience and the case that the website is not reachable. +A. Shakespeare +PANDARUS: +Alas, I think he shall be come approached and the day +When little srain would be attain’d into being never fed, +And who is but a chain and subjects of his death, +I should not sleep. +Second Senator: +They are away this miseries, produced upon my soul, +Breaking and strongly should be buried, when I perish +The earth and thoughts of many states. +DUKE VINCENTIO: +Well, your wit is in the care of side and that. 
+Second Lord: +They would be ruled after this chamber, and +my fair nues begun out of the fact, to be conveyed, +Whose noble souls I’ll have the heart of the wars. +Clown: +Come, sir, I will make did behold your worship. +VIOLA: +I’ll drink it. +B. Wikipeda +Naturalism and decision for the majority of Arab countries’ capitalide was grounded +by the Irish language by [[John Clair]], [[An Imperial Japanese Revolt]], associated +with Guangzham’s sovereignty. His generals were the powerful ruler of the Portugal +in the [[Protestant Immineners]], which could be said to be directly in Cantonese +Communication, which followed a ceremony and set inspired prison, training. The +emperor travelled back to [[Antioch, Perth, October 25|21]] to note, the Kingdom +of Costa Rica, unsuccessful fashioned the [[Thrales]], [[Cynth’s Dajoard]], known +in western [[Scotland]], near Italy to the conquest of India with the conflict. +Copyright was the succession of independence in the slop of Syrian influence that +was a famous German movement based on a more popular servicious, non-doctrinal +and sexual power post. Many governments recognize the military housing of the +[[Civil Liberalization and Infantry Resolution 265 National Party in Hungary]], +that is sympathetic to be to the [[Punjab Resolution]] +(PJS)[http://www.humah.yahoo.com/guardian. +cfm/7754800786d17551963s89.htm Official economics Adjoint for the Nazism, Montgomery +was swear to advance to the resources for those Socialism’s rule, +was starting to signing a major tripad of aid exile.]] +C. Linux Code, 1 /* -7 -* Increment the size file of the new incorrect UI_FILTER group information -* of the size generatively. -*/ -static int indicate_policy(void) -{ -int error; -if (fd == MARN_EPT) { -/* -* The kernel blank will coeld it to userspace. 
-*/ -if (ss->segment < mem_total) -unblock_graph_and_set_blocked(); -else -ret = 1; -goto bail; -} -segaddr = in_SB(in.addr); -selector = seg / 16; -setup_works = true; -for (i = 0; i < blocks; i++) { -seq = buf[i++]; -bpf = bd->bd.next + i * search; -if (fd) { -current = blocked; -} -} -rw->name = "Getjbbregs"; -bprm_self_clearl(&iv->version); -regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12; -return segtable; -} -D. Linux Code, 2 -/* -* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved. -* -* This program is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 as published by -* the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software Foundation, -* Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ -#include -#include -#include -#include -#include -8 -#include -#include -#include -#include -#include -#include -#include -#define REG_PG vesa_slot_addr_pack -#define PFM_NOCOMP AFSR(0, load) -#define STACK_DDR(type) (func) -#define SWAP_ALLOCATE(nr) (e) -#define emulate_sigs() arch_get_unaligned_child() -#define access_rw(TST) asm volatile("movd %%esp, %0, %3" : : "r" (0)); \ -if (__type & DO_READ) -static void stat_PC_SEC __read_mostly offsetof(struct seq_argsqueue, \ -pC>[1]); -static void -os_prefix(unsigned long sys) -{ -#ifdef CONFIG_PREEMPT -PUT_PARAM_RAID(2, sel) = get_state_state(); -set_pid_sum((unsigned long)state, current_state_str(), -(unsigned long)-1->lr_full; low; + +* Increment the size file of the new incorrect UI_FILTER group information +* of the size generatively. 
+*/ +static int indicate_policy(void) +{ +int error; +if (fd == MARN_EPT) { +/* +* The kernel blank will coeld it to userspace. +*/ +if (ss->segment < mem_total) +unblock_graph_and_set_blocked(); +else +ret = 1; +goto bail; +} +segaddr = in_SB(in.addr); +selector = seg / 16; +setup_works = true; +for (i = 0; i < blocks; i++) { +seq = buf[i++]; +bpf = bd->bd.next + i * search; +if (fd) { +current = blocked; +} } +rw->name = "Getjbbregs"; +bprm_self_clearl(&iv->version); +regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12; +return segtable; +} +D. Linux Code, 2 +/* +* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved. +* +* This program is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 as published by +* the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+*/ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#define REG_PG vesa_slot_addr_pack +#define PFM_NOCOMP AFSR(0, load) +#define STACK_DDR(type) (func) +#define SWAP_ALLOCATE(nr) (e) +#define emulate_sigs() arch_get_unaligned_child() +#define access_rw(TST) asm volatile("movd %%esp, %0, %3" : : "r" (0)); \ +if (__type & DO_READ) +static void stat_PC_SEC __read_mostly offsetof(struct seq_argsqueue, \ +pC>[1]); +static void +os_prefix(unsigned long sys) +{ +#ifdef CONFIG_PREEMPT +PUT_PARAM_RAID(2, sel) = get_state_state(); +set_pid_sum((unsigned long)state, current_state_str(), +(unsigned long)-1->lr_full; low; +} \ No newline at end of file diff --git a/read/results/pdfium/1602.06541.txt b/read/results/pdfium/1602.06541.txt index 8d5991e..5abc5ff 100644 --- a/read/results/pdfium/1602.06541.txt +++ b/read/results/pdfium/1602.06541.txt @@ -1,1610 +1,1595 @@ -1 -A Survey of Semantic Segmentation -Martin Thoma -info@martin-thoma.de -Abstract—This survey gives an overview over different -techniques used for pixel-level semantic segmentation. -Metrics and datasets for the evaluation of segmenta￾tion algorithms and traditional approaches for segmen￾tation such as unsupervised methods, Decision Forests -and SVMs are described and pointers to the relevant -papers are given. Recently published approaches with -convolutional neural networks are mentioned and typical -problematic situations for segmentation algorithms are -examined. A taxonomy of segmentation algorithms is -given. -I. INTRODUCTION -Semantic segmentation is the task of clustering -parts of images together which belong to the same -object class. This type of algorithm has several use￾cases such as detecting road signs [MBLAGJ+07], -detecting tumors [MBVLG02], detecting medical in￾struments in operations [WAH97], colon crypts segmen￾tation [CRSS14], land use and land cover classifica￾tion [HDT02]. 
In contrast, non-semantic segmentation -only clusters pixels together based on general character￾istics of single objects. Hence the task of non-semantic -segmentation is not well-defined, as many different -segmentations might be acceptable. -Several applications of segmentation in medicine are -listed in [PXP00]. -Object detection, in comparison to semantic seg￾mentation, has to distinguish different instances of the -same object. While having a semantic segmentation -is certainly a big advantage when trying to get object -instances, there are a couple of problems: neighboring -pixels of the same class might belong to different object -instances and regions which are not connected my -belong to the same object instance. For example, a -tree in front of a car which visually divides the car into -two parts. -This paper is organized as follows: It begins by giving -a taxonomy of segmentation algorithms in Section II. -A summary of quality measures and datasets which are -used for semantic segmentation follows in Section III. -A summary of traditional segmentation algorithms and -their characteristics follows in Section V, as well as a -brief, non-exhaustive summary of recently published -semantic segmentation algorithms which are based on -neural networks in Section VI. Finally, Section VII -informs the reader about typical problematic cases for -segmentation algorithms. -II. TAXONOMY OF SEGMENTATION ALGORITHMS -The computer vision community has published a -wide range of segmentation algorithms so far. Those -algorithms can be grouped by the kind of data they -operate on and the kind of segmentation they are able -to produce. -The following subsections will give four different -criteria by which segmentation algorithms can be -classified. 
-This survey describes fixed-class (see Section II-A), -single-class affiliation (see Section II-B) algorithms -which work on grayscale or colored single pixel images -(see Section II-C) in a completely automated, passive -fashion (see Section II-D). -A. Allowed classes -Semantic segmentation is a classification task. As -such, the classes on which the algorithm is trained is a -central design decision. -Most algorithms work with a fixed set of classes; -some even only work on binary classes like fore￾ground vs background [RM07], [CS10] or street vs -no street [BKTT15]. -However, there are also unsupervised segmentation -algorithms which do not distinguish classes at all (see -Section V-B) as well as segmentation algorithms which -are able to recognize when they don’t know a class. -For example, in [GRC+08] a void class was added -for classes which were not in the training set. Such -a void class was also used in the MSRCv2 dataset -(see Section III-B2) to make it possible to make more -coarse segmentations and thus having to spend less -time annotating the image. -B. Class affiliation of pixels -Humans do an incredible job when looking at the -world. For example, when we see a glass of water -standing on a table we can automatically say that there -is the glass and behind it the table, even if we only had a -single image and were not allowed to move. This means -we simultaneously two labels to the coordinates of the -glass: Glass and table. Although there is much more -work being done on single class affiliation segmenta￾tion algorithms, there is a publication about multiple -class affiliation segmentation [LRAL08]. Similarly, -recent publications in pixel-level object segmentation -used layered models [YHRF12]. + +A Survey of Semantic Segmentation +Martin Thoma +info@martin-thoma.de +Abstract—This survey gives an overview over different +techniques used for pixel-level semantic segmentation. 
+Metrics and datasets for the evaluation of segmentation algorithms and traditional approaches for segmentation such as unsupervised methods, Decision Forests +and SVMs are described and pointers to the relevant +papers are given. Recently published approaches with +convolutional neural networks are mentioned and typical +problematic situations for segmentation algorithms are +examined. A taxonomy of segmentation algorithms is +given. +I. INTRODUCTION +Semantic segmentation is the task of clustering +parts of images together which belong to the same +object class. This type of algorithm has several usecases such as detecting road signs [MBLAGJ+07], +detecting tumors [MBVLG02], detecting medical instruments in operations [WAH97], colon crypts segmentation [CRSS14], land use and land cover classification [HDT02]. In contrast, non-semantic segmentation +only clusters pixels together based on general characteristics of single objects. Hence the task of non-semantic +segmentation is not well-defined, as many different +segmentations might be acceptable. +Several applications of segmentation in medicine are +listed in [PXP00]. +Object detection, in comparison to semantic segmentation, has to distinguish different instances of the +same object. While having a semantic segmentation +is certainly a big advantage when trying to get object +instances, there are a couple of problems: neighboring +pixels of the same class might belong to different object +instances and regions which are not connected my +belong to the same object instance. For example, a +tree in front of a car which visually divides the car into +two parts. +This paper is organized as follows: It begins by giving +a taxonomy of segmentation algorithms in Section II. +A summary of quality measures and datasets which are +used for semantic segmentation follows in Section III. 
+A summary of traditional segmentation algorithms and +their characteristics follows in Section V, as well as a +brief, non-exhaustive summary of recently published +semantic segmentation algorithms which are based on +neural networks in Section VI. Finally, Section VII +informs the reader about typical problematic cases for +segmentation algorithms. +II. TAXONOMY OF SEGMENTATION ALGORITHMS +The computer vision community has published a +wide range of segmentation algorithms so far. Those +algorithms can be grouped by the kind of data they +operate on and the kind of segmentation they are able +to produce. +The following subsections will give four different +criteria by which segmentation algorithms can be +classified. +This survey describes fixed-class (see Section II-A), +single-class affiliation (see Section II-B) algorithms +which work on grayscale or colored single pixel images +(see Section II-C) in a completely automated, passive +fashion (see Section II-D). +A. Allowed classes +Semantic segmentation is a classification task. As +such, the classes on which the algorithm is trained is a +central design decision. +Most algorithms work with a fixed set of classes; +some even only work on binary classes like foreground vs background [RM07], [CS10] or street vs +no street [BKTT15]. +However, there are also unsupervised segmentation +algorithms which do not distinguish classes at all (see +Section V-B) as well as segmentation algorithms which +are able to recognize when they don’t know a class. +For example, in [GRC+08] a void class was added +for classes which were not in the training set. Such +a void class was also used in the MSRCv2 dataset +(see Section III-B2) to make it possible to make more +coarse segmentations and thus having to spend less +time annotating the image. +B. Class affiliation of pixels +Humans do an incredible job when looking at the +world. 
For example, when we see a glass of water +standing on a table we can automatically say that there +is the glass and behind it the table, even if we only had a +single image and were not allowed to move. This means +we simultaneously two labels to the coordinates of the +glass: Glass and table. Although there is much more +work being done on single class affiliation segmentation algorithms, there is a publication about multiple +class affiliation segmentation [LRAL08]. Similarly, +recent publications in pixel-level object segmentation +used layered models [YHRF12]. arXiv:1602.06541v2 [cs.CV] 11 May 2016 -2 -C. Input Data -The available data which can be used for the -inference of a segmentation varies by application. -• Grayscale vs colored: Grayscale images are -commonly used in medical imaging such as -magnetic resonance (MR) imaging or ultrasonog￾raphy whereas colored photographs are obviously -widespread. -• Excluding or including depth data: RGB-D, -sometimes also called range [HJBJ+96] is avail￾able in robotics, autonomous cars and recently -also in consumer electronics such as Microsoft -Kinect [Zha12]. -• Single image vs stereo images vs co￾segmentation: Single image segmentation is the -most wide-spread kind of segmentation, but using -stereo images was already tried in [BVZ01]. It can -be seen as a more natural way of segmentation as -most mammals have two eyes. It can also be seen -as being related to having depth data. -Co-segmentation as in [RMBK06], [CXGS12] is -the problem of finding a consistent segmentation -for multiple images. This problem can be seen -in two ways: One the one hand, it can be seen -as the problem of finding common objects in at -least two images. On the other hand, every image -after the first can be used as an additional source -of information to find a meaningful segmentation. -This idea can be extended to time series such as -videos. 
-• 2D vs 3D: Segmenting images is a 2D segmenta￾tion task where the smallest unit is called a pixel. -In 3D data, such as volumetric X-ray CT images -as they were used in [HHR01], the smallest unit -is called a voxel. -D. Operation state -The operation state of the classifying machine can -either be active as in [SUM+11], [SSA12] where robots -can move objects to find a segmentation or passive, -where the received image cannot be influenced. Among -the passive algorithms, some segment in a completely -automatic fashion, others work in an interactive mode. -One example would be a system where the user clicks -on the background or marks a coarse segmentation and -the algorithm finds a fine-grained segmentation. [BJ00], -[RKB04], [PS07] describe systems which work in an -interactive mode. -(a) Example Scene (b) Visualization of a found seg￾mentation -Figure 1: An example of a scene and a possible visu￾alization of a found segmentation. -III. EVALUATION AND DATASETS -A. Quality measures for evaluation -A performance measure is a crucial part of any -machine learning system. As users of a semantic -segmentation system expect correct results, the accuracy -is the most commonly used performance measure, but -there are other measures of quality which matter when -segmentation algorithms are compared. This section -gives an overview of those quality measures. -1) Accuracy: Showing the correctness of the segmen￾tation hypotheses is done in most publications about -semantic segmentation. However, there are a couple -of different ways how this accuracy can be displayed. -One way to give readers a first qualitative impression -of the obtained segmentations is by showing examples -such as Figure 1. -However, this can only support the explanation of -particular problems or showcase special situation. For -meaningful information about the overall accuracy, there -are a couple of metrics how accuracy can be defined. 
-For this section, let k ∈ N be the number of classes, -nij ∈ N0 with i, j ∈ 1, . . . , k be the number of pixels -which belong to class i and were labeled as class j. -(nij ) is called a confusion matrix. Let ti = -Pk -j=1 nij -be the total number of pixels of class i. -One way to compare segmentation algorithms is by -the pixel-wise accuracy of the predicted segmentation -as done in many publications [SWRC06], [CP08], -[LSD14]. This is also called per-pixel rate and de￾fined as -Pk -Pi=1 nii -k -i=1 ti -. Taking the pixel-wise classification -accuracy has two major drawbacks: -P1 Tasks like segmenting images for autonomous cars -have large regions which have one class. This -makes achieving classification accuracies of more -than 30 % with a priori knowledge only possible. -For example, a system might learn that a certain -position of the image is most of the time “sky” + +C. Input Data +The available data which can be used for the +inference of a segmentation varies by application. +• Grayscale vs colored: Grayscale images are +commonly used in medical imaging such as +magnetic resonance (MR) imaging or ultrasonography whereas colored photographs are obviously +widespread. +• Excluding or including depth data: RGB-D, +sometimes also called range [HJBJ+96] is available in robotics, autonomous cars and recently +also in consumer electronics such as Microsoft +Kinect [Zha12]. +• Single image vs stereo images vs cosegmentation: Single image segmentation is the +most wide-spread kind of segmentation, but using +stereo images was already tried in [BVZ01]. It can +be seen as a more natural way of segmentation as +most mammals have two eyes. It can also be seen +as being related to having depth data. +Co-segmentation as in [RMBK06], [CXGS12] is +the problem of finding a consistent segmentation +for multiple images. This problem can be seen +in two ways: One the one hand, it can be seen +as the problem of finding common objects in at +least two images. 
On the other hand, every image +after the first can be used as an additional source +of information to find a meaningful segmentation. +This idea can be extended to time series such as +videos. +• 2D vs 3D: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel. +In 3D data, such as volumetric X-ray CT images +as they were used in [HHR01], the smallest unit +is called a voxel. +D. Operation state +The operation state of the classifying machine can +either be active as in [SUM+11], [SSA12] where robots +can move objects to find a segmentation or passive, +where the received image cannot be influenced. Among +the passive algorithms, some segment in a completely +automatic fashion, others work in an interactive mode. +One example would be a system where the user clicks +on the background or marks a coarse segmentation and +the algorithm finds a fine-grained segmentation. [BJ00], +[RKB04], [PS07] describe systems which work in an +interactive mode. +(a) Example Scene (b) Visualization of a found segmentation +Figure 1: An example of a scene and a possible visualization of a found segmentation. +III. EVALUATION AND DATASETS +A. Quality measures for evaluation +A performance measure is a crucial part of any +machine learning system. As users of a semantic +segmentation system expect correct results, the accuracy +is the most commonly used performance measure, but +there are other measures of quality which matter when +segmentation algorithms are compared. This section +gives an overview of those quality measures. +1) Accuracy: Showing the correctness of the segmentation hypotheses is done in most publications about +semantic segmentation. However, there are a couple +of different ways how this accuracy can be displayed. +One way to give readers a first qualitative impression +of the obtained segmentations is by showing examples +such as Figure 1. 
+However, this can only support the explanation of +particular problems or showcase special situation. For +meaningful information about the overall accuracy, there +are a couple of metrics how accuracy can be defined. +For this section, let k ∈ N be the number of classes, +nij ∈ N0 with i, j ∈ 1, . . . , k be the number of pixels +which belong to class i and were labeled as class j. +(nij ) is called a confusion matrix. Let ti = +Pk +j=1 nij +be the total number of pixels of class i. +One way to compare segmentation algorithms is by +the pixel-wise accuracy of the predicted segmentation +as done in many publications [SWRC06], [CP08], +[LSD14]. This is also called per-pixel rate and defined as +Pk +Pi=1 nii +k +i=1 ti +. Taking the pixel-wise classification +accuracy has two major drawbacks: +P1 Tasks like segmenting images for autonomous cars +have large regions which have one class. This +makes achieving classification accuracies of more +than 30 % with a priori knowledge only possible. +For example, a system might learn that a certain +position of the image is most of the time “sky” while another position is most of the time “road”. -3 -P2 The manually labeled images could have a more -coarse labeling. For example, a human classifier -could have labeled a region as “car” and the -algorithm could have split that region into the -general “car” and the more specific “wheel of a -car” -Three accuracy metrics which do not suffer from -problem P1 are used in [LSD14]: -• mean accuracy: -1 -k -· -Pk -i=1 -nii -ti -∈ [0, 1] -• mean intersection over union: -1 -k -· -Pk -i=1 -nii -ti−nii+ -Pk -j=1 nji -∈ [0, 1] -• frequency weighted intersection over union: -( -Pk -i=1 ti) -−1 Pk -i=1 ti -· -nii -ti−nii+ -Pk -j=1 nji -∈ [0, 1] -Another problem might be pixels which cannot be -assigned to one of the known classes. For this reason, -[SWRC06] makes use of a void class. This class gets -completely ignored for all quality measures. 
Hence the -total number of pixels is assumed to be width·height− -number of void pixels. -One way to deal with problem P1 and problem P2 -is giving the confusion matrix as done in [SWRC06]. -However, this approach is not feasible if many classes -are given. -The F-measure is useful for binary classifica￾tion task such as the KITTI road segmentation -benchmark [FKG13] or crypt segmentation as done -by [CRSS14]. It is calculated as “the harmonic mean -of the precision and recall” [PH05]: -Fβ = (1 + β) -2 -tp -(1 + β -2) · tp + β -2 · fn + fp -where β = 1 is chosen in most cases and tp means -true positive, fn means false negative and fp means -false positive. -Finally, it should be noted that a lot of other measures -for the accuracy of segmentations were proposed for -non-semantic segmentation. One of those accuracy -measures is Normalized Probabilistic Rand (NPR) -index which was introduced in [UPH05] and eval￾uated in [CSI+09] on dermoscopy images. Other -non-semantic segmentation measures were introduced -in [MFTM01], but the reason for creating them seems to -be to deal with the under-defined task description of non￾semantic segmentation. These accuracy measures try to -deal with different levels of coarsity of the segmentation. -This is much less of a problem in semantic segmentation -and thus those measures are not explained here. -2) Speed: A maximum upper bound on the execution -time for the inference on a single image is a hard -requirement for some applications. For example, in the -case of autonomous cars an algorithm which classifies -pixel as street or no-street and thus makes a semantic -segmentation, every image needs to be processed within -20 ms [BKTT15]. This time is called latency. -Most papers do not give exact values for the time -their application needs. One reason might be that this is -very hardware, implementation and in some cases even -data specific. For example, [HJBJ+96] notes that their -algorithm needs 10 s on a Sun SparcStation 20. 
The -fastest CPU ever produced for this system had 200 MHz. -Comparing this directly with results which were ob￾tained using an Intel i7-4820K with 3.9 GHz would not -be meaningful. -However, it does still make sense to mention the -execution time as well as the hardware in individual -papers. This gives the interested reader the possibility to -estimate how difficult it might be to adjust the algorithm -to work in the required time-constraints. -Besides the latency, the throughput is another -relevant characteristic of algorithms and implementa￾tions for semantic segmentation. For example, for the -automatic description of images in order to enable text -search the throughput is of much higher importance -than latency. -3) Stability: A reasonable requirement on semantic -segmentation algorithms is the stability of a segmen￾tation over slight changes in the input image. When -the image data is sightly blurred by smoke such as -in Figure 4(c), the segmentation should not change. -Also, two images which show a slight change in -perspective should also only result in slight changes in -the segmentation [PH05]. -4) Memory usage: Peak memory usage matters -when segmentation algorithms are used in devices like -smartphones or cameras, or when the algorithms have -to finish in a given time frame, run on the graphics -processing unit (GPU) and consume so much memory -for single image segmentation that only the latest -graphic cards can be used. However, no publication -were available mentioning the peak memory usage. -B. Datasets -The computer vision community produced a couple -of different datasets which are publicly available. In -the following, only the most widely used ones as well -as three medical databases are described. An overview -over the quantity and the kind of data is given by -Table I. -1) PASCAL VOC: The PASCAL1 VOC2 -challenge -was organized eight times with different datasets: -Once every year from 2005 to 2012 [EVGW+b]. 
-1pattern analysis, statistical modelling and computational learning, -an EU network of excellence + +P2 The manually labeled images could have a more +coarse labeling. For example, a human classifier +could have labeled a region as “car” and the +algorithm could have split that region into the +general “car” and the more specific “wheel of a +car” +Three accuracy metrics which do not suffer from +problem P1 are used in [LSD14]: +• mean accuracy: +1 +k +· +Pk +i=1 +nii +ti +∈ [0, 1] +• mean intersection over union: +1 +k +· +Pk +i=1 +nii +ti−nii+ +Pk +j=1 nji +∈ [0, 1] +• frequency weighted intersection over union: +( +Pk +i=1 ti) +−1 Pk +i=1 ti +· +nii +ti−nii+ +Pk +j=1 nji +∈ [0, 1] +Another problem might be pixels which cannot be +assigned to one of the known classes. For this reason, +[SWRC06] makes use of a void class. This class gets +completely ignored for all quality measures. Hence the +total number of pixels is assumed to be width·height− +number of void pixels. +One way to deal with problem P1 and problem P2 +is giving the confusion matrix as done in [SWRC06]. +However, this approach is not feasible if many classes +are given. +The F-measure is useful for binary classification task such as the KITTI road segmentation +benchmark [FKG13] or crypt segmentation as done +by [CRSS14]. It is calculated as “the harmonic mean +of the precision and recall” [PH05]: +Fβ = (1 + β) +2 +tp +(1 + β +2) · tp + β2 · fn + fp +where β = 1 is chosen in most cases and tp means +true positive, fn means false negative and fp means +false positive. +Finally, it should be noted that a lot of other measures +for the accuracy of segmentations were proposed for +non-semantic segmentation. One of those accuracy +measures is Normalized Probabilistic Rand (NPR) +index which was introduced in [UPH05] and evaluated in [CSI+09] on dermoscopy images. 
Other +non-semantic segmentation measures were introduced +in [MFTM01], but the reason for creating them seems to +be to deal with the under-defined task description of nonsemantic segmentation. These accuracy measures try to +deal with different levels of coarsity of the segmentation. +This is much less of a problem in semantic segmentation +and thus those measures are not explained here. +2) Speed: A maximum upper bound on the execution +time for the inference on a single image is a hard +requirement for some applications. For example, in the +case of autonomous cars an algorithm which classifies +pixel as street or no-street and thus makes a semantic +segmentation, every image needs to be processed within +20 ms [BKTT15]. This time is called latency. +Most papers do not give exact values for the time +their application needs. One reason might be that this is +very hardware, implementation and in some cases even +data specific. For example, [HJBJ+96] notes that their +algorithm needs 10 s on a Sun SparcStation 20. The +fastest CPU ever produced for this system had 200 MHz. +Comparing this directly with results which were obtained using an Intel i7-4820K with 3.9 GHz would not +be meaningful. +However, it does still make sense to mention the +execution time as well as the hardware in individual +papers. This gives the interested reader the possibility to +estimate how difficult it might be to adjust the algorithm +to work in the required time-constraints. +Besides the latency, the throughput is another +relevant characteristic of algorithms and implementations for semantic segmentation. For example, for the +automatic description of images in order to enable text +search the throughput is of much higher importance +than latency. +3) Stability: A reasonable requirement on semantic +segmentation algorithms is the stability of a segmentation over slight changes in the input image. 
When +the image data is sightly blurred by smoke such as +in Figure 4(c), the segmentation should not change. +Also, two images which show a slight change in +perspective should also only result in slight changes in +the segmentation [PH05]. +4) Memory usage: Peak memory usage matters +when segmentation algorithms are used in devices like +smartphones or cameras, or when the algorithms have +to finish in a given time frame, run on the graphics +processing unit (GPU) and consume so much memory +for single image segmentation that only the latest +graphic cards can be used. However, no publication +were available mentioning the peak memory usage. +B. Datasets +The computer vision community produced a couple +of different datasets which are publicly available. In +the following, only the most widely used ones as well +as three medical databases are described. An overview +over the quantity and the kind of data is given by +Table I. +1) PASCAL VOC: The PASCAL1 VOC2challenge +was organized eight times with different datasets: +Once every year from 2005 to 2012 [EVGW+b]. +1pattern analysis, statistical modelling and computational learning, +an EU network of excellence 2Visual Object Classes -4 -Beginning with 2007, a segmentation challenge was -added [EVGW+a]. -The dataset consists of annotated photographs from -www.flicker.com, a photo sharing website. There are -multiple challenges for PASCAL VOC. The 2012 -competition had five challenges of which one is a -segmentation challenge where a single class label was -given for each pixel. The classes are: aeroplane, bicycle, -bird, boat, bottle, bus, car, cat, chair, cow, dining table, -dog, horse, motorbike, person, potted plant, sheep, sofa, -train, tv/monitor. -Although no new competitions will be held, new -algorithms can be evaluated on the 2010, 2011 and -2012 data via http://host.robots.ox.ac.uk:8080/ -The PASCAL VOC segmentation challenges use the -segmentation over union criterion (see Section III-A). 
-2) MSRCv2: Microsoft Research has published a -database of 591 photographs with pixel-level annotation -of 21 classes: aeroplane, bike, bird, boat, body, book, -building, car, cat, chair, cow, dog, face, flower, grass, -road, sheep, sign, sky, tree, water. Additionally, there -is a void label for pixels which do not belong to -any of the 21 classes or which are close to the -segmentation boundary. This allows a “rough and quick -hand-segmentation which does not align exactly with -the object boundaries” [SWRC06]. -3) Medical Databases: The Warwick-QU Dataset -consists of 165 images with pixel-level annotation of -5 classes: “healthy, adenomatous, moderately differen￾tiated, moderately-to-poorly differentiated, and poorly -differentiated” [CSM09]. This dataset is part of the -Gland Segmentation (GlaS) challenge. -The DIARETDB1 [KKV+14] is a dataset of 89 im￾ages fundus images. Those images show the interior -surface of the eye. Fundus images can be used to detect -diabetic retinopathy. The images have four classes of -coarse annotations: hard and soft exudates, hemorrhages -and red small dots. -20 test and additionally 20 training retinal fun￾dus images are available through the DRIVE data -set [SAN+04]. The vessels were annotated. Addition￾ally, [AP11] added vascular features. -The Open-CAS Endoscopic Datasets [MHMK+14] -are 60 images taken from laparoscopic adrenalectomies -and 60 images taken from laparoscopic pancreatic -resections. Those are from 3 surgical procedures each. -Half of the data was annotated by a medical expert for -“medial instrument” and “no medical instrument”. All -images were labeled by anonymous untrained workers -to which they refer to as knowledge workers (KWs). -One crowd annotation was obtained for each image by -a majority vote on a pixel basis of 10 segmentations -given by 10 different KWs. 
-Training -Prediction -Post￾processing -Window-wise -Classification -Window -extraction -Data -augmentation Feature extraction -Preprocessing -Figure 2: A typical segmentation pipeline gets raw -pixel data, applies preprocessing techniques -like scaling and feature extraction like HOG -features. For training, data augmentation -techniques such as image rotation can be -applied. For every single image, patches of -the image called windows are extracted and -those windows are classified. The resulting -semantic segmentation can be refined by -simple morphologic operations or by more -complex approaches such as Markov Random -Fields (MRFs). -IV. SEGMENTATION PIPELINE -Typically, semantic segmentation is done with a -classifier which operates on fixed-size feature inputs -and a sliding-window approach [DT05], [YBCK10], -[SCZ08]. This means a classifier is trained on images -of a fixed size. The trained classifier is then fed with -rectangular regions of the image which are called win￾dows. Although the classifier gets an image patch of e.g. -51 px×51 px of the environment, it might only classify -the center pixel or a subset of the complete window. -This segmentation pipeline is visualized in Figure 2. -This approach was taken by [BKTT15] and a major￾ity of the VOC2007 participants [EVGW+a]. As this -approach has to apply the patch classifier 512 · 512 = -262 144 times for images of size 512 px×512 px, there -are techniques for speeding it up such as applying a -stride and interpolating the results. -Neural networks are able to apply the sliding window -approach in a very efficient way by handling a trained -network as a convolution and applying the convolution -on the complete image. -However, there are alternatives. Namely MRFs and -Conditional Random Fields (CRFs) which take the -information of the complete image and segment it in + +Beginning with 2007, a segmentation challenge was +added [EVGW+a]. 
+The dataset consists of annotated photographs from +www.flickr.com, a photo-sharing website. There are +multiple challenges for PASCAL VOC. The 2012 +competition had five challenges of which one is a +segmentation challenge where a single class label was +given for each pixel. The classes are: aeroplane, bicycle, +bird, boat, bottle, bus, car, cat, chair, cow, dining table, +dog, horse, motorbike, person, potted plant, sheep, sofa, +train, tv/monitor. +Although no new competitions will be held, new +algorithms can be evaluated on the 2010, 2011 and +2012 data via http://host.robots.ox.ac.uk:8080/ +The PASCAL VOC segmentation challenges use the +segmentation over union criterion (see Section III-A). +2) MSRCv2: Microsoft Research has published a +database of 591 photographs with pixel-level annotation +of 21 classes: aeroplane, bike, bird, boat, body, book, +building, car, cat, chair, cow, dog, face, flower, grass, +road, sheep, sign, sky, tree, water. Additionally, there +is a void label for pixels which do not belong to +any of the 21 classes or which are close to the +segmentation boundary. This allows a “rough and quick +hand-segmentation which does not align exactly with +the object boundaries” [SWRC06]. +3) Medical Databases: The Warwick-QU Dataset +consists of 165 images with pixel-level annotation of +5 classes: “healthy, adenomatous, moderately differentiated, moderately-to-poorly differentiated, and poorly +differentiated” [CSM09]. This dataset is part of the +Gland Segmentation (GlaS) challenge. +The DIARETDB1 [KKV+14] is a dataset of 89 fundus images. Those images show the interior +surface of the eye. Fundus images can be used to detect +diabetic retinopathy. The images have four classes of +coarse annotations: hard and soft exudates, hemorrhages +and small red dots. +20 test and additionally 20 training retinal fundus images are available through the DRIVE data +set [SAN+04]. The vessels were annotated.
Additionally, [AP11] added vascular features. +The Open-CAS Endoscopic Datasets [MHMK+14] +are 60 images taken from laparoscopic adrenalectomies +and 60 images taken from laparoscopic pancreatic +resections. Those are from 3 surgical procedures each. +Half of the data was annotated by a medical expert for +“medical instrument” and “no medical instrument”. All +images were labeled by anonymous untrained workers +whom the authors refer to as knowledge workers (KWs). +One crowd annotation was obtained for each image by +a majority vote on a pixel basis of 10 segmentations +given by 10 different KWs. +Training +Prediction +Postprocessing +Window-wise +Classification +Window +extraction +Data +augmentation Feature extraction +Preprocessing +Figure 2: A typical segmentation pipeline gets raw +pixel data, applies preprocessing techniques +like scaling and feature extraction like HOG +features. For training, data augmentation +techniques such as image rotation can be +applied. For every single image, patches of +the image called windows are extracted and +those windows are classified. The resulting +semantic segmentation can be refined by +simple morphological operations or by more +complex approaches such as Markov Random +Fields (MRFs). +IV. SEGMENTATION PIPELINE +Typically, semantic segmentation is done with a +classifier which operates on fixed-size feature inputs +and a sliding-window approach [DT05], [YBCK10], +[SCZ08]. This means a classifier is trained on images +of a fixed size. The trained classifier is then fed with +rectangular regions of the image which are called windows. Although the classifier gets an image patch of e.g. +51 px×51 px of the environment, it might only classify +the center pixel or a subset of the complete window. +This segmentation pipeline is visualized in Figure 2. +This approach was taken by [BKTT15] and a majority of the VOC2007 participants [EVGW+a].
As this +approach has to apply the patch classifier 512 · 512 = +262 144 times for images of size 512 px×512 px, there +are techniques for speeding it up such as applying a +stride and interpolating the results. +Neural networks are able to apply the sliding window +approach in a very efficient way by handling a trained +network as a convolution and applying the convolution +on the complete image. +However, there are alternatives. Namely MRFs and +Conditional Random Fields (CRFs) which take the +information of the complete image and segment it in an holistic approach. -5 -V. TRADITIONAL APPROACHES -Image segmentation algorithms which use traditional -approaches, hence don’t apply neural networks and -make heavy use of domain knowledge, are wide-spread -in the computer vision community. Features which can -be used for segmentation are described in Section V-A, -a very brief overview of unsupervised, non-semantic -segmentation is given in Section V-B, Random Decision -Forests are described in Section V-C, Markov Random -Fields in Section V-E and Support Vector Machines -(SVMs) in Section V-D. Postprocessing is covered in -Section V-G. -It should be noted that algorithms can use combina￾tion of methods. For example, [TNL14] makes use of a -combination of a SVM and a MRF. Also, auto-encoders -can be used to learn features which in turn can be used -by any classifier. -A. Features and Preprocessing methods -The choice of features is very important in traditional -approaches. The most commonly used local and global -features are explained in the following as well as feature -dimensionality reduction algorithms. -1) Pixel Color: Pixel color in different image spaces -(e.g. 3 features for RGB, 3 features for HSV, 1 feature -for the gray-value) are the most widely used features. A -typical image is in the RGB color space, but depending -on the classifier and the problem another color space -might result in better segmentations. 
RGB, YcBcr, HSL, -Lab and YIQ are some examples used by [CRSS14]. -No single color space has been proven to be superior -to all others in all contexts [CJSW01]. However, the -most common choices seem to be RGB and HSI. -Reasons for choosing RGB is simplicity and the support -by programming languages, whereas the choice of -the HSI color space might make it simpler for the -classifier to become invariant to illumination. One -reason for choosing CIE-L*a*b* color space is that it -approximates human perception of brightness [KP92]. -It follows that choosing the L*a*b color space helps -algorithms to detect structures which are seen by -humans. Another way of improving the structure within -an image is histogram equalization, which can be -applied to improve contrast [PAA+87], [RM07]. -2) Histogram of oriented Gradients: Histogram of -oriented gradients (HOG) features interpret the image -as a discrete function I : N -2 → { 0, . . . , 255 } which -maps the position (x, y) to a color. For each pixel, there -are two gradients: The partial derivative of x and y. -Now the original image is transformed to two feature -maps of equal size which represents the gradient. These -feature maps are splitted into patches and a histogram of -the directions is calculated for each patch. HOG features -were proposed in [DT05] and are used in [BMBM10], -[FGMR10] for segmentation tasks. -3) SIFT: Scale-invariant feature transform (SIFT) -feature descriptors describe keypoints in an image. The -image patch of the size 16 × 16 around the keypoint -is taken. This patch is divided in 16 distinct parts of -the size 4 × 4. For each of those parts a histogram of -8 orientations is calculated similar as for HOG features. -This results in a 128-dimensional feature vector for -each keypoint. -It should be emphasized that SIFT is a global feature -for a complete image. -SIFT is described in detail in [Low04] and are used -in [PTN09]. 
-4) BOV: Bag-of-visual-words (BOV), also called -bag of keypoints, is based on vector quantization. -Similar to HOG features, BOV features are histograms -which count the number of occurrences of certain -patterns within a patch of the image. BOV are described -in [CDF+04] and used in combination with SIFT -feature descriptors in [CP08]. -5) Poselets: Poselets rely on manually added extra -keypoints such as “right shoulder”, “left shoulder”, -“right knee” and “left knee”. They were originally -used for human pose estimation. Finding those extra -keypoints is easily possible for well-known image -classes like humans. However, it is difficult for classes -like airplanes, ships, organs or cells where the human -annotators do not know the keypoints. Additionally, the -keypoints have to be chosen for every single class. There -are strategies to deal with those problems like viewpoint￾dependent keypoints. Poselets were used in [BMBM10] -to detect people and in [BBMM11] for general object -detection of the PASCAL VOC dataset. -6) Textons: A texton is the minimal building block -of vision. The computer vision literature does not give a -strict definition for textons, but edge detectors could be -one example. One might argue that deep learning tech￾niques with Convolution Neuronal Networks (CNNs) -learn textons in the first filters. -An excellent explanation of textons can be found -in [ZGWX05]. -7) Dimensionality Reduction: High-resolution im￾ages have a lot of pixels. Having one or more feature per -pixel results in well over a million features. This makes -training difficult while the higher resolution might not -contain much more information. A simple approach -to deal with this is downsampling the high-resolution -image to a low-resolution variant. Another way of -doing dimensionality reduction is principal component -analysis (PCA), which is applied by [COWR11]. The + +V. 
TRADITIONAL APPROACHES +Image segmentation algorithms which use traditional +approaches, hence don’t apply neural networks and +make heavy use of domain knowledge, are wide-spread +in the computer vision community. Features which can +be used for segmentation are described in Section V-A, +a very brief overview of unsupervised, non-semantic +segmentation is given in Section V-B, Random Decision +Forests are described in Section V-C, Markov Random +Fields in Section V-E and Support Vector Machines +(SVMs) in Section V-D. Postprocessing is covered in +Section V-G. +It should be noted that algorithms can use combination of methods. For example, [TNL14] makes use of a +combination of a SVM and a MRF. Also, auto-encoders +can be used to learn features which in turn can be used +by any classifier. +A. Features and Preprocessing methods +The choice of features is very important in traditional +approaches. The most commonly used local and global +features are explained in the following as well as feature +dimensionality reduction algorithms. +1) Pixel Color: Pixel color in different image spaces +(e.g. 3 features for RGB, 3 features for HSV, 1 feature +for the gray-value) are the most widely used features. A +typical image is in the RGB color space, but depending +on the classifier and the problem another color space +might result in better segmentations. RGB, YcBcr, HSL, +Lab and YIQ are some examples used by [CRSS14]. +No single color space has been proven to be superior +to all others in all contexts [CJSW01]. However, the +most common choices seem to be RGB and HSI. +Reasons for choosing RGB is simplicity and the support +by programming languages, whereas the choice of +the HSI color space might make it simpler for the +classifier to become invariant to illumination. One +reason for choosing CIE-L*a*b* color space is that it +approximates human perception of brightness [KP92]. 
+It follows that choosing the L*a*b* color space helps +algorithms to detect structures which are seen by +humans. Another way of improving the structure within +an image is histogram equalization, which can be +applied to improve contrast [PAA+87], [RM07]. +2) Histogram of oriented Gradients: Histogram of +oriented gradients (HOG) features interpret the image +as a discrete function $I : \mathbb{N}^2 \to \{0, \dots, 255\}$ which +maps the position (x, y) to a color. For each pixel, there +are two gradients: the partial derivatives with respect to x and y. +Now the original image is transformed to two feature +maps of equal size which represent the gradient. These +feature maps are split into patches and a histogram of +the directions is calculated for each patch. HOG features +were proposed in [DT05] and are used in [BMBM10], +[FGMR10] for segmentation tasks. +3) SIFT: Scale-invariant feature transform (SIFT) +feature descriptors describe keypoints in an image. The +image patch of the size 16 × 16 around the keypoint +is taken. This patch is divided into 16 distinct parts of +the size 4 × 4. For each of those parts a histogram of +8 orientations is calculated, similar to HOG features. +This results in a 128-dimensional feature vector for +each keypoint. +It should be emphasized that SIFT is a global feature +for a complete image. +SIFT is described in detail in [Low04] and is used +in [PTN09]. +4) BOV: Bag-of-visual-words (BOV), also called +bag of keypoints, is based on vector quantization. +Similar to HOG features, BOV features are histograms +which count the number of occurrences of certain +patterns within a patch of the image. BOV are described +in [CDF+04] and used in combination with SIFT +feature descriptors in [CP08]. +5) Poselets: Poselets rely on manually added extra +keypoints such as “right shoulder”, “left shoulder”, +“right knee” and “left knee”. They were originally +used for human pose estimation.
Finding those extra +keypoints is easily possible for well-known image +classes like humans. However, it is difficult for classes +like airplanes, ships, organs or cells where the human +annotators do not know the keypoints. Additionally, the +keypoints have to be chosen for every single class. There +are strategies to deal with those problems like viewpoint-dependent keypoints. Poselets were used in [BMBM10] +to detect people and in [BBMM11] for general object +detection of the PASCAL VOC dataset. +6) Textons: A texton is the minimal building block +of vision. The computer vision literature does not give a +strict definition for textons, but edge detectors could be +one example. One might argue that deep learning techniques with Convolutional Neural Networks (CNNs) +learn textons in the first filters. +An excellent explanation of textons can be found +in [ZGWX05]. +7) Dimensionality Reduction: High-resolution images have a lot of pixels. Having one or more features per +pixel results in well over a million features. This makes +training difficult while the higher resolution might not +contain much more information. A simple approach +to deal with this is downsampling the high-resolution +image to a low-resolution variant. Another way of +doing dimensionality reduction is principal component +analysis (PCA), which is applied by [COWR11]. The idea behind PCA is to find a hyperplane on which all -6 -feature vectors can be projected with a minimal loss -of information. A detailed description of PCA is given -by [Smi02]. -One problem of PCA is the fact that it does not -distinguish different classes. This means it can happen -that a perfectly linearly separable set of feature vectors -becomes not separable at all after applying PCA. -There are many other techniques for dimensionality -reduction. An overview and a comparison over some -of them is given by [vdMPvdH09]. -B.
Unsupervised Segmentation -Unsupervised segmentation algorithms can be used -in supervised segmentation as another source of infor￾mation or to refine a segmentation. While unsupervised -segmentation algorithms can never be semantic, they are -well-studied and deserve at least a very brief overview. -Semantic segmentation algorithms store information -about the classes they were trained to segment while -non-semantic segmentation algorithms try to detect -consistent regions or region boundaries. -1) Clustering Algorithms: Clustering algorithms can -directly be applied on the pixels, when one gives a -feature vector per pixel. Two clustering algorithms are -k-means and the mean-shift algorithm. -The k-means algorithm is a general-purpose cluster￾ing algorithm which requires the number of clusters to -be given beforehand. Initially, it places the k centroids -randomly in the feature space. Then it assigns each -data point to the nearest centroid, moves the centroid -to the center of the cluster and continues the process -until a stopping criterion is reached. A faster variant is -described in [Har75]. -k-means was applied by [CLP98] for medical image -segmentation. -Another clustering algorithm is the mean-shift algo￾rithm which was introduced by [CM02] for segmen￾tation tasks. The algorithm finds the cluster centers -by initializing centroids at random seed points and -iteratively shifting them to the mean coordinate within -a certain range. Instead of taking a hard range constraint, -the mean can also be calculated by using any kernel. -This effectively applies a weight to the coordinates -of the points. The mean shift algorithm finds cluster -centers at positions with a highest local density of -points. -2) Graph Based Image Segmentation: Graph-based -image segmentation algorithms typically interpret pixels -as vertices and an edge weight is a measure of -dissimilarity such as the difference in color [FH04], -[Fel]. 
There are several different candidates for edges. -The 4-neighborhood (north, east, south west) or an 8- -neighborhood (north, north-east, east, south-east, south, -south-west, west, north-west) are plausible choices. -One way to cut the edges is by building a minimum -spanning tree and removing edges above a threshold. -This threshold can either be constant, adapted to the -graph or adjusted by the user. After the edge-cutting -step, the connected components are the segments. -A graph-based method which ranked 2nd in the -Pascal VOC 2010 challenge [EVGW+10] is described -in [CS10]. The system makes heavy use of the multi￾cue contour detector globalPb [MAFM08] and needs -about 10 GB of main memory [CS11]. -3) Random Walks: Random walks belong to the -graph-based image segmentation algorithms. Random -walk image segmentation usually works as follows: -Seed points are placed on the image for the different -objects in the image. From every single pixel, the -probability to reach the different seed points by a -random walk is calculated. This is done by taking -image gradients as described in Section V-A for HOG -features. The class of the pixel is the class of which a -seed point will be reached with highest probability. At -first, this is an interactive segmentation method, but it -can be extended to be non-interactive by using another -segmentation methods output as seed points. -4) Active Contour Models: Active contour models -(ACMs) are algorithms which segment images roughly -along edges, but also try to find a border which is -smooth. This is done by defining a so called energy -function which will be minimized. They were initially -described in [KWT88]. ACMs can be used to segment -an image or to refine segmentation as it was done -in [AM98] for brain MR images. -5) Watershed Segmentation: The watershed algo￾rithm takes a grayscale image and interprets it as a -height map. 
Low values are catchment basins and -the higher values between two neighboring catchment -basins is the watershed. The catchment basins should -contain what the developer wants to capture. This -implies that those areas must be dark on grayscale -images. The algorithm starts to fill the basins from -the lowest point. When two basins are connected, a -watershed is found. The algorithm stops when the -highest point is reached. -A detailed description of the watershed segmentation -algorithm is given in [RM00]. -The watershed segmentation was used in [JLD03] to -segment white blood cells. As the authors describe, -the segmentation by watershed transform has two -flaws: Over-segmentation due to local minima and thick + +feature vectors can be projected with a minimal loss +of information. A detailed description of PCA is given +by [Smi02]. +One problem of PCA is the fact that it does not +distinguish different classes. This means it can happen +that a perfectly linearly separable set of feature vectors +becomes not separable at all after applying PCA. +There are many other techniques for dimensionality +reduction. An overview and a comparison over some +of them is given by [vdMPvdH09]. +B. Unsupervised Segmentation +Unsupervised segmentation algorithms can be used +in supervised segmentation as another source of information or to refine a segmentation. While unsupervised +segmentation algorithms can never be semantic, they are +well-studied and deserve at least a very brief overview. +Semantic segmentation algorithms store information +about the classes they were trained to segment while +non-semantic segmentation algorithms try to detect +consistent regions or region boundaries. +1) Clustering Algorithms: Clustering algorithms can +directly be applied on the pixels, when one gives a +feature vector per pixel. Two clustering algorithms are +k-means and the mean-shift algorithm. 
+The k-means algorithm is a general-purpose clustering algorithm which requires the number of clusters to +be given beforehand. Initially, it places the k centroids +randomly in the feature space. Then it assigns each +data point to the nearest centroid, moves the centroid +to the center of the cluster and continues the process +until a stopping criterion is reached. A faster variant is +described in [Har75]. +k-means was applied by [CLP98] for medical image +segmentation. +Another clustering algorithm is the mean-shift algorithm which was introduced by [CM02] for segmentation tasks. The algorithm finds the cluster centers +by initializing centroids at random seed points and +iteratively shifting them to the mean coordinate within +a certain range. Instead of taking a hard range constraint, +the mean can also be calculated by using any kernel. +This effectively applies a weight to the coordinates +of the points. The mean-shift algorithm finds cluster +centers at positions with the highest local density of +points. +2) Graph Based Image Segmentation: Graph-based +image segmentation algorithms typically interpret pixels +as vertices and an edge weight is a measure of +dissimilarity such as the difference in color [FH04], +[Fel]. There are several different candidates for edges. +The 4-neighborhood (north, east, south, west) or an 8-neighborhood +(north, north-east, east, south-east, south, +south-west, west, north-west) are plausible choices. +One way to cut the edges is by building a minimum +spanning tree and removing edges above a threshold. +This threshold can either be constant, adapted to the +graph or adjusted by the user. After the edge-cutting +step, the connected components are the segments. +A graph-based method which ranked 2nd in the +Pascal VOC 2010 challenge [EVGW+10] is described +in [CS10]. The system makes heavy use of the multi-cue contour detector globalPb [MAFM08] and needs +about 10 GB of main memory [CS11].
+3) Random Walks: Random walks belong to the +graph-based image segmentation algorithms. Random +walk image segmentation usually works as follows: +Seed points are placed on the image for the different +objects in the image. From every single pixel, the +probability to reach the different seed points by a +random walk is calculated. This is done by taking +image gradients as described in Section V-A for HOG +features. The class of the pixel is the class of which a +seed point will be reached with highest probability. At +first, this is an interactive segmentation method, but it +can be extended to be non-interactive by using another +segmentation methods output as seed points. +4) Active Contour Models: Active contour models +(ACMs) are algorithms which segment images roughly +along edges, but also try to find a border which is +smooth. This is done by defining a so called energy +function which will be minimized. They were initially +described in [KWT88]. ACMs can be used to segment +an image or to refine segmentation as it was done +in [AM98] for brain MR images. +5) Watershed Segmentation: The watershed algorithm takes a grayscale image and interprets it as a +height map. Low values are catchment basins and +the higher values between two neighboring catchment +basins is the watershed. The catchment basins should +contain what the developer wants to capture. This +implies that those areas must be dark on grayscale +images. The algorithm starts to fill the basins from +the lowest point. When two basins are connected, a +watershed is found. The algorithm stops when the +highest point is reached. +A detailed description of the watershed segmentation +algorithm is given in [RM00]. +The watershed segmentation was used in [JLD03] to +segment white blood cells. As the authors describe, +the segmentation by watershed transform has two +flaws: Over-segmentation due to local minima and thick watersheds due to plateaus. -7 -C. 
Random Decision Forests -Random Decision Forests were first proposed -in [Ho95]. This type of classifier applies techniques -called ensemble learning, where multiple classifiers -are trained and a combination of their hypotheses is -used. One ensemble learning technique is the random -subspaces method where each classifier is trained -on a random subspace of the feature space. Another -ensemble learning technique is bagging, which is -training the trees on random subsets of the training set. -In the case of Random Decision Forests, the classifiers -are decision trees. A decision tree is a tree where each -inner node uses one or more features to decide in which -branch to descend. Each leaf is a class. -One strength of Random Decision Forests compared -to many other classifiers like SVMs and neural networks -is that the scale of measure of the features (nominal, -ordinal, interval, ratio) can be arbitrary. Another advan￾tage of Random Decision Forests compared to SVMs, -for example, is the speed of training and classification. -Decision trees were extensively studied in the past -20 years and a multitude of training algorithms have -been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). -Possible training hyperparameters are the measure to -evaluate the “goodness of split” [Min89], the number of -decision trees being used, and if the depth of the trees -is restricted. Typically in the context of classification, -decision trees are trained by adding new nodes until -each leaf contains only nodes of a single class or until it -is not possible to split further. This is called a stopping -criterion. -There are two typical training modes: Central axis -projection and perceptron training. In training, for -each node a hyperplane is searched which is optimal -according to an error function. -Random Decision Forests with texton features (see -Section V-A6) are applied in [SJC08] for segmentation. 
-In the [MSC] dataset, they report a per-pixel accuracy -rate of 66.9 % for their best system. This system -requires 415 ms for the segmentation of 320 px×213 px -images on a single 2.7 GHz core. On the Pascal -VOC 2007 dataset, they report an average per-pixel -accuracy for their best segmentation system of 42 %. -An excellent introduction to Random Decision -Forests for semantic segmentation is given by [SCZ08]. -D. SVMs -SVMs are well-studied binary classifiers which can -be described by five central ideas. For those ideas, the -training data is represented as (xi -, yi) where xi -is the -feature vector and yi ∈ { −1, 1 } the binary label for -training example i ∈ { 1, . . . , m }. -1) If data is linearly separable, it can be separated -by a hyperplane. There is one hyperplane which -maximizes the distance to the next datapoints -(support vectors). This hyperplane should be taken: -minimize -w,b -1 -2 -kwk -2 -s.t. ∀ -m -i=1yi -· (hw, xii + b) -| {z } -sgn applied to this gives the classification -≥ 1 -2) Even if the underlying process which generates the -features for the two classes is linearly separable, -noise can make the data not separable. The intro￾duction of slack variables to relax the requirement -of linear separability solves this problem. The -trade-off between accepting some errors and a -more complex model is weighted by a parameter -C ∈ R -+ -0 -. The bigger C, the more errors are -accepted. The new optimization problem is: -minimize -w -1 -2 -kwk -2 + C · -Xm -i=1 -ξi -s.t. ∀ -m -i=1yi -· (hw, xii + b) ≥ 1 − ξi -Note that 0 ≤ ξi ≤ 1 means that the data point -is within the margin, whereas ξi ≥ 1 means it is -misclassified. An SVM with C > 0 is also called -a soft-margin SVM. -3) The primal problem is to find the normal vector -w and the bias b. 
The dual problem is to express -w as a linear combination of the training data xi -: -w = -Xm -i=1 -αiyixi -where yi ∈ { −1, 1 } represents the class of the -training example and αi are Lagrange multipliers. -The usage of Lagrange multipliers is explained -with some examples in [Smi04]. The usage of the -Lagrange multipliers αi changes the optimization -problem depend on the αi which are weights for -the feature vectors. It turns out that most αi will -be zero. The non-zero weighted vectors are called -support vectors. -The optimization problem is now, according -to [Bur98]: -maximize -αi -Xm -i=1 -αi − -1 -2 -Xm -i=1 -Xm -j=1 -αiαjyiyj hxi -, xj i -s.t. ∀ -m -i=10 ≤ αi ≤ C -s.t. Xm -i=1 + +C. Random Decision Forests +Random Decision Forests were first proposed +in [Ho95]. This type of classifier applies techniques +called ensemble learning, where multiple classifiers +are trained and a combination of their hypotheses is +used. One ensemble learning technique is the random +subspaces method where each classifier is trained +on a random subspace of the feature space. Another +ensemble learning technique is bagging, which is +training the trees on random subsets of the training set. +In the case of Random Decision Forests, the classifiers +are decision trees. A decision tree is a tree where each +inner node uses one or more features to decide in which +branch to descend. Each leaf is a class. +One strength of Random Decision Forests compared +to many other classifiers like SVMs and neural networks +is that the scale of measure of the features (nominal, +ordinal, interval, ratio) can be arbitrary. Another advantage of Random Decision Forests compared to SVMs, +for example, is the speed of training and classification. +Decision trees were extensively studied in the past +20 years and a multitude of training algorithms have +been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). 
+Possible training hyperparameters are the measure to +evaluate the “goodness of split” [Min89], the number of +decision trees being used, and if the depth of the trees +is restricted. Typically in the context of classification, +decision trees are trained by adding new nodes until +each leaf contains only nodes of a single class or until it +is not possible to split further. This is called a stopping +criterion. +There are two typical training modes: Central axis +projection and perceptron training. In training, for +each node a hyperplane is searched which is optimal +according to an error function. +Random Decision Forests with texton features (see +Section V-A6) are applied in [SJC08] for segmentation. +In the [MSC] dataset, they report a per-pixel accuracy +rate of 66.9 % for their best system. This system +requires 415 ms for the segmentation of 320 px×213 px +images on a single 2.7 GHz core. On the Pascal +VOC 2007 dataset, they report an average per-pixel +accuracy for their best segmentation system of 42 %. +An excellent introduction to Random Decision +Forests for semantic segmentation is given by [SCZ08]. +D. SVMs +SVMs are well-studied binary classifiers which can +be described by five central ideas. For those ideas, the +training data is represented as $(x_i, y_i)$ where $x_i$ is the +feature vector and $y_i \in \{-1, 1\}$ the binary label for +training example $i \in \{1, \dots, m\}$. +1) If data is linearly separable, it can be separated +by a hyperplane. There is one hyperplane which +maximizes the distance to the next datapoints +(support vectors). This hyperplane should be taken: +$$\min_{w,b} \frac{1}{2}\|w\|^2 \quad \text{s.t.} \quad \forall_{i=1}^{m}: \; y_i \cdot \underbrace{(\langle w, x_i \rangle + b)}_{\text{sgn applied to this gives the classification}} \geq 1$$ +2) Even if the underlying process which generates the +features for the two classes is linearly separable, +noise can make the data not separable. The introduction of slack variables to relax the requirement +of linear separability solves this problem.
The +trade-off between accepting some errors and a +more complex model is weighted by a parameter +C ∈ R ++ +0 +. The bigger C, the more errors are +accepted. The new optimization problem is: +minimize +w +1 +2 +kwk +2 + C · +Xm +i=1 +ξi +s.t. ∀ +m +i=1yi +· (hw, xii + b) ≥ 1 − ξi +Note that 0 ≤ ξi ≤ 1 means that the data point +is within the margin, whereas ξi ≥ 1 means it is +misclassified. An SVM with C > 0 is also called +a soft-margin SVM. +3) The primal problem is to find the normal vector +w and the bias b. The dual problem is to express +w as a linear combination of the training data xi: +w = +Xm +i=1 +αiyixi +where yi ∈ { −1, 1 } represents the class of the +training example and αi are Lagrange multipliers. +The usage of Lagrange multipliers is explained +with some examples in [Smi04]. The usage of the +Lagrange multipliers αi changes the optimization +problem depend on the αi which are weights for +the feature vectors. It turns out that most αi will +be zero. The non-zero weighted vectors are called +support vectors. +The optimization problem is now, according +to [Bur98]: +maximize +αi +Xm +i=1 +αi − +1 +2 +Xm +i=1 +Xm +j=1 +αiαjyiyj hxi, xj i +s.t. ∀ +m +i=10 ≤ αi ≤ C +s.t. Xm +i=1 αiyi = 0 -8 -4) Not every dataset is linearly separable. This prob￾lem is approached by transforming the feature -vectors x with a non-linear mapping Φ into -a higher dimensional (probably ∞-dimensional) -space. As the feature vectors x are only used -within scalar product hxi -, xj i, it is not necessary -to do the transformation. It is enough to do the -calculation -K(xi -, xj ) = hxi -, xj i -This function K is called a kernel. The idea of -never explicitly transforming the vectors xi -to the -higher dimensional space is called the kernel trick. 
-Common kernels include the polynomial kernel -KP (xi -, xj ) = (hxi -, xj i + r) -p -of degree p and coefficient r, the Gaussian radial -basis function (RBF) kernel -KGauss(xi -, xj ) = e -−γkxi−xj k -2 -2σ2 -and the sigmoid kernel -Ktanh(xi -, xj ) = tanh(γhxi -, xj i − r) -where the parameter γ determines how much -influence single training examples have. -5) The described SVMs can only distinguish between -two classes. Common strategies to expand those -binary classifiers to multi-class classification is -the one-vs-all and the one-vs-one strategy. In the -one-vs-all strategy n classifiers have to be trained -which can distinguish one of the n classes against -all other classes. In the one-vs-one strategy n -2−n -2 -classifiers are trained; one classifier for each pair -of classes. -A detailed description of SVMs can be found -in [Bur98]. -SVMs are used by [YHRF12] on the 2009 and 2010 -PASCAL segmentation challenge [EVGW+10]. They -did not hand their classifier in to the challenge itself, -but calculated an average rank of 7 among the different -categories. -[FGMR10] also used an SVM based method with -HOG features and achieved the 7th rank in the 2010 -PASCAL segmentation challenge by mean accuracy. It -needs about 2 s on a 2.8 GHz 8-core Intel processor. -E. Markov Random Fields -MRFs are undirected probabilistic graphical models -which are wide-spread model in computer vision. The -overall idea of MRFs is to assign a random variable for -each feature and a random variable for each pixel which -x1 x2 x3 -x4 x5 x6 -x7 x8 x9 -y1 y2 y3 -y4 y5 y6 -y7 y8 y9 -x1 x2 x3 -x4 x5 x6 -x7 x8 x9 -y1 y2 y3 -y4 y5 y6 -y7 y8 y9 -Figure 3: CRF with 4-neighborhood. Each node xi -represents a pixel and each node yi represents -a label. -gets labeled as shown in Figure 3. For example, a MRF -which is trained on images of the size 224 px×224 pixel -and gets the raw RGB values as features has -224 · 224 · 3 -| {z } -input -+ 224 · 224 -| {z } -output -= 200 704 -random variables. 
Those random variables are condi￾tionally independent, given their local neighborhood. -These (in)dependencies can be expressed with a graph. -Let G = (V, E) be the associated undirected graph -of an MRF and C be the set of all maximal cliques in -that graph. Nodes represent random variables x, y and -edges represent conditional dependencies. Just like in -he 4-neighborhood [SWRC06] and the 8-neighborhood -are reasonable choices for constructing the graph. -Typically, random variables y represent the class of a -single pixel, random variables x represent a pixel values -and edges represent pixel neighborhood in computer -vision problems segmentation problems where MRFs -are used. Accordingly, the random variables y live -on 1, . . . , nr of classes and the random variables x -typically live on 0, . . . , 255 or [0, 1]. -The probability of x, y can be expressed as -P(x, y) = 1 -Z -e -−E(x,y) -where Z = -P -x,y -e -−E(x,y) -is a normalization term -called the partition function and E is called the energy -function. A common choice for the energy function is -E(x, y) = X -c∈C -ψc(x, y) -where ψ is called a clique potential. One choice for -cliques of size two x, y = (x1, x2) is [KP06] -ψc(x1, x2) = wδ(x1, x2) = ( -+w if x1 6= x2 -−w if x1 = x2 -According to [Mur12], the most common way of -inference over the posterior MRF in computer vision + +4) Not every dataset is linearly separable. This problem is approached by transforming the feature +vectors x with a non-linear mapping Φ into +a higher dimensional (probably ∞-dimensional) +space. As the feature vectors x are only used +within scalar product hxi, xj i, it is not necessary +to do the transformation. It is enough to do the +calculation +K(xi, xj ) = hxi, xj i +This function K is called a kernel. The idea of +never explicitly transforming the vectors xito the +higher dimensional space is called the kernel trick. 
+Common kernels include the polynomial kernel +KP (xi, xj ) = (hxi, xj i + r) +p +of degree p and coefficient r, the Gaussian radial +basis function (RBF) kernel +KGauss(xi, xj ) = e +−γkxi−xj k +2 +2σ2 +and the sigmoid kernel +Ktanh(xi, xj ) = tanh(γhxi, xj i − r) +where the parameter γ determines how much +influence single training examples have. +5) The described SVMs can only distinguish between +two classes. Common strategies to expand those +binary classifiers to multi-class classification is +the one-vs-all and the one-vs-one strategy. In the +one-vs-all strategy n classifiers have to be trained +which can distinguish one of the n classes against +all other classes. In the one-vs-one strategy n +2−n +2 +classifiers are trained; one classifier for each pair +of classes. +A detailed description of SVMs can be found +in [Bur98]. +SVMs are used by [YHRF12] on the 2009 and 2010 +PASCAL segmentation challenge [EVGW+10]. They +did not hand their classifier in to the challenge itself, +but calculated an average rank of 7 among the different +categories. +[FGMR10] also used an SVM based method with +HOG features and achieved the 7th rank in the 2010 +PASCAL segmentation challenge by mean accuracy. It +needs about 2 s on a 2.8 GHz 8-core Intel processor. +E. Markov Random Fields +MRFs are undirected probabilistic graphical models +which are wide-spread model in computer vision. The +overall idea of MRFs is to assign a random variable for +each feature and a random variable for each pixel which +x1 x2 x3 +x4 x5 x6 +x7 x8 x9 +y1 y2 y3 +y4 y5 y6 +y7 y8 y9 +x1 x2 x3 +x4 x5 x6 +x7 x8 x9 +y1 y2 y3 +y4 y5 y6 +y7 y8 y9 +Figure 3: CRF with 4-neighborhood. Each node xi +represents a pixel and each node yi represents +a label. +gets labeled as shown in Figure 3. For example, a MRF +which is trained on images of the size 224 px×224 pixel +and gets the raw RGB values as features has +224 · 224 · 3 +| {z } +input ++ 224 · 224 +| {z } +output += 200 704 +random variables. 
Those random variables are conditionally independent, given their local neighborhood. +These (in)dependencies can be expressed with a graph. +Let G = (V, E) be the associated undirected graph +of an MRF and C be the set of all maximal cliques in +that graph. Nodes represent random variables x, y and +edges represent conditional dependencies. Just like in +he 4-neighborhood [SWRC06] and the 8-neighborhood +are reasonable choices for constructing the graph. +Typically, random variables y represent the class of a +single pixel, random variables x represent a pixel values +and edges represent pixel neighborhood in computer +vision problems segmentation problems where MRFs +are used. Accordingly, the random variables y live +on 1, . . . , nr of classes and the random variables x +typically live on 0, . . . , 255 or [0, 1]. +The probability of x, y can be expressed as +P(x, y) = 1 +Z +e +−E(x,y) +where Z = +P +x,y +e +−E(x,y) +is a normalization term +called the partition function and E is called the energy +function. A common choice for the energy function is +E(x, y) = X +c∈C +ψc(x, y) +where ψ is called a clique potential. One choice for +cliques of size two x, y = (x1, x2) is [KP06] +ψc(x1, x2) = wδ(x1, x2) = ( ++w if x1 6= x2 +−w if x1 = x2 +According to [Mur12], the most common way of +inference over the posterior MRF in computer vision problems is Maximum A Posteriori (MAP) estimation. -9 -Detailed introductions to MRFs are given by -[BKR11], [Mur12]. MRFs are used by [ZBS01] and -[MSB12] for image segmentation. -F. Conditional Random Fields -CRFs are MRFs where all clique potentials are -conditioned on input features [Mur12]. This means, -instead of learning the distribution P(y, x), the task -is reformulated to learn the distribution P(y|x). One -consequence of this reformulation is that CRFs need -much less parameters as the distribution of x does -not have to be estimated. 
Another advantage of CRFs -compared to MRFs is that no distribution assumption -about x has to be made. -A CRF has the partition function Z: -Z(x) = X -y -P(x, y) -and joint probability distribution -P(y|x) = 1 -Z(x) -Y -c∈C -ψc(yc|x) -The simplest way to define the clique potentials ψ is -the count of the class yc given x added with a positive -smoothing constant to prevent the complete term from -getting zero. -CRFs as described in [LRKT09] have reached top -performance in PASCAL VOC 2010 [VOC10] and -are also used in [HZCP04], [SWRC06] for semantic -segmentation. -A method similar to CRFs was proposed -in [GBVdW+10]. The system of Gonfaus et.al. -ranked 1st by mean accuracy in the segmentation task -of the PASCAL VOC 2010 challenge [EVGW+10]. -An introduction to CRFs is given by [SM11]. -G. Post-processing methods -Post-processing refine a found segmentation and -remove obvious errors. For example, the morphological -operations opening and closing can remove noise. The -opening operation is a dilation followed by a erosion. -This removes tiny segments. The closing operation is a -erosion followed by a dilation. This removes tiny gaps -in otherwise filled regions. They were used in [CLP98] -for biomedical image segmentation. -Another way of refinement of the found segmentation -is by adjusting the segmentation to match close edges. -This was used in [BBMM11] with an ultra-metric -contour map [AMFM09]. -Active contour models are another example of a -post-processing method [KWT88]. -VI. NEURAL NETWORKS FOR SEMANTIC -SEGMENTATION -Artificial neural networks are classifiers which are -inspired by biologic neurons. Every single artificial -neuron has some inputs which are weighted and sumed -up. Then, the neuron applies a so called activation -function to the weighted sum and gives an output. Those -neurons can take either a feature vector as input or the -output of other neurons. In this way, they build up -feature hierarchies. 
-The parameters they learn are the weights w ∈ R. -They are learned by gradient descent. To do so, an error -function — usually cross-entropy or mean squared error -— is necessary. For the gradient descent algorithm, one -sees the labeled training data as given, the weights -as variables and the error function as a surface in -this weight-space. Minimizing the error function in the -weight space adapts the neural network to the problem. -There are lots of ideas around neural networks like -regularization, better optimization algorithms, automat￾ically building up architectures, design choices for -activation functions. This is not explained in detail here, -but some of the mayor breakthroughs are outlined. -CNNs are neural networks which learn image filters. -They drastically reduce the number of parameters which -have to be learned while being still general enough for -the problem domain of images. This was shown by Alex -Krizhevsky et al. in [KSH12]. One major idea was a -clever regularization called dropout training, which set -the output of neurons while training randomly to zero. -Another contribution was the usage of an activation -function called rectified linear unit: -ϕReLU(x) = max(0, x) -Those are much faster to train than the commonly used -sigmoid activation functions -ϕSigmoid(x) = 1 -e−x + 1 -Krizhevsky et al. implemented those ideas and partici￾pated in the ImageNet Large-Scale Visual Recognition -Challenge (ILSVRC). The best other system, which -used SIFT features and Fisher Vectors, had a perfor￾mance of about 25.7 % while the network by Alex -Krizhevsky et al. got 17.0 % error rate on the ILSVRC￾2010 dataset. As a preprocessing step, they downsam￾pled all images to a fixed size of 256 px×256 px before -they fed the features into their network. This network -is commonly known as AlexNet. -Since AlexNet was developed, a lot of different -neural networks have been proposed. 
One interesting -example is [PC13], where a recurrent CNN for semantic + +Detailed introductions to MRFs are given by +[BKR11], [Mur12]. MRFs are used by [ZBS01] and +[MSB12] for image segmentation. +F. Conditional Random Fields +CRFs are MRFs where all clique potentials are +conditioned on input features [Mur12]. This means, +instead of learning the distribution P(y, x), the task +is reformulated to learn the distribution P(y|x). One +consequence of this reformulation is that CRFs need +much less parameters as the distribution of x does +not have to be estimated. Another advantage of CRFs +compared to MRFs is that no distribution assumption +about x has to be made. +A CRF has the partition function Z: +Z(x) = X +y +P(x, y) +and joint probability distribution +P(y|x) = 1 +Z(x) +Y +c∈C +ψc(yc|x) +The simplest way to define the clique potentials ψ is +the count of the class yc given x added with a positive +smoothing constant to prevent the complete term from +getting zero. +CRFs as described in [LRKT09] have reached top +performance in PASCAL VOC 2010 [VOC10] and +are also used in [HZCP04], [SWRC06] for semantic +segmentation. +A method similar to CRFs was proposed +in [GBVdW+10]. The system of Gonfaus et.al. +ranked 1st by mean accuracy in the segmentation task +of the PASCAL VOC 2010 challenge [EVGW+10]. +An introduction to CRFs is given by [SM11]. +G. Post-processing methods +Post-processing refine a found segmentation and +remove obvious errors. For example, the morphological +operations opening and closing can remove noise. The +opening operation is a dilation followed by a erosion. +This removes tiny segments. The closing operation is a +erosion followed by a dilation. This removes tiny gaps +in otherwise filled regions. They were used in [CLP98] +for biomedical image segmentation. +Another way of refinement of the found segmentation +is by adjusting the segmentation to match close edges. +This was used in [BBMM11] with an ultra-metric +contour map [AMFM09]. 
+Active contour models are another example of a +post-processing method [KWT88]. +VI. NEURAL NETWORKS FOR SEMANTIC +SEGMENTATION +Artificial neural networks are classifiers which are +inspired by biologic neurons. Every single artificial +neuron has some inputs which are weighted and sumed +up. Then, the neuron applies a so called activation +function to the weighted sum and gives an output. Those +neurons can take either a feature vector as input or the +output of other neurons. In this way, they build up +feature hierarchies. +The parameters they learn are the weights w ∈ R. +They are learned by gradient descent. To do so, an error +function — usually cross-entropy or mean squared error +— is necessary. For the gradient descent algorithm, one +sees the labeled training data as given, the weights +as variables and the error function as a surface in +this weight-space. Minimizing the error function in the +weight space adapts the neural network to the problem. +There are lots of ideas around neural networks like +regularization, better optimization algorithms, automatically building up architectures, design choices for +activation functions. This is not explained in detail here, +but some of the mayor breakthroughs are outlined. +CNNs are neural networks which learn image filters. +They drastically reduce the number of parameters which +have to be learned while being still general enough for +the problem domain of images. This was shown by Alex +Krizhevsky et al. in [KSH12]. One major idea was a +clever regularization called dropout training, which set +the output of neurons while training randomly to zero. +Another contribution was the usage of an activation +function called rectified linear unit: +ϕReLU(x) = max(0, x) +Those are much faster to train than the commonly used +sigmoid activation functions +ϕSigmoid(x) = 1 +e−x + 1 +Krizhevsky et al. implemented those ideas and participated in the ImageNet Large-Scale Visual Recognition +Challenge (ILSVRC). 
The best other system, which +used SIFT features and Fisher Vectors, had a performance of about 25.7 % while the network by Alex +Krizhevsky et al. got 17.0 % error rate on the ILSVRC2010 dataset. As a preprocessing step, they downsampled all images to a fixed size of 256 px×256 px before +they fed the features into their network. This network +is commonly known as AlexNet. +Since AlexNet was developed, a lot of different +neural networks have been proposed. One interesting +example is [PC13], where a recurrent CNN for semantic segmentation is presented. -10 -Another notable paper is [LSD14]. The algorithm -presented there makes use of a classifying network such -as AlexNet, but applies the complete network as an -image filter. This way, each pixel gets a probability -distribution for each of the trained classes. By taking -the most likely class, a semantic segmentation can be -done with arbitrary image sizes. -A very recent publication by Dai et al. [DHS15] -showed that segmentation with much deeper networks -is possible and achieves better results. -More detailed explanations to neural networks for -visual recognition is given by [LKJ15]. -VII. POSSIBLE PROBLEMS IN THE DATA FOR -SEGMENTATION ALGORITHMS -Different segmentation workflows have different -problems. However, there are a couple of special cases -which should be tested. Those cases might not occur -often in the training data, but it could still happen in -the productive system. -I am not aware of any systematic work which exam￾ined the influence of problems such as the following. -A. Lens Flare -Lens flare is the effect of light getting scattered in -the lens system of the camera. The testing data set of -the KITTI road evaluation benchmark [FKG13] has a -couple of photos with this problem. Figure 4(a) shows -an extreme example of lens flare. -B. Vignetting -Vignetting is the effect of a photograph getting darker -in the corners. 
This can have many reasons, for example -filters on the camera blocking light at the corners. -C. Blurred images -Images can be blurred for a couple of reasons. A -problem with the lenses mechanics, focusing on the -wrong point, too quick movement, smoke or foam. One -example of a blurred image is Figure 4(c), which was -taken during an in vivo porcine procedure of diaphragm -dissection. The smoke was caused by cauterization. -D. Other Problems -If the following effects can occur at all and if they -are problems depends heavily on the problem domain -and the used model. -1) Partial Occlusions: Segmentation systems which -employ a model of the objects which should be -segmented might suffer from partial occlusions. -(a) Lens Flare -Image by [Hus07] -(b) Vignetting -Image by [Man12] -(c) Smoke by cauterization -Image by [GVSY13] -(d) Camouflage -Image by [Kaf07] -(e) Transparency (f) Viewpoint -Figure 4: Examples of images which might cause -semantic segmentation systems to fail. -2) Camouflage: Some objects, like animals in the -wild, actively try to hide (see Figure 4(d) as an example). -In other cases it might just be bad luck that objects -are hard for humans to detect. This problem has two -interesting aspects: On the one hand, the segmenting -system might suffer from the same problems as humans -do. On the other hand, the segmenting system might be -better than humans are, but it is forced to learn from -images labeled by humans. If the labels are wrong, the -system is forced to learn something wrong. -3) Semi-transparent Occlusion: Some objects like -drinking glasses can be visible and still leave the object -behind them visible as shown in Figure 4(e). This is -mainly a definition problem: Is the seen pixel the glass -label or the smartphone label? -4) Viewpoints: Changes in viewpoints can be a -problem, if they don’t occur in the training data. 
For -example, an image captioning system which was trained -on photographs of professional photographers might -not have photos from the point of view of a child. This + +Another notable paper is [LSD14]. The algorithm +presented there makes use of a classifying network such +as AlexNet, but applies the complete network as an +image filter. This way, each pixel gets a probability +distribution for each of the trained classes. By taking +the most likely class, a semantic segmentation can be +done with arbitrary image sizes. +A very recent publication by Dai et al. [DHS15] +showed that segmentation with much deeper networks +is possible and achieves better results. +More detailed explanations to neural networks for +visual recognition is given by [LKJ15]. +VII. POSSIBLE PROBLEMS IN THE DATA FOR +SEGMENTATION ALGORITHMS +Different segmentation workflows have different +problems. However, there are a couple of special cases +which should be tested. Those cases might not occur +often in the training data, but it could still happen in +the productive system. +I am not aware of any systematic work which examined the influence of problems such as the following. +A. Lens Flare +Lens flare is the effect of light getting scattered in +the lens system of the camera. The testing data set of +the KITTI road evaluation benchmark [FKG13] has a +couple of photos with this problem. Figure 4(a) shows +an extreme example of lens flare. +B. Vignetting +Vignetting is the effect of a photograph getting darker +in the corners. This can have many reasons, for example +filters on the camera blocking light at the corners. +C. Blurred images +Images can be blurred for a couple of reasons. A +problem with the lenses mechanics, focusing on the +wrong point, too quick movement, smoke or foam. One +example of a blurred image is Figure 4(c), which was +taken during an in vivo porcine procedure of diaphragm +dissection. The smoke was caused by cauterization. +D. 
Other Problems +If the following effects can occur at all and if they +are problems depends heavily on the problem domain +and the used model. +1) Partial Occlusions: Segmentation systems which +employ a model of the objects which should be +segmented might suffer from partial occlusions. +(a) Lens Flare +Image by [Hus07] +(b) Vignetting +Image by [Man12] +(c) Smoke by cauterization +Image by [GVSY13] +(d) Camouflage +Image by [Kaf07] +(e) Transparency (f) Viewpoint +Figure 4: Examples of images which might cause +semantic segmentation systems to fail. +2) Camouflage: Some objects, like animals in the +wild, actively try to hide (see Figure 4(d) as an example). +In other cases it might just be bad luck that objects +are hard for humans to detect. This problem has two +interesting aspects: On the one hand, the segmenting +system might suffer from the same problems as humans +do. On the other hand, the segmenting system might be +better than humans are, but it is forced to learn from +images labeled by humans. If the labels are wrong, the +system is forced to learn something wrong. +3) Semi-transparent Occlusion: Some objects like +drinking glasses can be visible and still leave the object +behind them visible as shown in Figure 4(e). This is +mainly a definition problem: Is the seen pixel the glass +label or the smartphone label? +4) Viewpoints: Changes in viewpoints can be a +problem, if they don’t occur in the training data. For +example, an image captioning system which was trained +on photographs of professional photographers might +not have photos from the point of view of a child. This is visualized in Figure 4(f). -11 -VIII. DISCUSSION -Ohta et al. wrote [OKS78] 38 years ago. It is one -of the first papers mentioning semantic segmentation. -In this time, a lot of work was done and many -different directions have been explored. Different kinds -of semantic segmentation have emerged. 
-This paper presents a taxonomy of those kinds -of semantic segmentation and a brief overview of -completely automatic, passive, semantic segmentation -algorithms. -Future work includes a comparative study of -those algorithms on publicly available dataset such -as the ones presented in Table I. Another open -question is the influence of the problems described -in Section VII. This could be done using a subset of the -thousands of images of Wikipedia Commons, such as -https://commons.wikimedia.org/wiki/Category:Blurring -for blurred images. -A combination of different classifiers in an ensemble -would be an interesting option to explore in order to -improve accuracy. Another direction which is currently -studied is combining classifiers such as neural networks -with CRFs [ZJRP+15]. -REFERENCES -[AM98] M. S. Atkins and B. T. Mackiewich, “Fully -automatic segmentation of the brain in -mri,” Medical Imaging, IEEE Transactions -on, vol. 17, no. 1, pp. 98–107, Feb. 1998. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=668699 -[AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and -J. Malik, “From contours to regions: An -empirical evaluation,” in Computer Vision and -Pattern Recognition, 2009. CVPR 2009. IEEE -Conference on. IEEE, Jun. 2009, pp. 2294–2301. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=5206707 -[AP11] G. Azzopardi and N. Petkov, “Detection of -retinal vascular bifurcations by trainable v4-like -filters,” in Computer Analysis of Images and -Patterns. Springer, 2011, pp. 451–459. [Online]. -Available: http://www.cs.rug.nl/~imaging/databases/ -retina_database/retinalfeatures_database.html -[BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik, -“Object segmentation by alignment of poselet -activations to image contours,” in Computer Vision -and Pattern Recognition (CVPR), 2011 IEEE -Conference on. IEEE, Jun. 2011, pp. 2225–2232. -[Online]. 
Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=5995659 -[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ -segmentation using graph cuts,” in Medical Image -Computing and Computer-Assisted Intervention– -MICCAI 2000. Springer, 2000, pp. 276– -286. [Online]. Available: http://link.springer.com/ -chapter/10.1007/978-3-540-40899-4_28 -[BKR11] A. Blake, P. Kohli, and C. Rother, Markov random -fields for vision and image processing. Mit Press, -2011. -[BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma, -“Pixel-wise segmentation of street with neural -networks,” arXiv preprint arXiv:1511.00513, 2015. -[Online]. Available: http://arxiv.org/abs/1511.00513 -[BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik, -“Detecting people using mutually consistent -poselet activations,” in Computer Vision–ECCV -2010. Springer, 2010, pp. 168–181. [Online]. -Available: http://link.springer.com/chapter/10.1007/ -978-3-642-15567-3_13#page-1 -[Bur98] C. J. Burges, “A tutorial on support vector machines -for pattern recognition,” Data mining and knowledge -discovery, vol. 2, no. 2, pp. 121–167, 1998. -[BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast -approximate energy minimization via graph cuts,” -Pattern Analysis and Machine Intelligence, IEEE -Transactions on, vol. 23, no. 11, pp. 1222–1239, -2001. [Online]. Available: http://ieeexplore.ieee.org/ -xpls/abs_all.jsp?arnumber=969114 -[CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski, -and C. Bray, “Visual categorization with bags of -keypoints,” in Workshop on statistical learning in -computer vision, ECCV, vol. 1, no. 1-22. Prague, -2004, pp. 1–2. -[CJSW01] H.-D. Cheng, X. Jiang, Y. Sun, and J. Wang, -“Color image segmentation: advances and prospects,” -Pattern recognition, vol. 34, no. 12, pp. 2259–2281, -2001. -[CLP98] C. W. Chen, J. Luo, and K. J. 
Parker, “Image -segmentation via adaptive k-mean clustering and -knowledge-based morphological operations with -biomedical applications,” Image Processing, IEEE + +VIII. DISCUSSION +Ohta et al. wrote [OKS78] 38 years ago. It is one +of the first papers mentioning semantic segmentation. +In this time, a lot of work was done and many +different directions have been explored. Different kinds +of semantic segmentation have emerged. +This paper presents a taxonomy of those kinds +of semantic segmentation and a brief overview of +completely automatic, passive, semantic segmentation +algorithms. +Future work includes a comparative study of +those algorithms on publicly available dataset such +as the ones presented in Table I. Another open +question is the influence of the problems described +in Section VII. This could be done using a subset of the +thousands of images of Wikipedia Commons, such as +https://commons.wikimedia.org/wiki/Category:Blurring +for blurred images. +A combination of different classifiers in an ensemble +would be an interesting option to explore in order to +improve accuracy. Another direction which is currently +studied is combining classifiers such as neural networks +with CRFs [ZJRP+15]. +REFERENCES +[AM98] M. S. Atkins and B. T. Mackiewich, “Fully +automatic segmentation of the brain in +mri,” Medical Imaging, IEEE Transactions +on, vol. 17, no. 1, pp. 98–107, Feb. 1998. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=668699 +[AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and +J. Malik, “From contours to regions: An +empirical evaluation,” in Computer Vision and +Pattern Recognition, 2009. CVPR 2009. IEEE +Conference on. IEEE, Jun. 2009, pp. 2294–2301. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5206707 +[AP11] G. Azzopardi and N. Petkov, “Detection of +retinal vascular bifurcations by trainable v4-like +filters,” in Computer Analysis of Images and +Patterns. Springer, 2011, pp. 451–459. 
[Online]. +Available: http://www.cs.rug.nl/~imaging/databases/ +retina_database/retinalfeatures_database.html +[BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik, +“Object segmentation by alignment of poselet +activations to image contours,” in Computer Vision +and Pattern Recognition (CVPR), 2011 IEEE +Conference on. IEEE, Jun. 2011, pp. 2225–2232. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5995659 +[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ +segmentation using graph cuts,” in Medical Image +Computing and Computer-Assisted Intervention– +MICCAI 2000. Springer, 2000, pp. 276– +286. [Online]. Available: http://link.springer.com/ +chapter/10.1007/978-3-540-40899-4_28 +[BKR11] A. Blake, P. Kohli, and C. Rother, Markov random +fields for vision and image processing. Mit Press, +2011. +[BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma, +“Pixel-wise segmentation of street with neural +networks,” arXiv preprint arXiv:1511.00513, 2015. +[Online]. Available: http://arxiv.org/abs/1511.00513 +[BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik, +“Detecting people using mutually consistent +poselet activations,” in Computer Vision–ECCV +2010. Springer, 2010, pp. 168–181. [Online]. +Available: http://link.springer.com/chapter/10.1007/ +978-3-642-15567-3_13#page-1 +[Bur98] C. J. Burges, “A tutorial on support vector machines +for pattern recognition,” Data mining and knowledge +discovery, vol. 2, no. 2, pp. 121–167, 1998. +[BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast +approximate energy minimization via graph cuts,” +Pattern Analysis and Machine Intelligence, IEEE +Transactions on, vol. 23, no. 11, pp. 1222–1239, +2001. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=969114 +[CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski, +and C. Bray, “Visual categorization with bags of +keypoints,” in Workshop on statistical learning in +computer vision, ECCV, vol. 1, no. 1-22. Prague, +2004, pp. 1–2. 
+[CJSW01] H.-D. Cheng, X. Jiang, Y. Sun, and J. Wang, +“Color image segmentation: advances and prospects,” +Pattern recognition, vol. 34, no. 12, pp. 2259–2281, +2001. +[CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image +segmentation via adaptive k-mean clustering and +knowledge-based morphological operations with +biomedical applications,” Image Processing, IEEE Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec. -12 -1998. [Online]. Available: http://ieeexplore.ieee.org/ -xpls/abs_all.jsp?arnumber=730379 -[CM02] D. Comaniciu and P. Meer, “Mean shift: A -robust approach toward feature space analysis,” -Pattern Analysis and Machine Intelligence, IEEE -Transactions on, vol. 24, no. 5, pp. 603–619, 2002. -[Online]. Available: http://ieeexplore.ieee.org/xpl/ -login.jsp?tp=&arnumber=1000236 -[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, -“A pixel classification system for segmenting -biomedical images using intensity neighborhoods -and dimension reduction,” in Biomedical Imaging: -From Nano to Macro, 2011 IEEE International -Symposium on. IEEE, 2011, pp. 1649–1652. -[Online]. Available: https://www.andrew.cmu.edu/ -user/gustavor/chen_isbi_11.pdf -[CP08] G. Csurka and F. Perronnin, “A simple high -performance approach to semantic segmentation.” -in BMVC, 2008, pp. 1–10. [Online]. Avail￾able: http://www.xrce.xerox.com/layout/set/print/ -content/download/16654/118653/file/2008-023.pdf -[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and -E. Sabo, “Colon crypt segmentation website.” [On￾line]. Available: http://mis.haifa.ac.il/~ishimshoni/ -SegmentCrypt/Download.htm -[CRSS14] ——, “Memory based active contour algorithm -using pixel-level classified images for colon crypt -segmentation,” Computerized Medical Imaging -and Graphics, Nov. 2014. [Online]. Available: -http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/ -Active%20contour%20based%20on%20pixel￾level%20classified%20image%20for%20colon% -20crypts%20segmentation.pdf -[CS10] J. Carreira and C. 
Sminchisescu, “Constrained -parametric min-cuts for automatic object segmenta￾tion,” in Computer Vision and Pattern Recognition -(CVPR), 2010 IEEE Conference on. IEEE, 2010, -pp. 3241–3248. -[CS11] ——, “Cpmc: Constrained parametric min-cuts for -automatic object segmentation,” Feb. 2011. [Online]. -Available: http://www.maths.lth.se/matematiklth/ -personal/sminchis/code/cpmc/ -[CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V. -Stoecker, J. M. Malters, and J. M. Grichnik, “An -improved objective evaluation measure for border -detection in dermoscopy images,” Skin Research -and Technology, vol. 15, no. 4, pp. 444–450, 2009. -[Online]. Available: http://arxiv.org/abs/1009.1020 -[CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear -segmentation in microscope cell images: a hand￾segmented dataset and comparison of algorithms,” -in Biomedical Imaging: From Nano to Macro, -2009. ISBI’09. IEEE International Symposium on. -IEEE, 2009, pp. 518–521. [Online]. Available: -http://murphylab.web.cmu.edu/data -[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh, -“Random walks based multi-image segmentation: -Quasiconvexity results and gpu-based solutions,” -in Computer Vision and Pattern Recognition -(CVPR), 2012 IEEE Conference on. IEEE, -2012, pp. 1656–1663. [Online]. Available: http: -//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf -[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware seman￾tic segmentation via multi-task network cascades,” -arXiv preprint arXiv:1512.04412, 2015. -[DT05] N. Dalal and B. Triggs, “Histograms of oriented -gradients for human detection,” in Computer -Vision and Pattern Recognition, 2005. CVPR -2005. IEEE Computer Society Conference on, -vol. 1, June 2005, pp. 886–893 vol. 1. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=1467360 -[EVGW+a] M. Everingham, L. Van Gool, C. K. I. -Williams, J. Winn, and A. 
Zisserman, “The -PASCAL Visual Object Classes Challenge -2007 (VOC2007) Results,” http://www.pascal￾network.org/challenges/VOC/voc2007/workshop/index.html. -[Online]. Available: http://host.robots.ox.ac.uk: -8080/pascal/VOC/voc2007/index.html -[EVGW+b] ——, “The PASCAL Visual Object Classes Chal￾lenge 2012 (VOC2012) Results,” http://www.pascal￾network.org/challenges/VOC/voc2012/workshop/index.html. -[Online]. Available: http://host.robots.ox.ac.uk: -8080/pascal/VOC/voc2012/index.html -[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams, -J. Winn, and A. Zisserman, “The pascal visual object -classes (voc) challenge,” International journal of -computer vision, vol. 88, no. 2, pp. 303–338, 2010. -[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams, -J. Winn, and A. Zisserman, “Visual object -classes challenge 2012 (voc2012),” 2012. [Online]. -Available: http://host.robots.ox.ac.uk:8080/pascal/ -VOC/voc2012/index.html -[Fel] P. F. Felzenszwalb, “Graph based im￾age segmentation.” [Online]. Available: http: -//cs.brown.edu/~pff/segment/ -[FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, -and D. Ramanan, “Object detection with discrimina￾tively trained part-based models,” Pattern Analysis -and Machine Intelligence, IEEE Transactions on, -vol. 32, no. 9, pp. 1627–1645, 2010. -[FH04] P. F. Felzenszwalb and D. P. Huttenlocher, -“Efficient graph-based image segmentation,” -International Journal of Computer Vision, -vol. 59, no. 2, pp. 167–181, 2004. [Online]. -Available: http://link.springer.com/article/10.1023/ -B:VISI.0000022288.19776.77 -[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A -new performance measure and evaluation -benchmark for road detection algorithms,” in -International Conference on Intelligent Transporta￾tion Systems (ITSC), 2013. [Online]. Available: -http://www.cvlibs.net/datasets/kitti/eval_road.php -[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. -Bagdanov, J. Serrat, and J. 
Gonzalez, “Harmony po￾tentials for joint classification and segmentation,” in -Computer Vision and Pattern Recognition (CVPR), -2010 IEEE Conference on. IEEE, 2010, pp. 3280– -3287. -[GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and -D. Koller, “Multi-class segmentation with relative -location prior,” International Journal of Computer -Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008. -[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.- -Z. Yang, “Probabilistic tracking of affine-invariant -anisotropic regions,” Pattern Analysis and Machine -Intelligence, IEEE Transactions on, vol. 35, no. 1, -pp. 130–143, 2013. -[Har75] J. A. Hartigan, Clustering algorithms. John Wiley -& Sons, Inc., 1975. -[HDT02] C. Huang, L. Davis, and J. Townshend, “An -assessment of support vector machines for land -cover classification,” International Journal of remote -sensing, vol. 23, no. 4, pp. 725–749, 2002. -[HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic -lung segmentation for accurate quantitation of + +1998. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=730379 +[CM02] D. Comaniciu and P. Meer, “Mean shift: A +robust approach toward feature space analysis,” +Pattern Analysis and Machine Intelligence, IEEE +Transactions on, vol. 24, no. 5, pp. 603–619, 2002. +[Online]. Available: http://ieeexplore.ieee.org/xpl/ +login.jsp?tp=&arnumber=1000236 +[COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, +“A pixel classification system for segmenting +biomedical images using intensity neighborhoods +and dimension reduction,” in Biomedical Imaging: +From Nano to Macro, 2011 IEEE International +Symposium on. IEEE, 2011, pp. 1649–1652. +[Online]. Available: https://www.andrew.cmu.edu/ +user/gustavor/chen_isbi_11.pdf +[CP08] G. Csurka and F. Perronnin, “A simple high +performance approach to semantic segmentation.” +in BMVC, 2008, pp. 1–10. [Online]. 
Available: http://www.xrce.xerox.com/layout/set/print/ +content/download/16654/118653/file/2008-023.pdf +[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and +E. Sabo, “Colon crypt segmentation website.” [Online]. Available: http://mis.haifa.ac.il/~ishimshoni/ +SegmentCrypt/Download.htm +[CRSS14] ——, “Memory based active contour algorithm +using pixel-level classified images for colon crypt +segmentation,” Computerized Medical Imaging +and Graphics, Nov. 2014. [Online]. Available: +http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/ +Active%20contour%20based%20on%20pixellevel%20classified%20image%20for%20colon% +20crypts%20segmentation.pdf +[CS10] J. Carreira and C. Sminchisescu, “Constrained +parametric min-cuts for automatic object segmentation,” in Computer Vision and Pattern Recognition +(CVPR), 2010 IEEE Conference on. IEEE, 2010, +pp. 3241–3248. +[CS11] ——, “Cpmc: Constrained parametric min-cuts for +automatic object segmentation,” Feb. 2011. [Online]. +Available: http://www.maths.lth.se/matematiklth/ +personal/sminchis/code/cpmc/ +[CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V. +Stoecker, J. M. Malters, and J. M. Grichnik, “An +improved objective evaluation measure for border +detection in dermoscopy images,” Skin Research +and Technology, vol. 15, no. 4, pp. 444–450, 2009. +[Online]. Available: http://arxiv.org/abs/1009.1020 +[CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear +segmentation in microscope cell images: a handsegmented dataset and comparison of algorithms,” +in Biomedical Imaging: From Nano to Macro, +2009. ISBI’09. IEEE International Symposium on. +IEEE, 2009, pp. 518–521. [Online]. Available: +http://murphylab.web.cmu.edu/data +[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh, +“Random walks based multi-image segmentation: +Quasiconvexity results and gpu-based solutions,” +in Computer Vision and Pattern Recognition +(CVPR), 2012 IEEE Conference on. IEEE, +2012, pp. 1656–1663. [Online]. 
Available: http: +//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf +[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via multi-task network cascades,” +arXiv preprint arXiv:1512.04412, 2015. +[DT05] N. Dalal and B. Triggs, “Histograms of oriented +gradients for human detection,” in Computer +Vision and Pattern Recognition, 2005. CVPR +2005. IEEE Computer Society Conference on, +vol. 1, June 2005, pp. 886–893 vol. 1. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=1467360 +[EVGW+a] M. Everingham, L. Van Gool, C. K. I. +Williams, J. Winn, and A. Zisserman, “The +PASCAL Visual Object Classes Challenge +2007 (VOC2007) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2007/workshop/index.html. +[Online]. Available: http://host.robots.ox.ac.uk: +8080/pascal/VOC/voc2007/index.html +[EVGW+b] ——, “The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2012/workshop/index.html. +[Online]. Available: http://host.robots.ox.ac.uk: +8080/pascal/VOC/voc2012/index.html +[EVGW+10] M. Everingham, L. Van Gool, C. K. Williams, +J. Winn, and A. Zisserman, “The pascal visual object +classes (voc) challenge,” International journal of +computer vision, vol. 88, no. 2, pp. 303–338, 2010. +[EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams, +J. Winn, and A. Zisserman, “Visual object +classes challenge 2012 (voc2012),” 2012. [Online]. +Available: http://host.robots.ox.ac.uk:8080/pascal/ +VOC/voc2012/index.html +[Fel] P. F. Felzenszwalb, “Graph based image segmentation.” [Online]. Available: http: +//cs.brown.edu/~pff/segment/ +[FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, +and D. Ramanan, “Object detection with discriminatively trained part-based models,” Pattern Analysis +and Machine Intelligence, IEEE Transactions on, +vol. 32, no. 9, pp. 1627–1645, 2010. +[FH04] P. F. Felzenszwalb and D. P. 
Huttenlocher, +“Efficient graph-based image segmentation,” +International Journal of Computer Vision, +vol. 59, no. 2, pp. 167–181, 2004. [Online]. +Available: http://link.springer.com/article/10.1023/ +B:VISI.0000022288.19776.77 +[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A +new performance measure and evaluation +benchmark for road detection algorithms,” in +International Conference on Intelligent Transportation Systems (ITSC), 2013. [Online]. Available: +http://www.cvlibs.net/datasets/kitti/eval_road.php +[GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. +Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials for joint classification and segmentation,” in +Computer Vision and Pattern Recognition (CVPR), +2010 IEEE Conference on. IEEE, 2010, pp. 3280– +3287. +[GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and +D. Koller, “Multi-class segmentation with relative +location prior,” International Journal of Computer +Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008. +[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.Z. + Yang, “Probabilistic tracking of affine-invariant +anisotropic regions,” Pattern Analysis and Machine +Intelligence, IEEE Transactions on, vol. 35, no. 1, +pp. 130–143, 2013. +[Har75] J. A. Hartigan, Clustering algorithms. John Wiley +& Sons, Inc., 1975. +[HDT02] C. Huang, L. Davis, and J. Townshend, “An +assessment of support vector machines for land +cover classification,” International Journal of remote +sensing, vol. 23, no. 4, pp. 725–749, 2002. +[HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic +lung segmentation for accurate quantitation of volumetric x-ray ct images,” Medical Imaging, IEEE -13 -Transactions on, vol. 20, no. 6, pp. 490–498, Jun. -2001. -[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. -Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, -D. W. Eggert, A. Fitzgibbon, and R. B. 
-Fisher, “An experimental comparison of range -image segmentation algorithms,” Pattern Analysis -and Machine Intelligence, IEEE Transactions -on, vol. 18, no. 7, pp. 673–689, Jul. 1996. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=506791 -[Ho95] T. K. Ho, “Random decision forests,” in -Document Analysis and Recognition, 1995., -Proceedings of the Third International Conference -on, vol. 1. IEEE, 1995, pp. 278–282. -[Online]. Available: http://ect.bell-labs.com/who/ -tkh/publications/papers/odt.pdf -[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia -Commons, Nov. 2007. [Online]. Avail￾able: https://commons.wikimedia.org/wiki/File: -CCTV_Lens_flare.jpg -[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, -“Multiscale conditional random fields for image -labeling,” in Computer Vision and Pattern -Recognition, 2004. CVPR 2004. Proceedings -of the 2004 IEEE Computer Society Conference -on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. -[Online]. Available: http://ieeexplore.ieee.org/xpl/ -login.jsp?tp=&arnumber=1315232 -[JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white -blood cell segmentation scheme using scale-space -filtering and watershed clustering,” in Machine -Learning and Cybernetics, 2003 International -Conference on, vol. 5, Nov 2003, pp. 2820–2825 -Vol.5. [Online]. Available: http://ieeexplore.ieee.org/ -xpl/login.jsp?tp=&arnumber=1260033 -[Kaf07] L. Kaffer, “File:great male leopard in south afrika￾jd.jpg,” Wikipedia Commons, Jul. 2007. [Online]. -Available: https://commons.wikimedia.org/wiki/File: -Great_male_Leopard_in_South_Afrika-JD.JPG -[KKV+14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen, -J. Pietilä, H. Kälviäinen, and H. Uusitalo, -“Diaretdb1 diabetic retinopathy database and -evaluation protocol,” 2014. [Online]. Available: -http://www2.it.lut.fi/project/imageret/diaretdb1/ -[KP92] J. M. Kasson and W. 
Plouffe, “An analysis of -selected computer interchange color spaces,” ACM -Transactions on Graphics (TOG), vol. 11, no. 4, pp. -373–405, 1992. -[KP06] Z. Kato and T.-C. Pong, “A markov random -field image segmentation model for color -textured images,” Image and Vision Computing, -vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. -Available: http://www.sciencedirect.com/science/ -article/pii/S0262885606001223 -[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, -“Imagenet classification with deep convolutional -neural networks,” in Advances in neural information -processing systems, 2012, pp. 1097–1105. -[KWT88] M. Kass, A. Witkin, and D. Terzopoulos, -“Snakes: Active contour models,” International -journal of computer vision, vol. 1, no. 4, pp. -321–331, Jan. 1988. [Online]. Available: http: -//link.springer.com/article/10.1007/BF00133570 -[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, -“CS231n: Convolutional neural networks for -visual recognition,” 2015. [Online]. Available: -http://cs231n.stanford.edu/ -[Low04] D. Lowe, “Distinctive image features from scale￾invariant keypoints,” International Journal of -Computer Vision, vol. 60, no. 2, pp. 91–110, 2004. -[Online]. Available: http://dx.doi.org/10.1023/B% -3AVISI.0000029664.99615.94 -[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, -“Spectral matting,” Pattern Analysis and -Machine Intelligence, IEEE Transactions on, -vol. 30, no. 10, pp. 1699–1712, 2008. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=4547428 -[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr, -“Associative hierarchical crfs for object class image -segmentation,” in Computer Vision, 2009 IEEE 12th -International Conference on, 2009, pp. 739–746. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=5459248 -[LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully -convolutional networks for semantic segmentation,” -arXiv preprint arXiv:1411.4038, 2014. [Online]. 
-Available: http://arxiv.org/abs/1411.4038 -[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and -J. Malik, “Using contours to detect and localize -junctions in natural images,” in Computer Vision -and Pattern Recognition, 2008. CVPR 2008. -IEEE Conference on, June 2008, pp. 1–8. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=4587420 -[Man12] M. Manske, “File:randabschattung mikroskop -kamera 6.jpg,” Wikipedia Com￾mons, Dec. 2012. [Online]. Avail￾able: https://commons.wikimedia.org/wiki/File: -Randabschattung_Mikroskop_Kamera_6.JPG -[MBLAGJ+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. Gil￾Jimenez, H. Gomez-Moreno, and F. Lopez￾Ferreras, “Road-sign detection and recognition -based on support vector machines,” Intelligent -Transportation Systems, IEEE Transactions on, -vol. 8, no. 2, pp. 264–278, Jun. 2007. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=4220659 -[MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig, -“Automatic brain and tumor segmentation,” in Med￾ical Image Computing and Computer-Assisted In￾tervention—MICCAI 2002. Springer, 2002, pp. -372–379. -[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik, -“A database of human segmented natural -images and its application to evaluating -segmentation algorithms and measuring ecological -statistics,” in Computer Vision, 2001. ICCV -2001. Proceedings. Eighth IEEE International -Conference on, vol. 2. IEEE, 2001, pp. 416–423. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=937655 -[MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann, -S. Bodenstedt, A. Sanchez, C. Stock, H. G. -Kenngott, M. Eisenmann, and S. Speidel, “Can -masses of non-experts train highly accurate -image classifiers?” in Medical Image Computing -and Computer-Assisted Intervention–MICCAI 2014. -Springer, 2014, pp. 438–445. [Online]. Available: -http://opencas.webarchiv.kit.edu/?q=node/26 -[Min89] J. 
Mingers, “An empirical comparison of selection -measures for decision-tree induction,” Machine -Learning, vol. 3, no. 4, pp. 319–342, 1989. -[Online]. Available: http://dx.doi.org/10.1023/A% -3A1022645801436 -[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson, + +Transactions on, vol. 20, no. 6, pp. 490–498, Jun. +2001. +[HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. +Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, +D. W. Eggert, A. Fitzgibbon, and R. B. +Fisher, “An experimental comparison of range +image segmentation algorithms,” Pattern Analysis +and Machine Intelligence, IEEE Transactions +on, vol. 18, no. 7, pp. 673–689, Jul. 1996. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=506791 +[Ho95] T. K. Ho, “Random decision forests,” in +Document Analysis and Recognition, 1995., +Proceedings of the Third International Conference +on, vol. 1. IEEE, 1995, pp. 278–282. +[Online]. Available: http://ect.bell-labs.com/who/ +tkh/publications/papers/odt.pdf +[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia +Commons, Nov. 2007. [Online]. Available: https://commons.wikimedia.org/wiki/File: +CCTV_Lens_flare.jpg +[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, +“Multiscale conditional random fields for image +labeling,” in Computer Vision and Pattern +Recognition, 2004. CVPR 2004. Proceedings +of the 2004 IEEE Computer Society Conference +on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. +[Online]. Available: http://ieeexplore.ieee.org/xpl/ +login.jsp?tp=&arnumber=1315232 +[JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white +blood cell segmentation scheme using scale-space +filtering and watershed clustering,” in Machine +Learning and Cybernetics, 2003 International +Conference on, vol. 5, Nov 2003, pp. 2820–2825 +Vol.5. [Online]. Available: http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=1260033 +[Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,” Wikipedia Commons, Jul. 2007. [Online]. 
+Available: https://commons.wikimedia.org/wiki/File: +Great_male_Leopard_in_South_Afrika-JD.JPG +[KKV+14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen, +J. Pietilä, H. Kälviäinen, and H. Uusitalo, +“Diaretdb1 diabetic retinopathy database and +evaluation protocol,” 2014. [Online]. Available: +http://www2.it.lut.fi/project/imageret/diaretdb1/ +[KP92] J. M. Kasson and W. Plouffe, “An analysis of +selected computer interchange color spaces,” ACM +Transactions on Graphics (TOG), vol. 11, no. 4, pp. +373–405, 1992. +[KP06] Z. Kato and T.-C. Pong, “A markov random +field image segmentation model for color +textured images,” Image and Vision Computing, +vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. +Available: http://www.sciencedirect.com/science/ +article/pii/S0262885606001223 +[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, +“Imagenet classification with deep convolutional +neural networks,” in Advances in neural information +processing systems, 2012, pp. 1097–1105. +[KWT88] M. Kass, A. Witkin, and D. Terzopoulos, +“Snakes: Active contour models,” International +journal of computer vision, vol. 1, no. 4, pp. +321–331, Jan. 1988. [Online]. Available: http: +//link.springer.com/article/10.1007/BF00133570 +[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, +“CS231n: Convolutional neural networks for +visual recognition,” 2015. [Online]. Available: +http://cs231n.stanford.edu/ +[Low04] D. Lowe, “Distinctive image features from scaleinvariant keypoints,” International Journal of +Computer Vision, vol. 60, no. 2, pp. 91–110, 2004. +[Online]. Available: http://dx.doi.org/10.1023/B% +3AVISI.0000029664.99615.94 +[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, +“Spectral matting,” Pattern Analysis and +Machine Intelligence, IEEE Transactions on, +vol. 30, no. 10, pp. 1699–1712, 2008. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4547428 +[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. 
Torr, +“Associative hierarchical crfs for object class image +segmentation,” in Computer Vision, 2009 IEEE 12th +International Conference on, 2009, pp. 739–746. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5459248 +[LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully +convolutional networks for semantic segmentation,” +arXiv preprint arXiv:1411.4038, 2014. [Online]. +Available: http://arxiv.org/abs/1411.4038 +[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and +J. Malik, “Using contours to detect and localize +junctions in natural images,” in Computer Vision +and Pattern Recognition, 2008. CVPR 2008. +IEEE Conference on, June 2008, pp. 1–8. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587420 +[Man12] M. Manske, “File:randabschattung mikroskop +kamera 6.jpg,” Wikipedia Commons, Dec. 2012. [Online]. Available: https://commons.wikimedia.org/wiki/File: +Randabschattung_Mikroskop_Kamera_6.JPG +[MBLAGJ+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez, H. Gomez-Moreno, and F. LopezFerreras, “Road-sign detection and recognition +based on support vector machines,” Intelligent +Transportation Systems, IEEE Transactions on, +vol. 8, no. 2, pp. 264–278, Jun. 2007. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4220659 +[MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig, +“Automatic brain and tumor segmentation,” in Medical Image Computing and Computer-Assisted Intervention—MICCAI 2002. Springer, 2002, pp. +372–379. +[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik, +“A database of human segmented natural +images and its application to evaluating +segmentation algorithms and measuring ecological +statistics,” in Computer Vision, 2001. ICCV +2001. Proceedings. Eighth IEEE International +Conference on, vol. 2. IEEE, 2001, pp. 416–423. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=937655 +[MHMK+14] L. Maier-Hein, S. Mersmann, D. 
Kondermann, +S. Bodenstedt, A. Sanchez, C. Stock, H. G. +Kenngott, M. Eisenmann, and S. Speidel, “Can +masses of non-experts train highly accurate +image classifiers?” in Medical Image Computing +and Computer-Assisted Intervention–MICCAI 2014. +Springer, 2014, pp. 438–445. [Online]. Available: +http://opencas.webarchiv.kit.edu/?q=node/26 +[Min89] J. Mingers, “An empirical comparison of selection +measures for decision-tree induction,” Machine +Learning, vol. 3, no. 4, pp. 319–342, 1989. +[Online]. Available: http://dx.doi.org/10.1023/A% +3A1022645801436 +[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson, “Markov random field models for supervised land -14 -cover classification from very high resolution -multispectral remote sensing images,” in Advances -in Radar and Remote Sensing (TyWRRS), 2012 -Tyrrhenian Workshop on. IEEE, 2012, pp. 235– -242. [Online]. Available: http://ieeexplore.ieee.org/ -xpl/login.jsp?tp=&arnumber=6381135 -[MSC] “Object class recognition image database.” -[Online]. Available: http://research.microsoft.com/ -vision/cambridge/recognition/ -[MSR] “Image understanding - research data,” -Microsoft Research. [Online]. Avail￾able: http://research.microsoft.com/en-us/projects/ -objectclassrecognition/ -[Mur12] K. P. Murphy, Machine learning: a probabilistic -perspective. MIT press, 2012. -[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis -system for scenes containing objects with substruc￾tures,” in Proceedings of the Fourth International -Joint Conference on Pattern Recognitions, 1978, pp. -752–754. -[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin, -R. Cromartie, A. Geselowitz, T. Greer, B. ter -Haar Romeny, J. B. Zimmerman, and K. Zuiderveld, -“Adaptive histogram equalization and its variations,” -Computer vision, graphics, and image processing, -vol. 39, no. 3, pp. 355–368, 1987. [Online]. -Available: http://www.sciencedirect.com/science/ -article/pii/S0734189X8780186X -[PC13] P. H. Pinheiro and R. 
Collobert, “Recurrent -convolutional neural networks for scene parsing,” -arXiv preprint arXiv:1306.2795, 2013. [Online]. -Available: http://arxiv.org/abs/1306.2795v1 -[PH05] C. Pantofaru and M. Hebert, “A -comparison of image segmentation algorithms,” -Robotics Institute, p. 336, 2005. [Online]. -Available: http://riweb-backend.ri.cmu.edu/ -pub_files/pub4/pantofaru_caroline_2005_1/ -pantofaru_caroline_2005_1.pdf -[PS07] A. Protiere and G. Sapiro, “Interactive -image segmentation via adaptive weighted -distances,” Image Processing, IEEE Transactions -on, vol. 16, no. 4, pp. 1046–1057, 2007. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=4130436 -[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multi￾class image segmentation using conditional random -fields and global classification,” in Proceedings -of the 26th Annual International Conference on -Machine Learning. ACM, 2009, pp. 817–824. -[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A -survey of current methods in medical image -segmentation,” Annual Review of Biomedical -Engineering, vol. 2, no. 1, pp. 315–337, 2000, -pMID: 11701515. [Online]. Available: http:// -dx.doi.org/10.1146/annurev.bioeng.2.1.315 -[Qui86] J. R. Quinlan, “Induction of decision trees,” -Machine learning, vol. 1, no. 1, pp. 81–106, -Aug. 1986. [Online]. Available: http://dx.doi.org/ -10.1023/A%3A1022643204877 -[Qui93] ——, C4.5: Programs for Machine Learning, P. Lan￾gley, Ed. Morgan Kaufmann Publishers, Inc., 1993. -[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut: -Interactive foreground extraction using iterated -graph cuts,” ACM Transactions on Graphics -(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online]. -Available: http://delivery.acm.org/10.1145/1020000/ -1015720/p309-rother.pdf -[RM00] J. B. Roerdink and A. Meijster, “The watershed -transform: Definitions, algorithms and paralleliza￾tion strategies,” Fundam. Inform., vol. 41, no. 1-2, -pp. 187–228, 2000. -[RM07] J. Reynolds and K. 
Murphy, “Figure-ground -segmentation using a hierarchical conditional -random field,” in Computer and Robot -Vision, 2007. CRV ’07. Fourth Canadian -Conference on, May 2007, pp. 175–182. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=4228537 -[RMBK06] C. Rother, T. Minka, A. Blake, and V. Kolmogorov, -“Cosegmentation of image pairs by histogram -matching - incorporating a global constraint -into mrfs,” in Computer Vision and Pattern -Recognition, 2006 IEEE Computer Society -Conference on, vol. 1, June 2006, pp. 993– -1000. [Online]. Available: http://ieeexplore.ieee.org/ -xpls/abs_all.jsp?arnumber=1640859 -[SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer, -M. Viergever, B. Van Ginneken et al., “Ridge-based -vessel segmentation in color images of the retina,” -Medical Imaging, IEEE Transactions on, vol. 23, -no. 4, pp. 501–509, 2004. [Online]. Available: -http://www.isi.uu.nl/Research/Databases/DRIVE/ -[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, -“Object class segmentation using random -forests.” in BMVC, 2008, pp. 1–10. [On￾line]. Available: http://research.microsoft.com/pubs/ -72423/Criminisi_bmvc2008.pdf -[SJC08] J. Shotton, M. Johnson, and R. Cipolla, -“Semantic texton forests for image categorization -and segmentation,” in Computer vision and -pattern recognition, 2008. CVPR 2008. IEEE -Conference on. IEEE, Jun. 2008, pp. 1–8. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=4587503 -[SM11] C. Sutton and A. McCallum, “An introduction -to conditional random fields,” Machine Learning, -vol. 4, no. 4, pp. 267–373, 2011. [Online]. -Available: http://homepages.inf.ed.ac.uk/csutton/ -publications/crftutv2.pdf -[Smi02] L. I. Smith, “A tutorial on principal components -analysis,” Cornell University, USA, vol. 51, p. 52, -2002. -[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the -context of support vector machines,” Memorial Uni￾versity of Newfoundland St. John’s, Newfoundland, -Canada, Jun. 2004. 
-[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery, -segmentation and reactive grasping of unknown -objects.” in Humanoids, 2012, pp. 71–77. [On￾line]. Available: http://h2t.anthropomatik.kit.edu/ -pdf/Schiebener2012.pdf -[SUM+11] D. Schiebener, A. Ude, J. Morimotot, -T. Asfour, and R. Dillmann, “Segmentation -and learning of unknown objects through physical -interaction,” in Humanoid Robots (Humanoids), -2011 11th IEEE-RAS International Conference -on. IEEE, 2011, pp. 500–506. [Online]. -Available: http://ieeexplore.ieee.org/ielx5/6086637/ -6100798/06100843.pdf -[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, -“Textonboost: Joint appearance, shape and context -modeling for multi-class object recognition and -segmentation,” in Computer Vision–ECCV 2006. -Springer, 2006, pp. 1–15. [Online]. Available: http: -//link.springer.com/chapter/10.1007/11744023_1 -[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, -“Scene parsing with object instances and + +cover classification from very high resolution +multispectral remote sensing images,” in Advances +in Radar and Remote Sensing (TyWRRS), 2012 +Tyrrhenian Workshop on. IEEE, 2012, pp. 235– +242. [Online]. Available: http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=6381135 +[MSC] “Object class recognition image database.” +[Online]. Available: http://research.microsoft.com/ +vision/cambridge/recognition/ +[MSR] “Image understanding - research data,” +Microsoft Research. [Online]. Available: http://research.microsoft.com/en-us/projects/ +objectclassrecognition/ +[Mur12] K. P. Murphy, Machine learning: a probabilistic +perspective. MIT press, 2012. +[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis +system for scenes containing objects with substructures,” in Proceedings of the Fourth International +Joint Conference on Pattern Recognitions, 1978, pp. +752–754. +[PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin, +R. Cromartie, A. Geselowitz, T. Greer, B. ter +Haar Romeny, J. B. 
Zimmerman, and K. Zuiderveld, +“Adaptive histogram equalization and its variations,” +Computer vision, graphics, and image processing, +vol. 39, no. 3, pp. 355–368, 1987. [Online]. +Available: http://www.sciencedirect.com/science/ +article/pii/S0734189X8780186X +[PC13] P. H. Pinheiro and R. Collobert, “Recurrent +convolutional neural networks for scene parsing,” +arXiv preprint arXiv:1306.2795, 2013. [Online]. +Available: http://arxiv.org/abs/1306.2795v1 +[PH05] C. Pantofaru and M. Hebert, “A +comparison of image segmentation algorithms,” +Robotics Institute, p. 336, 2005. [Online]. +Available: http://riweb-backend.ri.cmu.edu/ +pub_files/pub4/pantofaru_caroline_2005_1/ +pantofaru_caroline_2005_1.pdf +[PS07] A. Protiere and G. Sapiro, “Interactive +image segmentation via adaptive weighted +distances,” Image Processing, IEEE Transactions +on, vol. 16, no. 4, pp. 1046–1057, 2007. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4130436 +[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass image segmentation using conditional random +fields and global classification,” in Proceedings +of the 26th Annual International Conference on +Machine Learning. ACM, 2009, pp. 817–824. +[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A +survey of current methods in medical image +segmentation,” Annual Review of Biomedical +Engineering, vol. 2, no. 1, pp. 315–337, 2000, +pMID: 11701515. [Online]. Available: http:// +dx.doi.org/10.1146/annurev.bioeng.2.1.315 +[Qui86] J. R. Quinlan, “Induction of decision trees,” +Machine learning, vol. 1, no. 1, pp. 81–106, +Aug. 1986. [Online]. Available: http://dx.doi.org/ +10.1023/A%3A1022643204877 +[Qui93] ——, C4.5: Programs for Machine Learning, P. Langley, Ed. Morgan Kaufmann Publishers, Inc., 1993. +[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut: +Interactive foreground extraction using iterated +graph cuts,” ACM Transactions on Graphics +(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online]. 
+Available: http://delivery.acm.org/10.1145/1020000/ +1015720/p309-rother.pdf +[RM00] J. B. Roerdink and A. Meijster, “The watershed +transform: Definitions, algorithms and parallelization strategies,” Fundam. Inform., vol. 41, no. 1-2, +pp. 187–228, 2000. +[RM07] J. Reynolds and K. Murphy, “Figure-ground +segmentation using a hierarchical conditional +random field,” in Computer and Robot +Vision, 2007. CRV ’07. Fourth Canadian +Conference on, May 2007, pp. 175–182. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4228537 +[RMBK06] C. Rother, T. Minka, A. Blake, and V. Kolmogorov, +“Cosegmentation of image pairs by histogram +matching - incorporating a global constraint +into mrfs,” in Computer Vision and Pattern +Recognition, 2006 IEEE Computer Society +Conference on, vol. 1, June 2006, pp. 993– +1000. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=1640859 +[SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer, +M. Viergever, B. Van Ginneken et al., “Ridge-based +vessel segmentation in color images of the retina,” +Medical Imaging, IEEE Transactions on, vol. 23, +no. 4, pp. 501–509, 2004. [Online]. Available: +http://www.isi.uu.nl/Research/Databases/DRIVE/ +[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, +“Object class segmentation using random +forests.” in BMVC, 2008, pp. 1–10. [Online]. Available: http://research.microsoft.com/pubs/ +72423/Criminisi_bmvc2008.pdf +[SJC08] J. Shotton, M. Johnson, and R. Cipolla, +“Semantic texton forests for image categorization +and segmentation,” in Computer vision and +pattern recognition, 2008. CVPR 2008. IEEE +Conference on. IEEE, Jun. 2008, pp. 1–8. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587503 +[SM11] C. Sutton and A. McCallum, “An introduction +to conditional random fields,” Machine Learning, +vol. 4, no. 4, pp. 267–373, 2011. [Online]. +Available: http://homepages.inf.ed.ac.uk/csutton/ +publications/crftutv2.pdf +[Smi02] L. I. 
Smith, “A tutorial on principal components +analysis,” Cornell University, USA, vol. 51, p. 52, +2002. +[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the +context of support vector machines,” Memorial University of Newfoundland St. John’s, Newfoundland, +Canada, Jun. 2004. +[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery, +segmentation and reactive grasping of unknown +objects.” in Humanoids, 2012, pp. 71–77. [Online]. Available: http://h2t.anthropomatik.kit.edu/ +pdf/Schiebener2012.pdf +[SUM+11] D. Schiebener, A. Ude, J. Morimotot, +T. Asfour, and R. Dillmann, “Segmentation +and learning of unknown objects through physical +interaction,” in Humanoid Robots (Humanoids), +2011 11th IEEE-RAS International Conference +on. IEEE, 2011, pp. 500–506. [Online]. +Available: http://ieeexplore.ieee.org/ielx5/6086637/ +6100798/06100843.pdf +[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, +“Textonboost: Joint appearance, shape and context +modeling for multi-class object recognition and +segmentation,” in Computer Vision–ECCV 2006. +Springer, 2006, pp. 1–15. [Online]. Available: http: +//link.springer.com/chapter/10.1007/11744023_1 +[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, +“Scene parsing with object instances and occlusion ordering,” in Computer Vision and -15 -Pattern Recognition (CVPR), 2014 IEEE -Conference on. IEEE, 2014, pp. 3748–3755. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=6909874 -[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, -“A measure for objective evaluation of -image segmentation algorithms,” in Computer -Vision and Pattern Recognition-Workshops, 2005. -CVPR Workshops. IEEE Computer Society -Conference on. IEEE, 2005, pp. 34–34. -[Online]. Available: http://repository.cmu.edu/cgi/ -viewcontent.cgi?article=1365&context=robotics -[vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. 
-van den Herik, “Dimensionality reduction: A com￾parative review,” Journal of Machine Learning -Research, vol. 10, no. 1-41, pp. 66–71, 2009. -[VOC10] “Voc2010 preliminary results,” 2010. [Online]. -Available: http://host.robots.ox.ac.uk/pascal/VOC/ -voc2010/results/index.html -[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic -tracking of laparoscopic instruments by color -coding,” in CVRMed-MRCAS’97, ser. Lecture -Notes in Computer Science, J. Troccaz, E. Grimson, -and R. Mösges, Eds. Springer Berlin Heidelberg, -1997, vol. 1205, pp. 357–366. [Online]. Available: -http://dx.doi.org/10.1007/BFb0029257 -[YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell -segmentation in microscopy imagery using a -bag of local bayesian classifiers,” in Biomedical -Imaging: From Nano to Macro, 2010 IEEE -International Symposium on, Apr. 2010, pp. 125– -128. [Online]. Available: http://ieeexplore.ieee.org/ -xpls/abs_all.jsp?arnumber=5490399 -[YHRF12] Y. Yang, S. Hallman, D. Ramanan, and -C. C. Fowlkes, “Layered object models for -image segmentation,” Pattern Analysis and -Machine Intelligence, IEEE Transactions on, -vol. 34, no. 9, pp. 1731–1743, Sep. 2012. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=6042883 -[ZBS01] Y. Zhang, M. Brady, and S. Smith, “Segmentation -of brain MR images through a hidden Markov -random field model and the expectation￾maximization algorithm,” Medical Imaging, IEEE -Transactions on, vol. 20, no. 1, pp. 45–57, 2001. -[Online]. Available: http://ieeexplore.ieee.org/xpls/ -abs_all.jsp?arnumber=906424 -[ZGWX05] S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What -are textons?” International Journal of Computer -Vision, vol. 62, no. 1-2, pp. 121–143, 2005. -[Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,” -MultiMedia, IEEE, vol. 19, no. 2, pp. 4–10, Feb. -2012. -[ZJRP+15] S. Zheng, S. Jayasumana, B. Romera-Paredes, -V. Vineet, Z. Su, D. Du, C. Huang, and -P. H. 
Torr, “Conditional random fields as -recurrent neural networks,” in Proceedings -of the IEEE International Conference on -Computer Vision, 2015, pp. 1529–1537. [Online]. -Available: http://www.robots.ox.ac.uk/~szheng/ -papers/CRFasRNN.pdf -GLOSSARY -ACM active contour model. 6 -BOV bag-of-visual-words. 5 -CNN Convolution Neuronal Network. 5, 9 -CRF Conditional Random Field. 4, 8, 9, 11 -GPU graphics processing unit. 3 -HOG histogram of oriented gradients. 5, 6, 8 -ILSVRC ImageNet Large-Scale Visual Recognition -Challenge. 9 -MAP Maximum A Posteriori. 8 -MR magnetic resonance. 2, 6 -MRF Markov Random Field. 4, 8 -PCA principal component analysis. 5 -RBF radial basis function. 8 -SIFT scale-invariant feature transform. 5 + +Pattern Recognition (CVPR), 2014 IEEE +Conference on. IEEE, 2014, pp. 3748–3755. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=6909874 +[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, +“A measure for objective evaluation of +image segmentation algorithms,” in Computer +Vision and Pattern Recognition-Workshops, 2005. +CVPR Workshops. IEEE Computer Society +Conference on. IEEE, 2005, pp. 34–34. +[Online]. Available: http://repository.cmu.edu/cgi/ +viewcontent.cgi?article=1365&context=robotics +[vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. +van den Herik, “Dimensionality reduction: A comparative review,” Journal of Machine Learning +Research, vol. 10, no. 1-41, pp. 66–71, 2009. +[VOC10] “Voc2010 preliminary results,” 2010. [Online]. +Available: http://host.robots.ox.ac.uk/pascal/VOC/ +voc2010/results/index.html +[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic +tracking of laparoscopic instruments by color +coding,” in CVRMed-MRCAS’97, ser. Lecture +Notes in Computer Science, J. Troccaz, E. Grimson, +and R. Mösges, Eds. Springer Berlin Heidelberg, +1997, vol. 1205, pp. 357–366. [Online]. Available: +http://dx.doi.org/10.1007/BFb0029257 +[YBCK10] Z. Yin, R. Bise, M. Chen, and T. 
Kanade, “Cell +segmentation in microscopy imagery using a +bag of local bayesian classifiers,” in Biomedical +Imaging: From Nano to Macro, 2010 IEEE +International Symposium on, Apr. 2010, pp. 125– +128. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=5490399 +[YHRF12] Y. Yang, S. Hallman, D. Ramanan, and +C. C. Fowlkes, “Layered object models for +image segmentation,” Pattern Analysis and +Machine Intelligence, IEEE Transactions on, +vol. 34, no. 9, pp. 1731–1743, Sep. 2012. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=6042883 +[ZBS01] Y. Zhang, M. Brady, and S. Smith, “Segmentation +of brain MR images through a hidden Markov +random field model and the expectationmaximization algorithm,” Medical Imaging, IEEE +Transactions on, vol. 20, no. 1, pp. 45–57, 2001. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=906424 +[ZGWX05] S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What +are textons?” International Journal of Computer +Vision, vol. 62, no. 1-2, pp. 121–143, 2005. +[Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,” +MultiMedia, IEEE, vol. 19, no. 2, pp. 4–10, Feb. +2012. +[ZJRP+15] S. Zheng, S. Jayasumana, B. Romera-Paredes, +V. Vineet, Z. Su, D. Du, C. Huang, and +P. H. Torr, “Conditional random fields as +recurrent neural networks,” in Proceedings +of the IEEE International Conference on +Computer Vision, 2015, pp. 1529–1537. [Online]. +Available: http://www.robots.ox.ac.uk/~szheng/ +papers/CRFasRNN.pdf +GLOSSARY +ACM active contour model. 6 +BOV bag-of-visual-words. 5 +CNN Convolution Neuronal Network. 5, 9 +CRF Conditional Random Field. 4, 8, 9, 11 +GPU graphics processing unit. 3 +HOG histogram of oriented gradients. 5, 6, 8 +ILSVRC ImageNet Large-Scale Visual Recognition +Challenge. 9 +MAP Maximum A Posteriori. 8 +MR magnetic resonance. 2, 6 +MRF Markov Random Field. 4, 8 +PCA principal component analysis. 5 +RBF radial basis function. 
8 +SIFT scale-invariant feature transform. 5 SVM Support Vector Machine. 4, 6–8 -16 -APPENDIX A -TABLES -Database Image Resolution (width × height) -Number -of -Images -Number -of -Classes -Channels Data source -Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS] -DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+14] -KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13] -MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR] -MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR] -Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+14] -PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW+12] -Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] -Table I: An overview over publicly available image databases with a semantic segmentation ground trouth. + +APPENDIX A +TABLES +Database Image Resolution (width × height) +Number +of +Images +Number +of +Classes +Channels Data source +Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS] +DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+14] +KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13] +MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR] +MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR] +Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+14] +PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW+12] +Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] +Table I: An overview over publicly available image databases with a semantic segmentation ground trouth. 
\ No newline at end of file diff --git a/read/results/pdfium/1707.09725.txt b/read/results/pdfium/1707.09725.txt index f499152..6c84dcd 100644 --- a/read/results/pdfium/1707.09725.txt +++ b/read/results/pdfium/1707.09725.txt @@ -1,4193 +1,4149 @@ -Analysis and Optimization of -Convolutional Neural Network -Architectures -Master Thesis of -Martin Thoma -Department of Computer Science -Institute for Anthropomatics -and -FZI Research Center for Information Technology -Reviewer: Prof. Dr.–Ing. R. Dillmann -Second reviewer: Prof. Dr.–Ing. J. M. Zöllner -Advisor: Dipl.–Inform. Michael Weber -Research Period: 03. May 2017 – 03. August 2017 -KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu +Analysis and Optimization of +Convolutional Neural Network +Architectures +Master Thesis of +Martin Thoma +Department of Computer Science +Institute for Anthropomatics +and +FZI Research Center for Information Technology +Reviewer: Prof. Dr.–Ing. R. Dillmann +Second reviewer: Prof. Dr.–Ing. J. M. Zöllner +Advisor: Dipl.–Inform. Michael Weber +Research Period: 03. May 2017 – 03. August 2017 +KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu arXiv:1707.09725v1 [cs.CV] 31 Jul 2017 -Analysis and Optimization of Convolutional Neural -Network Architectures -by -Martin Thoma -Master Thesis +Analysis and Optimization of Convolutional Neural +Network Architectures +by +Martin Thoma +Master Thesis August 2017 -Master Thesis, FZI -Department of Computer Science, 2017 -Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. Zöllner -Abteilung Technisch Kognitive Assistenzsysteme +Master Thesis, FZI +Department of Computer Science, 2017 +Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. 
Zöllner +Abteilung Technisch Kognitive Assistenzsysteme FZI Research Center for Information Technology -Affirmation -Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfs￾mittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus -Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde. -Karlsruhe, Martin Thoma -August 2017 -v - -Abstract -Convolutional Neural Networks (CNNs) dominate various computer vision tasks since -Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error -from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many -aspects of CNNs are examined in various publications, but literature about the analysis -and construction of neural network architectures is rare. This work is one step to close this -gap. A comprehensive overview over existing techniques for CNN analysis and topology -construction is provided. A novel way to visualize classification errors with confusion -matrices was developed. Based on this method, hierarchical classifiers are described and -evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For -example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation -and test-time transformations on the accuracy. Other results, such as the positive impact of -learned color transformation on the test accuracy could not be confirmed. A model which -has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and -which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and -STL-10 was developed. 
-vii -Zusammenfassung -Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen -Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese -effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual -recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte -von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleich￾sweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen -geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine -umfassende Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein -neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde -entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt -und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtun￾gen wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der -Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die In￾varianzbildung durch künstliche Transformationen zur Test-Zeit (Test-time transformations) -experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss -gelernter Farbraumtransformationen konnten nicht bestätigt werden. Ein Modell welches -weniger als eine Millionen Parameter nutzt und auf den Benchmark-Datensätzen Asirra, +Affirmation +Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfsmittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus +Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde. 
+Karlsruhe, Martin Thoma +August 2017 + + +Abstract +Convolutional Neural Networks (CNNs) dominate various computer vision tasks since +Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error +from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many +aspects of CNNs are examined in various publications, but literature about the analysis +and construction of neural network architectures is rare. This work is one step to close this +gap. A comprehensive overview over existing techniques for CNN analysis and topology +construction is provided. A novel way to visualize classification errors with confusion +matrices was developed. Based on this method, hierarchical classifiers are described and +evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For +example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation +and test-time transformations on the accuracy. Other results, such as the positive impact of +learned color transformation on the test accuracy could not be confirmed. A model which +has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and +which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and +STL-10 was developed. + +Zusammenfassung +Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen +Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese +effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual +recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte +von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen +geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. 
Eine +umfassende Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein +neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde +entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt +und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtungen wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der +Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die Invarianzbildung durch künstliche Transformationen zur Test-Zeit (Test-time transformations) +experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss +gelernter Farbraumtransformationen konnten nicht bestätigt werden. Ein Modell welches +weniger als eine Millionen Parameter nutzt und auf den Benchmark-Datensätzen Asirra, GTSRB, HASYv2 und STL-10 den Stand der Technik neu definiert wurde entwickelt. -Acknowledgment -I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring -conversations we had about various topics, including machine learning. -I also want to thank my father for the support he gave me. He made it possible for me to -study without having to worry about anything besides my studies. Thank you! -Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading -my masters thesis and Stephan Gocht for giving me access to a GTX 1070. 
-ix -This work can be cited the following way: -@MastersThesis{Thoma:2017, -Title = {Analysis and Optimization of Convolutional Neural Network -Architectures}, -Author = {Martin Thoma}, -School = {Karlsruhe Institute of Technology}, -Year = {2017}, -Address = {Karlsruhe, Germany}, -Month = jun, -Type = {Masters’s Thesis}, -Keywords = {machine learning; artificial neural networks; -classification; supervised learning; CNNs}, -Url = {https://martin-thoma.com/msthesis/} -} -A DVD with a digital version of this master thesis and the source code as well as the used +Acknowledgment +I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring +conversations we had about various topics, including machine learning. +I also want to thank my father for the support he gave me. He made it possible for me to +study without having to worry about anything besides my studies. Thank you! +Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading +my masters thesis and Stephan Gocht for giving me access to a GTX 1070. + +This work can be cited the following way: +@MastersThesis{Thoma:2017, +Title = {Analysis and Optimization of Convolutional Neural Network +Architectures}, +Author = {Martin Thoma}, +School = {Karlsruhe Institute of Technology}, +Year = {2017}, +Address = {Karlsruhe, Germany}, +Month = jun, +Type = {Masters’s Thesis}, +Keywords = {machine learning; artificial neural networks; +classification; supervised learning; CNNs}, +Url = {https://martin-thoma.com/msthesis/} +} +A DVD with a digital version of this master thesis and the source code as well as the used data is part of this work. -Contents -1 Introduction 1 -2 Convolutional Neural Networks 3 -2.1 Linear Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 -2.2 CNN Layer Types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 -2.2.1 Convolutional Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . 
5 -2.2.2 Pooling Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7 -2.2.3 Dropout . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 -2.2.4 Normalization Layers . . . . . . . . . . . . . . . . . . . . . . . . . . 9 -2.3 CNN Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -2.3.1 Residual Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -2.3.2 Aggregation Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 -2.3.3 Dense Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 -2.4 Transition Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 -2.5 Analysis Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15 -2.5.1 Qualitative Analysis by Example . . . . . . . . . . . . . . . . . . . . 15 -2.5.2 Confusion Matrices . . . . . . . . . . . . . . . . . . . . . . . . . . . 16 -2.5.3 Validation Curves: Accuracy, loss and other metrics . . . . . . . . . 16 -2.5.4 Learning Curves . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 -2.5.5 Input-feature based model explanations . . . . . . . . . . . . . . . . 21 -2.5.6 Argmax Method . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22 -2.5.7 Feature Map Reconstructions . . . . . . . . . . . . . . . . . . . . . . 22 -2.5.8 Filter comparison . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23 -2.5.9 Weight update tracking . . . . . . . . . . . . . . . . . . . . . . . . . 23 -2.6 Accuracy boosting techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 24 -3 Topology Learning 27 -3.1 Growing approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27 -3.1.1 Cascade-Correlation . . . . . . . . . . . . . . . . . . . . . . . . . . . 27 -3.1.2 Meiosis Networks . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 -3.1.3 Automatic Structure Optimization . . . . . . . . . . . . . . . . . . . . 29 -3.2 Pruning approaches . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29 -3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 -3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 -xi -3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31 -4 Hierarchical Classification 33 -4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34 -4.2 Clustering classes . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 -5 Experimental Evaluation 37 -5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38 -5.1.1 Baseline Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 -5.1.2 Weight distribution . . . . . . . . . . . . . . . . . . . . . . . . . . . . 41 -5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 45 -5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48 -5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 -5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 -5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54 -5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55 -5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56 -5.8 Batch Normalization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 57 -5.9 Batch size . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 -5.10 Bias . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 -5.11 Learned Color Space Transformation . . . . . . . . . . . . . . . . . . . . . . 60 -5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 -5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . 60 -5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 -5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 -5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68 -5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68 -6 Conclusion and Outlook 71 -A Figures, Tables and Algorithms 75 -B Hyperparameters 79 -B.1 Preprocessing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 79 -B.2 Data augmentation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80 -B.3 Initialization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81 -B.4 Objective function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81 -B.5 Optimization Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 82 -B.6 Network Design . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84 -B.7 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 -C Calculating Network Characteristics 87 +Contents +1 Introduction 1 +2 Convolutional Neural Networks 3 +2.1 Linear Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 +2.2 CNN Layer Types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 +2.2.1 Convolutional Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . 5 +2.2.2 Pooling Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7 +2.2.3 Dropout . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 +2.2.4 Normalization Layers . . . . . . . . . . . . . . . . . . . . . . . . . . 9 +2.3 CNN Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +2.3.1 Residual Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +2.3.2 Aggregation Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . 
12 +2.3.3 Dense Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 +2.4 Transition Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 +2.5 Analysis Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15 +2.5.1 Qualitative Analysis by Example . . . . . . . . . . . . . . . . . . . . 15 +2.5.2 Confusion Matrices . . . . . . . . . . . . . . . . . . . . . . . . . . . 16 +2.5.3 Validation Curves: Accuracy, loss and other metrics . . . . . . . . . 16 +2.5.4 Learning Curves . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 +2.5.5 Input-feature based model explanations . . . . . . . . . . . . . . . . 21 +2.5.6 Argmax Method . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22 +2.5.7 Feature Map Reconstructions . . . . . . . . . . . . . . . . . . . . . . 22 +2.5.8 Filter comparison . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23 +2.5.9 Weight update tracking . . . . . . . . . . . . . . . . . . . . . . . . . 23 +2.6 Accuracy boosting techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 24 +3 Topology Learning 27 +3.1 Growing approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27 +3.1.1 Cascade-Correlation . . . . . . . . . . . . . . . . . . . . . . . . . . . 27 +3.1.2 Meiosis Networks . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 +3.1.3 Automatic Structure Optimization . . . . . . . . . . . . . . . . . . . . 29 +3.2 Pruning approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29 +3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 +3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 + +3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31 +4 Hierarchical Classification 33 +4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34 +4.2 Clustering classes . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . 34 +5 Experimental Evaluation 37 +5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38 +5.1.1 Baseline Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 +5.1.2 Weight distribution . . . . . . . . . . . . . . . . . . . . . . . . . . . . 41 +5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 45 +5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48 +5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 +5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 +5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54 +5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55 +5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56 +5.8 Batch Normalization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 57 +5.9 Batch size . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +5.10 Bias . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +5.11 Learned Color Space Transformation . . . . . . . . . . . . . . . . . . . . . . 60 +5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 +5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 +5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 +5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 +5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68 +5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68 +6 Conclusion and Outlook 71 +A Figures, Tables and Algorithms 75 +B Hyperparameters 79 +B.1 Preprocessing . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 79 +B.2 Data augmentation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80 +B.3 Initialization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81 +B.4 Objective function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81 +B.5 Optimization Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 82 +B.6 Network Design . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84 +B.7 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 +C Calculating Network Characteristics 87 C.1 Parameter Numbers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 -C.2 FLOPs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 -C.3 Memory Footprint . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88 -D Common Architectures 89 -D.1 LeNet-5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 -D.2 AlexNet . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 -D.3 VGG-16 D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 -D.4 GoogleNet, Inception v2 and v3 . . . . . . . . . . . . . . . . . . . . . . . . . 94 -D.5 Inception-v4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 -E Datasets 97 -F List of Tables 99 -G List of Figures 101 -H Bibliography 103 +C.2 FLOPs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 +C.3 Memory Footprint . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88 +D Common Architectures 89 +D.1 LeNet-5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 +D.2 AlexNet . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +D.3 VGG-16 D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 +D.4 GoogleNet, Inception v2 and v3 . . . . . . 
. . . . . . . . . . . . . . . . . . . 94 +D.5 Inception-v4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 +E Datasets 97 +F List of Tables 99 +G List of Figures 101 +H Bibliography 103 I Glossary 119 -1. Introduction -Computer vision is the academic field which aims to gain a high-level understanding of the -low-level information given by raw pixels from digital images. -Robots, search engines, self-driving cars, surveillance agencies and many others have -applications which include one of the following six problems in computer vision as sub￾problems: -• Classification: -1 The algorithm is given an image and k possible classes. The task is -to decide which of the k classes the image belongs to. For example, an image from -a self-driving cars on-board camera contains either paved road, unpaved road or -no road: Which of those given three classes is in the image? -• Localization: The algorithm is given an image and one class k. The task is to find -bounding boxes for all instances of k. -• Detection: Given an image and k classes, find bounding boxes for all instances of -those classes. -• Semantic Segmentation: Given an image and k classes, classify each pixel. -• Instance segmentation: Given an image and k classes, classify each pixel as one of -the k classes, but distinguish different instances of the classes. -• Content-based Image Retrieval: Given an image x and n images in a database, -find the top u images which are most similar to x. -There are many techniques to approach those problems, but since AlexNet [KSH12] was -published, all of those problems have high-quality solutions which make use of Convolutional -Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15]. -Today, most neural networks are constructed by rules of thumb and gut feeling. The -architectures evolved and got deeper, more hyperparameters were added. 
Although there -are methods for analyzing CNNs, those methods are not enough to determine all steps in -the development of network architectures without gut feeling. A detailed introduction to -CNNs as well as nine methods for analysis of CNNs is given in Chapter 2. -1Classification is also called identification if the classes are humans. Another name is object recognition, -although the classes can be humans and animals as well. -1 -1. Introduction -Despite the fact that most researchers and developers do not use topology learning, a couple -of algorithms have been proposed for this task. Five classes of topology learning algorithms -are introduced in Chapter 3. -When datasets and the number of classes are large, evaluating a single idea how to improve -the network can take several weeks just for the training. Hence the idea of building a -hierarchy of classifiers which allows to split the classification task into various sub-tasks -that can easily be combined is evaluated in Chapter 4. -Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters -and label smoothing are evaluated in Chapter 5. -This work focuses on classification problems to keep the presented ideas as pure and -simple as possible. The described techniques are relevant to all six described computer -vision problems due to the fact that Encoder-Decoder architectures are one component of -state-of-the-art algorithms for all six of them. +. Introduction +Computer vision is the academic field which aims to gain a high-level understanding of the +low-level information given by raw pixels from digital images. +Robots, search engines, self-driving cars, surveillance agencies and many others have +applications which include one of the following six problems in computer vision as subproblems: +• Classification: +1 The algorithm is given an image and k possible classes. The task is +to decide which of the k classes the image belongs to. 
For example, an image from +a self-driving cars on-board camera contains either paved road, unpaved road or +no road: Which of those given three classes is in the image? +• Localization: The algorithm is given an image and one class k. The task is to find +bounding boxes for all instances of k. +• Detection: Given an image and k classes, find bounding boxes for all instances of +those classes. +• Semantic Segmentation: Given an image and k classes, classify each pixel. +• Instance segmentation: Given an image and k classes, classify each pixel as one of +the k classes, but distinguish different instances of the classes. +• Content-based Image Retrieval: Given an image x and n images in a database, +find the top u images which are most similar to x. +There are many techniques to approach those problems, but since AlexNet [KSH12] was +published, all of those problems have high-quality solutions which make use of Convolutional +Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15]. +Today, most neural networks are constructed by rules of thumb and gut feeling. The +architectures evolved and got deeper, more hyperparameters were added. Although there +are methods for analyzing CNNs, those methods are not enough to determine all steps in +the development of network architectures without gut feeling. A detailed introduction to +CNNs as well as nine methods for analysis of CNNs is given in Chapter 2. +1Classification is also called identification if the classes are humans. Another name is object recognition, +although the classes can be humans and animals as well. + +1. Introduction +Despite the fact that most researchers and developers do not use topology learning, a couple +of algorithms have been proposed for this task. Five classes of topology learning algorithms +are introduced in Chapter 3. +When datasets and the number of classes are large, evaluating a single idea how to improve +the network can take several weeks just for the training. 
Hence the idea of building a +hierarchy of classifiers which allows to split the classification task into various sub-tasks +that can easily be combined is evaluated in Chapter 4. +Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters +and label smoothing are evaluated in Chapter 5. +This work focuses on classification problems to keep the presented ideas as pure and +simple as possible. The described techniques are relevant to all six described computer +vision problems due to the fact that Encoder-Decoder architectures are one component of +state-of-the-art algorithms for all six of them. + +2. Convolutional Neural Networks +In the following, it is assumed that the reader knows what a multilayer perceptron (MLP) +is and how they are designed for classification problems, what activation functions are and +how gradient descent works. In case the reader needs a refresher on any of those topics, I +recommend chapter 4.3 and 4.4 of [Tho14a] as well as [LBH15]. +This chapter introduces linear image filters in Section 2.1, then standard layer types of +CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, +transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. +2.1. Linear Image Filters +A linear image filter (also called a filter bank or a kernel) is an element F ∈ R +kw×kh×d +, +where kw represents the filter’s width, kh the filter’s height and d the number of input +channels. The filter F is convolved with the image I ∈ R +w×h×d +to produce a new image I +0 +. +The output image I +0 has only one channel. Each pixel I0 +(x, y) of the output image gets +calculated by point-wise multiplication of one filter element with one element of the original +image I: +I +0 +(x, y) = +b +kw +2X +c +ix=1−d kw 2 -2. 
Convolutional Neural Networks -In the following, it is assumed that the reader knows what a multilayer perceptron (MLP) -is and how they are designed for classification problems, what activation functions are and -how gradient descent works. In case the reader needs a refresher on any of those topics, I -recommend chapter 4.3 and 4.4 of [Tho14a] as well as [LBH15]. -This chapter introduces linear image filters in Section 2.1, then standard layer types of -CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, -transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. -2.1. Linear Image Filters -A linear image filter (also called a filter bank or a kernel) is an element F ∈ R -kw×kh×d -, -where kw represents the filter’s width, kh the filter’s height and d the number of input -channels. The filter F is convolved with the image I ∈ R -w×h×d -to produce a new image I -0 -. -The output image I -0 has only one channel. Each pixel I -0 -(x, y) of the output image gets -calculated by point-wise multiplication of one filter element with one element of the original -image I: -I -0 -(x, y) = -b -kw -2X -c -ix=1−d kw -2 -e -b -kh -2X -c -iy=1−d kh -2 -e -X -d -ic=1 -I(x + ix, y + iy, ic) · F(ix, iy, ic) -This procedure is explained by Figure 2.1. It is essentially a discrete convolution. 
-I ∈ R -7×7 -Filter kernel -F ∈ R -3×3 -Result of point-wise -multiplication -I -0 ∈ R -7×7 -104 -116 -116 -112 -58 -47 -47 -109 -97 -114 -116 -105 -110 -45 -116 -104 -111 -109 -97 -46 -100 -101 -47 -109 -97 -115 -116 -101 -114 -47 -99 -97 -116 -99 -97 -116 -99 -97 -116 -46 -112 -104 -112 -63 -118 -61 -49 -46 -48 -9 --3 --1 --6 -5 -3 -2 --8 -0 -936 --333 --109 --282 -545 -291 -94 --792 -0 --4 --254 --498 --662 --849 --642 -187 --520 -45 -240 -211 -388 -215 --861 --340 -559 --105 -185 --138 --180 -503 --718 -429 -350 -173 -251 -268 --655 --567 --53 --75 -80 -571 --128 -24 --408 -596 --550 -368 -26 -976 -156 -302 -647 -879 -223 -811 -54 -660 -Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the -output image, k -2 multiplications and k -2 additions of the products have to be calculated. -3 -2. Convolutional Neural Networks -One important detail is how boundaries are treated. There are four common ways of -boundary treatment: -• don’t compute: The image I -0 will be smaller than the original image. I -0 ∈ -R -(w−kw+1)×(h−kh+1)×d3 -, to be exact. -• zero padding: The image I is padded by zeros where the filter would access elements -which do not exist. This will result in edges being detected at the border if the border -pixels are not black, but doesn’t need any computation. -• nearest: Repeat the pixel which is closest to the boundary. -• reflect: Reflect the image at the boundaries. -Common tasks that can be done with linear filters include edge detection, corner detection, -smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples. -Please note that the result of a filtering operation is again an image. This means filters -can be applied successively. While each pixel after one filtering operation with a 3 × 3 -filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3 -filters increase the area of the original image which influenced the output. 
The output is -then influenced by 25 pixels. This is called the receptive field. The kind of pattern which is -detected by a filter is called a feature. The bigger the receptive field is, the more complex -features can get as they are able to consider more of the original image. Instead of taking -one 5 × 5 filter with 25 parameters, one might consider taking two successive 3 × 3 filters -with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering -operations compared to the two 3 × 3 filters, but the relevance of this technique will become -clear in Section 2.2. -2.2. CNN Layer Types -While the idea behind deep MLPs is that feature hierarchies capture the important parts -of the input more easily, CNNs are inspired by the idea of translational invariance: Many -features in an image are translationally invariant. For example, if a car is developed, one -could try to detect it by its parts [FGMR10]. But then there are many positions at which -the wheels could be. Combining those, it is desirable to capture low-level, translationally -invariant features at lower layers of an artificial neural network (ANN) and in higher layers -high-level features which are combinations of the low-level features. -Also, models should utilize the fact that the pixels of images are ordered. One way to use -this is by learning image filters in so-called convolutional layers. -While MLPs vectorize the input, the input of a layer in a CNN are feature maps. A feature -map is a matrix m ∈ R -w×h -, but typically the width equals the height (w = h). For an RGB -4 -2.2. CNN Layer Types -input image, the number of feature maps is d = 3. Each color channel is a feature map. -Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are -state-of-the-art in various computer vision tasks. 
-Traditional CNNs have three important building tools: -• Convolutional layers with a non-linear activation function as described in Section 2.2.1, -• pooling layers as described in Section 2.2.2 and -• normalization layers as described in Section 2.2.4. -2.2.1. Convolutional Layers -Convolutional layers take several feature maps as input and produce n feature maps1 as -output, where n is the number of filters in the convolution layer. The filter weights of -the linear convolutions are the parameters which are adapted to the training data. The -number n of filters as well as the filter’s size kw × kh are hyperparameters of convolutional -layers. Sometimes, it is denoted as n@kw × kh. Although the filter depth is usually omitted -in the notation, the filters are of dimension kw × kh × d -(i−1), where d -(i−1) is the number of -feature maps of the input layer (i − 1). -Another hyperparameter of convolution layers is the stride s ∈ N≥1 and the padding. -Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the -size of the feature maps doesn’t change. -The hyperparameters of convolutional layers are -• the number of filters n ∈ N≥1, -• kw, kh ∈ N≥1 of the filter size kw × kh × d -(i−1) -, -• the activation function of the layer (see Table B.3) and -• the stride s ∈ N≥1 -Typical choices are n ∈ { 32, 64, 128 }, kw = kh = k ∈ { 1, 3, 5, 11 } such as in [KSH12, -SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1. -The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89]. -With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just -like MLPs. In fact, every CNN has an equivalent MLP which computes the same function -if only the flattened output is compared. -1 -also called activation maps or channels -5 -2. Convolutional Neural Networks -This is easier to see when the filtering operation is denoted formally: -o -(i) -(x) = b + -X -k -j=1 -wij · xj with i ∈ { 1, . . 
. , w } × { 1, . . . , h } × { 1, . . . , d } [2.1] -o -(x,y,z) -(I) = b + -b -kw -2X -c -ix=1−d kw -2 -e -b -kh -2X -c -iy=1−d kh -2 -e -X -d -ic=1 -Fz(ix, iy, ic) · I(x + ix, y + iy, ic) [2.2] -with a bias b ∈ R, x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } -One can see that most weights of the equivalent MLP are zero and many weights are -equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. -The effect of fewer parameters is that less training data is necessary to get suitable -estimations for those. This means a MLP which is able to compute the same functions as a -CNN will likely have worse results on the same dataset, if a CNN architecture is suitable -for the dataset. -See Figure 2.2 for a visualization of the application of a convolutional layer. -3 feature maps -(e.g. RGB) n feature maps -n filters of -size k × k × 3 -width w -width w -height -h -height -h -neural -network -data -apply -. . . -. . . -. . . -. . . -. . . -. . . -Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride -s = 1 to input data of size width × height with three channels. -6 -2.2. CNN Layer Types -A convolutional layer with n filters of size kw × kh and SAME padding after d -(i−1) feature -maps of size sx × sy has n · d -(i−1) -·(kw · kh) parameters if no bias is used. In contrast, a fully -connected layer which produces the same output size and does not use a bias would have -n · d -(i−1) -· (sx × sy) -2 parameters. This means a convolutional layer has drastically fewer -parameters. One the one hand, this means it can learn less complex decision boundaries. On -the other hand, it means fewer parameters have to be learned and hence the optimization -procedure needs fewer examples and the optimization objective is simpler. -It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does -learn a linear combination of the d input feature maps. 
This can be used for dimensionality -reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps. -Another insight recently became important: Every fully connected layer has an equivalent -convolutional layer which has the same weights.2 This way, one can use the complete -classification network as a very complex non-linear image filter which can be used for -semantic segmentation. -A fully connected layer with d ∈ N≥1 inputs and n ∈ N≥1 nodes can be interpreted as a -convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will -produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs. -When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize -the feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output, -it is completely equivalent to a fully connected layer. However, the vectorization can be -omitted if a convolution layer without padding and a filter size equal to the feature maps -size is applied. This was used by [LSD15]. -2.2.2. Pooling Layers -Pooling summarizes a p × p area of the input feature map. Just like convolutional layers, -pooling can be used with a stride of s ∈ N>1. As s ≥ 2 is the usual choice, pooling layers -are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as -for AlexNet [KSH12] and VGG-16 [SZ14]. -The type of summary for the set of activations A varies between the functions listed -in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling -functions as introduced in [LGT16]. -2But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1 -7 -2. 
Convolutional Neural Networks -Name Definition Used by -Max pooling max { a ∈ A } [BPL10, KSH12] -Average / mean pooling 1 -|A| -P -a∈A -a LeNet-5 [LBBH98] and [KSlB+10] -`2 pooling pP -a∈A -a -2 [Le13] -Stochastic pooling * [ZF13] -Table 2.1.: Pooling types for a set A of activations a ∈ R. -(*) For stochastic pooling, each of the p×p activation values ai -in the pooling region gets -picked with probability pi = P ai -aj ∈A aj -. This assumes the activations ai are non-negative. -Pooling is applied for three reasons: To get local translational invariance, to get invariance -against minor local changes and, most important, for data reduction to 1 -s -2 th of the data by -using strides of s > 1. -See Figure 2.3 for a visualization of max pooling. -7 9 3 5 9 4 -0 7 0 0 9 0 -5 0 9 3 7 5 -9 2 9 6 4 3 -2 × 2 max pooling -9 5 9 -9 9 7 -2 -2 -Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with stride s = 2 and padding. -Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If -the input of the pooling layer are d -(i−1) feature maps, the convolutional layer has to have -d -(i−1) filters of size p × p and stride s. The ith filter has the values - - -1 -p -2 . . . -1 -p -2 -. -. -. -. -. -. -. -. -. -1 -p -2 . . . -1 -p -2 - - -for the dimension i and the zero matrix - - -0 . . . 0 -. -. -. -. -. -. -. -. -. -0 . . . 0 - - -for all other dimensions i = 1, . . . , d(i−1) -. -8 -2.2. CNN Layer Types -2.2.3. Dropout -Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting -the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is -well-described in [SHK+14]. -A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of -the same shape D ∈ { 0, 1 } -s -is sampled, where each element di -is sampled independently -from a Bernoulli distribution. 
The results are element-wise multiplied to calculate the -output out of the Dropout layer: -$\text{out} = D \odot \text{in}$ with $d_i \sim B(1, p)$ -where $\odot$ is the Hadamard product -$(A \odot B)_{i,j} := (A)_{i,j} \cdot (B)_{i,j}$ -Hence every value of the input gets set to zero with a dropout probability of p. Typically, -Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the -output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b]. -At inference time, dropout is disabled. -Dropout is usually only applied after fully connected layers, but not after convolutional -layers as it usually increases the test error as pointed out in [GG16]. -Models which use Dropout can be interpreted as an ensemble of models with different -numbers of neurons in each layer, but also with weight sharing. -Conceptually similar are DropConnect and networks with stochastic depth. DropConnect [WZZ+13] is a generalization of Dropout, which sets weights to zero in contrast to -setting the output of a neuron to zero. Networks with stochastic depth as introduced -in [HSL+16] drop out only complete layers. This can be done by having Residual networks -which have one identity connection and one residual feature connection. Hence the residual -features can be dropped out and the identity connection remains. -2.2.4. Normalization Layers -One problem when training deep neural networks is internal covariate shift: While the -parameters of layers close to the output are adapted to some input produced by lower layers, -those lower layers' parameters are also adapted. This leads to the parameters in the upper -layers being worse. A very low learning rate has to be chosen to adjust for the fact that the -input features might drastically change over time. -9 -2. Convolutional Neural Networks -One way to approach this problem is by normalizing mini-batches as described in [IS15]. 
A -Batch Normalization layer with d-dimensional input $x = (x^{(1)}, \dots, x^{(d)})$ is first normalized -point-wise to -$$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s'[x^{(k)}]^2 + \varepsilon}}$$ -with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and $s'[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i^{(k)} - \bar{x}^{(k)})^2$ the -sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0 -being a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron k for -training sample i. -Additionally, for each activation $x^{(k)}$ two parameters $\gamma^{(k)}, \beta^{(k)}$ are introduced which scale -and shift the feature: -$$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$$ -In the case of fully connected layers, this is applied to the activation, before the non-linearity -is applied. If it is applied after the activation, it harms the training in early stages. For -convolution, only one γ and one β is learned per feature map. -One important special case is $\gamma^{(k)} = \sqrt{s'[x^{(k)}]^2 + \varepsilon}$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the -Batch Normalization layer an identity layer. -During evaluation time,3 -the expected value and the variance are calculated once for the -complete dataset. An unbiased estimate of the empirical variance is used. -The question where Batch Normalization layers (BN) should be applied and for which -reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the -activation function. Considering this, the possible options for the order are: -1. CONV / FC → BN → activation function → Dropout → . . . -2. CONV / FC → activation function → BN → Dropout → . . . -3. CONV / FC → activation function → Dropout → BN → . . . -4. CONV / FC → Dropout → BN → activation function → . . . -The authors of [IS15] suggest to use Batch Normalization before the activation function -as in Items 1 and 4. 
Batch Normalization after the activation led to better results in -https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md -Another normalization layer is Local Response Normalization as described in [KSH12], -which includes ℓ2 normalization as described in [WWQ13]. Those two normalization layers, -however, are superseded by Batch Normalization. -3 -also called inference time -10 -2.3. CNN Blocks -2.3. CNN Blocks -This section describes more complex building blocks than simple layers. CNN blocks act -similar to a layer, but they are themselves composed of layers. -2.3.1. Residual Blocks -Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They -enabled the computer vision community to go from about 16 layers as in VGG 16-D (see -Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets) -as introduced in [HZRS15a] is to add an identity connection which skips two layers. This -identity connection adds the feature maps onto the other feature maps and thus requires -the output of the input layer of the residual block to be of the same dimension as the last layer -of the residual block. -Formally, it can be described as follows. If xi are the feature maps after layer i and x0 is -the input image, H is a non-linear transformation of feature maps, then -y = H(x) -describes a traditional CNN. Note that this could be multiple layers. A residual block as -visualized in Figure 2.4 is described by -y = H(x) + x -In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if -convi(xi) describes the application of the convolutional layer i to the input xi without the -nonlinearity, then such a residual block is -xi+2 = conv i+1(ReLU(conv i(xi))) + xi -Figure 2.4.: ResNet module -Image source: [HZRS15a] -[HM16] provides some insights why deep residual networks are successful. -11 -2. Convolutional Neural Networks -2.3.2. 
Aggregation Blocks -Two common ways to add more parameters to neural networks are increasing their depth -by adding more layers or increasing their width by adding more neurons / filters. Inception -blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as -“ResNeXt block”: Increasing the cardinality C ∈ N≥1. By cardinality, the authors describe -the concept of having C small convolutional networks with the same topology but different -weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not -combine aggregation blocks with residual blocks as the authors did. -256-d in -concatenate -total 32 -groups -. . . -128-d out -4 @ 1 × 1 × 256 -4 @ 3 × 3 × 4 -4 @ 1 × 1 × 256 -4 @ 3 × 3 × 4 -4 @ 1 × 1 × 256 -4 @ 3 × 3 × 4 -Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer -convolutional network. The first layer receives 256 feature maps and applies four 1 × 1 -filters to it. The second layer applies four 3 × 3 filters. Although every group has -the same topology, the learned weights are different. The outputs of the groups are -concatenated. -The hyperparameters of an aggregation block are: -• The topology of the group members. -• The cardinality C ∈ N≥1. Note that a cardinality of C = 1 is equivalent in every -aspect to using the group network without an aggregation block. -12 -2.3. CNN Blocks -2.3.3. Dense Blocks -Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The -idea is to connect each convolutional layer directly to subsequent convolutional layers. -Traditional CNNs with L layers and one input layer have L connections between layers, -but dense blocks have L(L+1) -2 -connections between layers. The input feature maps are -concatenated in depth. According to the authors, this prevents features from being re￾learned and allows much fewer filters per convolutional layer. 
Where AlexNet and VGG-16 -have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors -used only on the order of 12 feature maps per layer. -A dense block is visualized in Figure 2.6. -256-d in -k @ 3 × 3 -concatenate -k @ 3 × 3 -concatenate -256-d -k-d -(256 + k)-d -k-d -(256 + L · k)-d out -Figure 2.6.: Dense block with L = 2 layers and a growth factor of k. -Dense blocks have four hyperparameters: -• The activation function being used. The authors use ReLU. -• The size $k_w \times k_h$ of filters. The authors use $k_w = k_h = 3$. -• The number of layers L, where L = 2 is a simple convolutional layer. -• The number k of filters added per layer (called growth rate in the paper) -It might be necessary to use 1 × 1 convolutions to reduce the number of L · k feature maps. -13 -2. Convolutional Neural Networks -2.4. Transition Layers -Transition layers are used to overcome constraints imposed by resource limitations or -architectural design choices. One constraint is the number of feature maps (see Appendix C.3 -for details). In order to reduce the number of feature maps while still keeping as much -relevant information as possible in the network, a convolutional layer i with $k_i$ filters of -the shape $1 \times 1 \times k_{i-1}$ is added. The number of filters $k_i$ directly controls the number of -generated feature maps. -In order to reduce the dimensionality (width and height) of the feature maps, one typically -applies pooling. -Global pooling is another type of transition layer. It applies pooling over the complete -feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one -network to have different input sizes. -14 -2.5. Analysis Techniques -2.5. Analysis Techniques -CNNs have dozens of hyperparameters and ways to tune them. 
Although there are -automatic methods like random search [BB12], grid search [LBOM98], gradient-based -hyperparameter optimization [MDA15] and Hyperband [LJD+16], some actions need a -manual investigation to improve the model’s quality. For this reason, analysis techniques -which guide developers and researchers to the important hyperparameters are necessary. In -the following, nine diagnostic techniques are explained. -A machine learning developer has the following choices to improve the model’s quality: -(I1) Change the problem definition (e.g., the classes which are to be distinguished) -(I2) Get more training data -(I3) Clean the training data -(I4) Change the preprocessing (see Appendix B.1) -(I5) Augment the training data set (see Appendix B.2) -(I6) Change the training setup (see Appendices B.3 to B.5) -(I7) Change the model (see Appendices B.6 and B.7) -The preprocessing is usually not changed in modern architectures. However, this still leaves -six very different ways to improve the classifier. Changing the training setup and the model -each have too many possible choices to explore them completely. Thus, techniques are -necessary to guide the developer to changes which are most promising to improve the model. -For all of the following methods, it is important to use only the training set and the -validation set. -2.5.1. Qualitative Analysis by Example -The most basic analysis technique which should always be used is looking at examples -which the network correctly predicted with a high certainty and what the classifier got -wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08]. -On the one hand, this might reveal errors in the training data. Most of the time, training -data is manually labeled by humans who make mistakes. If a model is fit to those errors, -its quality decreases. 
-On the other hand, this can show differences in the distribution of validation data which -are not covered by the training set and thus indicate the need to collect more data. -15 -2. Convolutional Neural Networks -2.5.2. Confusion Matrices -A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$, where $K \in \mathbb{N}_{\geq 2}$ is the number of classes, -which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times -items of class i were classified as class j. This means the correct classification is on the -diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum $\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}$ is the -total number of samples which were evaluated and -$\frac{\sum_{i=1}^{K} c_{ii}}{\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}}$ -is the accuracy. -The sums $r(i) = \sum_{j=1}^{K} c_{ij}$ of each class i are worth being investigated as they show if the -classes are skewed. If the number of samples of one class dominates the data set, then the -classifier can get a high accuracy by simply always predicting the most common class. If -the accuracy of the classifier is close to the a priori probability of the most common class, -techniques to deal with skewed classes might help. -An automatic criterion to check for this problem is -$\text{accuracy} \leq \frac{\max(\{ r(i) \mid i = 1, \dots, K \})}{\sum_{i=1}^{K} r(i)} + \varepsilon$ -where ε is a small value to compensate the fact that some examples might be correct just -by chance. -Other values which should be checked are the class-wise sensitivities: -$s(k) = \frac{\#\text{ correctly identified instances of class } k}{\#\text{ instances of class } k} = \frac{c_{kk}}{r(k)} \in [0, 1]$ -If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is -necessary for class i. -The class-wise confusion -$f_{\text{confusability}}(k_1, k_2) = \frac{c_{k_1 k_2}}{\sum_{j=1}^{K} c_{k_1 j}}$ -indicates if class $k_1$ is often classified as class $k_2$. The highest values here can indicate -if two classes should be merged or a specialized model for separating those classes could -improve the overall system. -2.5.3. 
Validation Curves: Accuracy, loss and other metrics -Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal -axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are -typical quality metrics. Other quality metrics can be found in [OHIL16]. -In case the number of training epochs is used as the examined hyperparameter, -validation curves give an indicator if training longer improves the model’s performance. By -16 -2.5. Analysis Techniques -plotting the error on the training set as well as the error on a validation set, one can also -estimate if overfitting might become a problem. See Figure 2.7 for an example. -10 20 30 40 50 60 70 80 90 100 -0.2 -0.4 -0.6 -0.8 -overfitting -Epochs -Error Training set -Validation set -Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs -and the quality metric is the error (1 − accuracy). The longer the network is trained, -the better it gets on the training set. At some point the network is fit too well to the -training data and loses its capability to generalize. At this point the quality curve of -the training set and the validation set diverge. While the classifier is still improving on -the training set, it gets worse on the validation and the test set. -When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization process did not improve for several epochs. Three possible ways to reduce the -problem of plateaus are (i) to change weight initialization if the plateau was at the beginning, -(ii) regularizing the model or (iii) changing the optimization algorithm. -Loss functions -The loss function (also called error function or cost function) is a function which assigns a -real value to a complex event like the predicted class of a feature vector. It is used to define -the objective function. 
For classification problems the loss function is typically cross-entropy -with $\ell_1$ or $\ell_2$ regularization, as it was described in [NH92]: -$E_{CE}(W) = \underbrace{-\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]}_{\text{cross-entropy data loss}} + \underbrace{\lambda_1 \cdot \overbrace{\sum_{w \in W} |w|}^{\ell_1} + \lambda_2 \cdot \overbrace{\sum_{w \in W} w^2}^{\ell_2}}_{\text{model complexity loss}}$ -where W are the weights, X is the training data set, K ∈ N≥0 is the number of classes and -$t_k^x$ indicates if the training example x is of class k. $o_k^x$ is the output of the classification -algorithm which depends on the weights. λ1, λ2 ∈ [0, ∞) weight the regularization and are -typically smaller than 0.1. -17 -2. Convolutional Neural Networks -Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange -curve is smoothed, but the non-smoothed curve is also plotted in light orange. -The data loss is positive whenever the classification is not correct, whereas the model -complexity loss is higher for more complex models. The model complexity loss exists due -to the intuition of Occam’s razor : If two models explain the same data with an accuracy of -100 %, the simpler model is to be preferred. -A reason to show the loss for the validation curve technique instead of other quality metrics -is that it contains more information about the quality of the model. A reason against the -loss is that it has no upper bound like the accuracy and can be hard to interpret. The -loss only shows relative learning progress whereas the accuracy shows absolute progress to -human readers. -There are three observations in the loss validation curve which can help to improve the -network: -• If the loss does not decrease for several epochs, the learning rate might be too low. -The optimization process might also be stuck in a local minimum. -• Loss being NAN might be due to too high learning rates. Another reason is division -by zero or taking the logarithm of zero. 
In both cases, adding a small constant like -$10^{-7}$ fixes the problem. -• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization might be bad. -18 -2.5. Analysis Techniques -Quality criteria -There are several quality criteria for classification models. Most quality criteria are based on -the confusion matrix c which denotes at $c_{ij}$ the number of times the real class was i and j -was predicted. This means the diagonal contains the number of correct predictions. For -the following, let $t_i = \sum_{j=1}^{k} c_{ij}$ be the number of training samples for class i. The most -common quality criterion is accuracy: -$\text{accuracy}(c) = \frac{\sum_{i=1}^{k} c_{ii}}{\sum_{i=1}^{k} t_i} \in [0, 1]$ -One problem of accuracy as a quality criterion is skewed classes. If one class is by far -more common than all other classes, then the simplest way to achieve a high score is to -always classify everything as the most common class. -In order to fix this problem, one can use the mean accuracy: -$\text{mean-accuracy}(c) = \frac{1}{k} \cdot \sum_{i=1}^{k} \frac{c_{ii}}{t_i} \in [0, 1]$ -For two-class problems there are many other metrics like precision, recall and Fβ-score. -Quality criteria for semantic segmentation are explained in [Tho16]. -Besides the quality of the classification result, several other quality criteria are important -in practice: -• Speed of evaluation for new images, -• latency, -• power consumption, -• robustness against (non)random perturbations in the training data (see [SZS+13, -PMW+15]), -• robustness against (non)random perturbations in the training labels (see [NDRT13, -XXE12]), -• model size -As reducing the floating point accuracy allows to process more data on a given device [Har15], -analysis under this aspect is also highly relevant in some scenarios. -However, the following focuses on the quality of the classification result. -19 -2. Convolutional Neural Networks -2.5.4. 
Learning Curves -A learning curve is a plot where the horizontal axis displays the number of training samples -given to the network and the vertical axis displays the error. Two curves are plotted: The -error on the training set (of which the size is given by the horizontal axis) and the error on -the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the -validation set is an indicator if more training data without any other changes will improve -the network’s performance. Having the training set’s learning curve, it is possible to estimate -if the capacity of the model to fit the data is high enough for the desired classification error. -The error on the validation set should never be expected to be significantly lower than the -error on the training set. If the error on the training set is too high, then more data will -not help. Instead, the model or the training algorithm need to be adjusted. -If the validation set’s learning curve is significantly higher than the training set’s learning -curve, then removing features (e.g., by decreasing the image’s resolution), more training -samples or more regularization will help. -10 20 30 40 50 60 70 80 90 100 -0.2 -0.4 -0.6 -avoidable bias -variance -human-level error -Training samples -Error Validation set -Training set -Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given -architecture will make to fit the given training data. At the same time, it is expected -that the training data gets more similar to the true distribution of the data which -should be captured by the test data. At some point, the error on the training and -test set should be about the same. The term “avoidable bias” was coined by Andrew -Ng [Ng16]. In some cases it is not possible to classify data correctly by the given -features. If humans can classify the data given the features correctly, however, then -the bias is avoidable by building a better classifier. 
-The major drawback of this analysis technique is its computational intensity. In order to -get one point on the training curve and one point on the testing curve, a complete training -has to be executed. On the full data set, this can be several days on high-end computers. -20 -2.5. Analysis Techniques -2.5.5. Input-feature based model explanations -Understanding which clues the model took to come to its prediction is crucial to check if -the model actually learns what the developer thinks it learns. For example, a model which -has to distinguish sled dogs from Chihuahuas might simply look at the background and -check if there is snow. Depending on the training and test data, this works exceptionally -well. However, it is not the desired solution. -For classification problems in computer vision, there are two types of visualizations which -help to diagnose such problems. Both color superpixels of the original image to convey -information how the model used those superpixels: -• Correct class heatmap: The probability of the correct class is encoded to give a -heat map which superpixels are important for the correct class. This can also be done -by setting the opacity accordingly. -• Most-likely class image: Each of the most likely classes for all superpixels is -represented by a color. The colored image thus gives clues why different predictions -were assigned a high probability. -Two methods to generate such images are explained in the following. -Occlusion Sensitivity Analysis -Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the -image by something. This could be a gray square as in [ZF14] or a black superpixel as -in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., -superpixel or position of the square) and the regions are then colored to generate either a -correct class heatmap or the most-likely class image. 
It is important to note that the color -at region $r_i$ denotes the result if $r_i$ is occluded. -Both visualizations are shown in Figure 2.10. One can see that the network makes sensible -predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan -Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. -Gradient-based approaches -In [SVZ13], a gradient-based approach was used to generate image-specific class saliency -maps. The authors describe the problem as a ranking problem, where each pixel of the -image $I_0$ is assigned a score $S_c(I_0)$ for a class c of interest. CNNs are non-linear functions, -but they can be approximated by the first order Taylor expansion $S_c(I) \approx w^T I + b$ where -w is the derivative of $S_c$ at $I_0$. -21 -2. Convolutional Neural Networks -2.5.6. Argmax Method -The argmax method has two variants: -• Fixed class argmax: Propagate all elements of a given class through the network -and analyze which neurons are activated most often / have the highest activation. -• Fixed neuron argmax: Propagate the data through the network and find the n -data elements which cause the highest activation for a given neuron. -Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an -image I is calculated by applying F to I and calculating the element-wise sum of the result. -Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides -showing the 9 images which caused the highest activation, they also trained a deconvolutional -neural network to project the activation of the filter back into pixel space. -The fixed neuron argmax can be used qualitatively to get an impression of the kind of -features which are learned. This is useful to diagnose problems, for example in [AM15] it is -described that the network recognized the class “dumbbell” only if a hand was present, too. 
-Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters -being shared between classes or how many parameters are mainly assigned to which classes. -Going one step further from the fixed neuron argmax method is using an optimization -algorithm to change an initial image minimally in such a way that any desired class gets -predicted. This is called caricaturization in [MV16]. -2.5.7. Feature Map Reconstructions -Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights -into the learned features. This shows what the network emphasizes. However, it is not -necessarily the case that the feature maps allow direct and easy conclusions about the -learned features. This technique is called inversion in [MV16]. -A key idea of feature map visualizations is to reconstruct a layer’s input, given its activation. -This makes it possible to find which inputs would cause neurons to activate with extremely -high or low values. -More recent work like [NYC16] tries to make the reconstructions’ appearance look more -natural. -22 -2.5. Analysis Techniques -2.5.8. Filter comparison -One question which might lead to some insight is how robust the features are which -are learned. If the same network is trained with the same data, but different weight -initializations, the learned weights should still be comparable. -If the set of learned filters changes with initialization, this might be an indicator for too -little capacity of that layer. Hence adding more filters to that layer could improve the -performance. -Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: -$\rho_k(W_i, W_j) = \max_{(x,y) \in \{-k, \dots, k\}^2 \setminus (0,0)} \frac{\langle W_i, T(W_j, x, y) \rangle_f}{\|W_i\|_2 \, \|W_j\|_2} \in [-1, 1],$ -where T(·, x, y) denotes the translation of the first operand by (x, y), with zero padding at -the borders to keep the shape. 
$\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two -operands are flattened into column vectors before applying the standard inner product. The -closer the absolute value of the k-translation correlation to one, the more similar two filters -$W_i$, $W_j$ are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and -VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found -this by comparing the averaged maximum k-translational correlation of the networks with -Gaussian-distributed initialized filters. The averaged maximum k-translational correlation -is defined as -$\bar{\rho}_k(W) = \frac{1}{N} \sum_{i=1}^{N} \max_{j=1, j \neq i}^{N} \rho_k(W_i, W_j)$ -where N is the number of filters in the layer W and $W_i$ denotes the ith filter. -2.5.9. Weight update tracking -Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if -the learning rate is well-chosen. He suggests that the weight update should be in the order -of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the -weight update is too low, then the learning rate has to be increased. -The order of the weight updates as well as possible implications highly depend on the model -and the training algorithm. See Appendix B.5 for a short overview of training algorithms -for neural networks. -23 -2. Convolutional Neural Networks -2.6. Accuracy boosting techniques -There are techniques which can almost always be applied to improve accuracy of CNN -classifiers: -• Ensembles [CMS12] -• Training-time augmentation (see Appendix B.2) -• Test-time transformations [DDFK16, How13, HZRS15b] -• Pre-training and fine-tuning [ZDGD14, GDDM14] -One of the most simple ensemble techniques which was introduced in [CMS12] is averaging -the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly -the same training setup by reducing variance. 
-Data augmentation techniques give the optimizer the possibility to take invariances like -rotation into account by generating artificial training samples from real training samples. -Data augmentation hence reduces bias and variance with no cost at inference time. -Data augmentation at inference time reduces the variance of the classifier. Similar to using -an ensemble, it increases the computational cost of inference. -Pretraining the classifier on another dataset to start from a good position or finetuning -a model which was originally created for another task is also a common technique. -24 -2.6. Accuracy boosting techniques -Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images, -where a gray square occluded a part of the image. This gray square’s center (x, y) was -moved over the complete image and the classifier was run on each of the occluded -images. The probability of the correct class, depending on the gray square’s position, -is shown in the middle column. One can see that the predicted probability of the -correct class “Pomeranian” drops if the face of the dog is occluded. The last image -gives the class with the highest predicted probability. In the case of the Pomeranian, -it always predicts the correct class if the head is visible. However, if the head of the -dog is occluded, it predicts other classes. -25 -2. Convolutional Neural Networks -Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature -maps which caused the highest activation are displayed. -26 -3. Topology Learning -The topology of a neural network is crucial for the number of parameters, the number -of floating point operations (FLOPs), the required memory, as well as the features being -learned. The choice of the topology, however, is still mainly done by trial-and-error. 
-This chapter introduces four general approaches to automatic topology learning: Growing a -network from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches -in Section 3.3 and reinforcement learning approaches in Section 3.4. -3.1. Growing approaches -Growing approaches for topology learning start with a minimal network, which only has -the necessary number of input nodes and the number of output nodes which are determined -by the application and the features of the input. They then apply a criterion to insert new -layers / neurons into the network. -In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization are introduced. -3.1.1. Cascade-Correlation -Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which -is similar to the dense block described in Section 2.3.3. -Cascade-Correlation works as follows: -1. Initialization: The number of input nodes and the number of output nodes are -defined by the problem. Create a minimal, fully connected network for those. -2. Training: Train the network until the error no longer decreases. -3. Candidate Generation: Generate candidate nodes. Each candidate node is connected to all inputs. They are not connected to other candidate nodes and not -connected to the output nodes. -27 -3. Topology Learning -4. Correlation Maximization: Train the weights of the candidates by maximizing S, -the correlation between the candidate’s output value V and the network’s residual error: -$S = \sum_{o \in O} \left| \sum_{p \in T} (V_p - \bar{V}) (E_{p,o} - \bar{E}_o) \right|$ -where O is the set of output nodes, T is the training set, $V_p$ is the candidate neuron’s -activation for a training pattern p. $E_{p,o}$ is the residual output error at node o for -pattern p. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of T. This step is finished -when the correlation no longer increases. -5. 
Candidate selection: Keep the candidate node with the highest correlation, freeze -its incoming weights and add connections to the output nodes. -6. Continue: If the error is higher than desired, continue with step 2. -One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1. -1 -Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray) -to the left, three hidden nodes (green) in the middle and two output nodes in the upper -right corner. The black squares represent frozen weights which are found by correlation -maximization whereas the white squares are trainable weights. -3.1.2. Meiosis Networks -Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where -weights are deterministic and fixed at prediction time, each weight $w_{ij}$ in Meiosis networks -follows a normal distribution: -$w_{ij} \sim \mathcal{N}(\mu_{ij}, \sigma_{ij}^2)$ -2 -3.2. Pruning approaches -Hence every connection has two learned parameters: $\mu_{ij}$ and $\sigma_{ij}^2$. -The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell -division. A node j is split when the random part dominates the value of the sampled -weights: -$\frac{\sum_i \sigma_{ij}}{\sum_i \mu_{ij}} > 1$ and $\frac{\sum_k \sigma_{jk}}{\sum_k \mu_{jk}} > 1$ -The mean of the new nodes is sampled around the old mean, half the variance is assigned -to the new connections. -Hence Meiosis networks only change the number of neurons per layer. They do not add -layers or add skip connections. -3.1.3. Automatic Structure Optimization -Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on-line handwriting recognition. It makes use of the confusion matrix $C = (c_{ij}) \in \mathbb{N}_{\geq 0}^{k \times k}$ -(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix -S with $s_{ij} = s_{ji} = c_{ij} \cdot c_{ji}$. The maximum of S defines where the ASO algorithm adds -more parameters. The details how the resources are added are not transferable to CNNs. -3.2. 
Pruning approaches -Pruning approaches start with a network which is bigger than necessary and prune it. The -motivation to prune a network which has the desired accuracy is to save storage for easier -model sharing, memory for easier deployment and FLOPs to reduce inference time and -energy consumption. Especially for embedded systems, deployment is a challenge and low -energy consumption is important. -Pruning generally works as follows: -1. Train a given network until a reasonable solution is obtained, -2. prune weights according to a pruning criterion and -3. retrain the pruned network. -This procedure can be repeated. -One family of pruning criteria uses the Hessian matrix. For example, Optimal Brain -Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calculates -the effect on the objective function of deleting k. The authors call the effect of the deletion -29 -3. Topology Learning -of parameter k the saliency $s_k$. The parameters with the lowest saliency are deleted, which -means they are set to 0 and are not updated anymore. -A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights -in a much better way. This requires, however, to calculate the inverse Hessian matrix -$H^{-1} \in \mathbb{R}^{n \times n}$ where $n \in \mathbb{N}$ is typically $n > 10^6$. -A much simpler and computationally cheaper pruning criterion is the weight magnitude. -[HPTD15] prunes all weights w which are below a threshold θ: -$w \leftarrow \begin{cases} w & \text{if } w \geq \theta \\ 0 & \text{otherwise} \end{cases}$ -3.3. Genetic approaches -The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which -can recombine themselves via crossover and inversion. An introduction to such algorithms -is given in [ES03]. -Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its -successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10]. 
-The results, however, are of unacceptable quality: On MNIST (see Appendix E), where -random chance gives 10 % accuracy, even simple topologies trained with SGD achieve -about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+13], the HyperNEAT -algorithm achieves only 23.9 % accuracy [VH13]. -Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy -on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer -so that not more than three hidden layers could be trained. At the same time, VGG-19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers -in [HZRS15a]. -[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and -SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is -at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100, -but the state of the art is 17.18 % [HLW16]. -3.4. Reinforcement Learning -Reinforcement learning is a sub-field of machine learning, which focuses on the question -how to choose actions that lead to high rewards. -30 -3.5. Convolutional Neural Fabrics -One can think of the search for good neural network topologies as a reinforcement learning -problem. The agent is a recurrent neural network which can generate bitstrings. Those -variable-length bitstrings encode neural network topologies. -In 2016, this approach was applied to construct neural networks for computer vision. -In [BGNR16], Q-learning with an ε-greedy exploration was applied. -In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models -for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous -amounts of computational resources were used to obtain those results. -3.5. Convolutional Neural Fabrics -Convolutional Neural Fabrics are introduced in [SV16]. 
They side-step hard decisions -about topologies by learning an ensemble of different CNN architectures. The idea is to -define a single architecture as a trellis through a 3D grid of nodes. Each node represents a -convolutional layer. One dimension is the index of the layer, the other two dimensions are -the amount of filters and the feature size. Each node is connected to nine other nodes and -thus represents nine possible choices of convolutional layers: -• Resolution: (i) convolution with stride=1 or (ii) convolution with stride=2 or -(iii) deconvolution (doubling the resolution) -• Channels: (i) half the number of filters than the layer before (ii) the same number -of filters as the layer before (iii) double the number of filters than the layer before -They always use ReLU as an activation function and they always use filters of size 3 × 3. -They don’t use pooling at all. -31 -3. Topology Learning -32 -4. Hierarchical Classification -Designing a classifier for a new dataset is hard for two main reasons: Many design choices are -not clearly superior to others and evaluating one design choice takes much time. Especially -CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train. -Additionally, some methods for analyzing a dataset become harder to use with more classes -and more training samples. Examples are t-SNE, the manual inspection of errors and -confusion matrices, and the argmax method. -One idea to approach this problem is by building a hierarchy of classifiers. The root -classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single -classes. Figure 4.1 gives an example for a hierarchy of classifiers. -Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. -The root classifier C0 has to distinguish six coarse classes (pedestrian, four+-wheelers, -traffic signs, two-wheelers, street, other) or 17 fine-grained classes. 
If C0 predicts a -pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C0 -predicts traffic sign, then another classifier has to predict if it is a speed limit, a -sign indicating danger or something else. If C0, however, predicts road, then no other -classifier will become active. -In this example, the problem has 17 classes. The hierarchical approach introduces -7 clusters of classes and thus uses 8 classifiers. -Such a hierarchy of classifiers needs clusters of classes. -33 -4. Hierarchical Classification -4.1. Advantages of classifier hierarchies -Having a classifier hierarchy has five advantages: -• Division of labor: Different teams can work together. Instead of having a monolithic -task, the solutions can be combined. -• Guarantees: Changing a classifier will only change the prediction of itself and its -children. Siblings are not affected. In the example from Figure 4.1, the classifier -which distinguishes traffic signs can be changed while the classification as pedestrian, -four+-wheelers, traffic sign, street, other will not be affected. Also, the -classification between speed limits, danger signs and other signs will not change. -• Faster training: Except for the root classifier C0, each other classifier will have -less than the total amount of training data. Depending on the combined classes, the -models could also be simpler. Hence the training time is reduced. -• Weighting of errors: In practice, some errors are more severe than others. For -example, it could be acceptable if the two-wheelers classifier has an error rate of -40 %. But it is not acceptable if the speed limit classifier has such a high error rate. -• Post-hoc explanations: The simpler a model is, the easier it is to explain why a -classification is made the way it is made. -4.2. Clustering classes -There are two ways to cluster classes: By similarity or by semantics. 
While semantic -clustering needs either additional information or manual work, the similarity can be -automatically inferred from the data. As pointed out in [XZY+14], semantically similar -classes are often also visually similar. For example, in the ImageNet dataset most dogs -are semantically and visually more similar to each other than to non-dogs. An example -where this is obviously not the case are symbols: The summation symbol \sum is identical -in appearance to the Greek letter \Sigma, but semantically much closer to the addition -operator +. -One approach to cluster classes by similarity is to train a classifier and examine its -predictions. Each class is represented in the confusion matrix by one row. Those rows -can be directly clustered with standard clustering algorithms such as k-means, DBSCAN [EKS+96], -OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral -clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of -them do not allow a human to improve the found clustering manually. -The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ states how often class i was present and class j was -34 -4.2. Clustering classes -predicted. The more often this confusion happens, the more similar those two classes are to -the classifier. Based on the confusion matrix, the classes can be clustered as explained in -the following. -[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains -diminish after a critical point of classes is reached. Hence a binary tree might not be a -good choice. As an alternative, an approach which allows building arbitrarily many clusters -is proposed. -The proposed algorithm has two main ideas: -• The order of columns and rows in the confusion matrix is arbitrary. This means one -can swap rows and columns. If rows i and j are swapped, then the columns i and j -have to be swapped too in order to keep the same confusion matrix.
-• If two classes are confused often, then they are similar to the classifier. -Hence the order of the classes is permuted in such a way that the highest errors are close -to the diagonal. One possible objective function to be minimized is -$$f(C) = \sum_{i=1}^{n} \sum_{j=1}^{n} C_{ij} \cdot |i - j| \tag{4.1}$$ -which punishes errors linearly with the distance to the diagonal. This method is called CMO -in the following. -As pointed out by Tobias Ribizel (personal communication), this optimization problem -is a weighted version of the Optimal Linear Arrangement problem. That problem is NP-complete [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, -produces reasonable clusterings as well as visually appealing confusion matrices. The -algorithm works as follows: First, decide with probability 0.5 if only two random rows are -swapped or a block is swapped. If two rows are swapped, choose both of them randomly. -If a block is swapped, then choose the start randomly and the end of the block randomly -after the start. The insert position has to be a valid position considering the block length, -but besides that it is also chosen uniformly at random. -Simple row-swapping can exploit local improvements. For example, in the context of -ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier -and both dog classes Dalmatian and Greyhound next to each other. The two clusters -of dog breeds could be separated by car and bus due to random chance. Moving any single -class increases the score, but moving either one of the dog breed clusters or the vehicle -cluster decreases the score. Hence it is beneficial to implement block moving. -One advantage of permuting the classes in order to minimize Equation (4.1) in comparison -to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be -35 -4. Hierarchical Classification -split into many much smaller matrices along the diagonal.
In the case of many classes (e.g., -1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to -visualize the types of errors made. If the errors are systematic due to visual similarity, many -confusions are not made and thus many elements of the confusion matrix are close to 0. -Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1). -Once a permutation of the classes is found which has a low score according to Equation (4.1), the clusters -can either be made by hand by deciding which classes should not be in one cluster. With -such a permutation, only n − 1 binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions $C'_{i,i+1} + C'_{i+1,i}$ for -each pair of classes which are neighbors in the confusion matrix. The higher this value, the -more similar are the classes according to the classifier. Hence a threshold θ can be applied. -θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) -or semi-automatically by asking the user for information if two classes belong to the same -cluster. Such an approach only needs log(n) binary decisions from the user where n is the -number of classes. -Please note that CMO only works if the classifier is neither too bad nor too good. A classifier -which does not solve the task at all might just give almost uniform predictions whereas the -confusion matrix of an extremely good classifier is almost diagonal and thus contains no -information about the similarity of classes. One possible solution to this problem is to take -the prediction of the class in contrast to using only the argmax in order to find a useful -permutation. -36 -5. Experimental Evaluation -All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16] -and cuDNN 5.1 [CWV+14] as the backend.
The experiments were run on different machines -with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce -GTX 970 and GeForce 940MX. -The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], -HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are -used as their size is small enough to be trained within a day. Other classification datasets -which were considered are listed in Appendix E. -CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color -images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer, -dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 % [HLW16]. -According to [Kar11], human accuracy is at about 94 %. -CIFAR-100 is a 100-class dataset of color images of the size 32 px × 32 px. Its 100 classes -are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles -and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain -the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16]. -GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. -The 51 839 images are in color and of a minimum size of 25 px×25 px up to 266 px×232 px. -The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11]. -According to [SSSI], human performance is at 98.84 %. -HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images -of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows, -mathematical symbols. The state of the art achieves an accuracy of 82.00 % [Tho17a]. -STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px×96 px. -Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. 
The state -of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images -for unsupervised training and 500 images per class for supervised training. -SVHN (Street View House Numbers) exists in two formats. For the following experiments, -the cropped digit format was used. It contains the 10 digits cropped from photos of Google -Street View. The images are in color and of size 32 px × 32 px. The state of the art -37 -5. Experimental Evaluation -achieves an accuracy of 98.41 % [HLW16]. According to [NWC+11a], human performance -is at 98.0 %. -As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0, 1]. -For GTSRB, the training and test data was scaled to 32 px × 32 px. -5.1. Baseline Model and Training setup -The baseline model is trained with Adam [KB14], an initial learning rate of $10^{-4}$, a batch -size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation -depends on the dataset: -• CIFAR-10, CIFAR-100 and STL-10: Random width and height shift by at most -±3 pixels in either direction; random horizontal flip. -• GTSRB, MNIST: Random width and height shift by at most ±5 pixels in either -direction; random rotation by at most ±15 degrees; random channel shift; random -zoom in [0.5, 1.5]; random shear by at most 6 degrees. -• HASYv2: Random width and height shift by at most ±5 pixels in either direction; -random rotation by at most ±5 degrees. -• SVHN: No data augmentation. -If the dataset does not define a training/test set, a stratified 67 % / 33 % split is applied. If -the dataset does not define a validation set, the training set is split in a stratified manner -into 90 % training set / 10 % validation set. -Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of -10 epochs is applied.
After this, the model is trained without data augmentation for at most -1000 epochs with early stopping and the validation accuracy as a stopping criterion and a -patience of 10 epochs. Kernel weights are initialized according to the uniform initialization -scheme of He [HZRS15b] (see Appendix B.3). -The architecture of the baseline model uses a pattern of -$$\text{Conv-Block}(n) = (\text{Convolution} - \text{Batch Normalization} - \text{Activation})^{n} - \text{Pooling}$$ -The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for -the last layer where softmax is used. Before each of the last two convolutional layers, a dropout -layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1. -Please note that the number of input- and output channels of the network depends on -the dataset. If the input image is larger than 32 px × 32 px, for each power of two a -Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to -32 px × 32 px. -38 -5.1.
Baseline Model and Training setup -# Type Filters @ -Patch size / stride -Parameters FLOPs Output size -Input 0 0 3 @ 32 × 32 -1 Convolution 32 @ 3 × 3 × 3 / 1 896 1 736 704 32 @ 32 × 32 -2 BN + ELU 64 163 904 32 @ 32 × 32 -3 Convolution 32 @ 3 × 3 × 32 / 1 9 248 18 841 600 32 @ 32 × 32 -4 BN + ELU 64 163 904 32 @ 32 × 32 -Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16 -5 Convolution 64 @ 3 × 3 × 32 / 1 18 496 9 420 800 64 @ 16 × 16 -6 BN + ELU 128 82 048 64 @ 16 × 16 -7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16 -8 BN + ELU 128 82 048 64 @ 16 × 16 -Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8 -9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8 -10 BN + ELU 128 20 608 64 @ 8 × 8 -Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4 -11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 800 1 048 064 512 @ 1 × 1 -12 BN + ELU 1 024 3 584 512 @ 1 × 1 -Dropout 0.5 0 0 512 @ 1 × 1 -13 Convolution 512 @ 1 × 1 × 512 / 1 262 656 523 776 512 @ 1 × 1 -14 BN + ELU 1 024 3 584 512 @ 1 × 1 -Dropout 0.5 0 0 512 @ 1 × 1 -15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1 -Global avg Pooling 1 × 1 0 k k @ 1 × 1 -16 BN + Softmax 2k 7k k @ 1 × 1 -P 515k -+892 512 -1032k -+55 729 664 103 424+2k -Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers -use SAME padding, except for layer 11 which used VALID padding in order to decrease -the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for -each power of two there are two Convolution + BN + ELU blocks and one Max pooling -block added. This is the framed part in the table. 
-32 × 32 -Input -C 32@3 × 3/1 -BN + ELU -C 32@3 × 3/1 -BN + ELU -16 × 16 -max pooling 2 × 2/2 -C 64@3 × 3/1 -BN + ELU -C 64@3 × 3/1 -BN + ELU -8 × 8 -max pooling 2 × 2/2 -C 64@3 × 3/1 -BN + ELU -4 × 4 -max pooling 2 × 2/2 -C 512@4 × 4/1 (V) -BN + ELU -Dropout, p = 0.5 -1 × 1 -C 512@1 × 1/1 -BN + ELU -Dropout, p = 0.5 -C k@1 × 1/1 -Global AVG pooling -BN + Softmax -Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 filters -of kernel size 3 × 3 with stride 1. -39 -5. Experimental Evaluation -5.1.1. Baseline Evaluation -The results for the baseline model evaluated on eight datasets are given in Table 5.2. The -speed for inference for different GPUs is given in Table 5.3. -Dataset Single Model Accuracy Ensemble of 10 -Training Set Test Set Training Set Test Set -Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 % -CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 % -CIFAR-100 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 % -GTSRB 100.00 % σ = 0.00 99.18 % σ = 0.11 100.00 % 99.46 % -HASYv2 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % -MNIST 99.93 % σ = 0.07 99.53 % σ = 0.06 99.99 % 99.58 % -STL-10 94.12 % σ = 0.87 75.67 % σ = 0.34 96.35 % 77.62 % -SVHN 99.02 % σ = 0.07 96.28 % σ = 0.10 99.42 % 97.20 % -Table 5.2.: Baseline model accuracy on eight datasets. The single model accuracy is the mean accuracy of the 10 models -used in the ensemble. The empirical standard deviation σ of the accuracy is also given. -CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the -models uses unlabeled data or data from other datasets. For HASYv2 no test-time -transformations are used.
-Network GPU Tensorflow Inference per Training -1 Image 128 images time / epoch -Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s -Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s -Baseline Default GeForce 940MX 4 ms 120 ms 145.6 s -Baseline Default GTX 970 6 ms 32 ms 25.0 s-26.3 s -Baseline Default GTX 980 3 ms 24 ms 20.5 s-21.1 s -Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22.1 s -Baseline Default GTX 1070 2 ms 15 ms 14.4 s-14.5 s -Baseline Default Titan Black 4 ms 25 ms 28.1 s-28.1 s -Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24.4 s -DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms — -Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on -six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17]. -Weights the baseline model can be found at [Tho17b]. The optimized Tensorflow build -makes use of SSE4.X, AVX, AVX2 and FMA instructions. -40 -5.1. Baseline Model and Training setup -5.1.2. Weight distribution -The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution -of bias weights by layer is shown in Figure 5.3. Although both figures only show the -distribution for one specific model trained on CIFAR-100, the following observed patterns -are consistent for 70 models (7 datasets and 10 models per dataset): -• The empiric [0.5 − percentile, 99.5 − percentile] interval which contains 99 % of the -filter weights is almost symmetric around zero. The same is true for the bias weights. -• The farther a layer is from the input away, the smaller the 99-percentile interval is, -except for the last layer (see Table A.1). -• The 99-percentile interval of the first layers filter weights is about [−0.5, +0.5], except -for MNIST and HASYv2 where it is in [−0.8, 0.8]. -• The 99-percentile interval of the first layers bias weights is always in [−0.2, 0.2]. -• The distribution of filter weights of the last convolutional layer is not symmetric. 
In -some cases the distribution is also not unimodal. -• The bias weights of the last three layers are very close to zero. The absolute value of -most of them is smaller than $10^{-2}$. -Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of -Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases, -the Batch Normalization layer equals the identity and thus is only relevant for the training. -While γ and β do not show as clear patterns as the filter and bias weights of convolutional -layers, some observations are also consistent through all models even for different datasets: -• γ of the last layer (layer 16) is bigger than 1.3. -• The 99-percentile interval for β of the last layer is longer than the other 99-percentile -intervals. -• The 99-percentile interval for β of the fourth-last layer (layer 14 for STL-10, layer 10 for -all other models) is more negative than all other layers. -Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional -layer. The ranges are calculated for each channel and filter separately. The smaller the -values are, the less information is lost if the filters are replaced by smaller filters. -41 -5. Experimental Evaluation -Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR-100. The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4]. -With every layer the interval which contains 95 % of the weights and is centered around -the mean becomes smaller, especially with layer 11 where the feature maps are of -size 1 × 1. In contrast to the other layers, the last convolutional layer has a bimodal -distribution. -This plot indicates that the network might benefit from bigger filters in the first layer, -whereas the filters in layers 7 – 11 could potentially be smaller.
-Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. -While the first layers biases are in [−0.1, +0.1], after each max-pooling layer the interval -which contains 95 % of the weights and is centered around the mean becomes smaller. -In the last three convolutional layer, most bias weights are in [−0.005, +0.005]. -42 -5.1. Baseline Model and Training setup -Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a -baseline model trained on CIFAR-100. -Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model -trained on CIFAR-100. -43 -5. Experimental Evaluation -Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For -each filter, the range of values is recorded by channel. The smaller this range is, the -less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter. -44 -5.1. Baseline Model and Training setup -5.1.3. Training behavior -Due to early stopping, the number of epochs which a model was trained differ. The number -of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard -deviation of 17.3 epochs for CIFAR-100. -Figure 5.7 shows the worst and the best validation accuracy during the training with -augmented data. Different initializations lead to very similar validation accuracies during -training. The image might lead to the wrong conclusion that models which are better at -the start are also better at the end. In order to check this hypothesis, the relative order of -validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering -stays approximately the same, then it can be considered to run the first few epochs many -times and only train the best models to the end. 
For 10 models, there can be $\frac{10^2 - 10}{2} = 45$ -pair-wise changes in the ordering at maximum if the relative order of validation accuracies -is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred -on average for each pair of epochs (i, i + 1). This means if one knows only the relative order -of the validation accuracy of two models $m$ and $m'$ in epoch i, it is doubtful if one can -make any statement about the ordering of $m$ and $m'$ in epoch i + 1. -0 -10 -20 -30 -40 -50 -60 -70 -80 -90 -100 -110 -120 -130 -140 -0.2 -0.3 -0.4 -0.5 -0.6 -0.7 -epoch -validation accuracy -maximum validation accuracy -minimum validation accuracy -1.5 -2 -2.5 -3 -3.5 -4 -4.5 -loss -maximum validation accuracy -minimum validation accuracy -mean loss -Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The -differences do not exceed 1 % and do not increase by training epoch. Four models -stopped the first training stage at epoch 133 which causes the shift in the loss and the -maximum validation accuracy. -Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was -expected that the absolute value of weight updates during epochs (sum, max, and mean) -decrease in later training stages. The intuition was that weights need to be adjusted in a -coarse way first. After that, the intuition was that only slight modifications are applied by -45 -5. Experimental Evaluation -the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as -displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest -change happens as expected in the first epoch after the weights are initialized. The change -from augmented training to non-augmented training was at epoch 156 to epoch 157. -It can be observed that layers which receive more input feature maps get larger mean weight -updates.
As layers which are closer to the output take more input feature maps, -their weight updates are larger. This pattern does not occur when SGD is used as the -optimizer. -Figure 5.8.: Mean weight updates of the baseline model between epochs by layer. -46 -5.1. Baseline Model and Training setup -Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer. -Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer. -47 -5. Experimental Evaluation -5.2. Confusion Matrix Ordering -The visualization of the confusion matrix can give valuable information about which part -of the task is hard. For more than about 10 classes, however, it becomes hard to visualize -and read. -For CIFAR-10, the proposed method groups the four object classes and the six animal -classes together (see Figure 5.11a). -(a) CIFAR-10 Test set (b) Random -Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal -elements are set to 0 in order to make other elements easier to see. -Figure 5.11b shows a confusion matrix with random mistakes. -The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test -accuracy where a good permutation was found. Please note that this is not the best classifier. -The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is -displayed as the second image. -Those results suggest that the ordering of classes is a valuable tool to make patterns easier -to see. Humans, however, are good at finding patterns even if they come from random noise. -Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy -and 40 % uniformly random errors of a balanced dataset is created, optimized according to -Equation (4.1) and shown in Figure 5.11b. It clearly looks different from Figure 5.11a. -On the HASYv2 dataset the class-ordering is necessary to see anything as most possible -confusions do not happen.
See Figure 5.13 for comparison of the first 50 classes of the -unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a -maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices -because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be -displayed. -48 -5.2. Confusion Matrix Ordering -Figure 5.12.: The first image shows the confusion matrix for the test of GTSRB set after optimization -to Equation (4.1). The diagonal elements are set to 0 in order to make other elements -easier to see. The symbols next to the label on the vertical axis indicate the shape -and the color of the signs. -The second image shows the same, but with baseline model. -Best viewed in electronic form. 49 -Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal -elements are set to 0 in order to make other elements easier to see. The top image -shows arbitrary class ordering, the bottom image shows the optimized ordering. -5.3. Spectral Clustering vs CMO -5.3. Spectral Clustering vs CMO -This section evaluates the clustering quality of CMO in comparison to the clustering quality -of spectral clustering. -The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on -CIFAR-100. Figure 5.14 shows the sorted confusion matrix. -Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The -diagonal elements are set to 0 in order to make other elements easier to see. Best -viewed in electronic form. -CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters -which are to be found. The number of errors is determined by (i) Join all n clusters which -contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, find the -set of classes M− which do not belong to C. (iii) The final error is n + |M−|. 
As can be -seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has -only half the error of spectral clustering. -The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be -noted that the number of clusters was determined by using the semi-automatic method -based on CMO as described in Section 4.2. -51 -5. Experimental Evaluation -Cluster Spectral clustering Errors CMO Errors -fish aquarium fish, orchid + flatfish -+ ray, shark + trout, lion -5 aquarium fish, orchid + flatfish -+ ray + shark, trout -4 -flowers orchid, aquarium fish + sun￾flower + poppy, tulip + rose, -train -5 orchid, aquarium fish + sun￾flower, poppy, tulip, rose -2 -people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 -reptiles crocodile, plain, road, table, -wardrobe + dinosaur + lizard -+ snake, worm + turtle -9 crocodile, lizard, lobster, cater￾pillar + dinosaur + snake + tur￾tle, crab -6 -trees maple, oak, pine + willow, forest -+ palm -3 palm, willow, pine, maple, oak 0 -Total 24 12 -Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , -whereas clusters are separated by +. -Cluster Spectral clustering Errors CMO Errors -A A, A, A 0 A, A, A , Å 1 -B B, B 0 B, B 0 -C C, c, ⊂ and C , ξ, E and C 4 C, c, ⊂, C and C 1 -D D, D, D, . 
1 D, D, D 0 -E E and E, ε 2 E and E, ε, , ∈ 4 -F F and F, F 1 F and F, F 1 -H H and H , κ and H 3 H and H, H 1 -K K, κ 0 K, κ 0 -L L, b and L, L 1 L, b and L, L 1 -M M and M and M 2 M and µ, M and M 3 -N N and N, N and N 2 N and N, N and N , ℵ 3 -O O, O, 0, ◦, °, # and o 1 O, O, 0, ◦, ° and # and o 2 -P P, P and p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2 -Q Q, Q, Q, ι, t, &, `, =, Æ, 1 7 Q and Q, Q 1 -R R, R and R, R, k and < 3 R and <, R, R, R 1 -S S, s, S 0 S, s, S 0 -T T, > and T , τ 1 T, > and T , τ 1 -U U, ∪ and u, U, A 1 U, u, U, A and ∪ 2 -V V , v, ∨ 0 V , v, ∨ 0 -W W, w, ω 0 W, w and ω 1 -X X, x, X , χ, × 0 X, x, X , χ, × 0 -Y Y and y 1 Y , y 0 -Z Z, z, Z and Z, Z 1 Z, z, Z, Z, Z 0 -Total 34 25 -Table 5.5.: Differences in spectral clustering and CMO. -52 -5.4. Hierarchy of Classifiers -5.4. Hierarchy of Classifiers -In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root -classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on -the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better -results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either -be due to limited training data, overfitting or the small size of 32 px × 32 px of the data. -The experiment also shows that most of the errors are due to not identifying the correct -cluster. Hence, in this case, more work in improving the root classifier is necessary rather -than improving the discrimination of classes within a cluster. -Although the classes within a cluster capture most of the classifications, many misclassifica￾tions happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would -push the accuracy in the full column only to 63.50 % due to errors of the root classifier -where the root classifier does not predict the correct cluster. -The leaf classifiers use the same topology as the root classifier. 
By initializing them with -the root classifiers weights their performance can be pushed at about the inner accuracy. -They are, however, only useful if their accuracy is well above the inner accuracy of the root -classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. -Cluster Classes -accuracy -root classifier leaf classifier -cluster identified class identified | cluster class identified | cluster -1 3 69.67 % 84.27 % 72.98 % -2 5 46.60 % 58.54 % 43.47 % -3 2 58.50 % 92.13 % 83.46 % -4 2 50.50 % 87.83 % 81.74 % -5 3 44.67 % 79.29 % 71.01 % -6 2 29.50 % 78.67 % 72.00 % -7 2 52.50 % 92.11 % 87.72 % -8 2 59.50 % 86.23 % 81.88 % -9 2 59.00 % 90.08 % 87.79 % -10 2 62.00 % 85.52 % 73.10 % -11 2 67.00 % 87.01 % 75.32 % -12 2 72.50 % 94.77 % 76.77 % -13 2 64.00 % 82.58 % 86.27 % -14 2 79.67 % 89.85 % 89.10 % -Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on -14 clusters of classes. Each class has 100 elements to test. The column cluster identified -gives the percentage that the root classifiers argmax prediction is within the correct -cluster, but not necessarily the correct class. The columns class identified | cluster only -consider data points where the root classifier correctly identified the cluster. -53 -5. Experimental Evaluation -5.5. Increased width for faster learning -More filters in one layer could simplify the optimization problem as each filter needs smaller -updates. Hence a CNN N with ni filters in layer i is expected to take more epochs than a -CNN N0 with 2 · ni filters in layer i to achieve the same validation accuracy. -This hypothesis can be falsified by training a CNN N and a CNN N0 and comparing the -trained number of epochs. As more filters can lead to different results depending on the -layer where they are added, five models are trained. 
The details about those models are -given in Table 5.7 -Name Layer Filter count Total -Baseline New parameters -m9 9 64 638 5 978 566 -m0 -9 -9 64 974 8 925 622 -m11 11 512 3786 5 982 698 -m0 -11 11 512 1024 1 731 980 -m13 13 512 8704 5 982 092 -Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer -was increased. -The detailed results are given in Table 5.8. As expected, the number of training epochs of -the models with increased numbers of parameters is lower. The wall-clock time, however, is -higher due to the increase in computation per forward- and backward-pass. -For m9, m11 and m13, the filter weight range of the layer with increased capacity decreases -compared to Figure 5.6, the filter weights of the layer with increased capacity are more -concentrated around zero compared to Figure 5.2. For model m13, the distribution of -weight of the output layer changed to a more bell-shaped distribution. Except for this, the -distribution of filter weights in other layers did not change for all three models compared to -the baseline. -Model Parameters -Accuracy Training -Single Model Ensemble Mean Epochs Mean Time -Mean std -baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s -m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s -m0 -9 -8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s -m11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s -m0 -11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s -m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s -Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13 -as well as their accuracies. -54 -5.6. Weight updates -5.6. Weight updates -Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is -that every single weight updates can be smaller to learn the same function. Thus the loss -function is smoother and thus gradient descent based optimization algorithms lead to more -consistent weight updates. 
-Consequently, it is expected that layers with fewer filters have more erratic updates. If -there are many filters, the weights of a filter which does not contribute much to the end -results or is even harmful can gradually be set to zero, essentially removing one path -in the network. -In order to test the hypothesis, the baseline model was adjusted. The number of filters in -layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean -weight updates of the layers 1, 3, 5, 7 and 9 have a far bigger range than the layers 11, 13 and -15 after epoch 50. Compared to the baseline model's mean updates (Figure 5.8, Page 46), -the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update -from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher. -For the maximum and the sum, no similar pattern could be observed (see Figures A.3 -and A.4). -Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but -with layer 5 reduced to 3 filters. -55 -5. Experimental Evaluation -5.7. Multiple narrow layers vs One wide layer -On a given feature map size one can have an arbitrary number of convolutional layers with -SAME padding and each layer can have an arbitrary number of filters. A convolutional layer -with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called -narrower and the number of filters in a convolutional layer is the layer's width. -If the number of parameters which may be used for the feature map scale is fixed and high -enough, there are still many combinations. If ni with i = 0, . . . 
, k is the number of output -feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters without -a bias, then the number of parameters is -$\text{Parameters} = \sum_{i=1}^{k} \left( (n_{i-1} \cdot 3^2 + 1) \cdot n_i \right)$ -Hence the width of one layer does not only influence the parameters in this layer, but also -in the next layer. -The number of possible subsequent layers of one feature map size is enormous, even if -constraints are placed on the number of parameters. For example, the first convolutional -layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per -layer are not desirable, one keeps all layers having a bias and all layers only use 3 × 3 filters, -then the maximum depth is 10. If one furthermore assumes that at least 800 parameters -should be used, there are still 120 possible layer combinations. As experimentally evaluating -one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible -to evaluate all layer combinations. In the following, a couple of changes to the network -width / depth will be evaluated. -Each layer expands the perceptive field. Hence deeper layer can use more of the input for -every single output value. But deeper networks need more time for inference as the output -of layer i has to be computed before the output of i + 1 can be computed. Hence there is -less potential to parallelize computations. Each filter can be seen as a concept which can -be learned. The deeper the filter is in the network, the higher is the abstraction level of the -concept. In most cases, both is necessary: Many different concepts (width) and high-level -concepts (depth). -Reducing the two first convolutional layers of the baseline model (see Page 39) to one -convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model -has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard -deviation of σ = 1.12 (+0.57). 
The ensemble achieved 63.18 % (-1.52 %). As expected, -the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of -the baseline model to 15 s of the model with one less convolutional layer, one less Batch -Normalization and one less activation layer. The inference time was also reduced from 6 ms -5 -5.8. Batch Normalization -to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of -more than one percentage point of the mean model and the increased standard deviation of -the model's performance, at least two convolutional layers on the 32 px × 32 px feature -map scale are recommendable for CIFAR-100. -Changing the baseline to have fewer filters but more layers is another option. This was tried -for the first block at the 32 px × 32 px feature map scale. The two convolutional layers -(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one -convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model -has 944 132 parameters. Compared to the baseline model, the time for inference was the -same. This is unexpected, because the inference time changed when a layer was removed at -this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was -σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21). -Having two nonlinearities at each feature map scale could be important to learn nonlinear -transformations at that scale. As the baseline model does only have one nonlinearity at the -8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization -and ELU was added. To keep the number of parameters constant, layer 11 of the baseline -model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy -of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves -an accuracy of 64.39 % (-0.31). 
This could indicate that having two convolutional layers -is more important for layers close to the input than for intermediate layers. Alternatively, the -parameters could be more important in layer 11 than having a new convolutional layer after -layer 9. -In order to control the hypothesis that having two convolutional layers is less important in -the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is -removed. The first convolutional layer was increased from 32 filters to 59 filters, the second -convolutional layer was increased from 32 filters to 58 filters in order to keep the amount of -parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test -accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 % -test accuracy (-0.82). -Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map -scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51 -(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important -to have at least one convolutional layer at this feature map scale. -5.8. Batch Normalization -In [CUH15], the authors write that Batch Normalization does not improve ELU networks. -Hence the effect of removing Batch Normalization from the baseline is investigated in this -57 -5. Experimental Evaluation -experiment. -As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn -are identical to the baseline model m, except that in mno-bn the Batch Normalization layers -are removed. -One notable difference is the training time: While m needs 21 ms per epoch on average on -a GTX 980, mno-bn only needs 21 ms per epoch. The number of epochs used for training, -however, also increased noticeably from 149 epochs to 178 epochs on average. The standard -deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for mno-bn. 
-The mean accuracy of mno-bn is 62.86 % and hence 0.52 percentage points worse. The -standard deviation between models increased from 0.55 to 0.61. This is likely a result of the -early stopping policy and the differences in training epochs. This can potentially be fixed -by retraining the models which stopped earlier than the model which was trained for the -biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage -points worse than the baseline. -The filter weight range and distribution is approximately the same as Figure 5.6 and -Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of -the baseline are spread out in the first layer and much more concentrated in subsequent layers -(see Figure 5.3), the model without Batch Normalization has rather concentrated weights -in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). -Another model m0 -no-bn which has one more filter in the convolutional layer 1, 3, 5, and 7 to -compensate for the loss of parameters in Batch Normalization. The mean test accuracy of -10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The -ensemble of m0 -no-bn achieves 64.33 % which is 0.37 percentage points worse than the baseline. -The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of -20.7 epochs. -Hence it is not advisable to remove Batch Normalization for the final model. It could, -however, be possible to remove Batch Normalization for the experiments to iterate quicker -through different ideas if the relative performance changes behave the same with or without -Batch Normalization. +e +b +kh +2X +c +iy=1−d kh +2 +e +X +d +ic=1 +I(x + ix, y + iy, ic) · F(ix, iy, ic) +This procedure is explained by Figure 2.1. It is essentially a discrete convolution. 
+I ∈ R +7×7 +Filter kernel +F ∈ R +3×3 +Result of point-wise +multiplication +I +0 ∈ R7×7 +104 +116 +116 +112 58 -5.9. Batch size -5.9. Batch size -The mini-batch size m ∈ N≥1 influences -• Epochs until convergence: The smaller m, the more often the model is updated -in one epoch. Those updates, however, are based on fewer samples of the dataset. -Hence the gradients of different mini-batches can noticeably differ. In the literature, -this is referred to as gradient noise [KMN+16]. -• Training time per epoch: The smaller the batch size, the higher the training time -per epoch as the hardware is not optimally utilized. -• Resulting model quality: The choice of the hyperparameter m influences the -accuracy of the classifier when training is finished. [KMN+16] supports the view that -smaller m result in less sharp minima. Hence smaller m lead to better generalization. -Empiric evaluation results can be found in Table 5.9. Those results confirm the claim -of [KMN+16] that lower batch sizes generalize better. -m -Training -Epochs -Mean total Single model Ensemble -time training time Accuracy std Accuracy -8 118 s -epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % -16 62 s -epoch 103 – 173 8349 s 64.16 % σ = 0.81 66.98 % -32 35 s -epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % -64 25 s -epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % -128 18 s -epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % -Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) -of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on -CIFAR-100. -5.10. Bias -Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a -model mno-bias is created which is identical to the baseline model m, except that the bias of -layers 11, 13 and 15 is removed. -The mean test accuracy of 10 trained mno-bias is 63.74 % which is an improvement of -0.36 percentage points over the baseline. 
The ensemble achieves a test accuracy of 65.13 % -which is 0.43 percentage points better than the baseline. Hence the bias can safely be -removed. -Removing the biases did not have a noticeable effect on the filter weight range, the filter -weight distribution or the distribution of the remaining biases. Also, the γ and β parameters -of the Batch Normalization layers did not noticeably change. -59 -5. Experimental Evaluation -5.11. Learned Color Space Transformation -In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1 -directly after the input and then another convolutional layer with 3 filters of size 1 × 1 acts -as a learned transformation in another color space and boosts the accuracy. -This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU ac￾tivation and 10 filters followed by another convolutional layer with ELU activation and -3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37. -The standard deviation is noticeable higher than the standard deviation of the baseline -model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of -the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of -the baseline models. -The inference time for 1 image and for 128 images did not change compared to the baseline. -The training time per epoch increased from 26 s to 30 s on the GTX 970. -Hence it is not advisable to use the learned color space transformation. -5.12. Pooling -An alternative to max pooling with stride 2 with a 2 × 2 kernel is using a 3 × 3 kernel with -stride 2. -This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the -3×3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 % -(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test -accuracy (+0.45). 
-The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs) -on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch -of 128 images. -5.13. Activation Functions -Nonlinear, differentiable activation functions are important for neural networks to allow them -to learn nonlinear decision boundaries. One of the simplest and most widely used activation -functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized -rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have -been proposed. The baseline uses ELU. -60 -5.13. Activation Functions -Activation functions differ in the range of values and the derivative. The definitions and -other comparisons of eleven activation functions are given in Table B.3. -Theoretical explanations why one activation function is preferable to another in some -scenarios are the following: -• Vanishing Gradient: Activation functions like tanh and the logistic function sat￾urate outside of the interval [−5, 5]. This means weight updates are very small for -preceding neurons, which is especially a problem for very deep or recurrent networks as -described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12]. -• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem. -The gradient of the ReLU function is 0 for all non-positive values. This means if all -elements of the training set lead to a negative input for one neuron at any point in the -training process, this neuron does not get any update and hence does not participate -in the training process. This problem is addressed in [MHN13]. -• Mean unit activation: Some publications like [CUH15, IS15] claim that mean -unit activations close to 0 are desirable. They claim that this speeds up learning -by reducing the bias shift effect. The speedup of learning is supported by many -experiments. 
Hence the possibility of negative activations is desirable. -Those considerations are listed in Table 5.10 for 11 activation functions. Besides the -theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline -network was adjusted so that every activation function except the one of the output layer -was replaced by one of the 11 activation functions. -As expected, PReLU and ELU performed best. Unexpected was that the logistic function, -tanh and softplus performed worse than the identity and it is unclear why the pure-softmax -network performed so much better than the logistic function. One hypothesis why the -logistic function performs so bad is that it cannot produce negative outputs. Hence the -logistic− function was developed: -$\text{logistic}^{-}(x) = \frac{1}{1 + e^{-x}} - 0.5$ -The logistic− function has the same derivative as the logistic function and hence still suffers -from the vanishing gradient problem. The network with the logistic− function achieves an -accuracy which is 11.30 % better than the network with the logistic function, but is still -5.54 % worse than the ELU. -Similarly, ReLU was adjusted to have a negative output: -ReLU−(x) = max(−1, x) = ReLU(x + 1) − 1 -The results of ReLU− are much worse on the training set, but perform similar on the test -61 -5. Experimental Evaluation -set. The result indicates that the possibility of hard zero and thus a sparse representation -is either not important or similar important as the possibility to produce negative outputs. -This contradicts [GBB11, SMGS14]. -A key difference between the logistic− function and ELU is that ELU neither suffers -from the vanishing gradient problem nor is its range of values bound. For this reason, the -S2ReLU activation function, defined as -$\text{S2ReLU}(x) = \text{ReLU}\left(\frac{x}{2} + 1\right) - \text{ReLU}\left(-\frac{x}{2} + 1\right) = \begin{cases} \frac{x}{2} - 1 & \text{if } x \le -2 \\ x & \text{if } -2 \le x \le 2 \\ \frac{x}{2} + 1 & \text{if } x > 2 \end{cases}$ -This function is similar to SReLUs as introduced in [JXF+16]. 
The difference is that S2ReLU -does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be -the identity close to zero and have a smaller absolute value than the identity farther away. -It is easy to compute and easy to implement. -Those results — not only the absolute values, but also the relative comparison — might -depend on the network architecture, the training algorithm, the initialization and the -dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A.2. For -both datasets, the logistic function has a much shorter training time and a noticeably lower -test accuracy. -Function Vanishing Gradient Negative Activation possible Bound activation -Identity No Yes No -Logistic Yes No Yes -Logistic− Yes Yes Yes -Softmax Yes Yes Yes -tanh Yes Yes Yes -Softsign Yes Yes Yes -ReLU Yes1 No Half-sided -Softplus No No Half-sided -S2ReLU No Yes No -LReLU/PReLU No Yes No -ELU No Yes No -Table 5.10.: Properties of activation functions. -1The dying ReLU problem is similar to the vanishing gradient problem. -62 -5.13. 
Activation Functions -Function -Single model Ensemble of 10 -Training set Test set Training set Test set -Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 % -Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 % -Logistic− 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 % -Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 % -Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 % -Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 % -ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 % -ReLU− 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 % -Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 % -S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 % -LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 % -PReLU 80.01 % σ = 2.03 62.16 % σ = 0.73 83.50 % 64.79 % -ELU 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 % -Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation -functions on CIFAR-100. For LReLU, α = 0.3 was chosen. -Function -Inference per Training -Epochs -Mean total -1 Image 128 time training time -Identity 8 ms 42 ms 31 s -epoch 108 – 148 3629 s -Logistic 6 ms 31 ms 24 s -epoch 101 – 167 2234 s -Logistic− 6 ms 31 ms 22 s -epoch 133 – 255 3421 s -Softmax 7 ms 37 ms 33 s -epoch 127 – 248 5250 s -Tanh 6 ms 31 ms 23 s -epoch 125 – 211 3141 s -Softsign 6 ms 31 ms 23 s -epoch 122 – 205 3505 s -ReLU 6 ms 31 ms 23 s -epoch 118 – 192 3449 s -Softplus 6 ms 31 ms 24 s -epoch 101 – 165 2718 s -S2ReLU 5 ms 32 ms 26 s -epoch 108 – 209 3231 s -LReLU 7 ms 34 ms 25 s -epoch 109 – 198 3388 s -PReLU 7 ms 34 ms 28 s -epoch 131 – 215 3970 s -ELU 6 ms 31 ms 23 s -epoch 146 – 232 3692 s -Table 5.12.: Training time and inference time of adjusted baseline models trained with different -activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the -identity is the fastest function. This result is likely an implementation specific problem -of Keras 2.0.4 or Tensorflow 1.1.0. -63 -5. 
Experimental Evaluation -Function -Single model Ensemble Epochs -Accuracy std Accuracy Range Mean -Identity 99.45 % σ = 0.09 99.63 % 55 – 77 62.2 -Logistic 97.27 % σ = 2.10 99.48 % 37 – 76 54.5 -Softmax 99.60 % σ = 0.03 99.63 % 44 – 73 55.6 -Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6 -Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0 -ReLU 99.62 % σ = 0.04 99.73 % 51 – 94 71.7 -Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9 -PReLU 99.57 % σ = 0.07 99.73 % 44 – 89 71.2 -ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5 -Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions -on MNIST. -5.14. Label smoothing -Ensembles consisting of n models trained by the same procedure on the same data but -initialized with different weights and trained with a different order of the training data -perform consistently better than single models. One drawback of ensembles in applications -such as self-driving cars is that they increase the computation by a factor of n. One idea -why they improve the test accuracy is by reducing the variance. -The idea of label smoothing is to use the ensemble prediction of the training data as labels -for another classifier. For every element x of the training set, the one-hot encoded target -t(x) is smoothed by the ensemble prediction yE(x) -$t'(x) = \alpha \cdot t(x) + (1 - \alpha) \cdot y_E(x)$ -where α ∈ [0, 1] is the smoothing factor. -There are three reasons why label smoothing could be beneficial: -• Training speed: The ensemble prediction contains more information about the -image than binary class decisions. Classifiers in computer vision predict how similar -the input looks to other input of the classes they are trained on. By smoothing the -labels, the information that one image could also belong to another class is passed to -the optimizer. In early stages of the optimization this could lead to a lower loss on -the non-smoothed validation set. 
-• Higher accuracy: Using smoothed labels for the optimization could lead to a higher -accuracy of the base-classifier due to a smoothed error surface. It might be less likely -64 -5.14. Label smoothing -that the classifier gets into bad local minima. -• Label noise: Depending on the way how the labels are obtained, it might not always -be clear which label is the correct one. Also, labeling errors can be present in training -datasets. Those errors severely harm the training. By smoothing the labels errors -could be relaxed. -10 models msmooth are trained with the α = 0.5 smoothed labels from the prediction -of an ensemble of 10 baseline models. The mean accuracy of the models trained on the -smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72 -(+0.17 %). The ensemble of 10 msmooth models achieved 64.79 % accuracy (+0.09 %). Hence -the effect of this kind of label smoothing on the final accuracy is questionable. -The training speed didn’t noticeably change either: The number of trained epochs ranged -from 144 to 205, the mean number of epochs was 177. The baseline training ranged from -146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training -methods accuracy differed by less than one percentage point. Hence it is unlikely that label -smoothing has a positive effect on the training speed. -Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and -hard labels for training, this work only used smoothed labels. -65 -5. Experimental Evaluation -5.15. Optimized Classifier -In comparison to the baseline classifier, the following changes are applied to the optimized -classifier: -• Remove the bias for the last layers: For all layers which output a 1 × 1 feature -map, the bias is removed -• Increase the max pooling kernel to 3 × 3 -• More filters in the first layers -The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. 
The evaluation -is given in Table 5.15 and the timing comparison is given in Table 5.16. -# Type Filters @ -Patch size / stride -Parameters FLOPs Output size -Input 0 0 3 @ 32 × 32 -1 Convolution 69 @ 3 × 3 × 3 / 1 1 932 3 744 768 69 @ 32 × 32 -2 BN + ELU 138 353 418 69 @ 32 × 32 -3 Convolution 69 @ 3 × 3 × 32 / 1 42 918 37 684 096 69 @ 32 × 32 -4 BN + ELU 138 353 418 69 @ 32 × 32 -Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16 -5 Convolution 64 @ 3 × 3 × 32 / 1 39 808 20 332 544 64 @ 16 × 16 -6 BN + ELU 128 82 048 64 @ 16 × 16 -7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16 -8 BN + ELU 128 82 048 64 @ 16 × 16 -Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8 -9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8 -10 BN + ELU 128 20 608 64 @ 8 × 8 -Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4 -11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 288 1 048 064 512 @ 1 × 1 -12 BN + ELU 1 024 3 584 512 @ 1 × 1 -Dropout 0.5 0 0 512 @ 1 × 1 -13 Convolution 512 @ 1 × 1 × 512 / 1 262 144 523 776 512 @ 1 × 1 -14 BN + ELU 1 024 3 584 512 @ 1 × 1 -Dropout 0.5 0 0 512 @ 1 × 1 -15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1 -Global avg Pooling 1 × 1 0 k k @ 1 × 1 -16 BN + Softmax 2k 7k k @ 1 × 1 -P 514k -+947 654 -520k -+87 870 996 179 200+2k -Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers -use SAME padding, except for layer 11 which used VALID padding in order to decrease -the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each -power of two there are two Convolution + BN + ELU blocks and one Max pooling -block added. This is the framed part in the table. -66 -5.15. 
Optimized Classifier -32 × 32 -Input -C 69@3 × 3/1 -BN + ELU -C 69@3 × 3/1 -BN + ELU -16 × 16 -max pooling 3 × 3/2 -C 64@3 × 3/1 -BN + ELU -C 64@3 × 3/1 -BN + ELU -8 × 8 -max pooling 3 × 3/2 -C 64@3 × 3/1 -BN + ELU -4 × 4 -max pooling 3 × 3/2 -C* 512@4 × 4/1 (V) -BN + ELU -Dropout, p = 0.5 -1 × 1 -C* 512@1 × 1/1 -BN + ELU -Dropout, p = 0.5 -C* k@1 × 1/1 -Global AVG pooling -BN + Softmax -Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with -32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used. -Dataset Single Model Accuracy Ensemble of 10 -Training Set Test Set Training Set Test Set -Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 % -CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 % -CIFAR-100 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 % -GTSRB 100.00 % σ = 0.00 99.28 % σ = 0.10 100.00 % 99.51 % -HASYv2 88.79 % σ = 0.45 85.36 % σ = 0.15 89.36 % 85.92 % -MNIST 99.88 % σ = 0.10 99.48 % σ = 0.13 99.99 % 99.67 % -STL-10 95.43 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % -SVHN 99.08 % σ = 0.07 96.37 % σ = 0.12 99.50 % 97.47 % -Table 5.15.: Optimized model accuracy on eight datasets. The single model accuracy is the 10 models -used in the ensemble. The empirical standard deviation σ of the accuracy is also given. -CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the -models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN -and HASY, no test time transformations are used. 
-Network GPU Tensorflow Inference per Training -1 Image 128 images time / epoch -Optimized Default Intel i7-4930K 5 ms 432 ms 386 s -Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s -Optimized Default GeForce 940MX 4 ms 205 ms 192 s -Optimized Default GTX 970 6 ms 41 ms 35 s -Optimized Default GTX 980 3 ms 35 ms 27 s -Optimized Default GTX 980 Ti 6 ms 36 ms 26 s -Optimized Default GTX 1070 2 ms 24 ms 21 s -Optimized Default Titan Black 4 ms 46 ms 43 s -Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is -evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken -from [Maj17]. Weights the baseline model can be found at [Tho17b]. The optimized -Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. -67 -5. Experimental Evaluation -5.16. Early Stopping vs More Data -A separate validation set is necessary for two reasons: (1) Early stopping and (2) preventing -overfitting due to many experiments. To prevent overfitting, a different dataset can be used. -For example, all decisions about hyperparameters in this thesis are based on CIFAR-100, -but the network is finally trained and evaluated with the same hyperparameters on all -datasets.2 The validation set can hence be removed if early stopping is removed. Instead, -the validation data is used in a first run to determine the number of epochs necessary for -training. In a second training run the validation data is added to the training set. The -number of used epochs for the second run is given in Table 5.17. -Dataset Mean epochs Train data classes average data / class -Asirra 60 15 075 2 7538 -MNIST 41 54 000 10 5400 -SVHN 45 543 949 10 54 395 -CIFAR-10 84 45 000 10 4500 -HASYv2 92 136 116 369 369 -GTSRB 97 35 288 43 821 -STL-10 116 4500 10 450 -CIFAR-100 155 45 000 100 450 -Table 5.17.: Mean number of training epochs for the optimized model. 
For comparison, the total -amount of used training data, the number of classes of the dataset and the average -amount of data per class is given. -Alternatively, the model can be trained with early stopping (ES) purely on the training -loss. All three methods – early stopping on the validation set accuracy, early stopping on -the training loss and training a fixed number of epochs are evaluated. While having more -data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other -datasets are only marginally different. For CIFAR-10, training with more data did not -improve the results when the number of epochs is fixed, but notably improved the results -when the training loss was used as the early stopping criterion. -5.17. Regularization -Stronger regularization might even improve the results when using the training loss as an -early stopping criterion. `2 regularization with a weighting factor of λ = 0.0001 is used in -all other experiments. While the accuracy as shown in Table 5.19 does not show a clear -pattern, the number of epochs increases with lower model regularization (see Table 5.20). -2Except data augmentation and test time transformations. -3Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model. -4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. -68 -5.17. Regularization -Dataset Early Stopping Fixed epochs -val. acc train loss -Asirra 93.09 % 96.01 %3 96.01 % -CIFAR-10 89.86 % 91.75 % 88.88 % -CIFAR-100 67.03 % 71.01 % 69.08 % -HASYv2 85.92 % 82.89 %4 85.05 % -MNIST 99.67 % 99.64 % 99.57 % -STL-10 78.66 % 83.25 % 78.64 % -Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy -compared training setups without a validation set and thus more training data. 
The -second column uses the training loss as a stopping criterion, the third column uses a -fixed number of epochs which is equal to the mean number of training epochs of the -models with early stopping on the validation set accuracy. -λ -Single Model Accuracy Ensemble of 10 -Training Set Test Set Training Set Test Set -λ = 0.01 73.83 % σ = 1.78 58.94 % σ = 1.33 87.78 % 69.98 % -λ = 0.001 82.86 % σ = 0.89 63.03 % σ = 0.67 91.86 % 71.02 % -λ = 0.0001 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 % -Table 5.19.: Different choices of ℓ2 model regularization applied to the optimized model. -λ min max mean std -λ = 0.01 457 503 404.6 37.2 -λ = 0.001 516 649 588.4 41.6 -λ = 0.0001 579 833 696.1 79.1 -Table 5.20.: Training time in epochs of models with early stopping on training loss by different -choices of ℓ2 model regularization applied to the optimized model. -69 -5. Experimental Evaluation -70 -6. Conclusion and Outlook -This master's thesis gave an extensive overview over the design patterns of CNNs in Chapter 2, -the methods by which CNNs can be analyzed and the principal directions of topology learning -algorithms in Chapter 3. -Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations -of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering -algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are: -• Ordering the classes in the confusion matrix allows displaying the relevant parts even -for several hundred classes. -• A hierarchy of classifiers based on the classes does not improve the results on CIFAR-100. There are three possible reasons for this: -– 32 px × 32 px is too low dimensional -– 100 classes are not enough for this approach -– More classes are always easier to distinguish if each new class comes with more -data. One reason why this might be the case is that distinguishing the object -from background has similar properties even for different classes. 
-• Label smoothing had only a minor effect on the accuracy and no effect on the training -time when a single base classifier was used to train with the smoothed labels by an -ensemble of base classifiers. -A baseline model was defined and evaluated on eight publicly available datasets. The -baseline's topology and training setup are described in detail as well as its behavior during -training and properties of the weights of the trained model. -The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100. -The insights of those experiments are: -• Averaging ensembles of 10 base classifiers of the same architecture and trained with the -same setup consistently improve the accuracy. The amount of improvement depends -on the base classifiers, but the ensemble tends to improve the test accuracy by about -one percentage point. -• Wider networks learn in fewer epochs. This, however, does not mean that the -71 -6. Conclusion and Outlook -wall-clock time is lower due to increased computation in forward- and backward -passes. -• Batch Normalization increases the training time noticeably. For the described ELU -baseline model it also increases accuracy, which contradicts [CUH15]. -• The lower the batch size, the longer the time for each epoch of training and the fewer -epochs need to be trained. Higher accuracy by lower batch sizes was empirically -confirmed. The batch size, however, can also be too low. -• An analysis of the weights of the baseline indicated that the bias of layers close to -the output layer can be removed. This was experimentally confirmed. -• It could not be confirmed that learned color space transformation, as described -in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear -unit (LReLU) and α = 0.3. -• It could be confirmed that ELU networks give better results than any other activation -function on CIFAR-100. 
For the character datasets MNIST and HASYv2, however, -ReLU, LReLU, PReLU, Softplus and ELU all performed similarly. -• Changing the activation functions to the identity had very little impact on the HASYv2 -and MNIST classifiers. Note that those networks are still able to learn nonlinear -decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however, -the accuracy drops by 6.64 % when ELU is replaced by the identity. -Based on the results of those experiments, an optimized classifier was developed and -evaluated on all eight datasets. -The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without -using the unlabeled part of the dataset. The state of the art of HASYv2 was improved -from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from -99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %. -1 -This was mainly achieved by the combination of ELU, Dropout, ensembles, training data -augmentation and test-time transformations. The removal of the bias of layers close to the -output and re-usage of those parameters in layers close to the input as well as using 3 × 3 -pooling instead of 2 × 2 pooling improved the baseline. -While writing this master's thesis, several related questions could not be answered: -• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting -is not a problem. But at which subsampling-level does having more layers have the -biggest effect? Can this question be answered before a deeper network is trained? -• Is label smoothing helpful for noisy labels? -1The baseline is better than the optimized model on Asirra and on HASYv2. -72 -• How does the choice of activation functions influence residual architectures? Could the -results be the same for different activation functions in architectures with hundreds -of layers? -• The results for the pooling kernel were inconclusive. 
Larger pooling kernels might be -advantageous as well as fractional max pooling [Gra15]. -• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that -can and should be fixed? -• Why is softmax so much better than the logistic function? Can the reason be used to -further improve ELU? -Besides those questions, the influence of optimizers on time per epoch, epochs until -convergence, total training time, memory consumption, accuracy of the models and standard -deviation of the models was not evaluated. This, and the stopping criterion for training -might be crucial for the model's quality. -73 -74 -A. Figures, Tables and Algorithms -(a) Original image (b) Smoothing filter (c) Laplace edge detection filter -(d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter -Figure A.1.: Examples of image filters. Best viewed in electronic form. -Layer 99-percentile interval -filter bias -1 [-0.50, 0.48] [-0.06, 0.07] -3 [-0.21, 0.19] [-0.07, 0.07] -5 [-0.20, 0.17] [-0.07, 0.05] -7 [-0.15, 0.14] [-0.05, 0.06] -9 [-0.14, 0.15] [-0.04, 0.03] -11 [-0.08, 0.08] [-0.00, 0.00] -13 [-0.08, 0.08] [-0.00, 0.00] -15 [-0.10, 0.11] [-0.01, 0.01] -Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model -trained on CIFAR-100. -75 -Figure A.2.: The distribution of bias weights of a model without batch normalization trained on -CIFAR-100. -Algorithm 1 Simulated Annealing for minimizing Equation (4.1). -Require: $C \in \mathbb{N}^{n \times n}$, steps $\in \mathbb{N}$, $T \in \mathbb{R}^{+}$, $c \in (0, 1)$ -procedure SimulatedAnnealing(C, steps, T, c) -bestScore ← accuracy(C) -bestC ← C -for i = 0; i < steps; i ← i + 1 do -p ← randomFloat(0, 1) -if p < 0.5 then ▷ Swap rows -i ← randomInteger(1, . . . , n) -j ← randomInteger(1, . . . , n) \ { i } -p ← randomUniform(0, 1) -$C' \leftarrow$ swap(C, i, j) -s ← accuracy($C'$) -if $p < \exp\left(\frac{s - \text{bestScore}}{T}\right)$ then -$C \leftarrow C'$ -if s > bestScore then -bestScore ← s -bestC ← C -T ← T · c -else ▷ 
Move Block -s ← randomInteger(1, . . . , n) ▷ Block start -e ← randomInteger(s, . . . , n) ▷ Block end -i ← randomInteger(1, . . . , n − (e − s)) ▷ Block insert position -Move Block (s, . . . , e) to position i -return bestC -76 -Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model, -but with layer 5 reduced to 3 filters. -Function -Single model Ensemble of 10 Epochs -Training set Test set Train Test Range Mean -Identity 87.92 % σ = 0.40 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 114.5 -Logistic 81.46 % σ = 5.08 79.67 % σ = 4.85 86.38 % 84.60 % 58 – 91 77.3 -Softmax 88.19 % σ = 0.31 84.70 % σ = 0.15 88.69 % 85.43 % 124 – 171 145.8 -Tanh 88.41 % σ = 0.36 84.46 % σ = 0.27 89.24 % 85.45 % 89 – 123 108.7 -Softsign 88.00 % σ = 0.47 84.46 % σ = 0.23 88.77 % 85.33 % 77 – 119 104.1 -ReLU 88.93 % σ = 0.46 85.35 % σ = 0.21 89.35 % 85.95 % 96 – 132 102.8 -Softplus 88.42 % σ = 0.29 85.16 % σ = 0.15 88.90 % 85.73 % 108 – 143 121.0 -LReLU 88.61 % σ = 0.41 85.21 % σ = 0.05 89.07 % 85.83 % 87 – 117 104.5 -PReLU 89.62 % σ = 0.41 85.35 % σ = 0.17 90.10 % 86.01 % 85 – 111 100.5 -ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % 73 – 113 92.4 -Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on -HASYv2. For LReLU, α = 0.3 was chosen. -77 -Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but -with layer 5 reduced to 3 filters. 
-Function -Single model Ensemble of 10 Epochs -Training set Test set Train Test Range Mean -Identity 87.49 % σ = 2.50 69.86 % σ = 1.41 89.78 % 71.90 % 51 – 65 53.4 -Logistic 45.32 % σ = 14.88 40.85 % σ = 12.56 51.06 % 45.49 % 38 – 93 74.6 -Softmax 87.90 % σ = 3.58 67.91 % σ = 2.32 91.51 % 70.96 % 108 – 150 127.5 -Tanh 85.38 % σ = 4.04 67.65 % σ = 2.01 90.47 % 71.29 % 48 – 92 65.2 -Softsign 88.57 % σ = 4.00 69.32 % σ = 1.68 93.04 % 72.40 % 55 – 117 83.2 -ReLU 94.35 % σ = 3.38 71.01 % σ = 1.63 98.20 % 74.85 % 52 – 98 75.5 -Softplus 83.03 % σ = 2.07 68.28 % σ = 1.74 93.04 % 75.99 % 56 – 89 68.9 -LReLU 93.83 % σ = 3.89 74.66 % σ = 2.11 97.56 % 78.08 % 52 – 120 80.1 -PReLU 95.53 % σ = 1.92 71.69 % σ = 1.37 98.17 % 74.69 % 59 – 101 78.8 -ELU 95.42 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % 66 – 72 67.2 -Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on -STL-10. For LReLU, α = 0.3 was chosen. -78 -B. Hyperparameters -Hyperparameters are parameters of models which are not optimized automatically (e.g., by -gradient descent), but by methods like random search [BB12], grid search [LBOM98] or -manual search. -B.1. Preprocessing -Preprocessing used to be of major importance in machine learning. However, with the -availability of data sets with hundreds of examples per class and the possibility of CNNs to -learn features themselves, most models today rely on raw pixel values. The only common -preprocessing is size normalization. In order to get a fixed input-size for a CNN, the -following procedure can be used: -• Take one or multiple crops of the image which have the desired aspect ratio. -• Scale the crop(s) to the desired size. -• In training, all crops can be used independently. In testing, all crops can be passed -through the network and the output probability distributions can get fusioned, for -example by averaging. -Other preprocessing methods are: -• Color space transformations (RGB, HSV, etc.) 
-• Mean subtraction -• Standardization of pixel-values to [0, 1] by dividing by 255 (used by [HLW16]) -• Dimensionality reduction -– Principal component analysis (PCA): An unsupervised linear transformation -which can be learned in the first hidden layer. It is hence doubtful if PCA -improves the network. -– Linear discriminant analysis (LDA) -• Zero Components Analysis (ZCA) whitening (used by [KH09]) -79 -B.2. Data augmentation -Data augmentation techniques aim at artificially creating more data from real data items by -applying invariances. For computer vision, they include: -Name Augmentation Factor Used by -Horizontal flip 2 [KSH12, WYS+15] -Vertical flip 2 [DWD15]1 -Rotation ∼ 40 (δ = 20) [DSRB14] -Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14] -Crops $32^2 = 1024$ [KSH12, WYS+15] -Shearing [Gra15] -GANs [BCW+17] -Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13] -Hue 51 (δ = 0.1) [MRM15, DSRB14] -Saturation ∼ 20 (δ = 0.5) [DSRB14] -Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13] -Channel shift [KSH12] -Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for -typical situations. For example, the augmentation factor for random crops is calculated -for 256 px × 256 px images which are cropped to 224 px × 224 px. -Taking several scales if the original is of higher resolution than desired is another technique. -Combinations of the techniques above can also be applied. Please note that the order of -operations does matter in many cases and hence the order is another augmentation factor. -Less common, but also reasonable are: -• Adding noise -• Elastic deformations -• Color casting (used by [WYS+15]) -• Vignetting (used by [WYS+15]) -• Lens distortion (used by [WYS+15]) -1Vertical flipping combined with 180° rotation is equivalent to horizontal flipping -80 -B.3. Initialization -Weight initializations are usually chosen to be small and centered around zero. 
One way to -characterize many initialization schemes is by -$w \sim \alpha \cdot U[-1, 1] + \beta \cdot \mathcal{N}(0, 1) + \gamma$ with $\alpha, \beta, \gamma \geq 0$ -Table B.2 shows six commonly used weight initialization schemes. Several schemes use the -same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. -Name α β γ Reference -Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] -Xavier/Glorot uniform $\alpha = \sqrt{\frac{6}{n_{in} + n_{out}}}$ β = 0 γ = 0 [GB10] -Xavier/Glorot normal α = 0 $\beta = \left(\frac{2}{(n_{in} + n_{out})}\right)^{2}$ γ = 0 [GB10] -He α = 0 $\beta = \frac{2}{n_{in}}$ γ = 0 [HZRS15b] -Orthogonal — — γ = 0 [SMG13] -LSUV — — γ = 0 [MM15] -Table B.2.: Weight initialization schemes of the form $w \sim \alpha \cdot U[-1, 1] + \beta \cdot \mathcal{N}(0, 1) + \gamma$. -$n_{in}$, $n_{out}$ are the number of units in the previous layer and the next layer. Typically, -biases are initialized with constant 0 and weights by one of the other schemes to prevent -unit-coadaptation. However, dropout makes it possible to use constant initialization for -all parameters. -LSUV and Orthogonal initialization cannot be described with this simple pattern. -B.4. Objective function -For classification tasks, the cross-entropy -$E_{CE}(W) = -\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]$ -is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, -X is the set of training examples, K is the number of classes, $t_k^x \in \{0, 1\}$ indicates if the -training example x is of class k, $o_k^x$ is the output of the classifier for the training example x -and class k. -However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added: -• LASSO: ℓ1 (e.g., used in [HPTD15]) -• Weight decay: ℓ2 (e.g., λ = 0.0005 as in [MSM16]) -• Orthogonality regularization ($|(W^T \cdot W - I)|$, see [VTKP17]) -81 -B.5. 
Optimization Techniques -Most relevant optimization techniques for CNNs are based on SGD, which updates the -weights according to the rule -$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_x}{\partial w_{ji}}$ -where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. -A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically -mini-batch sizes are |B| ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes -lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes -lead to longer training times due to computational overhead and to more training steps due -to gradient noise. -$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_B}{\partial w_{ji}}$ -Nine variations which adjust the learning rate during training are: -• Momentum: -$w_{ji}^{(t+1)} \leftarrow w_{ji}^{(t)} + \Delta w_{ji}^{(t+1)}$ with $\Delta w_{ji}^{(t+1)} = -\eta \frac{\partial E_B}{\partial w_{ji}} + \alpha \Delta w_{ji}^{(t)}$ -with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16]) -• Adagrad [DHS11] -• RProp and the mini-batch version RMSProp [TH12] -• Adadelta [Zei12] -• Power Scheduling [Xu11]: $\eta(t) = \eta(0)(1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step, -a, c are constants. -• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and -decrease the learning rate when the algorithm's improvement is below a threshold. -• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-\frac{t}{k}}$ where $t \in \mathbb{N}_0$ is the -training step, η(0) is the initial learning rate, $k \in \mathbb{N}_{\geq 1}$ is the number of training steps -until the learning rate is decreased to $\frac{1}{10}$th. -• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential -Decay Scheduling. -• Adam and AdaMax [KB14] -82 -• Nadam [Doz15] -Some of those are explained in [Rud16]. -Other first-order gradient optimization methods are: -• Quickprop [Fah88] -• Nesterov Accelerated Momentum (NAG) [Nes83] -• Conjugate Gradient method [Cha92]: Combines a line search for the step size with -the gradient's direction. 
-Higher-order gradient methods like Newton's method or quasi-Newton methods like BFGS -and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs. -However, there are alternatives which do not use gradient information: -• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02] -• Simulated Annealing [vLA87] -• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described -on [Tho14b] -There are also approaches which learn the optimization algorithm [ADG+16, LM16]. -83 -B.6. Network Design -CNNs have the following hyperparameters: -• Depth: The number of layers -• Width: The number of filters per layer -• Layer and block connectivity graph -• Layer and block hyperparameters: -– Activation Functions as shown in Table B.3 -– For more, see Sections 2.2 and 2.3. -Name Function $\phi(x)$ Range of Values $\phi'(x)$ Used by -Sign function† $\begin{cases} +1 & \text{if } x \geq 0 \\ -1 & \text{if } x < 0 \end{cases}$ $\{-1, 1\}$ $0$ [KS02] -Heaviside step function† $\begin{cases} +1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ $\{0, 1\}$ $0$ [MP43] -Logistic function $\frac{1}{1 + e^{-x}}$ $[0, 1]$ $\frac{e^x}{(e^x + 1)^2}$ [DJ99] -Tanh $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ $[-1, 1]$ $\operatorname{sech}^2(x)$ [LBBH98, Tho14a] -ReLU† $\max(0, x)$ $[0, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ [KSH12] -LReLU†² (PReLU) $\phi(x) = \max(\alpha x, x)$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ \alpha & \text{if } x < 0 \end{cases}$ [MHN13, HZRS15b] -Softplus $\log(e^x + 1)$ $(0, +\infty)$ $\frac{e^x}{e^x + 1}$ [DBB+01, GBB11] -ELU $\begin{cases} x & \text{if } x > 0 \\ \alpha(e^x - 1) & \text{if } x \leq 0 \end{cases}$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ \alpha e^x & \text{otherwise} \end{cases}$ [CUH15] -Softmax‡ $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ $[0, 1]^K$ $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^{K} e^{x_k} - e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ [KSH12, Tho14a] -Maxout‡ $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x_i = \max \mathbf{x} \\ 0 & \text{otherwise} \end{cases}$ [GWFM+13] -Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 -and functions marked with ‡ operate on all elements of a layer simultaneously. The -hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. 
Other -activation functions like randomized leaky ReLUs exist [XWCL15], but are far less -commonly used. -Some functions are smoothed versions of others, like the logistic function for the -Heaviside step function, tanh for the sign function, softplus for ReLU. -Softmax is the standard activation function for the last layer of a classification network -as it produces a probability distribution. See Figure B.1 for a plot of some of them. -2α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. -84 -−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0 -−1.0 -−0.5 -0.5 -1.0 -1.5 -2.0 -x -y -$\phi_1(x) = \frac{1}{1 + e^{-x}}$ -$\phi_2(x) = \tanh(x)$ -$\phi_3(x) = \max(0, x)$ -$\phi_4(x) = \log(e^x + 1)$ -$\phi_5(x) = \max(x, e^x - 1)$ -Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative -numbers. The image of ELU, ReLU and Softplus is not bounded on the positive side, -whereas tanh and the logistic function are always below 1. -B.7. Regularization -Regularization techniques aim to make the fitted function smoother and reduce overfitting. -Regularization techniques are: -• ℓ1, ℓ2, and Orthogonality regularization: See Appendix B.4 -• Max-norm regularization (e.g. used in [SHK+14]) -• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth -(see [HSL+16]) -• Feature scale clipping (see [ZF14]) -• Data augmentation (according to [ZBH+16]) -• Global average pooling (according to [ZKL+15]) -• Dense-Sparse-Dense training (see [HPN+16]) -• Soft targets (see [HVD15]) -85 -86 -C. Calculating Network Characteristics -C.1. Parameter Numbers -• A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is -due to the bias. -• A convolutional layer i with $k_i$ filters of size n × m being applied to $k_{i-1}$ feature maps -has $k_i \cdot k_{i-1}(n \cdot m + 1)$ parameters. The +1 is due to the bias. -• A fully connected layer with n nodes after k feature maps of size $m_1 \times m_2$ has -$n \cdot (k \cdot m_1 \cdot m_2 + 1)$ parameters. 
-• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has $L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i) = L + 9n + 9n^2 \frac{L^2 - L}{2}$ parameters. -According to [HPTD15], AlexNet has 60 million parameters which is roughly the number -calculated in Table D.2. -C.2. FLOPs -The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence -the following numbers give only rough estimates. -In the following, $n_\phi$ denotes the number of FLOPs to compute the non-linearity ϕ. For -simplicity, $n_\phi = 5$ was chosen. -• A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with -$W \in \mathbb{R}^{n \times k}$, $x \in \mathbb{R}^{k \times 1}$, $b \in \mathbb{R}^{n \times 1}$. It hence needs about n · (k + (k − 1) + 1) = 2nk -additions / multiplications before the non-linearity ϕ is calculated. The total number -of FLOPs is $2 \cdot n \cdot k + n \cdot n_\phi$. -• In the following, biases are ignored. A convolutional layer with $k_i$ filters of size n × m -being applied to $k_{i-1}$ filter maps of size w × h results in $k_i$ filter maps of size w × h if -padding is applied. For each element of each filter map, $n \cdot m \cdot k_{i-1}$ multiplications and -$(n \cdot m \cdot k_{i-1} - 1)$ additions have to be made. This results in $(2nmk_{i-1} - 1) \cdot (k_i \cdot w \cdot h)$ -operations. The total number of FLOPs is $(2 \cdot n \cdot m \cdot k_{i-1} - 1) \cdot (k_i \cdot w \cdot h) + k_i \cdot w \cdot h \cdot n_\phi$. -This is, of course, a naive way of calculating a convolution. There are other ways of -calculating convolutions [LG16]. -87 -• A fully connected layer with n nodes after k feature maps of size w×h needs 2n(k·w·h) -FLOPs. The total number of FLOPs is $2n \cdot (k \cdot w \cdot h) + n \cdot n_\phi$. -• As Dropout is only calculated during training, the number of FLOPs was set to 0. -• The number of FLOPs for max pooling is dominated by the number of positions to -which the pooling kernel is applied. For a feature map of size w × h a max pooling -filter with stride s gets applied $\frac{w \cdot h}{s^2}$ times. The number of FLOPs per application depends -on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs. 
-• The number of FLOPs for Batch Normalization is the same as the number of its -parameters. -Here are some references which give information for the FLOPs: -• AlexNet -– 1.5B in total [HPTD15]. -– 725M in total [KPY+15]. -– 3300M in total in Table D.2 -• VGG-16: -– 15484M in total [HPTD15]. -– 31000M in total in Table D.3. -• GoogleNet: 1566M in total [HPTD15]. -One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same -network. -C.3. Memory Footprint -The memory footprint of CNNs determines when networks can be used at all and if they -can be trained efficiently. In order to be able to train CNNs efficiently, one weight update -step has to fit in the memory of the GPU. This includes the following: -• Activations: All activations of one mini-batch in order to calculate the gradients -in the backward pass. This is the number of floats in the feature maps of all weight -layers combined. -• Weights -• Optimization algorithm: The optimization algorithm introduces some overhead. -For example, Adam stores two parameters per weights. -At inference time, every two consecutive layers have to fit into memory. When the forward -pass of layer A to layer B is calculated, the memory can be freed if no skip connections are -used. -88 -D. Common Architectures -In the following, some of the most important CNN architectures are explained. Understand￾ing the development of these architectures helps understanding critical insights the machine -learning community got in the past years for convolutional networks for image recognition. -It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from -2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed -Inception-v4 is also covered. -The summation row gives the sum of all floats for the output size column. This allows -conclusions about the maximum mini-batch size which can be in memory for training. -89 -D.1. 
LeNet-5 -One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common -pattern of a single convolutional layer with tanh as a non-linear activation function followed -by a pooling layer and three fully connected layers. One fully connected layer is used to -get the right output dimension, another one is necessary to allow the network to learn a -non-linear combination of the features of the feature maps. -Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test -error rate of 0.8 % on MNIST. -Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98]. -# Type Filters @ -Patch size / stride -Parameters FLOPs Output size -Input 0 0 1 @ 32 × 32 -1 Convolution 6 @ 5 × 5 × 1 / 1 156 307 800 6 @ 28 × 28 -2 Scaled average pooling 2 × 2 / 2 2 336 6 @ 14 × 14 -3 Convolution 16 @ 5 × 5 × 6 / 1 2 416 942 400 16 @ 10 × 10 -4 Scaled average pooling 2 × 2 / 2 2 1 600 16 @ 5 × 5 -5 Fully Connected 120 neurons 48 120 240 000 120 -6 Fully Connected 84 neurons 10 164 20 580 84 -7 Fully Connected (output) 10 neurons 850 1 730 10 -Σ 61 710 1 514 446 9118 -Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied. -After layer 7, the softmax function is applied. One can see that convolutional layers -need much fewer parameters, but an order of magnitude more FLOPs per parameter -than fully connected layers. -90 -D.2. AlexNet -The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12]. -Its architecture is shown in Figure D.2 and described in Table D.2. It has about $60 \cdot 10^6$ parameters. A trained AlexNet can be downloaded at www.cs.toronto.edu/~guerzhoy/tf_alexnet. -Note that the uncompressed size is at least $60\,965\,224 \text{ floats} \cdot 32\,\frac{\text{bit}}{\text{float}} \approx 244$ MB. -Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed -by pooling layers multiple times. At the end, a fully connected network is applied. 
-Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). -# Type Filters @ -Patch size / stride -Parameters FLOPs Output size -Input 3 @ 224 × 224 -1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55 -LCN 12 M 96 @ 55 × 55 -2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27 -3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448M 256 @ 13 × 13 -LCN 3 M 256 @ 13 × 13 -4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13 -5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13 -7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13 -9 Convolution 256 @ 3 × 3 × 192 / 1 442 624 150 M 256 @ 13 × 13 -10 Max pooling 3 × 3 / 2 0 50 k 256 @ 6 × 6 -11 FC 4096 neurons 37 752 832 75 M 4096 -12 FC 4096 neurons 16 781 312 34 M 4096 -13 FC 1000 neurons 4 097 000 8 M 1000 -P 60 965 224 3300 M 1 122 568 -Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to -computational restrictions at the time of its development. This also reduces the number -of parameters and allows parallel computation on separate GPUs. However, to make -the architecture easier to compare, this grouping was ignored for the parameter count. -The FLOPs are taken from [HPTD15] and combined with rough estimates for Local -Contrast Normalization and max pooling. -The calculated number of parameters was checked against the downloaded version. It -also has 60 965 224 parameters. -91 -D.3. VGG-16 D -Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual -Geometry Group in Oxford which developed this architecture. It has 16 layers which can -learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3 × 3 -filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a -detailed textual description is given in Table D.3. -A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/ -tensorflow-vgg. 
Note that the uncompressed size is at least 138 357 544 floats · 32 bit -float ≈ -520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and -514 MB with compression. -224 × 224 -Input -C 64@3 × 3/1 -C 64@3 × 3/1 -112 × 112 -max pooling 2 × 2/1 -C 128@3 × 3/1 -C 128@3 × 3/1 -56 × 56 -max pooling 2 × 2/1 -C 256@3 × 3/1 -C 256@3 × 3/1 -C 256@3 × 3/1 -28 × 28 -max pooling 2 × 2/1 -C 512@3 × 3/1 -C 512@3 × 3/1 -C 512@3 × 3/1 -14 × 14 -max pooling 2 × 2/1 -C 512@3 × 3/1 -C 512@3 × 3/1 -C 512@3 × 3/1 -7 × 7 -max pooling 2 × 2/1 -Fully Connected 4096 -Dropout, p = 0.5 -Fully Connected 4096 -Dropout, p = 0.5 -Fully Connected 1000 -Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of -kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding. -92 -# Type Filters @ -Patch size / stride -Parameters FLOPs Output size -Input 3 @ 224 × 224 -1 Convolution 64 @ 3 × 3 × 3 / 1 1 792 186 M 64 @ 224 × 224 -2 Convolution 64 @ 3 × 3 × 64 / 1 36 928 3712M 64 @ 224 × 224 -Max pooling 2 × 2 / 2 0 2 M 64 @ 112 × 112 -3 Convolution 128 @ 3 × 3 × 64 / 1 73 856 1856 M 128 @ 112 × 112 -4 Convolution 128 @ 3 × 3 × 128 / 1 147 584 3705 M 128 @ 112 × 112 -Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56 -5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56 -6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 -7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 -Max pooling 2 × 2 / 2 0 <1 M 256 @ 28 × 28 -8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28 -9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 -10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 -Max pooling 2 × 2 / 2 0 <1 M 512 @ 14 × 14 -11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 -12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 -13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 -Max pooling 2 × 2 / 2 0 <1 M 512 @ 
7 × 7 -14 FC 4096 neurons 102 764 544 206 M 4096 -Dropout 0 0 4096 -15 FC 4096 neurons 16 781 312 34 M 4096 -Dropout 0 0 4096 -16 FC 1000 neurons 4 097 000 8 M 1000 -P 138 357 544 31 000 M 15 245 800 -Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have -learnable parameters. All convolutions are zero padded to prevent size changes and -use ReLU activation functions. The channels mean is subtracted from each pixel as -a preprocessing step (−103.939, −116.779, −123.68). As Dropout is only calculated -during training time, the number of FLOPs is 0. The dropout probability is 0.5. -The calculated number of parameters was checked against the downloaded version. It -also has 138 357 544 parameters. -93 -D.4. GoogleNet, Inception v2 and v3 -The large number of parameters and operations is a problem when such models should get -applied in practice to thousands of images. In order to reduce the computational cost while -maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were -developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and -5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of -parameters. It is shown in Figure D.4. -Figure D.4.: Inception module -Image source: [SLJ+15] -Compared to GoogleNet, Inception v2 [SVI+15] removed the 5 × 5 filters and replaced -them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module -is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to -approximate symmetric filters with fewer parameters. The authors call this approach filter -factorization. -Inception v3 introduced Batch Normalization to the network [SVI+15]. -Figure D.5.: Inception v2 module -Image source: [SVI+15] -94 -D.5. Inception-v4 -Inception-v4 as described in [SIV16] consists of four main building blocks: The stem, -Inception A, Inception B and Inception C. 
To quote the authors: Inception-v4 is a deeper, -wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A -and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use -average pooling. The stem, module B and module C use separable convolutions. -# × Type Parameters Output size -Input 3 @ 299 × 299 -1 Stem 605 728 384 @ 35 × 35 -2 4× Inception A 317 632 384 @ 35 × 35 -3 Reduction A 2 306 112 1024 @ 17 × 17 -4 7× Inception B 2 936 256 1024 @ 17 × 17 -5 Reduction B 2 747 392 1536 @ 8 × 8 -6 3× Inception C 4 553 088 1536 @ 8 × 8 -Global Average Pooling 0 1536 @ 1 × 1 -Dropout (p=0.8) 0 1536 @ 1 × 1 -7 Softmax 1 537 000 1000 -P 42 679 816 -Table D.4.: Inception-v4 network. -95 -96 -E. Datasets -Well-known benchmark datasets for classification problems in computer vision are listed -in Table E.1. The best results known to me are given in Table E.2. However, every semantic -segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers -using Algorithm 2. 
-Database -Image Resolution -(width × height) -Number -of -Images -Number -of -Classes -Channels Data source -MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98] -HASYv2 32 px × 32 px 168 233 369 1 [Tho17a] -SVHN 32 px × 32 px 630 420 10 3 -[NWC+11b], -[NWC+11a] -CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09] -CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09] -STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10] -Caltech-101 (80 px − 3481 px) -×(92 px − 3999 px) 9144 102 3 [FFP03, FFFP06] -Caltech-256 (75 px − 7913 px) -×(75 px − 7913 px) 30 607 257 3 [Gri06, GG07] -ILSVRC 20121 -(8 px − 9331 px) -×(10 px − 6530 px) 1.2 · 106 1000 3 [Ima12, RDS+14] -Places3652 -(290px − 3158px) -×(225px − 2630px) -1.8 · 106 365 3 [Zho16, ZKL+16] -GTSRB (25 px − 266 px) -×(25 px − 232 px) 51 839 43 3 [SSSI, SSSI12] -Asirra3 -(4 px − 500 px) -×(4 px − 500 px) 25 000 2 3 [Asi17, EDHS07] -Graz-02 480 px × 640 px -and 640 px × 480 px 1096 3 3 [Mar08, MS07] -Table E.1.: An overview over publicly available image databases for classification. The number -of images row gives the sum of the training and the test images. Some datasets, like -SVHN, have additional unlabeled data which is not given in this table. -1 -ImageNet Large Scale Visual Recognition Competition -2The dimensions are only calculated for the validation set. 
-3Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle +47 +47 +109 97 -Dataset Model type / name Result Score Achieved / -Claimed by -MNIST — 0.21 % error [WZZ+13] -HASYv2 TF-CNN 81.00 % accuracy [Tho17a] -SVHN DenseNet (k = 24) 1.59 % error [HLW16] -CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16] -CIFAR-100 WRN-28-10 16.21 % error [LH16] -STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15] -Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14] -Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14] -ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a] -GTSRB MCDNN 99.46 % accuracy [SL11] -Asirra SVM 82.7 % accuracy [Gol08] -Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10] -Table E.2.: An overview over state of the art results achieved in computer vision datasets. -Algorithm 2 Create a classification dataset from a semantic segmentation dataset -Require: Semantic segmentation dataset (DS) -procedure CreateDataset(Annotated dataset DS) -DC ← List -w ← desired image width -h ← desired image height -for Image and associated label (x, y) in DS do -i ← randint(0, L.width − w) -j ← randint(0, L.height − h) -cL ← crop(y,(i, j),(i + w, j + h)) -if at least 50% of s are of one class then -cI ← crop(x,(i, j),(i + w, j + h)) -D.append((cI , cL)) -return (DC) -98 -F. List of Tables -2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 -5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 -5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 -5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40 -5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52 -5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52 -5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 
53 -5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54 -5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54 -5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59 -5.10 Activation function properties . . . . . . . . . . . . . . . . . . . . . . . . . . 62 -5.11 Activation function evaluation results on CIFAR-100 . . . . . . . . . . . . . 63 -5.12 Activation function timing results on CIFAR-100 . . . . . . . . . . . . . . . 63 -5.13 Activation function evaluation results on MNIST . . . . . . . . . . . . . . . 64 -5.14 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 -5.15 Optimized model evaluation results . . . . . . . . . . . . . . . . . . . . . . . 67 -5.16 Optimized model speed comparison . . . . . . . . . . . . . . . . . . . . . . . 67 -5.17 Optimized model mean training epochs . . . . . . . . . . . . . . . . . . . . . 68 -5.18 Optimized model trained with early stopping vs training with more data . . 69 -5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69 -5.20 Model regularization with early stopping on training loss - Training time . . 69 -A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75 -A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77 -A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78 -B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80 -B.2 Weight initialization schemes . . . . . . . . . . . . . . . . . . . . . . . . . . 81 -B.3 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84 -D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 -D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 -D.3 VGG-16 D architecture . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . 93 -D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 -99 -E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 -E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98 -100 -G. List of Figures -2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3 -2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6 -2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 -2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -2.5 Aggregation block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 -2.6 Dense block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 -2.7 Validation curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 -2.8 Validation curve with plateaus . . . . . . . . . . . . . . . . . . . . . . . . . 18 -2.9 Learning curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 -2.10 Occlusion analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 -2.11 Filter visualization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26 -3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 -4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33 -5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 -5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42 -5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42 -5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 -5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 
43 -5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44 -5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45 -5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46 -5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47 -5.10 Baseline Weight updates (sum) . . . . . . . . . . . . . . . . . . . . . . . . . 47 -5.11 Confusion matrices for CIFAR-10 . . . . . . . . . . . . . . . . . . . . . . . . 48 -5.12 Confusion matrices for GTSRB . . . . . . . . . . . . . . . . . . . . . . . . . 49 -5.13 Confusion matrices for HASYv2 . . . . . . . . . . . . . . . . . . . . . . . . . 50 -5.14 Confusion matrix of CIFAR-100 . . . . . . . . . . . . . . . . . . . . . . . . . 51 -5.15 Mean weight updates of model with bottleneck . . . . . . . . . . . . . . . . 55 -5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67 -A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75 -A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76 -101 -A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77 -A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 78 -B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 -D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 -D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 -D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 -D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 -D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 -102 -H. Bibliography -[AAB+16] M. Abadi, A. 
Agarwal et al., “Tensorflow: Large-scale machine learning on -heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar. -2016. [Online]. Available: https://arxiv.org/abs/1603.04467 -[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the -clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp. -49–60. -[ADG+16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by -gradient descent,” in Advances in Neural Information Processing Systems 29 -(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar. -2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461- -learning-to-learn-by-gradient-descent-by-gradient-descent.pdf -[AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism: -Going deeper into neural networks,” Jun. 2015. [Online]. Avail￾able: https://research.googleblog.com/2015/06/inceptionism-going-deeper￾into-neural.html -[Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https: -//www.microsoft.com/en-us/download/details.aspx?id=54765 -[BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,” -Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305, -Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/ -bergstra12a/bergstra12a.pdf -[BCW+17] J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through -asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online]. -Available: https://arxiv.org/abs/1703.10155 -[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better im￾age features,” Département d’Informatique et de Recherche Opérationnelle, -Université de Montréal, Tech. Rep. 1337, 2009. -[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using -reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online]. -Available: https://arxiv.org/abs/1611.02167 -103 -[BM93] U. 
Bodenhausen and S. Manke, Automatically Structured Neural -Networks For Handwritten Character And Word Recognition. London: -Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http: -//dx.doi.org/10.1007/978-1-4471-2063-6_283 -[BMDP10] R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest -neighbor,” in European Conference on Computer Vision (ECCV). Springer, -2010, pp. 171–184. -[BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of -feature pooling in visual recognition,” in International Conference on -Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available: -http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf -[BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies -with gradient descent is difficult,” IEEE transactions on neural networks, -vol. 5, no. 2, pp. 157–166, 1994. -[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training -of artificial neural networks,” IEEE Proceedings G-Circuits, Devices -and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: -http://ieeexplore.ieee.org/document/143326/ -[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015. -[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks -in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109, -p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/ -coatesleeng_aistats_2011.pdf -[CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available: -http://cs.stanford.edu/~acoates/stl10 -[CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural -networks for image classification,” in Conference on Computer Vision and -Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online]. -Available: https://arxiv.org/abs/1202.2745v1 -[CUH15] D.-A. Clevert, T. Unterthiner, and S. 
Hochreiter, “Fast and accurate -deep network learning by exponential linear units (ELUs),” arXiv -preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https: -//arxiv.org/abs/1511.07289 -[CWV+14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep -learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available: -https://arxiv.org/abs/1410.0759 -104 -[DBB+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional -knowledge for better option pricing,” in Advances in Neural Infor￾mation Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich, -and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online]. -Available: http://papers.nips.cc/paper/1920-incorporating-second-order￾functional-knowledge-for-better-option-pricing.pdf -[DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry -in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb. -2016. [Online]. Available: https://arxiv.org/abs/1602.02660 -[DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for -online learning and stochastic optimization,” Journal of Machine Learning -Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: -http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf -[DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via -multi-task network cascades,” in Conference on Computer Vision and Pattern -Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available: -https://arxiv.org/abs/1512.04412 -[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural -Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: -ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf -[Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford -University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/ -proj2015/054_report.pdf -[DSRB14] A. Dosovitskiy, J. T. 
Springenberg et al., “Discriminative unsupervised -feature learning with convolutional neural networks,” in Advances in Neural -Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling -et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. -Available: http://papers.nips.cc/paper/5548-discriminative-unsupervised￾feature-learning-with-convolutional-neural-networks.pdf -[DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional -neural networks for galaxy morphology prediction,” Monthly notices of the -royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015. -[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that -exploits interest-aligned manual image categorization,” in ACM Con￾ference on Computer and Communications Security (CCS), no. 14. -Association for Computing Machinery, Inc., Oct. 2007. [Online]. +114 +116 105 -Available: https://www.microsoft.com/en-us/research/publication/asirra-a￾captcha-that-exploits-interest-aligned-manual-image-categorization/ -[EKS+96] M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering -clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996, -pp. 226–231. -[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing. -Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3- -662-44874-8 -[Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation -networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/ -viewcontent.cgi?article=2799&context=compsci -[FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object -categories,” IEEE transactions on pattern analysis and machine intelligence, -vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http: -//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf -[FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. 
Available: http: -//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html -[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discrimina￾tively trained part-based models,” IEEE transactions on pattern analysis and -machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010. -[FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,” -1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/ -[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep -feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online]. -Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf -[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural -networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available: -http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf -[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object -detection and semantic segmentation,” in Conference on Computer Vision -and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online]. -Available: https://arxiv.org/abs/1311.2524 -[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr. -2007. [Online]. Available: http://authors.library.caltech.edu/7694/ -106 -[GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with -Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158, -Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6 -[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman -New York, 2002, vol. 29. -[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete -graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267, -1976. -[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM -conference on Computer and communications security (CCS), no. 15. 
ACM, -2008, pp. 535–542. -[Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May -2015. [Online]. Available: https://arxiv.org/abs/1412.6071 -[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: -http://www.vision.caltech.edu/Image_Datasets/Caltech256/ -[GWFM+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML, -vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http: -//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf -[HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for -transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online]. -Available: https://arxiv.org/abs/1608.08614 -[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online]. -Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf -[Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available: -https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/ -[HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional -networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available: -https://arxiv.org/abs/1608.06993v1 -[HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv -preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https: -//arxiv.org/abs/1611.04231 -[How13] A. G. Howard, “Some improvements on deep convolutional neural network -based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013. -[Online]. Available: https://arxiv.org/abs/1312.5402 -107 -[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques. -Elsevier, 2011. -[HPN+16] S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with -dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016. -[Online]. Available: https://arxiv.org/abs/1607.04381 -[HPTD15] S. Han, J. 
Pool et al., “Learning both weights and connections for efficient -neural network,” in Advances in Neural Information Processing Systems 28 -(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun. -2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784- -learning-both-weights-and-connections-for-efficient-neural-network.pdf -[HSK+12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing -co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580, Jul. -2012. [Online]. Available: https://arxiv.org/abs/1207.0580 -[HSL+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,” -arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https: -//arxiv.org/abs/1603.09382 -[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon -and general network pruning,” in International Conference on Neural -Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http: -//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf -[HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural -network,” arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available: -https://arxiv.org/abs/1503.02531 -[HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional -networks for visual recognition,” in European Conference on Computer -Vision (ECCV). Springer, 2014, pp. 346–361. [Online]. Available: -https://arxiv.org/abs/1406.4729 -[HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,” -arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https: -//arxiv.org/abs/1512.03385v1 -[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level -performance on imagenet classification,” in International Conference on -Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. 
Available: -https://arxiv.org/abs/1502.01852 -[Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),” -108 -2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/ -2012/nonpub-downloads -[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network -training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167, -Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167 -[JXF+16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation -units,” in Thirtieth AAAI Conference on Artificial Intelligence, Dec. 2016. -[Online]. Available: https://arxiv.org/abs/1512.07030 -[Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr. -2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manually￾classifying-cifar10/ -[KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,” -arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https: -//arxiv.org/abs/1412.6980 -[KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny -images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/ -learning-features-2009-TR.pdf -[KMN+16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning: -Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836, -Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836 -[Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D. -dissertation, Master’s thesis, Czech Technical University in Prague, 2015. -[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf -[KPY+15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks -for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530, -Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530 -[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to -cluster analysis. 
John Wiley & Sons, 2009, vol. 344. -[Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https: -//www.cs.toronto.edu/~kriz/cifar.html -[KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear -and neural network approximation,” IEEE Transactions on Information -Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: -http://ieeexplore.ieee.org/abstract/document/971754/ -109 -[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification -with deep convolutional neural networks,” in Advances in Neural -Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges -et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. -Available: http://papers.nips.cc/paper/4824-imagenet-classification-with￾deep-convolutional-neural-networks.pdf -[KSlB+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature -hierarchies for visual recognition,” in Advances in Neural Information -Processing Systems 23 (NIPS), J. D. Lafferty, C. K. I. Williams -et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. -Available: http://papers.nips.cc/paper/4133-learning-convolutional-feature￾hierarchies-for-visual-recognition.pdf -[LAE+16] W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in -European Conference on Computer Vision (ECCV). Springer, 2016, pp. -21–37. [Online]. Available: https://arxiv.org/abs/1512.02325 -[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/ -en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer -[LBBH98] Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document -recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov. -1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun￾01a.pdf -[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature, -vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. 
Available: -http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html -[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in -Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol. -1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8 -[LDS+89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989, -pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/ -lecun-90b.pdf -[Le13] Q. V. Le, “Building high-level features using large scale unsupervised -learning,” in International conference on acoustics, speech and signal -processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http: -//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343 -[LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in 110 -Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. -2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308 -[LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in -convolutional neural networks: Mixed, gated, and tree,” in International -Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available: -https://arxiv.org/abs/1509.08985v2 -[LH16] I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent -with warm restarts,” Learning, Aug. 2016. [Online]. Available: https: -//arxiv.org/abs/1608.03983 -[LJD+16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to -hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016. -[Online]. Available: https://arxiv.org/abs/1603.06560 -[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885, -Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885 -[LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for -semantic segmentation,” in Conference on Computer Vision and Pattern -Recognition (CVPR). 
IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available: -https://arxiv.org/abs/1411.4038v2 -[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar. -2017. [Online]. Available: https://arxiv.org/abs/1703.01513 -[Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available: -https://github.com/titu1994/DenseNet -[Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online]. -Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/ -[MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter -optimization through reversible learning,” in International Conference on -Machine Learning (ICML), 2015, pp. 2113–2122. -[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of -Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008. -[MHN13] A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities -improve neural network acoustic models,” in Proc. ICML, vol. 30, -no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/ -relu_hybrid_icml2013_final.pdf -[MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv +45 +116 +104 111 -preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https: -//arxiv.org/abs/1511.06422 -[MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in -nervous activity,” The bulletin of mathematical biophysics, vol. 5, no. 4, pp. -115–133, 1943. -[MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for -reducing dataset bias in person re-identification,” in International Conference -on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015, -pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/ -7301739/ -[MS07] M. Marszalek and C. Schmid, “Accurate object localization with -shape masks,” in Conference on Computer Vision and Pattern -Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. 
Available: http: -//ieeexplore.ieee.org/document/4270110/ -[MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN -advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016. -[Online]. Available: https://arxiv.org/abs/1606.02228 -[MV16] A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural -networks using natural pre-images,” International Journal of Computer Vision, -pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017 -[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances -in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges, -L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online]. -Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf -[Nes83] Y. Nesterov, “A method of solving a convex programming problem with -convergence rate o (1/k2),” in Soviet Mathematics Doklady, vol. 27, no. 2, -1983, pp. 372–376. -[new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available: -http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html -[Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS -Talk, Dec. 2016. -[NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft -weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992. -[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf -[NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial -112 -data mining,” IEEE transactions on knowledge and data engineering, vol. 14, -no. 5, pp. 1003–1016, 2002. -[NWC+11a] Y. Netzer, T. Wang et al., “Reading digits in natural images with -unsupervised feature learning,” in NIPS workshop on deep learning and -unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available: -http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf -[NWC+11b] Y. Netzer, T. 
Wang et al., “The street view house numbers (SVHN) dataset,” -2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/ -[NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization: -Uncovering the different types of features learned by each neuron in deep -neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online]. -Available: https://arxiv.org/abs/1602.03616 -[OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive -classifiers for unbalanced classification problems: A study on the performance -scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available: -https://arxiv.org/abs/1608.08984 -[PMW+15] N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial -perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508, -Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508 -[Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer -Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/ -10.1007/3-540-49430-8_3 -[RDS+14] O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition -challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep. -2014. [Online]. Available: https://arxiv.org/abs/1409.0575 -[RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks -for biomedical image segmentation,” in International Conference on Medical -Image Computing and Computer-Assisted Intervention. Springer, 2015, pp. -234–241. [Online]. Available: https://arxiv.org/abs/1505.04597 -[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and den￾sity of neurons in the hyperneat substrate,” in Conference on Genetic and -evolutionary computation, no. 12. ACM, 2010, pp. 563–570. -[RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?": -Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938, -Feb. 2016. [Online]. 
Available: https://arxiv.org/abs/1602.04938 -113 -[Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,” -arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https: -//arxiv.org/abs/1609.04747 -[SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks -applied to house numbers digit classification,” in International Conference -on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291. -[Online]. Available: https://arxiv.org/abs/1204.3968 -[SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding -for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185– -212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/ -[SEZ+13] P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization -and detection using convolutional networks,” arXiv preprint arXiv:1312.6229, -Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4 -[SHK+14] N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to -prevent neural networks from overfitting.” Journal of Machine Learning -Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: -https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf -[SHY+13] A. Senior, G. Heigold et al., “An empirical study of learning rates in deep -neural networks for speech recognition,” in International Conference on -Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online]. -Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963 -[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the -impact of residual connections on learning,” arXiv preprint arXiv:1602.07261, -Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261 -[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding -for face recognition and clustering,” in Conference on Computer Vision -and Pattern Recognition (CVPR). 
IEEE, Mar. 2015, pp. 815–823. [Online]. -Available: https://arxiv.org/abs/1503.03832 -[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale -convolutional networks,” in International Joint Conference on Neural -Networks (IJCNN), Jul. 2011, pp. 2809–2813. [Online]. Available: -http://ieeexplore.ieee.org/document/6033589/ -[SLJ+15] C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference -on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp. -1–9. [Online]. Available: https://arxiv.org/abs/1409.4842 -[SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through -114 -augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127, -2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/ -106365602320169811 -[SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to -the nonlinear dynamics of learning in deep linear neural networks,” -arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https: -//arxiv.org/abs/1312.6120 -[SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive -networks,” arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available: -https://arxiv.org/abs/1410.1165 -[SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition -benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section= -gtsrb&subsection=news -[SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking -machine learning algorithms for traffic sign recognition,” Neural Networks, -no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/ -article/pii/S0893608012000457 -[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint -arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606.02492 -[SVI+15] C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture -for computer vision,” arXiv preprint arXiv:1512.00567, Dec. 
2015. [Online]. -Available: https://arxiv.org/abs/1512.00567v3 -[SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional -networks: Visualising image classification models and saliency maps,” -arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https: -//arxiv.org/abs/1312.6034 -[SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for -large-scale image recognition,” arXiv preprint arXiv:1409.1556, Sep. 2014. -[Online]. Available: https://arxiv.org/abs/1409.1556 -[SZS+13] C. Szegedy, W. Zaremba et al., “Intriguing properties of neural -networks,” arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available: -https://arxiv.org/abs/1312.6199v4 -[TF-16a] “MNIST for ML beginners,” Dec. 2016. [Online]. Available: https: -//www.tensorflow.org/tutorials/mnist/beginners/ +109 +97 +46 +100 +101 +47 +109 +97 115 -[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/ -api_docs/python/nn/activation_functions_#dropout -[TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient -by a running average of its recent magnitude,” COURSERA: Neural -Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available: -http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf -[Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,” -Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martin￾thoma.com/write-math -[Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available: -https://martin-thoma.com/twiddle/ -[Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint -arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/ -1602.06541 -[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380, Jan. -2017. [Online]. Available: https://arxiv.org/abs/1701.08380 -[Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available: -https://martin-thoma.com/msthesis -[VH13] P. 
Verbancsics and J. Harguess, “Generative neuroevolution for deep -learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available: -https://arxiv.org/abs/1312.5355 -[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing. -Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available: -http://dx.doi.org/10.1007/978-94-015-7744-1_2 -[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent -networks with long term dependencies,” arXiv preprint arXiv:1702.00071, -Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071 -[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay -neural networks,” IEEE transactions on acoustics, speech, and signal -processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: -http://ieeexplore.ieee.org/document/21701/ -[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connec￾tionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256, -1992. 116 -[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling -and Normalization Methods for Action Recognition. Berlin, Heidelberg: -Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online]. -Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44 -[WYS+15] R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv -preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available: -https://arxiv.org/abs/1501.02876v4 -[WZZ+13] L. Wan, M. Zeiler et al., “Regularization of neural networks using dropconnect,” -in International Conference on Machine Learning (ICML), no. 30, 2013, -pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/ -icml2013/icml2013.pdf -[XGD+16] S. Xie, R. Girshick et al., “Aggregated residual transformations for deep -neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online]. -Available: https://arxiv.org/abs/1611.05431v1 -[Xu11] W. 
Xu, “Towards optimal one pass large scale learning with averaged -stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011. -[Online]. Available: https://arxiv.org/abs/1107.2490 -[XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in -convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online]. -Available: https://arxiv.org/abs/1505.00853 -[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on -support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available: -https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf -[XZY+14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolu￾tional neural network for large-scale image classification,” in International -Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186. -[YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten -digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/ -[ZBH+16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking -generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online]. -Available: https://arxiv.org/abs/1611.03530 -[ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in -Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee, -M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090. -[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutional￾neural-networks.pdf -117 -[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category -detection,” in European Conference on Computer Vision (ECCV). Springer, -Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867 -[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint -arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv.org/abs/ -1212.5701v1 -[ZF13] M. D. Zeiler and R. 
Fergus, “Stochastic pooling for regularization of deep -convolutional neural networks,” arXiv preprint arXiv:1301.3557, Jan. 2013. -[Online]. Available: https://arxiv.org/abs/1301.3557v1 -[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional -networks,” in European Conference on Computer Vision (ECCV). Springer, -Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901 -[Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http:// -places2.csail.mit.edu/download.html -[ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv -preprint arXiv:1605.07146, May 2016. [Online]. Available: https: -//arxiv.org/abs/1605.07146 -[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative -localization,” arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available: -https://arxiv.org/abs/1512.04150 -[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene -understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online]. -Available: https://arxiv.org/abs/1610.02055 -[ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement -learning,” arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available: -https://arxiv.org/abs/1611.01578 -[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,” -arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https: -//arxiv.org/abs/1506.02351v1 -[ZYL+15] H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus -units,” in International Joint Conference on Neural Networks (IJCNN), Jul. -2015, pp. 1–4. +101 +114 +47 +99 +97 +116 +99 +97 +116 +99 +97 +116 +46 +112 +104 +112 +63 118 -I. Glossary -ANN artificial neural network. 4 -ASO Automatic Structure Optimization. 29 -CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71 -CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60, -71, 72, 79, 82–84, 88–91 -ELU Exponential Linear Unit. 
38, 57, 60–64, 72, 73, 77, 78, 84 -ES early stopping. 68 -FC Fully Connected. 91, 93 -FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93 -GA genetic algorithm. 30 -GAN Generative Adverserial Network. 80 -GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91 -HSV hue, saturation, value. 79 -LCN Local Contrast Normalization. 91 -LDA linear discriminant analysis. 79 -LReLU leaky rectified linear unit. 63, 72, 77, 78, 84 -MLP multilayer perceptron. 3–6, 28 -NAG Nesterov Accellerated Momentum. 83 -NEAT NeuroEvolution of Augmenting Topologies. 83 -OBD Optimal Brain Damage. 29 -119 -PCA principal component analysis. 79 -PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84 -ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 -SGD stochastic gradient descent. 5, 30, 45, 46, 82 -ZCA Zero Components Analysis. 79 -120 +61 +49 +46 +48 +9 +-3 +-1 +-6 +5 +3 +2 +-8 +0 +936 +-333 +-109 +-282 +545 +291 +94 +-792 +0 +-4 +-254 +-498 +-662 +-849 +-642 +187 +-520 +45 +240 +211 +388 +215 +-861 +-340 +559 +-105 +185 +-138 +-180 +503 +-718 +429 +350 +173 +251 +268 +-655 +-567 +-53 +-75 +80 +571 +-128 +24 +-408 +596 +-550 +368 +26 +976 +156 +302 +647 +879 +223 +811 +54 +660 +Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the +output image, k +2 multiplications and k2 additions of the products have to be calculated. + +2. Convolutional Neural Networks +One important detail is how boundaries are treated. There are four common ways of +boundary treatment: +• don’t compute: The image I +0 will be smaller than the original image. I0 ∈ +R +(w−kw+1)×(h−kh+1)×d3 +, to be exact. +• zero padding: The image I is padded by zeros where the filter would access elements +which do not exist. This will result in edges being detected at the border if the border +pixels are not black, but doesn’t need any computation. +• nearest: Repeat the pixel which is closest to the boundary. 
+• reflect: Reflect the image at the boundaries. +Common tasks that can be done with linear filters include edge detection, corner detection, +smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples. +Please note that the result of a filtering operation is again an image. This means filters +can be applied successively. While each pixel after one filtering operation with a 3 × 3 +filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3 +filters increase the area of the original image which influenced the output. The output is +then influenced by 25 pixel. This is called the receptive field. The kind of pattern which is +detected by a filter is called a feature. The bigger the receptive field is, the more complex +can features get as they are able to consider more of the original image. Instead of taking +one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters +with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering +operations compared to the two 3 × 3 filters, but the relevance of this technique will become +clear in Section 2.2. +2.2. CNN Layer Types +While the idea behind deep MLPs is that feature hierarchies capture the important parts +of the input more easily, CNNs are inspired by the idea of translational invariance: Many +features in an image are translationally invariant. For example, if a car is developed, one +could try to detect it by its parts [FGMR10]. But then there are many positions at which +the wheels could be. Combining those, it is desirable to capture low-level, translationally +invariant features at lower layers of an artificial neural network (ANN) and in higher layers +high-level features which are combinations of the low-level features. +Also, models should utilize the fact that the pixels of images are ordered. One way to use +this is by learning image filters in so called convolutional layers. 
+While MLPs vectorize the input, the input of a layer in a CNN are feature maps. A feature +map is a matrix m ∈ R +w×h +, but typically the width equals the height (w = h). For an RGB + +2.2. CNN Layer Types +input image, the number of feature maps is d = 3. Each color channel is a feature map. +Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are +state-of-the-art in various computer vision tasks. +Traditional CNNs have three important building tools: +• Convolutional layers with a non-linear activation function as described in Section 2.2.1, +• pooling layers as described in Section 2.2.2 and +• normalization layers as described in Section 2.2.4. +2.2.1. Convolutional Layers +Convolutional layers take several feature maps as input and produce n feature maps1 as +output, where n is the number of filters in the convolution layer. The filter weights of +the linear convolutions are the parameters which are adapted to the training data. The +number n of filters as well as the filter’s size kw × kh are hyperparameters of convolutional +layers. Sometimes, it is denoted as n@kw × kh. Although the filter depth is usually omitted +in the notation, the filters are of dimension kw × kh × d +(i−1), where d(i−1) is the number of +feature maps of the input layer (i − 1). +Another hyperparameter of convolution layers is the stride s ∈ N≥1 and the padding. +Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the +size of the feature maps doesn’t change. +The hyperparameters of convolutional layers are +• the number of filters n ∈ N≥1, +• kw, kh ∈ N≥1 of the filter size kw × kh × d +(i−1) +, +• the activation function of the layer (see Table B.3) and +• the stride s ∈ N≥1 +Typical choices are n ∈ { 32, 64, 128 }, kw = kh = k ∈ { 1, 3, 5, 11 } such as in [KSH12, +SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1. +The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89]. 
+With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just +like MLPs. In fact, every CNN has an equivalent MLP which computes the same function +if only the flattened output is compared. +1 +also called activation maps or channels + +2. Convolutional Neural Networks +This is easier to see when the filtering operation is denoted formally: +$o^{(i)}(x) = b + \sum_{j=1}^{k} w_{ij} \cdot x_j$ with $i \in \{1, \dots, w\} \times \{1, \dots, h\} \times \{1, \dots, d\}$ [2.1] +$o^{(x,y,z)}(I) = b + \sum_{i_x = 1 - \lceil k_w/2 \rceil}^{\lfloor k_w/2 \rfloor} \sum_{i_y = 1 - \lceil k_h/2 \rceil}^{\lfloor k_h/2 \rfloor} \sum_{i_c = 1}^{d} F_z(i_x, i_y, i_c) \cdot I(x + i_x, y + i_y, i_c)$ [2.2] +with a bias $b \in \mathbb{R}$, $x \in \{1, \dots, w\}$, $y \in \{1, \dots, h\}$ and $z \in \{1, \dots, d\}$. +One can see that most weights of the equivalent MLP are zero and many weights are +equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. +The effect of fewer parameters is that less training data is necessary to get suitable +estimations for those. This means an MLP which is able to compute the same functions as a +CNN will likely have worse results on the same dataset, if a CNN architecture is suitable +for the dataset. +See Figure 2.2 for a visualization of the application of a convolutional layer. +3 feature maps +(e.g. RGB) n feature maps +n filters of +size k × k × 3 +width w +width w +height +h +height +h +neural +network +data +apply +. . . +. . . +. . . +. . . +. . . +. . . +Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride +s = 1 to input data of size width × height with three channels. + +2.2. CNN Layer Types +A convolutional layer with n filters of size $k_w \times k_h$ and SAME padding after $d^{(i-1)}$ feature +maps of size $s_x \times s_y$ has $n \cdot d^{(i-1)} \cdot (k_w \cdot k_h)$ parameters if no bias is used. In contrast, a fully +connected layer which produces the same output size and does not use a bias would have +$n \cdot d^{(i-1)} \cdot (s_x \times s_y)^2$ parameters.
This means a convolutional layer has drastically fewer +parameters. On the one hand, this means it can learn less complex decision boundaries. On +the other hand, it means fewer parameters have to be learned and hence the optimization +procedure needs fewer examples and the optimization objective is simpler. +It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does +learn a linear combination of the d input feature maps. This can be used for dimensionality +reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps. +Another insight recently became important: Every fully connected layer has an equivalent +convolutional layer which has the same weights.2 This way, one can use the complete +classification network as a very complex non-linear image filter which can be used for +semantic segmentation. +A fully connected layer with d ∈ N≥1 inputs and n ∈ N≥1 nodes can be interpreted as a +convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will +produce an output of shape 1 × 1 × n. Every single output is connected to all of the inputs. +When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize +the feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output, +it is completely equivalent to a fully connected layer. However, the vectorization can be +omitted if a convolution layer without padding and a filter size equal to the feature map +size is applied. This was used by [LSD15]. +2.2.2. Pooling Layers +Pooling summarizes a p × p area of the input feature map. Just like convolutional layers, +pooling can be used with a stride of s ∈ N>1. As s ≥ 2 is the usual choice, pooling layers +are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as +for AlexNet [KSH12] and VGG-16 [SZ14].
+The type of summary for the set of activations A varies between the functions listed +in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling +functions as introduced in [LGT16]. +2 But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1 + +2. Convolutional Neural Networks +Name Definition Used by +Max pooling $\max \{ a \in A \}$ [BPL10, KSH12] +Average / mean pooling $\frac{1}{|A|} \sum_{a \in A} a$ LeNet-5 [LBBH98] and [KSlB+10] +$\ell_2$ pooling $\sqrt{\sum_{a \in A} a^2}$ [Le13] +Stochastic pooling * [ZF13] +Table 2.1.: Pooling types for a set A of activations a ∈ R. +(*) For stochastic pooling, each of the p × p activation values $a_i$ in the pooling region gets +picked with probability $p_i = \frac{a_i}{\sum_{a_j \in A} a_j}$. This assumes the activations $a_i$ are non-negative. +Pooling is applied for three reasons: To get local translational invariance, to get invariance +against minor local changes and, most importantly, for data reduction to $\frac{1}{s^2}$ of the data by +using strides of s > 1. +See Figure 2.3 for a visualization of max pooling. +7 9 3 5 9 4 +0 7 0 0 9 0 +5 0 9 3 7 5 +9 2 9 6 4 3 +2 × 2 max pooling +9 5 9 +9 9 7 +2 +2 +Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with stride s = 2 and padding. +Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If +the inputs of the pooling layer are $d^{(i-1)}$ feature maps, the convolutional layer has to have +$d^{(i-1)}$ filters of size p × p and stride s. The ith filter has the values +$\begin{pmatrix} \frac{1}{p^2} & \dots & \frac{1}{p^2} \\ \vdots & \ddots & \vdots \\ \frac{1}{p^2} & \dots & \frac{1}{p^2} \end{pmatrix}$ +for the dimension i and the zero matrix +$\begin{pmatrix} 0 & \dots & 0 \\ \vdots & \ddots & \vdots \\ 0 & \dots & 0 \end{pmatrix}$ +for all other dimensions i = 1, . . . , $d^{(i-1)}$. + +2.2. CNN Layer Types +2.2.3. Dropout +Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting +the output of any neuron to zero with probability p.
It was introduced in [HSK+12] and is +well-described in [SHK+14]. +A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of +the same shape $D \in \{0, 1\}^s$ is sampled, where each element $d_i$ is sampled independently +from a Bernoulli distribution. The results are element-wise multiplied to calculate the +output out of the Dropout layer: +$\text{out} = D \odot \text{in}$ with $d_i \sim B(1, 1 - p)$ +where $\odot$ is the Hadamard product +$(A \odot B)_{i,j} := (A)_{i,j} (B)_{i,j}$ +Hence every value of the input gets set to zero with a dropout probability of p. Typically, +Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the +output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b]. +At inference time, dropout is disabled. +Dropout is usually only applied after fully connected layers, but not after convolutional +layers as it usually increases the test error as pointed out in [GG16]. +Models which use Dropout can be interpreted as an ensemble of models with different +numbers of neurons in each layer, but also with weight sharing. +Conceptually similar are DropConnect and networks with stochastic depth. DropConnect [WZZ+13] is a generalization of Dropout, which sets weights to zero in contrast to +setting the output of a neuron to zero. Networks with stochastic depth as introduced +in [HSL+16] drop out only complete layers. This can be done by having Residual networks +which have one identity connection and one residual feature connection. Hence the residual +features can be dropped out and the identity connection remains. +2.2.4. Normalization Layers +One problem when training deep neural networks is internal covariate shift: While the +parameters of layers close to the output are adapted to some input produced by lower layers, +those lower layers' parameters are also adapted.
This leads to the parameters in the upper +layers being worse. A very low learning rate has to be chosen to adjust for the fact that the +input features might drastically change over time. + +2. Convolutional Neural Networks +One way to approach this problem is by normalizing mini-batches as described in [IS15]. In a +Batch Normalization layer, the d-dimensional input $x = (x^{(1)}, \dots, x^{(d)})$ is first normalized +point-wise to +$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s'[x^{(k)}]^2 + \varepsilon}}$ +with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and $s'[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i^{(k)} - \bar{x}^{(k)})^2$ the +sample variance, where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0 +is a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron k for +training sample i. +Additionally, for each activation $x^{(k)}$ two parameters $\gamma^{(k)}, \beta^{(k)}$ are introduced which scale +and shift the feature: +$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$ +In the case of fully connected layers, this is applied to the activation, before the non-linearity +is applied. If it is applied after the activation, it harms the training in early stages. For +convolution, only one γ and one β are learned per feature map. +One important special case is $\gamma^{(k)} = \sqrt{s'[x^{(k)}]^2 + \varepsilon}$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the +Batch Normalization layer an identity layer. +During evaluation time,3 the expected value and the variance are calculated once for the +complete dataset. An unbiased estimate of the empirical variance is used. +The question where Batch Normalization layers (BN) should be applied and for which +reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the +activation function. Considering this, the possible options for the order are: +1. CONV / FC → BN → activation function → Dropout → . . . +2. CONV / FC → activation function → BN → Dropout → . . . +3. CONV / FC → activation function → Dropout → BN → . . . +4.
CONV / FC → Dropout → BN → activation function → . . . +The authors of [IS15] suggest using Batch Normalization before the activation function +as in Items 1 and 4. Batch Normalization after the activation led to better results in +https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md +Another normalization layer is Local Response Normalization as described in [KSH12], +which includes ℓ2 normalization as described in [WWQ13]. Those two normalization layers, +however, are superseded by Batch Normalization. +3 +also called inference time + +2.3. CNN Blocks +2.3. CNN Blocks +This section describes more complex building blocks than simple layers. CNN blocks act +similarly to a layer, but they are themselves composed of layers. +2.3.1. Residual Blocks +Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They +enabled the computer vision community to go from about 16 layers as in VGG 16-D (see +Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets) +as introduced in [HZRS15a] is to add an identity connection which skips two layers. This +identity connection adds the feature maps onto the other feature maps and thus requires +the output of the input layer of the residual block to be of the same dimension as the last layer +of the residual block. +Formally, it can be described as follows. If $x_i$ are the feature maps after layer i and $x_0$ is +the input image, H is a non-linear transformation of feature maps, then +y = H(x) +describes a traditional CNN. Note that this could be multiple layers. A residual block as +visualized in Figure 2.4 is described by +y = H(x) + x +In [HZRS15a], they only used residual skip connections to skip two layers.
Hence, if +convi(xi) describes the application of the convolutional layer i to the input xi without the +nonlinearity, then such a residual block is +xi+2 = conv i+1(ReLU(conv i(xi))) + xi +Figure 2.4.: ResNet module +Image source: [HZRS15a] +[HM16] provides some insights why deep residual networks are successful. + +2. Convolutional Neural Networks +2.3.2. Aggregation Blocks +Two common ways to add more parameters to neural networks are increasing their depth +by adding more layers or increasing their width by adding more neurons / filters. Inception +blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as +“ResNeXt block”: Increasing the cardinality C ∈ N≥1. By cardinality, the authors describe +the concept of having C small convolutional networks with the same topology but different +weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not +combine aggregation blocks with residual blocks as the authors did. +256-d in +concatenate +total 32 +groups +. . . +128-d out +4 @ 1 × 1 × 256 +4 @ 3 × 3 × 4 +4 @ 1 × 1 × 256 +4 @ 3 × 3 × 4 +4 @ 1 × 1 × 256 +4 @ 3 × 3 × 4 +Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer +convolutional network. The first layer receives 256 feature maps and applies four 1 × 1 +filters to it. The second layer applies four 3 × 3 filters. Although every group has +the same topology, the learned weights are different. The outputs of the groups are +concatenated. +The hyperparameters of an aggregation block are: +• The topology of the group members. +• The cardinality C ∈ N≥1. Note that a cardinality of C = 1 is equivalent in every +aspect to using the group network without an aggregation block. + +2.3. CNN Blocks +2.3.3. Dense Blocks +Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The +idea is to connect each convolutional layer directly to subsequent convolutional layers. 
+Traditional CNNs with L layers and one input layer have L connections between layers, +but dense blocks have $\frac{L(L+1)}{2}$ connections between layers. The input feature maps are +concatenated in depth. According to the authors, this prevents features from being relearned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 +have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors +used only on the order of 12 feature maps per layer. +A dense block is visualized in Figure 2.6. +256-d in +k @ 3 × 3 +concatenate +k @ 3 × 3 +concatenate +256-d +k-d +(256 + k)-d +k-d +(256 + L · k)-d out +Figure 2.6.: Dense block with L = 2 layers and a growth factor of k. +Dense blocks have five hyperparameters: +• The activation function being used. The authors use ReLU. +• The size kw × kh of filters. The authors use kw = kh = 3. +• The number of layers L, where L = 2 is a simple convolutional layer. +• The number k of filters added per layer (called growth rate in the paper) +It might be necessary to use 1 × 1 convolutions to reduce the number of L · k feature maps. + +2. Convolutional Neural Networks +2.4. Transition Layers +Transition layers are used to overcome constraints imposed by resource limitations or +architectural design choices. One constraint is the number of feature maps (see Appendix C.3 +for details). In order to reduce the number of feature maps while still keeping as much +relevant information as possible in the network, a convolutional layer i with $k_i$ filters of +the shape $1 \times 1 \times k_{i-1}$ is added. The number of filters $k_i$ directly controls the number of +generated feature maps. +In order to reduce the dimensionality (width and height) of the feature maps, one typically +applies pooling. +Global pooling is another type of transition layer. It applies pooling over the complete +feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one +network to have different input sizes. + +2.5. 
Analysis Techniques +2.5. Analysis Techniques +CNNs have dozens of hyperparameters and ways to tune them. Although there are +automatic methods like random search [BB12], grid search [LBOM98], gradient-based +hyperparameter optimization [MDA15] and Hyperband [LJD+16] some actions need a +manual investigation to improve the model’s quality. For this reason, analysis techniques +which guide developers and researchers to the important hyperparameters are necessary. In +the following, nine diagnostic techniques are explained. +A machine learning developer has the following choices to improve the model’s quality: +(I1) Change the problem definition (e.g., the classes which are to be distinguished) +(I2) Get more training data +(I3) Clean the training data +(I4) Change the preprocessing (see Appendix B.1) +(I5) Augment the training data set (see Appendix B.2) +(I6) Change the training setup (see Appendices B.3 to B.5) +(I7) Change the model (see Appendices B.6 and B.7) +The preprocessing is usually not changed in modern architectures. However, this still leaves +six very different ways to improve the classifier. Changing the training setup and the model +each have too many possible choices to explore them completely. Thus, techniques are +necessary to guide the developer to changes which are most promising to improve the model. +For all of the following methods, it is important to use only the training set and the +validation set. +2.5.1. Qualitative Analysis by Example +The most basic analysis technique which should always be used is looking at examples +which the network correctly predicted with a high certainty and what the classifier got +wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08]. +One the one hand, this might reveal errors in the training data. Most of the time, training +data is manually labeled by humans who make mistakes. If a model is fit to those errors, +its quality decreases. 
+On the other hand, this can show differences in the distribution of validation data which +are not covered by the training set and thus indicate the need to collect more data. + +2. Convolutional Neural Networks +2.5.2. Confusion Matrices +A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$, where $K \in \mathbb{N}_{\geq 2}$ is the number of classes, +which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times +items of class i were classified as class j. This means the correct classifications are on the +diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum $\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}$ is the +total number of samples which were evaluated and +$\frac{\sum_{i=1}^{K} c_{ii}}{\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}}$ +is the accuracy. +The sums $r(i) = \sum_{j=1}^{K} c_{ij}$ of each class i are worth being investigated as they show if the +classes are skewed. If the number of samples of one class dominates the data set, then the +classifier can get a high accuracy by simply always predicting the most common class. If +the accuracy of the classifier is close to the a priori probability of the most common class, +techniques to deal with skewed classes might help. +An automatic criterion to check for this problem is +$\text{accuracy} \leq \frac{\max(\{ r(i) \mid i = 1, \dots, K \})}{\sum_{i=1}^{K} r(i)} + \varepsilon$ +where ε is a small value to compensate for the fact that some examples might be correct just +by chance. +Other values which should be checked are the class-wise sensitivities: +$s(k) = \frac{\text{\# correctly identified instances of class } k}{\text{\# instances of class } k} = \frac{c_{kk}}{r(k)} \in [0, 1]$ +If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is +necessary for class i. +The class-wise confusion +$f_{\text{confusability}}(k_1, k_2) = \frac{c_{k_1 k_2}}{\sum_{j=1}^{K} c_{k_1 j}}$ +indicates if class $k_1$ often gets classified as class $k_2$. The highest values here can indicate +if two classes should be merged or a specialized model for separating those classes could +improve the overall system. +2.5.3. 
Validation Curves: Accuracy, loss and other metrics +Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal +axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are +typical quality metrics. Other quality metrics can be found in [OHIL16]. +In case that the number of training epochs are used as the examined hyperparameter, +validation curves give an indicator if training longer improves the model’s performance. By + +2.5. Analysis Techniques +plotting the error on the training set as well as the error on a validation set, one can also +estimate if overfitting might become a problem. See Figure 2.7 for an example. +10 20 30 40 50 60 70 80 90 100 +0.2 +0.4 +0.6 +0.8 +overfitting +Epochs +Error Training set +Validation set +Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs +and the quality metric is the error (1 − accuracy). The longer the network is trained, +the better it gets on the training set. At some point the network is fit too well to the +training data and loses its capability to generalize. At this point the quality curve of +the training set and the validation set diverge. While the classifier is still improving on +the training set, it gets worse on the validation and the test set. +When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization process did not improve for several epochs. Three possible ways to reduce the +problem of plateaus are (i) to change weight initialization if the plateau was at the beginning, +(ii) regularizing the model or (iii) changing the optimization algorithm. +Loss functions +The loss function (also called error function or cost function) is a function which assigns a +real value to a complex event like the predicted class of a feature vector. It is used to define +the objective function. 
For classification problems the loss function is typically cross-entropy +with $\ell_1$ or $\ell_2$ regularization, as it was described in [NH92]: +$E_{CE}(W) = \underbrace{-\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]}_{\text{cross-entropy data loss}} + \underbrace{\lambda_1 \cdot \overbrace{\sum_{w \in W} |w|}^{\ell_1} + \lambda_2 \cdot \overbrace{\sum_{w \in W} w^2}^{\ell_2}}_{\text{model complexity loss}}$ +where W are the weights, X is the training data set, $K \in \mathbb{N}_{\geq 0}$ is the number of classes and +$t_k^x$ indicates if the training example x is of class k. $o_k^x$ is the output of the classification +algorithm which depends on the weights. $\lambda_1, \lambda_2 \in [0, \infty)$ weight the regularization and are +typically smaller than 0.1. + +2. Convolutional Neural Networks +Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange +curve is smoothed, but the non-smoothed curve is also plotted in light orange. +The data loss is positive whenever the classification is not correct, whereas the model +complexity loss is higher for more complex models. The model complexity loss exists due +to the intuition of Occam’s razor: If two models explain the same data with an accuracy of +100 %, the simpler model is to be preferred. +A reason to show the loss for the validation curve technique instead of other quality metrics +is that it contains more information about the quality of the model. A reason against the +loss is that it has no upper bound like the accuracy and can be hard to interpret. The +loss only shows relative learning progress whereas the accuracy shows absolute progress to +human readers. +There are three observations in the loss validation curve which can help to improve the +network: +• If the loss does not decrease for several epochs, the learning rate might be too low. +The optimization process might also be stuck in a local minimum. +• Loss being NaN might be due to too high learning rates. Another reason is division +by zero or taking the logarithm of zero. 
In both cases, adding a small constant like +$10^{-7}$ fixes the problem. +• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization might be bad. + +2.5. Analysis Techniques +Quality criteria +There are several quality criteria for classification models. Most quality criteria are based on +the confusion matrix c which denotes at $c_{ij}$ the number of times the real class was i and j +was predicted. This means the diagonal contains the number of correct predictions. For +the following, let $t_i = \sum_{j=1}^{k} c_{ij}$ be the number of training samples for class i. The most +common quality criterion is accuracy: +$\text{accuracy}(c) = \frac{\sum_{i=1}^{k} c_{ii}}{\sum_{i=1}^{k} t_i} \in [0, 1]$ +One problem of accuracy as a quality criterion is skewed classes. If one class is by far +more common than all other classes, then the simplest way to achieve a high score is to +always classify everything as the most common class. +In order to fix this problem, one can use the mean accuracy: +$\text{mean-accuracy}(c) = \frac{1}{k} \cdot \sum_{i=1}^{k} \frac{c_{ii}}{t_i} \in [0, 1]$ +For two-class problems there are many other metrics like precision, recall and $F_\beta$-score. +Quality criteria for semantic segmentation are explained in [Tho16]. +Besides the quality of the classification result, several other quality criteria are important +in practice: +• Speed of evaluation for new images, +• latency, +• power consumption, +• robustness against (non)random perturbations in the training data (see [SZS+13, +PMW+15]), +• robustness against (non)random perturbations in the training labels (see [NDRT13, +XXE12]), +• model size +As reducing the floating point accuracy allows processing more data on a given device [Har15], +analysis under this aspect is also highly relevant in some scenarios. +However, the following focuses on the quality of the classification result. + +2. Convolutional Neural Networks +2.5.4. 
Learning Curves +A learning curve is a plot where the horizontal axis displays the number of training samples +given to the network and the vertical axis displays the error. Two curves are plotted: The +error on the training set (of which the size is given by the horizontal axis) and the error on +the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the +validation set is an indicator if more training data without any other changes will improve +the networks performance. Having the training set’s learning curve, it is possible to estimate +if the capacity of the model to fit the data is high enough for the desired classification error. +The error on the validation set should never be expected to be significantly lower than the +error on the training set. If the error on the training set is too high, then more data will +not help. Instead, the model or the training algorithm need to be adjusted. +If the training set’s learning curve is significantly higher than the validation set’s learning +curve, then removing features (e.g., by decreasing the images resolution), more training +samples or more regularization will help. +10 20 30 40 50 60 70 80 90 100 +0.2 +0.4 +0.6 +avoidable bias +variance +human-level error +Training samples +Error Validation set +Training set +Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given +architecture will make to fit the given training data. At the same time, it is expected +that the training data gets more similar to the true distribution of the data which +should be captured by the test data. At some point, the error on the training and +test set should be about the same. The term “avoidable bias” was coined by Andrew +Ng [Ng16]. In some cases it is not possible to classify data correctly by the given +features. If humans can classify the data given the features correctly, however, then +the bias is avoidable by building a better classifier. 
+The major drawback of this analysis technique is its computational intensity. In order to +get one point on the training curve and one point on the testing curve, a complete training +has to be executed. On the full data set, this can be several days on high-end computers. + +2.5. Analysis Techniques +2.5.5. Input-feature based model explanations +Understanding which clues the model took to come to its prediction is crucial to check if +the model actually learns what the developer thinks it learns. For example, a model which +has to distinguish sled dogs from Chihuahuas might simply look at the background and +check if there is snow. Depending on the training and test data, this works exceptionally +well. However, it is not the desired solution. +For classification problems in computer vision, there are two types of visualizations which +help to diagnose such problems. Both color superpixels of the original image to convey +information how the model used those superpixels: +• Correct class heatmap: The probability of the correct class is encoded to give a +heat map which superpixels are important for the correct class. This can also be done +by setting the opacity accordingly. +• Most-likely class image: Each of the most likely classes for all superpixels is +represented by a color. The colored image thus gives clues why different predictions +were assigned a high probability. +Two methods to generate such images are explained in the following. +Occlusion Sensitivity Analysis +Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the +image by something. This could be a gray square as in [ZF14] or a black superpixel as +in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., +superpixel or position of the square) and the regions are then colored to generate either a +correct class heatmap of the most-likely class image. 
It is important to note that the color +at region $r_i$ denotes the result if $r_i$ is occluded. +Both visualizations are shown in Figure 2.10. One can see that the network makes sensible +predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan +Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. +Gradient-based approaches +In [SVZ13], a gradient-based approach was used to generate image-specific class saliency +maps. The authors describe the problem as a ranking problem, where each pixel of the +image $I_0$ is assigned a score $S_c(I_0)$ for a class c of interest. CNNs are non-linear functions, +but they can be approximated by the first order Taylor expansion $S_c(I) \approx w^T I + b$ where +w is the derivative of $S_c$ at $I_0$. + +2. Convolutional Neural Networks +2.5.6. Argmax Method +The argmax method has two variants: +• Fixed class argmax: Propagate all elements of a given class through the network +and analyze which neurons are activated most often / have the highest activation. +• Fixed neuron argmax: Propagate the data through the network and find the n +data elements which cause the highest activation for a given neuron. +Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an +image I is calculated by applying F to I and calculating the element-wise sum of the result. +Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides +showing the 9 images which caused the highest activation, they also trained a deconvolutional +neural network to project the activation of the filter back into pixel space. +The fixed neuron argmax can be used qualitatively to get an impression of the kind of +features which are learned. This is useful to diagnose problems, for example in [AM15] it is +described that the network recognized the class “dumbbell” only if a hand was present, too. 
+Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters +being shared between classes or how many parameters are mainly assigned to which classes. +Going one step further from the fixed neuron argmax method is using an optimization +algorithm to change an initial image minimally in such a way that any desired class gets +predicted. This is called caricaturization in [MV16]. +2.5.7. Feature Map Reconstructions +Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights +into the learned features. This shows what the network emphasizes. However, it is not +necessarily the case that the feature maps allow direct and easy conclusions about the +learned features. This technique is called inversion in [MV16]. +A key idea of feature map visualizations is to reconstruct a layer’s input, given its activation. +This makes it possible to find which inputs would cause neurons to activate with extremely +high or low values. +More recent work like [NYC16] tries to make the reconstruction’s appearance look more +natural. + +2.5. Analysis Techniques +2.5.8. Filter comparison +One question which might lead to some insight is how robust the features are which +are learned. If the same network is trained with the same data, but different weight +initializations, the learned weights should still be comparable. +If the set of learned filters changes with initialization, this might be an indicator for too +little capacity of that layer. Hence adding more filters to that layer could improve the +performance. +Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: +$\rho_k(W_i, W_j) = \max_{(x,y) \in \{-k,\dots,k\}^2 \setminus (0,0)} \frac{\langle W_i, T(W_j, x, y) \rangle_f}{\|W_i\|_2 \|W_j\|_2} \in [-1, 1],$ +where T(·, x, y) denotes the translation of the first operand by (x, y), with zero padding at +the borders to keep the shape. 
$\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two +operands are flattened into column vectors before applying the standard inner product. The +closer the absolute value of the k-translation correlation to one, the more similar two filters +$W_i$, $W_j$ are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and +VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found +this by comparing the averaged maximum k-translational correlation of the networks with +Gaussian-distributed initialized filters. The averaged maximum k-translational correlation +is defined as +$\bar{\rho}_k(W) = \frac{1}{N} \sum_{i=1}^{N} \max_{j=1, j \neq i}^{N} \rho_k(W_i, W_j)$ +where N is the number of filters in the layer W and $W_i$ denotes the ith filter. +2.5.9. Weight update tracking +Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if +the learning rate is well-chosen. He suggests that the weight update should be in the order +of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the +weight update is too low, then the learning rate has to be increased. +The order of the weight updates as well as possible implications highly depend on the model +and the training algorithm. See Appendix B.5 for a short overview of training algorithms +for neural networks. + +2. Convolutional Neural Networks +2.6. Accuracy boosting techniques +There are techniques which can almost always be applied to improve accuracy of CNN +classifiers: +• Ensembles [CMS12] +• Training-time augmentation (see Appendix B.2) +• Test-time transformations [DDFK16, How13, HZRS15b] +• Pre-training and fine-tuning [ZDGD14, GDDM14] +One of the simplest ensemble techniques which was introduced in [CMS12] is averaging +the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly +the same training setup by reducing variance. 
+Data augmentation techniques give the optimizer the possibility to take invariances like +rotation into account by generating artificial training samples from real training samples. +Data augmentation hence reduces bias and variance with no cost at inference time. +Data augmentation at inference time reduces the variance of the classifier. Similar to using +an ensemble, it increases the computational cost of inference. +Pretraining the classifier on another dataset to obtain start from a good position or finetuning +a model which was originally created for another task is also a common technique. + +2.6. Accuracy boosting techniques +Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images, +where a gray square occluded a part of the image. This gray squares center (x, y) was +moved over the complete image and the classifier was run on each of the occluded +images. The probability of the correct class, depending on the gray squares position, +is showed in the middle column. One can see that the predicted probability of the +correct class “Pomeranian” drops if the face of the dog is occluded. The last image +gives the class with the highest predicted probability. In the case of the Pomeranian, +it always predicts the correct class if the head is visible. However, if the head of the +dog is occluded, it predicts other classes. + +2. Convolutional Neural Networks +Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature +maps which caused the highest activation are displayed. + +3. Topology Learning +The topology of a neural network is crucial for the number of parameters, the number +of floating point operations (FLOPs), the required memory, as well as the features being +learned. The choice of the topology, however, is still mainly done by trial-and-error. 
+This chapter introduces four general approaches to automatic topology learning: Growing a +network from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches +in Section 3.3 and reinforcement learning approaches in Section 3.4. +3.1. Growing approaches +Growing approaches for topology learning start with a minimal network, which only has +the necessary number of input nodes and the number of output nodes which are determined +by the application and the features of the input. They then apply a criterion to insert new +layers / neurons into the network. +In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization are introduced. +3.1.1. Cascade-Correlation +Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which +is similar to the dense block described in Section 2.3.3. +Cascade-Correlation works as follows: +1. Initialization: The number of input nodes and the number of output nodes are +defined by the problem. Create a minimal, fully connected network for those. +2. Training: Train the network until the error no longer decreases. +3. Candidate Generation: Generate candidate nodes. Each candidate node is connected to all inputs. They are not connected to other candidate nodes and not +connected to the output nodes. + +3. Topology Learning +4. Correlation Maximization: Train the weights of the candidates by maximizing S, +the correlation between the candidate’s output value V and the network’s residual error: +$S = \sum_{o \in O} \left| \sum_{p \in T} (V_p - \bar{V}) (E_{p,o} - \bar{E}_o) \right|$ +where O is the set of output nodes, T is the training set, $V_p$ is the candidate neuron’s +activation for a training pattern p. $E_{p,o}$ is the residual output error at node o for +pattern p. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of T. This step is finished +when the correlation no longer increases. +5. 
Candidate selection: Keep the candidate node with the highest correlation, freeze +its incoming weights and add connections to the output nodes. +6. Continue: If the error is higher than desired, continue with step 2. +One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1. +1 +Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray) +to the left, three hidden nodes (green) in the middle and two output nodes in the upper +right corner. The black squares represent frozen weights which are found by correlation +maximization whereas the white squares are trainable weights. +3.1.2. Meiosis Networks +Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where +weights are deterministic and fixed at prediction time, each weight $w_{ij}$ in Meiosis networks +follows a normal distribution: +$w_{ij} \sim \mathcal{N}(\mu_{ij}, \sigma_{ij}^2)$ + +3.2. Pruning approaches +Hence every connection has two learned parameters: $\mu_{ij}$ and $\sigma_{ij}^2$. +The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell +division. A node j is split when the random part dominates the value of the sampled +weights: +$\frac{\sum_i \sigma_{ij}}{\sum_i \mu_{ij}} > 1$ and $\frac{\sum_k \sigma_{jk}}{\sum_k \mu_{jk}} > 1$ +The mean of the new nodes is sampled around the old mean, half the variance is assigned +to the new connections. +Hence Meiosis networks only change the number of neurons per layer. They do not add +layers or add skip connections. +3.1.3. Automatic Structure Optimization +Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of online handwriting recognition. It makes use of the confusion matrix $C = (c_{ij}) \in \mathbb{N}_{\geq 0}^{k \times k}$ +(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix +S with $s_{ij} = s_{ji} = c_{ij} \cdot c_{ji}$. The maximum of S defines where the ASO algorithm adds +more parameters. The details how the resources are added are not transferable to CNNs. +3.2. 
Pruning approaches +Pruning approaches start with a network which is bigger than necessary and prune it. The +motivation to prune a network which has the desired accuracy is to save storage for easier +model sharing, memory for easier deployment and FLOPs to reduce inference time and +energy consumption. Especially for embedded systems, deployment is a challenge and low +energy consumption is important. +Pruning generally works as follows: +1. Train a given network until a reasonable solution is obtained, +2. prune weights according to a pruning criterion and +3. retrain the pruned network. +This procedure can be repeated. +One family of pruning criteria uses the Hessian matrix. One example is Optimal Brain +Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calculates +the effect on the objective function of deleting k. The authors call the effect of the deletion + +3. Topology Learning +of parameter k the saliency $s_k$. The parameters with the lowest saliency are deleted, which +means they are set to 0 and are not updated anymore. +A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights +in a much better way. This requires, however, calculating the inverse Hessian matrix +$H^{-1} \in \mathbb{R}^{n \times n}$ where $n \in \mathbb{N}$ is typically $n > 10^6$. +A much simpler and computationally cheaper pruning criterion is the weight magnitude. +[HPTD15] prunes all weights w which are below a threshold θ: +$w \leftarrow \begin{cases} w & \text{if } w \geq \theta \\ 0 & \text{otherwise} \end{cases}$ +3.3. Genetic approaches +The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which +can recombine themselves via crossover and inversion. An introduction to such algorithms +is given in [ES03]. +Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its +successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10]. 
+The results, however, are of unacceptable quality: On MNIST (see Appendix E), where +random chance gives 10 % accuracy, even simple topologies trained with SGD achieve +about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+13], the HyperNEAT +algorithm achieves only 23.9 % accuracy [VH13]. +Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy +on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer +so that not more than three hidden layers could be trained. At the same time, VGG19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers +in [HZRS15a]. +[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and +SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is +at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100, +but the state of the art is 17.18 % [HLW16]. +3.4. Reinforcement Learning +Reinforcement learning is a sub-field of machine learning, which focuses on the question +how to choose actions that lead to high rewards. + +3.5. Convolutional Neural Fabrics +One can think of the search for good neural network topologies as a reinforcement learning +problem. The agent is a recurrent neural network which can generate bitstrings. Those +variable-length bitstrings encode neural network topologies. +In 2016, this approach was applied to construct neural networks for computer vision. +In [BGNR16], Q-learning with an ε-greedy exploration was applied. +In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models +for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous +amounts of computational resources were used to obtain those results. +3.5. Convolutional Neural Fabrics +Convolutional Neural Fabrics are introduced in [SV16]. 
They side-step hard decisions +about topologies by learning an ensemble of different CNN architectures. The idea is to +define a single architecture as a trellis through a 3D grid of nodes. Each node represents a +convolutional layer. One dimension is the index of the layer, the other two dimensions are +the amount of filters and the feature size. Each node is connected to nine other nodes and +thus represents nine possible choices of convolutional layers: +• Resolution: (i) convolution with stride=1 or (ii) convolution with stride=2 or +(iii) deconvolution (doubling the resolution) +• Channels: (i) half the number of filters than the layer before (ii) the same number +of filters as the layer before (iii) double the number of filters than the layer before +They always use ReLU as an activation function and they always use filters of size 3 × 3. +They don’t use pooling at all. + +3. Topology Learning + +4. Hierarchical Classification +Designing a classifier for a new dataset is hard for two main reasons: Many design choices are +not clearly superior to others and evaluating one design choice takes much time. Especially +CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train. +Additionally, some methods for analyzing a dataset become harder to use with more classes +and more training samples. Examples are t-SNE, the manual inspection of errors and +confusion matrices, and the argmax method. +One idea to approach this problem is by building a hierarchy of classifiers. The root +classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single +classes. Figure 4.1 gives an example for an hierarchy of classifiers. +Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. +The root classifier C0 has to distinguish six coarse classes (pedestrian, four+-wheelers, +traffic signs, two-wheelers, street, other) or 17 fine-grained classes. 
If C0 predicts a +pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C0 +predicts traffic sign, then another classifier has to predict if it is a speed limit, a +sign indicating danger or something else. If C0, however, predicts road, then no other +classifier will become active. +In this example, the problem has 17 classes. The hierarchical approach introduces +7 clusters of classes and thus uses 8 classifiers. +Such a hierarchy of classifiers needs clusters of classes. + +4. Hierarchical Classification +4.1. Advantages of classifier hierarchies +Having a classifier hierarchy has five advantages: +• Division of labor: Different teams can work together. Instead of having a monolithic +task, the solutions can be combined. +• Guarantees: Changing a classifier will only change the prediction of itself and its +children. Siblings are not affected. In the example from Figure 4.1, the classifier +which distinguishes traffic signs can be changed while the classification as pedestrian, +four+-wheelers, traffic sign, street, other will not be affected. Also, the +classification between speed limits, danger signs and other signs will not change. +• Faster training: Except for the root classifier C0, each other classifier will have +less than the total amount of training data. Depending on the combined classes, the +models could also be simpler. Hence the training time is reduced. +• Weighting of errors: In practice, some errors are more severe than others. For +example, it could be acceptable if the two-wheelers classifier has an error rate of +40 %. But it is not acceptable if the speed limit classifier has such a high error rate. +• Post-hoc explanations: The simpler a model is, the easier it is to explain why a +classification is made the way it is made. +4.2. Clustering classes +There are two ways to cluster classes: By similarity or by semantics. 
While semantic +clustering needs either additional information or manual work, the similarity can be +automatically inferred from the data. As pointed out in [XZY+14], semantically similar +classes are often also visually similar. For example, in the ImageNet dataset most dogs +are semantically and visually more similar to each other than to non-dogs. An example +where this is obviously not the case are symbols: The summation symbol \sum is identical +in appearance to the Greek letter \Sigma, but semantically much closer to the addition +operator +. +One approach to cluster classes by similarity is to train a classifier and examine its +predictions. Each class is represented in the confusion matrix by one row. Those rows +can be directly clustered with standard clustering algorithms such as k-means, DBSCAN [EKS+96], +OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral +clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of +them do not allow a human to improve the found clustering manually. +The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ +states how often class i was present and class j was + +4.2. Clustering classes +predicted. The more often this confusion happens, the more similar those two classes are to +the classifier. Based on the confusion matrix, the classes can be clustered as explained in +the following. +[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains +diminish after a critical point of classes is reached. Hence a binary tree might not be a +good choice. As an alternative, an approach which allows building arbitrarily many clusters +is proposed. +The proposed algorithm has two main ideas: +• The order of columns and rows in the confusion matrix is arbitrary. This means one +can swap rows and columns. If row i and j are swapped, then the columns i and j +have to be swapped too in order to keep the same confusion matrix.
+• If two classes are confused often, then they are similar to the classifier. +Hence the order of the classes is permuted in such a way that the highest errors are close +to the diagonal. One possible objective function to be minimized is +$f(C) = \sum_{i=1}^{n} \sum_{j=1}^{n} C_{ij} \cdot |i - j|$ (4.1) +which punishes errors linearly with the distance to the diagonal. This method is called CMO +in the following. +As pointed out by Tobias Ribizel (personal communication), this optimization problem +is a weighted version of the Optimal Linear Arrangement problem. That problem is NP-complete [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, +produces reasonable clusterings as well as visually appealing confusion matrices. The +algorithm works as follows: First, decide with probability 0.5 if only two random rows are +swapped or a block is swapped. If two rows are swapped, choose both of them randomly. +If a block is swapped, then choose the start randomly and the end of the block randomly +after the start. The insert position has to be a valid position considering the block length, +but besides that it is also chosen uniformly at random. +Simple row-swapping can exploit local improvements. For example, in the context of +ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier +and both dog classes Dalmatian and Greyhound next to each other. The two clusters +of dog breeds could be separated by car and bus due to random chance. Moving any single +class increases the score, but moving either one of the dog breed clusters or the vehicle +cluster decreases the score. Hence it is beneficial to implement block moving. +One advantage of permuting the classes in order to minimize Equation (4.1) in comparison +to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be + +4. Hierarchical Classification +split into many much smaller matrices along the diagonal.
In the case of many classes (e.g., +1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to +visualize the types of errors made. If the errors are systematic due to visual similarity, many +confusions are not made and thus many elements of the confusion matrix are close to 0. +Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1). +Once a permutation of the classes is found which has a low score according to Equation (4.1), the clusters +can either be made by hand by deciding which classes should not be in one cluster. With +such a permutation, only n − 1 binary decisions have to be made and hence only the list of +classes has to be read. Alternatively, one can calculate the confusions $C'_{i,i+1} + C'_{i+1,i}$ for +each pair of classes which are neighbors in the confusion matrix. The higher this value, the +more similar are the classes according to the classifier. Hence a threshold θ can be applied. +θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) +or semi-automatically by asking the user for information if two classes belong to the same +cluster. Such an approach only needs log(n) binary decisions from the user where n is the +number of classes. +Please note that CMO only works if the classifier is neither too bad nor too good. A classifier +which does not solve the task at all might just give almost uniform predictions whereas the +confusion matrix of an extremely good classifier is almost diagonal and thus contains no +information about the similarity of classes. One possible solution to this problem is to take +the prediction of the class in contrast to using only the argmax in order to find a useful +permutation. + +5. Experimental Evaluation +All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16] +and cuDNN 5.1 [CWV+14] as the backend.
The experiments were run on different machines +with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce +GTX 970 and GeForce 940MX. +The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], +HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are +used as their size is small enough to be trained within a day. Other classification datasets +which were considered are listed in Appendix E. +CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color +images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer, +dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 % [HLW16]. +According to [Kar11], human accuracy is at about 94 %. +CIFAR-100 is a 100-class dataset of color images of the size 32 px × 32 px. Its 100 classes +are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles +and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain +the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16]. +GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. +The 51 839 images are in color and of a minimum size of 25 px×25 px up to 266 px×232 px. +The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11]. +According to [SSSI], human performance is at 98.84 %. +HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images +of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows, +mathematical symbols. The state of the art achieves an accuracy of 82.00 % [Tho17a]. +STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px×96 px. +Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. 
The state +of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images +for unsupervised training and 500 images per class for supervised training. +SVHN (Street View House Numbers) exists in two formats. For the following experiments, +the cropped digit format was used. It contains the 10 digits cropped from photos of Google +Street View. The images are in color and of size 32 px × 32 px. The state of the art + +5. Experimental Evaluation +achieves an accuracy of 98.41 % [HLW16]. According to [NWC+11a], human performance +is at 98.0 %. +As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0, 1]. +For GTSRB, the training and test data was scaled to 32 px × 32 px. +5.1. Baseline Model and Training setup +The baseline model is trained with Adam [KB14], an initial learning rate of 10−4, a batch +size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation +depends on the dataset: +• CIFAR-10, CIFAR-100 and STL-10: Random width and height shift by at most +±3 pixels in either direction; Random horizontal flip. +• GTSRB, MNIST: Random width and height shift by at most ±5 pixels in either +direction; random rotation by at most ±15 degrees; random channel shift; random +zoom in [0.5, 1.5]; random shear by at most 6 degrees. +• HASYv2: Random width and height shift by at most ±5 pixels in either direction; +random rotation by at most ±5 degree. +• SVHN: No data augmentation. +If the dataset does not define a training/test set, a stratified 67 % / 33 % split is applied. If +the dataset does not define a validation set, the training set is split in a stratified manner +into 90 % training set / 10 % test set. +Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of +10 epochs is applied. 
After this, the model is trained without data augmentation for at most +1000 epochs with early stopping and the validation accuracy as a stopping criterion and a +patience of 10 epochs. Kernel weights are initialized according to the uniform initialization +scheme of He [HZRS15b] (see Appendix B.3). +The architecture of the baseline model uses a pattern of +Conv-Block(n) = (Convolution − Batch Normalization − Activation) +n − Pooling +The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for +the last layer where softmax is used. Before the last two convolutional layer, a dropout +layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1. +Please note that the number of input- and output channels of the network depends on +the dataset. If the input image is larger than 32 px × 32 px, for each power of two a +Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to +32 px × 32 px. + +5.1. Baseline Model and Training setup +# Type Filters @ +Patch size / stride +Parameters FLOPs Output size +Input 0 0 3 @ 32 × 32 +1 Convolution 32 @ 3 × 3 × 3 / 1 896 1 736 704 32 @ 32 × 32 +2 BN + ELU 64 163 904 32 @ 32 × 32 +3 Convolution 32 @ 3 × 3 × 32 / 1 9 248 18 841 600 32 @ 32 × 32 +4 BN + ELU 64 163 904 32 @ 32 × 32 +Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16 +5 Convolution 64 @ 3 × 3 × 32 / 1 18 496 9 420 800 64 @ 16 × 16 +6 BN + ELU 128 82 048 64 @ 16 × 16 +7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16 +8 BN + ELU 128 82 048 64 @ 16 × 16 +Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8 +9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8 +10 BN + ELU 128 20 608 64 @ 8 × 8 +Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4 +11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 800 1 048 064 512 @ 1 × 1 +12 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +13 Convolution 512 @ 1 × 1 × 512 / 1 262 656 523 776 512 @ 1 × 1 +14 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 
0.5 0 0 512 @ 1 × 1 +15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1 +Global avg Pooling 1 × 1 0 k k @ 1 × 1 +16 BN + Softmax 2k 7k k @ 1 × 1 +P 515k ++892 512 +1032k ++55 729 664 103 424+2k +Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers +use SAME padding, except for layer 11 which used VALID padding in order to decrease +the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for +each power of two there are two Convolution + BN + ELU blocks and one Max pooling +block added. This is the framed part in the table. +32 × 32 +Input +C 32@3 × 3/1 +BN + ELU +C 32@3 × 3/1 +BN + ELU +16 × 16 +max pooling 2 × 2/2 +C 64@3 × 3/1 +BN + ELU +C 64@3 × 3/1 +BN + ELU +8 × 8 +max pooling 2 × 2/2 +C 64@3 × 3/1 +BN + ELU +4 × 4 +max pooling 2 × 2/2 +C 512@4 × 4/1 (V) +BN + ELU +Dropout, p = 0.5 +1 × 1 +C 512@1 × 1/1 +BN + ELU +Dropout, p = 0.5 +C k@1 × 1/1 +Global AVG pooling +BN + Softmax +Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 filters +of kernel size 3 × 3 with stride 1. + +5. Experimental Evaluation +5.1.1. Baseline Evaluation +The results for the baseline model evaluated on eight datasets are given in Table 5.2. The +speed for inference for different GPUs is given in Table 5.3. +Dataset Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set +Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 % +CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 % +CIFAR-100 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 % +GTSRB 100.00 % σ = 0.00 99.18 % σ = 0.11 100.00 % 99.46 % +HASYv2 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % +MNIST 99.93 % σ = 0.07 99.53 % σ = 0.06 99.99 % 99.58 % +STL-10 94.12 % σ = 0.87 75.67 % σ = 0.34 96.35 % 77.62 % +SVHN 99.02 % σ = 0.07 96.28 % σ = 0.10 99.42 % 97.20 % +Table 5.2.: Baseline model accuracy on eight datasets. 
The single model accuracy is the mean accuracy of the 10 models +used in the ensemble. The empirical standard deviation σ of the accuracy is also given. +CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the +models uses unlabeled data or data from other datasets. For HASYv2 no test-time +transformations are used. +Network GPU Tensorflow Inference per Training +1 Image 128 images time / epoch +Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s +Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s +Baseline Default GeForce 940MX 4 ms 120 ms 145.6 s +Baseline Default GTX 970 6 ms 32 ms 25.0 s-26.3 s +Baseline Default GTX 980 3 ms 24 ms 20.5 s-21.1 s +Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22.1 s +Baseline Default GTX 1070 2 ms 15 ms 14.4 s-14.5 s +Baseline Default Titan Black 4 ms 25 ms 28.1 s-28.1 s +Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24.4 s +DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms — +Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on +six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17]. +Weights of the baseline model can be found at [Tho17b]. The optimized Tensorflow build +makes use of SSE4.X, AVX, AVX2 and FMA instructions. + +5.1. Baseline Model and Training setup +5.1.2. Weight distribution +The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution +of bias weights by layer is shown in Figure 5.3. Although both figures only show the +distribution for one specific model trained on CIFAR-100, the following observed patterns +are consistent for 70 models (7 datasets and 10 models per dataset): +• The empirical [0.5-percentile, 99.5-percentile] interval which contains 99 % of the +filter weights is almost symmetric around zero. The same is true for the bias weights. +• The farther away a layer is from the input, the smaller the 99-percentile interval is, +except for the last layer (see Table A.1).
+• The 99-percentile interval of the first layer's filter weights is about [−0.5, +0.5], except +for MNIST and HASYv2 where it is in [−0.8, 0.8]. +• The 99-percentile interval of the first layer's bias weights is always in [−0.2, 0.2]. +• The distribution of filter weights of the last convolutional layer is not symmetric. In +some cases the distribution is also not unimodal. +• The bias weights of the last three layers are very close to zero. The absolute value of +most of them is smaller than $10^{-2}$. +Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of +Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases, +the Batch Normalization layer equals the identity and thus is only relevant for the training. +While γ and β do not show as clear patterns as the filter and bias weights of convolutional +layers, some observations are also consistent through all models even for different datasets: +• γ of the last layer (layer 16) is bigger than 1.3. +• The 99-percentile interval for β of the last layer is longer than the other 99-percentile +intervals. +• The 99-percentile interval for β of the fourth-last (layer 14 for STL-10, layer 10 for +all other models) is more negative than all other layers. +Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional +layer. The ranges are calculated for each channel and filter separately. The smaller the +values are, the less information is lost if the filters are replaced by smaller filters. + +5. Experimental Evaluation +Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR-100. The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4]. +With every layer the interval which contains 95 % of the weights and is centered around +the mean becomes smaller, especially with layer 11 where the feature maps are of +size 1 × 1.
In contrast to the other layers, the last convolutional layer has a bimodal +distribution. +This plot indicates that the network might benefit from bigger filters in the first layer, +whereas the filters in layers 7 – 11 could potentially be smaller. +Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. +While the first layers biases are in [−0.1, +0.1], after each max-pooling layer the interval +which contains 95 % of the weights and is centered around the mean becomes smaller. +In the last three convolutional layer, most bias weights are in [−0.005, +0.005]. + +5.1. Baseline Model and Training setup +Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a +baseline model trained on CIFAR-100. +Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model +trained on CIFAR-100. + +5. Experimental Evaluation +Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For +each filter, the range of values is recorded by channel. The smaller this range is, the +less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter. + +5.1. Baseline Model and Training setup +5.1.3. Training behavior +Due to early stopping, the number of epochs which a model was trained differ. The number +of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard +deviation of 17.3 epochs for CIFAR-100. +Figure 5.7 shows the worst and the best validation accuracy during the training with +augmented data. Different initializations lead to very similar validation accuracies during +training. The image might lead to the wrong conclusion that models which are better at +the start are also better at the end. In order to check this hypothesis, the relative order of +validation accuracies for the 10 CIFAR-100 models was examined. 
If the relative ordering +stays approximately the same, then it can be considered to run the first few epochs many +times and only train the best models to the end. For 10 models, there can be $\frac{10^2 - 10}{2} = 45$ +pair-wise changes in the ordering at maximum if the relative order of validation accuracies +is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred +on average for each pair of epochs (i, i + 1). This means if one knows only the relative order +of the validation accuracy of two models m and $m'$ in epoch i, it is doubtful if one can +make any statement about the ordering of m and $m'$ in epoch i + 1. +0 +102030405060708090 +100110120130 +140 +0.2 +0.3 +0.4 +0.5 +0.6 +0.7 +epoch +validation accuracy +maximum validation accuracy +minimum validation accuracy +1.5 +2 +2.5 +3 +3.5 +4 +4.5 +loss +maximum validation accuracy +minimum validation accuracy +mean loss +Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The +differences do not exceed 1 % and do not increase by training epoch. Four models +stopped the first training stage at epoch 133 which causes the shift in the loss and the +maximum validation accuracy. +Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was +expected that the absolute value of weight updates during epochs (sum, max, and mean) +decrease in later training stages. The intuition was that weights need to be adjusted in a +coarse way first. After that, the intuition was that only slight modifications are applied by + +5. Experimental Evaluation +the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as +displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest +change happens as expected in the first epoch after the weights are initialized.
The change +from augmented training to non-augmented training was at epoch 156 to epoch 157. +It can be observed that layers which receive more input feature maps get larger weight +updates on average. As layers which are closer to the output take more input feature maps, +their weight updates are larger. This pattern does not occur when SGD is used as the +optimizer. +Figure 5.8.: Mean weight updates of the baseline model between epochs by layer. + +5.1. Baseline Model and Training setup +Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer. +Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer. + +5. Experimental Evaluation +5.2. Confusion Matrix Ordering +The visualization of the confusion matrix can give valuable information about which part +of the task is hard. For more than about 10 classes, however, it becomes hard to visualize +and read. +For CIFAR-10, the proposed method groups the four object classes and the six animal +classes together (see Figure 5.11a). +(a) CIFAR-10 Test set (b) Random +Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal +elements are set to 0 in order to make other elements easier to see. +Figure 5.11b shows a confusion matrix with random mistakes. +The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test +accuracy where a good permutation was found. Please note that this is not the best classifier. +The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is +displayed as the second image. +Those results suggest that the ordering of classes is a valuable tool to make patterns easier +to see. Humans, however, are good at finding patterns even if they come from random noise.
+Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy +and 40 % uniformly random errors of a balanced dataset is created, optimized according to +Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a. +On the HASYv2 dataset the class-ordering is necessary to see anything as most possible +confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the +unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a +maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices +because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be +displayed. + +5.2. Confusion Matrix Ordering +Figure 5.12.: The first image shows the confusion matrix for the test of GTSRB set after optimization +to Equation (4.1). The diagonal elements are set to 0 in order to make other elements +easier to see. The symbols next to the label on the vertical axis indicate the shape +and the color of the signs. +The second image shows the same, but with baseline model. +Best viewed in electronic form. +Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal +elements are set to 0 in order to make other elements easier to see. The top image +shows arbitrary class ordering, the bottom image shows the optimized ordering. +5.3. Spectral Clustering vs CMO +5.3. Spectral Clustering vs CMO +This section evaluates the clustering quality of CMO in comparison to the clustering quality +of spectral clustering. +The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on +CIFAR-100. Figure 5.14 shows the sorted confusion matrix. +Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The +diagonal elements are set to 0 in order to make other elements easier to see. Best +viewed in electronic form. +CIFAR-100 has pre-defined coarse classes. 
Those are used as a ground truth for the clusters +which are to be found. The number of errors is determined by (i) Join all n clusters which +contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, find the +set of classes M− which do not belong to C. (iii) The final error is n + |M−|. As can be +seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has +only half the error of spectral clustering. +The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be +noted that the number of clusters was determined by using the semi-automatic method +based on CMO as described in Section 4.2. + +5. Experimental Evaluation +Cluster Spectral clustering Errors CMO Errors +fish aquarium fish, orchid + flatfish ++ ray, shark + trout, lion +5 aquarium fish, orchid + flatfish ++ ray + shark, trout +4 +flowers orchid, aquarium fish + sunflower + poppy, tulip + rose, +train +5 orchid, aquarium fish + sunflower, poppy, tulip, rose2 +people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 +reptiles crocodile, plain, road, table, +wardrobe + dinosaur + lizard ++ snake, worm + turtle +9 crocodile, lizard, lobster, caterpillar + dinosaur + snake + turtle, crab6 +trees maple, oak, pine + willow, forest ++ palm +3 palm, willow, pine, maple, oak 0 +Total 24 12 +Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , +whereas clusters are separated by +. +Cluster Spectral clustering Errors CMO Errors +A A, A, A 0 A, A, A , Å 1 +B B, B 0 B, B 0 +C C, c, ⊂ and C , ξ, E and C 4 C, c, ⊂, C and C 1 +D D, D, D, . 
1 D, D, D 0 +E E and E, ε 2 E and E, ε, , ∈ 4 +F F and F, F 1 F and F, F 1 +H H and H , κ and H 3 H and H, H 1 +K K, κ 0 K, κ 0 +L L, b and L, L 1 L, b and L, L 1 +M M and M and M 2 M and µ, M and M 3 +N N and N, N and N 2 N and N, N and N , ℵ 3 +O O, O, 0, ◦, °, # and o 1 O, O, 0, ◦, ° and # and o 2 +P P, P and p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2 +Q Q, Q, Q, ι, t, &, `, =, Æ, 1 7 Q and Q, Q 1 +R R, R and R, R, k and < 3 R and <, R, R, R 1 +S S, s, S 0 S, s, S 0 +T T, > and T , τ 1 T, > and T , τ 1 +U U, ∪ and u, U, A 1 U, u, U, A and ∪ 2 +V V , v, ∨ 0 V , v, ∨ 0 +W W, w, ω 0 W, w and ω 1 +X X, x, X , χ, × 0 X, x, X , χ, × 0 +Y Y and y 1 Y , y 0 +Z Z, z, Z and Z, Z 1 Z, z, Z, Z, Z 0 +Total 34 25 +Table 5.5.: Differences in spectral clustering and CMO. + +5.4. Hierarchy of Classifiers +5.4. Hierarchy of Classifiers +In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root +classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on +the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better +results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either +be due to limited training data, overfitting or the small size of 32 px × 32 px of the data. +The experiment also shows that most of the errors are due to not identifying the correct +cluster. Hence, in this case, more work in improving the root classifier is necessary rather +than improving the discrimination of classes within a cluster. +Although the classes within a cluster capture most of the classifications, many misclassifications happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would +push the accuracy in the full column only to 63.50 % due to errors of the root classifier +where the root classifier does not predict the correct cluster. +The leaf classifiers use the same topology as the root classifier. 
By initializing them with +the root classifiers weights their performance can be pushed at about the inner accuracy. +They are, however, only useful if their accuracy is well above the inner accuracy of the root +classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. +Cluster Classes +accuracy +root classifier leaf classifier +cluster identified class identified | cluster class identified | cluster +1 3 69.67 % 84.27 % 72.98 % +2 5 46.60 % 58.54 % 43.47 % +3 2 58.50 % 92.13 % 83.46 % +4 2 50.50 % 87.83 % 81.74 % +5 3 44.67 % 79.29 % 71.01 % +6 2 29.50 % 78.67 % 72.00 % +7 2 52.50 % 92.11 % 87.72 % +8 2 59.50 % 86.23 % 81.88 % +9 2 59.00 % 90.08 % 87.79 % +10 2 62.00 % 85.52 % 73.10 % +11 2 67.00 % 87.01 % 75.32 % +12 2 72.50 % 94.77 % 76.77 % +13 2 64.00 % 82.58 % 86.27 % +14 2 79.67 % 89.85 % 89.10 % +Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on +14 clusters of classes. Each class has 100 elements to test. The column cluster identified +gives the percentage that the root classifiers argmax prediction is within the correct +cluster, but not necessarily the correct class. The columns class identified | cluster only +consider data points where the root classifier correctly identified the cluster. + +5. Experimental Evaluation +5.5. Increased width for faster learning +More filters in one layer could simplify the optimization problem as each filter needs smaller +updates. Hence a CNN N with ni filters in layer i is expected to take more epochs than a +CNN N0 with 2 · ni filters in layer i to achieve the same validation accuracy. +This hypothesis can be falsified by training a CNN N and a CNN N0 and comparing the +trained number of epochs. As more filters can lead to different results depending on the +layer where they are added, five models are trained. 
The details about those models are +given in Table 5.7 +Name Layer Filter count Total +Baseline New parameters +m9 9 64 638 5 978 566 +m0 +9 +9 64 974 8 925 622 +m11 11 512 3786 5 982 698 +m0 +11 11 512 1024 1 731 980 +m13 13 512 8704 5 982 092 +Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer +was increased. +The detailed results are given in Table 5.8. As expected, the number of training epochs of +the models with increased numbers of parameters is lower. The wall-clock time, however, is +higher due to the increase in computation per forward- and backward-pass. +For m9, m11 and m13, the filter weight range of the layer with increased capacity decreases +compared to Figure 5.6, the filter weights of the layer with increased capacity are more +concentrated around zero compared to Figure 5.2. For model m13, the distribution of +weight of the output layer changed to a more bell-shaped distribution. Except for this, the +distribution of filter weights in other layers did not change for all three models compared to +the baseline. +Model Parameters +Accuracy Training +Single Model Ensemble Mean Epochs Mean Time +Mean std +baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s +m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s +m0 +9 +8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s +m11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s +m0 +11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s +m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s +Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13 +as well as their accuracies. + +5.6. Weight updates +5.6. Weight updates +Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is +that every single weight updates can be smaller to learn the same function. Thus the loss +function is smoother and thus gradient descent based optimization algorithms lead to more +consistent weight updates. 
+Consequently, it is expected that layers with fewer filters have more erratic updates. If +there are many filters, the weights of a filter which does not contribute much to the end +results or is even harmful filter can gradually be set to zero, essentially removing one path +in the network. +In order to test the hypothesis, the baseline model was adjusted. The number of filters in +layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean +weight update of the layers 1, 3, 5, 7 and 9 have a far bigger range than the layers 11, 13 and +15 after epoch 50. Compared to the baseline models mean updates (Figure 5.8, Page 46), +the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update +from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher. +For the maximum and the sum, no similar pattern could be observed (see Figures A.3 +and A.4). +Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but +with layer 5 reduced to 3 filters. + +5. Experimental Evaluation +5.7. Multiple narrow layers vs One wide layer +On a given feature map size one can have an arbitrary number of convolutional layers with +SAME padding and each layer can have an arbitrary number of filters. A convolutional layer +with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called +narrower and the number of filters in a convolutional layer is the layers width. +If the number of parameters which may be used for the feature map scale is fixed and high +enough, there are still many combinations. If ni with i = 0, . . . 
, k is the number of output
+feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters with
+a bias, then the number of parameters is
+
+\[
+  \text{Parameters}
+    = \sum_{i=1}^{k}
+      \left( n_{i-1} \cdot 3^{2} + 1 \right)
+      \cdot n_i
+\]
+
+Hence the width of one layer does not only influence the parameters in this layer, but also
+in the next layer.
+The number of possible subsequent layers of one feature map size is enormous, even if
+constraints are placed on the number of parameters. For example, the first convolutional
+layer of the baseline model has 896 parameters. If one assumes that fewer than 3 filters per
+layer are not desirable, one keeps all layers having a bias and all layers only use 3 × 3 filters,
+then the maximum depth is 10. If one furthermore assumes that at least 800 parameters
+should be used, there are still 120 possible layer combinations. As experimentally evaluating
+one layer combination takes about 10 hours on a GTX 970 for CIFAR-100, it is not possible
+to evaluate all layer combinations. In the following, a couple of changes to the network
+width / depth will be evaluated.
+Each layer expands the receptive field. Hence deeper layers can use more of the input for
+every single output value. But deeper networks need more time for inference as the output
+of layer i has to be computed before the output of i + 1 can be computed. Hence there is
+less potential to parallelize computations. Each filter can be seen as a concept which can
+be learned. The deeper the filter is in the network, the higher is the abstraction level of the
+concept. In most cases, both are necessary: many different concepts (width) and high-level
+concepts (depth).
+Reducing the first two convolutional layers of the baseline model (see Page 39) to one
+convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model
+has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard
+deviation of σ = 1.12 (+0.57). 
The ensemble achieved 63.18 % (-1.52 %). As expected,
+the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of
+the baseline model to 15 s of the model with one less convolutional layer, one less Batch
+Normalization and one less activation layer. The inference time was also reduced from 6 ms

+5.8. Batch Normalization
+to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of
+more than one percentage point of the mean model and the increased standard deviation of
+the model's performance, at least two convolutional layers on the 32 px × 32 px feature
+map scale are recommendable for CIFAR-100.
+Changing the baseline to have fewer filters but more layers is another option. This was tried
+for the first block at the 32 px × 32 px feature map scale. The two convolutional layers
+(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one
+convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model
+has 944 132 parameters. Compared to the baseline model, the time for inference was the
+same. This is unexpected, because the inference time changed when a layer was removed at
+this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was
+σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21).
+Having two nonlinearities at each feature map scale could be important to learn nonlinear
+transformations at that scale. As the baseline model has only one nonlinearity at the
+8 × 8 feature map scale, another convolutional layer with 64 filters, Batch Normalization
+and ELU was added. To keep the number of parameters constant, layer 11 of the baseline
+model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy
+of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves
+an accuracy of 64.39 % (-0.31). 
This could indicate that having two convolutional layers
+is more important for layers close to the input than for intermediate layers. Alternatively, the
+parameters could be more important in layer 11 than having a new convolutional layer after
+layer 9.
+In order to test the hypothesis that having two convolutional layers is less important in
+the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is
+removed. The first convolutional layer was increased from 32 filters to 59 filters, the second
+convolutional layer was increased from 32 filters to 58 filters in order to keep the number of
+parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test
+accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 %
+test accuracy (-0.82).
+Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map
+scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51
+(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important
+to have at least one convolutional layer at this feature map scale.
+5.8. Batch Normalization
+In [CUH15], the authors write that Batch Normalization does not improve ELU networks.
+Hence the effect of removing Batch Normalization from the baseline is investigated in this

+5. Experimental Evaluation
+experiment.
+As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn
+are identical to the baseline model m, except that in mno-bn the Batch Normalization layers
+are removed.
+One notable difference is the training time: While m needs 21 ms per epoch on average on
+a GTX 980, mno-bn only needs 21 ms per epoch. The number of epochs used for training,
+however, also increased noticeably from 149 epochs to 178 epochs on average. The standard
+deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for mno-bn. 
+The mean accuracy of mno-bn is 62.86 % and hence 0.52 percentage points worse. The +standard deviation between models increased from 0.55 to 0.61. This is likely a result of the +early stopping policy and the differences in training epochs. This can potentially be fixed +by retraining the models which stopped earlier than the model which was trained for the +biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage +points worse than the baseline. +The filter weight range and distribution is approximately the same as Figure 5.6 and +Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of +the baseline are spread out in the first layer and much more concentrated in subsequent layers +(see Figure 5.3), the model without Batch Normalization has rather concentrated weights +in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). +Another model m0 +no-bn which has one more filter in the convolutional layer 1, 3, 5, and 7 to +compensate for the loss of parameters in Batch Normalization. The mean test accuracy of +10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The +ensemble of m0 +no-bn achieves 64.33 % which is 0.37 percentage points worse than the baseline. +The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of +20.7 epochs. +Hence it is not advisable to remove Batch Normalization for the final model. It could, +however, be possible to remove Batch Normalization for the experiments to iterate quicker +through different ideas if the relative performance changes behave the same with or without +Batch Normalization. + +5.9. Batch size +5.9. Batch size +The mini-batch size m ∈ N≥1 influences +• Epochs until convergence: The smaller m, the more often the model is updated +in one epoch. Those updates, however, are based on fewer samples of the dataset. 
+Hence the gradients of different mini-batches can noticeably differ. In the literature, +this is referred to as gradient noise [KMN+16]. +• Training time per epoch: The smaller the batch size, the higher the training time +per epoch as the hardware is not optimally utilized. +• Resulting model quality: The choice of the hyperparameter m influences the +accuracy of the classifier when training is finished. [KMN+16] supports the view that +smaller m result in less sharp minima. Hence smaller m lead to better generalization. +Empiric evaluation results can be found in Table 5.9. Those results confirm the claim +of [KMN+16] that lower batch sizes generalize better. +m +Training +Epochs +Mean total Single model Ensemble +time training time Accuracy std Accuracy +8 118 s +epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % +16 62 s +epoch 103 – 173 8349 s 64.16 % σ = 0.81 66.98 % +32 35 s +epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % +64 25 s +epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % +128 18 s +epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % +Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) +of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on +CIFAR-100. +5.10. Bias +Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a +model mno-bias is created which is identical to the baseline model m, except that the bias of +layers 11, 13 and 15 is removed. +The mean test accuracy of 10 trained mno-bias is 63.74 % which is an improvement of +0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 % +which is 0.43 percentage points better than the baseline. Hence the bias can safely be +removed. +Removing the biases did not have a noticeable effect on the filter weight range, the filter +weight distribution or the distribution of the remaining biases. 
Also, the γ and β parameters +of the Batch Normalization layers did not noticeably change. + +5. Experimental Evaluation +5.11. Learned Color Space Transformation +In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1 +directly after the input and then another convolutional layer with 3 filters of size 1 × 1 acts +as a learned transformation in another color space and boosts the accuracy. +This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation and 10 filters followed by another convolutional layer with ELU activation and +3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37. +The standard deviation is noticeable higher than the standard deviation of the baseline +model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of +the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of +the baseline models. +The inference time for 1 image and for 128 images did not change compared to the baseline. +The training time per epoch increased from 26 s to 30 s on the GTX 970. +Hence it is not advisable to use the learned color space transformation. +5.12. Pooling +An alternative to max pooling with stride 2 with a 2 × 2 kernel is using a 3 × 3 kernel with +stride 2. +This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the +3×3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 % +(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test +accuracy (+0.45). +The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs) +on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch +of 128 images. +5.13. Activation Functions +Nonlinear, differentiable activation functions are important for neural networks to allow them +to learn nonlinear decision boundaries. 
One of the simplest and most widely used activation +functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized +rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have +been proposed. The baseline uses ELU. + +5.13. Activation Functions +Activation functions differ in the range of values and the derivative. The definitions and +other comparisons of eleven activation functions are given in Table B.3. +Theoretical explanations why one activation function is preferable to another in some +scenarios are the following: +• Vanishing Gradient: Activation functions like tanh and the logistic function saturate outside of the interval [−5, 5]. This means weight updates are very small for +preceding neurons, which is especially a problem for very deep or recurrent networks as +described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12]. +• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem. +The gradient of the ReLU function is 0 for all non-positive values. This means if all +elements of the training set lead to a negative input for one neuron at any point in the +training process, this neuron does not get any update and hence does not participate +in the training process. This problem is addressed in [MHN13]. +• Mean unit activation: Some publications like [CUH15, IS15] claim that mean +unit activations close to 0 are desirable. They claim that this speeds up learning +by reducing the bias shift effect. The speedup of learning is supported by many +experiments. Hence the possibility of negative activations is desirable. +Those considerations are listed in Table 5.10 for 11 activation functions. Besides the +theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline +network was adjusted so that every activation function except the one of the output layer +was replaced by one of the 11 activation functions. 

+As expected, PReLU and ELU performed best. Unexpected was that the logistic function,
+tanh and softplus performed worse than the identity and it is unclear why the pure-softmax
+network performed so much better than the logistic function. One hypothesis why the
+logistic function performs so badly is that it cannot produce negative outputs. Hence the
+logistic− function was developed:
+\[
+\operatorname{logistic}^{-}(x) = \frac{1}{1 + e^{-x}} - 0.5
+\]
+The logistic− function has the same derivative as the logistic function and hence still suffers
+from the vanishing gradient problem. The network with the logistic− function achieves an
+accuracy which is 11.30 percentage points better than the network with the logistic function, but is still
+5.54 percentage points worse than the ELU.
+Similarly, ReLU was adjusted to have a negative output:
+ReLU−(x) = max(−1, x) = ReLU(x + 1) − 1
+The results of ReLU− are much worse on the training set, but perform similarly on the test

+5. Experimental Evaluation
+set. The result indicates that the possibility of hard zeros and thus a sparse representation
+is either not important or similarly important as the possibility to produce negative outputs.
+This contradicts [GBB11, SMGS14].
+A key difference between the logistic− function and ELU is that ELU neither suffers
+from the vanishing gradient problem nor has a bounded range of values. For this reason, the
+S2ReLU activation function was developed. It is defined as
+\[
+\mathrm{S2ReLU}(x)
+  = \mathrm{ReLU}\left(
+      \frac{x}{2} + 1
+    \right)
+  - \mathrm{ReLU}\left(
+      -\frac{x}{2} + 1
+    \right)
+  =
+  \begin{cases}
+    \frac{x}{2} - 1
+      & \text{if } x \leq -2 \\
+    x & \text{if } -2 \leq x \leq 2 \\
+    \frac{x}{2} + 1 & \text{if } x > 2
+  \end{cases}
+\]
+This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU
+does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be
+the identity close to zero and have a smaller absolute value than the identity farther away.
+It is easy to compute and easy to implement. 
+Those results — not only the absolute values, but also the relative comparison — might +depend on the network architecture, the training algorithm, the initialization and the +dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A.2. For +both datasets, the logistic function has a much shorter training time and a noticeably lower +test accuracy. +Function Vanishing Gradient Negative Activation possible Bound activation +Identity No Yes No +Logistic Yes No Yes +Logistic− Yes Yes Yes +Softmax Yes Yes Yes +tanh Yes Yes Yes +Softsign Yes Yes Yes +ReLU Yes1 No Half-sided +Softplus No No Half-sided +S2ReLU No Yes No +LReLU/PReLU No Yes No +ELU No Yes No +Table 5.10.: Properties of activation functions. +1The dying ReLU problem is similar to the vanishing gradient problem. + +5.13. Activation Functions +Function +Single model Ensemble of 10 +Training set Test set Training set Test set +Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 % +Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 % +Logistic− 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 % +Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 % +Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 % +Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 % +ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 % +ReLU− 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 % +Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 % +S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 % +LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 % +PReLU 80.01 % σ = 2.03 62.16 % σ = 0.73 83.50 % 64.79 % +ELU 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 % +Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation +functions on CIFAR-100. For LReLU, α = 0.3 was chosen. 
+Function +Inference per Training +Epochs +Mean total +1 Image 128 time training time +Identity 8 ms 42 ms 31 s +epoch 108 – 148 3629 s +Logistic 6 ms 31 ms 24 s +epoch 101 – 167 2234 s +Logistic− 6 ms 31 ms 22 s +epoch 133 – 255 3421 s +Softmax 7 ms 37 ms 33 s +epoch 127 – 248 5250 s +Tanh 6 ms 31 ms 23 s +epoch 125 – 211 3141 s +Softsign 6 ms 31 ms 23 s +epoch 122 – 205 3505 s +ReLU 6 ms 31 ms 23 s +epoch 118 – 192 3449 s +Softplus 6 ms 31 ms 24 s +epoch 101 – 165 2718 s +S2ReLU 5 ms 32 ms 26 s +epoch 108 – 209 3231 s +LReLU 7 ms 34 ms 25 s +epoch 109 – 198 3388 s +PReLU 7 ms 34 ms 28 s +epoch 131 – 215 3970 s +ELU 6 ms 31 ms 23 s +epoch 146 – 232 3692 s +Table 5.12.: Training time and inference time of adjusted baseline models trained with different +activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the +identity is the fastest function. This result is likely an implementation specific problem +of Keras 2.0.4 or Tensorflow 1.1.0. + +5. Experimental Evaluation +Function +Single model Ensemble Epochs +Accuracy std Accuracy Range Mean +Identity 99.45 % σ = 0.09 99.63 % 55 – 77 62.2 +Logistic 97.27 % σ = 2.10 99.48 % 37 – 76 54.5 +Softmax 99.60 % σ = 0.03 99.63 % 44 – 73 55.6 +Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6 +Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0 +ReLU 99.62 % σ = 0.04 99.73 % 51 – 94 71.7 +Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9 +PReLU 99.57 % σ = 0.07 99.73 % 44 – 89 71.2 +ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5 +Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions +on MNIST. +5.14. Label smoothing +Ensembles consisting of n models trained by the same procedure on the same data but +initialized with different weights and trained with a different order of the training data +perform consistently better than single models. One drawback of ensembles in applications +such as self-driving cars is that they increase the computation by a factor of n. 
One explanation
+for why they improve the test accuracy is that they reduce the variance.
+The idea of label smoothing is to use the ensemble prediction of the training data as labels
+for another classifier. For every element x of the training set, the one-hot encoded target
+t(x) is smoothed by the ensemble prediction yE(x)
+\[
+t'(x) = \alpha \cdot t(x) + (1 - \alpha) \cdot y_E(x)
+\]
+where α ∈ [0, 1] is the smoothing factor.
+There are three reasons why label smoothing could be beneficial:
+• Training speed: The ensemble prediction contains more information about the
+image than binary class decisions. Classifiers in computer vision predict how similar
+the input looks to other input of the classes they are trained on. By smoothing the
+labels, the information that one image could also belong to another class is passed to
+the optimizer. In early stages of the optimization this could lead to a lower loss on
+the non-smoothed validation set.
+• Higher accuracy: Using smoothed labels for the optimization could lead to a higher
+accuracy of the base-classifier due to a smoothed error surface. It might be less likely

+5.14. Label smoothing
+that the classifier gets into bad local minima.
+• Label noise: Depending on how the labels are obtained, it might not always
+be clear which label is the correct one. Also, labeling errors can be present in training
+datasets. Those errors severely harm the training. By smoothing the labels, the effect
+of such errors could be mitigated.
+10 models msmooth are trained with the α = 0.5 smoothed labels from the prediction
+of an ensemble of 10 baseline models. The mean accuracy of the models trained on the
+smoothed training set labels was 63.61 % (+0.23) and the standard deviation was σ = 0.72
+(+0.17). The ensemble of 10 msmooth models achieved 64.79 % accuracy (+0.09). Hence
+the effect of this kind of label smoothing on the final accuracy is questionable. 
+The training speed didn’t noticeably change either: The number of trained epochs ranged +from 144 to 205, the mean number of epochs was 177. The baseline training ranged from +146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training +methods accuracy differed by less than one percentage point. Hence it is unlikely that label +smoothing has a positive effect on the training speed. +Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and +hard labels for training, this work only used smoothed labels. + +5. Experimental Evaluation +5.15. Optimized Classifier +In comparison to the baseline classifier, the following changes are applied to the optimized +classifier: +• Remove the bias for the last layers: For all layers which output a 1 × 1 feature +map, the bias is removed +• Increase the max pooling kernel to 3 × 3 +• More filters in the first layers +The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation +is given in Table 5.15 and the timing comparison is given in Table 5.16. 
+# Type Filters @ +Patch size / stride +Parameters FLOPs Output size +Input 0 0 3 @ 32 × 32 +1 Convolution 69 @ 3 × 3 × 3 / 1 1 932 3 744 768 69 @ 32 × 32 +2 BN + ELU 138 353 418 69 @ 32 × 32 +3 Convolution 69 @ 3 × 3 × 32 / 1 42 918 37 684 096 69 @ 32 × 32 +4 BN + ELU 138 353 418 69 @ 32 × 32 +Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16 +5 Convolution 64 @ 3 × 3 × 32 / 1 39 808 20 332 544 64 @ 16 × 16 +6 BN + ELU 128 82 048 64 @ 16 × 16 +7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16 +8 BN + ELU 128 82 048 64 @ 16 × 16 +Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8 +9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8 +10 BN + ELU 128 20 608 64 @ 8 × 8 +Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4 +11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 288 1 048 064 512 @ 1 × 1 +12 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +13 Convolution 512 @ 1 × 1 × 512 / 1 262 144 523 776 512 @ 1 × 1 +14 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1 +Global avg Pooling 1 × 1 0 k k @ 1 × 1 +16 BN + Softmax 2k 7k k @ 1 × 1 +P 514k ++947 654 +520k ++87 870 996 179 200+2k +Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers +use SAME padding, except for layer 11 which used VALID padding in order to decrease +the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each +power of two there are two Convolution + BN + ELU blocks and one Max pooling +block added. This is the framed part in the table. + +5.15. 
Optimized Classifier
+32 × 32
+Input
+C 69@3 × 3/1
+BN + ELU
+C 69@3 × 3/1
+BN + ELU
+16 × 16
+max pooling 3 × 3/2
+C 64@3 × 3/1
+BN + ELU
+C 64@3 × 3/1
+BN + ELU
+8 × 8
+max pooling 3 × 3/2
+C 64@3 × 3/1
+BN + ELU
+4 × 4
+max pooling 3 × 3/2
+C* 512@4 × 4/1 (V)
+BN + ELU
+Dropout, p = 0.5
+1 × 1
+C* 512@1 × 1/1
+BN + ELU
+Dropout, p = 0.5
+C* k@1 × 1/1
+Global AVG pooling
+BN + Softmax
+Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with
+32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used.
+Dataset Single Model Accuracy Ensemble of 10
+Training Set Test Set Training Set Test Set
+Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 %
+CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 %
+CIFAR-100 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 %
+GTSRB 100.00 % σ = 0.00 99.28 % σ = 0.10 100.00 % 99.51 %
+HASYv2 88.79 % σ = 0.45 85.36 % σ = 0.15 89.36 % 85.92 %
+MNIST 99.88 % σ = 0.10 99.48 % σ = 0.13 99.99 % 99.67 %
+STL-10 95.43 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 %
+SVHN 99.08 % σ = 0.07 96.37 % σ = 0.12 99.50 % 97.47 %
+Table 5.15.: Optimized model accuracy on eight datasets. The single model accuracy is the mean over the 10 models
+used in the ensemble. The empirical standard deviation σ of the accuracy is also given.
+CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the
+models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN
+and HASYv2, no test-time transformations are used. 
+Network GPU Tensorflow Inference per Training +1 Image 128 images time / epoch +Optimized Default Intel i7-4930K 5 ms 432 ms 386 s +Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s +Optimized Default GeForce 940MX 4 ms 205 ms 192 s +Optimized Default GTX 970 6 ms 41 ms 35 s +Optimized Default GTX 980 3 ms 35 ms 27 s +Optimized Default GTX 980 Ti 6 ms 36 ms 26 s +Optimized Default GTX 1070 2 ms 24 ms 21 s +Optimized Default Titan Black 4 ms 46 ms 43 s +Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is +evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken +from [Maj17]. Weights the baseline model can be found at [Tho17b]. The optimized +Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. + +5. Experimental Evaluation +5.16. Early Stopping vs More Data +A separate validation set is necessary for two reasons: (1) Early stopping and (2) preventing +overfitting due to many experiments. To prevent overfitting, a different dataset can be used. +For example, all decisions about hyperparameters in this thesis are based on CIFAR-100, +but the network is finally trained and evaluated with the same hyperparameters on all +datasets.2 The validation set can hence be removed if early stopping is removed. Instead, +the validation data is used in a first run to determine the number of epochs necessary for +training. In a second training run the validation data is added to the training set. The +number of used epochs for the second run is given in Table 5.17. +Dataset Mean epochs Train data classes average data / class +Asirra 60 15 075 2 7538 +MNIST 41 54 000 10 5400 +SVHN 45 543 949 10 54 395 +CIFAR-10 84 45 000 10 4500 +HASYv2 92 136 116 369 369 +GTSRB 97 35 288 43 821 +STL-10 116 4500 10 450 +CIFAR-100 155 45 000 100 450 +Table 5.17.: Mean number of training epochs for the optimized model. 
For comparison, the total +amount of used training data, the number of classes of the dataset and the average +amount of data per class is given. +Alternatively, the model can be trained with early stopping (ES) purely on the training +loss. All three methods – early stopping on the validation set accuracy, early stopping on +the training loss and training a fixed number of epochs are evaluated. While having more +data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other +datasets are only marginally different. For CIFAR-10, training with more data did not +improve the results when the number of epochs is fixed, but notably improved the results +when the training loss was used as the early stopping criterion. +5.17. Regularization +Stronger regularization might even improve the results when using the training loss as an +early stopping criterion. `2 regularization with a weighting factor of λ = 0.0001 is used in +all other experiments. While the accuracy as shown in Table 5.19 does not show a clear +pattern, the number of epochs increases with lower model regularization (see Table 5.20). +2Except data augmentation and test time transformations. +3Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model. +4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. + +5.17. Regularization +Dataset Early Stopping Fixed epochs +val. acc train loss +Asirra 93.09 % 96.01 %3 96.01 % +CIFAR-10 89.86 % 91.75 % 88.88 % +CIFAR-100 67.03 % 71.01 % 69.08 % +HASYv2 85.92 % 82.89 %4 85.05 % +MNIST 99.67 % 99.64 % 99.57 % +STL-10 78.66 % 83.25 % 78.64 % +Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy +compared training setups without a validation set and thus more training data. 
The +second column uses the training loss as a stopping criterion, the third column uses a +fixed number of epochs which is equal to the mean number of training epochs of the +models with early stopping on the validation set accuracy. +λ +Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set +λ = 0.01 73.83 % σ = 1.78 58.94 % σ = 1.33 87.78 % 69.98 % +λ = 0.001 82.86 % σ = 0.89 63.03 % σ = 0.67 91.86 % 71.02 % +λ = 0.0001 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 % +Table 5.19.: Different choices of `2 model regularization applied to the optimized model. +λ min max mean std +λ = 0.01 457 503 404.6 37.2 +λ = 0.001 516 649 588.4 41.6 +λ = 0.0001 579 833 696.1 79.1 +Table 5.20.: Training time in epochs of models with early stopping on training loss by different +choices of `2 model regularization applied to the optimized model. + +5. Experimental Evaluation + +6. Conclusion and Outlook +This master thesis gave an extensive overview over the design patterns of CNNs in Chapter 2, +the methods how CNNs can be analyzed and the principle directions of topology learning +algorithms in Chapter 3. +Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations +of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering +algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are: +• Ordering the classes in the confusion matrix allows to display the relevant parts even +for several hundred classes. +• A hierarchy of classifiers based on the classes does not improve the results on CIFAR100. There are three possible reasons for this: +– 32 px × 32 px is too low dimensional +– 100 classes are not enough for this approach +– More classes are always easier to distinguish if each new class comes with more +data. One reason why this might be the case is that distinguishing the object +from background has similar properties even for different classes. 
+• Label smoothing had only a minor effect on the accuracy and no effect on the training +time when a single base classifier was used to train with the smoothed labels by an +ensemble of base classifiers. +A baseline model was defined and evaluated on eight publicly available datasets. The +baselines topology and training setup are described in detail as well as its behavior during +training and properties of the weights of the trained model. +The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100. +The insights of those experiments are: +• Averaging ensembles of 10 base classifiers of the same architecture and trained with the +same setup consistently improve the accuracy. The amount of improvement depends +on the base classifiers, but the ensemble tends to improve the test accuracy by about +one percentage point. +• Wider networks learn in fewer epochs. This, however, does not mean that the + +6. Conclusion and Outlook +wall-clock time is lower due to increased computation in forward- and backward +passes. +• Batch Normalization increases the training time noticeably. For the described ELU +baseline model it also increases accuracy, which contradicts [CUH15]. +• The lower the batch size, the longer the time for each epoch of training and the less +epochs need to be trained. Higher accuracy by lower batch sizes was empirically +confirmed. The batch size, however, can also be too low. +• An analysis of the weights of the baseline indicated that the bias of layers close to +the output layer can be removed. This was experimentally confirmed. +• It could not be confirmed that learned color space transformation, as described +in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear +unit (LReLU) and α = 0.3. +• It could be confirmed that ELU networks gives better results than any other activation +function on CIFAR-100. 
For the character datasets MNIST and HASYv2, however, +ReLU, LReLU, PReLU, Softplus and ELU all performed similarly. +• Changing the activation functions to the identity had very little impact on the HASYv2 +and MNIST classifiers. Note that those networks are still able to learn nonlinear +decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however, +the accuracy drops by 6.64 % when ELU is replaced by the identity. +Based on the results of those experiments, an optimized classifier was developed and +evaluated on all eight datasets. +The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without +using the unlabeled part of the dataset. The state of the art of HASYv2 was improved +from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from +99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %. +1 +This was mainly achieved by the combination of ELU, Dropout, ensembles, training data +augmentation and test-time transformations. The removal of the bias of layers close to the +output and re-usage of those parameters in layers close to the input as well as using 3 × 3 +pooling instead of 2 × 2 pooling improved the baseline. +While writing this master's thesis, several related questions could not be answered: +• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting +is not a problem. But at which subsampling-level does having more layers have the +biggest effect? Can this question be answered before a deeper network is trained? +• Is label smoothing helpful for noisy labels? +1The baseline is better than the optimized model on Asirra and on HASYv2. + +• How does the choice of activation functions influence residual architectures? Could the +results be the same for different activation functions in architectures with hundreds +of layers? +• The results for the pooling kernel were inconclusive. 
Larger pooling kernels might be +advantageous as well as fractional max pooling [Gra15]. +• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that +can and should be fixed? +• Why is softmax so much better than the logistic function? Can the reason be used to +further improve ELU? +Besides those questions, the influence of optimizers on time per epoch, epochs until +convergence, total training time, memory consumption, accuracy of the models and standard +deviation of the models was not evaluated. This, and the stopping criterion for training +might be crucial for the model's quality. + + +A. Figures, Tables and Algorithms +(a) Original image (b) Smoothing filter (c) Laplace edge detection filter +(d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter +Figure A.1.: Examples of image filters. Best viewed in electronic form. +Layer 99-percentile interval +filter bias +1 [-0.50, 0.48] [-0.06, 0.07] +3 [-0.21, 0.19] [-0.07, 0.07] +5 [-0.20, 0.17] [-0.07, 0.05] +7 [-0.15, 0.14] [-0.05, 0.06] +9 [-0.14, 0.15] [-0.04, 0.03] +11 [-0.08, 0.08] [-0.00, 0.00] +13 [-0.08, 0.08] [-0.00, 0.00] +15 [-0.10, 0.11] [-0.01, 0.01] +Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model +trained on CIFAR-100. + +Figure A.2.: The distribution of bias weights of a model without batch normalization trained on +CIFAR-100. +Algorithm 1 Simulated Annealing for minimizing Equation (4.1). +Require: $C \in \mathbb{N}^{n \times n}$, steps $\in \mathbb{N}$, $T \in \mathbb{R}^{+}$, $c \in (0, 1)$ +procedure SimulatedAnnealing(C, steps, T, c) +bestScore ← accuracy(C) +bestC ← C +for i = 0; i < steps; i ← i + 1 do +p ← randomFloat(0, 1) +if p < 0.5 then ▷ Swap rows +i ← randomInteger(1, . . . , n) +j ← randomInteger(1, . . . , n) \ { i } +p ← randomUniform(0, 1) +$C' \leftarrow \mathrm{swap}(C, i, j)$ +$s \leftarrow \mathrm{accuracy}(C')$ +if $p < \exp\left(\frac{s - \mathrm{bestScore}}{T}\right)$ then +$C \leftarrow C'$ +if s > bestScore then +bestScore ← s +bestC ← C +T ← T · c +else ▷ 
Move Block +s ← randomInteger(1, . . . , n) ▷ Block start +e ← randomInteger(s, . . . , n) ▷ Block end +i ← randomInteger(1, . . . , n − (e − s)) ▷ Block insert position +Move Block (s, . . . , e) to position i +return bestC + +Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model, +but with layer 5 reduced to 3 filters. +Function +Single model Ensemble of 10 Epochs +Training set Test set Train Test Range Mean +Identity 87.92 % σ = 0.40 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 114.5 +Logistic 81.46 % σ = 5.08 79.67 % σ = 4.85 86.38 % 84.60 % 58 – 91 77.3 +Softmax 88.19 % σ = 0.31 84.70 % σ = 0.15 88.69 % 85.43 % 124 – 171 145.8 +Tanh 88.41 % σ = 0.36 84.46 % σ = 0.27 89.24 % 85.45 % 89 – 123 108.7 +Softsign 88.00 % σ = 0.47 84.46 % σ = 0.23 88.77 % 85.33 % 77 – 119 104.1 +ReLU 88.93 % σ = 0.46 85.35 % σ = 0.21 89.35 % 85.95 % 96 – 132 102.8 +Softplus 88.42 % σ = 0.29 85.16 % σ = 0.15 88.90 % 85.73 % 108 – 143 121.0 +LReLU 88.61 % σ = 0.41 85.21 % σ = 0.05 89.07 % 85.83 % 87 – 117 104.5 +PReLU 89.62 % σ = 0.41 85.35 % σ = 0.17 90.10 % 86.01 % 85 – 111 100.5 +ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % 73 – 113 92.4 +Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on +HASYv2. For LReLU, α = 0.3 was chosen. + +Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but +with layer 5 reduced to 3 filters. 
+Function +Single model Ensemble of 10 Epochs +Training set Test set Train Test Range Mean +Identity 87.49 % σ = 2.50 69.86 % σ = 1.41 89.78 % 71.90 % 51 – 65 53.4 +Logistic 45.32 % σ = 14.88 40.85 % σ = 12.56 51.06 % 45.49 % 38 – 93 74.6 +Softmax 87.90 % σ = 3.58 67.91 % σ = 2.32 91.51 % 70.96 % 108 – 150 127.5 +Tanh 85.38 % σ = 4.04 67.65 % σ = 2.01 90.47 % 71.29 % 48 – 92 65.2 +Softsign 88.57 % σ = 4.00 69.32 % σ = 1.68 93.04 % 72.40 % 55 – 117 83.2 +ReLU 94.35 % σ = 3.38 71.01 % σ = 1.63 98.20 % 74.85 % 52 – 98 75.5 +Softplus 83.03 % σ = 2.07 68.28 % σ = 1.74 93.04 % 75.99 % 56 – 89 68.9 +LReLU 93.83 % σ = 3.89 74.66 % σ = 2.11 97.56 % 78.08 % 52 – 120 80.1 +PReLU 95.53 % σ = 1.92 71.69 % σ = 1.37 98.17 % 74.69 % 59 – 101 78.8 +ELU 95.42 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % 66 – 72 67.2 +Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on +STL-10. For LReLU, α = 0.3 was chosen. + +B. Hyperparameters +Hyperparameters are parameters of models which are not optimized automatically (e.g., by +gradient descent), but by methods like random search [BB12], grid search [LBOM98] or +manual search. +B.1. Preprocessing +Preprocessing used to be of major importance in machine learning. However, with the +availability of data sets with hundreds of examples per class and the possibility of CNNs to +learn features themselves, most models today rely on raw pixel values. The only common +preprocessing is size normalization. In order to get a fixed input-size for a CNN, the +following procedure can be used: +• Take one or multiple crops of the image which have the desired aspect ratio. +• Scale the crop(s) to the desired size. +• In training, all crops can be used independently. In testing, all crops can be passed +through the network and the output probability distributions can get fusioned, for +example by averaging. +Other preprocessing methods are: +• Color space transformations (RGB, HSV, etc.) 
+• Mean subtraction +• Standardization of pixel-values to [0, 1] by dividing through 255 (used by [HLW16]) +• Dimensionality reduction +– Principal component analysis (PCA): An unsupervised linear transformation +which can be learned in the first hidden layer. It is hence doubtful if PCA +improves the network. +– Linear discriminant analysis (LDA) +• Zero Components Analysis (ZCA) whitening (used by [KH09]) + +B.2. Data augmentation +Data augmentation techniques aim at making artificially more data from real data items by +applying invariances. For computer vision, they include: +Name Augmentation Factor Used by +Horizontal flip 2 [KSH12, WYS+15] +Vertical flip 2 [DWD15]1 +Rotation ∼ 40 (δ = 20) [DSRB14] +Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14] +Crops 322 = 1024 [KSH12, WYS+15] +Shearing [Gra15] +GANs [BCW+17] +Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13] +Hue 51 (δ = 0.1) [MRM15, DSRB14] +Saturation ∼ 20 (δ = 0.5) [DSRB14] +Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13] +Channel shift [KSH12] +Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for +typical situations. For example, the augmentation factor for random crops is calculated +for 256 px × 256 px images which are cropped to 224 px × 224 px. +Taking several scales if the original is of higher resolution than desired is another technique. +Combinations of the techniques above can also be applied. Please note that the order of +operations does matter in many cases and hence the order is another augmentation factor. +Less common, but also reasonable are: +• Adding noise +• Elastic deformations +• Color casting (used by [WYS+15]) +• Vignetting (used by [WYS+15]) +• Lens distortion (used by [WYS+15]) +1Vertical flipping combined with 180◦ +rotation is equivalent to horizontal flipping + +B.3. Initialization +Weight initializations are usually chosen to be small and centered around zero. 
One way to +characterize many initialization schemes is by +w ∼ α · U[−1, 1] + β · N (0, 1) + γ with α, β, γ ≥ 0 +Table B.2 shows six commonly used weight initialization schemes. Several schemes use the +same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. +Name α β γ Reference +Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] +Xavier/Glorot uniform α = $\sqrt{\frac{6}{n_{in} + n_{out}}}$ β = 0 γ = 0 [GB10] +Xavier/Glorot normal α = 0 β = $\left(\frac{2}{n_{in} + n_{out}}\right)^2$ γ = 0 [GB10] +He α = 0 β = $\frac{2}{n_{in}}$ γ = 0 [HZRS15b] +Orthogonal — — γ = 0 [SMG13] +LSUV — — γ = 0 [MM15] +Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N (0, 1) + γ. +$n_{in}$, $n_{out}$ are the number of units in the previous layer and the next layer. Typically, +biases are initialized with constant 0 and weights by one of the other schemes to prevent +unit-coadaptation. However, dropout makes it possible to use constant initialization for +all parameters. +LSUV and Orthogonal initialization cannot be described with this simple pattern. +B.4. Objective function +For classification tasks, the cross-entropy +$E_{CE}(W) = -\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]$ +is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, +X is the set of training examples, K is the number of classes, $t_k^x \in \{0, 1\}$ indicates if the +training example x is of class k, $o_k^x$ is the output of the classifier for the training example x +and class k. +However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added: +• LASSO: ℓ1 (e.g., used in [HPTD15]) +• Weight decay: ℓ2 (e.g., λ = 0.0005 as in [MSM16]) +• Orthogonality regularization ($|W^T \cdot W - I|$, see [VTKP17]) + +B.5. 
Optimization Techniques +Most relevant optimization techniques for CNNs are based on SGD, which updates the +weights according to the rule +$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_x}{\partial w_{ji}}$ +where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. +A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically +mini-batch sizes are |B| ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes +lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes +lead to longer training times due to computational overhead and to more training steps due +to gradient noise. +$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_B}{\partial w_{ji}}$ +Nine variations which adjust the learning rate during training are: +• Momentum: +$w_{ji}^{(t+1)} \leftarrow w_{ji}^{(t)} + \Delta w_{ji}^{(t+1)}$ with $\Delta w_{ji}^{(t+1)} = -\eta \frac{\partial E_B}{\partial w_{ji}} + \alpha \Delta w_{ji}^{(t)}$ +with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16]) +• Adagrad [DHS11] +• RProp and the mini-batch version RMSProp [TH12] +• Adadelta [Zei12] +• Power Scheduling [Xu11]: $\eta(t) = \eta(0) (1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step, +a, c are constants. +• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and +decrease the learning rate when the algorithm's improvement is below a threshold. +• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-t/k}$ where $t \in \mathbb{N}_0$ is the +training step, η(0) is the initial learning rate, $k \in \mathbb{N}_{\geq 1}$ is the number of training steps +until the learning rate is reduced to $\frac{1}{10}$ of its value. +• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential +Decay Scheduling. +• Adam and AdaMax [KB14] + +• Nadam [Doz15] +Some of those are explained in [Rud16]. +Other first-order gradient optimization methods are: +• Quickprop [Fah88] +• Nesterov Accelerated Momentum (NAG) [Nes83] +• Conjugate Gradient method [Cha92]: Combines a line search for the step size with +the gradient's direction. 
+Higher-order gradient methods like Newtons method or quasi-Newton methods like BFGS +and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs. +However, there are alternatives which do not use gradient information: +• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02] +• Simulated Annealing [vLA87] +• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described +on [Tho14b] +There are also approaches which learn the optimization algorithm [ADG+16, LM16]. + +B.6. Network Design +CNNs have the following hyperparameters: +• Depth: The number of layers +• Width: The number of filters per layer +• Layer and block connectivity graph +• Layer and block hyperparameters: +– Activation Functions as shown in Table B.3 +– For more, see Sections 2.2 and 2.3. +Name Function ϕ(x) Range of Values ϕ +0 +(x) Used by +Sign function† + + + ++1 if x ≥ 0 +−1 if x < 0 +{ −1, 1 } 0 [KS02] +Heaviside +step function† + + + ++1 if x > 0 +0 if x < 0 +{ 0, 1 } 0 [MP43] +Logistic function 1 +1+e−x [0, 1] e +x +(e +x+1)2 [DJ99] +Tanh e +x−e−x +e +x+e−x = tanh(x) [−1, 1] sech2 +(x) [LBBH98, Tho14a] +ReLU† max(0, x) [0, +∞) + + + +1 if x > 0 +0 if x < 0 +[KSH12] +LReLU†2 +(PReLU) +ϕ(x) = max(αx, x) (−∞, +∞) + + + +1 if x > 0 +α if x < 0 +[MHN13, HZRS15b] +Softplus log(e +x + 1) (0, +∞) +e +x +e +x+1 [DBB+01, GBB11] +ELU + + + +x if x > 0 +α(e +x − 1) if x ≤ 0 +(−∞, +∞) + + + +1 if x > 0 +αex otherwise +[CUH15] +Softmax‡ o(x)j = +e +xj +PK +k=1 e +xk +[0, 1]K o(x)j · +PK +k=1 e +xk −e +xj +PK +k=1 e +xk +[KSH12, Tho14a] +Maxout‡ o(x) = maxx∈x x (−∞, +∞) + + + +1 if xi = max x +0 otherwise +[GWFM+13] +Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 +and functions marked with ‡ operate on all elements of a layer simultaneously. The +hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. 
Other +activation functions like randomized leaky ReLUs exist [XWCL15], but are far less +commonly used. +Some functions are smoothed versions of others, like the logistic function for the +Heaviside step function, tanh for the sign function, softplus for ReLU. +Softmax is the standard activation function for the last layer of a classification network +as it produces a probability distribution. See Figure B.1 for a plot of some of them. +2α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. + +−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0 +−1.0 +−0.5 +0.5 +1.0 +1.5 +2.0 +x +y +$\varphi_1(x) = \frac{1}{1 + e^{-x}}$ +ϕ2(x) = tanh(x) +ϕ3(x) = max(0, x) +$\varphi_4(x) = \log(e^x + 1)$ +$\varphi_5(x) = \max(x, e^x - 1)$ +Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative +numbers. The image of ELU, ReLU and Softplus is not bounded on the positive side, +whereas tanh and the logistic function are always below 1. +B.7. Regularization +Regularization techniques aim to make the fitted function smoother and reduce overfitting. +Regularization techniques are: +• ℓ1, ℓ2, and Orthogonality regularization: See Appendix B.4 +• Max-norm regularization (e.g. used in [SHK+14]) +• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth +(see [HSL+16]) +• Feature scale clipping (see [ZF14]) +• Data augmentation (according to [ZBH+16]) +• Global average pooling (according to [ZKL+15]) +• Dense-Sparse-Dense training (see [HPN+16]) +• Soft targets (see [HVD15]) + + +C. Calculating Network Characteristics +C.1. Parameter Numbers +• A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is +due to the bias. +• A convolutional layer i with ki filters of size n × m being applied to ki−1 feature maps +has $k_i \cdot (k_{i-1} \cdot n \cdot m + 1)$ parameters. The +1 is due to the bias. +• A fully connected layer with n nodes after k feature maps of size m1 × m2 has +n · (k · m1 · m2 + 1) parameters. 
+• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has $L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i) = L + 9n + 9n^2 \frac{L^2 - L}{2}$ +parameters. +According to [HPTD15], AlexNet has 60 million parameters which is roughly the number +calculated in Table D.2. +C.2. FLOPs +The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence +the following numbers give only rough estimates. +In the following, nϕ denotes the number of FLOPs to compute the non-linearity ϕ. For +simplicity, nϕ = 5 was chosen. +• A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with +$W \in \mathbb{R}^{n \times k}$, $x \in \mathbb{R}^{k \times 1}$, $b \in \mathbb{R}^{n \times 1}$. It hence needs about n · (k + (k − 1) + 1) = 2nk +additions / multiplications before the non-linearity ϕ is calculated. The total number +of FLOPs is 2 · n · k + n · nϕ. +• In the following, biases are ignored. A convolutional layer with ki filters of size n × m +being applied to ki−1 filter maps of size w × h results in ki filter maps of size w × h if +padding is applied. For each element of each filter map, n·m·ki−1 multiplications and +(n · m · ki−1 − 1) additions have to be made. This results in (2nmki−1 − 1)·(ki· w · h) +operations. The total number of FLOPs is (2 ·n·m·ki−1 −1)·(ki·w ·h)+ki·w ·h·nϕ. +This is, of course, a naive way of calculating a convolution. There are other ways of +calculating convolutions [LG16]. + +• A fully connected layer with n nodes after k feature maps of size w×h needs 2n(k·w·h) +FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ. +• As Dropout is only calculated during training, the number of FLOPs was set to 0. +• The number of FLOPs for max pooling is dominated by the number of positions to +which the pooling kernel is applied. For a feature map of size w × h a max pooling +filter with stride s gets applied $\frac{w \cdot h}{s^2}$ times. The number of FLOPs per application depends +on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs. 
+• The number of FLOPs for Batch Normalization is the same as the number of its +parameters. +Here are some references which give information for the FLOPs: +• AlexNet +– 1.5B in total [HPTD15]. +– 725M in total [KPY+15]. +– 3300M in total in Table D.2 +• VGG-16: +– 15484M in total [HPTD15]. +– 31000M in total in Table D.3. +• GoogleNet: 1566M in total [HPTD15]. +One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same +network. +C.3. Memory Footprint +The memory footprint of CNNs determines when networks can be used at all and if they +can be trained efficiently. In order to be able to train CNNs efficiently, one weight update +step has to fit in the memory of the GPU. This includes the following: +• Activations: All activations of one mini-batch in order to calculate the gradients +in the backward pass. This is the number of floats in the feature maps of all weight +layers combined. +• Weights +• Optimization algorithm: The optimization algorithm introduces some overhead. +For example, Adam stores two parameters per weights. +At inference time, every two consecutive layers have to fit into memory. When the forward +pass of layer A to layer B is calculated, the memory can be freed if no skip connections are +used. + +D. Common Architectures +In the following, some of the most important CNN architectures are explained. Understanding the development of these architectures helps understanding critical insights the machine +learning community got in the past years for convolutional networks for image recognition. +It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from +2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed +Inception-v4 is also covered. +The summation row gives the sum of all floats for the output size column. This allows +conclusions about the maximum mini-batch size which can be in memory for training. + +D.1. 
LeNet-5 +One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common +pattern of a single convolutional layer with tanh as a non-linear activation function followed +by a pooling layer and three fully connected layers. One fully connected layer is used to +get the right output dimension, another one is necessary to allow the network to learn a +non-linear combination of the features of the feature maps. +Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test +error rate of 0.8 % on MNIST. +Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98]. +# Type Filters @ +Patch size / stride +Parameters FLOPs Output size +Input 0 0 1 @ 32 × 32 +1 Convolution 6 @ 5 × 5 × 1 / 1 156 307 800 6 @ 28 × 28 +2 Scaled average pooling 2 × 2 / 2 2 336 6 @ 14 × 14 +3 Convolution 16 @ 5 × 5 × 6 / 1 2 416 942 400 16 @ 10 × 10 +4 Scaled average pooling 2 × 2 / 2 2 1 600 16 @ 5 × 5 +5 Fully Connected 120 neurons 48 120 240 000 120 +6 Fully Connected 84 neurons 10 164 20 580 84 +7 Fully Connected (output) 10 neurons 850 1 730 10 +P 61 710 15 144 446 9118 +Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied. +After layer 7, the softmax function is applied. One can see that convolutional layer +need much fewer parameters, but an order of magnitude more FLOPs per parameter +than fully connected layers. + +D.2. AlexNet +The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12]. +Its architecture is shown in Figure D.2 and described in Table D.2. It has about 60·106 parameters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet. +Note that the uncompressed size is at least 60 965 224 floats · 32 bit +float ≈ 244 MB. +Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed +by pooling layers multiple times. At the end, a fully connected network is applied. 
+Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). +# Type Filters @ +Patch size / stride +Parameters FLOPs Output size +Input 3 @ 224 × 224 +1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55 +LCN 12 M 96 @ 55 × 55 +2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27 +3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448M 256 @ 13 × 13 +LCN 3 M 256 @ 13 × 13 +4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13 +5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13 +7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13 +9 Convolution 256 @ 3 × 3 × 192 / 1 442 624 150 M 256 @ 13 × 13 +10 Max pooling 3 × 3 / 2 0 50 k 256 @ 6 × 6 +11 FC 4096 neurons 37 752 832 75 M 4096 +12 FC 4096 neurons 16 781 312 34 M 4096 +13 FC 1000 neurons 4 097 000 8 M 1000 +P 60 965 224 3300 M 1 122 568 +Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to +computational restrictions at the time of its development. This also reduces the number +of parameters and allows parallel computation on separate GPUs. However, to make +the architecture easier to compare, this grouping was ignored for the parameter count. +The FLOPs are taken from [HPTD15] and combined with rough estimates for Local +Contrast Normalization and max pooling. +The calculated number of parameters was checked against the downloaded version. It +also has 60 965 224 parameters. + +D.3. VGG-16 D +Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual +Geometry Group in Oxford which developed this architecture. It has 16 layers which can +learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3 × 3 +filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a +detailed textual description is given in Table D.3. +A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/ +tensorflow-vgg. 
Note that the uncompressed size is at least 138 357 544 floats · 32 bit +float ≈ +520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and +514 MB with compression. +224 × 224 +Input +C 64@3 × 3/1 +C 64@3 × 3/1 +112 × 112 +max pooling 2 × 2/1 +C 128@3 × 3/1 +C 128@3 × 3/1 +56 × 56 +max pooling 2 × 2/1 +C 256@3 × 3/1 +C 256@3 × 3/1 +C 256@3 × 3/1 +28 × 28 +max pooling 2 × 2/1 +C 512@3 × 3/1 +C 512@3 × 3/1 +C 512@3 × 3/1 +14 × 14 +max pooling 2 × 2/1 +C 512@3 × 3/1 +C 512@3 × 3/1 +C 512@3 × 3/1 +7 × 7 +max pooling 2 × 2/1 +Fully Connected 4096 +Dropout, p = 0.5 +Fully Connected 4096 +Dropout, p = 0.5 +Fully Connected 1000 +Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of +kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding. + +# Type Filters @ +Patch size / stride +Parameters FLOPs Output size +Input 3 @ 224 × 224 +1 Convolution 64 @ 3 × 3 × 3 / 1 1 792 186 M 64 @ 224 × 224 +2 Convolution 64 @ 3 × 3 × 64 / 1 36 928 3712M 64 @ 224 × 224 +Max pooling 2 × 2 / 2 0 2 M 64 @ 112 × 112 +3 Convolution 128 @ 3 × 3 × 64 / 1 73 856 1856 M 128 @ 112 × 112 +4 Convolution 128 @ 3 × 3 × 128 / 1 147 584 3705 M 128 @ 112 × 112 +Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56 +5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56 +6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 +7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 +Max pooling 2 × 2 / 2 0 <1 M 256 @ 28 × 28 +8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28 +9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 +10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 +Max pooling 2 × 2 / 2 0 <1 M 512 @ 14 × 14 +11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 +12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 +13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 +Max pooling 2 × 2 / 2 0 <1 M 512 @ 7 
× 7 +14 FC 4096 neurons 102 764 544 206 M 4096 +Dropout 0 0 4096 +15 FC 4096 neurons 16 781 312 34 M 4096 +Dropout 0 0 4096 +16 FC 1000 neurons 4 097 000 8 M 1000 +P 138 357 544 31 000 M 15 245 800 +Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have +learnable parameters. All convolutions are zero padded to prevent size changes and +use ReLU activation functions. The channels mean is subtracted from each pixel as +a preprocessing step (−103.939, −116.779, −123.68). As Dropout is only calculated +during training time, the number of FLOPs is 0. The dropout probability is 0.5. +The calculated number of parameters was checked against the downloaded version. It +also has 138 357 544 parameters. + +D.4. GoogleNet, Inception v2 and v3 +The large number of parameters and operations is a problem when such models should get +applied in practice to thousands of images. In order to reduce the computational cost while +maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were +developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and +5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of +parameters. It is shown in Figure D.4. +Figure D.4.: Inception module +Image source: [SLJ+15] +Compared to GoogleNet, Inception v2 [SVI+15] removed the 5 × 5 filters and replaced +them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module +is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to +approximate symmetric filters with fewer parameters. The authors call this approach filter +factorization. +Inception v3 introduced Batch Normalization to the network [SVI+15]. +Figure D.5.: Inception v2 module +Image source: [SVI+15] + +D.5. Inception-v4 +Inception-v4 as described in [SIV16] consists of four main building blocks: The stem, +Inception A, Inception B and Inception C. 
To quote the authors: Inception-v4 is a deeper, +wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A +and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use +average pooling. The stem, module B and module C use separable convolutions. +# × Type Parameters Output size +Input 3 @ 299 × 299 +1 Stem 605 728 384 @ 35 × 35 +2 4× Inception A 317 632 384 @ 35 × 35 +3 Reduction A 2 306 112 1024 @ 17 × 17 +4 7× Inception B 2 936 256 1024 @ 17 × 17 +5 Reduction B 2 747 392 1536 @ 8 × 8 +6 3× Inception C 4 553 088 1536 @ 8 × 8 +Global Average Pooling 0 1536 @ 1 × 1 +Dropout (p=0.8) 0 1536 @ 1 × 1 +7 Softmax 1 537 000 1000 +Σ 42 679 816 +Table D.4.: Inception-v4 network. + + +E. Datasets +Well-known benchmark datasets for classification problems in computer vision are listed +in Table E.1. The best results known to me are given in Table E.2. However, every semantic +segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers +using Algorithm 2. 
+Database +Image Resolution +(width × height) +Number +of +Images +Number +of +Classes +Channels Data source +MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98] +HASYv2 32 px × 32 px 168 233 369 1 [Tho17a] +SVHN 32 px × 32 px 630 420 10 3 +[NWC+11b], +[NWC+11a] +CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09] +CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09] +STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10] +Caltech-101 (80 px − 3481 px) +×(92 px − 3999 px) 9144 102 3 [FFP03, FFFP06] +Caltech-256 (75 px − 7913 px) +×(75 px − 7913 px) 30 607 257 3 [Gri06, GG07] +ILSVRC 20121 +(8 px − 9331 px) +×(10 px − 6530 px) 1.2 · 10^6 1000 3 [Ima12, RDS+14] +Places3652 +(290px − 3158px) +×(225px − 2630px) +1.8 · 10^6 365 3 [Zho16, ZKL+16] +GTSRB (25 px − 266 px) +×(25 px − 232 px) 51 839 43 3 [SSSI, SSSI12] +Asirra3 +(4 px − 500 px) +×(4 px − 500 px) 25 000 2 3 [Asi17, EDHS07] +Graz-02 480 px × 640 px +and 640 px × 480 px 1096 3 3 [Mar08, MS07] +Table E.1.: An overview of publicly available image databases for classification. The number +of images column gives the sum of the training and the test images. Some datasets, like +SVHN, have additional unlabeled data which is not given in this table. +1 +ImageNet Large Scale Visual Recognition Competition +2The dimensions are only calculated for the validation set. 
+3Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle + +Dataset Model type / name Result Score Achieved / +Claimed by +MNIST — 0.21 % error [WZZ+13] +HASYv2 TF-CNN 81.00 % accuracy [Tho17a] +SVHN DenseNet (k = 24) 1.59 % error [HLW16] +CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16] +CIFAR-100 WRN-28-10 16.21 % error [LH16] +STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15] +Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14] +Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14] +ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a] +GTSRB MCDNN 99.46 % accuracy [SL11] +Asirra SVM 82.7 % accuracy [Gol08] +Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10] +Table E.2.: An overview of state-of-the-art results achieved on computer vision datasets. +Algorithm 2 Create a classification dataset from a semantic segmentation dataset +Require: Semantic segmentation dataset (DS) +procedure CreateDataset(Annotated dataset DS) +DC ← List +w ← desired image width +h ← desired image height +for Image and associated label (x, y) in DS do +i ← randint(0, x.width − w) +j ← randint(0, x.height − h) +cL ← crop(y,(i, j),(i + w, j + h)) +if at least 50% of the pixels of cL are of one class then +cI ← crop(x,(i, j),(i + w, j + h)) +DC.append((cI , cL)) +return (DC) + +F. List of Tables +2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 +5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 +5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 +5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40 +5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52 +5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52 +5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 
53 +5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54 +5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54 +5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +5.10 Activation function properties . . . . . . . . . . . . . . . . . . . . . . . . . . 62 +5.11 Activation function evaluation results on CIFAR-100 . . . . . . . . . . . . . 63 +5.12 Activation function timing results on CIFAR-100 . . . . . . . . . . . . . . . 63 +5.13 Activation function evaluation results on MNIST . . . . . . . . . . . . . . . 64 +5.14 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 +5.15 Optimized model evaluation results . . . . . . . . . . . . . . . . . . . . . . . 67 +5.16 Optimized model speed comparison . . . . . . . . . . . . . . . . . . . . . . . 67 +5.17 Optimized model mean training epochs . . . . . . . . . . . . . . . . . . . . . 68 +5.18 Optimized model trained with early stopping vs training with more data . . 69 +5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69 +5.20 Model regularization with early stopping on training loss - Training time . . 69 +A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75 +A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77 +A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78 +B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80 +B.2 Weight initialization schemes . . . . . . . . . . . . . . . . . . . . . . . . . . 81 +B.3 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84 +D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 +D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +D.3 VGG-16 D architecture . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . 93 +D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 + +E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 +E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98 + +G. List of Figures +2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3 +2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6 +2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 +2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +2.5 Aggregation block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 +2.6 Dense block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 +2.7 Validation curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 +2.8 Validation curve with plateaus . . . . . . . . . . . . . . . . . . . . . . . . . 18 +2.9 Learning curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 +2.10 Occlusion analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 +2.11 Filter visualization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26 +3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 +4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33 +5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 +5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42 +5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42 +5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 
43 +5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44 +5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45 +5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46 +5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47 +5.10 Baseline Weight updates (sum) . . . . . . . . . . . . . . . . . . . . . . . . . 47 +5.11 Confusion matrices for CIFAR-10 . . . . . . . . . . . . . . . . . . . . . . . . 48 +5.12 Confusion matrices for GTSRB . . . . . . . . . . . . . . . . . . . . . . . . . 49 +5.13 Confusion matrices for HASYv2 . . . . . . . . . . . . . . . . . . . . . . . . . 50 +5.14 Confusion matrix of CIFAR-100 . . . . . . . . . . . . . . . . . . . . . . . . . 51 +5.15 Mean weight updates of model with bottleneck . . . . . . . . . . . . . . . . 55 +5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67 +A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75 +A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76 + +A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77 +A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 78 +B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 +D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 +D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 +D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 +D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 + +H. Bibliography +[AAB+16] M. Abadi, A. 
Agarwal et al., “Tensorflow: Large-scale machine learning on +heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar. +2016. [Online]. Available: https://arxiv.org/abs/1603.04467 +[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the +clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp. +49–60. +[ADG+16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by +gradient descent,” in Advances in Neural Information Processing Systems 29 +(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar. +2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461learning-to-learn-by-gradient-descent-by-gradient-descent.pdf + +[AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism: +Going deeper into neural networks,” Jun. 2015. [Online]. Available: https://research.googleblog.com/2015/06/inceptionism-going-deeperinto-neural.html +[Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https: +//www.microsoft.com/en-us/download/details.aspx?id=54765 +[BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,” +Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305, +Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/ +bergstra12a/bergstra12a.pdf +[BCW+17] J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through +asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online]. +Available: https://arxiv.org/abs/1703.10155 +[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better image features,” Département d’Informatique et de Recherche Opérationnelle, +Université de Montréal, Tech. Rep. 1337, 2009. +[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using +reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.02167 + +[BM93] U. 
Bodenhausen and S. Manke, Automatically Structured Neural +Networks For Handwritten Character And Word Recognition. London: +Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http: +//dx.doi.org/10.1007/978-1-4471-2063-6_283 +[BMDP10] R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest +neighbor,” in European Conference on Computer Vision (ECCV). Springer, +2010, pp. 171–184. +[BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of +feature pooling in visual recognition,” in International Conference on +Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available: +http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf +[BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies +with gradient descent is difficult,” IEEE transactions on neural networks, +vol. 5, no. 2, pp. 157–166, 1994. +[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training +of artificial neural networks,” IEEE Proceedings G-Circuits, Devices +and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: +http://ieeexplore.ieee.org/document/143326/ +[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015. +[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks +in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109, +p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/ +coatesleeng_aistats_2011.pdf +[CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available: +http://cs.stanford.edu/~acoates/stl10 +[CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural +networks for image classification,” in Conference on Computer Vision and +Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online]. +Available: https://arxiv.org/abs/1202.2745v1 +[CUH15] D.-A. Clevert, T. Unterthiner, and S. 
Hochreiter, “Fast and accurate +deep network learning by exponential linear units (ELUs),” arXiv +preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https: +//arxiv.org/abs/1511.07289 +[CWV+14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep +learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.0759 + +[DBB+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional +knowledge for better option pricing,” in Advances in Neural Information Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich, +and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online]. +Available: http://papers.nips.cc/paper/1920-incorporating-second-orderfunctional-knowledge-for-better-option-pricing.pdf +[DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry +in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb. +2016. [Online]. Available: https://arxiv.org/abs/1602.02660 +[DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for +online learning and stochastic optimization,” Journal of Machine Learning +Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: +http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf +[DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via +multi-task network cascades,” in Conference on Computer Vision and Pattern +Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available: +https://arxiv.org/abs/1512.04412 +[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural +Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: +ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf +[Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford +University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/ +proj2015/054_report.pdf +[DSRB14] A. Dosovitskiy, J. T. 
Springenberg et al., “Discriminative unsupervised +feature learning with convolutional neural networks,” in Advances in Neural +Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling +et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. +Available: http://papers.nips.cc/paper/5548-discriminative-unsupervisedfeature-learning-with-convolutional-neural-networks.pdf +[DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional +neural networks for galaxy morphology prediction,” Monthly notices of the +royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015. +[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that +exploits interest-aligned manual image categorization,” in ACM Conference on Computer and Communications Security (CCS), no. 14. +Association for Computing Machinery, Inc., Oct. 2007. [Online]. + +Available: https://www.microsoft.com/en-us/research/publication/asirra-acaptcha-that-exploits-interest-aligned-manual-image-categorization/ +[EKS+96] M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering +clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996, +pp. 226–231. +[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing. +Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3662-44874-8 + +[Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation +networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/ +viewcontent.cgi?article=2799&context=compsci +[FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object +categories,” IEEE transactions on pattern analysis and machine intelligence, +vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http: +//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf +[FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. 
Available: http: +//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html +[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discriminatively trained part-based models,” IEEE transactions on pattern analysis and +machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010. +[FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,” +1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/ +[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep +feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online]. +Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf +[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural +networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available: +http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf +[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object +detection and semantic segmentation,” in Conference on Computer Vision +and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online]. +Available: https://arxiv.org/abs/1311.2524 +[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr. +2007. [Online]. Available: http://authors.library.caltech.edu/7694/ + +[GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with +Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158, +Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6 +[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman +New York, 2002, vol. 29. +[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete +graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267, +1976. +[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM +conference on Computer and communications security (CCS), no. 15. 
ACM, +2008, pp. 535–542. +[Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May +2015. [Online]. Available: https://arxiv.org/abs/1412.6071 +[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: +http://www.vision.caltech.edu/Image_Datasets/Caltech256/ +[GWFM+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML, +vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http: +//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf +[HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for +transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online]. +Available: https://arxiv.org/abs/1608.08614 +[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online]. +Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf +[Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available: +https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/ +[HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional +networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.06993v1 +[HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv +preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https: +//arxiv.org/abs/1611.04231 +[How13] A. G. Howard, “Some improvements on deep convolutional neural network +based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013. +[Online]. Available: https://arxiv.org/abs/1312.5402 + +[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques. +Elsevier, 2011. +[HPN+16] S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with +dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016. +[Online]. Available: https://arxiv.org/abs/1607.04381 +[HPTD15] S. Han, J. 
Pool et al., “Learning both weights and connections for efficient +neural network,” in Advances in Neural Information Processing Systems 28 +(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun. +2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784learning-both-weights-and-connections-for-efficient-neural-network.pdf + +[HSK+12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing +co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580, Jul. +2012. [Online]. Available: https://arxiv.org/abs/1207.0580 +[HSL+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,” +arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https: +//arxiv.org/abs/1603.09382 +[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon +and general network pruning,” in International Conference on Neural +Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http: +//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf +[HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural +network,” arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available: +https://arxiv.org/abs/1503.02531 +[HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional +networks for visual recognition,” in European Conference on Computer +Vision (ECCV). Springer, 2014, pp. 346–361. [Online]. Available: +https://arxiv.org/abs/1406.4729 +[HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,” +arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https: +//arxiv.org/abs/1512.03385v1 +[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level +performance on imagenet classification,” in International Conference on +Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. 
Available: +https://arxiv.org/abs/1502.01852 +[Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),” + +2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/ +2012/nonpub-downloads +[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network +training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167, +Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167 +[JXF+16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation +units,” in Thirtieth AAAI Conference on Artificial Intelligence, Dec. 2016. +[Online]. Available: https://arxiv.org/abs/1512.07030 +[Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr. +2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manuallyclassifying-cifar10/ +[KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,” +arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https: +//arxiv.org/abs/1412.6980 +[KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny +images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/ +learning-features-2009-TR.pdf +[KMN+16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning: +Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836, +Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836 +[Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D. +dissertation, Master’s thesis, Czech Technical University in Prague, 2015. +[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf +[KPY+15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks +for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530, +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530 +[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to +cluster analysis. 
John Wiley & Sons, 2009, vol. 344. +[Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https: +//www.cs.toronto.edu/~kriz/cifar.html +[KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear +and neural network approximation,” IEEE Transactions on Information +Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: +http://ieeexplore.ieee.org/abstract/document/971754/ + +[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification +with deep convolutional neural networks,” in Advances in Neural +Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges +et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. +Available: http://papers.nips.cc/paper/4824-imagenet-classification-withdeep-convolutional-neural-networks.pdf +[KSlB+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature +hierarchies for visual recognition,” in Advances in Neural Information +Processing Systems 23 (NIPS), J. D. Lafferty, C. K. I. Williams +et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. +Available: http://papers.nips.cc/paper/4133-learning-convolutional-featurehierarchies-for-visual-recognition.pdf +[LAE+16] W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in +European Conference on Computer Vision (ECCV). Springer, 2016, pp. +21–37. [Online]. Available: https://arxiv.org/abs/1512.02325 +[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/ +en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer +[LBBH98] Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document +recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov. +1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun01a.pdf +[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature, +vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. 
Available: +http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html +[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in +Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol. +1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8 +[LDS+89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989, +pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/ +lecun-90b.pdf +[Le13] Q. V. Le, “Building high-level features using large scale unsupervised +learning,” in International conference on acoustics, speech and signal +processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http: +//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343 +[LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in + +Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. +2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308 +[LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in +convolutional neural networks: Mixed, gated, and tree,” in International +Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available: +https://arxiv.org/abs/1509.08985v2 +[LH16] I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent +with warm restarts,” Learning, Aug. 2016. [Online]. Available: https: +//arxiv.org/abs/1608.03983 +[LJD+16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to +hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016. +[Online]. Available: https://arxiv.org/abs/1603.06560 +[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885, +Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885 +[LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for +semantic segmentation,” in Conference on Computer Vision and Pattern +Recognition (CVPR). 
IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available: +https://arxiv.org/abs/1411.4038v2 +[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar. +2017. [Online]. Available: https://arxiv.org/abs/1703.01513 +[Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available: +https://github.com/titu1994/DenseNet +[Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online]. +Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/ +[MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter +optimization through reversible learning,” in International Conference on +Machine Learning (ICML), 2015, pp. 2113–2122. +[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of +Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008. +[MHN13] A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities +improve neural network acoustic models,” in Proc. ICML, vol. 30, +no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/ +relu_hybrid_icml2013_final.pdf +[MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv + +preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https: +//arxiv.org/abs/1511.06422 +[MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in +nervous activity,” The bulletin of mathematical biophysics, vol. 5, no. 4, pp. +115–133, 1943. +[MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for +reducing dataset bias in person re-identification,” in International Conference +on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015, +pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/ +7301739/ +[MS07] M. Marszalek and C. Schmid, “Accurate object localization with +shape masks,” in Conference on Computer Vision and Pattern +Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. 
Available: http: +//ieeexplore.ieee.org/document/4270110/ +[MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN +advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016. +[Online]. Available: https://arxiv.org/abs/1606.02228 +[MV16] A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural +networks using natural pre-images,” International Journal of Computer Vision, +pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017 +[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances +in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges, +L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online]. +Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf +[Nes83] Y. Nesterov, “A method of solving a convex programming problem with +convergence rate o (1/k2),” in Soviet Mathematics Doklady, vol. 27, no. 2, +1983, pp. 372–376. +[new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available: +http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html +[Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS +Talk, Dec. 2016. +[NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft +weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992. +[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf +[NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial + +data mining,” IEEE transactions on knowledge and data engineering, vol. 14, +no. 5, pp. 1003–1016, 2002. +[NWC+11a] Y. Netzer, T. Wang et al., “Reading digits in natural images with +unsupervised feature learning,” in NIPS workshop on deep learning and +unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available: +http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf +[NWC+11b] Y. Netzer, T. 
Wang et al., “The street view house numbers (SVHN) dataset,” +2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/ +[NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization: +Uncovering the different types of features learned by each neuron in deep +neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online]. +Available: https://arxiv.org/abs/1602.03616 +[OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive +classifiers for unbalanced classification problems: A study on the performance +scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.08984 +[PMW+15] N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial +perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508, +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508 +[Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer +Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/ +10.1007/3-540-49430-8_3 +[RDS+14] O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition +challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep. +2014. [Online]. Available: https://arxiv.org/abs/1409.0575 +[RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks +for biomedical image segmentation,” in International Conference on Medical +Image Computing and Computer-Assisted Intervention. Springer, 2015, pp. +234–241. [Online]. Available: https://arxiv.org/abs/1505.04597 +[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density of neurons in the hyperneat substrate,” in Conference on Genetic and +evolutionary computation, no. 12. ACM, 2010, pp. 563–570. +[RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?": +Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938, +Feb. 2016. [Online]. 
Available: https://arxiv.org/abs/1602.04938 + +[Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,” +arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https: +//arxiv.org/abs/1609.04747 +[SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks +applied to house numbers digit classification,” in International Conference +on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291. +[Online]. Available: https://arxiv.org/abs/1204.3968 +[SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding +for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185– +212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/ +[SEZ+13] P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization +and detection using convolutional networks,” arXiv preprint arXiv:1312.6229, +Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4 +[SHK+14] N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to +prevent neural networks from overfitting.” Journal of Machine Learning +Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: +https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf +[SHY+13] A. Senior, G. Heigold et al., “An empirical study of learning rates in deep +neural networks for speech recognition,” in International Conference on +Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online]. +Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963 +[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the +impact of residual connections on learning,” arXiv preprint arXiv:1602.07261, +Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261 +[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding +for face recognition and clustering,” in Conference on Computer Vision +and Pattern Recognition (CVPR). 
IEEE, Mar. 2015, pp. 815–823. [Online]. +Available: https://arxiv.org/abs/1503.03832 +[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale +convolutional networks,” in International Joint Conference on Neural +Networks (IJCNN), Jul. 2011, pp. 2809–2813. [Online]. Available: +http://ieeexplore.ieee.org/document/6033589/ +[SLJ+15] C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference +on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp. +1–9. [Online]. Available: https://arxiv.org/abs/1409.4842 +[SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through + +augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127, +2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/ +106365602320169811 +[SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to +the nonlinear dynamics of learning in deep linear neural networks,” +arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https: +//arxiv.org/abs/1312.6120 +[SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive +networks,” arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.1165 +[SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition +benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section= +gtsrb&subsection=news +[SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking +machine learning algorithms for traffic sign recognition,” Neural Networks, +no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/ +article/pii/S0893608012000457 +[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint +arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606.02492 +[SVI+15] C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture +for computer vision,” arXiv preprint arXiv:1512.00567, Dec. 2015. 
[Online]. +Available: https://arxiv.org/abs/1512.00567v3 +[SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional +networks: Visualising image classification models and saliency maps,” +arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https: +//arxiv.org/abs/1312.6034 +[SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for +large-scale image recognition,” arXiv preprint arXiv:1409.1556, Sep. 2014. +[Online]. Available: https://arxiv.org/abs/1409.1556 +[SZS+13] C. Szegedy, W. Zaremba et al., “Intriguing properties of neural +networks,” arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available: +https://arxiv.org/abs/1312.6199v4 +[TF-16a] “MNIST for ML beginners,” Dec. 2016. [Online]. Available: https: +//www.tensorflow.org/tutorials/mnist/beginners/ + +[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/ +api_docs/python/nn/activation_functions_#dropout +[TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient +by a running average of its recent magnitude,” COURSERA: Neural +Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available: +http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf +[Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,” +Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martinthoma.com/write-math +[Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available: +https://martin-thoma.com/twiddle/ +[Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint +arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/ +1602.06541 +[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380, Jan. +2017. [Online]. Available: https://arxiv.org/abs/1701.08380 +[Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available: +https://martin-thoma.com/msthesis +[VH13] P. Verbancsics and J. 
Harguess, “Generative neuroevolution for deep +learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available: +https://arxiv.org/abs/1312.5355 +[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing. +Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available: +http://dx.doi.org/10.1007/978-94-015-7744-1_2 +[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent +networks with long term dependencies,” arXiv preprint arXiv:1702.00071, +Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071 +[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay +neural networks,” IEEE transactions on acoustics, speech, and signal +processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: +http://ieeexplore.ieee.org/document/21701/ +[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256, +1992. + +[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling +and Normalization Methods for Action Recognition. Berlin, Heidelberg: +Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online]. +Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44 +[WYS+15] R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv +preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available: +https://arxiv.org/abs/1501.02876v4 +[WZZ+13] L. Wan, M. Zeiler et al., “Regularization of neural networks using dropconnect,” +in International Conference on Machine Learning (ICML), no. 30, 2013, +pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/ +icml2013/icml2013.pdf +[XGD+16] S. Xie, R. Girshick et al., “Aggregated residual transformations for deep +neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.05431v1 +[Xu11] W. 
Xu, “Towards optimal one pass large scale learning with averaged +stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011. +[Online]. Available: https://arxiv.org/abs/1107.2490 +[XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in +convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online]. +Available: https://arxiv.org/abs/1505.00853 +[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on +support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available: +https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf +[XZY+14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolutional neural network for large-scale image classification,” in International +Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186. +[YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten +digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/ +[ZBH+16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking +generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.03530 +[ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in +Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee, +M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090. +[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutionalneural-networks.pdf + +[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category +detection,” in European Conference on Computer Vision (ECCV). Springer, +Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867 +[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint +arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv.org/abs/ +1212.5701v1 +[ZF13] M. D. Zeiler and R. 
Fergus, “Stochastic pooling for regularization of deep +convolutional neural networks,” arXiv preprint arXiv:1301.3557, Jan. 2013. +[Online]. Available: https://arxiv.org/abs/1301.3557v1 +[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional +networks,” in European Conference on Computer Vision (ECCV). Springer, +Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901 +[Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http:// +places2.csail.mit.edu/download.html +[ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv +preprint arXiv:1605.07146, May 2016. [Online]. Available: https: +//arxiv.org/abs/1605.07146 +[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative +localization,” arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available: +https://arxiv.org/abs/1512.04150 +[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene +understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online]. +Available: https://arxiv.org/abs/1610.02055 +[ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement +learning,” arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available: +https://arxiv.org/abs/1611.01578 +[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,” +arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https: +//arxiv.org/abs/1506.02351v1 +[ZYL+15] H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus +units,” in International Joint Conference on Neural Networks (IJCNN), Jul. +2015, pp. 1–4. + +I. Glossary +ANN artificial neural network. 4 +ASO Automatic Structure Optimization. 29 +CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71 +CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60, +71, 72, 79, 82–84, 88–91 +ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84 +ES early stopping. 68 +FC Fully Connected. 
91, 93 +FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93 +GA genetic algorithm. 30 +GAN Generative Adverserial Network. 80 +GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91 +HSV hue, saturation, value. 79 +LCN Local Contrast Normalization. 91 +LDA linear discriminant analysis. 79 +LReLU leaky rectified linear unit. 63, 72, 77, 78, 84 +MLP multilayer perceptron. 3–6, 28 +NAG Nesterov Accellerated Momentum. 83 +NEAT NeuroEvolution of Augmenting Topologies. 83 +OBD Optimal Brain Damage. 29 + +PCA principal component analysis. 79 +PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84 +ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 +SGD stochastic gradient descent. 5, 30, 45, 46, 82 +ZCA Zero Components Analysis. 79 diff --git a/read/results/pdfium/2201.00021.txt b/read/results/pdfium/2201.00021.txt index 8ef5766..be0f404 100644 --- a/read/results/pdfium/2201.00021.txt +++ b/read/results/pdfium/2201.00021.txt @@ -1,808 +1,724 @@ -Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022 -April 12, 2022 -Discovery of ammonia (9,6) masers in two high-mass star-forming -regions -Y. T. Yan (闫耀庭) -1,?, C. Henkel1, 2, 3 -, K. M. Menten1 -, Y. Gong (龚龑) -1 -, J. Ott4 -, T. L. Wilson1 -, A. Wootten4 -, A. -Brunthaler1 -, J. S. Zhang (张江水) -5 -, J. L. Chen (陈家梁) -5 -, and K. Yang (杨楷) -6, 7 -1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany -e-mail: yyan@mpifr-bonn.mpg.de -2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. 
Box 80203, Jeddah 21589, Saudi Arabia -3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China -4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA -5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China -6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China -7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s -Republic of China -Received 13 December 2021 / Accepted 30 December 2021 -ABSTRACT -Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact -regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements. -Aims. Only a few NH3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH3 (9,6) -masers to provide a better observational basis for studying their role in high-mass star-forming regions. -Methods. We carried out NH3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam -size 4900) and the Karl G. Jansky Very Large Array (JVLA; beam size about 100 -.2). -Results. We discovered new NH3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass star￾forming regions hosting NH3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity -of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming -linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH3 (9,6) emission -arises from single compact emission regions that are not resolved by the interferometric measurements. 
As JVLA imaging shows, the -NH3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (000 -.28 ± 0 -00 -.10) of the peak position -of the 1.36 cm continuum object, HW2. In G34.26+0.15, three NH3 (9,6) maser spots are observed: one is close to the head of the -cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region -A. -Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long -baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios. -Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM -1. Introduction -Since its discovery more than five decades ago (Cheung et al. -1968), ammonia (NH3) has been a most valuable molecule for -investigating the physical properties of molecular clouds (e.g., -Ho & Townes 1983). While thermally excited transitions in -the centimeter-wavelength inversion transitions of ammonia are -regarded as a reliable thermometer of molecular clouds (e.g., -Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia -masers have attracted attention since the first detection of maser -action in the (J, K) = (3,3) metastable (J = K) line toward the -massive star-forming region W33 (Wilson et al. 1982). Subse￾quent observations have led to the detection of new metastable -ammonia masers, including 15NH3 (3,3) (Mauersberger et al. -1986), NH3 (1,1) (Gaume et al. 1996), NH3 (2,2) (Mills et al. -2018), NH3 (5,5) (Cesaroni et al. 1992), NH3 (6,6) (Beuther -? Member of the International Max Planck Research School (IM￾PRS) for Astronomy and Astrophysics at the universities of Bonn and -Cologne. -et al. 2007), NH3 (7,7), NH3 (9,9), and NH3 (12,12) (Henkel -et al. 2013). 
These have led to the discovery of metastable maser -lines in 22 different regions (Mauersberger et al. 1986, 1987; -Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; -Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Woot￾ten 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang -et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid -et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh -et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & -Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al. -2019; Mei et al. 2020; Towner et al. 2021). Compared with the -metastable ammonia masers, detected non-metastable (J > K) -ammonia maser transitions are more numerous. The first highly -excited non-metastable ammonia maser was detected by Mad￾den et al. (1986) in the (J, K) = (9,6) and (6,3) lines. Thereafter, -many other NH3 non-metastable inversion transition lines have -been identified as masers, including the (5,3), (5,4), (6,1), (6,2), -(6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3), -(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transi￾Article number, page 1 of 10 +Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022 +April 12, 2022 +Discovery of ammonia (9,6) masers in two high-mass star-forming +regions +Y. T. Yan (闫耀庭) +1,?, C. Henkel1, 2, 3 +, K. M. Menten1, Y. Gong (龚龑) +1 +, J. Ott4, T. L. Wilson1, A. Wootten4, A. +Brunthaler1, J. S. Zhang (张江水) +5 +, J. L. Chen (陈家梁) +5 +, and K. Yang (杨楷) +6, 7 +1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany +e-mail: yyan@mpifr-bonn.mpg.de +2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. 
Box 80203, Jeddah 21589, Saudi Arabia +3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China +4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA +5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China +6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China +7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s +Republic of China +Received 13 December 2021 / Accepted 30 December 2021 +ABSTRACT +Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact +regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements. +Aims. Only a few NH3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH3 (9,6) +masers to provide a better observational basis for studying their role in high-mass star-forming regions. +Methods. We carried out NH3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam +size 4900) and the Karl G. Jansky Very Large Array (JVLA; beam size about 100.2). +Results. We discovered new NH3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass starforming regions hosting NH3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity +of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming +linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH3 (9,6) emission +arises from single compact emission regions that are not resolved by the interferometric measurements. 
As JVLA imaging shows, the +NH3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (000.28 ± 0 +00 +.10) of the peak position +of the 1.36 cm continuum object, HW2. In G34.26+0.15, three NH3 (9,6) maser spots are observed: one is close to the head of the +cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region +A. +Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long +baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios. +Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM +1. Introduction +Since its discovery more than five decades ago (Cheung et al. +1968), ammonia (NH3) has been a most valuable molecule for +investigating the physical properties of molecular clouds (e.g., +Ho & Townes 1983). While thermally excited transitions in +the centimeter-wavelength inversion transitions of ammonia are +regarded as a reliable thermometer of molecular clouds (e.g., +Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia +masers have attracted attention since the first detection of maser +action in the (J, K) = (3,3) metastable (J = K) line toward the +massive star-forming region W33 (Wilson et al. 1982). Subsequent observations have led to the detection of new metastable +ammonia masers, including 15NH3 (3,3) (Mauersberger et al. +1986), NH3 (1,1) (Gaume et al. 1996), NH3 (2,2) (Mills et al. +2018), NH3 (5,5) (Cesaroni et al. 1992), NH3 (6,6) (Beuther +? Member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the universities of Bonn and +Cologne. +et al. 2007), NH3 (7,7), NH3 (9,9), and NH3 (12,12) (Henkel +et al. 2013). 
These have led to the discovery of metastable maser +lines in 22 different regions (Mauersberger et al. 1986, 1987; +Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; +Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Wootten 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang +et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid +et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh +et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & +Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al. +2019; Mei et al. 2020; Towner et al. 2021). Compared with the +metastable ammonia masers, detected non-metastable (J > K) +ammonia maser transitions are more numerous. The first highly +excited non-metastable ammonia maser was detected by Madden et al. (1986) in the (J, K) = (9,6) and (6,3) lines. Thereafter, +many other NH3 non-metastable inversion transition lines have +been identified as masers, including the (5,3), (5,4), (6,1), (6,2), +(6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3), +(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transiArticle number, page 1 of 10 arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 -A&A proofs: manuscript no. mainArxiv -tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; -Henkel et al. 2013; Mei et al. 2020). Except for the NH3 (3,3) -masers proposed to be associated with four supernova remnants -(McEwen et al. 2016), almost all the other ammonia masers are -detected in high-mass star-forming regions (HMSFRs). How￾ever, while many HMSFRs host water (H2O), hydroxyl (OH), -or methanol (CH3OH) masers, ammonia masers are quite rare -in these sources, and the role that the environment of a young -high-mass star plays in their excitation remains unclear. 
There￾fore, dedicated searches for ammonia masers in HMSFRs are -indispensable in regard to their overall incidence and associa￾tion with different environments, which can provide additional -constraints on the pumping mechanism of ammonia masers. -So far, a total of 32 NH3 inversion transitions (∆K = 0 -and ∆J = 0) have been identified as masers. Among these, and -despite arising from energy levels as high as 1090 K above -the ground state, the NH3 (9,6) maser stands out as being the -strongest and most variable one in W51-IRS2 (e.g., Henkel et al. -2013). Maser emission in this line has only been detected in five -HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. -1986), and Sgr B2(N) (Mei et al. 2020). The NH3 (3,3) masers -are thought to be collisionally excited (e.g., Flower et al. 1990; -Mangum & Wootten 1994); in contrast, the pumping mecha￾nism of NH3 (9,6) masers is less well constrained (Madden et al. -1986). Brown & Cragg (1991) have studied ortho-ammonia and -found that it could possibly pump the (6,3) inversion line, but -they did not extend their model to the (9,6) transition due to the -fact that collision rates are only known for inversion levels up to -J = 6 (e.g., Danby et al. 1988). -NH3 (9,6) masers are found to be strongly variable, similar to -H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. -2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) -line showed significant variation in line shape within a time in￾terval of only two days. Mapping of the (9,6) maser toward W51 -with very long baseline interferometry (VLBI) suggests that the -masers are closer to the H2O masers than to the OH masers or -to ultracompact (UC) H ii regions (Pratap et al. 1991). While -Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO -and NH3 masers in W51-IRS2 are very close to each other, their -positions, differing by 000 -.065 (∼0.015 pc), do not fully coincide. 
-In this paper we report the discovery of NH3 (9,6) masers -in two HMSFRs, Cepheus A and G34.26+0.15. This increases -the number of (9,6) maser detections in our Galaxy from five -to seven. In Sect. 2 observations with the Effelsberg 100-meter -telescope and the Karl G. Jansky Very Large Array (JVLA) are -described. Results are presented in Sect. 3. The morphology of -Cep A and G34.26+0.15 as well as a comparison of the emission -distributions of different tracers with the NH3 (9,6) masers are -presented in Sect. 4. Our main results are summarized in Sect. 5. -2. Observations and data reduction -2.1. Effelsberg observations and data reduction -The NH3 (9,6) line was observed toward Cep A and -G34.26+0.15 with the 100-meter Effelsberg telescope1 -in 2020 -January and 2021 February, July, and August. The S14mm dou￾ble beam secondary focus receiver was employed. The full width -at half maximum (FWHM) beam size is 4900 at 18.5 GHz, the -frequency of the target line. The observations were performed in -position switching mode, and the off position was 100 -in azimuth -1 Based on observations with the 100-meter telescope of the MPIfR -(Max-Planck-Institut für Radioastronomie) at Effelsberg. -away from the source. For observations made before 2021 Au￾gust, we used a spectrometer that covered 2 GHz wide backends -with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1 -at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar -1975). A high spectral resolution backend with 65536 channels -and a bandwidth of 300 MHz was employed in 2021 August, -providing a channel width of 0.07 km s−1 -at 18.5 GHz. Point￾ing was checked every 2 hours using 3C 286 or NGC 7027. -Focus calibrations were done at the beginning of the observa￾tions and during sunset and sunrise toward the abovementioned -pointing sources. The system temperatures were 100–130 K on -a main-beam brightness temperature, TMB, scale. 
This flux den￾sity was calibrated assuming a TMB/S ratio of 1.95 K/Jy, derived -from continuum cross scans of NGC 7027 (the flux density was -adopted from Ott et al. 1994). Calibration uncertainties are esti￾mated to be ∼ 10%. -We used the GILDAS/CLASS2 package (Pety 2005) to re￾duce the spectral line data. A first-order polynomial was sub￾tracted from each spectrum for baseline removal. -2.2. JVLA observations and data reduction -Observations of the NH3 (9,6) line toward Cep A and -G34.26+0.15 were obtained on 2021 July 13 with the JVLA -of the National Radio Astronomy Observatory3 -(NRAO) in the -C configuration (project ID: 21A-157, PI: Yaoting Yan). We -employed 27 antennas for the observations. The primary beam -of the JVLA antennas is 15000 (FWHM) at 18.5 GHz. A mix￾ture of mixed three-bit and eight-bit samplers were used to per￾form the observations. For the NH3 (9,6) line observations, we -used one subband with the eight-bit sampler covering a band￾width of 16 MHz with full polarization, eight recirculations, and -four baseline board pairs (BIBPs) to provide a velocity range -of 260 km s−1 with a channel spacing of 0.13 km s−1 -. Two -additional subbands of bandwidth 16 MHz were used to cover -the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32 -subbands, each with a bandwidth of 128 MHz to cover a to￾tal range of 4 GHz between 20–24 GHz, was used to mea￾sure the continuum emission. 3C 286 with a flux density of -2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a -calibrator for pointing, flux density, bandpass, and polarization. -J2230+6946 and J1851+0035 served as gain calibrators for Cep -A and G34.26+0.15, respectively. The on-source times were -4 -m30s -and 4m50s -toward Cep A and G34.26+0.15, respectively. -Data from two antennas were lost due to technical is￾sues. The data from the remaining 25 antennas were reduced -through the Common Astronomy Software Applications pack￾age (CASA4 -; McMullin et al. 2007). 
We calibrated the data with -the JVLA CASA calibration pipeline using CASA 6.1.2. The -results were obtained after flagging data that contain artifacts. -We inspected the phase, amplitude, and bandpass variations of -the calibrated visibility data to search for additional artifacts be￾fore imaging. Then, the uvcontsub task in CASA was used to -separate the calibrated visibilities into two parts, one with line￾only data and the other with the continuum data. The tclean task -with a cell size of 000 -.2 and Briggs weighting with robust=0 was -used to produce the images of spectral line and continuum emis￾sion. The synthesized beams for NH3 (9,6) are 100 -.47 × 0 -00 -.99 at -2 https://www.iram.fr/IRAMFR/GILDAS/ -3 The National Radio Astronomy Observatory is a facility of the Na￾tional Science Foundation operated under cooperative agreement by As￾sociated Universities, Inc. -4 https://casa.nrao.edu/ +A&A proofs: manuscript no. mainArxiv +tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; +Henkel et al. 2013; Mei et al. 2020). Except for the NH3 (3,3) +masers proposed to be associated with four supernova remnants +(McEwen et al. 2016), almost all the other ammonia masers are +detected in high-mass star-forming regions (HMSFRs). However, while many HMSFRs host water (H2O), hydroxyl (OH), +or methanol (CH3OH) masers, ammonia masers are quite rare +in these sources, and the role that the environment of a young +high-mass star plays in their excitation remains unclear. Therefore, dedicated searches for ammonia masers in HMSFRs are +indispensable in regard to their overall incidence and association with different environments, which can provide additional +constraints on the pumping mechanism of ammonia masers. +So far, a total of 32 NH3 inversion transitions (∆K = 0 +and ∆J = 0) have been identified as masers. 
Among these, and +despite arising from energy levels as high as 1090 K above +the ground state, the NH3 (9,6) maser stands out as being the +strongest and most variable one in W51-IRS2 (e.g., Henkel et al. +2013). Maser emission in this line has only been detected in five +HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. +1986), and Sgr B2(N) (Mei et al. 2020). The NH3 (3,3) masers +are thought to be collisionally excited (e.g., Flower et al. 1990; +Mangum & Wootten 1994); in contrast, the pumping mechanism of NH3 (9,6) masers is less well constrained (Madden et al. +1986). Brown & Cragg (1991) have studied ortho-ammonia and +found that it could possibly pump the (6,3) inversion line, but +they did not extend their model to the (9,6) transition due to the +fact that collision rates are only known for inversion levels up to +J = 6 (e.g., Danby et al. 1988). +NH3 (9,6) masers are found to be strongly variable, similar to +H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. +2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) +line showed significant variation in line shape within a time interval of only two days. Mapping of the (9,6) maser toward W51 +with very long baseline interferometry (VLBI) suggests that the +masers are closer to the H2O masers than to the OH masers or +to ultracompact (UC) H ii regions (Pratap et al. 1991). While +Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO +and NH3 masers in W51-IRS2 are very close to each other, their +positions, differing by 000.065 (∼0.015 pc), do not fully coincide. +In this paper we report the discovery of NH3 (9,6) masers +in two HMSFRs, Cepheus A and G34.26+0.15. This increases +the number of (9,6) maser detections in our Galaxy from five +to seven. In Sect. 2 observations with the Effelsberg 100-meter +telescope and the Karl G. Jansky Very Large Array (JVLA) are +described. Results are presented in Sect. 3. 
The morphology of +Cep A and G34.26+0.15 as well as a comparison of the emission +distributions of different tracers with the NH3 (9,6) masers are +presented in Sect. 4. Our main results are summarized in Sect. 5. +2. Observations and data reduction +2.1. Effelsberg observations and data reduction +The NH3 (9,6) line was observed toward Cep A and +G34.26+0.15 with the 100-meter Effelsberg telescope1in 2020 +January and 2021 February, July, and August. The S14mm double beam secondary focus receiver was employed. The full width +at half maximum (FWHM) beam size is 4900 at 18.5 GHz, the +frequency of the target line. The observations were performed in +position switching mode, and the off position was 100in azimuth +1 Based on observations with the 100-meter telescope of the MPIfR +(Max-Planck-Institut für Radioastronomie) at Effelsberg. +away from the source. For observations made before 2021 August, we used a spectrometer that covered 2 GHz wide backends +with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1 +at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar +1975). A high spectral resolution backend with 65536 channels +and a bandwidth of 300 MHz was employed in 2021 August, +providing a channel width of 0.07 km s−1at 18.5 GHz. Pointing was checked every 2 hours using 3C 286 or NGC 7027. +Focus calibrations were done at the beginning of the observations and during sunset and sunrise toward the abovementioned +pointing sources. The system temperatures were 100–130 K on +a main-beam brightness temperature, TMB, scale. This flux density was calibrated assuming a TMB/S ratio of 1.95 K/Jy, derived +from continuum cross scans of NGC 7027 (the flux density was +adopted from Ott et al. 1994). Calibration uncertainties are estimated to be ∼ 10%. +We used the GILDAS/CLASS2 package (Pety 2005) to reduce the spectral line data. A first-order polynomial was subtracted from each spectrum for baseline removal. +2.2. 
JVLA observations and data reduction +Observations of the NH3 (9,6) line toward Cep A and +G34.26+0.15 were obtained on 2021 July 13 with the JVLA +of the National Radio Astronomy Observatory3 +(NRAO) in the +C configuration (project ID: 21A-157, PI: Yaoting Yan). We +employed 27 antennas for the observations. The primary beam +of the JVLA antennas is 15000 (FWHM) at 18.5 GHz. A mixture of mixed three-bit and eight-bit samplers were used to perform the observations. For the NH3 (9,6) line observations, we +used one subband with the eight-bit sampler covering a bandwidth of 16 MHz with full polarization, eight recirculations, and +four baseline board pairs (BIBPs) to provide a velocity range +of 260 km s−1 with a channel spacing of 0.13 km s−1. Two +additional subbands of bandwidth 16 MHz were used to cover +the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32 +subbands, each with a bandwidth of 128 MHz to cover a total range of 4 GHz between 20–24 GHz, was used to measure the continuum emission. 3C 286 with a flux density of +2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a +calibrator for pointing, flux density, bandpass, and polarization. +J2230+6946 and J1851+0035 served as gain calibrators for Cep +A and G34.26+0.15, respectively. The on-source times were +4 +m30s +and 4m50stoward Cep A and G34.26+0.15, respectively. +Data from two antennas were lost due to technical issues. The data from the remaining 25 antennas were reduced +through the Common Astronomy Software Applications package (CASA4 +; McMullin et al. 2007). We calibrated the data with +the JVLA CASA calibration pipeline using CASA 6.1.2. The +results were obtained after flagging data that contain artifacts. +We inspected the phase, amplitude, and bandpass variations of +the calibrated visibility data to search for additional artifacts before imaging. 
Then, the uvcontsub task in CASA was used to +separate the calibrated visibilities into two parts, one with lineonly data and the other with the continuum data. The tclean task +with a cell size of 000.2 and Briggs weighting with robust=0 was +used to produce the images of spectral line and continuum emission. The synthesized beams for NH3 (9,6) are 100 +.47 × 0 +00 +.99 at +2 https://www.iram.fr/IRAMFR/GILDAS/ +3 The National Radio Astronomy Observatory is a facility of the National Science Foundation operated under cooperative agreement by Associated Universities, Inc. +4 https://casa.nrao.edu/ Article number, page 2 of 10 -Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -P.A. = 58◦ -.79 and 100 -.33 × 1 -00 -.06 at P.A. = 5 -◦ -.36 toward Cep A -and G34.26+0.15, respectively. For the 1.36 cm (20–24 GHz) -continuum emission, the synthesized beams are 100 -.08 × 0 -00 -.67 at -P.A. = 60◦ -.64 and 000 -.95 × 0 -00 -.71 at P.A. = 5 -◦ -.91 toward Cep A and -G34.26+0.15. The typical absolute astrometric accuracy of the -JVLA is ∼10% of the synthesized beam5 -. The flux density scale -calibration accuracy is estimated to be within 15%. -Fig. 1. Spectra from NH3 (9,6) transition lines. Left: Top to bottom: -Time sequence of NH3 (9,6) profiles observed toward Cep A with the -Effelsberg 100-meter telescope (after subtracting a first-order polyno￾mial baseline). A JVLA spectrum is interspersed. The systemic veloc￾ity from CO and HCO+ -lines is indicated by a dashed blue line. The -two dashed red lines at LSR velocities, VLSR, of −0.90 km s−1 -and -−0.28 km s−1 -indicate the central velocities of the two major compo￾nents. Right: NH3 (9,6) spectra from G34.26+0.15. The systemic ve￾locity from C17O is indicated by a dashed blue line. The three dashed -red lines at VLSR = 54.1 km s−1 -, 55.8 km s−1 -, and 62.5 km s−1 -show the -central velocities of the main ammonia emission components. -3. 
Results -The spectra from different epochs are shown in Figs. 1 and 2. -Toward Cep A, the NH3 (9,6) line profile from the JVLA is ex￾tracted from an Effelsberg-beam-sized region (FWHM, 4900). In -the case of G34.26+0.15, the NH3 spectrum is below the noise -level if a similarly large beam size is used. Therefore, we de￾rived the JVLA NH3 (9,6) spectrum from a smaller region, with -radius 300 -.5, that contains all the detected NH3 (9,6) emission. In -Table A.1, the observed NH3 (9,6) line parameters obtained by -Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not de￾tected by our JVLA observations. The 3σ upper limits for the -NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 -5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance- -/positional-accuracy -Fig. 2. NH3 (9,6) line profiles emphasizing, in contrast to the spectra -in Fig. 1, weaker features. Cep A spectra are presented on the left, -G34.26+0.15 spectra on the right. The two dashed red lines in the left -panels indicate VLSR = 1.48 km s−1 -and 2.89 km s−1 -. In the right panels, -the two dashed red lines refer to 54.1 km s−1 -and 55.8 km s−1 -. -and 27.2 mJy beam−1 -, respectively. In G34.26+0.15, the corre￾sponding 3σ upper limits for the NH3 (8,5) and (10,7) lines are -22.1 mJy beam−1 -and 30.4 mJy beam−1 -. For both sources, sen￾sitivity levels refer to emission from a single channel of width -0.13 km s−1 -. Taking the larger measured line widths of the (9,6) -maser features (see Table A.1), these limits could be further low￾ered by factors of two to four. -3.1. Centimeter-continuum emission -The 1.36 cm continuum, derived from our JVLA observations, -toward Cep A is presented in Fig. 3. Six published compact -sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are de￾tected in our observations. Figure 4 shows the 1.36 cm contin￾uum in G34.26+0.15. Three main continuum objects, A, B, and -C, are detected. 
By using the imfit task in CASA, we measured -the continuum flux at 1.36 cm toward individual compact source -components in Cep A and G34.26+0.15. Details are given in Ta￾ble A.2. -3.2. NH3 (9,6) emission in Cep A -In 2020 January, NH3 (9,6) emission with a peak flux density of -0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter -telescope in Cep A. Emission with similar strength was also de￾tected in 2021 February and August with the same telescope. -Higher velocity resolution data, which were obtained in 2021 -August, again with the Effelsberg 100-meter telescope, show -that the (9,6) emission contains two main velocity components. -Overall, the flux densities of the NH3 (9,6) emission line mea￾sured with the Effelsberg 100-meter telescope are, within the cal￾ibration uncertainties, unchanged. This is valid for the time inter￾val between 2020 January and August 2021, when we smoothed -the obtained spectra to the same velocity resolution. We also -see another two weaker components. Figure 2 emphasizes these -weak components with an expanded flux density scale. -Higher angular resolution data from the JVLA pinpoint the -position of the NH3 (9,6) emission with an offset of (−0 -00 -.28, -0 -00 -.02) relative to the 1.36 cm continuum peak of Cep A HW2 -(Fig. 3). The deconvolved NH3 (9,6) component size is (000 -.29 ± -0 -00 -.15) × (000 -.19 ± 0 -00 -.14) at P.A. = 174◦ -, derived with the imfit task -in CASA, and can thus be considered, accounting for the uncer￾tainties, as unresolved. +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +P.A. = 58◦.79 and 100.33 × 1 +00 +.06 at P.A. = 5 +◦ +.36 toward Cep A +and G34.26+0.15, respectively. For the 1.36 cm (20–24 GHz) +continuum emission, the synthesized beams are 100.08 × 0 +00 +.67 at +P.A. = 60◦.64 and 000.95 × 0 +00 +.71 at P.A. = 5 +◦ +.91 toward Cep A and +G34.26+0.15. 
The typical absolute astrometric accuracy of the +JVLA is ∼10% of the synthesized beam5. The flux density scale +calibration accuracy is estimated to be within 15%. +Fig. 1. Spectra from NH3 (9,6) transition lines. Left: Top to bottom: +Time sequence of NH3 (9,6) profiles observed toward Cep A with the +Effelsberg 100-meter telescope (after subtracting a first-order polynomial baseline). A JVLA spectrum is interspersed. The systemic velocity from CO and HCO+ +lines is indicated by a dashed blue line. The +two dashed red lines at LSR velocities, VLSR, of −0.90 km s−1and +−0.28 km s−1indicate the central velocities of the two major components. Right: NH3 (9,6) spectra from G34.26+0.15. The systemic velocity from C17O is indicated by a dashed blue line. The three dashed +red lines at VLSR = 54.1 km s−1, 55.8 km s−1, and 62.5 km s−1show the +central velocities of the main ammonia emission components. +3. Results +The spectra from different epochs are shown in Figs. 1 and 2. +Toward Cep A, the NH3 (9,6) line profile from the JVLA is extracted from an Effelsberg-beam-sized region (FWHM, 4900). In +the case of G34.26+0.15, the NH3 spectrum is below the noise +level if a similarly large beam size is used. Therefore, we derived the JVLA NH3 (9,6) spectrum from a smaller region, with +radius 300.5, that contains all the detected NH3 (9,6) emission. In +Table A.1, the observed NH3 (9,6) line parameters obtained by +Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not detected by our JVLA observations. The 3σ upper limits for the +NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 +5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance/positional-accuracy + +Fig. 2. NH3 (9,6) line profiles emphasizing, in contrast to the spectra +in Fig. 1, weaker features. Cep A spectra are presented on the left, +G34.26+0.15 spectra on the right. The two dashed red lines in the left +panels indicate VLSR = 1.48 km s−1and 2.89 km s−1. 
In the right panels, +the two dashed red lines refer to 54.1 km s−1and 55.8 km s−1. +and 27.2 mJy beam−1, respectively. In G34.26+0.15, the corresponding 3σ upper limits for the NH3 (8,5) and (10,7) lines are +22.1 mJy beam−1and 30.4 mJy beam−1. For both sources, sensitivity levels refer to emission from a single channel of width +0.13 km s−1. Taking the larger measured line widths of the (9,6) +maser features (see Table A.1), these limits could be further lowered by factors of two to four. +3.1. Centimeter-continuum emission +The 1.36 cm continuum, derived from our JVLA observations, +toward Cep A is presented in Fig. 3. Six published compact +sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected in our observations. Figure 4 shows the 1.36 cm continuum in G34.26+0.15. Three main continuum objects, A, B, and +C, are detected. By using the imfit task in CASA, we measured +the continuum flux at 1.36 cm toward individual compact source +components in Cep A and G34.26+0.15. Details are given in Table A.2. +3.2. NH3 (9,6) emission in Cep A +In 2020 January, NH3 (9,6) emission with a peak flux density of +0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter +telescope in Cep A. Emission with similar strength was also detected in 2021 February and August with the same telescope. +Higher velocity resolution data, which were obtained in 2021 +August, again with the Effelsberg 100-meter telescope, show +that the (9,6) emission contains two main velocity components. +Overall, the flux densities of the NH3 (9,6) emission line measured with the Effelsberg 100-meter telescope are, within the calibration uncertainties, unchanged. This is valid for the time interval between 2020 January and August 2021, when we smoothed +the obtained spectra to the same velocity resolution. We also +see another two weaker components. Figure 2 emphasizes these +weak components with an expanded flux density scale. 
+Higher angular resolution data from the JVLA pinpoint the +position of the NH3 (9,6) emission with an offset of (−0 +00 +.28, +0 +00 +.02) relative to the 1.36 cm continuum peak of Cep A HW2 +(Fig. 3). The deconvolved NH3 (9,6) component size is (000.29 ± +0 +00 +.15) × (000.19 ± 0 +00 +.14) at P.A. = 174◦, derived with the imfit task +in CASA, and can thus be considered, accounting for the uncertainties, as unresolved. Article number, page 3 of 10 -A&A proofs: manuscript no. mainArxiv -Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, -and 110 × 0.125 mJy beam−1 -. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane -Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 = 22h56m17s -.972, and -δJ2000 = 62◦0104900 -.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black -ellipse denoting the position of the NH3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), -and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates -the LSR velocity range of the maser spots. -Fig. 4. 1.36 cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, -150, 180, and 200 × 5.0 mJy beam−1 -. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is -αJ2000 = 18h53m18s -.560, and δJ2000 = 01◦1405800 -.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH3 -(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al. 
-2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (VLSR) of maser spots. -In view of the constancy of the flux densities obtained at Ef￾felsberg and the similar JVLA flux density, measured in 2021 -July, there is no missing interferometric flux density in the JVLA -data. -3.3. NH3 (9,6) emission in G34.26+0.15 -The NH3 (9,6) emission was first detected toward G34.26+0.15 -in 2020 January with the Effelsberg 100-meter telescope. Higher -velocity resolution data from 2021 August show the NH3 (9,6) -emission to be composed of two different components. The spec￾tra of weak components on a smaller flux density scale are pre￾sented in Fig. 2. -Three different locations showing NH3 (9,6) emission are -found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6) -component sizes are (100 -.42±0 -00 -.43)×(000 -.54±0 -00 -.62) at P.A. = 97◦ -(M1), (000 -.42 ± 0 -00 -.27) × (000 -.15 ± 0 -00 -.27) at P.A. = 150◦ -(M2), and +A&A proofs: manuscript no. mainArxiv +Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, +and 110 × 0.125 mJy beam−1. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane +Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 = 22h56m17s.972, and +δJ2000 = 62◦0104900.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black +ellipse denoting the position of the NH3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), +and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates +the LSR velocity range of the maser spots. +Fig. 4. 
1.36 cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, +150, 180, and 200 × 5.0 mJy beam−1. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is +αJ2000 = 18h53m18s.560, and δJ2000 = 01◦1405800.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH3 +(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al. +2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (VLSR) of maser spots. +In view of the constancy of the flux densities obtained at Effelsberg and the similar JVLA flux density, measured in 2021 +July, there is no missing interferometric flux density in the JVLA +data. +3.3. NH3 (9,6) emission in G34.26+0.15 +The NH3 (9,6) emission was first detected toward G34.26+0.15 +in 2020 January with the Effelsberg 100-meter telescope. Higher +velocity resolution data from 2021 August show the NH3 (9,6) +emission to be composed of two different components. The spectra of weak components on a smaller flux density scale are presented in Fig. 2. +Three different locations showing NH3 (9,6) emission are +found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6) +component sizes are (100.42±0 +00 +.43)×(000.54±0 +00 +.62) at P.A. = 97◦ +(M1), (000.42 ± 0 +00 +.27) × (000.15 ± 0 +00 +.27) at P.A. = 150◦(M2), and Article number, page 4 of 10 -Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -(100 -.17 ± 0 -00 -.34) × (000 -.27 ± 0 -00 -.46) at P.A. = 53◦ -(M3) and are thus -comparable to or smaller than the beam size. -Overall, the NH3 (9,6) line from G34.26+0.15 weakened -during the time interval from 2020 January to 2021 August by -about 70%. 
A comparison between the JVLA spectrum and the -Effelsberg data, assuming a linear decrease in the integrated in￾tensity as a function of time between different epochs of the -100-meter observations, suggests there is no missing flux in the -JVLA data. This is similar to the situation in Cep A. -4. Discussion -4.1. Morphology of Cep A and G34.26+0.15 -Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc -(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest -HMSFR (after Orion) and by far the closest NH3 (9,6) maser -known. About 16 compact (∼1 -00) radio sources (e.g., Hughes & -Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been -identified in Cep A. Hughes & Wouterloot (1984) discovered -these targets at radio wavelengths, which are UC and hypercom￾pact (HC) H ii regions and/or stellar wind sources, subsequently -named as HW sources. The HW2 object is one of the best known -examples of a protostellar jet or disk system driving a powerful -outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles -et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). -The observed NH3 (9,6) emission is slightly offset (−0 -00 -.28, 000 -.02) -from the center of HW2 (see Fig. 3). -G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc -(Kuchar & Bania 1994). It hosts four radio continuum compo￾nents named A, B, C, and D. Component C is a prototypical -cometary UC H ii region containing a compact head and a diffuse -tail that extends from east to west (e.g., Reid & Ho 1985; Garay -et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components -A and B are HC H ii regions, located to the east of component -C. An extended ring-like H ii region, called component D, is lo￾cated southeast of components A-C. One of the three observed -NH3 (9,6) emission line sources, M1, is close to the head of com￾ponent C, whereas M2 and M3 originate from another compact -region in the west of the HC H ii component A (see Fig. 4). -4.2. 
NH3 (9,6) emission possibly caused by maser action -As shown in Fig. 1, the NH3 (9,6) profiles in Cep A and -G34.26+0.15 are narrow (∆V1/2 ≤2.0 km s−1 -), much narrower -than the expected line widths (&4 km s−1 -) of thermal lines ob￾served at a similar angular resolution (e.g., Torrelles et al. 1985, -1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mook￾erjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Ve￾locity shifts with respect to the systemic velocities of the two -sources are both observed, that is, V ∼10 km s−1 -in Cep A and -V ∼4 km s−1 -in G34.26+0.15 (see details in Sect. 4.3). Further￾more, time variability is observed in the case of G34.26+0.15, -which is also a characteristic feature of maser emission. -Additional evidence of their maser nature is the high bright￾ness temperatures of the (9,6) emission spots toward Cep A and -G34.26+0.15. The spectral parameters are listed in Table A.3. -Because at least a significant part of the NH3 (9,6) emission -is not resolved by our JVLA observations, the derived bright￾ness temperatures are only lower limits. Nevertheless, the lower -limits on the brightness temperature are >800 K in Cep A (see -Table A.3), which is much higher than the expected thermal -gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito -et al. 2007; Beuther et al. 2018). This strongly suggests that -the NH3 (9,6) emission in Cep A is due to maser action. Be￾cause G34.26+0.15 is located at about five times the distance to -Cep A, beam dilution effects reduce the lower main beam bright￾ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta￾ble A.3). We also note that the luminosity of the NH3 (9,6) emis￾sion in G34.26+0.15 is higher than or comparable to that in Cep -A, depending on the epoch of our observations. -Finally, the non-detections of the (8,5) and (10,7) lines also -indicate that the (9,6) line is special. 
This allows us to derive -lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity -ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas -the NH3 (8,5) and (10,7) lines are para-NH3 (K , 3n) lines. -The minimum ortho-to-para ratios are in the range 12–42 and 1– -8 toward Cep A and G34.26+0.15, respectively. The statistical -weights for the ortho states are twice as large as those for the -para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel -et al. 2013). In Cep A, the line intensity ratios are far higher than -this factor of two. Thus, at least in Cep A the higher main beam -brightness peak temperature of the (9,6) emission is caused by -maser action, perhaps involving exponential amplification, and -the case of G34.26+0.15 is likely similar. -4.3. Comparison of NH3 (9,6) masers with previously -published (quasi-)thermal NH3 emission -The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines -show thermal emission toward Cep A over a velocity range of -−13 km s−1 ≤ VLSR ≤ −4 km s−1 -(Brown et al. 1981; Güsten -et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average -NH3 column density of ∼5×1015 cm−2 was estimated for a region -of 300 around HW2 (Torrelles et al. 1999). This high NH3 abun￾dance could provide a suitable environment for maser species. -Large line widths (∆V1/2 '7.0 km s−1 -) with VLSR ∼ −10 km s−1 -in both (1,1) and (2,2) lines were found toward HW2 (Torrelles -et al. 1993). The velocity is similar to the cloud’s systemic lo￾cal standard of rest (LSR) velocity of −11.2 km s−1 -, which -is based on CO (Narayanan & Walker 1996) and HCO+ ob￾servations (Gómez et al. 1999). Our (9,6) maser is redshifted -(−0.9 km s−1 ≤ VLSR ≤2.9 km s−1 -) and shares positions with -the outflowing gas seen in CO and HCO+ with similarly red￾shifted velocities. Therefore, we argue that the (9,6) masers are -related to outflowing gas. 
-In G34.26+0.15, a large NH3 column density, -1018.5±0.2 -cm−2 -, and a kinetic temperature of 225±75 K -were derived by Henkel et al. (1987) based on measurements -of 15 NH3 inversion transitions in the frequency range of -22.0–26.0 GHz. These did not include the (9,6) transition. -While these lines were measured with a beam size of about -4000, a comparison of the peak intensities of the optically thick -lines with the kinetic temperature reveals the size of the hot, -ammonia-emitting core to be only ∼2.500. All those measured -NH3 lines were quasi-thermal and had LSR velocities of -∼ 58.5 km s−1 -, close to the systemic velocity of ∼ 58.1 km s−1 -obtained from C17O observations (Wyrowski et al. 2012). -Their line widths (∆V1/2 ≥3.6 km s−1 -) are larger than what -we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1 -) for each (9,6) -maser component (see details in Table A.3). In all, we may -have observed four different (9,6) velocity features. Three -are blueshifted at VLSR ∼ 53.8 km s−1 -, 55.8 km s−1 -, and -56.8 km s−1 -, and a fourth, tentatively detected, at 62.5 km s−1 -. -This tentative redshifted feature was only potentially detected -with Effelsberg in 2020 January. The velocity is similar to that -of the JVLA measurements on the NH3 (1,1) absorption line -against continuum source C (∼ 7 -00 resolution; Keto et al. 1987) +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +(100.17 ± 0 +00 +.34) × (000.27 ± 0 +00 +.46) at P.A. = 53◦(M3) and are thus +comparable to or smaller than the beam size. +Overall, the NH3 (9,6) line from G34.26+0.15 weakened +during the time interval from 2020 January to 2021 August by +about 70%. A comparison between the JVLA spectrum and the +Effelsberg data, assuming a linear decrease in the integrated intensity as a function of time between different epochs of the +100-meter observations, suggests there is no missing flux in the +JVLA data. This is similar to the situation in Cep A. +4. 
Discussion +4.1. Morphology of Cep A and G34.26+0.15 +Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc +(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest +HMSFR (after Orion) and by far the closest NH3 (9,6) maser +known. About 16 compact (∼1 +00) radio sources (e.g., Hughes & +Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been +identified in Cep A. Hughes & Wouterloot (1984) discovered +these targets at radio wavelengths, which are UC and hypercompact (HC) H ii regions and/or stellar wind sources, subsequently +named as HW sources. The HW2 object is one of the best known +examples of a protostellar jet or disk system driving a powerful +outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles +et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). +The observed NH3 (9,6) emission is slightly offset (−0 +00 +.28, 000.02) +from the center of HW2 (see Fig. 3). +G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc +(Kuchar & Bania 1994). It hosts four radio continuum components named A, B, C, and D. Component C is a prototypical +cometary UC H ii region containing a compact head and a diffuse +tail that extends from east to west (e.g., Reid & Ho 1985; Garay +et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components +A and B are HC H ii regions, located to the east of component +C. An extended ring-like H ii region, called component D, is located southeast of components A-C. One of the three observed +NH3 (9,6) emission line sources, M1, is close to the head of component C, whereas M2 and M3 originate from another compact +region in the west of the HC H ii component A (see Fig. 4). +4.2. NH3 (9,6) emission possibly caused by maser action +As shown in Fig. 1, the NH3 (9,6) profiles in Cep A and +G34.26+0.15 are narrow (∆V1/2 ≤2.0 km s−1), much narrower +than the expected line widths (&4 km s−1) of thermal lines observed at a similar angular resolution (e.g., Torrelles et al. 
1985, +1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity shifts with respect to the systemic velocities of the two +sources are both observed, that is, V ∼10 km s−1in Cep A and +V ∼4 km s−1in G34.26+0.15 (see details in Sect. 4.3). Furthermore, time variability is observed in the case of G34.26+0.15, +which is also a characteristic feature of maser emission. +Additional evidence of their maser nature is the high brightness temperatures of the (9,6) emission spots toward Cep A and +G34.26+0.15. The spectral parameters are listed in Table A.3. +Because at least a significant part of the NH3 (9,6) emission +is not resolved by our JVLA observations, the derived brightness temperatures are only lower limits. Nevertheless, the lower +limits on the brightness temperature are >800 K in Cep A (see +Table A.3), which is much higher than the expected thermal +gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito +et al. 2007; Beuther et al. 2018). This strongly suggests that +the NH3 (9,6) emission in Cep A is due to maser action. Because G34.26+0.15 is located at about five times the distance to +Cep A, beam dilution effects reduce the lower main beam brightness temperature limit to 400 K in G34.26+0.15 (M2) (see Table A.3). We also note that the luminosity of the NH3 (9,6) emission in G34.26+0.15 is higher than or comparable to that in Cep +A, depending on the epoch of our observations. +Finally, the non-detections of the (8,5) and (10,7) lines also +indicate that the (9,6) line is special. This allows us to derive +lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity +ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas +the NH3 (8,5) and (10,7) lines are para-NH3 (K , 3n) lines. +The minimum ortho-to-para ratios are in the range 12–42 and 1– +8 toward Cep A and G34.26+0.15, respectively. 
The statistical +weights for the ortho states are twice as large as those for the +para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel +et al. 2013). In Cep A, the line intensity ratios are far higher than +this factor of two. Thus, at least in Cep A the higher main beam +brightness peak temperature of the (9,6) emission is caused by +maser action, perhaps involving exponential amplification, and +the case of G34.26+0.15 is likely similar. +4.3. Comparison of NH3 (9,6) masers with previously +published (quasi-)thermal NH3 emission +The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines +show thermal emission toward Cep A over a velocity range of +−13 km s−1 ≤ VLSR ≤ −4 km s−1(Brown et al. 1981; Güsten +et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average +NH3 column density of ∼5×1015 cm−2 was estimated for a region +of 300 around HW2 (Torrelles et al. 1999). This high NH3 abundance could provide a suitable environment for maser species. +Large line widths (∆V1/2 '7.0 km s−1) with VLSR ∼ −10 km s−1 +in both (1,1) and (2,2) lines were found toward HW2 (Torrelles +et al. 1993). The velocity is similar to the cloud’s systemic local standard of rest (LSR) velocity of −11.2 km s−1 +, which +is based on CO (Narayanan & Walker 1996) and HCO+ observations (Gómez et al. 1999). Our (9,6) maser is redshifted +(−0.9 km s−1 ≤ VLSR ≤2.9 km s−1) and shares positions with +the outflowing gas seen in CO and HCO+ with similarly redshifted velocities. Therefore, we argue that the (9,6) masers are +related to outflowing gas. +In G34.26+0.15, a large NH3 column density, +1018.5±0.2cm−2, and a kinetic temperature of 225±75 K +were derived by Henkel et al. (1987) based on measurements +of 15 NH3 inversion transitions in the frequency range of +22.0–26.0 GHz. These did not include the (9,6) transition. 
+While these lines were measured with a beam size of about +4000, a comparison of the peak intensities of the optically thick +lines with the kinetic temperature reveals the size of the hot, +ammonia-emitting core to be only ∼2.500. All those measured +NH3 lines were quasi-thermal and had LSR velocities of +∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1 +obtained from C17O observations (Wyrowski et al. 2012). +Their line widths (∆V1/2 ≥3.6 km s−1) are larger than what +we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1) for each (9,6) +maser component (see details in Table A.3). In all, we may +have observed four different (9,6) velocity features. Three +are blueshifted at VLSR ∼ 53.8 km s−1, 55.8 km s−1, and +56.8 km s−1 +, and a fourth, tentatively detected, at 62.5 km s−1. +This tentative redshifted feature was only potentially detected +with Effelsberg in 2020 January. The velocity is similar to that +of the JVLA measurements on the NH3 (1,1) absorption line +against continuum source C (∼ 7 +00 resolution; Keto et al. 1987) Article number, page 5 of 10 -A&A proofs: manuscript no. mainArxiv -and the NH3 (3,3) emission surrounding continuum source B as -well as the head of C (100 -.4×1 -00 -.2 resolution; Heaton et al. 1989). -However, we did not find this redshifted component in our -JVLA observations. Therefore, its position within G34.26+0.15 -cannot be determined. The blueshifted (9,6) masers with a -velocity range of 53.8–56.8 km s−1 -(M1, M2, and M3) show -velocities compatible with those of the NH3 (3,3) emission at -the proper positions (Heaton et al. 1989), which might be a -suitable environment for maser species. -4.4. Comparison of NH3 (9,6) masers with other maser lines -To characterize the environment of NH3 (9,6) masers, we can -compare their positions with respect to those of other maser -species (i.e., OH, H2O, and CH3OH). Toward Cep A HW2, -many CH3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna -et al. 
2017) and H2O maser spots (e.g., Torrelles et al. 1998, -2011; Sobolev et al. 2018) are detected and are associated with -its disk. Sobolev et al. (2018) also found that most of the H2O -maser flux is associated with the compact H ii region HW3d. OH -maser features close to the H ii regions are also seen in HW2 -(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These -three kinds of masers in Cep A have a large velocity range of -−25 km s−1 ≤ VLSR ≤ −2 km s−1 -and are widespread around -HW2 and HW3, while NH3 (9,6) emission is only detected at -−0.9 km s−1 ≤ VLSR ≤2.9 km s−1 -toward a sub-arcsecond￾sized region to the west of the peak continuum position of HW2 -(see Fig. 3). This suggests that the NH3 (9,6) maser in Cep A -is unique and not related to maser spots seen in other molecular -species. -In G34.26+0.15, OH (Zheng et al. 2000), H2O (Imai et al. -2011), and CH3OH (Bartkiewicz et al. 2016) masers have been -detected east of source C (Fig. 4), and none of them coincides -with the head of C. The NH3 (9,6) maser M1 is also found -slightly off the head of source C. This could suggest that M1 -is powered by continuum source C or by an outflow. Near com￾ponent B, there are some OH and CH3OH masers but no H2O -or NH3 masers. A group of H2O masers, well-known tracers -of outflows, with a large velocity distribution of 43 km s−1 ≤ -VLSR ≤54 km s−1 -, was found to the west of the centimeter￾continuum source A and close to the peak of the millimeter￾continuum emission (see details in our Fig. A.2 and also in Fig. 5 -of Imai et al. 2011). The closeness of NH3 (9,6) maser spots M2 -and M3 to this group of water masers and their similar velocities -again suggest an association of NH3 (9,6) masers with outflow -activity. -4.5. Constraints on pumping scenarios -Our observations have resulted in the detection of NH3 (9,6) -masers in Cep A and G34.26+0.15. The new detections could -provide additional constraints on the maser line’s pumping -mechanism. 
As mentioned in Sect. 1, the pumping mechanism -of the (9,6) maser is unclear (Madden et al. 1986; Brown & -Cragg 1991). Previous studies have suggested that there are three -main pumping scenarios to explain the observed NH3 maser -lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra￾diation from the dust continuum emission, (2) line overlap, and -(3) collisional pumping. -For the first mechanism, infrared photons near 10 µm are -needed for vibrational excitation. The high dust temperature -(∼300 K) of W51-IRS2 can provide substantial infrared pho￾tons near 10 µm, which is used for radiative pumping (Henkel -et al. 2013). Both Cep A and G34.26+0.15 have similar kinetic -temperatures of &200 K (Henkel et al. 1987; Patel et al. 2005; -Comito et al. 2007; Beuther et al. 2018). This suggests that -high kinetic temperatures are needed to excite NH3 (9,6) masers. -However, it should be noted that the silicate dust absorption fea￾ture might dominate at 10 µm (see the spectral energy distribu￾tion of Cep A in De Buizer et al. 2017). Additionally, there is -no bright infrared emission around the two (9,6) masers, M2 and -M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer -et al. 2003 for a 10.5 µm map). This indicates that the pumping -mechanism via infrared photons near 10 µm may not be viable -to explain the (9,6) masers in Cep A and G34.26+0.15. Further￾more, Wilson & Schilke (1993) argued that radiative pumping by -dust emission tends to excite multiple adjacent ammonia maser -transitions, which appears to contradict our failure to detect the -adjacent (8,5) and (10,7) lines (with respect to quantum numbers -and frequency) and to only measure the (9,6) transitions in Cep -A and G34.26+0.15. Therefore, we suggest that infrared radia￾tion from dust is not the main pumping source. -Madden et al. (1986) suggested that there might be some -line overlaps between the rotational NH3 transitions in the far￾infrared band. 
However, this would be unlikely to affect only the -(9,6) line. Nevertheless, far-infrared spectral observations will -be needed to clarify this scenario. -Based on our observations, the (9,6) maser spots are close -to, but not coincident with, the peaks of the radio continuum -emission in Cep A and G34.26+0.15. Furthermore, the (9,6) -masers show velocity offsets with respect to their systemic ve￾locities. This indicates that the (9,6) masers are located at the -base of outflows, similar to the H2O masers. This is supported -by VLBI observations that show that (9,6) masers tend to be -closely associated with H2O masers (Pratap et al. 1991). The ob￾served time variability in G34.26+0.15 and W51-IRS2 can also -be attributed to episodic molecular outflows. This indicates that -collisional pumping could be the driver of the (9,6) maser. On -the other hand, collisional pumping has been successfully used -to explain the NH3 (3,3) maser (Walmsley & Ungerechts 1983; -Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to -pump from the K=0 level to the K=3 level with parity changes, -that is, the upper level of the (3,3) metastable transition will be -overpopulated. NH3 (9,6) arises from the ortho species, so a sim￾ilar mechanism might also occur in the case of the (9,6) transi￾tion. Further measurements of collisional rates of ammonia will -allow us to test this scenario. -5. Summary -We report the discovery of NH3 (9,6) masers in two HMSFRs, -Cep A and G34.26+0.15. The narrow line width of the emis￾sion features (∆V1/2 ≤2.0 km s−1 -) and their high brightness tem￾peratures (> 400 K) indicate the maser nature of the lines. -The intensity of the (9,6) maser in G34.26+0.15 is decreasing -with time, while toward Cep A the maser is stable based on 20 -months of monitoring at Effelsberg. 
Linearly interpolating the -integrated intensities obtained at Effelsberg as a function of time, -the JVLA measurements show that there is no missing flux den￾sity on scales on the order of 1.2 arcsec (4 ×10−3 -and 2 ×10−2 pc) -to the total single-dish flux. The JVLA-detected emission in￾dicates that the NH3 (9,6) maser in Cep A originates from a -sub-arcsecond-sized region slightly (000 -.28 ± 0 -00 -.10) to the west -of the peak position of the 1.36 cm continuum object, HW2. In -G34.26+0.15, three NH3 (9,6) maser spots are observed: one is -close to the head of the cometary UC H ii region C, and the other -two are emitted from a compact region to the west of the HC H ii +A&A proofs: manuscript no. mainArxiv +and the NH3 (3,3) emission surrounding continuum source B as +well as the head of C (100.4×1 +00 +.2 resolution; Heaton et al. 1989). +However, we did not find this redshifted component in our +JVLA observations. Therefore, its position within G34.26+0.15 +cannot be determined. The blueshifted (9,6) masers with a +velocity range of 53.8–56.8 km s−1(M1, M2, and M3) show +velocities compatible with those of the NH3 (3,3) emission at +the proper positions (Heaton et al. 1989), which might be a +suitable environment for maser species. +4.4. Comparison of NH3 (9,6) masers with other maser lines +To characterize the environment of NH3 (9,6) masers, we can +compare their positions with respect to those of other maser +species (i.e., OH, H2O, and CH3OH). Toward Cep A HW2, +many CH3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna +et al. 2017) and H2O maser spots (e.g., Torrelles et al. 1998, +2011; Sobolev et al. 2018) are detected and are associated with +its disk. Sobolev et al. (2018) also found that most of the H2O +maser flux is associated with the compact H ii region HW3d. OH +maser features close to the H ii regions are also seen in HW2 +(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). 
These +three kinds of masers in Cep A have a large velocity range of +−25 km s−1 ≤ VLSR ≤ −2 km s−1and are widespread around +HW2 and HW3, while NH3 (9,6) emission is only detected at +−0.9 km s−1 ≤ VLSR ≤2.9 km s−1toward a sub-arcsecondsized region to the west of the peak continuum position of HW2 +(see Fig. 3). This suggests that the NH3 (9,6) maser in Cep A +is unique and not related to maser spots seen in other molecular +species. +In G34.26+0.15, OH (Zheng et al. 2000), H2O (Imai et al. +2011), and CH3OH (Bartkiewicz et al. 2016) masers have been +detected east of source C (Fig. 4), and none of them coincides +with the head of C. The NH3 (9,6) maser M1 is also found +slightly off the head of source C. This could suggest that M1 +is powered by continuum source C or by an outflow. Near component B, there are some OH and CH3OH masers but no H2O +or NH3 masers. A group of H2O masers, well-known tracers +of outflows, with a large velocity distribution of 43 km s−1 ≤ +VLSR ≤54 km s−1, was found to the west of the centimetercontinuum source A and close to the peak of the millimetercontinuum emission (see details in our Fig. A.2 and also in Fig. 5 +of Imai et al. 2011). The closeness of NH3 (9,6) maser spots M2 +and M3 to this group of water masers and their similar velocities +again suggest an association of NH3 (9,6) masers with outflow +activity. +4.5. Constraints on pumping scenarios +Our observations have resulted in the detection of NH3 (9,6) +masers in Cep A and G34.26+0.15. The new detections could +provide additional constraints on the maser line’s pumping +mechanism. As mentioned in Sect. 1, the pumping mechanism +of the (9,6) maser is unclear (Madden et al. 1986; Brown & +Cragg 1991). Previous studies have suggested that there are three +main pumping scenarios to explain the observed NH3 maser +lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation from the dust continuum emission, (2) line overlap, and +(3) collisional pumping. 
+For the first mechanism, infrared photons near 10 µm are +needed for vibrational excitation. The high dust temperature +(∼300 K) of W51-IRS2 can provide substantial infrared photons near 10 µm, which is used for radiative pumping (Henkel +et al. 2013). Both Cep A and G34.26+0.15 have similar kinetic +temperatures of &200 K (Henkel et al. 1987; Patel et al. 2005; +Comito et al. 2007; Beuther et al. 2018). This suggests that +high kinetic temperatures are needed to excite NH3 (9,6) masers. +However, it should be noted that the silicate dust absorption feature might dominate at 10 µm (see the spectral energy distribution of Cep A in De Buizer et al. 2017). Additionally, there is +no bright infrared emission around the two (9,6) masers, M2 and +M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer +et al. 2003 for a 10.5 µm map). This indicates that the pumping +mechanism via infrared photons near 10 µm may not be viable +to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore, Wilson & Schilke (1993) argued that radiative pumping by +dust emission tends to excite multiple adjacent ammonia maser +transitions, which appears to contradict our failure to detect the +adjacent (8,5) and (10,7) lines (with respect to quantum numbers +and frequency) and to only measure the (9,6) transitions in Cep +A and G34.26+0.15. Therefore, we suggest that infrared radiation from dust is not the main pumping source. +Madden et al. (1986) suggested that there might be some +line overlaps between the rotational NH3 transitions in the farinfrared band. However, this would be unlikely to affect only the +(9,6) line. Nevertheless, far-infrared spectral observations will +be needed to clarify this scenario. +Based on our observations, the (9,6) maser spots are close +to, but not coincident with, the peaks of the radio continuum +emission in Cep A and G34.26+0.15. Furthermore, the (9,6) +masers show velocity offsets with respect to their systemic velocities. 
This indicates that the (9,6) masers are located at the +base of outflows, similar to the H2O masers. This is supported +by VLBI observations that show that (9,6) masers tend to be +closely associated with H2O masers (Pratap et al. 1991). The observed time variability in G34.26+0.15 and W51-IRS2 can also +be attributed to episodic molecular outflows. This indicates that +collisional pumping could be the driver of the (9,6) maser. On +the other hand, collisional pumping has been successfully used +to explain the NH3 (3,3) maser (Walmsley & Ungerechts 1983; +Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to +pump from the K=0 level to the K=3 level with parity changes, +that is, the upper level of the (3,3) metastable transition will be +overpopulated. NH3 (9,6) arises from the ortho species, so a similar mechanism might also occur in the case of the (9,6) transition. Further measurements of collisional rates of ammonia will +allow us to test this scenario. +5. Summary +We report the discovery of NH3 (9,6) masers in two HMSFRs, +Cep A and G34.26+0.15. The narrow line width of the emission features (∆V1/2 ≤2.0 km s−1 +) and their high brightness temperatures (> 400 K) indicate the maser nature of the lines. +The intensity of the (9,6) maser in G34.26+0.15 is decreasing +with time, while toward Cep A the maser is stable based on 20 +months of monitoring at Effelsberg. Linearly interpolating the +integrated intensities obtained at Effelsberg as a function of time, +the JVLA measurements show that there is no missing flux density on scales on the order of 1.2 arcsec (4 ×10−3 +and 2 ×10−2 pc) +to the total single-dish flux. The JVLA-detected emission indicates that the NH3 (9,6) maser in Cep A originates from a +sub-arcsecond-sized region slightly (000 +.28 ± 0 +00 +.10) to the west +of the peak position of the 1.36 cm continuum object, HW2. 
In +G34.26+0.15, three NH3 (9,6) maser spots are observed: one is +close to the head of the cometary UC H ii region C, and the other +two are emitted from a compact region to the west of the HC H ii Article number, page 6 of 10 -Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -region A. We suggest that the (9,6) masers may be connected to -outflowing gas. Higher angular resolution JVLA and VLBI ob￾servations are planned to provide more accurate positions and -constraints on pumping scenarios. -Acknowledgements. We would like to thank the anonymous referee for the use￾ful comments that improve the manuscript. Y.T.Y. is a member of the Interna￾tional Max Planck Research School (IMPRS) for Astronomy and Astrophysics -at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China -Scholarship Council (CSC) for its support. We would like to thank the staff at -the Effelsberg for their help provided during the observations. We thank the staff -of the JVLA, especially Tony Perreault and Edward Starr, for their assistance -with the observations and data reduction. This research has made use of the -NASA/IPAC Infrared Science Archive, which is funded by the National Aero￾nautics and Space Administration and operated by the California Institute of -Technology. -References -Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MN￾RAS, 361, 623 -Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104 -Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953 -Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100 -Beuther, H., Walsh, A. J., Thorwirth, S., et al. 2007, A&A, 466, 989 -Brogan, C. L., Hunter, T. R., Cyganowski, C. J., et al. 2011, ApJ, 739, L16 -Brown, A. T., Little, L. T., MacDonald, G. H., Riley, P. W., & Matheson, D. N. -1981, MNRAS, 195, 607 -Brown, R. D. & Cragg, D. M. 
1991, ApJ, 378, 445 -Carrasco-González, C., Sanna, A., Rodríguez-Kamenetzky, A., et al. 2021, ApJ, -914, L1 -Cesaroni, R., Walmsley, C. M., & Churchwell, E. 1992, A&A, 256, 618 -Cheung, A. C., Rank, D. M., Townes, C. H., Thornton, D. D., & Welch, W. J. -1968, Phys. Rev. Lett., 21, 1701 -Churchwell, E., Babler, B. L., Meade, M. R., et al. 2009, PASP, 121, 213 -Cohen, R. J. & Brebner, G. C. 1985, MNRAS, 216, 51P -Comito, C., Schilke, P., Endesfelder, U., Jiménez-Serra, I., & Martín-Pintado, J. -2007, A&A, 469, 207 -Curiel, S., Ho, P. T. P., Patel, N. A., et al. 2006, ApJ, 638, 878 -Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988, -MNRAS, 235, 229 -De Buizer, J. M., Liu, M., Tan, J. C., et al. 2017, ApJ, 843, 33 -De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598, -1127 -Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M. -2011, ApJ, 733, 71 -Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P -Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036 -Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ, -459, 193 -Garay, G., Rodriguez, L. F., & van Gorkom, J. H. 1986, ApJ, 309, 553 -Gaume, R. A., Wilson, T. L., & Johnston, K. J. 1996, ApJ, 457, L47 -Goddi, C., Greenhill, L. J., Humphreys, E. M. L., Chandler, C. J., & Matthews, -L. D. 2011, ApJ, 739, L13 -Goddi, C., Henkel, C., Zhang, Q., Zapata, L., & Wilson, T. L. 2015, A&A, 573, -A109 -Gómez, J. F., Sargent, A. I., Torrelles, J. M., et al. 1999, ApJ, 514, 287 -Güsten, R., Chini, R., & Neckel, T. 1984, A&A, 138, 205 -Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148 -Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90 -Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137 -Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239 -Hoffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 -Hogge, T. G., Jackson, J. M., Allingham, D., et al. 
2019, ApJ, 887, 79 -Hughes, V. A. 1991, ApJ, 383, 280 -Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204 -Hunter, T. R., Brogan, C. L., Indebetouw, R., & Cyganowski, C. J. 2008, ApJ, -680, 1271 -Imai, H., Omi, R., Kurayama, T., et al. 2011, PASJ, 63, 1293 -Keto, E. R., Ho, P. T. P., & Reid, M. J. 1987, ApJ, 323, L117 -Kraemer, K. E. & Jackson, J. M. 1995, ApJ, 439, L9 -Kuchar, T. A. & Bania, T. M. 1994, ApJ, 436, 117 -Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D. -1986, ApJ, 300, L79 -Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33 -Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352 -Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 -Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 -McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189 -McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in As￾tronomical Society of the Pacific Conference Series, Vol. 376, Astronomical -Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J. -Bell, 127 -Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157 -Menten, K. M. 1991, ApJ, 380, L75 -Mills, E. A. C., Ginsburg, A., Clements, A. R., et al. 2018, ApJ, 869, L14 -Mookerjea, B., Casper, E., Mundy, L. G., & Looney, L. W. 2007, ApJ, 659, 447 -Moscadelli, L., Reid, M. J., Menten, K. M., et al. 2009, ApJ, 693, 406 -Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844 -Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331 -Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109 -Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19 -Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Ca￾soli, T. Contini, J. M. Hameury, & L. Pagani, 721 -Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87 -Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991, -ApJ, 373, L13 -Reid, M. J. & Ho, P. T. P. 1985, ApJ, 288, L17 -Rodriguez, L. 
F., Ho, P. T. P., & Moran, J. M. 1980, ApJ, 240, L149 -Sanna, A., Moscadelli, L., Surcis, G., et al. 2017, A&A, 603, A94 -Sewilo, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2004, ApJ, -605, 285 -Sewiło, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2011, ApJS, -194, 44 -Sobolev, A. M., Moran, J. M., Gray, M. D., et al. 2018, ApJ, 856, 60 -Sugiyama, K., Fujisawa, K., Doi, A., et al. 2008, PASJ, 60, 1001 -Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1998, ApJ, 509, 262 -Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1999, MNRAS, 307, 58 -Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1985, ApJ, 288, 595 -Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1986, ApJ, 305, 721 -Torrelles, J. M., Patel, N. A., Curiel, S., et al. 2011, MNRAS, 410, 627 -Torrelles, J. M., Verdes-Montenegro, L., Ho, P. T. P., Rodriguez, L. F., & Canto, -J. 1993, ApJ, 410, 202 -Towner, A. P. M., Brogan, C. L., Hunter, T. R., & Cyganowski, C. J. 2021, ApJ, -923, 263 -Umemoto, T., Mikami, H., Yamamoto, S., & Hirano, N. 1999, ApJ, 525, L105 -Urquhart, J. S., Morgan, L. K., Figura, C. C., et al. 2011, MNRAS, 418, 1689 -Walmsley, C. M. & Ungerechts, H. 1983, A&A, 122, 164 -Walsh, A. J., Breen, S. L., Britton, T., et al. 2011, MNRAS, 416, 1764 -Walsh, A. J., Longmore, S. N., Thorwirth, S., Urquhart, J. S., & Purcell, C. R. -2007, MNRAS, 382, L35 -Wang, K., Zhang, Q., Wu, Y., Li, H.-b., & Zhang, H. 2012, ApJ, 745, L30 -Wilson, T. L., Batrla, W., & Pauls, T. A. 1982, A&A, 110, L20 -Wilson, T. L. & Henkel, C. 1988, A&A, 206, L26 -Wilson, T. L., Johnston, K. J., & Henkel, C. 1990, A&A, 229, L1 -Wilson, T. L. & Schilke, P. 1993, in Lecture Notes in Physics, Astrophysical -Masers, ed. A. W. Clegg & G. E. Nedoluha, Vol. 412, 123–126 -Wyrowski, F., Güsten, R., Menten, K. M., Wiesemeyer, H., & Klein, B. 2012, -A&A, 542, L15 -Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63 -Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 
1999, ApJ, 527, L117 -Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192 +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +region A. We suggest that the (9,6) masers may be connected to +outflowing gas. Higher angular resolution JVLA and VLBI observations are planned to provide more accurate positions and +constraints on pumping scenarios. +Acknowledgements. We would like to thank the anonymous referee for the useful comments that improve the manuscript. Y.T.Y. is a member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics +at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China +Scholarship Council (CSC) for its support. We would like to thank the staff at +the Effelsberg for their help provided during the observations. We thank the staff +of the JVLA, especially Tony Perreault and Edward Starr, for their assistance +with the observations and data reduction. This research has made use of the +NASA/IPAC Infrared Science Archive, which is funded by the National Aeronautics and Space Administration and operated by the California Institute of +Technology. +References +Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MNRAS, 361, 623 +Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104 +Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953 +Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100 +Beuther, H., Walsh, A. J., Thorwirth, S., et al. 2007, A&A, 466, 989 +Brogan, C. L., Hunter, T. R., Cyganowski, C. J., et al. 2011, ApJ, 739, L16 +Brown, A. T., Little, L. T., MacDonald, G. H., Riley, P. W., & Matheson, D. N. +1981, MNRAS, 195, 607 +Brown, R. D. & Cragg, D. M. 1991, ApJ, 378, 445 +Carrasco-González, C., Sanna, A., Rodríguez-Kamenetzky, A., et al. 2021, ApJ, +914, L1 +Cesaroni, R., Walmsley, C. M., & Churchwell, E. 1992, A&A, 256, 618 +Cheung, A. C., Rank, D. 
M., Townes, C. H., Thornton, D. D., & Welch, W. J. +1968, Phys. Rev. Lett., 21, 1701 +Churchwell, E., Babler, B. L., Meade, M. R., et al. 2009, PASP, 121, 213 +Cohen, R. J. & Brebner, G. C. 1985, MNRAS, 216, 51P +Comito, C., Schilke, P., Endesfelder, U., Jiménez-Serra, I., & Martín-Pintado, J. +2007, A&A, 469, 207 +Curiel, S., Ho, P. T. P., Patel, N. A., et al. 2006, ApJ, 638, 878 +Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988, +MNRAS, 235, 229 +De Buizer, J. M., Liu, M., Tan, J. C., et al. 2017, ApJ, 843, 33 +De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598, +1127 +Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M. +2011, ApJ, 733, 71 +Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P +Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036 +Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ, +459, 193 +Garay, G., Rodriguez, L. F., & van Gorkom, J. H. 1986, ApJ, 309, 553 +Gaume, R. A., Wilson, T. L., & Johnston, K. J. 1996, ApJ, 457, L47 +Goddi, C., Greenhill, L. J., Humphreys, E. M. L., Chandler, C. J., & Matthews, +L. D. 2011, ApJ, 739, L13 +Goddi, C., Henkel, C., Zhang, Q., Zapata, L., & Wilson, T. L. 2015, A&A, 573, +A109 +Gómez, J. F., Sargent, A. I., Torrelles, J. M., et al. 1999, ApJ, 514, 287 +Güsten, R., Chini, R., & Neckel, T. 1984, A&A, 138, 205 +Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148 +Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90 +Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137 +Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239 +Hoffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 +Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79 +Hughes, V. A. 1991, ApJ, 383, 280 +Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204 +Hunter, T. R., Brogan, C. L., Indebetouw, R., & Cyganowski, C. J. 
2008, ApJ, +680, 1271 +Imai, H., Omi, R., Kurayama, T., et al. 2011, PASJ, 63, 1293 +Keto, E. R., Ho, P. T. P., & Reid, M. J. 1987, ApJ, 323, L117 +Kraemer, K. E. & Jackson, J. M. 1995, ApJ, 439, L9 +Kuchar, T. A. & Bania, T. M. 1994, ApJ, 436, 117 +Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D. +1986, ApJ, 300, L79 +Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33 +Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352 +Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 +Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 +McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189 +McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical Society of the Pacific Conference Series, Vol. 376, Astronomical +Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J. +Bell, 127 +Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157 +Menten, K. M. 1991, ApJ, 380, L75 +Mills, E. A. C., Ginsburg, A., Clements, A. R., et al. 2018, ApJ, 869, L14 +Mookerjea, B., Casper, E., Mundy, L. G., & Looney, L. W. 2007, ApJ, 659, 447 +Moscadelli, L., Reid, M. J., Menten, K. M., et al. 2009, ApJ, 693, 406 +Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844 +Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331 +Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109 +Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19 +Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Casoli, T. Contini, J. M. Hameury, & L. Pagani, 721 +Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87 +Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991, +ApJ, 373, L13 +Reid, M. J. & Ho, P. T. P. 1985, ApJ, 288, L17 +Rodriguez, L. F., Ho, P. T. P., & Moran, J. M. 1980, ApJ, 240, L149 +Sanna, A., Moscadelli, L., Surcis, G., et al. 2017, A&A, 603, A94 +Sewilo, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 
2004, ApJ, +605, 285 +Sewiło, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2011, ApJS, +194, 44 +Sobolev, A. M., Moran, J. M., Gray, M. D., et al. 2018, ApJ, 856, 60 +Sugiyama, K., Fujisawa, K., Doi, A., et al. 2008, PASJ, 60, 1001 +Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1998, ApJ, 509, 262 +Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1999, MNRAS, 307, 58 +Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1985, ApJ, 288, 595 +Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1986, ApJ, 305, 721 +Torrelles, J. M., Patel, N. A., Curiel, S., et al. 2011, MNRAS, 410, 627 +Torrelles, J. M., Verdes-Montenegro, L., Ho, P. T. P., Rodriguez, L. F., & Canto, +J. 1993, ApJ, 410, 202 +Towner, A. P. M., Brogan, C. L., Hunter, T. R., & Cyganowski, C. J. 2021, ApJ, +923, 263 +Umemoto, T., Mikami, H., Yamamoto, S., & Hirano, N. 1999, ApJ, 525, L105 +Urquhart, J. S., Morgan, L. K., Figura, C. C., et al. 2011, MNRAS, 418, 1689 +Walmsley, C. M. & Ungerechts, H. 1983, A&A, 122, 164 +Walsh, A. J., Breen, S. L., Britton, T., et al. 2011, MNRAS, 416, 1764 +Walsh, A. J., Longmore, S. N., Thorwirth, S., Urquhart, J. S., & Purcell, C. R. +2007, MNRAS, 382, L35 +Wang, K., Zhang, Q., Wu, Y., Li, H.-b., & Zhang, H. 2012, ApJ, 745, L30 +Wilson, T. L., Batrla, W., & Pauls, T. A. 1982, A&A, 110, L20 +Wilson, T. L. & Henkel, C. 1988, A&A, 206, L26 +Wilson, T. L., Johnston, K. J., & Henkel, C. 1990, A&A, 229, L1 +Wilson, T. L. & Schilke, P. 1993, in Lecture Notes in Physics, Astrophysical +Masers, ed. A. W. Clegg & G. E. Nedoluha, Vol. 412, 123–126 +Wyrowski, F., Güsten, R., Menten, K. M., Wiesemeyer, H., & Klein, B. 2012, +A&A, 542, L15 +Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63 +Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117 +Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192 Article number, page 7 of 10 -A&A proofs: manuscript no. mainArxiv -Appendix A: -Table A.1. 
Summary of NH3 (9, 6) maser observations. -Source Telescope Beam Epoch Channel S ν rms R -S νdv VLSR ∆V1/2 -size spacing -(km s−1 -) (Jy) (mJy) (Jy km s−1 -) (km s−1 -) -Cep A Effelsberg 4900 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04 -Effelsberg 4900 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04 -Effelsberg 4900 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05 -JVLAa 1 -00 -.47 × 0 -00 -.99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12 -Effelsberg 4900 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01 -0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05 -Effelsberg 4900 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07 -0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07 -0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07 -0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07 -0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07 -G34.26+0.15 Effelsberg 4900 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13 -Effelsberg 4900 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12 -Effelsberg 4900 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14 -JVLAb 1 -00 -.33 × 1 -00 -.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09 -0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12 -0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08 -Effelsberg 4900 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12 -0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09 -0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06 -Effelsberg 4900 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05 -Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the Effelsberg-beam￾sized region (FWHM 4900). (b) For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius 300 -.5, which contains all -detected NH3 (9,6) emissions. -Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. 
-Source R.A. Dec. Size P.A. S ν -(h m s) (◦ 0 00) (arcsec) (deg) (mJy) -Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 -HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 -HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36 -HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7 -HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92 -HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78 -G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33 -B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110 -C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660 +A&A proofs: manuscript no. mainArxiv +Appendix A: +Table A.1. Summary of NH3 (9, 6) maser observations. +Source Telescope Beam Epoch Channel S ν rms RS νdv VLSR ∆V1/2 +size spacing +(km s−1) (Jy) (mJy) (Jy km s−1) (km s−1) +Cep A Effelsberg 4900 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04 +Effelsberg 4900 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04 +Effelsberg 4900 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05 +JVLAa 1 +00 +.47 × 0 +00 +.99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12 +Effelsberg 4900 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01 +0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05 +Effelsberg 4900 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07 +0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07 +0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07 +0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07 +0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07 +G34.26+0.15 Effelsberg 4900 2020, Jan. 
03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13 +Effelsberg 4900 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12 +Effelsberg 4900 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14 +JVLAb 1 +00 +.33 × 1 +00 +.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09 +0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12 +0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08 +Effelsberg 4900 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12 +0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09 +0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06 +Effelsberg 4900 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05 +Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the Effelsberg-beamsized region (FWHM 4900). (b) For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius 300 +.5, which contains all +detected NH3 (9,6) emissions. +Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. +Source R.A. Dec. Size P.A. 
S ν +(h m s) (◦ 0 00) (arcsec) (deg) (mJy) +Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 +HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 +HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36 +HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7 +HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92 +HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78 +G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33 +B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110 +C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660 Article number, page 8 of 10 -Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -Table A.3. NH3 (9,6) maser positions derived from the JVLA observations. -Source R.A. Dec. S ν TMB VLSR ∆V1/2 -(h m s) (◦ 0 00) (mJy beam−1 -) (K) (km s−1 -) -Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02 -343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05 -G34.26+0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14 -M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08 -57.8 146.2 54.35 ± 0.07 0.83 ± 0.14 -180.8 457.6 55.83 ± 0.01 0.59 ± 0.03 -M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08 -73.7 186.3 55.78 ± 0.04 0.79 ± 0.08 -Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000 = 22h56m17s -.972, -and δJ2000 = 62◦0104900 -.587, the peak position of the continuum map, is marked by a red cross. 
Slightly to the west of the cross is the white ellipse -denoting the position of the NH3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array -(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam−1 -. -OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, -respectively. The color bar on the right-hand side indicates the velocity range (VLSR) of maser spots. +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Table A.3. NH3 (9,6) maser positions derived from the JVLA observations. +Source R.A. Dec. S ν TMB VLSR ∆V1/2 +(h m s) (◦ 0 00) (mJy beam−1) (K) (km s−1) +Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02 +343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05 +G34.26+0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14 +M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08 +57.8 146.2 54.35 ± 0.07 0.83 ± 0.14 +180.8 457.6 55.83 ± 0.01 0.59 ± 0.03 +M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08 +73.7 186.3 55.78 ± 0.04 0.79 ± 0.08 +Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000 = 22h56m17s.972, +and δJ2000 = 62◦0104900.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse +denoting the position of the NH3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array +(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam−1. +OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), and CH3OH (Sanna et al. 
2017) masers are presented as diamonds, circles, and squares, +respectively. The color bar on the right-hand side indicates the velocity range (VLSR) of maser spots. Article number, page 9 of 10 -A&A proofs: manuscript no. mainArxiv -Fig. A.2. 1.36 cm JVLA continuum map of G34.26+0.15 presented as gray shaded areas. The reference position is αJ2000 = 18h53m18s -.560, and -δJ2000 = 01◦1405800 -.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH3 (9,6) emission with stars at their -center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from -Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam−1 -. OH (Zheng et al. 2000), H2O (Imai -et al. 2011), and CH3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates -the velocity range (VLSR) of maser spots. -Article number, page 10 of 10 +A&A proofs: manuscript no. mainArxiv +Fig. A.2. 1.36 cm JVLA continuum map of G34.26+0.15 presented as gray shaded areas. The reference position is αJ2000 = 18h53m18s.560, and +δJ2000 = 01◦1405800.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH3 (9,6) emission with stars at their +center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from +Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam−1. OH (Zheng et al. 2000), H2O (Imai +et al. 2011), and CH3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates +the velocity range (VLSR) of maser spots. 
+Article number, page 10 of \ No newline at end of file diff --git a/read/results/pdfium/2201.00022.txt b/read/results/pdfium/2201.00022.txt index 835736a..49255d3 100644 --- a/read/results/pdfium/2201.00022.txt +++ b/read/results/pdfium/2201.00022.txt @@ -1,1100 +1,1098 @@ -Draft version July 7, 2022 -Typeset using LATEX twocolumn style in AASTeX631 -The Formation of Intermediate Mass Black Holes in Galactic Nuclei -Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 -1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA -2Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA -3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel -ABSTRACT -Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50 − 70 M , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections -indicate the existence of BHs with masses at and above this threshold. We suggest that massive -BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions -between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical -processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite -efficient, forming IMBHs as massive as 104 M . This upper limit assumes that (1) the BHs accrete a -substantial fraction of the stellar mass captured during each collision and (2) that the rate at which -new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar -disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our -results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic -centers. This formation channel has implications for observations. 
Collisions between stars and BHs -can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. -Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge -with the supermassive black hole at the center of a galactic nucleus through gravitational waves. -These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, -respectively). -1. INTRODUCTION -The recently detected gravitational wave source -GW190521 (The LIGO Scientific Collaboration et al. -2020a,b) produced an intermediate mass black hole of -approximately 142 M . This event may have also had a -85 M progenitor, which falls within the pair-instability -mass gap that limits stellar black holes (BHs) to no -more than ∼ -< 50 M (e.g., Heger et al. 2003; Woosley -2017) -1 -. Similarly, the merger products of GW150914, -GW170104, and GW170814 fall within the mass gap -(e.g., Abbott et al. 2016, 2017a,b). BH mergers that -form second generation BHs and, in some cases, inter￾mediate mass BHs (IMBHs), these gravitational wave -(GW) events can occur in globular clusters, young stel￾Corresponding author: Sanaea C. Rose -srose@astro.ucla.edu -1 Note that the exact lower and upper limits may be sensitive to -metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli -2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski -et al. 2020a; Renzo et al. 2020; Vink et al. 2021). -lar clusters, or the field (e.g., Rodriguez et al. 2018; Ro￾driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. -2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. -2021; Arca Sedda et al. 2021). However, IMBHs are -not limited to these locations and may reside in galac￾tic nuclei as well. Several studies propose that our -own galactic center may host an IMBH in the inner pc -(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 
2004; -G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen -& Liu 2013; Generozov & Madigan 2020; Fragione et al. -2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY -Collaboration et al. 2020). -Several IMBH formation channels have been suggested -in the literature. For example, IMBHs may have a cos￾mological origin, forming in the early universe either -as a result of the very first stars (e.g., Madau & Rees -2001; Schneider et al. 2002; Johnson & Bromm 2007; -Valiante et al. 2016) or from direct collapse of accumu￾lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; -Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. -2016). These high redshift IMBHs would need to sur￾vive galaxy evolution and mergers to present day (e.g., +Draft version July 7, 2022 +Typeset using LATEX twocolumn style in AASTeX631 +The Formation of Intermediate Mass Black Holes in Galactic Nuclei +Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 +1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA +2Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA +3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel +ABSTRACT +Most stellar evolution models predict that black holes (BHs) should not exist above approximately +50 − 70 M , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M . 
This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). +1. INTRODUCTION +The recently detected gravitational wave source +GW190521 (The LIGO Scientific Collaboration et al. +2020a,b) produced an intermediate mass black hole of +approximately 142 M . This event may have also had a +85 M progenitor, which falls within the pair-instability +mass gap that limits stellar black holes (BHs) to no +more than ∼ +< 50 M (e.g., Heger et al. 2003; Woosley +2017) +1 +. Similarly, the merger products of GW150914, +GW170104, and GW170814 fall within the mass gap +(e.g., Abbott et al. 2016, 2017a,b). BH mergers that +form second generation BHs and, in some cases, intermediate mass BHs (IMBHs), these gravitational wave +(GW) events can occur in globular clusters, young stelCorresponding author: Sanaea C. Rose +srose@astro.ucla.edu +1 Note that the exact lower and upper limits may be sensitive to +metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli +2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski +et al. 2020a; Renzo et al. 2020; Vink et al. 
2021). +lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). However, IMBHs are +not limited to these locations and may reside in galactic nuclei as well. Several studies propose that our +own galactic center may host an IMBH in the inner pc +(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; +G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen +& Liu 2013; Generozov & Madigan 2020; Fragione et al. +2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY +Collaboration et al. 2020). +Several IMBH formation channels have been suggested +in the literature. For example, IMBHs may have a cosmological origin, forming in the early universe either +as a result of the very first stars (e.g., Madau & Rees +2001; Schneider et al. 2002; Johnson & Bromm 2007; +Valiante et al. 2016) or from direct collapse of accumulated gas (e.g., Begelman et al. 2006; Yue et al. 2014; +Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. +2016). These high redshift IMBHs would need to survive galaxy evolution and mergers to present day (e.g., arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 -2 Rose et al. -Rashkov & Madau 2014), with significant effects on their -stellar and even dark matter surroundings (e.g., Bertone -et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda -et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another -popular formation channel relies on the coalescence of -many stellar-mass black holes, which may seed objects -as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs -may form in the centers of globular clusters, where few￾body interactions lead to the merger of stellar-mass BHs -(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha -et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Ro￾driguez et al. 2018; Rodriguez et al. 2019; Fragione et al. -2020b). 
Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g., Ebisuzaki -et al. 2001; Portegies Zwart & McMillan 2002; Portegies -Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; -Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. -2021; Das et al. 2021a,b; Escala 2021). -The main obstacle to sequential BH mergers in clus￾ters is that the merger recoil velocity kick often exceeds -the escape velocity from the cluster (e.g., Schnittman -& Buonanno 2007; Centrella et al. 2010; O’Leary et al. -2006; Baibhav et al. 2020, Rom & Sari, in prep.). How￾ever, nuclear star clusters at the centers of galaxies do -not encounter this problem. For example, Fragione et al. -(2021) explore repeated BH-BH mergers in nuclear star -clusters without a SMBH. They considered BH binary￾single interactions, binary BH GW merger, and GW -merger recoil kicks. The post-kick merger product sinks -back towards the cluster center over a dynamical fric￾tion timescale. Using this approach, they showed that -103 − 104 M IMBHs can form efficiently over the life￾time of a cluster. -However, as discussed in Section 2.2, direct BH-star -collisions are much more frequent than BH-BH collision -in galactic nuclei, making the former a promising chan￾nel for BH growth. In an N-body study of young star -clusters, Rizzuto et al. (2022) find that BH-star colli￾sions are a main contributor to the formation of BHs -in the mass gap and IMBHs. In a similar vein, Stone -et al. (2017) demonstrate that massive BHs can form -from repeated tidal encounters between stars and BHs. -More generally, several studies have explored the role of -collisions in a GN, with implications for the stellar and -red giant populations (e.g., Dale & Davies 2006; Dale -et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti -et al. 2021). We propose that IMBHs can form naturally -within the central pc of a galactic center through re￾peated collisions between BHs and main sequence stars. 
-During a collision, the BH can accrete some portion of -the star’s mass. Over many collisions, it can grow ap￾preciably in size. We demonstrate that this channel can -create IMBHs with masses as large as 104 M , an upper -limit that depends on the density profile of the surround￾ing stars and the efficiency of the accretion. -The paper is structured as follows: we describe rele￾vant physical processes and our approach in Section 2. -In particular, we provide an overview of collisions in -Section 2.2 and present our statistical approach in Sec￾tion 2.3. Section 2.4 discusses our treatment of the -mass growth with each collision and presents analytic -solutions to our equations in two different regimes, ef￾ficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections 2.6 -and 2.8 discuss implications for GW merger events be￾tween IMBHs and the SMBH. We then incorporate re￾laxation processes and discuss the subsequent results in -Section 2.9. Finally, we discuss and summarize our find￾ings in Section 3. -2. METHODOLOGY -We consider a population of stellar mass BHs embed￾ded in a cluster of 1 M stars. When stars and BHs -collide, the BHs can accrete mass. The growth rate de￾pends on the physical processes outlined below. We use -a statistical approach to estimate the stellar encounters -and final IMBH masses. -2.1. Physical Picture -We consider a population of BHs within the inner few -parsecs of the SMBH in a galactic nucleus (GN). We as￾sume that the BH mass distribution follows that of the -stars from which they originate, a Kroupa initial mass -function dN/dm ∝ m−2.35. While this choice represents -a gross oversimplification, it has very little bearing on -our final results. Future work may address the particu￾lars of the BH mass distribution, but we do not expect -that it will significantly alter the outcome. The upper -and lower limits of the BH mass distribution are 5 and -50 M , respectively. 
We select the upper limit to en￾compass the range of upper bounds predicted by stellar -evolution models, which vary between 40 and 125 M -depending on the metallicity (Heger et al. 2003; Woosley -2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; -Belczynski et al. 2020b; Renzo et al. 2020). We assume -that the orbits of the BHs follow a thermal eccentricity -distribution. We draw their semimajor axes, a•, from a -uniform distribution in log distance, dN/d(log r) being -constant. While this distribution is not necessarily rep￾resentative of actual conditions in the GN, we use it to -build a comprehensive physical picture of BH growth at -all distances from the SMBH, including within 0.01 pc. -Otherwise, the innermost region of the GN would be + Rose et al. +Rashkov & Madau 2014), with significant effects on their +stellar and even dark matter surroundings (e.g., Bertone +et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda +et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another +popular formation channel relies on the coalescence of +many stellar-mass black holes, which may seed objects +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs +may form in the centers of globular clusters, where fewbody interactions lead to the merger of stellar-mass BHs +(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha +et al. 2006; Freitag et al. 2006; Umbreit et al. 2012; Rodriguez et al. 2018; Rodriguez et al. 2019; Fragione et al. +2020b). Other formation mechanisms invoke successive +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). +The main obstacle to sequential BH mergers in clusters is that the merger recoil velocity kick often exceeds +the escape velocity from the cluster (e.g., Schnittman +& Buonanno 2007; Centrella et al. 
2010; O’Leary et al. +2006; Baibhav et al. 2020, Rom & Sari, in prep.). However, nuclear star clusters at the centers of galaxies do +not encounter this problem. For example, Fragione et al. +(2021) explore repeated BH-BH mergers in nuclear star +clusters without a SMBH. They considered BH binarysingle interactions, binary BH GW merger, and GW +merger recoil kicks. The post-kick merger product sinks +back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that +103 − 104 M IMBHs can form efficiently over the lifetime of a cluster. +However, as discussed in Section 2.2, direct BH-star +collisions are much more frequent than BH-BH collision +in galactic nuclei, making the former a promising channel for BH growth. In an N-body study of young star +clusters, Rizzuto et al. (2022) find that BH-star collisions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). We propose that IMBHs can form naturally +within the central pc of a galactic center through repeated collisions between BHs and main sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow appreciably in size. We demonstrate that this channel can +create IMBHs with masses as large as 104 M , an upper +limit that depends on the density profile of the surrounding stars and the efficiency of the accretion. +The paper is structured as follows: we describe relevant physical processes and our approach in Section 2. 
+In particular, we provide an overview of collisions in +Section 2.2 and present our statistical approach in Section 2.3. Section 2.4 discusses our treatment of the +mass growth with each collision and presents analytic +solutions to our equations in two different regimes, efficient collisions and inefficient collisions We compare +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in +Section 2.9. Finally, we discuss and summarize our findings in Section 3. +2. METHODOLOGY +We consider a population of stellar mass BHs embedded in a cluster of 1 M stars. When stars and BHs +collide, the BHs can accrete mass. The growth rate depends on the physical processes outlined below. We use +a statistical approach to estimate the stellar encounters +and final IMBH masses. +2.1. Physical Picture +We consider a population of BHs within the inner few +parsecs of the SMBH in a galactic nucleus (GN). We assume that the BH mass distribution follows that of the +stars from which they originate, a Kroupa initial mass +function dN/dm ∝ m−2.35. While this choice represents +a gross oversimplification, it has very little bearing on +our final results. Future work may address the particulars of the BH mass distribution, but we do not expect +that it will significantly alter the outcome. The upper +and lower limits of the BH mass distribution are 5 and +50 M , respectively. We select the upper limit to encompass the range of upper bounds predicted by stellar +evolution models, which vary between 40 and 125 M +depending on the metallicity (Heger et al. 2003; Woosley +2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; +Belczynski et al. 2020b; Renzo et al. 2020). We assume +that the orbits of the BHs follow a thermal eccentricity +distribution. 
We draw their semimajor axes, a•, from a +uniform distribution in log distance, dN/d(log r) being +constant. While this distribution is not necessarily representative of actual conditions in the GN, we use it to +build a comprehensive physical picture of BH growth at +all distances from the SMBH, including within 0.01 pc. +Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other -IMBH Formation in Galactic Nuclei 3 -Figure 1. We plot the relevant timescales, including col￾lision (green), relaxation (gold), and BH-BH GW capture -(purple), for a single BH in the GN as a function of distance -from the SMBH. For the collision timescale, we assume the -BH is on a circular orbit. The timescales depend on the -density, so we adopt a range of density profiles, bounded by -α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark -blue line represents the time for a 105 M BH to merge with -the SMBH through GW emission. -observationally motivated distributions in Section 2.9, -but reserve a more detailed examination of the distribu￾tion’s impact for future work. -2.2. Direct Collisions -BHs in the GN can undergo direct collisions with other -objects. The timescale for this process, tcoll, can be es￾timated using a simple rate calculation: t -−1 -coll = nσA, -where n is the number density of objects, σ is the ve￾locity dispersion, and A is the cross-section. We use the -collision timescale from Rose et al. (2020): -t -−1 -coll =πn(a•)σ(a•) -× - -f1(e•)r -2 -c + f2(e•)rc -2G(mBH + m?) -σ(a•) -2 - -. (1) -where G is the gravitational constant and rc is the sum -of the radii of the interacting objects, a black hole with -mass mBH and a star with mass m?. Detailed in Rose -et al. (2020), f1(e•) and f2(e•) account for the effect of -the eccentricity of the BH’s orbit about the SMBH on -the collision rate, while n and σ are simply evaluated -at the semimajor axis of the orbit (see below). 
Note -that this timescale equation includes the effects of grav￾itational focusing, which enhances the cross-section of -interaction. -Assuming a circular orbit for simplicity, we plot the -timescale for a BH orbiting in the GN to collide with -a 1 M star as a function of distance from the SMBH -in Figure 1. -2 As this timescale depends on the density -of surrounding stars, we adopt a density profile of the -form: -ρ(r•) = ρ0 - -r• -r0 -−α -, (2) -where r• denotes the distance from the SMBH. We adopt -a SMBH mass of 4 × 106 M such that our fiducial GN -matches our own galactic center (e.g., Ghez et al. 2005; -Genzel et al. 2003). In this case, the normalization in -Eq. (2) is ρ0 = 1.35×106 M /pc3 at r0 = 0.25 pc (Gen￾zel et al. 2010). Additionally, in Eq. (2), α gives the -slope of the power law. We assume that a uniform pop￾ulation of solar mass stars account for most of the mass -in the GN, making the stellar number density: -n(r•) = ρ(r•) -1 M -. (3) -The collision timescale also depends on the velocity dis￾persion, which we express as: -σ(r•) = s -GM• -r•(1 + α) -, (4) -where α is the slope of the density profile and M• de￾notes the mass of the SMBH (Alexander 1999; Alexan￾der & Pfuhl 2014). As mentioned above, Eq. (1) depends -on the sum of the radii of the colliding objects, rc. We -take rc = 1 R because these interactions involve a BH -and a star, and the former has a much smaller physi￾cal cross-section. For example, the Schwarzschild radius -of a 10 M BH is only 30 km, or 4.31 × 10−5 R . For -this reason, direct collisions between compact objects -are very rare and not included in our model. -We note that direct collisions between BHs, via GW -emission, were shown to be efficient in nuclear star clus￾ters without SMBHs (e.g., Portegies Zwart & McMil￾lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016). -However, in the GN, star-BH collisions are much more -frequent than direct BH-BH collisions. 
As depicted in -Figure 1, the star-BH collision timescale for a range -of density profiles is many orders of magnitude shorter -than the BH-BH GW collision timescale (for the rele￾vant equations, see O’Leary et al. 2009; Gond´an et al. -2018, for example). Thus, we expect that star-BH col￾lisions will be the main driver of IMBH growth in the -GN. -2 We note that the eccentricity has a very minor effect on the +IMBH Formation in Galactic Nuclei 3 +Figure 1. We plot the relevant timescales, including collision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark +blue line represents the time for a 105 M BH to merge with +the SMBH through GW emission. +observationally motivated distributions in Section 2.9, +but reserve a more detailed examination of the distribution’s impact for future work. +2.2. Direct Collisions +BHs in the GN can undergo direct collisions with other +objects. The timescale for this process, tcoll, can be estimated using a simple rate calculation: t +−1 +coll = nσA, +where n is the number density of objects, σ is the velocity dispersion, and A is the cross-section. We use the +collision timescale from Rose et al. (2020): +t +−1 +coll =πn(a•)σ(a•) +× + +f1(e•)r +2 +c + f2(e•)rc +2G(mBH + m?) +σ(a•) +2 + +. (1) +where G is the gravitational constant and rc is the sum +of the radii of the interacting objects, a black hole with +mass mBH and a star with mass m?. Detailed in Rose +et al. (2020), f1(e•) and f2(e•) account for the effect of +the eccentricity of the BH’s orbit about the SMBH on +the collision rate, while n and σ are simply evaluated +at the semimajor axis of the orbit (see below). 
Note +that this timescale equation includes the effects of gravitational focusing, which enhances the cross-section of +interaction. +Assuming a circular orbit for simplicity, we plot the +timescale for a BH orbiting in the GN to collide with +a $1\,M_\odot$ star as a function of distance from the SMBH +in Figure 1. +2 As this timescale depends on the density +of surrounding stars, we adopt a density profile of the +form: +$\rho(r_\bullet) = \rho_0 \left( \frac{r_\bullet}{r_0} \right)^{-\alpha}$, (2) +where $r_\bullet$ denotes the distance from the SMBH. We adopt +a SMBH mass of $4 \times 10^6\,M_\odot$ such that our fiducial GN +matches our own galactic center (e.g., Ghez et al. 2005; +Genzel et al. 2003). In this case, the normalization in +Eq. (2) is $\rho_0 = 1.35 \times 10^6\,M_\odot\,{\rm pc}^{-3}$ at $r_0 = 0.25$ pc (Genzel et al. 2010). Additionally, in Eq. (2), α gives the +slope of the power law. We assume that a uniform population of solar mass stars accounts for most of the mass +in the GN, making the stellar number density: +$n(r_\bullet) = \frac{\rho(r_\bullet)}{1\,M_\odot}$. (3) +The collision timescale also depends on the velocity dispersion, which we express as: +$\sigma(r_\bullet) = \sqrt{\frac{G M_\bullet}{r_\bullet (1 + \alpha)}}$, (4) +where α is the slope of the density profile and $M_\bullet$ denotes the mass of the SMBH (Alexander 1999; Alexander & Pfuhl 2014). As mentioned above, Eq. (1) depends +on the sum of the radii of the colliding objects, $r_c$. We +take $r_c = 1\,R_\odot$ because these interactions involve a BH +and a star, and the former has a much smaller physical cross-section. For example, the Schwarzschild radius +of a $10\,M_\odot$ BH is only 30 km, or $4.31 \times 10^{-5}\,R_\odot$. For +this reason, direct collisions between compact objects +are very rare and not included in our model. +We note that direct collisions between BHs, via GW +emission, were shown to be efficient in nuclear star clusters without SMBHs (e.g., Portegies Zwart & McMillan 2000; O’Leary et al. 2006; Rodriguez et al. 2016). +However, in the GN, star-BH collisions are much more +frequent than direct BH-BH collisions. 
As depicted in +Figure 1, the star-BH collision timescale for a range +of density profiles is many orders of magnitude shorter +than the BH-BH GW collision timescale (for the relevant equations, see O’Leary et al. 2009; Gond´an et al. +2018, for example). Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the +GN. +2 We note that the eccentricity has a very minor effect on the collision timescale (Rose et al. 2020). -4 Rose et al. -2.3. Statistical Approach to Collisions -We simulate the mass growth of a population of BHs -with initial conditions detailed in Section 2.1. Over an -increment ∆t of 106 yr, we calculate the probability of -a collision occurring, given by ∆t/tcoll. This choice of -∆t is motivated by our galactic center’s star formation -timescale (e.g., Lu et al. 2009), allowing for regular re￾plenishment of the stellar population in the GN. We have -checked that the results are not sensitive to this choice -of ∆t, omitted here to avoid clutter. We draw a number -between 0 and 1 using a random number generator. If -that number is less than or equal to the probability, we -increase the BH’s mass by ∆m, the mass that the BH is -expected to accrete in a single collision (see Section 2.4 -for details). We recalculate the collision timescale using -the updated BH mass and repeat this process until the -time elapsed equals the simulation time of 10 Gyr3 -. -2.4. Mass Growth -When a BH collides with a star, it may accrete ma￾terial and grow in mass. The details of the accretion -depend on the relative velocity between the BH and -star. For simplicity, this calculation assumes that the -two objects experience a head on collision, with the BH -passing through the star’s center. We begin by con￾sidering the escape velocity from the BH at the star’s -outermost point, its surface, which corresponds to the -maximum impact parameter 1 R . 
Qualitatively, one -might expect that the BH could capture the entire star -(i.e., ∆m ∼ 1 M ) if the relative velocity is smaller than -the escape velocity from the BH at this point. However, -in the vicinity of the SMBH, the dispersion velocity of -the stars may be much larger than the escape velocity -from the BH at the star’s surface. In this case, the BH -captures a “tunnel” of material through the star. This -tunnel has radius equal to the Bondi radius and length -approximately 1 R . For the purposes of this study, we -assume that the BH accretes all of the material that -it captures. The details of the accretion are uncertain, -however, and it may be much less efficient than our re￾sults imply. We discuss accretion in Section 2.5. -To estimate ∆m, we begin with the Bondi-Hoyle ac￾cretion rate, ˙m, given by: -m˙ = -4πG2m2 -BHρstar -(c -2 -s + σ -2) -3/2 -, (5) -3 Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. We include a safe￾guard in our code which takes the ratio tcoll/∆t and rounds it -to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. -Figure 2. We consider an example that highlights the mass -growth as a function of distance from the SMBH. Grey dots -represent the initial masses and distances from the SMBH -of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: -BHs accrete all of the star’s mass during a collision (red) and -only a portion of the star’s mass is accreted during a collision -given by Eq. 6 (blue). The latter case results in less growth -closer to the SMBH where the velocity dispersion becomes -high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section 2.4. 
-where cs is the speed of sound in the star and ρstar is its -density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima -et al. 1985; Edgar 2004, see latter for a review). We -approximate the density as 1 M /(4πR3 - /3) and take -the conservative value of cs = 500 km s−1 -, which is -consistent with the sound speed inside a 1 M star -(Christensen-Dalsgaard et al. 1996) and allows us to set -a lower limit on ∆m. To find ∆m, at each collision, we -have: -∆m = min( ˙m × t?,cross, 1 M ) , (6) -where t?,cross ∼ R /σ is the crossing time of the BH in -the star. We take the minimum between ˙m×t?,cross and -1 M because the BH cannot accrete more mass than -one star at each collision. -Figure 2 juxtaposes the expected growth using Bondi￾Hoyle-Lyttleton accretion (blue small points) with a -much simpler model in which the BH accretes the star’s -entire mass, 1 M (red large points). Both examples -start with identical populations of 10 M BHs (grey) -and simulate growth through collisions using a statisti￾cal approach. As the BHs grow, the collision timescale, -which depends on mBH, decreases. Simultaneously, -∆m, which also depends on mBH, increases. The re￾sult is exponential growth (see discussion and details -surrounding Eq. (8)). In Figure 2, however, the simula￾tions assume α = 1 for the stellar density profile, ensur￾ing the collision timescale is long compared to the sim- -IMBH Formation in Galactic Nuclei 5 -ulation time, 10 Gyr. Therefore, the BHs grow slowly, -and their final masses can be approximated using the -following equation: -mfinal(tcoll → const.) = minitial + ∆m -T -tcoll -, (7) -in which T represents the simulation time and ∆m and -tcoll remain constant, approximated as their initial val￾ues. -This equation is plotted in Figure 2 for both cases, -∆m = 1 M (red) and ∆m from Bondi-Hoyle-Lyttleton -accretion (blue), and the curves coincide with the cor￾responding simulated results. The shaded regions rep￾resent one standard deviation from Eq. 
(7), calculated -using the square root of the number of collisions, T /tcoll. -As indicated by the results in red, in the absence of -Bondi-Hoyle-Lyttleton accretion, the BHs closest to the -SMBH experience the most growth because they have -shorter collision timescales. However, Bondi-Hoyle￾Lyttleton accretion becomes important closer to the -SMBH, where the velocity dispersion is large compared -with the stars’ escape velocity, and curtails the mass -growth for BHs in this region. Outside of 10−2 pc, a BH -consumes the star’s entire mass: the accretion-limited -∆m governed by Eq. (7) is greater than or equal to the -star’s mass. -Eq. 7 does not apply for other values of α. When the -collision timescale is shorter, corresponding to a larger -index α in the density profile (see Figure 1), the growth -is very efficient and ∆m quickly approaches 1 M . Con￾sequently, while we can now assume ∆m = 1 M , we -can no longer assume the collision timescale is constant. -The final mass grows exponentially as a result. For -∆m = 1M , the general solution is reached by solving -the differential equation dm/dt = 1 M /tcoll(m), which -gives: -mfinal(∆m → 1 M ) =−A + (minitial + A) e -CT (8) -where A = σ -2Rstar/G and C = 2πGnstarRstar/σ. As an -example, we plot this curve in purple for the α = 2 case, -in Figure 3, which agrees with the simulated masses. -2.5. Uncertainties in Accretion -We note that the ∆M calculated in this proof-of￾concept study assumes that the BH accretes all of the -material that it captures. Estimating the true fraction -of the material accreted by the BH is very challeng￾ing; this complex problem requires numerically solving -the generalized GR fluid equations with cooling, heat￾ing, and radiative transfer, etc. and remains an active -field of research (e.g., Blandford & Begelman 1999; Park -& Ostriker 2001; Narayan et al. 2003; Igumenshchev -et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang -et al. 2014; McKinney et al. 2014; Narayan et al. 2022). 
-Heuristically, if a collision between a BH and a star re￾sults in an accretion disk, the disk’s viscous timescale -may be as low as days. The resultant luminosity can -unbind most of the captured material, though details -such as the amount accreted and peak luminosity re￾main uncertain (e.g., Yuan et al. (2012); Jiang et al. -(2014), see also the discussion in Stone et al. (2017), -Rizzuto et al. (2022), and Kremer et al. (2022)). The -question becomes whether or not a BH can still accu￾mulate significant amounts of mass over many collisions -even if it accretes very little in a single one. We ex￾plore the viability of our channel using a physically mo￾tivated inefficient accretion model. Several studies have -invoked momentum-driven winds in BH accretion (e.g., -Murray et al. 2005; Ostriker et al. 2010; Brennan et al. -2018). We thus estimate the fraction of captured mass -accreted to be approximately vesc/(cη), where vesc is -the escape velocity from the BH at 1 R and η is the -accretion efficiency at the ISCO. We take η to be 0.1 -(e.g., Yu & Tremaine 2002). This expression for the -fraction accreted is consistent with Kremer et al. (2022) -equation 19 for s = 0.5, which is a reasonable value for -s, a free parameter between 0.2 and 0.8. We discuss -the results of the momentum-driven winds estimate in -Section 3. We note that the accretion process may be -more efficient than this estimate implies if, for example, -jets or other instabilities result in the beaming of radi￾ation away from the captured material (e.g., Blandford -& Znajek 1977; Begelman 1979; De Villiers et al. 2005; -McKinney & Gammie 2004; McKinney 2006; Igumen￾shchev 2008; Begelman 2012a,b; McKinney et al. 2014). -2.6. GW Inspiral -When a BH is close to the SMBH, GW emission can -circularize and shrink its orbit. We implement the ef￾fects of GW emission on the BH’s semimajor axis and -eccentricity following Peters & Mathews (1963a). 
The -characteristic timescale to merge a BH with an SMBH -is given by: -tGW ≈2.9 × 1012 yr  -M• -106 M -−1  -mBH -106 M -−1 -× - -M• + mBH -2 × 106 M -−1  -a• -10−2 pc4 -×f(e•)(1 − e -2 -• -) -7/2 -, (9) -where f(e•) is a function of e•. For all values of e•, -f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We -plot this timescale for a 1 × 105 M BH in Figure 1 in + Rose et al. +2.3. Statistical Approach to Collisions +We simulate the mass growth of a population of BHs +with initial conditions detailed in Section 2.1. Over an +increment ∆t of 106 yr, we calculate the probability of +a collision occurring, given by ∆t/tcoll. This choice of +∆t is motivated by our galactic center’s star formation +timescale (e.g., Lu et al. 2009), allowing for regular replenishment of the stellar population in the GN. We have +checked that the results are not sensitive to this choice +of ∆t, omitted here to avoid clutter. We draw a number +between 0 and 1 using a random number generator. If +that number is less than or equal to the probability, we +increase the BH’s mass by ∆m, the mass that the BH is +expected to accrete in a single collision (see Section 2.4 +for details). We recalculate the collision timescale using +the updated BH mass and repeat this process until the +time elapsed equals the simulation time of 10 Gyr3. +2.4. Mass Growth +When a BH collides with a star, it may accrete material and grow in mass. The details of the accretion +depend on the relative velocity between the BH and +star. For simplicity, this calculation assumes that the +two objects experience a head on collision, with the BH +passing through the star’s center. We begin by considering the escape velocity from the BH at the star’s +outermost point, its surface, which corresponds to the +maximum impact parameter 1 R . Qualitatively, one +might expect that the BH could capture the entire star +(i.e., ∆m ∼ 1 M ) if the relative velocity is smaller than +the escape velocity from the BH at this point. 
However, +in the vicinity of the SMBH, the dispersion velocity of +the stars may be much larger than the escape velocity +from the BH at the star’s surface. In this case, the BH +captures a “tunnel” of material through the star. This +tunnel has radius equal to the Bondi radius and length +approximately $1\,R_\odot$. For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our results imply. We discuss accretion in Section 2.5. +To estimate ∆m, we begin with the Bondi-Hoyle accretion rate, $\dot{m}$, given by: +$\dot{m} = \frac{4\pi G^2 m_{\rm BH}^2 \rho_{\rm star}}{(c_s^2 + \sigma^2)^{3/2}}$, (5) +3 Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio ∆t/tcoll and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the initial mass equal to $10\,M_\odot$ for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. +where $c_s$ is the speed of sound in the star and $\rho_{\rm star}$ is its +density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima +et al. 1985; Edgar 2004, see latter for a review). 
We +approximate the density as 1 M /(4πR3 + /3) and take +the conservative value of cs = 500 km s−1, which is +consistent with the sound speed inside a 1 M star +(Christensen-Dalsgaard et al. 1996) and allows us to set +a lower limit on ∆m. To find ∆m, at each collision, we +have: +∆m = min( ˙m × t?,cross, 1 M ) , (6) +where t?,cross ∼ R /σ is the crossing time of the BH in +the star. We take the minimum between ˙m×t?,cross and +1 M because the BH cannot accrete more mass than +one star at each collision. +Figure 2 juxtaposes the expected growth using BondiHoyle-Lyttleton accretion (blue small points) with a +much simpler model in which the BH accretes the star’s +entire mass, 1 M (red large points). Both examples +start with identical populations of 10 M BHs (grey) +and simulate growth through collisions using a statistical approach. As the BHs grow, the collision timescale, +which depends on mBH, decreases. Simultaneously, +∆m, which also depends on mBH, increases. The result is exponential growth (see discussion and details +surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the sim- +IMBH Formation in Galactic Nuclei 5 +ulation time, 10 Gyr. Therefore, the BHs grow slowly, +and their final masses can be approximated using the +following equation: +mfinal(tcoll → const.) = minitial + ∆m +T +tcoll +, (7) +in which T represents the simulation time and ∆m and +tcoll remain constant, approximated as their initial values. +This equation is plotted in Figure 2 for both cases, +∆m = 1 M (red) and ∆m from Bondi-Hoyle-Lyttleton +accretion (blue), and the curves coincide with the corresponding simulated results. The shaded regions represent one standard deviation from Eq. (7), calculated +using the square root of the number of collisions, T /tcoll. 
+As indicated by the results in red, in the absence of +Bondi-Hoyle-Lyttleton accretion, the BHs closest to the +SMBH experience the most growth because they have +shorter collision timescales. However, Bondi-Hoyle-Lyttleton accretion becomes important closer to the +SMBH, where the velocity dispersion is large compared +with the stars’ escape velocity, and curtails the mass +growth for BHs in this region. Outside of $10^{-2}$ pc, a BH +consumes the star’s entire mass: the accretion-limited +∆m governed by Eq. (6) is greater than or equal to the +star’s mass. +Eq. 7 does not apply for other values of α. When the +collision timescale is shorter, corresponding to a larger +index α in the density profile (see Figure 1), the growth +is very efficient and ∆m quickly approaches $1\,M_\odot$. Consequently, while we can now assume ∆m = $1\,M_\odot$, we +can no longer assume the collision timescale is constant. +The final mass grows exponentially as a result. For +∆m = $1\,M_\odot$, the general solution is reached by solving +the differential equation $dm/dt = 1\,M_\odot / t_{\rm coll}(m)$, which +gives: +$m_{\rm final}(\Delta m \to 1\,M_\odot) = -A + (m_{\rm initial} + A)\, e^{CT}$ (8) +where $A = \sigma^2 R_{\rm star}/G$ and $C = 2\pi G n_{\rm star} R_{\rm star}/\sigma$. As an +example, we plot this curve in purple for the α = 2 case, +in Figure 3, which agrees with the simulated masses. +2.5. Uncertainties in Accretion +We note that the ∆m calculated in this proof-of-concept study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challenging; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heating, and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). 
+Heuristically, if a collision between a BH and a star results in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity remain uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accumulate significant amounts of mass over many collisions +even if it accretes very little in a single one. We explore the viability of our channel using a physically motivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vesc/(cη), where vesc is +the escape velocity from the BH at 1 R and η is the +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fraction accreted is consistent with Kremer et al. (2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radiation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumenshchev 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral +When a BH is close to the SMBH, GW emission can +circularize and shrink its orbit. We implement the effects of GW emission on the BH’s semimajor axis and +eccentricity following Peters & Mathews (1963a). 
The +characteristic timescale to merge a BH with an SMBH +is given by: +tGW ≈2.9 × 1012 yr  +M• +106 M +−1  +mBH +106 M +−1 +× + +M• + mBH +2 × 106 M +−1  +a• +10−2 pc4 +×f(e•)(1 − e +2 +• +) +7/2 +, (9) +where f(e•) is a function of e•. For all values of e•, +f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We +plot this timescale for a 1 × 105 M BH in Figure 1 in blue. -6 Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass -of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and -merger times of these BHs. -In our simulations, we assume a BH has merged with -the SMBH when the condition tGW < telapsed is met. -When this condition is satisfied, we terminate mass -growth through collisions for that BH.4 -2.7. IMBH growth -As detailed above, BH-stellar collisions can increase -the BH masses as a function of time. Here, we examine -the sensitivity of the BH growth to the density power -law. From Eq. (1), it is clear that the growth rate de￾pends on the stellar density profile, governed by the in￾dex α. We expect that higher values of α, or steeper -profiles, will result in more efficient mass growth. In -Figure 1, larger values of α lead to collision timescales -in the GN’s inner region, inwards of 0.25 pc, that are -much smaller that the 10 Gyr simulation time. Figure 3 -confirms this expectation. It depicts the mass growth of -a uniform distribution of BHs with initial conditions de￾tailed in Section 2.1 for five α values, spanning 1 (green) -to 2 (purple). The most massive IMBHs form inwards -of 0.25 pc for the α = 2 case. -2.8. 
Gravitational Wave Mergers and Intermediate -and Extreme Mass Ratio Inspiral Candidates -Towards the SMBH, efficient collisions can create BHs -massive enough to merge with the SMBH through GWs. -Following the method detailed in Section 2.6, when a -given BH meets the criterion tGW < telapsed, we mark -4 For comparison, we also incrementally changed the semimajor -axis and eccentricity from GW emission following the equations -in Peters & Mathews (1963b). This method leads to a slight -increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking. -it as merged with the SMBH. We assume that at this -point the dynamics of the BH will be determined by GW -emission, shrinking and circularizing the BHs orbit un￾til it undergoes an extreme or intermediate mass ratio -inspiral (EMRI and IMRI, respectively). The righthand -plot in Figure 3 shows the BH masses versus time of -merger. It is interesting to note that even in the ab￾sence of relaxation processes, which are often invoked -to explain the formation of EMRIs, EMRIs and notably -IMRIs can form in this region. -2.9. Two Body Relaxation Processes -A BH orbiting the SMBH experiences weak gravita￾tional interactions with other objects in the GN. Over a -relaxation time, these interactions alter its orbit about -the SMBH. The two-body relaxation timescale for a -single-mass system is: -trelax = 0.34 σ -3 -G2ρhM∗iln Λrlx -, (10) -where ln Λrlx is the Coulomb logarithm and hM∗i is the -average mass of the surrounding objects, here assumed -to be 1 M (Spitzer 1987; Binney & Tremaine 2008, -Eq. (7.106)). This equation represents the approximate -timescale for a BH on a semi-circular orbit to change -its orbital energy and angular momentum by order of -themselves. 
The BH experiences diffusion in its angular -momentum and energy as a function of time (depending -on the eccentricity of the orbit, this process can be more -efficient Fragione & Sari 2018; Sari & Fragione 2019). -Relaxation can cause the orbit of an object in a GN to -reach high eccentricities. If the object is a BH, it can + Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. +In our simulations, we assume a BH has merged with +the SMBH when the condition tGW < telapsed is met. +When this condition is satisfied, we terminate mass +growth through collisions for that BH.4 +2.7. IMBH growth +As detailed above, BH-stellar collisions can increase +the BH masses as a function of time. Here, we examine +the sensitivity of the BH growth to the density power +law. From Eq. (1), it is clear that the growth rate depends on the stellar density profile, governed by the index α. We expect that higher values of α, or steeper +profiles, will result in more efficient mass growth. In +Figure 1, larger values of α lead to collision timescales +in the GN’s inner region, inwards of 0.25 pc, that are +much smaller that the 10 Gyr simulation time. Figure 3 +confirms this expectation. It depicts the mass growth of +a uniform distribution of BHs with initial conditions detailed in Section 2.1 for five α values, spanning 1 (green) +to 2 (purple). The most massive IMBHs form inwards +of 0.25 pc for the α = 2 case. +2.8. Gravitational Wave Mergers and Intermediate +and Extreme Mass Ratio Inspiral Candidates +Towards the SMBH, efficient collisions can create BHs +massive enough to merge with the SMBH through GWs. 
+Following the method detailed in Section 2.6, when a +given BH meets the criterion $t_{\rm GW} < t_{\rm elapsed}$, we mark +4 For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. +it as merged with the SMBH. We assume that at this +point the dynamics of the BH will be determined by GW +emission, shrinking and circularizing the BH's orbit until it undergoes an extreme or intermediate mass ratio +inspiral (EMRI and IMRI, respectively). The righthand +plot in Figure 3 shows the BH masses versus time of +merger. It is interesting to note that even in the absence of relaxation processes, which are often invoked +to explain the formation of EMRIs, EMRIs and notably +IMRIs can form in this region. +2.9. Two Body Relaxation Processes +A BH orbiting the SMBH experiences weak gravitational interactions with other objects in the GN. Over a +relaxation time, these interactions alter its orbit about +the SMBH. The two-body relaxation timescale for a +single-mass system is: +$t_{\rm relax} = 0.34\,\dfrac{\sigma^{3}}{G^{2}\,\rho\,\langle M_{*}\rangle\,\ln\Lambda_{\rm rlx}}$, (10) +where $\ln\Lambda_{\rm rlx}$ is the Coulomb logarithm and $\langle M_{*}\rangle$ is the +average mass of the surrounding objects, here assumed +to be $1\,M_\odot$ (Spitzer 1987; Binney & Tremaine 2008, +Eq. (7.106)). This equation represents the approximate +timescale for a BH on a semi-circular orbit to change +its orbital energy and angular momentum by order of +themselves. The BH experiences diffusion in its angular +momentum and energy as a function of time (depending +on the eccentricity of the orbit, this process can be more +efficient Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities.
If the object is a BH, it can spiral into the SMBH and form an EMRI, while a star -IMBH Formation in Galactic Nuclei 7 -can be tidally disrupted by the SMBH (e.g. Magorrian -& Tremaine 1999; Wang & Merritt 2004; Hopman & -Alexander 2005; Aharon & Perets 2016; Stone & Met￾zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; -Naoz et al. 2022). The relaxation process is therefore -crucial to our study. In Figure 1, we plot the relaxation -timescale in gold for a range of α. We note that the Bah￾call & Wolf (1976) profile, α = 7/4, corresponds to zero -net flux and therefore does not preferentially migrate -objects inward. -Additionally, because BHs are more massive on av￾erage than the surrounding objects, they are expected -to segregate inwards in the GN (e.g., Shapiro & -Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; -Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). -They sink toward the SMBH on the mass segregation -timescale, tseg ≈ hM∗i/mBH × trelax (e.g., Spitzer 1987; -Fregeau et al. 2002; Merritt 2006), which is typically an -order of magnitude smaller than the relaxation timescale -plotted in Figure 1. -We incorporate relaxation processes by introducing a -small change in the BH’s energy and angular momen￾tum each time it orbits the SMBH. We apply a small -instantaneous velocity kick to the BH, denoted as ∆v. -We draw ∆v from a Guassian distribution with average -of zero and a standard deviation of ∆vrlx/ -√ -3, where -∆vrlx = v• -p -P•/trlx (see Bradnick et al. 2017, for an -approach to changes in the angular momentum). The -new orbital parameters can be calculated following Lu -& Naoz (2019), and see Naoz et al. (2022) for the full -set of equations. -We account for the effects of relaxation processes, -including mass-segregation, using a multi-faceted ap￾proach. 
We begin by migrating each BH towards the -center over its mass-segregation timescale, shifting it in￾crementally inward such that its orbital energy changes -by order of itself within the segregation timescale. -As the BHs segregate down the potential well, their -abundance with respect to stars increases, until at some -turnover radius, BHs become the dominant source of -scattering for both black holes and stars. Within this ra￾dius, BH self-interaction dominates over two-body scat￾terings with the now rarer main-sequence stars. The -BHs will then settle onto a Bahcall-Wolf profile, while -the stars may follow a shallower profile, with approx￾imately n? ∝ r -−1.5 -, inwards of the transition radius -(Linial & Sari in prep.). -Therefore, after the initial mass segregation, we allow -the BHs to begin diffusing over a relaxation timescale, -their orbital parameters changing slowly through a ran￾dom process. In this random process, some of the BHs -may migrate closer to the SMBH. We terminate mass -growth when the BH enters the inner 200 au of the GN, -within which the density of stars is uncertain. This cut￾off is based on the 120 au pericenter of S0-2, the closest -known star to the SMBH (e.g., Ghez et al. 2005). -Another physical process that causes inward migra￾tion is dynamical friction. A cursory derivation based -on the dynamical friction equations described in Binney -& Tremaine (2008) reveals the process to have a simi￾lar timescale to mass segregation. If a BH diffuses to -a distance greater than 2 pc from the SMBH, exiting -the sphere of influence, we have it sink inwards, back -towards the center, over a dynamical friction timescale. -After one dynamical friction timescale has passed, we -restart diffusion. -We note that our prescription ignores self-interactions -between the BHs. As mentioned above, as the BHs sink -towards the SMBH, their concentration in the inner re￾gion of the GN increases, allowing them to dominate the -scattering. 
We reserve the inclusion of these interactions -for future study. -2.10. Effect of Relaxation Processes -As depicted in Figure 4, two-body relaxation processes -result in more EMRIs and IMRIs events. These pro￾cesses allow BHs that begin further from the SMBH -to migrate inwards and grow more efficiently in mass. -However, it also impedes the growth of BHs that are -initially closer to the SMBH by allowing them to dif￾fuse out of the inner region where collisions are efficient. -As can be seen in Figure 4, the net result is that more -BHs grow, but the maximum mass is lower compared -to the scenario that ignores two-body relaxation. The -histogram in Figure 4 presents the final BH mass distri￾butions for different power law indices α. As expected, -the two-body relaxation suppresses the α dependence -highlighted in Figure 3. In fact, using a KS test, we -find that we cannot reject the hypothesis that the two -distributions were drawn from the same sample for the -α = 1.75 and α = 2 results. Interestingly, a BH mass -IMF with an average of 10 M leads to a final distri￾bution with an average of ∼ 200 M and a median of -∼ 45 M , which lies within the mass gap. -3. DISCUSSION AND PREDICTIONS -We explore the feasibility of forming IMBHs in a -GN through successive collisions between a stellar-mass -BH and main-sequence stars. Taking both a statisti￾cal and analytic approach, we show that this channel -can produce IMBHs efficiently with masses as high as -103−4 M and may result in many IMBH-SMBH merg￾ers (intermediate-mass ratio inspirals, or IMRIs) and +IMBH Formation in Galactic Nuclei 7 +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Metzger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. 
We note that the Bahcall & Wolf (1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on average than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). +They sink toward the SMBH on the mass segregation +timescale, $t_{\rm seg} \approx \langle M_{*}\rangle/m_{\rm BH} \times t_{\rm relax}$ (e.g., Spitzer 1987; +Fregeau et al. 2002; Merritt 2006), which is typically an +order of magnitude smaller than the relaxation timescale +plotted in Figure 1. +We incorporate relaxation processes by introducing a +small change in the BH’s energy and angular momentum each time it orbits the SMBH. We apply a small +instantaneous velocity kick to the BH, denoted as $\Delta v$. +We draw $\Delta v$ from a Gaussian distribution with average +of zero and a standard deviation of $\Delta v_{\rm rlx}/\sqrt{3}$, where +$\Delta v_{\rm rlx} = v_{\bullet}\sqrt{P_{\bullet}/t_{\rm rlx}}$ (see Bradnick et al. 2017, for an +approach to changes in the angular momentum). The +new orbital parameters can be calculated following Lu +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. +We account for the effects of relaxation processes, +including mass-segregation, using a multi-faceted approach. We begin by migrating each BH towards the +center over its mass-segregation timescale, shifting it incrementally inward such that its orbital energy changes +by order of itself within the segregation timescale. +As the BHs segregate down the potential well, their +abundance with respect to stars increases, until at some +turnover radius, BHs become the dominant source of +scattering for both black holes and stars. Within this radius, BH self-interaction dominates over two-body scatterings with the now rarer main-sequence stars. The +BHs will then settle onto a Bahcall-Wolf profile, while +the stars may follow a shallower profile, with approximately $n_{\star}$
∝ r +−1.5 +, inwards of the transition radius +(Linial & Sari in prep.). +Therefore, after the initial mass segregation, we allow +the BHs to begin diffusing over a relaxation timescale, +their orbital parameters changing slowly through a random process. In this random process, some of the BHs +may migrate closer to the SMBH. We terminate mass +growth when the BH enters the inner 200 au of the GN, +within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest +known star to the SMBH (e.g., Ghez et al. 2005). +Another physical process that causes inward migration is dynamical friction. A cursory derivation based +on the dynamical friction equations described in Binney +& Tremaine (2008) reveals the process to have a similar timescale to mass segregation. If a BH diffuses to +a distance greater than 2 pc from the SMBH, exiting +the sphere of influence, we have it sink inwards, back +towards the center, over a dynamical friction timescale. +After one dynamical friction timescale has passed, we +restart diffusion. +We note that our prescription ignores self-interactions +between the BHs. As mentioned above, as the BHs sink +towards the SMBH, their concentration in the inner region of the GN increases, allowing them to dominate the +scattering. We reserve the inclusion of these interactions +for future study. +2.10. Effect of Relaxation Processes +As depicted in Figure 4, two-body relaxation processes +result in more EMRIs and IMRIs events. These processes allow BHs that begin further from the SMBH +to migrate inwards and grow more efficiently in mass. +However, it also impedes the growth of BHs that are +initially closer to the SMBH by allowing them to diffuse out of the inner region where collisions are efficient. +As can be seen in Figure 4, the net result is that more +BHs grow, but the maximum mass is lower compared +to the scenario that ignores two-body relaxation. 
The +histogram in Figure 4 presents the final BH mass distributions for different power law indices α. As expected, +the two-body relaxation suppresses the α dependence +highlighted in Figure 3. In fact, using a KS test, we +find that we cannot reject the hypothesis that the two +distributions were drawn from the same sample for the +α = 1.75 and α = 2 results. Interestingly, a BH mass +IMF with an average of 10 M leads to a final distribution with an average of ∼ 200 M and a median of +∼ 45 M , which lies within the mass gap. +3. DISCUSSION AND PREDICTIONS +We explore the feasibility of forming IMBHs in a +GN through successive collisions between a stellar-mass +BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel +can produce IMBHs efficiently with masses as high as +103−4 M and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspirals, or IMRIs) and EMRIs. -8 Rose et al. -Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance -(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. -We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, -more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses -for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation -processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). -Despite the substantially reduced accretion, BHs in the mass gap still form. -As the stellar mass BH collides with a star, the BH -will grow in mass. 
The increase may equal star’s en￾tire mass if the relative velocity is smaller than the es￾cape velocity from the BH at 1 R . However, near the -SMBH, the velocity dispersion may be larger than the -escape velocity from the BH at the star’s radius. In this -limit, the BH captures a “tunnel” of material through -the star, estimated using Bondi-Hoyle-Lyttleton accre￾tion. In our statistical analysis, we account for Bondi￾Hoyle-Lyttleton accretion and find that BHs outside of -10−2 pc from the SMBH can capture the entire star (see -Figure 2). -The efficiency of collisions, and therefore IMBH, -EMRI, and IMRI formation as well, are sensitive to -the underlying stellar density. As shown in Figure 3, a -steeper density profile results in larger IMBHs. This be￾havior can be understood from the collision timescale’s -dependence on the stellar density profile. A steeper pro￾file yields shorter collision timescales near the SMBH. -However, the inclusion of relaxation processes in the -simulations dampens the influence of the stellar density -profile by allowing BHs to diffuse into regions of more -or less efficient growth. As a result, more BHs grow in -mass, but their maximum mass is smaller (∼ 104 M ). -Additionally, the final masses have no apparent depen￾dence on distance from the SMBH (see Figure 4). -Most simulations in our study assume that the BHs -accrete all of the mass that they capture. The final BH -masses can be taken as an upper limit. We note that -the accretion is a highly uncertain process and repre￾sents an active field of study (e.g., Blandford & Begel￾man 1999; Park & Ostriker 2001; Narayan et al. 2003; -Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan -et al. 2012; Jiang et al. 2014; McKinney et al. 2014; -Narayan et al. 2022). To assess the limits of our model, -we also consider a physically motivated accretion model, -momentum-driven winds (Section 2.5). We present the -final mass distribution for momentum-driven winds in -Figure 4. 
Importantly, we find that BHs within the -mass gap still form naturally despite the substantially -reduced accretion. About 5% of the BHs grow by 10 -to 100 M . Furthermore, if we increase this ∆M esti￾mate by a factor of 2 (i.e., use η = 0.05), the simula￾tion produces a 3.5×103 M IMBH for the same initial -conditions. Our proof-of-concept demonstrates that col￾lisions between BH and stars are an important process -that should be taken into account in dense places such -as a GN. -Mass growth through BH-main-sequence star colli￾sions may act in concert with other IMBH formation -channels, such as compact object binary mergers (e.g., -Hoang et al. 2018; Stephan et al. 2019; Fragione et al. -2021; Wang et al. 2021). While in some cases colli￾sions can unbind a binary (e.g., Sigurdsson & Phinney -1993; Fregeau et al. 2004), BH binaries can be tightly -bound enough to withstand the collisions. Wide bina￾ries may also become unbound due to interactions with -the neighboring stars and compact objects (e.g., Binney -& Tremaine 1987; Rose et al. 2020, see latter study for -the timescale for an arbitrary eccentricity). However, -as highlighted in previous studies, a substantial frac￾tion of these binaries may merge due to the Eccentric -Kozai Lidov mechanism, leaving behind a single star or -a single compact object (e.g., Stephan et al. 2016, 2019; -Hoang et al. 2018). Additionally, to be susceptible to -evaporation, BH binaries must have a wider configura￾tion. Otherwise, they will be more tightly bound than -the average kinetic energy of the surrounding objects + Rose et al. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. 
As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for three different values of α: 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. +As the stellar mass BH collides with a star, the BH +will grow in mass. The increase may equal the star’s entire mass if the relative velocity is smaller than the escape velocity from the BH at $1\,R_\odot$. However, near the +SMBH, the velocity dispersion may be larger than the +escape velocity from the BH at the star’s radius. In this +limit, the BH captures a “tunnel” of material through +the star, estimated using Bondi-Hoyle-Lyttleton accretion. In our statistical analysis, we account for Bondi-Hoyle-Lyttleton accretion and find that BHs outside of +$10^{-2}$ pc from the SMBH can capture the entire star (see +Figure 2). +The efficiency of collisions, and therefore IMBH, +EMRI, and IMRI formation as well, is sensitive to +the underlying stellar density. As shown in Figure 3, a +steeper density profile results in larger IMBHs. This behavior can be understood from the collision timescale’s +dependence on the stellar density profile. A steeper profile yields shorter collision timescales near the SMBH. +However, the inclusion of relaxation processes in the +simulations dampens the influence of the stellar density +profile by allowing BHs to diffuse into regions of more +or less efficient growth. As a result, more BHs grow in +mass, but their maximum mass is smaller ($\sim 10^{4}\,M_\odot$). +Additionally, the final masses have no apparent dependence on distance from the SMBH (see Figure 4).
+Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and represents an active field of study (e.g., Blandford & Begelman 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). To assess the limits of our model, +we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the +final mass distribution for momentum-driven winds in +Figure 4. Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M . Furthermore, if we increase this ∆M estimate by a factor of 2 (i.e., use η = 0.05), the simulation produces a 3.5×103 M IMBH for the same initial +conditions. Our proof-of-concept demonstrates that collisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. +Mass growth through BH-main-sequence star collisions may act in concert with other IMBH formation +channels, such as compact object binary mergers (e.g., +Hoang et al. 2018; Stephan et al. 2019; Fragione et al. +2021; Wang et al. 2021). While in some cases collisions can unbind a binary (e.g., Sigurdsson & Phinney +1993; Fregeau et al. 2004), BH binaries can be tightly +bound enough to withstand the collisions. Wide binaries may also become unbound due to interactions with +the neighboring stars and compact objects (e.g., Binney +& Tremaine 1987; Rose et al. 2020, see latter study for +the timescale for an arbitrary eccentricity). 
However, +as highlighted in previous studies, a substantial fraction of these binaries may merge due to the Eccentric +Kozai Lidov mechanism, leaving behind a single star or +a single compact object (e.g., Stephan et al. 2016, 2019; +Hoang et al. 2018). Additionally, to be susceptible to +evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding objects and will only harden through weak gravitational inter- -IMBH Formation in Galactic Nuclei 9 -actions with neighboring stars (see for example Figure -6 in Rose et al. 2020). -We note that we assume a steady-state and treat the -stars as a reservoir in this model. Future work will take a -more nuanced approach to the background stars, whose -density as a function of time can be influenced by several -factors. Firstly, the relaxation of the stellar population -occurs on Gyr timescales. Some studies have suggested -that in situ star formation can occur in the Galactic -Center as close as 0.04 pc from the SMBH (e.g., Levin -& Beloborodov 2003; Paumard et al. 2006), and star -formation episodes can occur as often as every ∼ 5 Myr -(e.g. Lu et al. 2009). Therefore, we expect that after -the first Gyr, stars within . 0.01 pc will be replenished -at intervals consistent with the star formation episodes; -the infalling populations of stars are separated by ∼ -5−10 Myr, which is shorter than the collision timescale. -However, star-star collisions may complicate this pic￾ture within ∼ 0.01 pc. As discussed above, regular star -formation ensures the BHs always have a stellar popula￾tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, -however, the kinetic energy during a collision between -two 1 M stars is larger than their binding energies. -Collisions can therefore thin out the stellar populations -during the time it takes them to diffuse to these small -radii, . 0.01 pc, and may reduce the BH growth in the -innermost region. 
We reserve the inclusion of star-star -collisions for future work. We also note that the disrup￾tion of binary stars by the SMBH may help replenish -the stellar population even as collisions work to deplete -it (e.g., Balberg et al. 2013); when a binary is disrupted, -one of the stars is captured on a tightly bound orbit -about the SMBH. -An IMBH may also affect the stellar density profile. -As it spirals into the SMBH, it can perturb stellar orbits, -and these interactions can lead to hypervelocity stars -(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt -2008). L¨ockmann & Baumgardt (2008) show that an -IMBH can modify an initially steep stellar density pro￾file to become consistent with the flatter cusp observed -in the Galactic Center. The stars may then be replen￾ished on 100 Myr timescales (Baumgardt et al. 2006a). -Therefore, after the formation of the first few IMBHs, -subsequent BH growth may occur in bursts, coinciding -with replenishment of the stars. -While there are many competing dynamical processes -that shape the stellar density profile, we stress that α -5 -In fact, the star-star collision timescale is greater than 10 Myr -for the entire parameter space, save at 0.001 pc for larger values -of α; the BH-star collision timescale plotted in Fig. 1 is the same -order of magnitude as the star-star collision timescale. -can simply be chosen to encapsulate all of the relevant -physics. A value for α that is constrained by observa￾tions must already reflect ongoing processes like star￾star collisions and replenishment. Sch¨odel et al. (2018) -find the observed stellar mass enclosed within 0.01 pc of -the Milky Way’s Galactic Center to be approximately -180 M . This estimate is consistent to order of magni￾tude with our α = 1.25 case. In a simulation like those -depicted in Figure 4, which include relaxation, α = 1.25 -leads to a maximum IMBH mass of 140 M . Further￾more, while the stellar mass within 0.01 pc may be a -few hundred M , Do et al. 
(2019) and GRAVITY Col￾laboration et al. (2020) set an upper limit on the mass -enclosed within the orbit of S0-2 to be about a few thou￾sand M , or 0.1% of the central mass. This upper limit -can include mass that was previously in stars but is now -in BHs. In that case, the 180 M is what remains of the -stars, while BHs and IMBHs make up the ∼ 1000 M -in the innermost region. -Also not included in this study, collisions between the -BH and other compact objects will increase the BH -growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; -Fragione et al. 2021) and even neutron star BH mergers -(e.g., Hoang et al. 2020) become more likely as the BHs -increase in mass through stellar collisions. As a result, -the BH-BH collision timescale, discussed in Section 2.2, -will become relevant to our simulations, allowing the -BHs to grow through this channel in addition to stel￾lar collisions. Additionally, this compact object mergers -result in GW recoil, which may have a large impact on -the dynamics (e.g., Baibhav et al. 2020; Fragione et al. -2021). -The BH’s mass growth increases GW emission, which -dissipates energy from the orbit. Along with relaxation, -GW emission causes BHs to sink towards the SMBH -and eventually undergo a merger. As a result, the GN -environment is conducive to the formation of EMRIs -and IMRIs. The GW emission from EMRIs and IM￾RIs is expected to be at mHz frequencies, making them -promising candidates for LISA to observe. While the -exact rate calculation is beyond the scope of this study, -the mechanism outlined here seems very promising. -Our results also suggest that BHs within the mass gap -as well as IMBHs likely exist in many galactic nuclei, as -well as within our own galactic center. This implication -seems to be consistent with recent observational and -theoretical studies (e.g., Hansen & Milosavljevi´c 2003; -Maillard et al. 
2004; G¨urkan & Rasio 2005; Gualandris -& Merritt 2009; Chen & Liu 2013; Generozov & Madi￾gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +IMBH Formation in Galactic Nuclei 9 +actions with neighboring stars (see for example Figure +6 in Rose et al. 2020). +We note that we assume a steady-state and treat the +stars as a reservoir in this model. Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star +formation episodes can occur as often as every ∼ 5 Myr +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within . 0.01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5−10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this picture within ∼ 0.01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar population to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +however, the kinetic energy during a collision between +two 1 M stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, . 0.01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disruption of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g., Balberg et al. 2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. 
+An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt +2008). L¨ockmann & Baumgardt (2008) show that an +IMBH can modify an initially steep stellar density profile to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replenished on 100 Myr timescales (Baumgardt et al. 2006a). +Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. +While there are many competing dynamical processes +that shape the stellar density profile, we stress that α +5 +In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observations must already reflect ongoing processes like starstar collisions and replenishment. Sch¨odel et al. (2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M . This estimate is consistent to order of magnitude with our α = 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α = 1.25 +leads to a maximum IMBH mass of 140 M . Furthermore, while the stellar mass within 0.01 pc may be a +few hundred M , Do et al. (2019) and GRAVITY Collaboration et al. (2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thousand M , or 0.1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. 
In that case, the $180\,M_\odot$ is what remains of the +stars, while BHs and IMBHs make up the $\sim 1000\,M_\odot$ +in the innermost region. +Also not included in this study, collisions between the +BH and other compact objects will increase the BH +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers +(e.g., Hoang et al. 2020) become more likely as the BHs +increase in mass through stellar collisions. As a result, +the BH-BH collision timescale, discussed in Section 2.2, +will become relevant to our simulations, allowing the +BHs to grow through this channel in addition to stellar collisions. Additionally, these compact-object mergers +result in GW recoil, which may have a large impact on +the dynamics (e.g., Baibhav et al. 2020; Fragione et al. +2021). +The BH’s mass growth increases GW emission, which +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. The GW emission from EMRIs and IMRIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. +Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljević 2003; +Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madigan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY Collaboration et al. 2020). -10 Rose et al. -Lastly, the collisions between stellar mass BHs and -stars may contribute to the x-ray emission from our -galactic centre (e.g., Muno et al.
2005, 2009; Hailey -et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre￾mer et al. (2022) for a discussion of electromagnetic sig￾natures from BH-star collisions)6 -. These interactions, -in particular grazing collisions, may also result in tidal -disruption events (e.g., Baumgardt et al. 2006b; Perets -et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre￾mer et al. 2021). Thus, the process outlined here may -produce electromagnetic signatures in addition to GW -mergers. -We thank the anonymous referee for useful comments. -We also thank Jessica Lu, Fred Rasio, Kyle Kremer, -Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use￾ful discussion. -SR thanks the Charles E. Young Fellowship, the Nina -Byers Fellowship, and the Michael A. Jura Memorial -Graduate Award for support. SR and SN acknowledge -the partial support from NASA ATP 80NSSC20K0505. -SN thanks Howard and Astrid Preston for their gener￾ous support. IL thanks support from the Adams Fellow￾ship. SN and RS thank the Bhaumik Institute visitor -program. This work was performed in part at the As￾pen Center for Physics, which is supported by National -Science Foundation grant PHY-1607611. -REFERENCES -Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, -PhRvL, 116, 241102, -doi: 10.1103/PhysRevLett.116.241102 -—. 2017a, PhRvL, 118, 221101, -doi: 10.1103/PhysRevLett.118.221101 -—. 2017b, PhRvL, 119, 141101, -doi: 10.1103/PhysRevLett.119.141101 -Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, -doi: 10.3847/2041-8205/830/1/L1 -Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 -Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, -doi: 10.1088/0004-637X/780/2/148 -Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, -doi: 10.1007/s41114-018-0013-8 -6 The connection between the observed X-ray sources at the Galac￾tic Center and tidal capture has been suggested by Generozov -et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels. 
-Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. -2021, arXiv e-prints, arXiv:2109.12119. -https://arxiv.org/abs/2109.12119 -Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, -doi: 10.1086/154711 -Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, -043002, doi: 10.1103/PhysRevD.102.043002 -Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, -doi: 10.1093/mnrasl/slt071 -Baumgardt, H., Gualandris, A., & Portegies Zwart, S. -2006a, MNRAS, 372, 174, -doi: 10.1111/j.1365-2966.2006.10818.x -Baumgardt, H., Hopman, C., Portegies Zwart, S., & -Makino, J. 2006b, MNRAS, 372, 467, -doi: 10.1111/j.1365-2966.2006.10885.x -Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, -613, 1143, doi: 10.1086/423299 -Begelman, M. C. 1979, MNRAS, 187, 237, -doi: 10.1093/mnras/187.2.237 + Rose et al. +Lastly, the collisions between stellar mass BHs and +stars may contribute to the x-ray emission from our +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer et al. (2022) for a discussion of electromagnetic signatures from BH-star collisions)6 +. These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers. +We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful discussion. +SR thanks the Charles E. Young Fellowship, the Nina +Byers Fellowship, and the Michael A. Jura Memorial +Graduate Award for support. SR and SN acknowledge +the partial support from NASA ATP 80NSSC20K0505. +SN thanks Howard and Astrid Preston for their generous support. IL thanks support from the Adams Fellowship. SN and RS thank the Bhaumik Institute visitor +program. 
This work was performed in part at the Aspen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. +REFERENCES +Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, +PhRvL, 116, 241102, +doi: 10.1103/PhysRevLett.116.241102 +—. 2017a, PhRvL, 118, 221101, +doi: 10.1103/PhysRevLett.118.221101 +—. 2017b, PhRvL, 119, 141101, +doi: 10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi: 10.3847/2041-8205/830/1/L1 +Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 +Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, +doi: 10.1088/0004-637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi: 10.1007/s41114-018-0013-8 +6 The connection between the observed X-ray sources at the Galactic Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. +Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. +2021, arXiv e-prints, arXiv:2109.12119. +https://arxiv.org/abs/2109.12119 +Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, +doi: 10.1086/154711 +Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, +043002, doi: 10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, +doi: 10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. +2006a, MNRAS, 372, 174, +doi: 10.1111/j.1365-2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi: 10.1111/j.1365-2966.2006.10885.x +Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, +613, 1143, doi: 10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, +doi: 10.1093/mnras/187.2.237 —. 2012a, ApJL, 749, L3, doi: 10.1088/2041-8205/749/1/L3 -IMBH Formation in Galactic Nuclei 11 -—. 2012b, MNRAS, 420, 2912, -doi: 10.1111/j.1365-2966.2011.20071.x -Begelman, M. C., Volonteri, M., & Rees, M. J. 
2006, -MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x -Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, -890, 113, doi: 10.3847/1538-4357/ab6d77 -—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 -Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. -2009, New Journal of Physics, 11, 105016, -doi: 10.1088/1367-2630/11/10/105016 -Binney, J., & Tremaine, S. 1987, Galactic dynamics -—. 2008, Galactic Dynamics: Second Edition -Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, -doi: 10.1086/342655 -Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, -L1, doi: 10.1046/j.1365-8711.1999.02358.x -Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, -doi: 10.1093/mnras/179.3.433 -Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, -427, doi: 10.1086/500727 -Bondi, H. 1952, MNRAS, 112, 195, -doi: 10.1093/mnras/112.2.195 -Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, -doi: 10.1093/mnras/104.5.273 -Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, -2042, doi: 10.1093/mnras/stx1007 -Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, -860, 14, doi: 10.3847/1538-4357/aac2c4 -Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, -C. 2012, JCAP, 2012, 054, -doi: 10.1088/1475-7516/2012/07/054 -Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. -2010, Reviews of Modern Physics, 82, 3069, -doi: 10.1103/RevModPhys.82.3069 -Chen, X., & Liu, F. K. 2013, ApJ, 762, 95, -doi: 10.1088/0004-637X/762/2/95 -Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, -doi: 10.3847/1538-4357/aaba16 -Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, -MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 -Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., -et al. 1996, Science, 272, 1286, -doi: 10.1126/science.272.5266.1286 -Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, -doi: 10.1086/156685 -Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, -doi: 10.1111/j.1365-2966.2005.09937.x -Dale, J. E., Davies, M. B., Church, R. 
P., & Freitag, M. -2009, MNRAS, 393, 1016, -doi: 10.1111/j.1365-2966.2008.14254.x -Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, -MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 -Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. -C. N. 2021a, MNRAS, 505, 2186, -doi: 10.1093/mnras/stab1428 -Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, -T. C. N. 2021b, MNRAS, 503, 1051, -doi: 10.1093/mnras/stab402 -De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. -2005, ApJ, 620, 878, doi: 10.1086/427142 -Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, -MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 -Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, -MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 -Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, -doi: 10.1126/science.aav8137 -Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, -562, L19, doi: 10.1086/338118 -Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, -110, 221101, doi: 10.1103/PhysRevLett.110.221101 -Edgar, R. 2004, NewAR, 48, 843, -doi: 10.1016/j.newar.2004.06.001 -Escala, A. 2021, ApJ, 908, 57, -doi: 10.3847/1538-4357/abd93c -Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, -Monthly Notices of the Royal Astronomical Society, 443, -2410, doi: 10.1093/mnras/stu1280 -Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, -L31, doi: 10.3847/2041-8213/ab77c9 -Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, -arXiv e-prints, arXiv:2107.04639. -https://arxiv.org/abs/2107.04639 -Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, -ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 -Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, -L26, doi: 10.3847/2041-8213/abbc0a -Fragione, G., & Sari, R. 2018, ApJ, 852, 51, -doi: 10.3847/1538-4357/aaa0d7 -Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & -Rasio, F. A. 2004, MNRAS, 352, 1, -doi: 10.1111/j.1365-2966.2004.07914.x -Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. 
F., & -Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 -Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, -649, 91, doi: 10.1086/506193 -Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, -doi: 10.3847/1538-4357/ab94bc -Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, -J. P. 2018, MNRAS, 478, 4030, +IMBH Formation in Galactic Nuclei 11 +—. 2012b, MNRAS, 420, 2912, +doi: 10.1111/j.1365-2966.2011.20071.x +Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, +MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x +Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, +890, 113, doi: 10.3847/1538-4357/ab6d77 +—. 2020b, ApJ, 890, 113, doi: 10.3847/1538-4357/ab6d77 +Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. +2009, New Journal of Physics, 11, 105016, +doi: 10.1088/1367-2630/11/10/105016 +Binney, J., & Tremaine, S. 1987, Galactic dynamics +—. 2008, Galactic Dynamics: Second Edition +Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, +doi: 10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi: 10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Znajek, R. L. 1977, MNRAS, 179, 433, +doi: 10.1093/mnras/179.3.433 +Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, +427, doi: 10.1086/500727 +Bondi, H. 1952, MNRAS, 112, 195, +doi: 10.1093/mnras/112.2.195 +Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, +doi: 10.1093/mnras/104.5.273 +Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, +2042, doi: 10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi: 10.3847/1538-4357/aac2c4 +Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, +C. 2012, JCAP, 2012, 054, +doi: 10.1088/1475-7516/2012/07/054 +Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. +2010, Reviews of Modern Physics, 82, 3069, +doi: 10.1103/RevModPhys.82.3069 +Chen, X., & Liu, F. K. 2013, ApJ, 762, 95, +doi: 10.1088/0004-637X/762/2/95 +Cheng, Z., Li, Z., Xu, X., & Li, X. 
2018, ApJ, 858, 33, +doi: 10.3847/1538-4357/aaba16 +Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, +MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 +Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., +et al. 1996, Science, 272, 1286, +doi: 10.1126/science.272.5266.1286 +Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, +doi: 10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi: 10.1111/j.1365-2966.2005.09937.x +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. +2009, MNRAS, 393, 1016, +doi: 10.1111/j.1365-2966.2008.14254.x +Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 2021a, MNRAS, 505, 2186, +doi: 10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi: 10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi: 10.1086/427142 +Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, +MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 +Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, +MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi: 10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi: 10.1086/338118 +Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, +110, 221101, doi: 10.1103/PhysRevLett.110.221101 +Edgar, R. 2004, NewAR, 48, 843, +doi: 10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, +doi: 10.3847/1538-4357/abd93c +Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, +Monthly Notices of the Royal Astronomical Society, 443, +2410, doi: 10.1093/mnras/stu1280 +Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, +L31, doi: 10.3847/2041-8213/ab77c9 +Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, +arXiv e-prints, arXiv:2107.04639. 
+https://arxiv.org/abs/2107.04639 +Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, +ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 +Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, +L26, doi: 10.3847/2041-8213/abbc0a +Fragione, G., & Sari, R. 2018, ApJ, 852, 51, +doi: 10.3847/1538-4357/aaa0d7 +Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & +Rasio, F. A. 2004, MNRAS, 352, 1, +doi: 10.1111/j.1365-2966.2004.07914.x +Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & +Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 +Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, +649, 91, doi: 10.1086/506193 +Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, +doi: 10.3847/1538-4357/ab94bc +Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, +J. P. 2018, MNRAS, 478, 4030, doi: 10.1093/mnras/sty1262 -12 Rose et al. -Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of -Modern Physics, 82, 3121, -doi: 10.1103/RevModPhys.82.3121 -Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, -doi: 10.1086/377127 -Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, -620, 744, doi: 10.1086/427175 -Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, -860, 5, doi: 10.3847/1538-4357/aabfee -Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, -908, L29, doi: 10.3847/2041-8213/abdf5b -GRAVITY Collaboration, Abuter, R., Amorim, A., et al. -2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813 -Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, -doi: 10.1088/0004-637X/705/1/361 -G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, -640, L39, doi: 10.1086/503295 -G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, -doi: 10.1086/430694 -Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, -556, 70, doi: 10.1038/nature25029 -Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, -L77, doi: 10.1086/378182 -Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., & -Hartmann, D. H. 
2003, ApJ, 591, 288, -doi: 10.1086/375341 -Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & -Dosopoulou, F. 2018, ApJ, 856, 140, -doi: 10.3847/1538-4357/aaafce -Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, -doi: 10.3847/1538-4357/abb66a -Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, -doi: 10.1086/431475 -Igumenshchev, I. V. 2008, ApJ, 677, 317, -doi: 10.1086/529025 -Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. -2003, ApJ, 592, 1042, doi: 10.1086/375769 -Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, -106, doi: 10.1088/0004-637X/796/2/106 -Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the -Royal Astronomical Society, 374, 1557, -doi: 10.1111/j.1365-2966.2006.11275.x -Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & -Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. -https://arxiv.org/abs/2201.12368 -Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, -doi: 10.3847/1538-4357/abeb14 -Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, -45, doi: 10.3847/1538-4357/abb945 -Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, -MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 -Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, -doi: 10.1086/376675 -Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, -doi: 10.3847/1538-4365/aacb24 -—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 -L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, -doi: 10.1111/j.1365-2966.2007.12699.x -Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, -doi: 10.1093/mnras/stz036 -Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, -690, 1463, doi: 10.1088/0004-637X/690/2/1463 -Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, -doi: 10.1086/319848 -Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, -doi: 10.1046/j.1365-8711.1999.02853.x -Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. -2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 -Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, -M., & Artale, M. 
C. 2021a, arXiv e-prints, -arXiv:2109.06222. https://arxiv.org/abs/2109.06222 -Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, -MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 -Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. -2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 -McKinney, J. C. 2006, MNRAS, 368, 1561, -doi: 10.1111/j.1365-2966.2006.10256.x -McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, -doi: 10.1086/422244 -McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & -Narayan, R. 2014, MNRAS, 441, 3177, -doi: 10.1093/mnras/stu762 -Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, -doi: 10.1088/0034-4885/69/9/R01 -Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, -doi: 10.1086/317837 -Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607 -Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, -622, L113, doi: 10.1086/429721 -Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, -ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 -Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, -618, 569, doi: 10.1086/426067 -Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, -L18, doi: 10.3847/2041-8213/ac574b -Naoz, S., & Silk, J. 2014, ApJ, 795, 102, -doi: 10.1088/0004-637X/795/2/102 -Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, + Rose et al. +Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of +Modern Physics, 82, 3121, +doi: 10.1103/RevModPhys.82.3121 +Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, +doi: 10.1086/377127 +Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, +620, 744, doi: 10.1086/427175 +Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, +860, 5, doi: 10.3847/1538-4357/aabfee +Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, +908, L29, doi: 10.3847/2041-8213/abdf5b +GRAVITY Collaboration, Abuter, R., Amorim, A., et al. +2020, A&A, 636, L5, doi: 10.1051/0004-6361/202037813 +Gualandris, A., & Merritt, D. 
2009, ApJ, 705, 361, +doi: 10.1088/0004-637X/705/1/361 +G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, +640, L39, doi: 10.1086/503295 +G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, +doi: 10.1086/430694 +Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, +556, 70, doi: 10.1038/nature25029 +Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, +L77, doi: 10.1086/378182 +Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., & +Hartmann, D. H. 2003, ApJ, 591, 288, +doi: 10.1086/375341 +Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & +Dosopoulou, F. 2018, ApJ, 856, 140, +doi: 10.3847/1538-4357/aaafce +Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, +doi: 10.3847/1538-4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi: 10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi: 10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. +2003, ApJ, 592, 1042, doi: 10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi: 10.1088/0004-637X/796/2/106 +Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the +Royal Astronomical Society, 374, 1557, +doi: 10.1111/j.1365-2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 +Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, +doi: 10.3847/1538-4357/abeb14 +Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, +45, doi: 10.3847/1538-4357/abb945 +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, +doi: 10.1086/376675 +Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, +doi: 10.3847/1538-4365/aacb24 +—. 2018b, ApJS, 237, 13, doi: 10.3847/1538-4365/aacb24 +L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi: 10.1111/j.1365-2966.2007.12699.x +Lu, C. X., & Naoz, S. 
2019, MNRAS, 484, 1506, +doi: 10.1093/mnras/stz036 +Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, +690, 1463, doi: 10.1088/0004-637X/690/2/1463 +Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, +doi: 10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi: 10.1046/j.1365-8711.1999.02853.x +Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. +2004, A&A, 423, 155, doi: 10.1051/0004-6361:20034147 +Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, +M., & Artale, M. C. 2021a, arXiv e-prints, +arXiv:2109.06222. https://arxiv.org/abs/2109.06222 +Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, +MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi: 10.1111/j.1365-2966.2006.10256.x +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, +doi: 10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 2014, MNRAS, 441, 3177, +doi: 10.1093/mnras/stu762 +Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, +doi: 10.1088/0034-4885/69/9/R01 +Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, +doi: 10.1086/317837 +Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607 +Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, +622, L113, doi: 10.1086/429721 +Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, +ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi: 10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi: 10.3847/2041-8213/ac574b +Naoz, S., & Silk, J. 2014, ApJ, 795, 102, +doi: 10.1088/0004-637X/795/2/102 +Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, L35, doi: 10.3847/2041-8213/ab4fed -IMBH Formation in Galactic Nuclei 13 -Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 
2020, ApJL, -888, L8, doi: 10.3847/2041-8213/ab5e3b -Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & -Curd, B. 2022, MNRAS, 511, 3795, -doi: 10.1093/mnras/stac285 -Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. -2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 -Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, -ApJ, 628, 368, doi: 10.1086/430728 -O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, -2127, doi: 10.1111/j.1365-2966.2009.14653.x -O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., -& O’Shaughnessy, R. 2006, ApJ, 637, 937, -doi: 10.1086/498446 -Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, -D. 2010, ApJ, 722, 642, -doi: 10.1088/0004-637X/722/1/642 -Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, -doi: 10.1086/319042 -Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, -1011, doi: 10.1086/503273 -Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, -Stephen R., J. 2016, ApJ, 823, 113, -doi: 10.3847/0004-637X/823/2/113 -Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, -435, doi: 10.1103/PhysRev.131.435 -—. 1963b, Physical Review, 131, 435, -doi: 10.1103/PhysRev.131.435 -Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J., -& McMillan, S. L. W. 2004, Nature, 428, 724, -doi: 10.1038/nature02448 -Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, -528, L17, doi: 10.1086/312422 -—. 2002, ApJ, 576, 899, doi: 10.1086/341798 -Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, -doi: 10.1088/0004-637X/780/2/187 -Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, -A56, doi: 10.1051/0004-6361/202037710 -Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, -MNRAS, doi: 10.1093/mnras/stac231 -Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & -Rasio, F. A. 2018, PhRvL, 120, 151101, -doi: 10.1103/PhysRevLett.120.151101 -Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, -PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 -Rodriguez, C. 
L., Zevin, M., Amaro-Seoane, P., et al. 2019, -Phys. Rev. D, 100, 043027, -doi: 10.1103/PhysRevD.100.043027 -Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, -113, doi: 10.3847/1538-4357/abc557 -Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., -& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. -https://arxiv.org/abs/2009.01213 -Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, -MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 -Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, -100, 043009, doi: 10.1103/PhysRevD.100.043009 -Sari, R., & Fragione, G. 2019, ApJ, 885, 24, -doi: 10.3847/1538-4357/ab43df -Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. -2002, The Astrophysical Journal, 571, 30, -doi: 10.1086/339917 -Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, -doi: 10.1086/519309 -Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, -609, A27, doi: 10.1051/0004-6361/201730452 -Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, -doi: 10.1086/156521 -Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, -MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367 -Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, -K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700 -Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, -doi: 10.1086/173190 -Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739, -doi: 10.1093/mnras/stx1576 -—. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576 -Spitzer, L. 1987, Dynamical evolution of globular clusters -Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv -e-prints. https://arxiv.org/abs/1603.02709 -—. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d -Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, -MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 -Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, -doi: 10.1093/mnras/stv2281 -The LIGO Scientific Collaboration, the Virgo -Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, -arXiv:2009.01075. 
https://arxiv.org/abs/2009.01075 -—. 2020b, arXiv e-prints, arXiv:2009.01190. -https://arxiv.org/abs/2009.01190 -Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A. -2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31 -Valiante, R., Schneider, R., Volonteri, M., & Omukai, K. -2016, Monthly Notices of the Royal Astronomical -Society, 457, 3356, doi: 10.1093/mnras/stw225 -Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, -G. N. 2021, MNRAS, 504, 146, +IMBH Formation in Galactic Nuclei 13 +Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, +888, L8, doi: 10.3847/2041-8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi: 10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, +ApJ, 628, 368, doi: 10.1086/430728 +O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, +2127, doi: 10.1111/j.1365-2966.2009.14653.x +O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., +& O’Shaughnessy, R. 2006, ApJ, 637, 937, +doi: 10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi: 10.1088/0004-637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi: 10.1086/319042 +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, +1011, doi: 10.1086/503273 +Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, +Stephen R., J. 2016, ApJ, 823, 113, +doi: 10.3847/0004-637X/823/2/113 +Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, +435, doi: 10.1103/PhysRev.131.435 +—. 1963b, Physical Review, 131, 435, +doi: 10.1103/PhysRev.131.435 +Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J., +& McMillan, S. L. W. 2004, Nature, 428, 724, +doi: 10.1038/nature02448 +Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, +528, L17, doi: 10.1086/312422 +—. 
2002, ApJ, 576, 899, doi: 10.1086/341798 +Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, +doi: 10.1088/0004-637X/780/2/187 +Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, +A56, doi: 10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi: 10.1093/mnras/stac231 +Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & +Rasio, F. A. 2018, PhRvL, 120, 151101, +doi: 10.1103/PhysRevLett.120.151101 +Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, +PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 +Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, +Phys. Rev. D, 100, 043027, +doi: 10.1103/PhysRevD.100.043027 +Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, +113, doi: 10.3847/1538-4357/abc557 +Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., +& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. +https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 +Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, +100, 043009, doi: 10.1103/PhysRevD.100.043009 +Sari, R., & Fragione, G. 2019, ApJ, 885, 24, +doi: 10.3847/1538-4357/ab43df +Schneider, R., Ferrara, A., Natarajan, P., & Omukai, K. +2002, The Astrophysical Journal, 571, 30, +doi: 10.1086/339917 +Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, +doi: 10.1086/519309 +Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi: 10.1051/0004-6361/201730452 +Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, +doi: 10.1086/156521 +Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, +MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367 +Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, +K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700 +Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, +doi: 10.1086/173190 +Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739, +doi: 10.1093/mnras/stx1576 +—. 
2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576 +Spitzer, L. 1987, Dynamical evolution of globular clusters +Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv +e-prints. https://arxiv.org/abs/1603.02709 +—. 2019, ApJ, 878, 58, doi: 10.3847/1538-4357/ab1e4d +Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi: 10.1093/mnras/stv2281 +The LIGO Scientific Collaboration, the Virgo +Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, +arXiv:2009.01075. https://arxiv.org/abs/2009.01075 +—. 2020b, arXiv e-prints, arXiv:2009.01190. +https://arxiv.org/abs/2009.01190 +Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A. +2012, ApJ, 750, 31, doi: 10.1088/0004-637X/750/1/31 +Valiante, R., Schneider, R., Volonteri, M., & Omukai, K. +2016, Monthly Notices of the Royal Astronomical +Society, 457, 3356, doi: 10.1093/mnras/stw225 +Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, +G. N. 2021, MNRAS, 504, 146, doi: 10.1093/mnras/stab842 -14 Rose et al. -Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & -Breivik, K. 2021, ApJ, 917, 76, -doi: 10.3847/1538-4357/ac088d -Wang, J., & Merritt, D. 2004, ApJ, 600, 149, -doi: 10.1086/379767 -Woosley, S. E. 2017, ApJ, 836, 244, -doi: 10.3847/1538-4357/836/2/244 -Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, -doi: 10.1046/j.1365-8711.2002.05532.x -Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, -doi: 10.1088/0004-637X/761/2/129 -Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. -2014, Monthly Notices of the Royal Astronomical -Society, 440, 1263, doi: 10.1093/mnras/stu351 -Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, -arXiv:2011.04653. https://arxiv.org/abs/2011.04653 -Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26, -doi: 10.3847/1538-4365/aab14f + Rose et al. +Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & +Breivik, K. 
2021, ApJ, 917, 76, +doi: 10.3847/1538-4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi: 10.1086/379767 +Woosley, S. E. 2017, ApJ, 836, 244, +doi: 10.3847/1538-4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi: 10.1046/j.1365-8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi: 10.1088/0004-637X/761/2/129 +Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +2014, Monthly Notices of the Royal Astronomical +Society, 440, 1263, doi: 10.1093/mnras/stu351 +Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, +arXiv:2011.04653. https://arxiv.org/abs/2011.04653 +Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26, +doi: 10.3847/1538-4365/aab14f \ No newline at end of file diff --git a/read/results/pdfium/2201.00029.txt b/read/results/pdfium/2201.00029.txt index 48f509c..8803826 100644 --- a/read/results/pdfium/2201.00029.txt +++ b/read/results/pdfium/2201.00029.txt @@ -1,273 +1,273 @@ - 1 -Exploring new techniques for analyzing variability in white dwarf KIC 8626021 -Thomas Huckans, Peter Stine -Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St., + +Exploring new techniques for analyzing variability in white dwarf KIC 8626021 +Thomas Huckans, Peter Stine +Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St., Bloomsburg, PA 17815 -2 -Abstract -As is common with the collection of astronomical data, signals are frequently dominated -by noise. However, when performing FTs of light curves, re-binning data can improve the signal￾to-noise ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope, -we sequentially re-binned data three times to investigate the SNR improvement of lower frequency -(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8 -µHz greatly improved through this process, and we postulate that this frequency is linked to the -rotation of KIC 8626021. 
-Introduction -First detected in 1862, white dwarfs long posed a mystery for early observers. When the -companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and -densities baffled astronomers. Lacking full understanding of atomic structures and the energy -states of electrons, these early researchers believed white dwarfs too dense to exist. However, new -discoveries at the turn of the 20th century explained the existence of these stars, and between the -world wars white dwarfs were increasingly studied and modeled (Holberg, 2009). -As stars age, those that lack the mass to become neutron stars and black holes become -white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). They are -composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the -light emanating from these stars is a result of thermal energy. White dwarf stars are no longer -supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate -state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two -electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from -collapsing entirely. -For many years, accurate detection of light variability in white dwarfs was difficult due to -a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made -capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler -was initially developed with the intention of surveying our region of the Milky Way galaxy in -order to find potentially habitable planets. The purpose of the mission was to identify key traits for -such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, -and the characteristics of the stars being orbited. 
Over the lifespan of its first mission, Kepler -observed approximately 1.5 x 105 stars (Johnson, 2018), affording scientists excellent -opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013, -NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and -astrophysics. -Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use -the telescope to collect photometry data of certain sections of our galaxy, although the number of -targets was significantly reduced. In addition, the K2 mission was designed to be community￾oriented, with the scientific community having an influence on the fields observed and serving as -the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was -deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of -white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations + +Abstract +As is common with the collection of astronomical data, signals are frequently dominated +by noise. However, when performing FTs of light curves, re-binning data can improve the signalto-noise ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope, +we sequentially re-binned data three times to investigate the SNR improvement of lower frequency +(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8 +µHz greatly improved through this process, and we postulate that this frequency is linked to the +rotation of KIC 8626021. +Introduction +First detected in 1862, white dwarfs long posed a mystery for early observers. When the +companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and +densities baffled astronomers. Lacking full understanding of atomic structures and the energy +states of electrons, these early researchers believed white dwarfs too dense to exist. 
However, new +discoveries at the turn of the 20th century explained the existence of these stars, and between the +world wars white dwarfs were increasingly studied and modeled (Holberg, 2009). +As stars age, those that lack the mass to become neutron stars and black holes become +white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). They are +composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the +light emanating from these stars is a result of thermal energy. White dwarf stars are no longer +supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate +state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two +electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from +collapsing entirely. +For many years, accurate detection of light variability in white dwarfs was difficult due to +a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made +capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler +was initially developed with the intention of surveying our region of the Milky Way galaxy in +order to find potentially habitable planets. The purpose of the mission was to identify key traits for +such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, +and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler +observed approximately 1.5 x 105 stars (Johnson, 2018), affording scientists excellent +opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013, +NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and +astrophysics. 
+Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use +the telescope to collect photometry data of certain sections of our galaxy, although the number of +targets was significantly reduced. In addition, the K2 mission was designed to be communityoriented, with the scientific community having an influence on the fields observed and serving as +the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was +deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of +white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations Center (KASOC). -3 -The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon -previous studies, this research investigated novel techniques of analyzing variability in white -dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on -the star, allowing for the validation of results using our methods. KIC 8626021 has an effective -temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research -has found that this white dwarf is the DBV with the highest known temperature, and its helium -layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too -noisy to draw many conclusions, other FTs of short-cadence data have been performed to find -variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with -frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz -(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777 -Herculis, although our research focuses on low frequencies using long-cadence data. -Methods -All data were downloaded from the KASOC database, and the long-cadence (data -sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were -analyzed. 
All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs -were performed in Mathematica. The re-binning process consisted of summing adjacent light -curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one -hour, and then repeating this process on the data sample for a total of three times. In addition, a -significant detection was defined as being 3 above the mean of the relative flux, and 0 on the -graphs below represents this 3 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To -find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify -improvement in signal strength. -Results -Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), -with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs -of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the -same for Q13. -Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of -KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and -Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 + +The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon +previous studies, this research investigated novel techniques of analyzing variability in white +dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on +the star, allowing for the validation of results using our methods. KIC 8626021 has an effective +temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research +has found that this white dwarf is the DBV with the highest known temperature, and its helium +layer is the thinnest (Bischoff-Kim et al., 2015). 
Despite the long-cadence light curve being too +noisy to draw many conclusions, other FTs of short-cadence data have been performed to find +variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with +frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz +(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777 +Herculis, although our research focuses on low frequencies using long-cadence data. +Methods +All data were downloaded from the KASOC database, and the long-cadence (data +sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were +analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs +were performed in Mathematica. The re-binning process consisted of summing adjacent light +curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one +hour, and then repeating this process on the data sample for a total of three times. In addition, a +significant detection was defined as being 3 above the mean of the relative flux, and 0 on the +graphs below represents this 3 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To +find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify +improvement in signal strength. +Results +Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), +with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs +of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the +same for Q13. +Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of +KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and +Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 and Q13. -4 -FIG. 
1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 -lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012. -Both graphs were constructed by plotting corrected flux magnitude (flux corrected for -instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating + +FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 +lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012. +Both graphs were constructed by plotting corrected flux magnitude (flux corrected for +instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating between points. Q7 had forty-three interpolated points, and Q13 had sixty-six. -5 -FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of -the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The -disappearance of the frequency in the last FT is most likely a byproduct of the method, and the -spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re￾binning process. -6 -FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins -of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In -addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 and are -nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a + +FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of +the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. 
The +disappearance of the frequency in the last FT is most likely a byproduct of the method, and the +spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the rebinning process. + +FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins +of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In +addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3 and are +nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a starspot (Santos et al., 2017). -7 -Q7 Significant -Data Points -Light -Variability -Frequency -(µHz) -Corrected Flux -Magnitude -(ppm) -Period (days) Signal-to-Noise -(dB) -Q7 First -Iteration -5.886 -1.198 1.966 9.9 -Q7 Re-bin 1 5.886 -1.477 1.966 12.8 -Q7 Re-bin 2 5.889 0.597 1.965 19.2 -TABLE I: The table displays the various frequencies collected from Q7 and the information -found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, -and therefore was not used in any calculations determining the average period of rotation. The -values under corrected flux magnitude are relative to our significant frequency cutoff of 3, thus -negative numbers are under the cutoff. -Q13 Significant -Data Points -Light -Variability -Frequency -(µHz) -Corrected Flux -Magnitude -(ppm) -Period (days) Signal-to-Noise -(dB) -Q13 First -Iteration -5.784 1.555 2.001 15.6 -Q13 Re-bin 1 5.784 2.873 2.001 17.7 -Q13 Re-bin 2 5.787 4.938 2.000 22.6 -Q13 Re-bin 3 5.787 6.909 2.000 26.3 -Q13 Re-bin 3 11.641 7.073 0.994 26.4 -Q13 Re-bin 3 16.823 2.299 0.688 24.1 -TABLE II: The table displays the various frequencies collected from Q13 and the information -found through calculations to find period and SNR. 
The last two significant frequencies (11.641 -µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in -further detail in the Conclusions section of this paper. The values under corrected flux magnitude + +Q7 Significant +Data Points +Light +Variability +Frequency +(µHz) +Corrected Flux +Magnitude +(ppm) +Period (days) Signal-to-Noise +(dB) +Q7 First +Iteration +5.886 -1.198 1.966 9.9 +Q7 Re-bin 1 5.886 -1.477 1.966 12.8 +Q7 Re-bin 2 5.889 0.597 1.965 19.2 +TABLE I: The table displays the various frequencies collected from Q7 and the information +found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, +and therefore was not used in any calculations determining the average period of rotation. The +values under corrected flux magnitude are relative to our significant frequency cutoff of 3, thus +negative numbers are under the cutoff. +Q13 Significant +Data Points +Light +Variability +Frequency +(µHz) +Corrected Flux +Magnitude +(ppm) +Period (days) Signal-to-Noise +(dB) +Q13 First +Iteration +5.784 1.555 2.001 15.6 +Q13 Re-bin 1 5.784 2.873 2.001 17.7 +Q13 Re-bin 2 5.787 4.938 2.000 22.6 +Q13 Re-bin 3 5.787 6.909 2.000 26.3 +Q13 Re-bin 3 11.641 7.073 0.994 26.4 +Q13 Re-bin 3 16.823 2.299 0.688 24.1 +TABLE II: The table displays the various frequencies collected from Q13 and the information +found through calculations to find period and SNR. The last two significant frequencies (11.641 +µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in +further detail in the Conclusions section of this paper. The values under corrected flux magnitude are relative to our significant frequency cutoff of 3, thus negative numbers are under the cutoff. 
-8 -First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) -0.933 0.933 0.215 0.216 -1.148 1.148 0.575 0.575 -1.364 1.364 0.934 0.935 -1.507 1.507 1.005 1.006 -12.561 12.561 1.149 1.150 -16.581 16.581 1.221 1.222 -1.364 1.366 -1.508 1.509 -1.580 1.582 -1.724 1.725 -1.795 1.797 -5.889 2.085 -6.822 5.392 -9.192 5.464 -9.479 7.476 -11.203 9.489 -12.568 11.215 -14.291 12.581 -16.230 13.084 -16.589 13.443 -13.659 -14.018 -14.809 -15.097 -16.031 -16.463 -16.894 -TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) -above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a by￾product of the method, and we calculated for such errors when finding our average. -9 -First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) -3.094 2.018 2.019 1.951 -5.784 3.094 3.095 2.019 -9.080 5.784 5.787 2.442 -13.519 7.667 7.671 2.759 -15.671 9.080 9.084 3.095 -16.209 11.165 11.641 3.634 -16.411 13.519 13.526 4.374 -15.469 15.477 4.778 -15.671 15.679 4.912 -16.209 15.881 5.047 -16.411 16.419 5.787 -8.479 -9.084 -10.565 -11.641 -13.526 -15.544 -15.881 -16.823 -TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) -above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a by￾product of the method, and we calculated for such errors when finding our average. -Conclusions -As our research used the long-cadence data from Kepler, much of the high-frequency -variability due to gravitational wave pulsations is lost. However, this presents an opportunity to -verify our results with the work of research groups that analyzed short-cadence data.With the -data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the -periods and accounting for a 1 margin of error, our research hypothesizes that the rotation -period of KIC 8626021 is 1.99 ± 0.02 days. 
Other short-cadence research has found the rotation -period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et -al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and -these periods indicate that the more precise significant period identified through our re-binning -relates to the rotation of the white dwarf. -Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it -improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous -10 -significant frequency disappears, which becomes increasingly likely after successive re-binning -processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe -that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR -improvement ranging from 1.1 dB to 1.3 dB. -Through the re-binning process, more lines, or significant frequencies, appeared above -the 3 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to -short-cadence analysis, the re-binning process of long-cadence data can be used to identify -significant lower frequencies in white dwarfs. The methods we used are also simple and -replicable, which allows even those with less experience to quickly analyze the large amounts of -data being collected by orbiting telescopes, such as the currently active TESS (Transiting -Exoplanet Survey Satellite) telescope. -The presence of possible harmonics in the third re-bin of Q13 also indicates the possible -presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These -spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, -the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and -contrast (Santos et al., 2017). 
Using the process of re-binning, a starspot signal, previously + +First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) +0.933 0.933 0.215 0.216 +1.148 1.148 0.575 0.575 +1.364 1.364 0.934 0.935 +1.507 1.507 1.005 1.006 +12.561 12.561 1.149 1.150 +16.581 16.581 1.221 1.222 +1.364 1.366 +1.508 1.509 +1.580 1.582 +1.724 1.725 +1.795 1.797 +5.889 2.085 +6.822 5.392 +9.192 5.464 +9.479 7.476 +11.203 9.489 +12.568 11.215 +14.291 12.581 +16.230 13.084 +16.589 13.443 +13.659 +14.018 +14.809 +15.097 +16.031 +16.463 +16.894 +TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) +above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a byproduct of the method, and we calculated for such errors when finding our average. + +First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) +3.094 2.018 2.019 1.951 +5.784 3.094 3.095 2.019 +9.080 5.784 5.787 2.442 +13.519 7.667 7.671 2.759 +15.671 9.080 9.084 3.095 +16.209 11.165 11.641 3.634 +16.411 13.519 13.526 4.374 +15.469 15.477 4.778 +15.671 15.679 4.912 +16.209 15.881 5.047 +16.411 16.419 5.787 +8.479 +9.084 +10.565 +11.641 +13.526 +15.544 +15.881 +16.823 +TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) +above the cutoff of 3. The minor shifting of significant frequencies between re-bins is a byproduct of the method, and we calculated for such errors when finding our average. +Conclusions +As our research used the long-cadence data from Kepler, much of the high-frequency +variability due to gravitational wave pulsations is lost. However, this presents an opportunity to +verify our results with the work of research groups that analyzed short-cadence data.With the +data analyzed, the lower frequencies between 5-6 µHz emerged. 
After finding the average of the +periods and accounting for a 1 margin of error, our research hypothesizes that the rotation +period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation +period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et +al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and +these periods indicate that the more precise significant period identified through our re-binning +relates to the rotation of the white dwarf. +Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it +improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous + +significant frequency disappears, which becomes increasingly likely after successive re-binning +processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe +that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR +improvement ranging from 1.1 dB to 1.3 dB. +Through the re-binning process, more lines, or significant frequencies, appeared above +the 3 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to +short-cadence analysis, the re-binning process of long-cadence data can be used to identify +significant lower frequencies in white dwarfs. The methods we used are also simple and +replicable, which allows even those with less experience to quickly analyze the large amounts of +data being collected by orbiting telescopes, such as the currently active TESS (Transiting +Exoplanet Survey Satellite) telescope. +The presence of possible harmonics in the third re-bin of Q13 also indicates the possible +presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. 
These +spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, +the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and +contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously dominated by noise, may have been discovered. -11 -Acknowledgments -We wish to thank Bloomsburg University of Pennsylvania for its continued support of our -research. -This paper includes data collected by the Kepler mission and obtained from the MAST -data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is -provided by the NASA Science Mission Directorate. STScI is operated by the Association of -Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555. -References -Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., -Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010). -PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG -stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. -https://doi.org/10.1088/2041-8205/713/2/L155 -Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period -asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. -https://doi.org/10.1051/epjconf/201510106009 -Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers -in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 -Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal -for the History of Astronomy, 40(2), 137-154. -https://doi.org/10.1177%2F002182860904000201 -Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., -Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., -Miglio, A., Najita, J. 
R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: -Characterization and early results. Publications of the Astronomical Society of the Pacific, -126(938), 398-408. https://doi.org/10.1086/676406 -Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space -Administration. Retrieved September 2, 2021, from -https://www.nasa.gov/mission_pages/kepler/overview/index.html -Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen￾dalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., -Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, -G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC -performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. -https://dx.doi.org/10.1088/2041-8205/713/2/L79 -Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & -Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The -Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39 -Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot -signature on the light curve. Astronomy & Astrophysics, 599, A1. + +Acknowledgments +We wish to thank Bloomsburg University of Pennsylvania for its continued support of our +research. +This paper includes data collected by the Kepler mission and obtained from the MAST +data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is +provided by the NASA Science Mission Directorate. STScI is operated by the Association of +Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555. +References +Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., +Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010). 
+PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG +stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. +https://doi.org/10.1088/2041-8205/713/2/L155 +Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period +asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. +https://doi.org/10.1051/epjconf/201510106009 +Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers +in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 +Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal +for the History of Astronomy, 40(2), 137-154. +https://doi.org/10.1177%2F002182860904000201 +Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., +Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., +Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: +Characterization and early results. Publications of the Astronomical Society of the Pacific, +126(938), 398-408. https://doi.org/10.1086/676406 +Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space +Administration. Retrieved September 2, 2021, from +https://www.nasa.gov/mission_pages/kepler/overview/index.html +Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensendalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., +Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, +G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC +performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. +https://dx.doi.org/10.1088/2041-8205/713/2/L79 +Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & +Koester, D. (2011). 
AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The +Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39 +Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot +signature on the light curve. Astronomy & Astrophysics, 599, A1. https://doi.org/10.1051/0004-6361/201629923 -12 -Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. -Annual Review of Astronomy and Astrophyics, 46(1), 157-199. -https://doi.org/10.1146/annurev.astro.46.060407.145250 -Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021). + +Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. +Annual Review of Astronomy and Astrophyics, 46(1), 157-199. +https://doi.org/10.1146/annurev.astro.46.060407.145250 +Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021). \ No newline at end of file diff --git a/read/results/pdfium/2201.00037.txt b/read/results/pdfium/2201.00037.txt index 7dea8ef..1a6d063 100644 --- a/read/results/pdfium/2201.00037.txt +++ b/read/results/pdfium/2201.00037.txt @@ -1,2075 +1,2016 @@ -Confidential manuscript submitted to JGR-Planets -The influence of a fluid core and a solid inner core on the -Cassini sate of Mercury -Mathieu Dumberry 1 -1Department of Physics, University of Alberta, Edmonton, Alberta, Canada. -Key Points: -• The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid -planet by no more than 0.01 arcmin. -• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid -cores into a common precession motion. -• The larger the inner core is, the more the obliquity of the polar moment of inertia ap￾proaches that expected for a rigid planet. 
-Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca -–1– -arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021 -Confidential manuscript submitted to JGR-Planets -Abstract -We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core -and a mantle. Our model includes inertial and gravitational torques between interior regions, -and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show -that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of -the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The man￾tle obliquity decreases with increasing inner core size, but the change between a large and no -inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the in￾ner core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and -solid cores into a common precession motion. Because of the strong gravitational coupling be￾tween the mantle and inner core, the larger the inner core is, the more this co-precessing core -is brought into an alignment with the mantle, and the more the obliquity of the polar moment -of inertia approaches that expected for a rigid planet. The misalignment between the polar mo￾ment of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 ar￾cmin. Our results imply that the measured obliquities of the mantle spin axis and polar mo￾ment of inertia should coincide at the present-day level of measurement errors, and cannot be -distinguished from the obliquity of a rigid planet. -Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precess￾ing about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the -same rate, an equilibrium known as a Cassini state. The angle between the spin axis and the -normal to the orbital plane is known as the obliquity and remains fixed. 
Observations have con￾firmed that Mercury’s obliquity matches, within measurement errors, the theoretical predic￾tion based on an entirely rigid planet. However, we know that Mercury has a large metallic core -which is liquid, although the central part may be solid. In this work, we investigate how the -presence of a fluid and solid core affect the Cassini state of Mercury. We show that the inter￾nal coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliq￾uity of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an -offset smaller than the present-day error in measurements. We also show that the larger the -solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body. -1 Introduction -Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spin￾symmetry axis are both coplanar with, and precess about, the normal to the Laplace plane [Colombo, -1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but -its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot, -2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is -reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 -yr with an inclination angle of I = 8.5330◦ between the orbit and Laplace plane normals [Ba￾land et al., 2017]. 
Measurements of the obliquity εm, defined as the angle of misalignment be￾tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques, -including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter￾rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Mar￾got, 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvi￾ronment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, -all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals -and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042 ± 0.08 +Confidential manuscript submitted to JGR-Planets +The influence of a fluid core and a solid inner core on the +Cassini sate of Mercury +Mathieu Dumberry 1 +1Department of Physics, University of Alberta, Edmonton, Alberta, Canada. +Key Points: +• The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid +planet by no more than 0.01 arcmin. +• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid +cores into a common precession motion. +• The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet. +Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca +–1– +arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 202 +Confidential manuscript submitted to JGR-Planets +Abstract +We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core +and a mantle. Our model includes inertial and gravitational torques between interior regions, +and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. 
We show +that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of +the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The mantle obliquity decreases with increasing inner core size, but the change between a large and no +inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the inner core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and +solid cores into a common precession motion. Because of the strong gravitational coupling between the mantle and inner core, the larger the inner core is, the more this co-precessing core +is brought into an alignment with the mantle, and the more the obliquity of the polar moment +of inertia approaches that expected for a rigid planet. The misalignment between the polar moment of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 arcmin. Our results imply that the measured obliquities of the mantle spin axis and polar moment of inertia should coincide at the present-day level of measurement errors, and cannot be +distinguished from the obliquity of a rigid planet. +Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precessing about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the +same rate, an equilibrium known as a Cassini state. The angle between the spin axis and the +normal to the orbital plane is known as the obliquity and remains fixed. Observations have confirmed that Mercury’s obliquity matches, within measurement errors, the theoretical prediction based on an entirely rigid planet. However, we know that Mercury has a large metallic core +which is liquid, although the central part may be solid. In this work, we investigate how the +presence of a fluid and solid core affect the Cassini state of Mercury. 
We show that the internal coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliquity of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an +offset smaller than the present-day error in measurements. We also show that the larger the +solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body. +1 Introduction +Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spinsymmetry axis are both coplanar with, and precess about, the normal to the Laplace plane [Colombo, +1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but +its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot, +2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is +reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 +yr with an inclination angle of I = 8.5330◦ between the orbit and Laplace plane normals [Baland et al., 2017]. Measurements of the obliquity εm, defined as the angle of misalignment between the spin-symmetry axis and the orbit normal, have been obtained by different techniques, +including ground based radar observations [Margot et al., 2007, 2012], and stereo digital terrain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Margot, 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvironment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, +all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals +and consistent with a Cassini state. 
Furthermore, the observed obliquity angle (2.042 ± 0.08 –2– -Confidential manuscript submitted to JGR-Planets -I -descending -node of orbit -Ωp -ê3 -I -I -ê3 -εm L -I ê3 -p -ascending -node of orbit -descending -node of equator -equatorial -plane -orbital -direction -S -ê3 -I ê3 -L -M -εm -orbital plane -Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded -rectangle) and the Cassini state of Mercury. The normal to the orbital plane (eˆ -I -3) is offset from the nor￾mal to the Laplace plane (eˆ -L -3 ) by an angle I = 8.5330◦ -. The symmetry axis of the mantle eˆ -p -3 -is offset -from eˆ -I -3 by εm ≈ 2 arcmin. eˆ -I -3 and eˆ -p -3 are coplanar with, and precess about, eˆ -L -3 in a retrograde direction -at frequency Ωp = 2π/325, 513 yr−1 -. The blue (orange) shaded region indicates the portion of the orbit -when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. -arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Gen￾ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. -The prediction of Mercury’s obliquity is based on the assumption that the whole planet -precesses as a single body. However, we know that Mercury has a fluid core from two main lines -of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by -dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion -in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the ob￾served amplitude of the 88-day longitudinal libration is approximately twice as large as that -expected if Mercury were librating as a rigid body [Margot et al., 2007, 2012; Stark et al., 2015a]. -This indicates that it is only the mantle that librates, and that the outer part of the core is fluid. 
-These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that -its outermost part must be. A solid inner core may have nucleated at the centre although its -size is not well constrained. Inner core growth leads to planetary contraction, and the inferred -radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an -approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner -core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. +Confidential manuscript submitted to JGR-Planets +I +descending +node of orbit +Ωp +ê3 +I +I +ê3 +εm L +I ê3 +p +ascending +node of orbit +descending +node of equator +equatorial +plane +orbital +direction +S +ê3 +I ê3 +L +M +εm +orbital plane +Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded +rectangle) and the Cassini state of Mercury. The normal to the orbital plane (eˆ +I +3) is offset from the normal to the Laplace plane (eˆ +L +3 ) by an angle I = 8.5330◦ +. The symmetry axis of the mantle eˆ +p +3 +is offset +from eˆ +I +3 by εm ≈ 2 arcmin. eˆ +I +3 and eˆ +p +3 are coplanar with, and precess about, eˆ +L +3 in a retrograde direction +at frequency Ωp = 2π/325, 513 yr−1. The blue (orange) shaded region indicates the portion of the orbit +when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. +arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Genova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. +The prediction of Mercury’s obliquity is based on the assumption that the whole planet +precesses as a single body. However, we know that Mercury has a fluid core from two main lines +of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by +dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. 
This requires fluid motion +in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed amplitude of the 88-day longitudinal libration is approximately twice as large as that +expected if Mercury were librating as a rigid body [Margot et al., 2007, 2012; Stark et al., 2015a]. +This indicates that it is only the mantle that librates, and that the outer part of the core is fluid. +These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that +its outermost part must be. A solid inner core may have nucleated at the centre although its +size is not well constrained. Inner core growth leads to planetary contraction, and the inferred +radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an +approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner +core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. –3– -Confidential manuscript submitted to JGR-Planets -With a fluid core, and possibly a solid inner core, the observed obliquity εm reflects the -orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dis￾sipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin￾symmetry axis of the inner core should both also precess about the normal to the Laplace plane -in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the -plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although -their obliquity angles may be different than εm. 
Whether the spin axis of the fluid core is brought -into an alignment with the mantle obliquity depends primarily on the pressure torque (also re￾ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the -misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e, 1910]. The more flat￾tened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained -into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s -CMB is not known. But if one assumes that the topography of the CMB coincides with an equipo￾tential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the up￾per mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core -into a close alignment with the mantle [Peale et al., 2014]. The spin axis of the fluid core is not -expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close -that the resulting mantle obliquity does not differ much from that of a single body planet. Fur￾thermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the mis￾alignment between the mantle and core [Peale et al., 2014]. -If an inner core is present, its obliquity angle is determined by the sum of the torques act￾ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal￾ogous to the torque applied on the tilted mantle that sets the obliquity εm. In addition, the -tilt of the inner core also depends on the gravitational torque imposed by the mantle and the -pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav￾itational torque dominates, the inner core tilt is expected to remain closely aligned with the -mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should in￾stead be closely aligned with the spin axis of the fluid core. 
A strong viscous and/or EM cou￾pling at the ICB should also enforce a closer alignment between the rotation vectors of the in￾ner core and fluid core. -It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mer￾cury is inferred [e.g. Peale, 1976; Margot et al., 2018]. Inherent in this calculation is the built￾in assumption that the mantle obliquity does not deviate from that of a rigid planet by a sub￾stantial amount. However, the recent study by Peale et al. [2016] suggests that the inner core -can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the -orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the as￾sumption that the observed obliquity reflects the orientation of the whole planet. -Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis -does not coincide with the orientation of the polar moment of inertia of the whole planet. This -can introduce a systematic offset between different types of obliquity measurements. Those based -on tracking topographic features [Margot et al., 2007, 2012; Stark et al., 2015a] capture the obliq￾uity of the mantle spin axis. While those based on the orientation of the gravity field [Mazarico -et al., 2014; Verma and Margot, 2016; Genova et al., 2019; Konopliv et al., 2020] are instead -tied to the orientation of the principal moment of inertia of the whole planet. An offset of the -obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the -size of the inner core, even though this is difficult to do at present because the different esti￾mates of the obliquity of the gravity field do not match well with one another. +Confidential manuscript submitted to JGR-Planets +With a fluid core, and possibly a solid inner core, the observed obliquity εm reflects the +orientation of the spin-symmetry axis of the precessing mantle and crust alone. 
Neglecting dissipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spinsymmetry axis of the inner core should both also precess about the normal to the Laplace plane +in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the +plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although +their obliquity angles may be different than εm. Whether the spin axis of the fluid core is brought +into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the +misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e, 1910]. The more flattened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained +into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s +CMB is not known. But if one assumes that the topography of the CMB coincides with an equipotential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the upper mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core +into a close alignment with the mantle [Peale et al., 2014]. The spin axis of the fluid core is not +expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close +that the resulting mantle obliquity does not differ much from that of a single body planet. Furthermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the misalignment between the mantle and core [Peale et al., 2014]. +If an inner core is present, its obliquity angle is determined by the sum of the torques acting on it. This includes the gravitational torque from the Sun acting on its tilted figure, analogous to the torque applied on the tilted mantle that sets the obliquity εm. 
In addition, the +tilt of the inner core also depends on the gravitational torque imposed by the mantle and the +pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle gravitational torque dominates, the inner core tilt is expected to remain closely aligned with the +mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should instead be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM coupling at the ICB should also enforce a closer alignment between the rotation vectors of the inner core and fluid core. +It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury is inferred [e.g. Peale, 1976; Margot et al., 2018]. Inherent in this calculation is the builtin assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial amount. However, the recent study by Peale et al. [2016] suggests that the inner core +can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the +orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the assumption that the observed obliquity reflects the orientation of the whole planet. +Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis +does not coincide with the orientation of the polar moment of inertia of the whole planet. This +can introduce a systematic offset between different types of obliquity measurements. Those based +on tracking topographic features [Margot et al., 2007, 2012; Stark et al., 2015a] capture the obliquity of the mantle spin axis. While those based on the orientation of the gravity field [Mazarico +et al., 2014; Verma and Margot, 2016; Genova et al., 2019; Konopliv et al., 2020] are instead +tied to the orientation of the principal moment of inertia of the whole planet. 
An offset of the +obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the +size of the inner core, even though this is difficult to do at present because the different estimates of the obliquity of the gravity field do not match well with one another. –4– -Confidential manuscript submitted to JGR-Planets -There is thus a significant interest in properly assessing how the presence of a solid in￾ner core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a -model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model -is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumb￾erry and Wieczorek, 2016; Stys and Dumberry, 2018; Organowski and Dumberry, 2020]. The -specific questions that motivate our study are the following. First, we want to determine how -large the misaligned obliquities of the fluid core and solid inner core can be and how they de￾pend on model parameters. Second, we want to assess by how much the mantle obliquity may -differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spin￾symmetry axis of the mantle and gravity field may differ. -2 Theory -2.1 The interior structure of Mercury -Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid -outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted -by rs, rf , rm, and R, and their densities by ρs, ρf , ρm, and ρc, respectively. The inner core ra￾dius rs corresponds to the ICB radius, the fluid core radius rf to the CMB radius, and R = -2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pres￾sure with depth are not negligible in the core of Mercury. However adopting uniform densities -simplifies the analytical expressions of the model while still capturing the first order rotational -dynamics. 
Uniform densities were also adopted by Peale et al. [2016] and following the same -strategy facilitates comparisons between our results. -We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or -a density contrast at the ICB), the crustal density ρc and crustal thickness h = R−rm. The -three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with -the known mass M and chosen values of the moments of inertia of the whole planet C and that -of the mantle and crust Cm. -Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) -by i -, defined as the difference between the mean equatorial and polar radii, divided by the mean -spherical radius. Likewise, we denote the equatorial flattening by the variable ξi -, defined as the -difference between the maximum and minimum equatorial radii, divided by the mean spher￾ical radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equa￾torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. -The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their -numerical values are given in Table 1. We then assume that the ICB and CMB are both at hy￾drostatic equilibrium with the imposed gravitational potential induced by the flattenings at the -CrMB and surface. The flattenings at all interior boundaries are specified such that they are -consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their -numerical values are given in Table 1. 
Specifically, J2 and C22 are connected to the principal -moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by -J2 = -C − A¯ -MR2 -= -8π -15 -1 -MR2 - -(ρs − ρf )r -5 -s -s + (ρf − ρm)r -5 -f -f + (ρm − ρc)r -5 -mm + ρcR -5 -r - -, (1a) -C22 = -B − A -4MR2 -= -8π -15 -1 -4MR2 - -(ρs − ρf )r -5 -s -ξs + (ρf − ρm)r -5 -f -ξf + (ρm − ρc)r -5 -mξm + ρcR -5 -ξr - -. (1b) -where A¯ is the mean equatorial moment of inertia defined below. The same procedure was used -in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry -–5– -Confidential manuscript submitted to JGR-Planets -Mercury Parameter Numerical value Reference -mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] -rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b] -orbit precession rate, Ωp 2π/325, 513 yr−1 Baland et al. [2017] -Poincar´e number, δω = Ωp/Ωo 4.9327 × 10−7 -orbital eccentricity, ec 0.20563 Baland et al. [2017] -orbital inclination, I 8.5330◦ Baland et al. [2017] -mean planetary radius, R 2439.360 km Perry et al. [2015] -mass, M 3.3012 × 1023 kg Genova et al. [2019] -mean density, ¯ρ 5429.5 kg m−3 -J2 5.0291 × 10−5 Genova et al. [2019] -C22 8.0415 × 10−6 Genova et al. [2019] -polar surface flattening, r 6.7436 × 10−4 Perry et al. [2015] -equatorial surface flattening, ξr 5.1243 × 10−4 Perry et al. [2015] -Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 -m3 -/s2 -taken from Genova et al. [2019]. The mean density is calculated from 4π -3 -ρR¯ -3 = M. The numerical -values of r and ξr are calculated from r = (¯a − c)/R and ξr = (a − b)/R, where ¯a = -1 -2 -(a + b) and where -a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor -axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J2 and C22 are -computed from Equation (4) in the Supporting Information of Genova et al. [2019]. 
-and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. -Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog￾raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2 -◦ -and an equatorial offset of ∼ 15◦ -[Perry et al., 2015]. -Once the densities and flattenings of all interior regions are known, we can specify the mo￾ments of inertia of the fluid core (Cf > Bf > Af ) and solid inner core (Cs > Bs > As) -along with the mean equatorial moments of inertia -A¯ = -1 -2 -(A + B), A¯ -f = -1 -2 -(Af + Bf ), A¯ -s = -1 -2 -(As + Bs). (2) -From these, we define the polar (e, ef , es) and equatorial (γ, γs) dynamical ellipticities of the -whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which -enter our rotational model, -e = -C − A¯ -A¯ -ef = -Cf − A¯ -f -A¯ -f -es = -Cs − A¯ -s -A¯ -s -, (3a) -γ = -B − A -A¯ -γs = -Bs − As -A¯ -s -. (3b) -We further note that e and γ are connected to J2 and C22 by -e = -MR2 -A¯ -J2 , γ = -4MR2 -A¯ -C22 . (4) +Confidential manuscript submitted to JGR-Planets +There is thus a significant interest in properly assessing how the presence of a solid inner core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a +model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model +is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumberry and Wieczorek, 2016; Stys and Dumberry, 2018; Organowski and Dumberry, 2020]. The +specific questions that motivate our study are the following. First, we want to determine how +large the misaligned obliquities of the fluid core and solid inner core can be and how they depend on model parameters. 
Second, we want to assess by how much the mantle obliquity may +differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spinsymmetry axis of the mantle and gravity field may differ. +2 Theory +2.1 The interior structure of Mercury +Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid +outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted +by rs, rf , rm, and R, and their densities by ρs, ρf , ρm, and ρc, respectively. The inner core radius rs corresponds to the ICB radius, the fluid core radius rf to the CMB radius, and R = +2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure with depth are not negligible in the core of Mercury. However adopting uniform densities +simplifies the analytical expressions of the model while still capturing the first order rotational +dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same +strategy facilitates comparisons between our results. +We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or +a density contrast at the ICB), the crustal density ρc and crustal thickness h = R−rm. The +three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with +the known mass M and chosen values of the moments of inertia of the whole planet C and that +of the mantle and crust Cm. +Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) +by i, defined as the difference between the mean equatorial and polar radii, divided by the mean +spherical radius. Likewise, we denote the equatorial flattening by the variable ξi, defined as the +difference between the maximum and minimum equatorial radii, divided by the mean spherical radius. 
As above, we use the subscript i = s, f, m and r, to denote the polar or equatorial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. +The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their +numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic equilibrium with the imposed gravitational potential induced by the flattenings at the +CrMB and surface. The flattenings at all interior boundaries are specified such that they are +consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their +numerical values are given in Table 1. Specifically, J2 and C22 are connected to the principal +moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by +J2 = +C − A¯ +MR2 += +8π +15 +1 +MR2 + +(ρs − ρf )r +5 +s +s + (ρf − ρm)r +5 +f +f + (ρm − ρc)r +5 +mm + ρcR +5 +r + +, (1a) +C22 = +B − A +4MR2 += +8π +15 +1 +4MR2 + +(ρs − ρf )r +5 +s +ξs + (ρf − ρm)r +5 +f +ξf + (ρm − ρc)r +5 +mξm + ρcR +5 +ξr + +. (1b) +where A¯ is the mean equatorial moment of inertia defined below. The same procedure was used +in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry +–5– +Confidential manuscript submitted to JGR-Planets +Mercury Parameter Numerical value Reference +mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] +rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b] +orbit precession rate, Ωp 2π/325, 513 yr−1 Baland et al. [2017] +Poincar´e number, δω = Ωp/Ωo 4.9327 × 10−7 +orbital eccentricity, ec 0.20563 Baland et al. [2017] +orbital inclination, I 8.5330◦ Baland et al. [2017] +mean planetary radius, R 2439.360 km Perry et al. [2015] +mass, M 3.3012 × 1023 kg Genova et al. [2019] +mean density, ¯ρ 5429.5 kg m−3 +J2 5.0291 × 10−5 Genova et al. [2019] +C22 8.0415 × 10−6 Genova et al. [2019] +polar surface flattening, r 6.7436 × 10−4 Perry et al. 
[2015] +equatorial surface flattening, ξr 5.1243 × 10−4 Perry et al. [2015] +Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 +m3/s2taken from Genova et al. [2019]. The mean density is calculated from 4π +3 +ρR¯ +3 = M. The numerical +values of r and ξr are calculated from r = (¯a − c)/R and ξr = (a − b)/R, where ¯a = +1 +2 +(a + b) and where +a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor +axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J2 and C22 are +computed from Equation (4) in the Supporting Information of Genova et al. [2019]. +and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. +Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2 +◦ +and an equatorial offset of ∼ 15◦[Perry et al., 2015]. +Once the densities and flattenings of all interior regions are known, we can specify the moments of inertia of the fluid core (Cf > Bf > Af ) and solid inner core (Cs > Bs > As) +along with the mean equatorial moments of inertia +A¯ = +1 +2 +(A + B), A¯ +f = +1 +2 +(Af + Bf ), A¯ +s = +1 +2 +(As + Bs). (2) +From these, we define the polar (e, ef , es) and equatorial (γ, γs) dynamical ellipticities of the +whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which +enter our rotational model, +e = +C − A¯ +A¯ +ef = +Cf − A¯ +f +A¯ +f +es = +Cs − A¯ +s +A¯ +s +, (3a) +γ = +B − A +A¯ +γs = +Bs − As +A¯ +s +. (3b) +We further note that e and γ are connected to J2 and C22 by +e = +MR2 +A¯ +J2 , γ = +4MR2 +A¯ +C22 . (4) –6– -Confidential manuscript submitted to JGR-Planets -θm -θn -θs -θf -Ω -Ωs -Ωf -ê3 -p -ê3 -ê3 s -I -I -εm -θp -ê3 -L -ê1 -p -ê2 -p -Cassini plane -ωΩot -ê3 -I -I εm -ê3 -p -ê1 -ê2 -p -ê3 -L -a) b) -Figure 2. 
Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) -in a frame attached to the rotating mantle. The orbit normal (eˆ -I -3) is tilted by an angle I = 8.533◦ -from -the Laplace normal (eˆ -L -3 ) and the symmetry axis of Mercury’s mantle (eˆ -p -3 -) is tilted by an obliquity εm -with respect to eˆ -I -3. Shown in (a) are the orientations of the symmetry axis of the inner core (eˆ -s -3), the -rotation rate vectors of the mantle (Ω), fluid core (Ωf ) and inner core (Ωf ) and angles θp, θn, θm, θf -and θs in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer -to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of -the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. -The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial -mantle axes eˆ -p -1 and eˆ -p -2 with respect to the Cassini plane. Viewed in the frame attached to the rotating -mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωp cos I in the longitudinal direc￾tion. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of -illustration. +Confidential manuscript submitted to JGR-Planets +θm +θn +θs +θf +Ω +Ωs +Ωf +ê3 +p +ê3 +ê3 s +I +I +εm +θp +ê3 +L +ê1 +p +ê2 +p +Cassini plane +ωΩot +ê3 +I +I εm +ê3 +p +ê1 +ê2 +p +ê3 +L +a) b) +Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) +in a frame attached to the rotating mantle. The orbit normal (eˆ +I +3) is tilted by an angle I = 8.533◦ +from +the Laplace normal (eˆ +L +3 ) and the symmetry axis of Mercury’s mantle (eˆ +p +3 +) is tilted by an obliquity εm +with respect to eˆ +I +3. 
Shown in (a) are the orientations of the symmetry axis of the inner core (eˆ +s +3), the +rotation rate vectors of the mantle (Ω), fluid core (Ωf ) and inner core (Ωf ) and angles θp, θn, θm, θf +and θs in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer +to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of +the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. +The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial +mantle axes eˆ +p +1 and eˆ +p +2 with respect to the Cassini plane. Viewed in the frame attached to the rotating +mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωp cos I in the longitudinal direction. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of +illustration. –7– -Confidential manuscript submitted to JGR-Planets -2.2 The rotational model -Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes -3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is -87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These de￾fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ωo = 2π/58.64623 -day−1 -, with Ωo = 1.5 n. Mercury’s rotational state is also characterized by a Cassini state whereby -the orientations of the orbit normal (eˆ -I -3 -) and of the mantle symmetry axis (eˆ -p -3 -) are both copla￾nar with, and precess about, the normal to the Laplace plane (eˆ -L -3 -). The orientation of the Laplace -plane varies on long timescales, but it can be taken as invariable in inertial space for our present -purpose. The Cassini state of Mercury is illustrated in Figure 1. 
The angle between eˆ -L -3 -and eˆ -I -3 -is the orbital inclination I = 8.5330◦ -[Baland et al., 2017], the angle between eˆ -I -3 -and eˆ -p -3 -is the -obliquity εm and the angle between eˆ -L -3 -and eˆ -p -3 -is θp = I + εm. The precession of eˆ -I -3 -and eˆ -p -3 -about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr−1 -[Baland et al., 2017]. -The mantle and crust are welded together and form a single rotating region which we re￾fer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes -of the mantle are expected to remain in close alignment, but they do not coincide exactly. We -define the rotation rate vector of the mantle by Ω, and its misalignment from eˆ -p -3 by an angle -θm. Note that θm  εm and it is often the spin axis of Mercury which is used to define the -obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, eˆ -p -3 -and Ω would -characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and -the angles I, εm and θm would completely describe the Cassini state. The presence of a fluid -outer core and solid inner core require three additional orientation vectors and angles. The sym￾metry axis of the inner core is defined by unit vector eˆ -s -3 -and its misalignment from eˆ -p -3 by an -angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, re￾spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an￾gles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close -alignment, so θn ≈ θs. To be formal in our definition of the different angles of misalignment, -for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise -direction. 
-At equilibrium in the Cassini state, the three orientation vectors (eˆ -I -3 -, eˆ -p -3 -, eˆ -s -3 -) and three -rotation vectors (Ω, Ωf , Ωs) are forced to precess about eˆ -L -3 -at the same frequency. If we ne￾glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed -in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ωp. Viewed -in the frame attached to the mantle rotating at sidereal frequency Ωo, the Cassini plane is ro￾tating in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles -per Mercury day, is equal to -ω = −1 − δω cos(θp). (5) -The factor δω = Ωp/Ωo = 4.933 × 10−7 -is the Poincar´e number, expressing the ratio of the -forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal -as seen in the mantle frame is expressed as -d -dteˆ -L -3 + Ω × eˆ -L -3 = 0 , (6) -or equivalently, by Equation (19e) of Stys and Dumberry [2018], -ω sin(θp) + sin(θm + θp) = 0 . (7) +Confidential manuscript submitted to JGR-Planets +2.2 The rotational model +Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes +3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is +87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These define the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ωo = 2π/58.64623 +day−1, with Ωo = 1.5 n. Mercury’s rotational state is also characterized by a Cassini state whereby +the orientations of the orbit normal (eˆ +I +3 +) and of the mantle symmetry axis (eˆ +p +3 +) are both coplanar with, and precess about, the normal to the Laplace plane (eˆ +L +3 +). The orientation of the Laplace +plane varies on long timescales, but it can be taken as invariable in inertial space for our present +purpose. The Cassini state of Mercury is illustrated in Figure 1. 
The angle between eˆ +L +3 +and eˆ +I +3 +is the orbital inclination I = 8.5330◦[Baland et al., 2017], the angle between eˆ +I +3 +and eˆ +p +3 +is the +obliquity εm and the angle between eˆ +L +3 +and eˆ +p +3 +is θp = I + εm. The precession of eˆ +I +3 +and eˆ +p +3 +about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr−1[Baland et al., 2017]. +The mantle and crust are welded together and form a single rotating region which we refer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes +of the mantle are expected to remain in close alignment, but they do not coincide exactly. We +define the rotation rate vector of the mantle by Ω, and its misalignment from eˆ +p +3 by an angle +θm. Note that θm  εm and it is often the spin axis of Mercury which is used to define the +obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, eˆ +p +3 +and Ω would +characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and +the angles I, εm and θm would completely describe the Cassini state. The presence of a fluid +outer core and solid inner core require three additional orientation vectors and angles. The symmetry axis of the inner core is defined by unit vector eˆ +s +3 +and its misalignment from eˆ +p +3 by an +angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, respectively, and their misalignment from the rotation vector of the mantle Ω are defined by angles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close +alignment, so θn ≈ θs. To be formal in our definition of the different angles of misalignment, +for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise +direction. 
+At equilibrium in the Cassini state, the three orientation vectors (eˆ +I +3 +, eˆ +p +3 +, eˆ +s +3 +) and three +rotation vectors (Ω, Ωf , Ωs) are forced to precess about eˆ +L +3 +at the same frequency. If we neglect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed +in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ωp. Viewed +in the frame attached to the mantle rotating at sidereal frequency Ωo, the Cassini plane is rotating in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles +per Mercury day, is equal to +ω = −1 − δω cos(θp). (5) +The factor δω = Ωp/Ωo = 4.933 × 10−7is the Poincar´e number, expressing the ratio of the +forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal +as seen in the mantle frame is expressed as +d +dteˆ +L +3 + Ω × eˆ +L +3 = 0 , (6) +or equivalently, by Equation (19e) of Stys and Dumberry [2018], +ω sin(θp) + sin(θm + θp) = 0 . (7) –8– -Confidential manuscript submitted to JGR-Planets -This expresses a formal connection between θp and θm which is independent of the interior struc￾ture of Mercury. Using Equation (5) and cos(θm) → 1, this connection can be rewritten as -sin(θm) = δω sin(θp). (8) -and thus the relative amplitudes of θm and θp depend of the Poincar´e number δω. -To investigate Mercury’s response to the gravitational torque from the Sun, we take ad￾vantage of the framework developed in Mathews et al. [1991] to model the forced nutations of -Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into ac￾count the pressure torque (also referred to as the inertial torque) that results when the spin axis -of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and -ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned -with the mantle. 
Electromagnetic and viscous torques at both the CMB and ICB have been -incorporated into the framework [e.g Buffett, 1992; Buffett et al., 2002; Mathews and Guo, 2005; -Deleplace and Cardin, 2006]. The framework was adapted to model the Cassini state of the Moon -in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski -and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury. -Because the forced precession period is much longer than the rotation and orbital peri￾ods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean -torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, point￾ing in the same direction as the vector connecting the Sun to the descending node of Mercury’s -orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque -is periodic, rotating at frequency ωΩo. Setting the equatorial directions eˆ -p -1 -and eˆ -p -2 -to correspond -to the real and imaginary axes of the complex plane, respectively, we can write the equatorial -components of this periodic applied torque in a compact form as -Γ1(t) + iΓ2(t) = −i Γ( ˜ ω) exp[iωΩot] , (9) -where Γ( ˜ ω) represents the amplitude of the torque at frequency ωΩo. In response to this torque, -the axes defining all angles (θp, εm, θm, θf , θs, θn) as viewed in the mantle frame are also ro￾tating at frequency ωΩo (see Figure 2). The longitudinal direction of each of these angles at -a specific time t can then also be written in the equatorial complex plane and is proportional -to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1 and θm2 of the -angle θm, as seen in the mantle frame, can be written as -θm1(t) + iθm2(t) = ˜m exp[iωΩot] , (10a) -where -m˜ ≡ m˜ (ω) = Re[ ˜m] + iIm[ ˜m] , (10b) -is the amplitude at frequency ωΩo. 
Equivalent definitions apply for all other angles, with the -connection as follows: -θm ⇔ m , θ ˜ f ⇔ m˜ f , θs ⇔ m˜ s , θn ⇔ n˜s , θp ⇔ p , ε ˜ m ⇔ ε˜m . (11) -The notation ˜m, ˜mf , ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991]. -Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re￾sponse to the applied torque as a result of dissipation, for instance from viscous or EM coupling +Confidential manuscript submitted to JGR-Planets +This expresses a formal connection between θp and θm which is independent of the interior structure of Mercury. Using Equation (5) and cos(θm) → 1, this connection can be rewritten as +sin(θm) = δω sin(θp). (8) +and thus the relative amplitudes of θm and θp depend of the Poincar´e number δω. +To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage of the framework developed in Mathews et al. [1991] to model the forced nutations of +Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into account the pressure torque (also referred to as the inertial torque) that results when the spin axis +of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and +ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned +with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been +incorporated into the framework [e.g Buffett, 1992; Buffett et al., 2002; Mathews and Guo, 2005; +Deleplace and Cardin, 2006]. The framework was adapted to model the Cassini state of the Moon +in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski +and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury. 
+Because the forced precession period is much longer than the rotation and orbital periods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean +torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, pointing in the same direction as the vector connecting the Sun to the descending node of Mercury’s +orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque +is periodic, rotating at frequency ωΩo. Setting the equatorial directions eˆ +p +1 +and eˆ +p +2 +to correspond +to the real and imaginary axes of the complex plane, respectively, we can write the equatorial +components of this periodic applied torque in a compact form as +Γ1(t) + iΓ2(t) = −i Γ( ˜ ω) exp[iωΩot] , (9) +where Γ( ˜ ω) represents the amplitude of the torque at frequency ωΩo. In response to this torque, +the axes defining all angles (θp, εm, θm, θf , θs, θn) as viewed in the mantle frame are also rotating at frequency ωΩo (see Figure 2). The longitudinal direction of each of these angles at +a specific time t can then also be written in the equatorial complex plane and is proportional +to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1 and θm2 of the +angle θm, as seen in the mantle frame, can be written as +θm1(t) + iθm2(t) = ˜m exp[iωΩot] , (10a) +where +m˜ ≡ m˜ (ω) = Re[ ˜m] + iIm[ ˜m] , (10b) +is the amplitude at frequency ωΩo. Equivalent definitions apply for all other angles, with the +connection as follows: +θm ⇔ m , θ ˜ f ⇔ m˜ f , θs ⇔ m˜ s , θn ⇔ n˜s , θp ⇔ p , ε ˜ m ⇔ ε˜m . (11) +The notation ˜m, ˜mf , ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991]. 
+Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response to the applied torque as a result of dissipation, for instance from viscous or EM coupling –9– -Confidential manuscript submitted to JGR-Planets -at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely -real. We concentrate our analysis in this work on the real part of the solutions, which corre￾sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜εm -corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm, -though we keep the tilde notation in the presentation of our results to emphasize that it rep￾resents the real part of the solution from our system. Furthermore, since ˜m  ε˜m, we often -refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is -more customarily described in terms of the latter in the literature. -The model of Mathews et al. [1991] is developed under the assumption of small angles as -appropriate for the nutations on Earth. The details on how the equations of the model are de￾rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equa￾tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer￾cury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three -equations are -(ω − e) ˜m + (1 + ω) -" -A¯ -f -A¯ -m˜ f + -A¯ -s -A¯ -m˜ s + α3es -A¯ -s -A¯ -n˜s -# -= -1 -iΩ2 -oA¯ - -Γ˜ -sun -, (12a) -ωm˜ + (1 + ω + ef ) ˜mf − ωα1es -A¯ -s -A¯ -f -n˜s = -1 -iΩ2 -oA¯ -f - -− Γ˜ -cmb − Γ˜ -icb -, (12b) -(ω − α3es) ˜m + α1esm˜ f + (1 + ω) ˜ms + (1 + ω − α2) esn˜s = -1 -iΩ2 -oA¯ -s - -Γ˜s -sun + Γ˜ -icb -, (12c) -and a fourth equation consists of a kinematic relation that expresses the change in the orien￾tation of the inner core figure as a result of its own rotation, -m˜ s + ωn˜s = 0 . 
(12d) -In these equations, the parameters α1, α2 and α3 involve the density contrast at the ICB -and are given by -α1 = -ρf -ρs -, α3 = 1 − α1 , α2 = α1 − α3αg , (13a) -where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied -on the inner core, -αg = -8πG -5Ω2 -o -[ρc(r − m) + ρm(m − f ) + ρf f ] , (13b) -where G is the gravitational constant. -Γ˜ -sun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For -a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by -Γ˜ -sun = −iΩ -2 -oA¯ - -φmε˜m + -A¯ -s -A¯ -α3φsn˜s - -, (14) -where +Confidential manuscript submitted to JGR-Planets +at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely +real. We concentrate our analysis in this work on the real part of the solutions, which corresponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜εm +corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm, +though we keep the tilde notation in the presentation of our results to emphasize that it represents the real part of the solution from our system. Furthermore, since ˜m  ε˜m, we often +refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is +more customarily described in terms of the latter in the literature. +The model of Mathews et al. [1991] is developed under the assumption of small angles as +appropriate for the nutations on Earth. The details on how the equations of the model are derived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equations describe, respectively, the time rate of change of the angular momenta of the whole of Mercury, the fluid core, and the inner core in the reference frame of the rotating mantle. 
These three +equations are +(ω − e) ˜m + (1 + ω) +" +A¯ +f +A¯ +m˜ f + +A¯ +s +A¯ +m˜ s + α3es +A¯ +s +A¯ +n˜s +# += +1 +iΩ2 +oA¯ + +Γ˜ +sun +, (12a) +ωm˜ + (1 + ω + ef ) ˜mf − ωα1es +A¯ +s +A¯ +f +n˜s = +1 +iΩ2 +oA¯ +f + +− Γ˜ +cmb − Γ˜icb +, (12b) +(ω − α3es) ˜m + α1esm˜ f + (1 + ω) ˜ms + (1 + ω − α2) esn˜s = +1 +iΩ2 +oA¯ +s + +Γ˜s +sun + Γ˜ +icb +, (12c) +and a fourth equation consists of a kinematic relation that expresses the change in the orientation of the inner core figure as a result of its own rotation, +m˜ s + ωn˜s = 0 . (12d) +In these equations, the parameters α1, α2 and α3 involve the density contrast at the ICB +and are given by +α1 = +ρf +ρs +, α3 = 1 − α1 , α2 = α1 − α3αg , (13a) +where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied +on the inner core, +αg = +8πG +5Ω2 +o +[ρc(r − m) + ρm(m − f ) + ρf f ] , (13b) +where G is the gravitational constant. +Γ˜ +sun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For +a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by +Γ˜ +sun = −iΩ +2 +oA¯ + +φmε˜m + +A¯ +s +A¯ +α3φsn˜s + +, (14) +where –10– -Confidential manuscript submitted to JGR-Planets -φm = -3 -2 -n -2 -Ω2 -o - -G210 e + -1 -2 -G201 γ - -, (15a) -φs = -3 -2 -n -2 -Ω2 -o - -G210 es + -1 -2 -G201 γs - -, (15b) -and where G210 and G201 are functions of the orbital eccentricity ec, -G210 = -1 -(1 − e -2 -c -) -3/2 -, (16a) -G201 = -7 -2 -ec − -123 -16 -e -3 -c + -489 -128 -e -5 -c -. (16b) -The gravitational torque by the Sun acting on the inner core alone, Γ˜s -sun, is -Γ˜s -sun = −iΩ -2 -oA¯ -sα3φs(˜εm + ˜ns). (17) -Γ˜ -cmb and Γ˜ -icb are the torques from tangential stresses by the fluid core on the mantle at the -CMB and on the inner core at the ICB, respectively. 
These torques can be parameterized in -terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angu￾lar velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002], -Γ˜ -icb = iΩ -2 -oA¯ -sKicb( ˜mf − m˜ s), (18a) -Γ˜ -cmb = iΩ -2 -oA¯ -fKcmb m˜ f . (18b) -Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the -effects of viscous and EM coupling, respectively. -A fifth equation is required to connect this interior model to the obliquity of the mantle, -and this is provided by Equation (7). For small angles θm and θp, this gives [e.g. Mathews et al., -1991; Dumberry and Wieczorek, 2016; Baland et al., 2019] -m˜ + (1 + ω)˜p = 0 . (19) -For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This -is because θp ≈ 8.567◦ whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the -condition of small angles assumed in our framework. Furthermore, the external torques act￾ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜εm. Writ￾ten in terms of ˜εm, and with the approximation of ˜εm  1 and ˜m  1, Equation (7) becomes -m˜ + (1 + ω)˜εm = −(1 + ω) tan I . (20) -Likewise, the frequency ω from Equation (5) can be written simply in terms of I, -ω = −1 − δω cos I . (21) -The set of four Equations (12) with the addition of Equation (20) form a linear system -of equations for the five rotational variables ˜m, ˜mf , ˜ms, ˜ns and ˜εm. It captures the response -of Mercury, in the frequency domain, when subject to a periodic solar torque applied at fre￾quency ω. 
The system can be written in a matrix form as +Confidential manuscript submitted to JGR-Planets +φm = +3 +2 +n +2 +Ω2 +o + +G210 e + +1 +2 +G201 γ + +, (15a) +φs = +3 +2 +n +2 +Ω2 +o + +G210 es + +1 +2 +G201 γs + +, (15b) +and where G210 and G201 are functions of the orbital eccentricity ec, +G210 = +1 +(1 − e +2 +c +) +3/2 +, (16a) +G201 = +7 +2 +ec − +123 +16 +e +3 +c + +489 +128 +e +5 +c +. (16b) +The gravitational torque by the Sun acting on the inner core alone, Γ˜s +sun, is +Γ˜s +sun = −iΩ +2 +oA¯ +sα3φs(˜εm + ˜ns). (17) +Γ˜ +cmb and Γ˜icb are the torques from tangential stresses by the fluid core on the mantle at the +CMB and on the inner core at the ICB, respectively. These torques can be parameterized in +terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angular velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002], +Γ˜ +icb = iΩ +2 +oA¯ +sKicb( ˜mf − m˜ s), (18a) +Γ˜ +cmb = iΩ +2 +oA¯ +fKcmb m˜ f . (18b) +Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the +effects of viscous and EM coupling, respectively. +A fifth equation is required to connect this interior model to the obliquity of the mantle, +and this is provided by Equation (7). For small angles θm and θp, this gives [e.g. Mathews et al., +1991; Dumberry and Wieczorek, 2016; Baland et al., 2019] +m˜ + (1 + ω)˜p = 0 . (19) +For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This +is because θp ≈ 8.567◦ whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the +condition of small angles assumed in our framework. Furthermore, the external torques acting on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜εm. Written in terms of ˜εm, and with the approximation of ˜εm  1 and ˜m  1, Equation (7) becomes +m˜ + (1 + ω)˜εm = −(1 + ω) tan I . 
(20) +Likewise, the frequency ω from Equation (5) can be written simply in terms of I, +ω = −1 − δω cos I . (21) +The set of four Equations (12) with the addition of Equation (20) form a linear system +of equations for the five rotational variables ˜m, ˜mf , ˜ms, ˜ns and ˜εm. It captures the response +of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequency ω. The system can be written in a matrix form as –11– -Confidential manuscript submitted to JGR-Planets -M · x = y , (22a) -where the solution (x) and forcing (y) vectors are -x -T = [ ˜m, m˜ f , m˜ s, n˜s, ε˜m] , (22b) -y -T = [0, 0, 0, 0, −(1 + ω) tan I] , (22c) -and the elements of matrix M are -M = - - - - - - - -ω − e (1 + ω) -A¯f -A¯ -(1 + ω) -A¯s -A¯ -A¯s -A¯ α3 -￾ -(1 + ω)es + φs - -φm -ω 1 + ω + ef + Kcmb + -A¯s -A¯f -Kicb − -A¯s -A¯f -Kicb −ωesα1 -A¯s -A¯f -0 -ω − α3es α1es − Kicb 1 + ω + Kicb (1 + ω − α2)es + α3φs α3φs -0 0 1 ω 0 -1 0 0 0 (1 + ω) - - - - - - - -. -(22d) -Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three -modes have periods which, when seen in inertial space, are typically in the range of a few hun￾dred to a few thousand years. The first is the free axial precession of Mercury maintained by -the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nu￾tation (FCN), which is the free precession of the spin axis of the fluid core about the symme￾try axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN), -a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al., -1991]. -A few remarks on our model are important to point out before we proceed further. First, -although we have retained the triaxial shape of Mercury in the expression of the solar torque, -we treat its angular momentum response as if it were an axially symmetric body. 
This is con￾venient as the two equatorial angular momentum equations for each region can be combined -into a single equation. To first order, the frequency of the free precession of Mercury is not largely -altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the -FCN and FICN for a triaxial planetary body may be slightly different than those for an axi￾ally symmetric body, but not by large factor. As the response of Mercury to the solar torque -is largely determined by the resonant amplification due to the presence of these three modes, -our model should capture correctly the first order Cassini state of Mercury. Considering the -triaxial shape of Mercury may alter the numerical results, but not our general conclusions. -Second, our modelling approach is different than in the studies of Peale et al. [2014] and -Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are de￾veloped and must then be integrated in time. The equilibrium Cassini state is the quasi-steady -state that remains after transient effects associated with the initial conditions have decayed away. -An advantage of these models compared to ours is that the complete triaxial dynamics of Mer￾cury, including its longitudinal librations, are retained. However, the numerical integration can -be lengthy if dissipation is weak, which restricts the number of possible interior models of Mer￾cury that can be tested. In contrast, our model is a simple linear system in the frequency do￾main, focused on one specific frequency: the forced precession associated with the Cassini state. -Solutions are straightforward to obtain for a given interior model, and this allows us to cover -a larger span of the parameter space. One drawback, however, is that our model does not cap￾ture time-dependent variations at any other frequencies, including the precession of the peri￾center of Mercury’s orbit about the Sun. 
-–12 -Confidential manuscript submitted to JGR-Planets -2.3 Analytical solutions and limiting cases -2.3.1 The Cassini state of a single-body, rigid Mercury -For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa￾tions (12a) and (20), -(ω − e) ˜m + φm ε˜m = 0 , (23a) -m˜ + (1 + ω)˜εm = −(1 + ω) tan I . (23b) -Using Equation (21), δω  1, and the approximation A¯(1 +e+δω cos I) = C +Aδω ¯ cos I ≈ -C, these can be written as -Cm˜ = Aφ¯ m ε˜m , (24a) -m˜ = δω￾ -sin I + cos I ε˜m - -. (24b) -Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330◦ -, δω = -4.9327×10−7 and taking ˜εm = 2.04 arcmin, this gives ˜m = 2.52×10−4 arcmin, much smaller -than ˜εm: the offset of the rotation axis of the mantle with respect to its symmetry axis is very -small. Substituting Equation (24b) in Equation (24a) gives -CΩp -￾ -sin I + cos I ε˜m - -= A¯Ωoφmε˜m , (25) -and isolating for ˜εm, -ε˜m = -CΩp sin I -−CΩp cos I + A¯Ωoφm -. (26) -Upon using Equations (4), (15a), and Ωo = -3 -2 -n, we can write -ε˜m = -CΩp sin I -−CΩp cos I + nMR2 (G210J2 + 2G201C22) -. (27) -This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 -[see for instance Equation (1) of Baland et al., 2017, where their definition of Ω is equal to ˙ −Ωp]. -Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of -Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo￾ment of inertia Cˆ, -Cˆ = -C -MR2 -= -n -Ωp -G210J2 + 2G201C22 -cos I + sin I/ε˜m -. (28) -which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation -that a measurement of the obliquity gives a constraint on Cˆ. -Two free modes of precession are found by setting y = 0 in Equation (23). One mode cor￾responds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession -of the rotation axis about the symmetry axis. 
The second mode is the free retrograde axial pre￾cession of Mercury. As seen in the inertial frame, its frequency is given by -–1 -Confidential manuscript submitted to JGR-Planets -ωfp = n -MR2 -C - -G210J2 + 2G201C22 -, (29) -which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com￾ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid￾body precession and hence C was replaced by Cm. Using C = 0.346 · MR2 -[Margot et al., -2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free preces￾sion period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take -Cm = 0.431·C = 0.431·0.346·MR2 -[Margot et al., 2012], we obtain Tfp = 2π/ωfp = 560 yr. -These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, -the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid -core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The -true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, -the free precession period is much shorter than the forcing period of 325 kyr. Using Equation -(29), Equation (27) can be written as [e.g. Baland et al., 2017] -ε˜m = -Ωp sin I -−Ωp cos I + ωfp -. (30) -The obliquity of Mercury is thus determined by how the forcing frequency Ωp compares with -the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassini state 1 [Peale, -1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant -amplification if Ωp ≈ ωfp. Since ωfp  Ωp, resonant amplification is minimal and the re￾sulting obliquity, ˜εm ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5 -◦ -. 
-2.3.2 The misalignment of the fluid and solid cores -With ω = −1 − δω cos I and δω  1, Equation (12d) gives ˜ns ≈ m˜ s; as for the mantle, -the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. -The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure, -so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa￾tion (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes -CΩp -￾ -sin I + cos I ε˜m - -+ (A¯ -f cos I Ωp) ˜mf + A¯ -s(cos I Ωp − Ωoα3φs)˜ns = A¯Ωoφmε˜m . (31) -This latter equation shows how the misaligned inner core and fluid core can lead to a modifi￾cation of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by -n˜s ≈ -Ωp -κλs - -1 + -Ωo(Kicb − α1es) -λf - -￾ -sin I + cos I ε˜m - -− -Ωoα3φs -κλs -ε˜m , (32a) -m˜ f ≈ -Ωp -λf -￾ -sin I + cos I ε˜m - -+ -Ωo -λf -A¯ -s -A¯ -f -￾ -Kicb − α1es - -n˜s , (32b) -where -κ = 1 − -A¯ -s -A¯ -f -Ω -2 -o -￾ -Kicb − α1es -2 -λs λf -, (33a) -λf = ¯σf − Ωp cos I , (33b) -λs = ¯σs − Ωp cos I , (33c) -Confidential manuscript submitted to JGR-Planets -and where we have introduced the frequencies -σ¯f = Ωo - -ef + Kcmb + -A¯ -s -A¯ -f -Kicb -, (33d) -σ¯s = Ωo - -esα3αg − esα1 + α3φs + Kicb -. (33e) -These solutions are good approximations for all the results that we present in section 3. For -an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they pro￾vide useful predictions of ˜ns and ˜mf . -In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σs  -Ωp and ¯σf  Ωp, so that ˜ns → 0, ˜mf → 0 and Equation (31) reverts back to Equation (25) -for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and -mantle (i.e. 
for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM cou￾pling, Kcmb = Kicb = 0), then -φs = 0 , κ = 1 , λf = λs = −Ωp cos I , m˜ f = ˜ns = −(tan I + ˜εm). (34) -Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm = -C − A¯ -f − A¯ -s, we obtain -Cm Ωp -￾ -sin I + cos I ε˜m - -= A¯Ωoφmε˜m . (35) -which describes, as expected, a forced precession of the mantle alone. If this was the case for -Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈ 0.88 arcmin, substantially smaller -than the observed obliquity of ˜εm ≈ 2 arcmin. -If ¯σf ≈ Ωp (and thus λf → 0) and/or ¯σs ≈ Ωp (and thus λs → 0) resonant amplifica￾tion leads to large amplitudes for ˜mf , ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and -σ¯s are closely related to the FCN and FICN frequencies ωf cn and ωf icn, respectively. Hence, -just as a large mantle obliquity can result from resonant amplification when the forcing frequency -approaches the free precession frequency, a large mantle obliquity can likewise result from res￾onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These -frequencies depend on the interior density structure and are not known. However, we will show -that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of -a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex￾pect an important amplification effect. Furthermore, since ωf cn, ωf icn  Ωp, then ¯σf  Ωp -and ¯σs  Ωp, and we are in the strong coupling limit. The mantle obliquity should be close -to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜mf and -n˜s should be of the order of ˜εm or smaller. This further justifies the assumption of small an￾gles that we have adopted. 
-3 Results -3.1 Geodetic constraints and interior density structure -All our interior models are constrained to match the mass M of Mercury and specific choices -of Cˆ = C/MR2 and Cm/C. The choice of Cˆ is determined from Equation (28). For the pa￾rameters listed in Table 1, and an observed obliquity of εm = 2.04 arcmin [Margot et al., 2012], -this gives Cˆ = C/MR2 = 0.3455 and all our interior models are consistent with this choice. -Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are -–15 -Confidential manuscript submitted to JGR-Planets -perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in es￾timating Cˆ from Equation (28), or conversely in predicting εm based on a given choice for Cˆ. -Part of the objective of our study is to estimate how large this error is. The ratio Cm/C is ob￾tained from the amplitude of the 88-day longitudinal mantle libration φo, which is given by -φo = 6 · f(ec)C22 -MR2 -C -C -Cm -1 -1 + ζ -, (36) -where -f(ec) = 1 − 11e -2 -c + -959 -48 -e -4 -c -, (37) -and where ζ is a correction that takes into account the entrainment of the inner core in the li￾bration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this cor￾rection is small and, to simplify, we neglect it here. Taking the observed libration amplitude -to be 38.5 arcsec [Margot et al., 2012], Cˆ = C/MR2 = 0.3455 and C22 and ec from Table 1, -this corresponds to a ratio Cm/C = 0.4269, or equivalently Cˆm = Cm/MR2 = 0.1475. -For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3 -[Sori, -2018]. Our standard choice for the crustal thickness is h = 26 km [Sori, 2018], although in -section 3.2 we also present some results with other choices of h. We have considered two pos￾sible prescriptions connected to the density of the inner core. 
First, for all the results presented -in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 ap￾proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure -Fe composition in face-centered cubic phase. This captures an end-member scenario where the -core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively -free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead -an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g. -Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the -ICB is expected to be small, although since density increases with depth, the contrast between -the mean densities of the fluid and solid cores is larger. It is these mean densities that enter -our Mercury model with uniform density layers. To capture this other end-member core com￾position scenario, in section 3.5 we present results where we instead prescribe a fixed density -contrast between the fluid and solid core; specifically, we set the numerical value of α3. -For a given choice of inner core radius rs, the densities of the mantle (ρm) and fluid core -(ρf ) and the radius of the CMB (rf ) are determined such that the interior model matches M, -Cˆ = 0.3455 and Cˆm = 0.1475. Figure 3a shows how ρm, ρf and rf vary as a function of in￾ner core radius rs for each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When -the inner core is small, its presence has a limited influence on the resulting density structure, -and we find ρm = 3197 kg m−3 -, ρf = 7263 kg m−3 and rf = 2000 km in each of the two -scenarios. When ρs is fixed to 8800 kg m−3 -, as the inner core reaches 1500 km in size, rf in￾creases to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m−3 -. 
-Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core -size, as otherwise ρm gets unreasonably large and ρf gets inappropriately small (as it would -require an excessively large concentration of light elements). When adopting instead a fixed den￾sity contrast, with α3 = 0.1, the changes in rf , ρm and ρf with inner core radius are more mod￾est, allowing larger possible inner core sizes. Different assumptions on ρc and h would alter the -numerical values shown on Figure 3a but not their trends with rs. -Figure 3b shows how the FCN and FICN periods vary with rs for each of the two inner -core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb = Kicb = +Confidential manuscript submitted to JGR-Planets +M · x = y , (22a) +where the solution (x) and forcing (y) vectors are +x +T = [ ˜m, m˜ f , m˜ s, n˜s, ε˜m] , (22b) +y +T = [0, 0, 0, 0, −(1 + ω) tan I] , (22c) +and the elements of matrix M are +M = + + + + + + + +ω − e (1 + ω) +A¯f +A¯ +(1 + ω) +A¯s +A¯ +A¯s +A¯ α3 + +(1 + ω)es + φs + +φm +ω 1 + ω + ef + Kcmb + +A¯s +A¯f +Kicb − +A¯s +A¯f +Kicb −ωesα1 +A¯s +A¯f +0 +ω − α3es α1es − Kicb 1 + ω + Kicb (1 + ω − α2)es + α3φs α3φs +0 0 1 ω 0 +1 0 0 0 (1 + ω) + + + + + + + +. +(22d) +Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three +modes have periods which, when seen in inertial space, are typically in the range of a few hundred to a few thousand years. The first is the free axial precession of Mercury maintained by +the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nutation (FCN), which is the free precession of the spin axis of the fluid core about the symmetry axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN), +a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al., +1991]. 
+A few remarks on our model are important to point out before we proceed further. First, +although we have retained the triaxial shape of Mercury in the expression of the solar torque, +we treat its angular momentum response as if it were an axially symmetric body. This is convenient as the two equatorial angular momentum equations for each region can be combined +into a single equation. To first order, the frequency of the free precession of Mercury is not significantly +altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the +FCN and FICN for a triaxial planetary body may be slightly different from those for an axially symmetric body, but not by a large factor. As the response of Mercury to the solar torque +is largely determined by the resonant amplification due to the presence of these three modes, +our model should correctly capture the first-order Cassini state of Mercury. Considering the +triaxial shape of Mercury may alter the numerical results, but not our general conclusions. +Second, our modelling approach is different from that of the studies of Peale et al. [2014] and +Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are developed and must then be integrated in time. The equilibrium Cassini state is the quasi-steady +state that remains after transient effects associated with the initial conditions have decayed away. +An advantage of these models compared to ours is that the complete triaxial dynamics of Mercury, including its longitudinal librations, are retained. However, the numerical integration can +be lengthy if dissipation is weak, which restricts the number of possible interior models of Mercury that can be tested. In contrast, our model is a simple linear system in the frequency domain, focused on one specific frequency: the forced precession associated with the Cassini state. 
+Solutions are straightforward to obtain for a given interior model, and this allows us to cover +a larger span of the parameter space. One drawback, however, is that our model does not capture time-dependent variations at any other frequencies, including the precession of the pericenter of Mercury’s orbit about the Sun. +–12– +Confidential manuscript submitted to JGR-Planets +2.3 Analytical solutions and limiting cases +2.3.1 The Cassini state of a single-body, rigid Mercury +For a rigid planet with no fluid core and no solid inner core, our system of equations reduces to Equations (12a) and (20), +$$(\omega - e)\,\tilde{m} + \phi_m\,\tilde{\varepsilon}_m = 0 , \tag{23a}$$ +$$\tilde{m} + (1 + \omega)\,\tilde{\varepsilon}_m = -(1 + \omega)\tan I . \tag{23b}$$ +Using Equation (21), $\delta\omega \ll 1$, and the approximation $\bar{A}\,(1 + e + \delta\omega\cos I) = C + \bar{A}\,\delta\omega\cos I \approx C$, these can be written as +$$C\,\tilde{m} = \bar{A}\,\phi_m\,\tilde{\varepsilon}_m , \tag{24a}$$ +$$\tilde{m} = \delta\omega\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) . \tag{24b}$$ +Equation (24b) gives a direct relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$. For $I = 8.5330^\circ$, $\delta\omega = 4.9327\times10^{-7}$ and taking $\tilde{\varepsilon}_m = 2.04$ arcmin, this gives $\tilde{m} = 2.52\times10^{-4}$ arcmin, much smaller +than $\tilde{\varepsilon}_m$: the offset of the rotation axis of the mantle with respect to its symmetry axis is very +small. Substituting Equation (24b) in Equation (24a) gives +$$C\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m , \tag{25}$$ +and isolating for $\tilde{\varepsilon}_m$, +$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + \bar{A}\,\Omega_o\,\phi_m} . \tag{26}$$ +Upon using Equations (4), (15a), and $\Omega_o = \tfrac{3}{2}\,n$, we can write +$$\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + n\,MR^2\left(G_{210}J_2 + 2\,G_{201}C_{22}\right)} . \tag{27}$$ +This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 +[see for instance Equation (1) of Baland et al., 2017, where their definition of $\dot{\Omega}$ is equal to $-\Omega_p$]. +Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of +Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment of inertia $\hat{C}$, +$$\hat{C} = \frac{C}{MR^2} = \frac{n}{\Omega_p}\,\frac{G_{210}J_2 + 2\,G_{201}C_{22}}{\cos I + \sin I/\tilde{\varepsilon}_m} , \tag{28}$$ +which is equivalent to Equation (89) of Van Hoolst [2015]. 
It is based on the latter equation +that a measurement of the obliquity gives a constraint on $\hat{C}$. +Two free modes of precession are found by setting y = 0 in Equation (23). One mode corresponds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession +of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession of Mercury. As seen in the inertial frame, its frequency is given by +–13– +Confidential manuscript submitted to JGR-Planets +$$\omega_{fp} = n\,\frac{MR^2}{C}\left(G_{210}J_2 + 2\,G_{201}C_{22}\right) , \tag{29}$$ +which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-body precession and hence $C$ was replaced by $C_m$. Using $C = 0.346 \cdot MR^2$ [Margot et al., +2012] and the numerical values for $n$, $J_2$, $C_{22}$ and $e_c$ given in Table 1, we obtain a free precession period of $T_{fp} = 2\pi/\omega_{fp} = 1298$ yr. If we use $C_m$ instead of $C$ in Equation (29), and take +$C_m = 0.431 \cdot C = 0.431 \cdot 0.346 \cdot MR^2$ [Margot et al., 2012], we obtain $T_{fp} = 2\pi/\omega_{fp} = 560$ yr. +These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, +the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid +core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The +true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, +the free precession period is much shorter than the forcing period of 325 kyr. Using Equation +(29), Equation (27) can be written as [e.g. Baland et al., 2017] +$$\tilde{\varepsilon}_m = \frac{\Omega_p \sin I}{-\Omega_p \cos I + \omega_{fp}} . \tag{30}$$ +The obliquity of Mercury is thus determined by how the forcing frequency $\Omega_p$ compares with +the free precession frequency $\omega_{fp}$. Because $\omega_{fp} > \Omega_p$, Mercury occupies Cassini state 1 [Peale, +1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant +amplification if $\Omega_p \approx \omega_{fp}$. 
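Since $\omega_{fp} \gg \Omega_p$, resonant amplification is minimal and the resulting obliquity, $\tilde{\varepsilon}_m \approx 2$ arcmin, is much smaller than the inclination angle $I \approx 8.5^\circ$. +2.3.2 The misalignment of the fluid and solid cores +With $\omega = -1 - \delta\omega\cos I$ and $\delta\omega \ll 1$, Equation (12d) gives $\tilde{n}_s \approx \tilde{m}_s$; as for the mantle, +the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. +The relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$ of Equation (24b) is independent of the interior structure, +so it remains unchanged when a fluid core and a solid inner core are present. Substituting it in Equation (12a), and setting $\tilde{n}_s = \tilde{m}_s$, the angular momentum equation of the whole planet becomes +$$C\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \left(\bar{A}_f \cos I\,\Omega_p\right)\tilde{m}_f + \bar{A}_s\left(\cos I\,\Omega_p - \Omega_o\,\alpha_3\,\phi_s\right)\tilde{n}_s = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m . \tag{31}$$ +This latter equation shows how the misaligned inner core and fluid core can lead to a modification of the mantle obliquity $\tilde{\varepsilon}_m$. Approximate analytical solutions of $\tilde{n}_s$ and $\tilde{m}_f$ are given by +$$\tilde{n}_s \approx \frac{\Omega_p}{\kappa\,\lambda_s}\left[1 + \frac{\Omega_o\left(K_{icb} - \alpha_1 e_s\right)}{\lambda_f}\right]\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) - \frac{\Omega_o\,\alpha_3\,\phi_s}{\kappa\,\lambda_s}\,\tilde{\varepsilon}_m , \tag{32a}$$ +$$\tilde{m}_f \approx \frac{\Omega_p}{\lambda_f}\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \frac{\Omega_o}{\lambda_f}\,\frac{\bar{A}_s}{\bar{A}_f}\left(K_{icb} - \alpha_1 e_s\right)\tilde{n}_s , \tag{32b}$$ +where +$$\kappa = 1 - \frac{\bar{A}_s}{\bar{A}_f}\,\frac{\Omega_o^2\left(K_{icb} - \alpha_1 e_s\right)^2}{\lambda_s\,\lambda_f} , \tag{33a}$$ +$$\lambda_f = \bar{\sigma}_f - \Omega_p \cos I , \tag{33b}$$ +$$\lambda_s = \bar{\sigma}_s - \Omega_p \cos I , \tag{33c}$$ +–14– +Confidential manuscript submitted to JGR-Planets +and where we have introduced the frequencies +$$\bar{\sigma}_f = \Omega_o\left(e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f}\,K_{icb}\right) , \tag{33d}$$ +$$\bar{\sigma}_s = \Omega_o\left(e_s\,\alpha_3\,\alpha_g - e_s\,\alpha_1 + \alpha_3\,\phi_s + K_{icb}\right) . \tag{33e}$$ +These solutions are good approximations for all the results that we present in section 3. For +an observed mantle obliquity $\tilde{\varepsilon}_m$ and for a chosen set of interior model parameters, they provide useful predictions of $\tilde{n}_s$ and $\tilde{m}_f$. +In the limit of a very strong coupling between the fluid core, solid core and mantle, $\bar{\sigma}_s \gg \Omega_p$ +and $\bar{\sigma}_f \gg \Omega_p$, so that $\tilde{n}_s \to 0$, $\tilde{m}_f \to 0$ and Equation (31) reverts back to Equation (25) +for a rigid planet. 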
In the opposite limit of no coupling between the fluid core, solid core and +mantle (i.e. for spherical internal boundaries, $e_f = e_s = \gamma_s = 0$, and no viscous or EM coupling, $K_{cmb} = K_{icb} = 0$), then +$$\phi_s = 0 , \quad \kappa = 1 , \quad \lambda_f = \lambda_s = -\Omega_p \cos I , \quad \tilde{m}_f = \tilde{n}_s = -\left(\tan I + \tilde{\varepsilon}_m\right) . \tag{34}$$ +Inserting these in Equation (31), and with the moment of inertia of the mantle equal to $C_m = C - \bar{A}_f - \bar{A}_s$, we obtain +$$C_m\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m , \tag{35}$$ +which describes, as expected, a forced precession of the mantle alone. If this were the case for +Mercury, taking $C_m/C = 0.431$, the obliquity should be $\tilde{\varepsilon}_m \approx 0.88$ arcmin, substantially smaller +than the observed obliquity of $\tilde{\varepsilon}_m \approx 2$ arcmin. +If $\bar{\sigma}_f \approx \Omega_p$ (and thus $\lambda_f \to 0$) and/or $\bar{\sigma}_s \approx \Omega_p$ (and thus $\lambda_s \to 0$), resonant amplification leads to large amplitudes for $\tilde{m}_f$, $\tilde{n}_s$ and the mantle obliquity $\tilde{\varepsilon}_m$. The frequencies $\bar{\sigma}_f$ and +$\bar{\sigma}_s$ are closely related to the FCN and FICN frequencies $\omega_{fcn}$ and $\omega_{ficn}$, respectively. Hence, +just as a large mantle obliquity can result from resonant amplification when the forcing frequency +approaches the free precession frequency, a large mantle obliquity can likewise result from resonant amplification when the forcing frequency approaches the FCN or FICN frequencies. These +frequencies depend on the interior density structure and are not known. However, we will show +that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of +a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect an important amplification effect. Furthermore, since $\omega_{fcn}, \omega_{ficn} \gg \Omega_p$, then $\bar{\sigma}_f \gg \Omega_p$ +and $\bar{\sigma}_s \gg \Omega_p$, and we are in the strong coupling limit. The mantle obliquity should be close +to that expected for a rigid planet, as observations suggest. Therefore, we expect that $\tilde{m}_f$ and +$\tilde{n}_s$ should be of the order of $\tilde{\varepsilon}_m$ or smaller. This further justifies the assumption of small angles that we have adopted. 
+3 Results +3.1 Geodetic constraints and interior density structure +All our interior models are constrained to match the mass $M$ of Mercury and specific choices +of $\hat{C} = C/MR^2$ and $C_m/C$. The choice of $\hat{C}$ is determined from Equation (28). For the parameters listed in Table 1, and an observed obliquity of $\varepsilon_m = 2.04$ arcmin [Margot et al., 2012], +this gives $\hat{C} = C/MR^2 = 0.3455$ and all our interior models are consistent with this choice. +Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are +–15– +Confidential manuscript submitted to JGR-Planets +perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating $\hat{C}$ from Equation (28), or conversely in predicting $\varepsilon_m$ based on a given choice for $\hat{C}$. +Part of the objective of our study is to estimate how large this error is. The ratio $C_m/C$ is obtained from the amplitude of the 88-day longitudinal mantle libration $\phi_o$, which is given by +$$\phi_o = 6\,f(e_c)\,C_{22}\,\frac{MR^2}{C}\,\frac{C}{C_m}\,\frac{1}{1 + \zeta} , \tag{36}$$ +where +$$f(e_c) = 1 - 11\,e_c^2 + \frac{959}{48}\,e_c^4 , \tag{37}$$ +and where $\zeta$ is a correction that takes into account the entrainment of the inner core in the libration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this correction is small and, to simplify, we neglect it here. Taking the observed libration amplitude +to be 38.5 arcsec [Margot et al., 2012], $\hat{C} = C/MR^2 = 0.3455$ and $C_{22}$ and $e_c$ from Table 1, +this corresponds to a ratio $C_m/C = 0.4269$, or equivalently $\hat{C}_m = C_m/MR^2 = 0.1475$. +For all results presented in our study, the crustal density is set at $\rho_c = 2974$ kg m$^{-3}$ [Sori, +2018]. Our standard choice for the crustal thickness is $h = 26$ km [Sori, 2018], although in +section 3.2 we also present some results with other choices of $h$. We have considered two possible prescriptions connected to the density of the inner core. 
First, for all the results presented +in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 approximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure +Fe composition in face-centered cubic phase. This captures an end-member scenario where the +core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively +free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead +an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g. +Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the +ICB is expected to be small, although since density increases with depth, the contrast between +the mean densities of the fluid and solid cores is larger. It is these mean densities that enter +our Mercury model with uniform density layers. To capture this other end-member core composition scenario, in section 3.5 we present results where we instead prescribe a fixed density +contrast between the fluid and solid core; specifically, we set the numerical value of α3. +For a given choice of inner core radius rs, the densities of the mantle (ρm) and fluid core +(ρf ) and the radius of the CMB (rf ) are determined such that the interior model matches M, +Cˆ = 0.3455 and Cˆm = 0.1475. Figure 3a shows how ρm, ρf and rf vary as a function of inner core radius rs for each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When +the inner core is small, its presence has a limited influence on the resulting density structure, +and we find ρm = 3197 kg m−3, ρf = 7263 kg m−3 and rf = 2000 km in each of the two +scenarios. When ρs is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, rf increases to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m−3 +. 
+Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core +size, as otherwise ρm gets unreasonably large and ρf gets inappropriately small (as it would +require an excessively large concentration of light elements). When adopting instead a fixed density contrast, with α3 = 0.1, the changes in rf , ρm and ρf with inner core radius are more modest, allowing larger possible inner core sizes. Different assumptions on ρc and h would alter the +numerical values shown on Figure 3a but not their trends with rs. +Figure 3b shows how the FCN and FICN periods vary with rs for each of the two inner +core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb = Kicb = –16– -Confidential manuscript submitted to JGR-Planets -0 -200 -400 -600 -800 -1000 -1200 -1400 -period (yr) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -3000 -4000 -5000 -6000 -7000 -8000 -density (kg/m -3) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -2000 -2020 -2040 -2060 -2080 -2100 -Fluid core radius (km) -fluid core density -CMB radius -FICN -FCNint -mantle density -a b -FCN -Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand -side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN -period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to -a scenario where the density of the inner core is set to 8800 kg m−3 -; thin dashed lines correspond to a -scenario where the density contrast between the fluid and solid cores is set to α3 = 0.1. -0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small in￾ner core, increasing to approximately 600 yr at the largest rs. The FICN period is shorter, close -to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the -largest rs under the fixed ρs (fixed α3) scenario. 
This confirms that the FCN and FICN peri￾ods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away -from it that we do not expect large ˜mf and ˜ns from resonant amplification. -The FCN and FICN periods that we have computed include the influence of the exter￾nal torque. As shown by Baland et al. [2019], the external torque allow solid regions to have -a free motion in inertial space thereby affecting the free rotational modes. To a good approx￾imation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0 -are given by -ωf cn ≈ −Ωo - -A¯ -A¯m + A¯ -s -  -ef + φm - -+ Ωo -efφm -(ef + φm) -, (38a) -ωf icn ≈ Ωo - -A¯ + A¯ -s -A¯ − A¯ -s -  -esα1 − esα3αg − α3φs - -. (38b) -The expression of the FICN frequency involves the inertial torque (term esα1) and the grav￾itational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core. -For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 = -0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg  α1; -the gravitational torque dominates the inertial torque, in large part because of the slow rota￾tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion -is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and -Dumberry, 2018], but it is different for Earth, where α1 > α3αg because of its faster rotation -and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres- -–17– -Confidential manuscript submitted to JGR-Planets -sion for the FICN differs by a factor (A¯+A¯ -s)/(A¯−A¯ -s) compared to that given in Dumberry -and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. -The expression for FCN frequency differs from the usual expression for Earth. First, it -involves the external torque from the Sun captured by the parameter φm. 
If we set φm = 0, -we obtain the FCN frequency for a decoupled model in which only interior torques contribute, -ωf cn,int ≈ −Ωo - -A¯ -A¯m + A¯ -s - -ef . (38c) -This frequency is slightly different from the usual expression for Earth, involving the ratio A/¯ (A¯m+ -A¯ -s) rather than A/¯ A¯m. This is because of the relatively thin mantle of Mercury; for the largest -rs considered, the moment of inertia of the inner core can get close to 40% of that of the man￾tle and is not negligible. The period of the FCN when only interior torques contribute is shown -in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr -at the largest rs. Hence, the influence of the solar torque reduces the FCN period by a factor -of approximately 3. We note that the FICN period, in contrast, is not altered substantially when -the external torque is set to zero. -3.2 Gravitational and inertial coupling -Let us now investigate the obliquities of the mantle, fluid core and inner core in their equi￾librium Cassini state. We assume a fixed inner core density scenario in this section, with ρs = -8800 kg m−3 -. Viscous and EM coupling are set to zero in order to isolate the influence of grav￾itational and inertial coupling. Figure 4 shows how ˜εm, ˜mf and ˜ns vary as functions of inner -core radius. We show calculations for three different choices of crustal thickness, but let us con￾centrate first on the case for h = 26 km. For small rs, we retrieve an obliquity of ˜εm = 2.0494 -arcmin (Figure 4a). ˜εm decreases with rs, but not substantially; at the largest rs (1500 km), -ε˜m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜εm = 2.04 -arcmin, the obliquity that we used in setting the constraint for Cˆ – and hence the prediction -we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which -occurs for small inner cores. 
-The deviation of ˜εm from that of a rigid planet is due to the misalignments of the fluid -core ( ˜mf ) and solid inner core (˜ns) with respect to the mantle (Figure 4b). The misalignment -of the fluid core spin axis from the mantle is significant: ˜mf is approximately 4.02 arcmin for -a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin -at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which -coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with -respect to the orbit normal is ˜εm+ ˜mf ≈ 6 arcmin. The reason why the obliquity of the spin -axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), -which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forc￾ing frequency. When the FCN frequency is much larger than the forcing frequency, as is the -case for Mercury, the resonant amplification is very weak but remains present and ˜mf is larger -than zero. -In contrast to ˜mf , the misalignment of the inner core with respect to the mantle is much -smaller; ˜ns is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜εm. -Physically, this is because the gravitational torque acting on the inner core when it is tilted from -the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner -core must remain in close alignment with the mantle. 
Presented differently, since the FICN pe￾riod is more than 3000 times shorter than the forced precession period, the inner core can eas- -–18– -Confidential manuscript submitted to JGR-Planets -2.038 -2.040 -2.042 -2.044 -2.046 -2.048 -2.050 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -1.5 -2.0 -2.5 -3.0 -3.5 -4.0 -4.5 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -crustal thickness -16 km -36 km -26 km -crustal thickness -16 km -36 km -26 km -εm -εg -εm for a rigid planet -mf -ns (x100) -a b -Figure 4. a) Obliquity of the mantle (˜εm, solid lines) and of the principal moment of inertia (˜εg, -dashed line) b) ˜mf (solid lines) and ˜ns (dashed lines, x100) as a function of inner core radius and for -different choices of crustal thickness. -ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ns does -not change substantially as the inner core increases in size. -When Kicb = Kcmb = 0, a good approximation of ˜εm is given by -ε˜m = -C -0Ωp sin I -−C0Ωp cos I + A¯Ωoφm -, (39) -which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced -by C -0 -. The latter represents an effective moment of inertia that accounts for the coupling of -the core to the mantle, -C -0 = C + A¯ -cχ , (40) -where A¯ -c = A¯ -f + A¯ -s and -χ = -Ωp cos I -A¯ -c - -A¯ -f -(¯σf − Ωp cos I) -+ -A¯ -s -(¯σs − Ωp cos I) - -− -A¯ -s -A¯ -c -Ωoα3φs -(¯σs − Ωp cos I) -. (41) -The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN -and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then -how the core is entrained to precess with the mantle, with the coupling between the two ex￾pressed in terms of the resonant amplification of the FCN and FICN frequencies. 
In the limit -of ¯σf , σ¯s → 0, then χ = −1, C -0 = Cm, the core is fully decoupled from the mantle and we -retrieve Equation (35). If instead ¯σf , σ¯s → ∞, then χ = 0, C -0 = C and we retrieve the pre￾diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ωp, -as is the case here, resonant amplification is weak, χ is small and positive, C -0 > C and this -leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core core is grav￾itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the -misalignment of the fluid core. In Equation (41), ¯σs  σ¯f , so to a good approximation +Confidential manuscript submitted to JGR-Planets +0 +200 +400 +600 +800 +1000 +1200 +1400 +period (yr) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +3000 +4000 +5000 +6000 +7000 +8000 +density (kg/m +3) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +2000 +2020 +2040 +2060 +2080 +2100 +Fluid core radius (km) +fluid core density +CMB radius +FICN +FCNint +mantle density +a b +FCN +Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand +side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN +period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to +a scenario where the density of the inner core is set to 8800 kg m−3; thin dashed lines correspond to a +scenario where the density contrast between the fluid and solid cores is set to α3 = 0.1. +0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner core, increasing to approximately 600 yr at the largest rs. The FICN period is shorter, close +to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the +largest rs under the fixed ρs (fixed α3) scenario. 
This confirms that the FCN and FICN periods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away +from it that we do not expect large ˜mf and ˜ns from resonant amplification. +The FCN and FICN periods that we have computed include the influence of the external torque. As shown by Baland et al. [2019], the external torque allows solid regions to have +a free motion in inertial space, thereby affecting the free rotational modes. To a good approximation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0 +are given by +$$\omega_{fcn} \approx -\Omega_o \left( \frac{\bar{A}}{\bar{A}_m + \bar{A}_s} \right) \left( e_f + \phi_m \right) + \Omega_o \frac{e_f \phi_m}{(e_f + \phi_m)} , \tag{38a}$$ +$$\omega_{ficn} \approx \Omega_o \left( \frac{\bar{A} + \bar{A}_s}{\bar{A} - \bar{A}_s} \right) \left( e_s \alpha_1 - e_s \alpha_3 \alpha_g - \alpha_3 \phi_s \right) . \tag{38b}$$ +The expression of the FICN frequency involves the inertial torque (term esα1) and the gravitational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core. +For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 = +0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫ α1; +the gravitational torque dominates the inertial torque, in large part because of the slow rotation rate of Mercury. As a result, the FICN frequency is negative (i.e. the precession motion +is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and +Dumberry, 2018], but it is different for Earth, where α1 > α3αg because of its faster rotation +and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres–17– + +Confidential manuscript submitted to JGR-Planets +sion for the FICN differs by a factor of $(\bar{A}+\bar{A}_s)/(\bar{A}-\bar{A}_s)$ compared to that given in Dumberry +and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. +The expression for the FCN frequency differs from the usual expression for Earth. First, it +involves the external torque from the Sun captured by the parameter φm.
If we set φm = 0, +we obtain the FCN frequency for a decoupled model in which only interior torques contribute, +$$\omega_{fcn,int} \approx -\Omega_o \left( \frac{\bar{A}}{\bar{A}_m + \bar{A}_s} \right) e_f . \tag{38c}$$ +This frequency is slightly different from the usual expression for Earth, involving the ratio $\bar{A}/(\bar{A}_m+\bar{A}_s)$ rather than $\bar{A}/\bar{A}_m$. This is because of the relatively thin mantle of Mercury; for the largest +rs considered, the moment of inertia of the inner core can get close to 40% of that of the mantle and is not negligible. The period of the FCN when only interior torques contribute is shown +in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr +at the largest rs. Hence, the influence of the solar torque reduces the FCN period by a factor +of approximately 3. We note that the FICN period, in contrast, is not altered substantially when +the external torque is set to zero. +3.2 Gravitational and inertial coupling +Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium Cassini state. We assume a fixed inner core density scenario in this section, with ρs = +8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of gravitational and inertial coupling. Figure 4 shows how ˜εm, ˜mf and ˜ns vary as functions of inner +core radius. We show calculations for three different choices of crustal thickness, but let us concentrate first on the case for h = 26 km. For small rs, we retrieve an obliquity of ˜εm = 2.0494 +arcmin (Figure 4a). ˜εm decreases with rs, but not substantially; at the largest rs (1500 km), +ε˜m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜εm = 2.04 +arcmin, the obliquity that we used in setting the constraint for Cˆ – and hence the prediction +we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin, which +occurs for small inner cores.
 +The deviation of ˜εm from that of a rigid planet is due to the misalignments of the fluid +core (˜mf) and solid inner core (˜ns) with respect to the mantle (Figure 4b). The misalignment +of the fluid core spin axis from the mantle is significant: ˜mf is approximately 4.02 arcmin for +a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin +at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which +coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with +respect to the orbit normal is ˜εm + ˜mf ≈ 6 arcmin. The reason why the obliquity of the spin +axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), +which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forcing frequency. When the FCN frequency is much larger than the forcing frequency, as is the +case for Mercury, the resonant amplification is very weak but remains present and ˜mf is larger +than zero. +In contrast to ˜mf, the misalignment of the inner core with respect to the mantle is much +smaller; ˜ns is between approximately 0.023 and 0.025 arcmin, roughly a factor of 80 smaller than ˜εm. +Physically, this is because the gravitational torque acting on the inner core when it is tilted from +the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner +core must remain in close alignment with the mantle.
Presented differently, since the FICN period is more than 3000 times shorter than the forced precession period, the inner core can eas–18– + +Confidential manuscript submitted to JGR-Planets +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +crustal thickness +16 km +36 km +26 km +crustal thickness +16 km +36 km +26 km +εm +εg +εm for a rigid planet +mf +ns (x100) +a b +Figure 4. a) Obliquity of the mantle (˜εm, solid lines) and of the principal moment of inertia (˜εg, +dashed line) b) ˜mf (solid lines) and ˜ns (dashed lines, x100) as a function of inner core radius and for +different choices of crustal thickness. +ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ns does +not change substantially as the inner core increases in size. +When Kicb = Kcmb = 0, a good approximation of ˜εm is given by +ε˜m = +C +0Ωp sin I +−C0Ωp cos I + A¯Ωoφm +, (39) +which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced +by C +0 +. The latter represents an effective moment of inertia that accounts for the coupling of +the core to the mantle, +C +0 = C + A¯ +cχ , (40) +where A¯ +c = A¯f + A¯s and +χ = +Ωp cos I +A¯ +c + +A¯ +f +(¯σf − Ωp cos I) ++ +A¯ +s +(¯σs − Ωp cos I) + +− +A¯ +s +A¯ +c +Ωoα3φs +(¯σs − Ωp cos I) +. (41) +The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN +and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then +how the core is entrained to precess with the mantle, with the coupling between the two expressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit +of ¯σf , σ¯s → 0, then χ = −1, C +0 = Cm, the core is fully decoupled from the mantle and we +retrieve Equation (35). 
If instead ¯σf , σ¯s → ∞, then χ = 0, C +0 = C and we retrieve the prediction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ωp, +as is the case here, resonant amplification is weak, χ is small and positive, C +0 > C and this +leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core core is gravitationally locked to the mantle, deviations from a rigid planet are dominantly caused by the +misalignment of the fluid core. In Equation (41), ¯σs  σ¯f , so to a good approximation –19– -Confidential manuscript submitted to JGR-Planets -χ ≈ -A¯ -f -A¯ -c -Ωo cos I -(¯σf − Ωp cos I) -. (42) -For a small inner core, χ ≈ 7.55×10−3 -. As the inner core grows, A¯ -f decreases, and the com￾bination A¯ -cχ also decreases. This implies that C -0 decreases with inner core size and, consequently, -ε˜m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the -prediction for a rigid planet. -The specific predictions of ˜εm, ˜mf and ˜ns on Figure 4 depend sensitively on the assumed -interior density model and on the dynamical ellipticities of the inner core (es) and fluid core -(ef ). Hence, it depends on the choices we have made for the inner core density ρs, the crustal -density ρc and its thickness h. Changing ρs, ρc and/or h requires a different combination of ρf , -ρm and rf in order to match M, Cˆ and Cˆm. In turn, this leads to different ellipticities at in￾terior boundary in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and -n˜s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal -thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼ 0.025%, but -the changes in ˜mf and ˜ns are more substantial, ∼ 5% and ∼ 10%, respectively. -We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment -of inertia of the whole planet, which we denote by ˜εg. 
A difference between ˜εg and ˜εm occurs -if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core -(with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor -of (Cs−A¯ -s)α3n˜s = A¯ -sesα3n˜s. The angle by which the mantle frame must be rotated so that -the moment of inertia of the whole planet is purely diagonal is (A¯ -sesα3n˜s)/(Ae¯ ), and hence a -good approximation of ˜εg is -ε˜g = ˜εm + -A¯ -ses -Ae¯ -α3n˜s . (43) -Since the inner core is gravitationally forced into a close alignment with the mantle, the dif￾ference between ˜εg and ˜εm remains very small. For the largest inner core radius that we have -considered, ˜εg differs from ˜εm only by approximately 0.001 arcmin. -3.3 Viscous coupling -We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini -state. Peale et al. [2014] present two different parameterizations of viscous coupling based on -the timescale of attenuation of the differential rotation between the fluid core and mantle. More -complete analytical solutions for the flow resulting from a differentially precessing shell have -been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester , 1976] and we exploit -these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based -on them are given in Mathews and Guo [2005], -Kcmb = -πρf r -4 -f -A¯ -f -r ν -2Ωo - -0.195 − 1.976i - -, (44a) -Kicb = -πρf r -4 -s -A¯ -s -r ν -2Ωo - -0.195 − 1.976i - -, (44b) -where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte￾rior is not well known but based on theoretical and experimental studies it is expected to be -of the order of 10−6 m2 -s -−1 -[e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al., -2002a,b]. +Confidential manuscript submitted to JGR-Planets +χ ≈ +A¯ +f +A¯ +c +Ωo cos I +(¯σf − Ωp cos I) +. (42) +For a small inner core, χ ≈ 7.55×10−3. 
As the inner core grows, $\bar{A}_f$ decreases, and the combination $\bar{A}_c\chi$ also decreases. This implies that $C'$ decreases with inner core size and, consequently, +ε˜m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the +prediction for a rigid planet. +The specific predictions of ˜εm, ˜mf and ˜ns in Figure 4 depend sensitively on the assumed +interior density model and on the dynamical ellipticities of the inner core (es) and fluid core +(ef). Hence, they depend on the choices we have made for the inner core density ρs, the crustal +density ρc and its thickness h. Changing ρs, ρc and/or h requires a different combination of ρf , +ρm and rf in order to match M, Cˆ and Cˆm. In turn, this leads to different ellipticities at the interior boundaries in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and +n˜s. To illustrate this, we show in Figure 4 two additional predictions computed with crustal +thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼ 0.025%, but +the changes in ˜mf and ˜ns are more substantial, ∼ 5% and ∼ 10%, respectively. +We also show in Figure 4a (only for h = 26 km) the obliquity of the principal moment +of inertia of the whole planet, which we denote by ˜εg. A difference between ˜εg and ˜εm occurs +if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core +(with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor +of $(C_s-\bar{A}_s)\alpha_3\tilde{n}_s = \bar{A}_s e_s \alpha_3 \tilde{n}_s$. The angle by which the mantle frame must be rotated so that +the moment of inertia of the whole planet is purely diagonal is $(\bar{A}_s e_s \alpha_3 \tilde{n}_s)/(\bar{A} e)$, and hence a +good approximation of ˜εg is +$$\tilde{\varepsilon}_g = \tilde{\varepsilon}_m + \frac{\bar{A}_s e_s}{\bar{A} e} \alpha_3 \tilde{n}_s . \tag{43}$$ +Since the inner core is gravitationally forced into a close alignment with the mantle, the difference between ˜εg and ˜εm remains very small.
For the largest inner core radius that we have +considered, ˜εg differs from ˜εm only by approximately 0.001 arcmin. +3.3 Viscous coupling +We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini +state. Peale et al. [2014] present two different parameterizations of viscous coupling based on +the timescale of attenuation of the differential rotation between the fluid core and mantle. More +complete analytical solutions for the flow resulting from a differentially precessing shell have +been derived [e.g. Stewartson and Roberts, 1963; Busse, 1968; Rochester , 1976] and we exploit +these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based +on them are given in Mathews and Guo [2005], +Kcmb = +πρf r +4 +f +A¯ +f +r ν +2Ωo + +0.195 − 1.976i + +, (44a) +Kicb = +πρf r +4 +s +A¯ +s +r ν +2Ωo + +0.195 − 1.976i + +, (44b) +where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary interior is not well known but based on theoretical and experimental studies it is expected to be +of the order of 10−6 m2s +−1 +[e.g. Gans, 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al., +2002a,b]. –20– -Confidential manuscript submitted to JGR-Planets -The above parameterizations are valid only under the assumption that the flow in the bound￾ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds -number Re = rf∆uf /ν, associated with the differential velocity ∆uf = rfΩom˜ f at the CMB. -For rf = 2000 km, and taking ˜mf = 4 arcmin ≈ 0.001 rad from the results in the previous -section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109 -. Such a large Reynolds number indicates -that the viscous friction between the fluid core and mantle should induce turbulent flows, as -is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; C´ebron et al., -2019]. 
For a boundary layer that involves turbulent flows, the viscous torque should be inde￾pendent of the fluid viscosity and proportional to the square of the differential velocity. The -coupling constant Kcmb should be in the form -Kcmb = fcmb - - m˜ f - - - -0.195 − 1.976i - -, (45) -where fcmb is a numerical factor that depends among other things on surface roughness. In￾corporating a viscous coupling of this form in our rotational model is more challenging not only -because fcmb is not known but also because the viscous torque is no longer linear in ˜mf . One -strategy is to find solutions through an iterative process. The simpler alternative strategy that -we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν -represents an effective turbulent viscosity. -To give an estimate of an appropriate turbulent value for ν, we turn to the Cassini state -of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained -by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) -[Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms -of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 -[Williams -and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K -and Kcmb is - - - -Im[Kcmb] - - - = -K -CL -CL -CfL -1 -ΩL -, (46) -where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6 -s -−1 -the lunar -rotation rate. With CfL/CL ∼ 7 × 10−4 -[e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼ -9×10−5 -. In order to match this amplitude in Equation (44a), with lunar parameters and as￾suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 -s -−1 -, about 500 times larger than the laminar viscosity. 
Note that the differential velocity at the -CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times -larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb -is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer￾cury should be smaller. Thus, ν ≈ 5×10−4 m2 -s -−1 gives a conservative upper bound for the -possible effective turbulent viscosity that can be expected for Mercury. -Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices -of effective viscosities. For ν = 10−5 m2 -s -−1 -, viscous coupling is too weak to affect ˜εm and -m˜ f and they are essentially unchanged from the solutions shown in Figure 4. With increasing -ν, the stronger viscous coupling between the core and the mantle reduces their differential ve￾locity, and ˜mf is reduced. With the reduced differential velocity at the CMB, the prediction -of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB -viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm -and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the -fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis￾cosity that we have identified above (i.e ν ≈ 5 × 10−4 m2 -s -−1 -), the influence of viscous cou- -–21– -Confidential manuscript submitted to JGR-Planets -εm -εg -mf -ns -2.038 -2.040 -2.042 -2.044 -2.046 -2.048 -2.050 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -0.0 -0.5 -1.0 -1.5 -2.0 -2.5 -3.0 -3.5 -4.0 -4.5 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -kinematic viscosity: 0.01 m2 s-1 0.00001 m2 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1 s-1 -a b -εm for a rigid planet -Figure 5. 
a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf -(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of kinematic -viscosity (color in legend). -pling on ˜εm remains modest, reducing its amplitude by a maximum of approximately 0.0015 -arcmin. -The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core -tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in￾ner core with the fluid core spin axis. The viscous coupling strength is inversely proportional -to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of -a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4 -m2 -s -−1 -, Figure 5 indicates that ˜ns may be 1 arcmin or larger only if the inner core radius is -smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi￾tational coupling is much larger than viscous coupling, and the inner core tilt is limited to a -fraction of 1 arcmin. -The larger inner core tilt observed with increasing effective viscosity results in a larger -offset between the obliquity of the principal moment of inertia ˜εg and that of the mantle ˜εm, -though it remains limited. For the upper bound of ν = 5 × 10−4 m2 -s -−1 -, and for rs = 1500 -km, the difference between ˜εg and ˜εm is limited to 0.0013 arcmin. -The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller -the misalignments of both the fluid core and inner core are with respect to the mantle. This -implies that the larger the inner core is, the more we approach a planet precessing as a rigid -body, although the misalignment of the spin axis of the fluid core remains important, approx￾imately 3-4 arcmin away from the mantle symmetry axis. 
The specific way in which ˜εm, ˜mf -and ˜ns change with inner core size would certainly be different for a turbulent model of viscous -coupling. But the general conclusion remains that the addition of viscous coupling at the CMB -and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. +Confidential manuscript submitted to JGR-Planets +The above parameterizations are valid only under the assumption that the flow in the boundary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds +number Re = rf∆uf /ν, associated with the differential velocity ∆uf = rfΩom˜ f at the CMB. +For rf = 2000 km, and taking ˜mf = 4 arcmin ≈ 0.001 rad from the results in the previous +section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109. Such a large Reynolds number indicates +that the viscous friction between the fluid core and mantle should induce turbulent flows, as +is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; C´ebron et al., +2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent of the fluid viscosity and proportional to the square of the differential velocity. The +coupling constant Kcmb should be in the form +Kcmb = fcmb + + m˜ f + + + +0.195 − 1.976i + +, (45) +where fcmb is a numerical factor that depends among other things on surface roughness. Incorporating a viscous coupling of this form in our rotational model is more challenging not only +because fcmb is not known but also because the viscous torque is no longer linear in ˜mf . One +strategy is to find solutions through an iterative process. The simpler alternative strategy that +we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν +represents an effective turbulent viscosity. +To give an estimate of an appropriate turbulent value for ν, we turn to the Cassini state +of the Moon. 
A measure of the viscous dissipation at the CMB of the Moon has been obtained +by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) +[Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms +of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1[Williams +and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K +and Kcmb is + + + +Im[Kcmb] + + + = +K +CL +CL +CfL +1 +ΩL +, (46) +where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6s +−1 +the lunar +rotation rate. With CfL/CL ∼ 7 × 10−4[e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼ +9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and assuming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 +s +−1 +, about 500 times larger than the laminar viscosity. Note that the differential velocity at the +CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times +larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb +is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury should be smaller. Thus, ν ≈ 5×10−4 m2 +s +−1 gives a conservative upper bound for the +possible effective turbulent viscosity that can be expected for Mercury. +Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices +of effective viscosities. For ν = 10−5 m2s +−1 +, viscous coupling is too weak to affect ˜εm and +m˜ f and they are essentially unchanged from the solutions shown in Figure 4. With increasing +ν, the stronger viscous coupling between the core and the mantle reduces their differential velocity, and ˜mf is reduced. 
With the reduced differential velocity at the CMB, the prediction +of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB +viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm +and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the +fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent viscosity that we have identified above (i.e ν ≈ 5 × 10−4 m2 +s +−1 +), the influence of viscous cou–21– + +Confidential manuscript submitted to JGR-Planets +εm +εg +mf +ns +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s 0.0001 m2 -1 s 0.0005 m2 -1 s 0.001 m2 -1 s-1 +a b +εm for a rigid planet +Figure 5. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf +(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of kinematic +viscosity (color in legend). +pling on ˜εm remains modest, reducing its amplitude by a maximum of approximately 0.0015 +arcmin. +The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core +tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner core with the fluid core spin axis. The viscous coupling strength is inversely proportional +to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of +a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4 +m2s +−1 +, Figure 5 indicates that ˜ns may be 1 arcmin or larger only if the inner core radius is +smaller than approximately 100 km. 
For an inner core of a few hundred km in radius, gravitational coupling is much larger than viscous coupling, and the inner core tilt is limited to a +fraction of 1 arcmin. +The larger inner core tilt observed with increasing effective viscosity results in a larger +offset between the obliquity of the principal moment of inertia ˜εg and that of the mantle ˜εm, +though it remains limited. For the upper bound of ν = 5 × 10−4 m2s +−1 +, and for rs = 1500 +km, the difference between ˜εg and ˜εm is limited to 0.0013 arcmin. +The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller +the misalignments of both the fluid core and inner core are with respect to the mantle. This +implies that the larger the inner core is, the more we approach a planet precessing as a rigid +body, although the misalignment of the spin axis of the fluid core remains important, approximately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜εm, ˜mf +and ˜ns change with inner core size would certainly be different for a turbulent model of viscous +coupling. But the general conclusion remains that the addition of viscous coupling at the CMB +and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. –22– -Confidential manuscript submitted to JGR-Planets -3.4 Electromagnetic coupling -Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium -Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to re￾main attached to electrically conducting materials, a differential tangential motion between two -electrically conducting regions stretches existing magnetic field lines that thread their interface. -This induces a secondary magnetic field (or equivalently, an electrical current) and an associ￾ated tangential EM stress resisting the differential motion. 
EM coupling at the CMB and ICB -acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength -of the radial magnetic field Br and the electrical conductivity σ on either side of the bound￾ary [Rochester , 1960, 1962, 1968]. -The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb -has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot, -2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given -by Br = -√ -3 +Confidential manuscript submitted to JGR-Planets +3.4 Electromagnetic coupling +Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium +Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to remain attached to electrically conducting materials, a differential tangential motion between two +electrically conducting regions stretches existing magnetic field lines that thread their interface. +This induces a secondary magnetic field (or equivalently, an electrical current) and an associated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB +acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength +of the radial magnetic field Br and the electrical conductivity σ on either side of the boundary [Rochester , 1960, 1962, 1968]. +The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb +has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot, +2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given +by Br = +√ +3 + + +Bd +r + +cos θ, where +Bd +r + +is the r.m.s. 
strength of the field, the coupling constant +Kcmb can be written is the form +Kcmb = 3(1 − i)Fcmb +B +d +r + 2 +, (47) +where +Fcmb = +1 +Ωoρf rf + +1 +σmδm ++ +1 +σf δf +−1 +, (48) +and where σm, δm = +p +2/(σmµΩo) and σf , δf = +p +2/(σfµΩo) are the electrical conductivities and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π ×10−7 +N A−2the magnetic permeability of free space. The r.m.s. field strength +Bd +r + +is connected to +the Gauss coefficient g +0 +1 of the surface magnetic field by - -Bd -r - -cos θ, where - -Bd -r - -is the r.m.s. strength of the field, the coupling constant -Kcmb can be written is the form -Kcmb = 3(1 − i)Fcmb - -B -d -r - 2 -, (47) -where -Fcmb = -1 -Ωoρf rf - -1 -σmδm -+ -1 -σf δf -−1 -, (48) -and where σm, δm = -p -2/(σmµΩo) and σf , δf = -p -2/(σfµΩo) are the electrical conductivi￾ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π ×10−7 -N A−2 -the magnetic permeability of free space. The r.m.s. field strength - -Bd -r - -is connected to -the Gauss coefficient g -0 -1 of the surface magnetic field by - -B -d -r - -= -2 -√ -3 - -R -rf -3 - - g -0 -1 - - -. (49) -We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity -of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding -to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1 -[Constable, 2015]. In con￾trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S -m−1 -[Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm) -−1  (σf δf ) -−1 -. Tak￾ing σm = 1 S m−1 -, - - g -0 -1 - - = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = -2000 km, ρf = 7000 kg m−3 -, this gives Kcmb ≈ (3.1 × 10−11)·(1 − i). 
To put this amplitude -in perspective, taking a molecular viscosity of ν = 10−6 m2 -s -−1 -in Equation (44a) gives a vis￾cous coupling constant of Kcmb ≈ (6.0 × 10−7 -)·(0.195 − 1.976i). Hence, EM coupling at the -CMB is much weaker than viscous coupling, even if we include other spherical harmonic com￾ponents of the radial magnetic field. -EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by -CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σm could be -closer to σf . Likewise, σm can be increased if a more electrically conducting layer has formed -at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction -of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even -in the extreme case of σm = σf = 106 S m−1 -, Kcmb ≈ (1.6 × 10−8 -) · (1 − i), which remains +B +d +r + += +2 +√ +3 + +R +rf +3 + + g +0 +1 + + +. (49) +We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity +of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding +to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1[Constable, 2015]. In contrast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S +m−1[Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm) +−1  (σf δf )−1 +. Taking σm = 1 S m−1 +, + + g +0 +1 + + = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = +2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈ (3.1 × 10−11)·(1 − i). To put this amplitude +in perspective, taking a molecular viscosity of ν = 10−6 m2s +−1 +in Equation (44a) gives a viscous coupling constant of Kcmb ≈ (6.0 × 10−7 +)·(0.195 − 1.976i). Hence, EM coupling at the +CMB is much weaker than viscous coupling, even if we include other spherical harmonic components of the radial magnetic field. 
+EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by +CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σm could be +closer to σf . Likewise, σm can be increased if a more electrically conducting layer has formed +at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction +of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even +in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains –23– -Confidential manuscript submitted to JGR-Planets -smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces -dominate the tangential stress on the CMB of Mercury. -At the ICB, because we can expect the electrical conductivity in both the solid inner core -and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM -coupling can be much larger and dominate viscous coupling. We assume that the magnetic field -morphology at the ICB is dominantly comprised of small spatial scales for example as predicted -by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in -terms of an equivalent uniform radial magnetic field hBri capturing its r.m.s. strength [Buf￾fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the -fluid and solid core, the coupling constant Kicb can be written in the form -Kicb = -5 -4 -(1 − i)Ficb hBri -2 -, (50) -where -Ficb = -σδ -Ωoρsrs -, (51) -and where δ = -p -2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to -rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based -on the r.m.s. strength hBri rather than a true field morphology tends to overestimate the strength -of the coupling [Koot and Dumberry, 2013]. 
However, since the strength of the radial magnetic -field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are -absorbed in the range of possible hBri values. -The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., -2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. -When hBri is sufficiently large, this is no longer the case. EM coupling then enters a ’strong -field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which -Kicb increases linearly with hBri instead of quadratically. A good approximation of Kicb cal￾culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], -KE -icb = (0.175 − i0.138)hBri , (52) -where hBri is in units of Tesla. The superscript E emphasizes that the numerical factors are -appropriate for the parameter values adopted for Earth in the computation of Dumberry and -Koot [2012]. To adapt these numerical factors to Mercury, we write, -Kicb = (0.175 − i0.138)Ficb -F E -icb -hBri , (53) -where F -E -icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb￾erry and Koot [2012]. These are Ωo = 7.292 × 10−5 -s -−1 -, ρs = 12846 kg m−3 -, rs = 1221.5 -km, σ = 5 × 105 S m−1 -, which gives F -E -icb = 90.36 T−2 -. -To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1 -in the core of -Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and -strong field regime occurs when hBri ≈ 1.53 mT for the real part of Kicb. hBri at the ICB -of Mercury is unknown. 
The dynamo model of Christensen [2006] showed that the field geom￾etry inside the core could be dominated by small length scales, yet only the weaker lower har￾monics of the field would penetrate through a thermally stratified layer in the upper region of +Confidential manuscript submitted to JGR-Planets +smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces +dominate the tangential stress on the CMB of Mercury. +At the ICB, because we can expect the electrical conductivity in both the solid inner core +and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM +coupling can be much larger and dominate viscous coupling. We assume that the magnetic field +morphology at the ICB is dominantly comprised of small spatial scales for example as predicted +by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in +terms of an equivalent uniform radial magnetic field hBri capturing its r.m.s. strength [Buffett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the +fluid and solid core, the coupling constant Kicb can be written in the form +Kicb = +5 +4 +(1 − i)Ficb hBri +2 +, (50) +where +Ficb = +σδ +Ωoρsrs +, (51) +and where δ = +p +2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to +rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based +on the r.m.s. strength hBri rather than a true field morphology tends to overestimate the strength +of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic +field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are +absorbed in the range of possible hBri values. +The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., +2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. 
+When hBri is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which +Kicb increases linearly with hBri instead of quadratically. A good approximation of Kicb calculated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], +KE +icb = (0.175 − i0.138)hBri , (52) +where hBri is in units of Tesla. The superscript E emphasizes that the numerical factors are +appropriate for the parameter values adopted for Earth in the computation of Dumberry and +Koot [2012]. To adapt these numerical factors to Mercury, we write, +Kicb = (0.175 − i0.138)Ficb +F E +icb +hBri , (53) +where F +E +icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry and Koot [2012]. These are Ωo = 7.292 × 10−5 +s +−1 +, ρs = 12846 kg m−3, rs = 1221.5 +km, σ = 5 × 105 S m−1, which gives F +E +icb = 90.36 T−2 +. +To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1in the core of +Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and +strong field regime occurs when hBri ≈ 1.53 mT for the real part of Kicb. hBri at the ICB +of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geometry inside the core could be dominated by small length scales, yet only the weaker lower harmonics of the field would penetrate through a thermally stratified layer in the upper region of –24– -Confidential manuscript submitted to JGR-Planets -the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur￾face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g An￾derson et al., 2012], hBri at the ICB could be as large as 0.3 mT, corresponding to approxi￾mately 10% of the field strength within Earth’s core. 
Given that it is perhaps unlikely that Mer￾cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of -Mercury remains in the weak field regime. -Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices -of hBri. The larger hBri is, the stronger is the EM coupling at the ICB, and the smaller is the -differential rotation between the fluid core and inner core. The inner core and fluid core are vir￾tually locked into a common precession motion when hBri > 0.3 mT. Further increasing hBri -above 1 mT does not change the solution as EM coupling already dominates all other torques -on the inner core. This is the case even when EM coupling transitions into the strong field regime. -EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and - - g -0 -1 - - = -190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core -we retrieved the solutions of ˜εm and ˜mf shown in Figure 4. -As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with -viscous coupling alone, although the addition of EM coupling lead to more substantial changes. -The inner core needs to be larger than approximately 500 km for changes in the Cassini state -equilibrium to be noticeable. It is important to point out that ˜mf is reduced not because of -EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which -pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the -inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the -greater is the reduction in ˜εm and ˜mf . 
-When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are -locked into a common precession motion, a good approximation of ˜εm is given by the same pre￾diction as Equations (39-40) involving the effective moment of inertia C -0 -, except χ is now given -by -χ = -A¯ -cΩp cos I − A¯ -sΩoα3φs -A¯ -fΩo(ef + Kcmb) + A¯ -sΩoesα3αg − A¯ -cΩp cos I -. (54) -For a small inner core, A¯ -cΩp cos I > A¯ -sΩoα3φs and χ is positive. Because A¯ -sΩoα3φs increases -with inner core size, χ gets smaller, and so do C -0 and ˜εm. The mantle obliquity drops from 2.049 -arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 -arcmin. For an inner core larger than ≈ 1000 km, A¯ -cΩp cos I < A¯ -sΩoα3φs, so χ becomes neg￾ative, C -0 becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes -smaller than the prediction based on a rigid planet. -The larger the inner core is, the smaller are the misalignments of the fluid and solid cores -with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone -is not altered with the addition of EM coupling but further strengthened; the larger the inner -core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the -obliquity of the gravity field ˜εg which, for a large inner core, asymptotically approaches the obliq￾uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be￾tween ˜εm and ˜εg can be as large as 0.008 arcmin for a large inner core. -3.5 Fixed inner core density versus fixed ICB density contrast -Coupling models when viscous and EM stresses are both present have been presented in -Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, +Confidential manuscript submitted to JGR-Planets +the fluid core and reach the surface. 
If so, the field strength inside the core can exceed the surface field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g Anderson et al., 2012], hBri at the ICB could be as large as 0.3 mT, corresponding to approximately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mercury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of +Mercury remains in the weak field regime. +Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices +of hBri. The larger hBri is, the stronger is the EM coupling at the ICB, and the smaller is the +differential rotation between the fluid core and inner core. The inner core and fluid core are virtually locked into a common precession motion when hBri > 0.3 mT. Further increasing hBri +above 1 mT does not change the solution as EM coupling already dominates all other torques +on the inner core. This is the case even when EM coupling transitions into the strong field regime. +EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and + + g +0 +1 + + = +190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core +we retrieved the solutions of ˜εm and ˜mf shown in Figure 4. +As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with +viscous coupling alone, although the addition of EM coupling lead to more substantial changes. +The inner core needs to be larger than approximately 500 km for changes in the Cassini state +equilibrium to be noticeable. It is important to point out that ˜mf is reduced not because of +EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which +pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the +inner core, which pulls the latter to align with the mantle. 
The larger the EM coupling is, the +greater is the reduction in ˜εm and ˜mf . +When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are +locked into a common precession motion, a good approximation of ˜εm is given by the same prediction as Equations (39-40) involving the effective moment of inertia C +0 +, except χ is now given +by +χ = +A¯ +cΩp cos I − A¯sΩoα3φs +A¯ +fΩo(ef + Kcmb) + A¯sΩoesα3αg − A¯cΩp cos I +. (54) +For a small inner core, A¯ +cΩp cos I > A¯sΩoα3φs and χ is positive. Because A¯sΩoα3φs increases +with inner core size, χ gets smaller, and so do C +0 and ˜εm. The mantle obliquity drops from 2.049 +arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 +arcmin. For an inner core larger than ≈ 1000 km, A¯ +cΩp cos I < A¯sΩoα3φs, so χ becomes negative, C +0 becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes +smaller than the prediction based on a rigid planet. +The larger the inner core is, the smaller are the misalignments of the fluid and solid cores +with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone +is not altered with the addition of EM coupling but further strengthened; the larger the inner +core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the +obliquity of the gravity field ˜εg which, for a large inner core, asymptotically approaches the obliquity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between ˜εm and ˜εg can be as large as 0.008 arcmin for a large inner core. +3.5 Fixed inner core density versus fixed ICB density contrast +Coupling models when viscous and EM stresses are both present have been presented in +Mathews and Guo [2005] and Deleplace and Cardin [2006]. 
However, in the light of our results, –25– -Confidential manuscript submitted to JGR-Planets -2.032 -2.034 -2.036 -2.038 -2.040 -2.042 -2.044 -2.046 -2.048 -2.050 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -0.0 -0.5 -1.0 -1.5 -2.0 -2.5 -3.0 -3.5 -4.0 -4.5 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -Br at ICB: 1 mT 0.3 mT 0.1 mT 0.03 mT 0.01 mT -εm -εg -mf -ns -a b -εm for a rigid planet -Figure 6. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf -(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of Br -(colour in legend). -for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by -viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we con￾sider a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling. -We choose an effective viscosity at the CMB of ν = 10−4 m2 -s -−1 -, which we believe to be a -representative value given the comparison with the Moon (see section 3.3). We take a radial -field strength at the ICB of hBri = 0.3 mT, approximately the field strength expected under -the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa￾tive’ coupling model, although the uncertainty on ν and hBri obviously remains high. -Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’ -coupling model (black lines) under the fixed inner core density scenario that we have used in -sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same rep￾resentative coupling model, we adopt instead a fixed density contrast between the fluid and solid -cores and for different choices of α3 (coloured lines). 
For a relatively high density contrast (α3 = -0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller -α3, the point at which the orientation of the co-precessing fluid and inner cores begins to be -pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the -general behaviour of ˜εm, ˜mf and ˜ns as functions of inner core radius is unchanged. Hence, all -our results in the previous three sections would be qualitatively similar under a fixed density -contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core -is required in order to produce an equivalent change in the Cassini state equilibrium. -4 Discussion -The study of Peale et al. [2016] also presented predictions of the obliquities of the man￾tle, fluid core and inner core associated with the equilibrium Cassini state of Mercury. Their -model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their -Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as +Confidential manuscript submitted to JGR-Planets +2.032 +2.034 +2.036 +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +Br at ICB: 1 mT 0.3 mT 0.1 mT 0.03 mT 0.01 mT +εm +εg +mf +ns +a b +εm for a rigid planet +Figure 6. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf +(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of Br +(colour in legend). +for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by +viscous forces, and that at the ICB should be dominated by EM forces. 
To simplify, we consider a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling. +We choose an effective viscosity at the CMB of ν = 10−4 m2s +−1 +, which we believe to be a +representative value given the comparison with the Moon (see section 3.3). We take a radial +field strength at the ICB of hBri = 0.3 mT, approximately the field strength expected under +the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’ coupling model, although the uncertainty on ν and hBri obviously remains high. +Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’ +coupling model (black lines) under the fixed inner core density scenario that we have used in +sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative coupling model, we adopt instead a fixed density contrast between the fluid and solid +cores and for different choices of α3 (coloured lines). For a relatively high density contrast (α3 = +0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller +α3, the point at which the orientation of the co-precessing fluid and inner cores begins to be +pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the +general behaviour of ˜εm, ˜mf and ˜ns as functions of inner core radius is unchanged. Hence, all +our results in the previous three sections would be qualitatively similar under a fixed density +contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core +is required in order to produce an equivalent change in the Cassini state equilibrium. +4 Discussion +The study of Peale et al. [2016] also presented predictions of the obliquities of the mantle, fluid core and inner core associated with the equilibrium Cassini state of Mercury. 
Their +model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their +Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as –26– -Confidential manuscript submitted to JGR-Planets -2.032 -2.034 -2.036 -2.038 -2.040 -2.042 -2.044 -2.046 -2.048 -2.050 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -0.0 -0.5 -1.0 -1.5 -2.0 -2.5 -3.0 -3.5 -4.0 -4.5 -Obliquity angle (arcmin) -0 200 400 600 800 1000 1200 1400 -Inner core radius (km) -εm for a rigid planet -a b -ρs = 8800 kg m α3: 0.20 0.15 0.10 0.05 0.01 -3 -mf -ns -εm -εg -Figure 7. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf -(solid lines) and ˜ns (dashed lines) as a function of inner core radius, for a fixed inner core density of -8800 kg m−3 -(black lines) and for different choices of α3 (coloured lines). -i -0 -m, i -0 -f -and i -0 -s -; these represent the obliquities with respect to the orbital plane and are connected -to our variables by: i -0 -m = ˜εm, i -0 -f = ˜εm + ˜m+ ˜mf ≈ ε˜m + ˜mf and i -0 -s = ˜εm + ˜ns. To summarize -their results, i -0 -f -and i -0 -s vary substantially for different inner core sizes, are always of compara￾ble amplitude, and i -0 -s -is always larger than i -0 -f -. Furthermore, they find that as the inner core -size is increased, the mantle obliquity i -0 -m gets progressively larger and is displaced further away -from its expected orientation based of a rigid planet (see their Figure 6). The change in i -0 -m they -obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan￾etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), -is approximately an increase of 5 × 10−5 -rad = 0.17 arcmin. This also corresponds approxi￾mately to the deviation of the obliquity with respect to that of a rigid planet. 
-When only viscous stress is included in our model (section 3.3), our results are substan￾tially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core -gets smaller with inner core size and that the change is very modest. In contrast with the re￾sults of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that -of the fluid core, except when the inner core is very small or when the effective viscosity is un￾reasonably large. We also find that as the inner core size is increased, the mantle obliquity gets -smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most -of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling -model that we use. But even when we adopt their model parameters and use their viscosity model, -we were not able to reproduce their results. -In the absence of viscous and EM coupling, the strong gravitational torque exerted on the -inner core by the mantle should prevent any large misalignment between the two. This is cap￾tured by the period of the FICN, which is of the order of 100 yr, much shorter than the forc￾ing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational -torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous￾EM torque to the gravitational torque decreases with inner core size, so a large inner core should -be more strongly aligned with the mantle. 
The more strongly the inner core and mantle are +Confidential manuscript submitted to JGR-Planets +2.032 +2.034 +2.036 +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5 +Obliquity angle (arcmin) +0 200 400 600 800 1000 1200 1400 +Inner core radius (km) +εm for a rigid planet +a b +α3 ρs = 8800 kg m : 0.20 0.15 0.10 0.05 0.01 -3 +mf +ns +εm +εg +Figure 7. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf +(solid lines) and ˜ns (dashed lines) as a function of inner core radius, for a fixed inner core density of +8800 kg m−3(black lines) and for different choices of α3 (coloured lines). +i +0 +m, i +0 +f +and i +0 +s +; these represent the obliquities with respect to the orbital plane and are connected +to our variables by: i +0 +m = ˜εm, i +0 +f = ˜εm + ˜m+ ˜mf ≈ ε˜m + ˜mf and i +0 +s = ˜εm + ˜ns. To summarize +their results, i +0 +f +and i +0 +s vary substantially for different inner core sizes, are always of comparable amplitude, and i +0 +s +is always larger than i +0 +f +. Furthermore, they find that as the inner core +size is increased, the mantle obliquity i +0 +m gets progressively larger and is displaced further away +from its expected orientation based of a rigid planet (see their Figure 6). The change in i +0 +m they +obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), +is approximately an increase of 5 × 10−5rad = 0.17 arcmin. This also corresponds approximately to the deviation of the obliquity with respect to that of a rigid planet. +When only viscous stress is included in our model (section 3.3), our results are substantially different. 
As illustrated in Figure 4, we find instead that the obliquity of the fluid core +gets smaller with inner core size and that the change is very modest. In contrast with the results of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that +of the fluid core, except when the inner core is very small or when the effective viscosity is unreasonably large. We also find that as the inner core size is increased, the mantle obliquity gets +smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most +of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling +model that we use. But even when we adopt their model parameters and use their viscosity model, +we were not able to reproduce their results. +In the absence of viscous and EM coupling, the strong gravitational torque exerted on the +inner core by the mantle should prevent any large misalignment between the two. This is captured by the period of the FICN, which is of the order of 100 yr, much shorter than the forcing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational +torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscousEM torque to the gravitational torque decreases with inner core size, so a large inner core should +be more strongly aligned with the mantle. The more strongly the inner core and mantle are –27– -Confidential manuscript submitted to JGR-Planets -gravitationally locked together, the more they behave as a single rigid body in response to the -external torque from the Sun. We expect then that the obliquity of the mantle should be brought -closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the re￾sults of Peale et al. [2016], which suggest the opposite. 
-We showed that EM coupling is most likely larger than viscous coupling at the ICB, even -though our knowledge of the radial magnetic field strength inside Mercury (on which EM cou￾pling depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM -coupling is sufficiently strong to bring the fluid and solid cores into a locked procession motion. -The larger the inner core is, the more this co-precessing core is forced into an alignment with -the mantle because of the mantle gravitational torque on the inner core. As a result, the larger -the inner core is, the closer we approach a situation resembling a whole planet precessing as -a rigid body. The addition of EM coupling at the ICB does not change the overall picture that -we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The -amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous cou￾pling alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016], -and again, importantly, in the reverse direction. -Our results suggest then that the presence and size of an inner core leads to only mod￾est changes of the mantle obliquity εm compared to the obliquity predicted on the basis of an -entirely rigid planet (ε -r -m). Let us denote this difference as ∆εm = εm−ε -r -m. The largest ∆εm -occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This difference is decreased -as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM -coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value -remains smaller than 0.01 arcmin. -To put these results in perspective, the uncertainty in the measurement of the mantle obliq￾uity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much -larger than this difference. 
This means that, at the current level of precision, it is not possi￾ble to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This -is consistent with the fact that the observed obliquity falls close to that expected from a rigid -planet. But it also implies that the observed obliquity cannot be used to place constraints on -the inner core size. -Nevertheless, our results show that the presence of a fluid core and inner core affect the -resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change -in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006 -arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation -motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which -is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliq￾uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 -arcmin) [Cical`o et al., 2016]. Thus, in addition to including tidal deformation and the preces￾sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be -necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens -the possibility of further constraining the interior structure of Mercury on the basis of its obliq￾uity. -Obliquity measurements based on tracking topographic features reflect the orientation of -the spin-symmetry axis of the mantle (εm). Measurements based on tracking the gravity field -of Mercury reflect instead the orientation of the principal moment of the whole planet (εg). These -two orientations do not coincide when an inner core is present and is misaligned from the man￾tle. 
Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we +Confidential manuscript submitted to JGR-Planets +gravitationally locked together, the more they behave as a single rigid body in response to the +external torque from the Sun. We expect then that the obliquity of the mantle should be brought +closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the results of Peale et al. [2016], which suggest the opposite. +We showed that EM coupling is most likely larger than viscous coupling at the ICB, even +though our knowledge of the radial magnetic field strength inside Mercury (on which EM coupling depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM +coupling is sufficiently strong to bring the fluid and solid cores into a locked precession motion. +The larger the inner core is, the more this co-precessing core is forced into an alignment with +the mantle because of the mantle gravitational torque on the inner core. As a result, the larger +the inner core is, the closer we approach a situation resembling a whole planet precessing as +a rigid body. The addition of EM coupling at the ICB does not change the overall picture that +we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The +amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous coupling alone; this remains a factor of 10 smaller than the changes suggested in Peale et al. [2016], +and again, importantly, in the reverse direction. +Our results suggest then that the presence and size of an inner core lead to only modest changes of the mantle obliquity εm compared to the obliquity predicted on the basis of an +entirely rigid planet ($\varepsilon_m^r$). Let us denote this difference as $\Delta\varepsilon_m = \varepsilon_m - \varepsilon_m^r$. The largest ∆εm +occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin.
This difference is decreased +as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM +coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value +remains smaller than 0.01 arcmin. +To put these results in perspective, the uncertainty in the measurement of the mantle obliquity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much +larger than this difference. This means that, at the current level of precision, it is not possible to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This +is consistent with the fact that the observed obliquity falls close to that expected from a rigid +planet. But it also implies that the observed obliquity cannot be used to place constraints on +the inner core size. +Nevertheless, our results show that the presence of a fluid core and inner core affect the +resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change +in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006 +arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation +motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which +is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliquity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 +arcmin) [Cical`o et al., 2016]. Thus, in addition to including tidal deformation and the precession of the pericenter, a Cassini state model that includes a fluid and solid core will then be +necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens +the possibility of further constraining the interior structure of Mercury on the basis of its obliquity. 
+Obliquity measurements based on tracking topographic features reflect the orientation of +the spin-symmetry axis of the mantle (εm). Measurements based on tracking the gravity field +of Mercury reflect instead the orientation of the principal moment of the whole planet (εg). These +two orientations do not coincide when an inner core is present and is misaligned from the mantle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we –28– -Confidential manuscript submitted to JGR-Planets -find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain -is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the -light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968± -0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the -spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm = -2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent -with one another within their error estimates. In their interpretation, Genova et al. [2019] sug￾gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar￾cmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid in￾ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg -that we predict. Moreover, we predict that the obliquity of the gravity field should be larger -than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision -of the measurements, εg and εm should coincide, and their difference cannot be interpreted as -reflecting the misalignment between the polar moment of inertia of the whole planet and the -mantle spin axis. -Lastly, we have concentrated our efforts on the mutual orientations of the different spin -and symmetry axes in the Cassini plane. 
Dissipation at the CMB and ICB introduced by vis￾cous and EM coupling also lead to a displacement of these axes in the direction perpendicu￾lar to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements based on track￾ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that -the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). -Although this offset is smaller than the measurement errors, so that the observed obliquity is -still consistent with no deviation away from the Cassini plane, some amount of dissipation in￾variably takes place. These measurements give then a measure of the possible amplitude of the -dissipation. One source of dissipation is from anelastic tidal deformation [Baland et al., 2017], -but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-of￾plane component of the observed obliquity may further help to quantify and constrain the in￾terior coupling mechanisms. This will be the subject of a future study. -5 Conclusion -We have investigated how the presence of a fluid core and solid inner core affects the Cassini -state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mer￾cury’s interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis -does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset oc￾curs for a small or no inner core. The larger the inner core is, the more it is forced into an align￾ment with the mantle because of the strong gravitational torque between the two, and the closer -we approach a situation resembling a whole planet precessing as a rigid body. The misalign￾ment between the polar moment of inertia and mantle spin axis increases with inner core size, -but is limited to approximately 0.007 arcmin. 
These conclusions apply irrespective of the core -composition and thus of the partitioning of light elements into the solid core; a smaller den￾sity contrast at the ICB only implies that a larger inner core is required in order to produce -an equivalent change in the Cassini state equilibrium. -Our results imply that the obliquities of the mantle spin axis and polar moment of iner￾tia (or, equivalently, the gravity field) should coincide at the present-day level of measurement -errors. Moreover, neither of these can be distinguished from the obliquity predicted on the ba￾sis of a rigid planet. However, the smaller measurement errors expected from the upcoming Bepi￾Colombo satellite mission may permit this distinction, and thus provide further constraints on -Mercury’s interior structure. +Confidential manuscript submitted to JGR-Planets +find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain +is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the +light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968± +0.027 arcmin. This is substantially smaller than the two measurements of the obliquity of the +spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm = +2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent +with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 arcmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid inner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg +that we predict. Moreover, we predict that the obliquity of the gravity field should be larger +than that of the mantle spin axis, not smaller.
Hence, at the present-day level of the precision +of the measurements, εg and εm should coincide, and their difference cannot be interpreted as +reflecting the misalignment between the polar moment of inertia of the whole planet and the +mantle spin axis. +Lastly, we have concentrated our efforts on the mutual orientations of the different spin +and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by viscous and EM coupling also lead to a displacement of these axes in the direction perpendicular to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements based on tracking surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that +the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). +Although this offset is smaller than the measurement errors, so that the observed obliquity is +still consistent with no deviation away from the Cassini plane, some amount of dissipation invariably takes place. These measurements give then a measure of the possible amplitude of the +dissipation. One source of dissipation is from anelastic tidal deformation [Baland et al., 2017], +but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-ofplane component of the observed obliquity may further help to quantify and constrain the interior coupling mechanisms. This will be the subject of a future study. +5 Conclusion +We have investigated how the presence of a fluid core and solid inner core affects the Cassini +state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mercury’s interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis +does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset occurs for a small or no inner core. 
The larger the inner core is, the more it is forced into an alignment with the mantle because of the strong gravitational torque between the two, and the closer +we approach a situation resembling a whole planet precessing as a rigid body. The misalignment between the polar moment of inertia and mantle spin axis increases with inner core size, +but is limited to approximately 0.007 arcmin. These conclusions apply irrespective of the core +composition and thus of the partitioning of light elements into the solid core; a smaller density contrast at the ICB only implies that a larger inner core is required in order to produce +an equivalent change in the Cassini state equilibrium. +Our results imply that the obliquities of the mantle spin axis and polar moment of inertia (or, equivalently, the gravity field) should coincide at the present-day level of measurement +errors. Moreover, neither of these can be distinguished from the obliquity predicted on the basis of a rigid planet. However, the smaller measurement errors expected from the upcoming BepiColombo satellite mission may permit this distinction, and thus provide further constraints on +Mercury’s interior structure. –29– -Confidential manuscript submitted to JGR-Planets -Acknowledgments -Figures were created using the GMT software [Wessel et al., 2013]. The source codes, GMT -scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work -was supported by an NSERC/CRSNG Discovery Grant. -References -Alfè, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core -conditions, Phys. Rev., B61, 132–142. -Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin, -S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global -magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859– -1862. -Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E.
Pu￾rucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Low￾degree structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12, -doi:10.1029/2012JE004159. -Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mer￾cury: Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159. -Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin pre￾cession and polar motion of a synchronously rotating satellite: application to Titan, -Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50. -Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the -forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597. -Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from -observations of nutations, Earth Planet. Sci. Lett., 296, 367–372. -Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: -effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056. -Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech., 33, -739–751. -Byrne, P. K., C. Klimczak, A. M. C. Seng¨or, S. C. Solomon, T. R. Watters, and S. A. -Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature -Geosci., 7, 301–307. -C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: -flows, dissipation, dynamo and the lunar core, Geophys. J. Int., 219 (Supplement 1), -S34–S57, doi:10.1093/gji/ggz037. -Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature, -444, 1056–1058. -Cical`o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The -BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software, -Month. N. Roy. Astr. Soc., 457, 1507–1521. -Colombo, G. (1966), Cassini’s second and third laws, Astron. 
J., 71, 891–896. -Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second -Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Ox￾ford. -de Koker, N., G. Seinle-Neumann, and V. Vlˇcek (2012), Electrical resistivity and thermal -conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. -Nat. Acad. Sci., 109, 4070–4073. +Confidential manuscript submitted to JGR-Planets +Acknowledgments +Figures were created using the GMT software [Wessel et al., 2013]. The source codes, GMT +scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work +was supported by an NSERC/CRSNG Discovery Grant. +References +Alf`e, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core +conditions, Phys. Rev., B61, 132–142. +Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin, +S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global +magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859– +1862. +Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Lowdegree structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12, +doi:10.1029/2012JE004159. +Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury: Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159. +Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession and polar motion of a synchronously rotating satellite: application to Titan, +Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50. +Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the +forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597. +Buffett, B. A. 
(2010), Chemical stratification at the top of earth’s core: Constraints from +observations of nutations, Earth Planet. Sci. Lett., 296, 367–372. +Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: +effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056. +Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech., 33, +739–751. +Byrne, P. K., C. Klimczak, A. M. C. Seng¨or, S. C. Solomon, T. R. Watters, and S. A. +Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature +Geosci., 7, 301–307. +C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: +flows, dissipation, dynamo and the lunar core, Geophys. J. Int., 219 (Supplement 1), +S34–S57, doi:10.1093/gji/ggz037. +Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature, +444, 1056–1058. +Cical`o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The +BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software, +Month. N. Roy. Astr. Soc., 457, 1507–1521. +Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896. +Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second +Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford. +de Koker, N., G. Seinle-Neumann, and V. Vlˇcek (2012), Electrical resistivity and thermal +conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. +Nat. Acad. Sci., 109, 4070–4073. –30– -Confidential manuscript submitted to JGR-Planets -de Wijs, G. A., G. Kresse, L. Voˇcadlo, D. Dobson, D. Alf´e, M. J. Gillan, and G. D. Price -(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature, -392, 805–807. -Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics, -vol. 3, edited by G. 
Schubert, chap. 10, pp. 263–305, Elsevier, Oxford. -Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary, -Geophys. J. Int., 167, 557–566. -Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical -resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37, -doi:10.1029/2012GL054347. -Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid in￾ner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL -Dataverse, V2. -Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nuta￾tions, Geophys. J. Int., 191, 530–544. -Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization -regime, Icarus, 248, 254–268. -Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner -core, J. Geophys. Res. Planets, 121, 1264–1292. -Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mer￾cury’s core density structure on its longitudinal librations, Icarus, 225, 62–74. -Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366. -Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang, -T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019), -Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46, -doi:10.1029/2018GL081135. -Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at -the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171. -Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global -contraction of Mercury, Earth Planet. Sci. Lett., 307, 135–146. -Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine, -E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T. 
-Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118, -doi:10.1002/jgre.20091. -Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H. -Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon -(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. -Res., 117, E00L14, doi:10.1029/2012JE004217. -Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orien￾tation, love number, and ephemeris from the MESSENGER radiometric tracking data, -Icarus, 335, 113,386. -Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the -electromagnetic coupling for nutations, Geophys. J. Int., 195, 200–210. -Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth -Planet. Sci. Lett., 193, 509–514. -Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longi￾tude libration of Mercury reveals a molten core, Science, 316, 710–714. -Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens, -M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s +Confidential manuscript submitted to JGR-Planets +de Wijs, G. A., G. Kresse, L. Voˇcadlo, D. Dobson, D. Alf´e, M. J. Gillan, and G. D. Price +(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature, +392, 805–807. +Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics, +vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford. +Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary, +Geophys. J. Int., 167, 557–566. +Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical +resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37, +doi:10.1029/2012GL054347. +Dumberry, M. 
(2020), Replication Data for: The influence of a fluid core and a solid inner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL +Dataverse, V2. +Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations, Geophys. J. Int., 191, 530–544. +Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization +regime, Icarus, 248, 254–268. +Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner +core, J. Geophys. Res. Planets, 121, 1264–1292. +Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mercury’s core density structure on its longitudinal librations, Icarus, 225, 62–74. +Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366. +Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang, +T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019), +Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46, +doi:10.1029/2018GL081135. +Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at +the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171. +Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global +contraction of Mercury, Earth Planet. Sci. Lett., 307, 135–146. +Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine, +E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T. +Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118, +doi:10.1002/jgre.20091. +Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H. +Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon +(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. +Res., 117, E00L14, doi:10.1029/2012JE004217. 
+Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orientation, love number, and ephemeris from the MESSENGER radiometric tracking data, +Icarus, 335, 113,386. +Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the +electromagnetic coupling for nutations, Geophys. J. Int., 195, 200–210. +Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth +Planet. Sci. Lett., 193, 509–514. +Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude libration of Mercury reveals a molten core, Science, 316, 710–714. +Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens, +M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s –31– -Confidential manuscript submitted to JGR-Planets -moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09, -doi:10.1029/2012JE004161. -Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s -internal structure, in Mercury: The View after MESSENGER, edited by S. Solomon, -L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi: -10.1017/9781316650684.005. -Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation -theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915. -Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of -the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242. -Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and pre￾cession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J. -Geophys. Res., 107, doi:10.1029/2004JB000390. -Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber, -D. E. Smith, and S. C. 
Solomon (2014), The gravity field, orientation, and ephemeris of -Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res. -Planets, 119, 2417–2436. -Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon -and the phase lead of its Cassini state, Journal of Geophysical Research Planets, 125, -e2020JE006386. -Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489. -Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744. -Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766. -Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18. -Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic in￾variance, Icarus, 181, 338–347. -Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle -and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220. -Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a -solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455. -Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of -Mercury, Geophys. Res. Lett., 42, 6951–6958. -Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bull. Astron. Ser. 1, 27, -321–356. -Pozzo, M., C. Davies, D. Gubbins, and D. Alf´e (2012), Thermal and electrical conductivity -of iron at Earth’s core conditions, Nature, 485, 355–358. -Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s -rotation, Phil. Trans. R. Soc. Lond., A, 252, 531–555. -Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833– -4836. -Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic core￾mantle coupling, J. Geomag. Geoelectr., 20, 387–402. -Rochester, M. G. 
(1976), The secular decrease of obliquity due to dissipative core-mantle -coupling, Geophys. J. R. Astron. Soc., 46, 109–126. -Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), To￾wards evaluating the viscosity of the Earth’s outer core: an experimental high pressure -study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1. -Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang -(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B, 66, 060,102, +Confidential manuscript submitted to JGR-Planets +moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09, +doi:10.1029/2012JE004161. +Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s +internal structure, in Mercury: The View after MESSENGER, edited by S. Solomon, +L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi: +10.1017/9781316650684.005. +Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation +theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915. +Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of +the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242. +Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J. +Geophys. Res., 107, doi:10.1029/2004JB000390. +Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber, +D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of +Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res. +Planets, 119, 2417–2436. +Organowski, O., and M. 
Dumberry (2020), Viscoelastic relaxation within the Moon +and the phase lead of its Cassini state, Journal of Geophysical Research Planets, 125, +e2020JE006386. +Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489. +Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744. +Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766. +Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18. +Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance, Icarus, 181, 338–347. +Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle +and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220. +Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a +solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455. +Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of +Mercury, Geophys. Res. Lett., 42, 6951–6958. +Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bull. Astron. Ser. 1, 27, +321–356. +Pozzo, M., C. Davies, D. Gubbins, and D. Alf´e (2012), Thermal and electrical conductivity +of iron at Earth’s core conditions, Nature, 485, 355–358. +Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s +rotation, Phil. Trans. R. Soc. Lond., A, 252, 531–555. +Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833– +4836. +Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic coremantle coupling, J. Geomag. Geoelectr., 20, 387–402. +Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle +coupling, Geophys. J. R. Astron. Soc., 46, 109–126. +Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. 
Sutton (2002a), Towards evaluating the viscosity of the Earth’s outer core: an experimental high pressure +study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1. +Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang +(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B, 66, 060,102, –32– -Confidential manuscript submitted to JGR-Planets -doi:10.1029/2001GL014392. -Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metal￾silicate partitioning and its role in core formation and composition on Super-Earths, -Astrophys. J., 835, 234. -Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett., 489, 92–99. -Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann, -S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observa￾tions of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889. -Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular -orbital elements, Celest. Mech. Dyn. Astr., 123, 263–277. -Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity -of a precessing rigid body, J. Fluid Mech., 17, 1–20. -Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys. -Res. Planets, 123, 1–25, doi:10.1029/2018JE005607. -Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics, -vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford. -Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides -and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett., 333–334, -83–90. -Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSEN￾GER radio science data, J. Geophys. Res. Planets, 121, 1627–1640. -Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. 
Wobbe (2013), Generic Mapping -Tools: Improved version released, EOS Trans. AGU, 94, 409–410. -Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of -dissipation, J. Geophys. Res. Planets, 120 (4), 689–724, doi:10.1002/2014JE004755. -Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar -rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933– -27,968. -Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine, -S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A. -Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek, -J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J. -Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J. -Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559. -Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond. -A, 303, 327–338. -Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181, -327–337. -–33– +Confidential manuscript submitted to JGR-Planets +doi:10.1029/2001GL014392. +Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metalsilicate partitioning and its role in core formation and composition on Super-Earths, +Astrophys. J., 835, 234. +Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett., 489, 92–99. +Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann, +S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889. +Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular +orbital elements, Celest. Mech. Dyn. Astr., 123, 263–277. +Stewartson, K., and P. H. 
Roberts (1963), On the motion of a liquid in a spheroidal cavity +of a precessing rigid body, J. Fluid Mech., 17, 1–20. +Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys. +Res. Planets, 123, 1–25, doi:10.1029/2018JE005607. +Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics, +vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford. +Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides +and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett., 333–334, +83–90. +Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER radio science data, J. Geophys. Res. Planets, 121, 1627–1640. +Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping +Tools: Improved version released, EOS Trans. AGU, 94, 409–410. +Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of +dissipation, J. Geophys. Res. Planets, 120 (4), 689–724, doi:10.1002/2014JE004755. +Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar +rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933– +27,968. +Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine, +S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A. +Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek, +J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J. +Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J. +Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559. +Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond. +A, 303, 327–338. +Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181, +327–337. 
+–33– \ No newline at end of file diff --git a/read/results/pdfium/2201.00069.txt b/read/results/pdfium/2201.00069.txt index 2a505bd..30fcbbc 100644 --- a/read/results/pdfium/2201.00069.txt +++ b/read/results/pdfium/2201.00069.txt @@ -1,913 +1,862 @@ -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 1 -A MeerKAT, e-MERLIN, H.E.S.S. and Swift search for persistent -and transient emission associated with three localised FRBs -J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4 -C. Venter,1 -I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5 -R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3 -E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3 -, H. Qiu,11 K. Rajwade,3 -S. Sanidas,3 M. Surnis,3 T. W. Scragg,3 C. R. H. Walker,5 -and N. Wrigley,3 -H.E.S.S. Collaboration: F. Aharonian,12,13,14 F. Ait Benkhali,15 E.O. Angüner,16 M. Backes,18,1 -V. Baghmanyan,19 V. Barbosa Martins,20 R. Batzofin,21 Y. Becherini,22,23 D. Berge,20 -M. Böttcher,1 C. Boisson,24 J. Bolmont,25 M. de Bony de Lavergne,26 M. Breuhaus,13 -R. Brose,12 F. Brun,6 T. Bulik,27 F. Cangemi,25 S. Caroff,25 S. Casanova,19 -J. Catalano,28 M. Cerruti,22 T. Chand,1 A. Chen,21 O.U. Chibueze,1 -G. Cotter,29 P. Cristofari,24 J. Damascene Mbarubucyeye,20 J. Devin,30 A. Djannati-Ataï,22 -A. Dmytriiev,1 K. Egberts,31 J.-P. Ernenwein,16 A. Fiasson,26 G. Fichet de Clairfontaine,24 -G. Fontaine,17 S. Funk,28 S. Gabici,22 S. Ghafourizadeh,15 G. Giavitto,20 -D. Glawion,28 M.-H. Grondin,30 M. Hörbe,29 C. Hoischen,31 T. L. Holch,20 -Zhiqiu Huang,13 M. Jamrozy,32 F. Jankowsky,15 I. Jung-Richardt,28 E. Kasai,18 -K. Katarzyński,33 U. Katz,28 B. Khélifi,22 W. Kluźniak,34 Nu. Komin,21 -K. Kosack,6 D. Kostunin,20 A. Lemière,22 J.-P. Lenain,25 F. Leuschner,35 -T. Lohse,36 A. Luashvili,24 I. Lypova,15 J. Mackey,12 D. Malyshev,35 -V. Marandon,13 P. Marchegiani,21 A. Marcowith,37 G. Martí-Devesa,38 R. 
Marx,15 -A. Mitchell,28,13 R. Moderski,34 L. Mohrmann,13 E. Moulin,6 -J. Muller,17 -K. Nakashima,28 M. de Naurois,17 A. Nayerhoda,19 J. Niemiec,19 A. Priyana Noel,32 -P. O’Brien,39 S. Ohm,20 L. Olivera-Nieto,13 E. de Ona Wilhelmi,20 M. Ostrowski,32 -S. Panny,38 R.D. Parsons,36 S. Pita,22 V. Poireau,26 D.A. Prokhorov,40 -H. Prokoph,20 G. Pühlhofer,35 A. Quirrenbach,15 P. Reichherzer,6 A. Reimer,38 -O. Reimer,38 G. Rowell,41 B. Rudak,34 E. Ruiz-Velasco,13 V. Sahakian,42 -S. Sailer,13 H. Salzmann,35 D.A. Sanchez,26 A. Santangelo,35 M. Sasaki,28 -H.M. Schutte,1 U. Schwanke,36 J.N.S. Shapopi,18 A. Specovius,28 -S. Spencer,29 R. Steenkamp,18 S. Steinmassl,13 T. Takahashi,43 T. Tanaka,44 -C. Thorpe-Morgan,35 N. Tsuji,45 C. van Eldik,28 J. Veh,28 -J. Vink,40 S.J. Wagner,15 A. Wierzcholska,19 Yu Wun Wong,28 A. Yusafzai,28 -M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20 -S. Zouari,22 N. Żywucka,1 -Accepted XXX. Received YYY; in original form ZZZ -MNRAS 000, 1–15 (2021) -arXiv:2201.00069v1 [astro-ph.HE] 31 Dec 2021 -MNRAS 000, 1–15 (2021) Preprint 4 January 2022 Compiled using MNRAS LATEX style file v3.0 -ABSTRACT -We report on a search for persistent radio emission from the one-off Fast Radio Burst (FRB) -20190714A, as well as from two repeating FRBs, 20190711A and 20171019A, using the -MeerKAT radio telescope. For FRB 20171019A we also conducted simultaneous observations -with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma rays and -searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV -flux upper limit of 1.39×10−16 erg cm−2 -s -−1Å -−1 -, X-ray limit of ∼ 6.6×10−14 erg cm−2 -s -−1 -and -a limit on the very-high-energy gamma-ray flux Φ(𝐸 > 120 GeV) < 1.7 × 10−12 erg cm−2 -s -−1 -. 
-We obtain a radio upper limit of ∼15𝜇Jy beam−1 -for persistent emission at the locations of both -FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness -of ∼53𝜇Jy beam−1 -associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first -detection of the radio continuum emission potentially associated with the host (galaxy) of FRB -20190714A, and is only the third known FRB to have such an association. Given the possible -association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB -whose age lies between that of FRB 20121102A and FRB 20180916A. A parallel search for -repeat bursts from these FRBs revealed no new detections down to a fluence of 0.08 Jy ms for -a 1 ms duration burst. -Key words: fast radio bursts – radio continuum: galaxies – radiation mechanisms: non-thermal -1 INTRODUCTION -Fast radio bursts (FRBs) are luminous transients that last for mi￾croseconds to milliseconds and occur at extragalactic to cosmo￾logical distances (e.g. Lorimer et al. 2007; Thornton et al. 2013; -Macquart et al. 2020). The estimated high radio luminosities and -associated brightness temperatures required to produce these short￾timescale energetic events at such distances are what makes them -intriguing (Petroff et al. 2021;Caleb & Keane 2021). They have been -observed to emit from ∼ 110 MHz − 8 GHz, though not yet across -a wide and continuous frequency band due to the variable band￾limited spectra of the single pulses. Over 600 FRBs have been dis￾covered1 of which ∼ 20 have been seen to repeat, and it is presently -uncertain whether they all do (Caleb et al. 2019; James et al. 2020). -The extraordinary observed characteristics of the repeating and non￾repeating FRBs have led to various progenitor models with the bulk -of them favouring neutron stars. Progenitor theories include binary -neutron star mergers and collisions (Totani 2013; Yamasaki et al. 
-2018), giant pulses from extragalactic pulsars (Cordes & Wasser￾man 2016; Popov & Pshirkov 2016), hyperflares and giant flares -from magnetars (Popov & Postnov 2013; Popov et al. 2018), binary -white dwarf mergers (Kashiyama et al. 2013), neutron star “comb￾ing" (Zhang 2018) and interactions of neutron stars with active -galactic nuclei (Vieyro et al. 2017) (see Platts et al. (2019) for a list -of potential progenitors). Some of these models predict radio after￾glows accompanying an FRB with timescales of days to years. Liu -et al. (2016) propose that the merger of a Kerr-Newman black hole -binary is one of the plausible central engines for FRBs and their -afterglows. Dai et al. (2017), however, suggest that the persistent -emission is due to an ultra-relativistic pulsar wind nebula sweeping -up its ambient medium with FRBs repeatedly produced through one -of several potential mechanisms. In the magnetar model by Margalit -et al. (2019), FRBs produced by binary neutron star mergers and -accretion induced collapse are expected to be accompanied by per￾sistent radio continuum emission on timescales of months to years. -★ james.chibueze@nwu.ac.za -† manisha.caleb@manchester.ac.uk -1 https://www.wis-tns.org/ -The persistent emission is powered by the nebula of relativistic elec￾trons and magnetic fields inflated by the magnetar flares (Margalit -et al. 2019). The existence of persistent emission associated with -FRBs could provide vital clues to their origin. Moreover, potential -candidates and models for FRB progenitors predict counterparts -in the X-ray an TeV bands. For example, a model by Lyubarsky -(2014) predicts millisecond outbursts of TeV emission accompany￾ing FRBs from magnetars. In 2020, FRB 20200428 was discovered -for the first time from a galactic magnetar, SGR 1935+2154. Fur￾thermore, an X-ray counterpart to this FRB was deteced for the first -time by several instruments (Tavani et al. 2021; Ridnaia et al. 2021; -Mereghetti et al. 
2020; Insight-HXMT 2020). -Of the 19 FRBs that have been associated with host galax￾ies2 -, only the sub-arcsecond localisation of the repeating FRB -20121102A to a host galaxy at a redshift of 𝑧 = 0.19273 ± 0.0008 -(Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physi￾cally associated with a compact (≤ 0.7 pc), persistent radio source -of luminosity 𝜈𝐿𝜈 ∼ 1039 erg s−1 -at a few GHz (Marcote et al. -2017). This source is detectable from 300 MHz – 26 GHz (Resmi -et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% vari￾ability on day timescales. In contrast, a similar sub-milliarcsecond -localisation of another repeating FRB 20180916B to a nearby mas￾sive spiral galaxy at 𝑧 = 0.0337 ± 0.0002 (Marcote et al. 2020) -showed no associated persistent radio emission. This places a strong -upper limit on the persistent source luminosity of 𝜈𝐿𝜈 . 7.6×1035 -erg s−1 -at 1.6 GHz, which is three orders of magnitude lower than -that of FRB 20121102A. Recently, the CHIME/FRB collaboration -announced heightened activity in the repeating FRB 20201124A -(Chime/FRB Collaboration 2021), which was localised to a host -galaxy at a redshift of 𝑧 = 0.0979 ± 0.0001 (Fong et al. 2021). -Persistent radio emission was detected by the upgraded Giant Me￾trewave Radio Telescope (uGMRT) (Wharton et al. 2021) and the -Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on -angular scales of a few arcseconds, but resolved out to scales of -∼ 0.1 arcseconds with the European VLBI Network (Marcote et al. -2021). -Localisations of four one-off FRBs through imaging of -2 https://frbhosts.org/ -© 2021 The Authors -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 3 -buffered raw voltage data at 1.4 GHz (Bannister et al. 2019; -Prochaska et al. 2019; Macquart et al. 2020) by the Australian -SKA Pathfinder (ASKAP) telescope did not yield persistent radio -continuum emission from the host galaxies (Bhandari et al. 2020). 
-Australian Telescope Compact Array (ATCA) observations of FRBs -20180924B, 20181112A, 20190102C and 20190608B were con￾ducted at a centre frequency of 6.5 GHz. No persistent emission as -luminous as the one associated with FRB 20121102A was detected -for the ASKAP FRBs (Bhandari et al. 2020). While the true age of -FRB 121102A is unknown, models based on polarization studies -predict the age to be ∼ 6 − 17 years (Hilmarsson et al. 2021). It is -possible that younger, more active FRBs like FRB 20121102A are -associated with persistent radio emission while the emission might -have faded over time for the older ones. The possibility of repeating -FRBs not being so uncommon after all (Ravi 2019) along with the -increasing arcsecond localisations suggests that we are entering an -era where we can begin to look for evidence of multiple classes by -studying FRB host galaxies and multi-wavelength counterparts. -In this paper, we report on the search for persistent radio emis￾sion in the host galaxies of one apparent one-off source (FRB -20190714A) and two repeating sources (FRBs 20171019A and -20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case -of the latter, we also conducted simultaneous observations with the -High Energy Stereoscopic System (H.E.S.S.) in very high energy -gamma rays. In addition, we searched for signals in the ultraviolet, -optical, and X-ray bands. The paper is structured as follows. In Sec￾tion 2, we discuss our observations and data reduction; in Section 3, -we discuss the single radio continuum detection and derived multi￾wavelength upper limits. Our discussion and conclusions follow in -Section 4 and 5. -2 OBSERVATIONS AND DATA REDUCTION -2.1 MeerKAT observations -The MeerKAT 64-parabolic-dish array (Jonas & MeerKAT Team -2016; Mauch et al. 2020) is located in the Northern Karoo desert -near Carnarvon, South Africa. Each “offset Gregorian" parabolic -dish antenna has an effective diameter of 13.5 m. 
The inner core of -the array contains 48 of the 64 dishes in a 1 km radius, while the -remaining 16 dishes are spread outward up to 8 km. The shortest and -longest baselines of the MeerKAT array are 29 m and 8 km, respec￾tively, providing angular scales of 500 to 270 -at the central frequency, -of the L-band receiver used here, of 1283 MHz. Multi-epoch ob￾servations of the FRB fields were conducted with the MeerKAT -array (Project ID: SCI-20190418-VC-01) at L-band (856 MHz to -1712 MHz). Details of the MeerKAT observations are presented -in Table 1. Only Stokes I (total intensity) of the MeerKAT ob￾servations are considered in this paper. The data correlation was -done with the SKARAB correlator (Hickish et al. 2016) in 4k mode -which gives 4096 channels across the 856 MHz bandwidth resulting -in a frequency resolution of ∼209 kHz. The data were reduced us￾ing the semi-automated MeerKAT data analysis pipelines - 𝑜𝑥𝑘𝑎𝑡3 -(Heywood 2020). -2.1.1 Imaging analysis -The 𝑜𝑥𝑘𝑎𝑡 pipeline employs a collection of publicly available ra￾dio interferometry data reduction software. The final data prod￾3 https://ascl.net/code/v/2627 -ucts, including reduced and calibrated visibility data (including -self-calibration), continuum (including sub-band) images as well -as diagnostic plots, are provided by the pipeline. The customary -configuration of the 𝑜𝑥𝑘𝑎𝑡 pipeline incorporates flagging, cross￾calibration and self-calibration processes. In the flagging process, -the low-gain bandpass edges (856 MHz to 880 MHz and 1658 MHz -to 1800 MHz) are flagged on all baselines, along with the location of -the Galactic neutral hydrogen line at 1419.8 MHz to 1421.3 MHz. -Several other radio frequency interference (RFI) prone regions of -the spectrum are then flagged on baselines shorter than 600 m. -Then, other possible RFI affected data are flagged out using the -CASA routines rflag and tfcrop for the calibrators, and using the -tricolour package for the target fields. 
-The cross-calibration steps using 𝑜𝑥𝑘𝑎𝑡 were standard, includ￾ing setting the flux scale and deriving corrections for residual delay -calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡 pipeline -uses the customary tasks from the CASA (McMullin et al. 2007) -suite for cross-calibration. After applying all the corrections to the -target field, we channel-averaged the dataset by a factor of five chan￾nels before splitting out the science target. This is consistent with our -science goals, since the relic sources we target are in the central parts -of our fields, reducing the effect of smearing through the channel -averaging. To deconvolve and image the target data, the WSClean -imager (Offringa et al. 2014) was used, with the multiscale and -wideband deconvolution algorithms enabled to better allow imag￾ing of the diffuse emission present in the our fields. Deconvolution -was performed in ten sub-band images of each 82 MHz wide-band. -WSClean generates the multi-frequency synthesis (MFS) map, in -joined-channel deconvolution mode, with a central frequency of -1283 MHz. In other words, the MFS map is a full bandwidth map. -In WSClean, each of the sub-bands is deconvolved separately with -an initially high mask of 20𝜎rms (using the auto masking function -provided by WSClean), to generate an artefact-free model of the -target field for the self-calibration process. This masking threshold -was iteratively reduced to a value of 3𝜎rms in the final iteration -of imaging. The 𝑜𝑥𝑘𝑎𝑡 pipeline uses the customary tasks from the -Cubical software (Kenyon et al. 2018) for self-calibration. -2.1.2 Single pulse searches -In addition to obtaining correlated data, the output data stream of -the F-engine are captured, delay corrected, phased and channelised -before being sent over the central beamforming network to the beam￾forming User Supplied Equipment (FBFUSE) that was designed and -developed at the Max Planck Institute for Radio Astronomy in Bonn. 
-For this project, FBFUSE combined the data into 764 total-intensity -tied-array beams which were used to populate the primary beam of -∼ 1 deg2 of the array. The data are then captured at 306.24 μs time -resolution by the Transient User Supplied Equipment (TUSE), a -real-time transient detection backend instrument developed by the -MeerTRAP4 -team at the University of Manchester. More details on -TUSE will be presented in an upcoming paper (Stappers et al. in -prep). The GPU-based single pulse search pipeline AstroAcceler￾ate5 -(Dimoudi & Armour 2015; Adámek & Armour 2016; Adámek -et al. 2017; Dimoudi et al. 2018; Adámek & Armour 2019) was used -to search for bursts in real-time after incoherently de-dispersing the -data in the DM range 0–5118.4 pc cm−3 -(see Caleb et al. 2020, for -more details). -4 https://www.meertrap.org/ -5 https://github.com/AstroAccelerateOrg/astro-accelerate +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 1 +A MeerKAT, e-MERLIN, H.E.S.S. and Swift search for persistent +and transient emission associated with three localised FRBs +J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4 +C. Venter,1I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5 +R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3 +E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3, H. Qiu,11 K. Rajwade,3 +S. Sanidas,3 M. Surnis,3 T. W. Scragg,3 C. R. H. Walker,5and N. Wrigley,3 +H.E.S.S. Collaboration: F. Aharonian,12,13,14 F. Ait Benkhali,15 E.O. Angüner,16 M. Backes,18,1 +V. Baghmanyan,19 V. Barbosa Martins,20 R. Batzofin,21 Y. Becherini,22,23 D. Berge,20 +M. Böttcher,1 C. Boisson,24 J. Bolmont,25 M. de Bony de Lavergne,26 M. Breuhaus,13 +R. Brose,12 F. Brun,6 T. Bulik,27 F. Cangemi,25 S. Caroff,25 S. Casanova,19 +J. Catalano,28 M. Cerruti,22 T. Chand,1 A. Chen,21 O.U. Chibueze,1 +G. Cotter,29 P. Cristofari,24 J. Damascene Mbarubucyeye,20 J. 
Devin,30 A. Djannati-Ataï,22 +A. Dmytriiev,1 K. Egberts,31 J.-P. Ernenwein,16 A. Fiasson,26 G. Fichet de Clairfontaine,24 +G. Fontaine,17 S. Funk,28 S. Gabici,22 S. Ghafourizadeh,15 G. Giavitto,20 +D. Glawion,28 M.-H. Grondin,30 M. Hörbe,29 C. Hoischen,31 T. L. Holch,20 +Zhiqiu Huang,13 M. Jamrozy,32 F. Jankowsky,15 I. Jung-Richardt,28 E. Kasai,18 +K. Katarzyński,33 U. Katz,28 B. Khélifi,22 W. Kluźniak,34 Nu. Komin,21 +K. Kosack,6 D. Kostunin,20 A. Lemière,22 J.-P. Lenain,25 F. Leuschner,35 +T. Lohse,36 A. Luashvili,24 I. Lypova,15 J. Mackey,12 D. Malyshev,35 +V. Marandon,13 P. Marchegiani,21 A. Marcowith,37 G. Martí-Devesa,38 R. Marx,15 +A. Mitchell,28,13 R. Moderski,34 L. Mohrmann,13 E. Moulin,6J. Muller,17 +K. Nakashima,28 M. de Naurois,17 A. Nayerhoda,19 J. Niemiec,19 A. Priyana Noel,32 +P. O’Brien,39 S. Ohm,20 L. Olivera-Nieto,13 E. de Ona Wilhelmi,20 M. Ostrowski,32 +S. Panny,38 R.D. Parsons,36 S. Pita,22 V. Poireau,26 D.A. Prokhorov,40 +H. Prokoph,20 G. Pühlhofer,35 A. Quirrenbach,15 P. Reichherzer,6 A. Reimer,38 +O. Reimer,38 G. Rowell,41 B. Rudak,34 E. Ruiz-Velasco,13 V. Sahakian,42 +S. Sailer,13 H. Salzmann,35 D.A. Sanchez,26 A. Santangelo,35 M. Sasaki,28 +H.M. Schutte,1 U. Schwanke,36 J.N.S. Shapopi,18 A. Specovius,28 +S. Spencer,29 R. Steenkamp,18 S. Steinmassl,13 T. Takahashi,43 T. Tanaka,44 +C. Thorpe-Morgan,35 N. Tsuji,45 C. van Eldik,28 J. Veh,28 +J. Vink,40 S.J. Wagner,15 A. Wierzcholska,19 Yu Wun Wong,28 A. Yusafzai,28 +M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20 +S. Zouari,22 N. Żywucka,1 +Accepted XXX. Received YYY; in original form ZZZ MNRAS 000, 1–15 (2021) -4 Chibueze et al. 
-2.2 e-MERLIN Observations -To constrain the position of the persistent continuum emission -associated with FRB 20190714A, we conducted L-band (centre -frequency of 1.51 GHz) observations of the target with the en￾hanced Multi-Element Remote-Linked Interferometer Network, e￾MERLIN array in the United Kingdom (project code: CY10003) -on 13 January, 2021 (see Section 3.1.2). Six antennas were used -including the 75-m Lovell telescope and the target pointing cen￾tre was R.A. = 12ℎ15𝑚55𝑠 -.12, Dec. = −13◦01015. -007. 1407+2827 -was used as the bandpass calibrator, 1331+3030 as the flux cal￾ibrator and 1216−1033 as the phase calibrator. The angular sep￾aration between the target and the phase calibrator is 2.47◦ -. The -data reduction was done following standard e-MERLIN calibra￾tion procedures6 with additional flagging of bad visibilities fol￾lowed by imaging. We found two confusing sources in the field, -at R.A. = 12ℎ15𝑚44𝑠 -.669, Dec. = −12◦57059. -0056 and R.A. = -12ℎ15𝑚37𝑠 -.216, Dec. = −13◦09033. -0044 at 4.10 -and 9.40 -from the -pointing centre, respectively. They had apparent flux densities of 4 -and 1.3 mJy without primary beam correction. We used these for -self-calibration of the field and then subtracted them before final -imaging. The final image synthesized beam is 0. -0065 × 0. -0015, posi￾tion angle 15◦ -elongated in the Declination direction due to the low -target elevation from the UK. -2.3 The Swift satellite: UVOT and XRT observations -Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA -space mission operating in soft-X-rays and optical/UV. Here we -use data from the X-ray Telescope (XRT) (Burrows et al. 2005) -which operates in the soft X-ray domain of 0.3 − 10 keV as well as -data taken by the UV/Optical Telescope (UVOT) (Roming et al. -2005) operating in the UV to optical domain (170 − 600 nm). 
-During the FRB 20171019A multi-wavelength (MWL) observing -campaign, two 2 ks target-of-opportunity (ToO) observations were -performed with Swift from 2019-09-28 18:37:02 to 2019-09-28 -21:52:54 and 2019-10-18 18:03:00 to 2019-10-18 20:03:00 on the -FRB 20171019A localisation region. Simultaneously with Swift￾XRT, five UVOT images were taken with the UVM2 filter (central -wavelengh = 2246 Å) over the 2 epochs with a total exposure of 4 ks. -The images are aspect-corrected and summed with the uvotimsum -tool (HEASOFT 6.26). Observations were performed with Swift￾XRT in the standard Photon Counting observing mode (PC). The -XRT PC data are processed with xrtpipeline (HEASOFT 6.26). -A summed image is extracted with xselect. -2.4 Very-high energy gamma-ray observations with H.E.S.S. -Observations of FRB 20171019A were also obtained in the very￾high energy gamma-ray domain with the H.E.S.S. imaging atmo￾spheric Cherenkov telescope array, sensitive in the range between a -few tens of GeVs and 100 TeV. H.E.S.S. is located on the Khomas -Highland plateau of Namibia (23◦1601800 South, 16◦3000000 East), -at an elevation of ∼1800 m above sea level. Observations took place -contemporaneously to the first epoch of MeerKAT observations of -FRB 20171019A described above. The data set was obtained with -the H.E.S.S. phase II array, including the upgraded 12 m-diameter -CT1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter -6 https://github.com/e-merlin/eMERLIN_CASA_pipeline -CT5 telescope (Bolmont et al. 2014). A standard data quality selec￾tion was applied to the data (Aharonian et al. 2006). The events have -then been selected and their direction and energy reconstructed us￾ing a log-likelihood minimization comparing the recorded shower -images of all triggered telescopes (requiring at least two telescopes -to see the same gamma-ray event) to a semi-analytical model of air -showers (de Naurois & Rolland 2009). 
-We define a circular region-of-interest centered on the position -of FRB 20171019A with a radius of 0.12◦ -, optimal for a point-like -source of emission as expected from FRB 20171019A. The back￾ground level in this ON region was determined using the standard -“ring background” technique (Berge et al. 2007) based on a radially -symmetric ring around the source position. This technique allows us -to derive the background level from the same field of view and as￾sures that the gamma-ray signal and background are estimated with -the same acceptance and under the same observation conditions. -3 RESULTS -3.1 MeerKAT -The theoretical thermal noise of the MeerKAT can be calculated as -𝑆rms = -1 -𝜂𝑐 -SEFD -√︃ -𝑛pol × 𝑁(𝑁 − 1) × Δ𝜈 × 𝑡int -. (1) -The system equivalent flux density (SEFD) of MeerKAT at the -1.28 GHz is 443 Jy and 𝜂𝑐 is the correlator efficiency. We used 𝑛pol -= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 = -856 MHz bandwidth and 𝑡int = 21600 sec observing time for one -epoch. This gives the theoretical rms of ∼ 2 𝜇Jy beam−1 -. The typical -image rms obtained from our residual images is ∼ 5 𝜇Jy beam−1 -, -which is 2.5 times the expected theoretical rms. The wideband MFS -image does not allow primary beam correction procedure as this can -only be done on the sub-band images with limited rms for detection -of the sources. However, our sources are the phase centres of our -fields and thus unaffected by the effect of the primary beam. -Due to the lack of MeerKAT primary beam correction, we -did not compare the flux densities of the discrete sources with -their NRAO (National Radio Astronomy Observatory) VLA (Very -Large Array) Sky Survey (NVSS) counterparts. However, Chibueze -et al. (2021, submitted) confirmed that the overall flux densities -obtained with MeerKAT and NVSS are in good agreement with -each other within errors of ∼ 5%. We compared the astrometry of -the discrete radio sources obtained with MeerKAT and NVSS in -Figure 1. 
The position uncertainty of the MeerKAT ranges from -0. -002 (close to the centre of the primary beam) to a few arcseconds -towards the edge of the primary beam. The scatter observed in -Figure 1 is mostly due to the probability of the centroids of emission -in the ∼4500 NVSS resolution being different from the centroids at -MeerKAT’s resolution and partly due to higher position uncertainty -of the fainter sources. Therefore, we conclude that our MeerKAT -data are well calibrated and the flux density and astrometry are as -accurate as the errors indicate. -3.1.1 Looking for persistent continuum emission associated with -the FRB fields -Considering the results of the astrometric comparison with NVSS -(see Figure 1), we considered potential associations of contin￾uum sources in the MeerKAT observations with the FRB loca￾MNRAS 000, 1–15 (2021) -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5 -tion to sources within 500. Using this spatial coincidence criterion, -we identified a persistent 1283 MHz continuum source near FRB -20190714A, detected in both the 14 September 2019 and the 28 -September 2019 epoch. The peak of the MeerKAT radio emission -is offset by ∼ 2 -00.1 from the peak of the 𝑖-band magnitude of the op￾tical galaxy identified in the Panoramic Survey Telescope and Rapid -Response System (PanSTARRS, located at Haleakala Observatory) -image (shown as contours in Figures 2 and 3). The MeerKAT ra￾dio source is offset by 1. -0068 from the localisation region of FRB -20190714 (cyan circle in Figures 2 and 3). -3.1.2 e-MERLIN detection of compact emission towards -FRB 20190714 -Compact persistent emission was detected in the 1.51 GHz e￾MERLIN image at R.A. = 12ℎ15𝑚55𝑠 -.116, Dec. = −13◦01014. -0048 -at 86 𝜇Jy beam−1 by e-MERLIN. 
The stochastic position uncer￾tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa￾ration between phase-calibrator and target, and antenna position -uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric -uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively. -The offset from the FRB position is negligible in R.A. and 1.2 -arcsec in Dec. The rms in this region (of full primary beam sen￾sitivity) is 20 𝜇Jy beam−1 -, making this a 4.3𝜎rms detection. It is -∼1.5𝜎rms higher than that of the MeerKAT detection. Although the -e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it -is possibly higher due to the low declination of the phase-reference -source and to the strong RFI which were removed from the data -but may have affected the linearity of the receiver response. The -peak of the e-MERLIN radio emission is offset by ∼ 1. -004 from the -peak of the PanSTARRS 𝑖-band emission in Figures 2 and 3. The -e-MERLIN radio source (shown by the cyan cross in Figures 2 and -3) is offset by 0. -0053 from the localised position of FRB 20190714. -We estimate the probability of a chance alignment of a back￾ground persistent radio source and the host galaxy, following the -procedure of Eftekhari et al. (2018). Instead of using the FRB lo￾calisation region, we use the area of the galaxy, which is taken as -2 -00 × 2 -00, twice the half light radius from Heintz et al. (2020). Given -the source has a flux density of ∼ 90𝜇Jy we estimate the chance -alignment probability of 0.0008, which corresponds to 3.4𝜎. The -flux density threshold, assuming 3𝜎, for an unresolved radio source -is ∼ 15 𝜇Jy. If instead we consider the probability of detecting any -radio source above our flux density threshold of 15𝜇Jy, the probabil￾ity of a chance alignment is, therefore, approximately 0.8%, making -the statistical significance of our detection 2.6𝜎. 
This represents the -first detection of radio continuum emission associated with the host -(galaxy) of FRB 20190714A (see Figure 2 and 3). -3.1.3 MeerKAT non-detections -No continuum emission was detected near FRBs 20171019A and -20190711A. As each of the images of these sources has an rms -of ∼ 5 𝜇Jy beam−1 -, the 3𝜎 intensity upper limit of any emission -associated with FRBs 20171019A and 20190711A will be ∼ 15 𝜇Jy -beam−1 -(see Table 1). -Candidate pulses above a signal-to-noise (S/N) of 10 from the -single pulse search with MeerTRAP were visually inspected offline. -No new FRBs or repeat bursts from the known FRBs were detected -above a fluence threshold of 0.08 Jy ms assuming a 1 ms duration -burst. -3.2 Swift -The UVOT summed image is presented in Figure 4. The UVOT -field of view corresponds roughly to the uncertainty7 of the locali￾sation region of FRB 20171019A (RA = 7.50 -and DEC = 70 -). Using -uvotdetect, we find 30 sources above the 5𝜎 level and within the -FRB 20171019A uncertainty region. Using a 3 arcsec maximum -separation, which is slightly larger than the UVOT PSF (Breeveld -et al. 2010), these sources are cross-matched with known catalogue -sources. We find that out of the 30 sources detected by UVOT, 28 -are spatially coincident with stars catalogued in the SDSS catalogue -(DR12; Alam et al. 2015), and one source is coincident with a galaxy -(AGN broadline SDSS ID: 1237652599570890948 at 𝑧 ∼ 0.156). -This galaxy is also detected by the MeerKAT radio observations. We -use the NASA/IPAC Extragalactic Database (NED)8 -to search for -known galaxies in the FRB 20171019A uncertainty regions. We find -multiple galaxies with unknown redshifts, therefore we cannot draw -conclusions on the host galaxy from our observations. 
Using a 5000 -circular ON region centred on the position of FRB 20171019A and -a 5000 OFF region that does not contain any of the detected sources, -we run the uvotsource tool with a 5𝜎 background threshold and -obtain a flux upper limit of 1.4 × 10−16 erg cm−2 -s -−1Å -−1 without -applying a Calactic extinction correction. -The XRT summed image is shown in Figure 5. At the edge -of the field-of-view, we detect a source spatially coincident with -the Wolf 1561 star. As we consider this source unrelated to the -FRB, we use the online Swift-XRT data products generator (Evans -et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3- -10 keV range on the count rate of 0.001885 counts.s -−1 -. Using -WebPIMMS9 -(v4.11a) and assuming a weighted average 𝑁H = 5.12× -1020 cm−2 -from the direction of the source estimated from the -NASA’s HEASARC 10 online tools (HI4PI Collaboration et al. -2016) and a power law model with a photon index = 2, this upper -limit translates to an energy flux of 6.6×10−14 erg cm−2 -s -−1 -(8.3× -10−14 erg cm−2 -s -−1 unabsorbed). -3.3 H.E.S.S. -No significant gamma-ray excess above the expected background -is detected from the direction of FRB 20171019A, with 52 gamma -candidate events from the source region and 524 background event. -A second analysis using an independent event calibration and recon￾struction (Parsons & Hinton 2014) confirms this result. A search for -variable emission on timescales ranging from milliseconds to sev￾eral minutes with tools provided in (Brun et al. 2020) does not reveal -any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confi￾dence level (C. L.) upper limits on the photon flux are derived using -the method described by Rolke et al. (2005). The energy threshold -of the data is highly dependent on the zenith angle of the observa￾tions. For these observations, the zenith angles range from 15 to 25 -deg, which leads to an energy threshold for the stacked data set of -𝐸th = 120 GeV. 
The upper limit on the Very High Energy (VHE) -7 https://www.wis-tns.org/object/20171019a -8 https://ned.ipac.caltech.edu; NED is funded by the National -Aeronautics and Space Administration and operated by the California Insti￾tute of Technology -9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/ -w3pimms.pl -10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh. -pl +arXiv:2201.00069v1 [astro-ph.HE] 31 Dec 202 +MNRAS 000, 1–15 (2021) Preprint 4 January 2022 Compiled using MNRAS LATEX style file v3.0 +ABSTRACT +We report on a search for persistent radio emission from the one-off Fast Radio Burst (FRB) +20190714A, as well as from two repeating FRBs, 20190711A and 20171019A, using the +MeerKAT radio telescope. For FRB 20171019A we also conducted simultaneous observations +with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma rays and +searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV +flux upper limit of 1.39×10−16 erg cm−2s +−1Å−1 +, X-ray limit of ∼ 6.6×10−14 erg cm−2s +−1 +and +a limit on the very-high-energy gamma-ray flux Φ(𝐸 > 120 GeV) < 1.7 × 10−12 erg cm−2s +−1 +. +We obtain a radio upper limit of ∼15𝜇Jy beam−1for persistent emission at the locations of both +FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness +of ∼53𝜇Jy beam−1associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first +detection of the radio continuum emission potentially associated with the host (galaxy) of FRB +20190714A, and is only the third known FRB to have such an association. Given the possible +association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB +whose age lies between that of FRB 20121102A and FRB 20180916A. A parallel search for +repeat bursts from these FRBs revealed no new detections down to a fluence of 0.08 Jy ms for +a 1 ms duration burst. 
+Key words: fast radio bursts – radio continuum: galaxies – radiation mechanisms: non-thermal +1 INTRODUCTION +Fast radio bursts (FRBs) are luminous transients that last for microseconds to milliseconds and occur at extragalactic to cosmological distances (e.g. Lorimer et al. 2007; Thornton et al. 2013; +Macquart et al. 2020). The estimated high radio luminosities and +associated brightness temperatures required to produce these shorttimescale energetic events at such distances are what makes them +intriguing (Petroff et al. 2021;Caleb & Keane 2021). They have been +observed to emit from ∼ 110 MHz − 8 GHz, though not yet across +a wide and continuous frequency band due to the variable bandlimited spectra of the single pulses. Over 600 FRBs have been discovered1 of which ∼ 20 have been seen to repeat, and it is presently +uncertain whether they all do (Caleb et al. 2019; James et al. 2020). +The extraordinary observed characteristics of the repeating and nonrepeating FRBs have led to various progenitor models with the bulk +of them favouring neutron stars. Progenitor theories include binary +neutron star mergers and collisions (Totani 2013; Yamasaki et al. +2018), giant pulses from extragalactic pulsars (Cordes & Wasserman 2016; Popov & Pshirkov 2016), hyperflares and giant flares +from magnetars (Popov & Postnov 2013; Popov et al. 2018), binary +white dwarf mergers (Kashiyama et al. 2013), neutron star “combing" (Zhang 2018) and interactions of neutron stars with active +galactic nuclei (Vieyro et al. 2017) (see Platts et al. (2019) for a list +of potential progenitors). Some of these models predict radio afterglows accompanying an FRB with timescales of days to years. Liu +et al. (2016) propose that the merger of a Kerr-Newman black hole +binary is one of the plausible central engines for FRBs and their +afterglows. Dai et al. 
(2017), however, suggest that the persistent +emission is due to an ultra-relativistic pulsar wind nebula sweeping +up its ambient medium with FRBs repeatedly produced through one +of several potential mechanisms. In the magnetar model by Margalit +et al. (2019), FRBs produced by binary neutron star mergers and +accretion-induced collapse are expected to be accompanied by persistent radio continuum emission on timescales of months to years. +★ james.chibueze@nwu.ac.za +† manisha.caleb@manchester.ac.uk +1 https://www.wis-tns.org/ +The persistent emission is powered by the nebula of relativistic electrons and magnetic fields inflated by the magnetar flares (Margalit +et al. 2019). The existence of persistent emission associated with +FRBs could provide vital clues to their origin. Moreover, potential +candidates and models for FRB progenitors predict counterparts +in the X-ray and TeV bands. For example, a model by Lyubarsky +(2014) predicts millisecond outbursts of TeV emission accompanying FRBs from magnetars. In 2020, FRB 20200428 was discovered +for the first time from a Galactic magnetar, SGR 1935+2154. Furthermore, an X-ray counterpart to this FRB was detected for the first +time by several instruments (Tavani et al. 2021; Ridnaia et al. 2021; +Mereghetti et al. 2020; Insight-HXMT 2020). +Of the 19 FRBs that have been associated with host galaxies2 +, only the sub-arcsecond localisation of the repeating FRB +20121102A to a host galaxy at a redshift of 𝑧 = 0.19273 ± 0.0008 +(Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physically associated with a compact (≤ 0.7 pc), persistent radio source +of luminosity $\nu L_\nu \sim 10^{39}$ erg s$^{-1}$ at a few GHz (Marcote et al. +2017). This source is detectable from 300 MHz – 26 GHz (Resmi +et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% variability on day timescales.
In contrast, a similar sub-milliarcsecond +localisation of another repeating FRB 20180916B to a nearby massive spiral galaxy at 𝑧 = 0.0337 ± 0.0002 (Marcote et al. 2020) +showed no associated persistent radio emission. This places a strong +upper limit on the persistent source luminosity of $\nu L_\nu \lesssim 7.6\times10^{35}$ +erg s$^{-1}$ at 1.6 GHz, which is three orders of magnitude lower than +that of FRB 20121102A. Recently, the CHIME/FRB collaboration +announced heightened activity in the repeating FRB 20201124A +(Chime/FRB Collaboration 2021), which was localised to a host +galaxy at a redshift of 𝑧 = 0.0979 ± 0.0001 (Fong et al. 2021). +Persistent radio emission was detected by the upgraded Giant Metrewave Radio Telescope (uGMRT) (Wharton et al. 2021) and the +Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on +angular scales of a few arcseconds, but resolved out to scales of +∼ 0.1 arcseconds with the European VLBI Network (Marcote et al. +2021). +Localisations of four one-off FRBs through imaging of +2 https://frbhosts.org/ +© 2021 The Authors +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 3 +buffered raw voltage data at 1.4 GHz (Bannister et al. 2019; +Prochaska et al. 2019; Macquart et al. 2020) by the Australian +SKA Pathfinder (ASKAP) telescope did not yield persistent radio +continuum emission from the host galaxies (Bhandari et al. 2020). +Australia Telescope Compact Array (ATCA) observations of FRBs +20180924B, 20181112A, 20190102C and 20190608B were conducted at a centre frequency of 6.5 GHz. No persistent emission as +luminous as the one associated with FRB 20121102A was detected +for the ASKAP FRBs (Bhandari et al. 2020). While the true age of +FRB 20121102A is unknown, models based on polarization studies +predict the age to be ∼ 6 − 17 years (Hilmarsson et al. 2021).
It is +possible that younger, more active FRBs like FRB 20121102A are +associated with persistent radio emission while the emission might +have faded over time for the older ones. The possibility of repeating +FRBs not being so uncommon after all (Ravi 2019) along with the +increasing arcsecond localisations suggests that we are entering an +era where we can begin to look for evidence of multiple classes by +studying FRB host galaxies and multi-wavelength counterparts. +In this paper, we report on the search for persistent radio emission in the host galaxies of one apparent one-off source (FRB +20190714A) and two repeating sources (FRBs 20171019A and +20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case +of the latter, we also conducted simultaneous observations with the +High Energy Stereoscopic System (H.E.S.S.) in very high energy +gamma rays. In addition, we searched for signals in the ultraviolet, +optical, and X-ray bands. The paper is structured as follows. In Section 2, we discuss our observations and data reduction; in Section 3, +we discuss the single radio continuum detection and derived multiwavelength upper limits. Our discussion and conclusions follow in +Section 4 and 5. +2 OBSERVATIONS AND DATA REDUCTION +2.1 MeerKAT observations +The MeerKAT 64-parabolic-dish array (Jonas & MeerKAT Team +2016; Mauch et al. 2020) is located in the Northern Karoo desert +near Carnarvon, South Africa. Each “offset Gregorian" parabolic +dish antenna has an effective diameter of 13.5 m. The inner core of +the array contains 48 of the 64 dishes in a 1 km radius, while the +remaining 16 dishes are spread outward up to 8 km. The shortest and +longest baselines of the MeerKAT array are 29 m and 8 km, respectively, providing angular scales of 500 to 270 +at the central frequency, +of the L-band receiver used here, of 1283 MHz. 
Multi-epoch observations of the FRB fields were conducted with the MeerKAT +array (Project ID: SCI-20190418-VC-01) at L-band (856 MHz to +1712 MHz). Details of the MeerKAT observations are presented +in Table 1. Only Stokes I (total intensity) of the MeerKAT observations are considered in this paper. The data correlation was +done with the SKARAB correlator (Hickish et al. 2016) in 4k mode +which gives 4096 channels across the 856 MHz bandwidth resulting +in a frequency resolution of ∼209 kHz. The data were reduced using the semi-automated MeerKAT data analysis pipelines - 𝑜𝑥𝑘𝑎𝑡3 +(Heywood 2020). +2.1.1 Imaging analysis +The 𝑜𝑥𝑘𝑎𝑡 pipeline employs a collection of publicly available radio interferometry data reduction software. The final data prod3 https://ascl.net/code/v/2627 +ucts, including reduced and calibrated visibility data (including +self-calibration), continuum (including sub-band) images as well +as diagnostic plots, are provided by the pipeline. The customary +configuration of the 𝑜𝑥𝑘𝑎𝑡 pipeline incorporates flagging, crosscalibration and self-calibration processes. In the flagging process, +the low-gain bandpass edges (856 MHz to 880 MHz and 1658 MHz +to 1800 MHz) are flagged on all baselines, along with the location of +the Galactic neutral hydrogen line at 1419.8 MHz to 1421.3 MHz. +Several other radio frequency interference (RFI) prone regions of +the spectrum are then flagged on baselines shorter than 600 m. +Then, other possible RFI affected data are flagged out using the +CASA routines rflag and tfcrop for the calibrators, and using the +tricolour package for the target fields. +The cross-calibration steps using 𝑜𝑥𝑘𝑎𝑡 were standard, including setting the flux scale and deriving corrections for residual delay +calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡 pipeline +uses the customary tasks from the CASA (McMullin et al. 2007) +suite for cross-calibration. 
After applying all the corrections to the +target field, we channel-averaged the dataset by a factor of five channels before splitting out the science target. This is consistent with our +science goals, since the relic sources we target are in the central parts +of our fields, reducing the effect of smearing through the channel +averaging. To deconvolve and image the target data, the WSClean +imager (Offringa et al. 2014) was used, with the multiscale and +wideband deconvolution algorithms enabled to better allow imaging of the diffuse emission present in the our fields. Deconvolution +was performed in ten sub-band images of each 82 MHz wide-band. +WSClean generates the multi-frequency synthesis (MFS) map, in +joined-channel deconvolution mode, with a central frequency of +1283 MHz. In other words, the MFS map is a full bandwidth map. +In WSClean, each of the sub-bands is deconvolved separately with +an initially high mask of 20𝜎rms (using the auto masking function +provided by WSClean), to generate an artefact-free model of the +target field for the self-calibration process. This masking threshold +was iteratively reduced to a value of 3𝜎rms in the final iteration +of imaging. The 𝑜𝑥𝑘𝑎𝑡 pipeline uses the customary tasks from the +Cubical software (Kenyon et al. 2018) for self-calibration. +2.1.2 Single pulse searches +In addition to obtaining correlated data, the output data stream of +the F-engine are captured, delay corrected, phased and channelised +before being sent over the central beamforming network to the beamforming User Supplied Equipment (FBFUSE) that was designed and +developed at the Max Planck Institute for Radio Astronomy in Bonn. +For this project, FBFUSE combined the data into 764 total-intensity +tied-array beams which were used to populate the primary beam of +∼ 1 deg2 of the array. 
The data are then captured at 306.24 μs time +resolution by the Transient User Supplied Equipment (TUSE), a +real-time transient detection backend instrument developed by the +MeerTRAP4team at the University of Manchester. More details on +TUSE will be presented in an upcoming paper (Stappers et al. in +prep). The GPU-based single pulse search pipeline AstroAccelerate5 +(Dimoudi & Armour 2015; Adámek & Armour 2016; Adámek +et al. 2017; Dimoudi et al. 2018; Adámek & Armour 2019) was used +to search for bursts in real-time after incoherently de-dispersing the +data in the DM range 0–5118.4 pc cm−3(see Caleb et al. 2020, for +more details). +4 https://www.meertrap.org/ +5 https://github.com/AstroAccelerateOrg/astro-accelerate MNRAS 000, 1–15 (2021) -6 Chibueze et al. -Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the -MeerKAT and NVSS sources. -gamma-ray flux above that threshold and assuming an energy depen￾dence following 𝐸 -−2 -is Φ(𝐸 > 120 GeV) < 2.10 × 10−12 cm−2 -s -−1 -or Φ(𝐸 > 120 GeV) < 1.7 × 10−12 erg cm−2 -s -−1 -. A variation of -± 0.5 of the assumed spectral index leads to a variation in the upper -limit of less than ± 19%. A map of energy flux upper limits covering -the full region accessible within the H.E.S.S. field of view above -120 GeV is given in Figure 6. -4 DISCUSSION -Of the targeted FRB fields reported here, only FRB 20190714A -is observed to be spatially coincident with a persistent radio con￾tinuum source. We obtain an upper limit of ∼ 15 𝜇Jy beam−1 -for -FRBs 20190711A and 20171019A, respectively, and a peak inten￾sity of ∼ 53 𝜇Jy beam−1 -for the emission coincident with FRB -20190714A. This source is detected at both epochs with similar -intensities within the measured rms of the images (see Tables 1 and -2 for details). 
The values in the Table 2 are derived by carrying -out 2D Gaussian fit using similar ellipses enclosing the detected -persistent emission. The average flux density is ∼ 3 times less than -that of the persistent source associated with FRBs 20121102A, one -of the most prolific repeaters, located at 𝑧 = 0.19273(8). Persistent -radio emission from FRB 20201124A was detected by the uGMRT -(Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular -scales of a few arcseconds. However, it is resolved out at scales of -∼ 0.1 arcseconds with the European VLBI Network (Marcote et al. -2021) suggesting that it is not a compact source directly associated -with the FRB. In contrast, the other localised, prolific repeating -FRB 20180916A has no persistent radio counterpart. -In the image in Figure 3 one can see that the persistent radio -source lies at the edge of the optical extent of the host galaxy -as seen in PanSTARRS observations (Heintz et al. 2020). Our -derived 1283 MHz peak position with MeerKAT places it just -1. -0068 away from the position of FRB 20190714A (𝛼𝐽2000, 𝛿𝐽2000 -= 12ℎ15𝑚55𝑠 -.12, -13◦01015. -0070; Heintz et al. 2020). The posi￾tional uncertainty on the FRB position is 0. -00283. Similarly, the peak -1.51 GHz e-MERLIN position of the persistent radio source is sepa￾rated from the position of FRB 20190714A by 0. -0053. The persistent -source near FRB 20190714A has a flux broadly consistent with the -MeerKAT flux and is unresolved on the e-MERLIN baselines. The + Chibueze et al. +2.2 e-MERLIN Observations +To constrain the position of the persistent continuum emission +associated with FRB 20190714A, we conducted L-band (centre +frequency of 1.51 GHz) observations of the target with the enhanced Multi-Element Remote-Linked Interferometer Network, eMERLIN array in the United Kingdom (project code: CY10003) +on 13 January, 2021 (see Section 3.1.2). Six antennas were used +including the 75-m Lovell telescope and the target pointing centre was R.A. 
= 12h15m55.12s, Dec. = −13°01′15.7″. 1407+2827 +was used as the bandpass calibrator, 1331+3030 as the flux calibrator and 1216−1033 as the phase calibrator. The angular separation between the target and the phase calibrator is 2.47°. The +data reduction was done following standard e-MERLIN calibration procedures6 with additional flagging of bad visibilities followed by imaging. We found two confusing sources in the field, +at R.A. = 12h15m44.669s, Dec. = −12°57′59.56″ and R.A. = +12h15m37.216s, Dec. = −13°09′33.44″ at 4.1′ +and 9.4′ from the +pointing centre, respectively. They had apparent flux densities of 4 +and 1.3 mJy without primary beam correction. We used these for +self-calibration of the field and then subtracted them before final +imaging. The final image synthesized beam is 0.65″ × 0.15″, position angle 15°, +elongated in the Declination direction due to the low +target elevation from the UK. +2.3 The Swift satellite: UVOT and XRT observations +Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA +space mission operating in soft X-rays and optical/UV. Here we +use data from the X-ray Telescope (XRT) (Burrows et al. 2005) +which operates in the soft X-ray domain of 0.3 − 10 keV as well as +data taken by the UV/Optical Telescope (UVOT) (Roming et al. +2005) operating in the UV to optical domain (170 − 600 nm). +During the FRB 20171019A multi-wavelength (MWL) observing +campaign, two 2 ks target-of-opportunity (ToO) observations were +performed with Swift from 2019-09-28 18:37:02 to 2019-09-28 +21:52:54 and 2019-10-18 18:03:00 to 2019-10-18 20:03:00 on the +FRB 20171019A localisation region. Simultaneously with Swift-XRT, five UVOT images were taken with the UVM2 filter (central +wavelength = 2246 Å) over the 2 epochs with a total exposure of 4 ks. +The images are aspect-corrected and summed with the uvotimsum +tool (HEASOFT 6.26). Observations were performed with Swift-XRT in the standard Photon Counting observing mode (PC).
The +XRT PC data are processed with xrtpipeline (HEASOFT 6.26). +A summed image is extracted with xselect. +2.4 Very-high energy gamma-ray observations with H.E.S.S. +Observations of FRB 20171019A were also obtained in the veryhigh energy gamma-ray domain with the H.E.S.S. imaging atmospheric Cherenkov telescope array, sensitive in the range between a +few tens of GeVs and 100 TeV. H.E.S.S. is located on the Khomas +Highland plateau of Namibia (23◦1601800 South, 16◦3000000 East), +at an elevation of ∼1800 m above sea level. Observations took place +contemporaneously to the first epoch of MeerKAT observations of +FRB 20171019A described above. The data set was obtained with +the H.E.S.S. phase II array, including the upgraded 12 m-diameter +CT1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter +6 https://github.com/e-merlin/eMERLIN_CASA_pipeline +CT5 telescope (Bolmont et al. 2014). A standard data quality selection was applied to the data (Aharonian et al. 2006). The events have +then been selected and their direction and energy reconstructed using a log-likelihood minimization comparing the recorded shower +images of all triggered telescopes (requiring at least two telescopes +to see the same gamma-ray event) to a semi-analytical model of air +showers (de Naurois & Rolland 2009). +We define a circular region-of-interest centered on the position +of FRB 20171019A with a radius of 0.12◦, optimal for a point-like +source of emission as expected from FRB 20171019A. The background level in this ON region was determined using the standard +“ring background” technique (Berge et al. 2007) based on a radially +symmetric ring around the source position. This technique allows us +to derive the background level from the same field of view and assures that the gamma-ray signal and background are estimated with +the same acceptance and under the same observation conditions. 
+3 RESULTS +3.1 MeerKAT +The theoretical thermal noise of the MeerKAT can be calculated as +$$S_{\mathrm{rms}} = \frac{1}{\eta_c}\,\frac{\mathrm{SEFD}}{\sqrt{n_{\mathrm{pol}} \times N(N-1) \times \Delta\nu \times t_{\mathrm{int}}}}. \qquad (1)$$ +The system equivalent flux density (SEFD) of MeerKAT at +1.28 GHz is 443 Jy and $\eta_c$ is the correlator efficiency. We used $n_{\mathrm{pol}}$ += 2 polarisation products (XX and YY), $N$ = 64 telescopes, $\Delta\nu$ = +856 MHz bandwidth and $t_{\mathrm{int}}$ = 21600 sec observing time for one +epoch. This gives the theoretical rms of ∼ 2 $\mu$Jy beam$^{-1}$. The typical +image rms obtained from our residual images is ∼ 5 $\mu$Jy beam$^{-1}$, +which is 2.5 times the expected theoretical rms. The wideband MFS +image does not allow a primary beam correction procedure as this can +only be done on the sub-band images with limited rms for detection +of the sources. However, our sources are at the phase centres of our +fields and thus unaffected by the effect of the primary beam. +Due to the lack of MeerKAT primary beam correction, we +did not compare the flux densities of the discrete sources with +their NRAO (National Radio Astronomy Observatory) VLA (Very +Large Array) Sky Survey (NVSS) counterparts. However, Chibueze +et al. (2021, submitted) confirmed that the overall flux densities +obtained with MeerKAT and NVSS are in good agreement with +each other within errors of ∼ 5%. We compared the astrometry of +the discrete radio sources obtained with MeerKAT and NVSS in +Figure 1. The position uncertainty of the MeerKAT ranges from +0.2″ (close to the centre of the primary beam) to a few arcseconds +towards the edge of the primary beam. The scatter observed in +Figure 1 is mostly due to the probability of the centroids of emission +in the ∼45″ NVSS resolution being different from the centroids at +MeerKAT’s resolution and partly due to higher position uncertainty +of the fainter sources. Therefore, we conclude that our MeerKAT +data are well calibrated and the flux density and astrometry are as +accurate as the errors indicate.
+3.1.1 Looking for persistent continuum emission associated with +the FRB fields +Considering the results of the astrometric comparison with NVSS +(see Figure 1), we considered potential associations of continuum sources in the MeerKAT observations with the FRB location +MNRAS 000, 1–15 (2021) +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5 +to sources within 5″. Using this spatial coincidence criterion, +we identified a persistent 1283 MHz continuum source near FRB +20190714A, detected in both the 14 September 2019 and the 28 +September 2019 epoch. The peak of the MeerKAT radio emission +is offset by ∼ 2.1″ from the peak of the 𝑖-band magnitude of the optical galaxy identified in the Panoramic Survey Telescope and Rapid +Response System (PanSTARRS, located at Haleakala Observatory) +image (shown as contours in Figures 2 and 3). The MeerKAT radio source is offset by 1.68″ from the localisation region of FRB +20190714 (cyan circle in Figures 2 and 3). +3.1.2 e-MERLIN detection of compact emission towards +FRB 20190714 +Compact persistent emission was detected in the 1.51 GHz e-MERLIN image at R.A. = 12h15m55.116s, Dec. = −13°01′14.48″ +at 86 𝜇Jy beam−1 by e-MERLIN. The stochastic position uncertainty is (0.04, 0.15) arcsec and the uncertainty (due to the separation between phase-calibrator and target, and antenna position +uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric +uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively. +The offset from the FRB position is negligible in R.A. and 1.2 +arcsec in Dec. The rms in this region (of full primary beam sensitivity) is 20 𝜇Jy beam−1, making this a 4.3𝜎rms detection. It is +∼1.5𝜎rms higher than that of the MeerKAT detection.
Although the +e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it +is possibly higher due to the low declination of the phase-reference +source and to the strong RFI which were removed from the data +but may have affected the linearity of the receiver response. The +peak of the e-MERLIN radio emission is offset by ∼ 1. +004 from the +peak of the PanSTARRS 𝑖-band emission in Figures 2 and 3. The +e-MERLIN radio source (shown by the cyan cross in Figures 2 and +3) is offset by 0. +0053 from the localised position of FRB 20190714. +We estimate the probability of a chance alignment of a background persistent radio source and the host galaxy, following the +procedure of Eftekhari et al. (2018). Instead of using the FRB localisation region, we use the area of the galaxy, which is taken as +2 +00 × 200, twice the half light radius from Heintz et al. (2020). Given +the source has a flux density of ∼ 90𝜇Jy we estimate the chance +alignment probability of 0.0008, which corresponds to 3.4𝜎. The +flux density threshold, assuming 3𝜎, for an unresolved radio source +is ∼ 15 𝜇Jy. If instead we consider the probability of detecting any +radio source above our flux density threshold of 15𝜇Jy, the probability of a chance alignment is, therefore, approximately 0.8%, making +the statistical significance of our detection 2.6𝜎. This represents the +first detection of radio continuum emission associated with the host +(galaxy) of FRB 20190714A (see Figure 2 and 3). +3.1.3 MeerKAT non-detections +No continuum emission was detected near FRBs 20171019A and +20190711A. As each of the images of these sources has an rms +of ∼ 5 𝜇Jy beam−1, the 3𝜎 intensity upper limit of any emission +associated with FRBs 20171019A and 20190711A will be ∼ 15 𝜇Jy +beam−1(see Table 1). +Candidate pulses above a signal-to-noise (S/N) of 10 from the +single pulse search with MeerTRAP were visually inspected offline. 
+No new FRBs or repeat bursts from the known FRBs were detected +above a fluence threshold of 0.08 Jy ms assuming a 1 ms duration +burst. +3.2 Swift +The UVOT summed image is presented in Figure 4. The UVOT +field of view corresponds roughly to the uncertainty7 of the localisation region of FRB 20171019A (RA = 7.5′ +and DEC = 7′). Using +uvotdetect, we find 30 sources above the 5𝜎 level and within the +FRB 20171019A uncertainty region. Using a 3 arcsec maximum +separation, which is slightly larger than the UVOT PSF (Breeveld +et al. 2010), these sources are cross-matched with known catalogue +sources. We find that out of the 30 sources detected by UVOT, 28 +are spatially coincident with stars catalogued in the SDSS catalogue +(DR12; Alam et al. 2015), and one source is coincident with a galaxy +(AGN broadline SDSS ID: 1237652599570890948 at 𝑧 ∼ 0.156). +This galaxy is also detected by the MeerKAT radio observations. We +use the NASA/IPAC Extragalactic Database (NED)8 to search for +known galaxies in the FRB 20171019A uncertainty regions. We find +multiple galaxies with unknown redshifts, therefore we cannot draw +conclusions on the host galaxy from our observations. Using a 50″ +circular ON region centred on the position of FRB 20171019A and +a 50″ OFF region that does not contain any of the detected sources, +we run the uvotsource tool with a 5𝜎 background threshold and +obtain a flux upper limit of $1.4 \times 10^{-16}$ erg cm$^{-2}$ s$^{-1}$ Å$^{-1}$ without +applying a Galactic extinction correction. +The XRT summed image is shown in Figure 5. At the edge +of the field-of-view, we detect a source spatially coincident with +the Wolf 1561 star. As we consider this source unrelated to the +FRB, we use the online Swift-XRT data products generator (Evans +et al. 2007, 2009) to derive upper limits in the 0.3–10 keV range on the count rate of 0.001885 counts s$^{-1}$.
Using +WebPIMMS9 (v4.11a) and assuming a weighted average $N_{\mathrm{H}} = 5.12 \times 10^{20}$ cm$^{-2}$ from the direction of the source estimated from the +NASA’s HEASARC 10 online tools (HI4PI Collaboration et al. +2016) and a power law model with a photon index = 2, this upper +limit translates to an energy flux of $6.6 \times 10^{-14}$ erg cm$^{-2}$ s$^{-1}$ +($8.3 \times 10^{-14}$ erg cm$^{-2}$ s$^{-1}$ unabsorbed). +3.3 H.E.S.S. +No significant gamma-ray excess above the expected background +is detected from the direction of FRB 20171019A, with 52 gamma +candidate events from the source region and 524 background events. +A second analysis using an independent event calibration and reconstruction (Parsons & Hinton 2014) confirms this result. A search for +variable emission on timescales ranging from milliseconds to several minutes with tools provided by Brun et al. (2020) does not reveal +any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confidence level (C. L.) upper limits on the photon flux are derived using +the method described by Rolke et al. (2005). The energy threshold +of the data is highly dependent on the zenith angle of the observations. For these observations, the zenith angles range from 15 to 25 +deg, which leads to an energy threshold for the stacked data set of +𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE) +7 https://www.wis-tns.org/object/20171019a +8 https://ned.ipac.caltech.edu; NED is funded by the National +Aeronautics and Space Administration and operated by the California Institute of Technology +9 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/ +w3pimms.pl +10 https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh. +pl MNRAS 000, 1–15 (2021) -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 7 -Figure 2. FRB 20190714A MeerKAT epoch I image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle.
White contours -(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The -white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our -e-MERLIN observations. + Chibueze et al. +Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the +MeerKAT and NVSS sources. +gamma-ray flux above that threshold and assuming an energy dependence following 𝐸 +−2 +is Φ(𝐸 > 120 GeV) < 2.10 × 10−12 cm−2s +−1 +or Φ(𝐸 > 120 GeV) < 1.7 × 10−12 erg cm−2 +s +−1 +. A variation of +± 0.5 of the assumed spectral index leads to a variation in the upper +limit of less than ± 19%. A map of energy flux upper limits covering +the full region accessible within the H.E.S.S. field of view above +120 GeV is given in Figure 6. +4 DISCUSSION +Of the targeted FRB fields reported here, only FRB 20190714A +is observed to be spatially coincident with a persistent radio continuum source. We obtain an upper limit of ∼ 15 𝜇Jy beam−1 +for +FRBs 20190711A and 20171019A, respectively, and a peak intensity of ∼ 53 𝜇Jy beam−1 +for the emission coincident with FRB +20190714A. This source is detected at both epochs with similar +intensities within the measured rms of the images (see Tables 1 and +2 for details). The values in the Table 2 are derived by carrying +out 2D Gaussian fit using similar ellipses enclosing the detected +persistent emission. The average flux density is ∼ 3 times less than +that of the persistent source associated with FRBs 20121102A, one +of the most prolific repeaters, located at 𝑧 = 0.19273(8). Persistent +radio emission from FRB 20201124A was detected by the uGMRT +(Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular +scales of a few arcseconds. 
However, it is resolved out at scales of +∼ 0.1 arcseconds with the European VLBI Network (Marcote et al. +2021) suggesting that it is not a compact source directly associated +with the FRB. In contrast, the other localised, prolific repeating +FRB 20180916A has no persistent radio counterpart. +In the image in Figure 3 one can see that the persistent radio +source lies at the edge of the optical extent of the host galaxy +as seen in PanSTARRS observations (Heintz et al. 2020). Our +derived 1283 MHz peak position with MeerKAT places it just +1. +0068 away from the position of FRB 20190714A (𝛼𝐽2000, 𝛿𝐽2000 += 12ℎ15𝑚55𝑠.12, -13◦01015. +0070; Heintz et al. 2020). The positional uncertainty on the FRB position is 0. +00283. Similarly, the peak +1.51 GHz e-MERLIN position of the persistent radio source is separated from the position of FRB 20190714A by 0. +0053. The persistent +source near FRB 20190714A has a flux broadly consistent with the +MeerKAT flux and is unresolved on the e-MERLIN baselines. The MNRAS 000, 1–15 (2021) -8 Chibueze et al. -Figure 3. FRB 20190714A MeerKAT epoch II image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours -(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The -white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our -e-MERLIN observations. +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 7 +Figure 2. FRB 20190714A MeerKAT epoch I image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours +(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. 
The +white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our +e-MERLIN observations. MNRAS 000, 1–15 (2021) -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 9 -Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles -indicate sources detected above 5𝜎. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper -limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar -et al. (2019). -Table 1. Details of the FRB fields observed with MeerKAT. -Field name Observation date Synthesized beam rms (𝜇Jy beam−1 -) Detected? -FRB 20171019A 28 September 2019 – No (calibration failure) -FRB 20171019A 18 October 2019 6. -008 × 5. -000 5.2 < 15𝜇Jy beam−1 -FRB 20190711A 23 August 2019 11. -007 × 4. -009 4.9 < 15𝜇Jy beam−1 -FRB 20190711A 09 September 2019 12. -005 × 4. -009 4.6 < 15𝜇Jy beam−1 -FRB 20190714A 14 September 2019 7. -001 × 6. -002 4.2 54.4 𝜇Jy beam−1 -FRB 20190714A 28 September 2019 6. -005 × 5. -001 5.8 52.0 𝜇Jy beam−1 -Table 2. Details of the radio continuum source associated with FRB 20190714A. -Field name Observation date Telescope 𝜈centre (GHz) 𝛼J2000 𝛿J2000 Maj. × min. axis Pos. angle Int. flux density -FRB 20190714A 28 September 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠 -.154 -13◦01017. -0030 9. -006 × 7. -004 88.7◦ 87.4 𝜇Jy -FRB 20190714A 18 October 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠 -.193 −13◦01017. -0018 8. -002 × 6. -004 12.2◦ 80.7 𝜇Jy -FRB 20190714A 13 January 2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠 -.116 −13◦01014. -0051 0. -0015 × 0. -0065 17.6◦ 107.5 𝜇Jy -large offset from the centre of the galaxy makes the persistent source -unlikely to be an AGN. So far this FRB has not been seen to repeat. 
-Higher resolution imaging will be required to be certain of a direct -association of the persistent source with the FRB. We did not have -sufficient sensitivity in the sub-band images, thus, we are unable to -derive the spectral index of the emission of the host galaxy. -Our e-MERLIN observations probe a different spatial -scale than the size of the persistent radio source associated -with FRB 20121102A. At the angular diameter distance of + Chibueze et al. +Figure 3. FRB 20190714A MeerKAT epoch II image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours +(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The +white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our +e-MERLIN observations. MNRAS 000, 1–15 (2021) -10 Chibueze et al. -Figure 5. XRT summed image of FRB 20171019A region taken during the MWL observation campaign in September - October 2019. The position of the -Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar et al. (2019). -FRB 20190714A (780 Mpc), an unresolved source with an an￾gular size of 0. -006 corresponds to a physical extent of .2.3 kpc. The -uGMRT reported the detection of an unresolved radio emission at -650 MHz with a flux density of 700±100 𝜇Jy (Wharton et al. 2021), -while the JVLA detected persistent emission with a flux density of -340 ± 30 𝜇Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated -spectral index between these frequencies (∼ −0.5, Ricci et al. 2021), -the 1.3 GHz flux density would be ∼ 500 𝜇Jy (similar to the 3-𝜎 -upper limit on observations from 1 − 2 GHz; Law et al. 2021). 
The -flux density we measured for FRB 20190714A is a factor of ∼10 -lower than FRB20201124A, but FRB 20190714A is also a factor -2.6 more distant. Therefore, the flux densities would be comparable -if they were at similar distances. -Given the resolution of MeerKAT we are unable to defini￾tively state whether the persistent emission is associated with a -star-forming region or the FRB itself. However, the increased reso￾lution with the e-MERLIN baselines would tend to favour a compact -source similar to the one observed in FRB 20121102A. One of the -leading models to explain the bursts from, and radio counterpart -to FRB 20121102A, is a young nebula powered flaring magnetar -embedded in a 20–50 year-old supernova remnant (Beloborodov -2017; Metzger et al. 2019). The lack of a bright persistent radio -source associated with the repeater FRB 20180916A suggests that -it is comparatively older at & 200 − 500 years and the persistent -radio source may have faded. In the model by Metzger et al. (2019), -the nebula is suggested to contribute significantly to the rotation -measure and dispersion measure (DM), as well as to the persis￾tent radio luminosity. These values are expected to decrease on a -timescale of a few decades to centuries. Given the association of a -comparatively fainter persistent source, FRB 20190714A may po￾tentially be a repeating FRB whose age lies between that of FRB -20121102A and FRB 20180916A. Millisecond magnetars formed -through standard astrophysical channels such as hydrogen poor su￾perluminous supernovae and long duration gamma-ray bursts are -consistent with the progenitors of FRBs expected in low-metallicity -dwarf galaxies with high specific star-formation rate such as for -FRB 20121102A. However, Margalit et al. 
(2019) note that it is also -possible to form such sources through a variety of channels, includ￾ing binary neutron star mergers and accretion induced collapse of -white dwarfs in environments and host galaxy demographics differ￾ent to FRB 20121102A. Such suggestions are consistent with recent -localisations (e.g. Heintz et al. 2020). -The X-ray and VHE observations with Swift and H.E.S.S. -allows us to probe non-thermal persistent emission associated to -the FRB host galaxy or its source. Recently, H.E.S.S. observed -SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic -magnetar linked to a repeating FRB and its first X-ray counterpart. -Magnetar X-ray flares could in fact be non-thermal in nature (Li et al. -2021) indicating the presence of particle acceleration that could po￾tentially reach the VHE domain. The inverse Compton process is a -primary candidate for the production of VHE non-thermal emission. -H.E.S.S. observations did not lead to a detection of a persistent or a -transient source associated to FRB 20171019A. We found no X-ray -counterparts and thus derived the upper limits to constrain these +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 9 +Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles +indicate sources detected above 5𝜎. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper +limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar +et al. (2019). +Table 1. Details of the FRB fields observed with MeerKAT. +Field name Observation date Synthesized beam rms (𝜇Jy beam−1) Detected? +FRB 20171019A 28 September 2019 – No (calibration failure) +FRB 20171019A 18 October 2019 6. +008 × 5.000 5.2 < 15𝜇Jy beam−1 +FRB 20190711A 23 August 2019 11. 
+007 × 4.009 4.9 < 15𝜇Jy beam−1 +FRB 20190711A 09 September 2019 12. +005 × 4.009 4.6 < 15𝜇Jy beam−1 +FRB 20190714A 14 September 2019 7. +001 × 6.002 4.2 54.4 𝜇Jy beam−1 +FRB 20190714A 28 September 2019 6. +005 × 5.001 5.8 52.0 𝜇Jy beam−1 +Table 2. Details of the radio continuum source associated with FRB 20190714A. +Field name Observation date Telescope 𝜈centre (GHz) 𝛼J2000 𝛿J2000 Maj. × min. axis Pos. angle Int. flux density +FRB 20190714A 28 September 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.154 -13◦01017. +0030 9.006 × 7.004 88.7◦ 87.4 𝜇Jy +FRB 20190714A 18 October 2019 MeerKAT 1.283 12ℎ15𝑚55𝑠.193 −13◦01017. +0018 8.002 × 6.004 12.2◦ 80.7 𝜇Jy +FRB 20190714A 13 January 2021 e-MERLIN 1.510 12ℎ15𝑚55𝑠.116 −13◦01014. +0051 0.0015 × 0.0065 17.6◦ 107.5 𝜇Jy +large offset from the centre of the galaxy makes the persistent source +unlikely to be an AGN. So far this FRB has not been seen to repeat. +Higher resolution imaging will be required to be certain of a direct +association of the persistent source with the FRB. We did not have +sufficient sensitivity in the sub-band images, thus, we are unable to +derive the spectral index of the emission of the host galaxy. +Our e-MERLIN observations probe a different spatial +scale than the size of the persistent radio source associated +with FRB 20121102A. At the angular diameter distance of MNRAS 000, 1–15 (2021) -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 11 -Figure 6. Map of upper limits on the VHE gamma-ray energy flux derived from the H.E.S.S. observations. The limits are valid above 120 GeV and assume -a photon flux distribution following an 𝐸 -−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al. -(2019).The oversampling radius is 0.1◦ -. -emissions. 
In the case of existence of X-ray non-thermal outbursts, -the lack of VHE detection could indicate that inverse Compton is -weak in the vicinity of the magnetars or that the VHE gamma-ray -emission is quenched. This latter scenario could be explained by the -fact that inverse Compton is taking place too close to the magne￾tar’s surface, where pair production and photon splitting could be -responsible for significant energy losses (Hu et al. 2019), preventing -energetic particles and photons to reach the nebula. -No persistent emissions were detected towards FRB -20190711A and FRB 20171019A in our MeerKAT observations -(see Figures 7, 8, and 9), therefore no follow up observations were -conducted towards those FRBs. -5 CONCLUSIONS -Several FRB models envision persistent emission to be associated -with these sources. In this paper, we conducted radio observations -of three FRBs (FRB 20190714A, 20190711A and 20171019A), -and also a multi-wavelength campaign on one of these (FRB -20171019A). -We detected persistent compact radio emission associated with -FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and e￾MERLIN radio telescope. This represents the first detection of the -radio continuum emission associated with the host (galaxy) of FRB -20190714A and is only the third known FRB to have such an as￾sociation. We furthermore obtained a radio upper limit of∼ 15𝜇Jy -beam−1 -for the repeating FRBs 20190711A and 20171019A. -We also performed UV, X-ray and VHE observations with the -Swift and H.E.S.S. instruments and obtained upper limits in the three -domains constraining the MWL emissions from FRB 20171019A. -The search for FRB MWL counterparts is ongoing within the -H.E.S.S. collaboration and more results will be published in fu￾ture works. -Given the association of a comparatively fainter persistent -source, FRB 20190714A may potentially be a repeating FRB whose -age lies between that of FRB 20121102A and FRB 20180916A. 
-ACKNOWLEDGEMENTS -This paper makes use of the MeerKAT data (Project ID: SCI￾20190418-VC-01). The MeerKAT telescope is operated by the -South African Radio Astronomy Observatory, which is a facility -of the National Research Foundation, an agency of the Depart￾ment of Science and Innovation (DSI). This work made use of the -Inter-University Institute for Data Intensive Astronomy (IDIA) vi￾sualization lab https://vislab.idia.ac.za. IDIA is a partnership of the -University of Cape Town, the University of Pretoria, the University -of the Western Cape and the South African Radio astronomy Obser￾vatory. e-MERLIN is a National Facility operated by the University -of Manchester at Jodrell Bank Observatory on behalf of STFC. -The authors acknowledge funding from the European Research -Council (ERC) under the European Union’s Horizon 2020 research -and innovation programme (grant agreement No 694745). The sup￾port of the Namibian authorities and of the University of Namibia -in facilitating the construction and operation of H.E.S.S. is grate￾fully acknowledged, as is the support by the German Ministry for + Chibueze et al. +Figure 5. XRT summed image of FRB 20171019A region taken during the MWL observation campaign in September - October 2019. The position of the +Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar et al. (2019). +FRB 20190714A (780 Mpc), an unresolved source with an angular size of 0. +006 corresponds to a physical extent of .2.3 kpc. The +uGMRT reported the detection of an unresolved radio emission at +650 MHz with a flux density of 700±100 𝜇Jy (Wharton et al. 2021), +while the JVLA detected persistent emission with a flux density of +340 ± 30 𝜇Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated +spectral index between these frequencies (∼ −0.5, Ricci et al. 
2021), +the 1.3 GHz flux density would be ∼ 500 𝜇Jy (similar to the 3-𝜎 +upper limit on observations from 1 − 2 GHz; Law et al. 2021). The +flux density we measured for FRB 20190714A is a factor of ∼10 +lower than FRB20201124A, but FRB 20190714A is also a factor +2.6 more distant. Therefore, the flux densities would be comparable +if they were at similar distances. +Given the resolution of MeerKAT we are unable to definitively state whether the persistent emission is associated with a +star-forming region or the FRB itself. However, the increased resolution with the e-MERLIN baselines would tend to favour a compact +source similar to the one observed in FRB 20121102A. One of the +leading models to explain the bursts from, and radio counterpart +to FRB 20121102A, is a young nebula powered flaring magnetar +embedded in a 20–50 year-old supernova remnant (Beloborodov +2017; Metzger et al. 2019). The lack of a bright persistent radio +source associated with the repeater FRB 20180916A suggests that +it is comparatively older at & 200 − 500 years and the persistent +radio source may have faded. In the model by Metzger et al. (2019), +the nebula is suggested to contribute significantly to the rotation +measure and dispersion measure (DM), as well as to the persistent radio luminosity. These values are expected to decrease on a +timescale of a few decades to centuries. Given the association of a +comparatively fainter persistent source, FRB 20190714A may potentially be a repeating FRB whose age lies between that of FRB +20121102A and FRB 20180916A. Millisecond magnetars formed +through standard astrophysical channels such as hydrogen poor superluminous supernovae and long duration gamma-ray bursts are +consistent with the progenitors of FRBs expected in low-metallicity +dwarf galaxies with high specific star-formation rate such as for +FRB 20121102A. However, Margalit et al. 
(2019) note that it is also +possible to form such sources through a variety of channels, including binary neutron star mergers and accretion induced collapse of +white dwarfs in environments and host galaxy demographics different to FRB 20121102A. Such suggestions are consistent with recent +localisations (e.g. Heintz et al. 2020). +The X-ray and VHE observations with Swift and H.E.S.S. +allows us to probe non-thermal persistent emission associated to +the FRB host galaxy or its source. Recently, H.E.S.S. observed +SGR1935+2154 (H.E.S.S. collaboration 2021) that is a Galactic +magnetar linked to a repeating FRB and its first X-ray counterpart. +Magnetar X-ray flares could in fact be non-thermal in nature (Li et al. +2021) indicating the presence of particle acceleration that could potentially reach the VHE domain. The inverse Compton process is a +primary candidate for the production of VHE non-thermal emission. +H.E.S.S. observations did not lead to a detection of a persistent or a +transient source associated to FRB 20171019A. We found no X-ray +counterparts and thus derived the upper limits to constrain these MNRAS 000, 1–15 (2021) -12 Chibueze et al. -Figure 7. FRB 20171019A MeerKAT image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the insert -represent the beam size of MeerKAT. -Education and Research (BMBF), the Max Planck Society, the -German Research Foundation (DFG), the Helmholtz Association, -the Alexander von Humboldt Foundation, the French Ministry of -Higher Education, Research and Innovation, the Centre National -de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU), -the Commissariat à l’énergie atomique et aux énergies alterna￾tives (CEA), the U.K. Science and Technology Facilities Council -(STFC), the Knut and Alice Wallenberg Foundation, the National -Science Centre, Poland grant no. 
2016/22/M/ST9/00382, the South -African Department of Science and Technology and National Re￾search Foundation, the University of Namibia, the National Com￾mission on Research, Science & Technology of Namibia (NCRST), -the Austrian Federal Ministry of Education, Science and Research -and the Austrian Science Fund (FWF), the Australian Research -Council (ARC), the Japan Society for the Promotion of Science -and by the University of Amsterdam. We appreciate the excellent -work of the technical support staff in Berlin, Zeuthen, Heidelberg, -Palaiseau, Paris, Saclay, Tübingen and in Namibia in the construc￾tion and operation of the equipment. This work benefited from -services provided by the H.E.S.S. Virtual Organisation, supported -by the national resource providers of the EGI Federation. -DATA AVAILABILITY -The data underlying this article will be shared on reasonable request -to the corresponding authors. -REFERENCES -Adámek K., Armour W., 2016, arXiv e-prints, p. arXiv:1611.09704 -Adámek K., Armour W., 2019, A GPU Implementation of the Harmonic -Sum Algorithm. p. 489 -Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p. -arXiv:1711.10855 -Aharonian F., et al., 2006, A&A, 457, 899 -Alam S., et al., 2015, The Astrophysical Journal Supplement Series, 219, 12 -Ashton T., et al., 2020, arXiv e-prints, p. arXiv:2001.04510 -Bannister K. W., et al., 2019, Science, 365, 565 -Bassa C. G., et al., 2017, ApJ, 843, L8 -Beloborodov A. M., 2017, ApJ, 843, L26 -Berge D., Funk S., Hinton J., 2007, A&A, 466, 1219 -Bhandari S., et al., 2020, ApJ, 895, L37 -Bolmont J., et al., 2014, Nuclear Instruments and Methods in Physics Re￾search Section A: Accelerators, Spectrometers, Detectors and Associ￾ated Equipment, 761, 46–57 -Breeveld A. A., et al., 2010, Monthly Notices of the Royal Astronomical -Society, 406, 1687 -Brun F., Piel Q., de Naurois M., Bernhard S., 2020, Astropart.Phys., 118, -102429 -Burrows D. N., et al., 2005, Space Sci. 
Rev., 120, 165 -Caleb M., Keane E., 2021, Universe, 7, 453 -Caleb M., Stappers B. W., Rajwade K., Flynn C., 2019, MNRAS, 484, 5500 -Caleb M., et al., 2020, MNRAS, 496, 4565 -Chatterjee S., et al., 2017, Nature, 541, 58 -Chime/FRB Collaboration 2021, The Astronomer’s Telegram, 14497, 1 -Cordes J. M., Wasserman I., 2016, MNRAS, 457, 232 -Dai Z. G., Wang J. S., Yu Y. W., 2017, ApJ, 838, L7 -Dimoudi S., Armour W., 2015, arXiv e-prints, p. arXiv:1511.07343 -Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A., -Armour W., 2018, ApJS, 239, 28 +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 11 +Figure 6. Map of upper limits on the VHE gamma-ray energy flux derived from the H.E.S.S. observations. The limits are valid above 120 GeV and assume +a photon flux distribution following an 𝐸 +−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al. +(2019).The oversampling radius is 0.1◦. +emissions. In the case of existence of X-ray non-thermal outbursts, +the lack of VHE detection could indicate that inverse Compton is +weak in the vicinity of the magnetars or that the VHE gamma-ray +emission is quenched. This latter scenario could be explained by the +fact that inverse Compton is taking place too close to the magnetar’s surface, where pair production and photon splitting could be +responsible for significant energy losses (Hu et al. 2019), preventing +energetic particles and photons to reach the nebula. +No persistent emissions were detected towards FRB +20190711A and FRB 20171019A in our MeerKAT observations +(see Figures 7, 8, and 9), therefore no follow up observations were +conducted towards those FRBs. +5 CONCLUSIONS +Several FRB models envision persistent emission to be associated +with these sources. In this paper, we conducted radio observations +of three FRBs (FRB 20190714A, 20190711A and 20171019A), +and also a multi-wavelength campaign on one of these (FRB +20171019A). 
+We detected persistent compact radio emission associated with +FRB 20190714A (at 𝑧 = 0.2365) using the MeerKAT and eMERLIN radio telescope. This represents the first detection of the +radio continuum emission associated with the host (galaxy) of FRB +20190714A and is only the third known FRB to have such an association. We furthermore obtained a radio upper limit of∼ 15𝜇Jy +beam−1for the repeating FRBs 20190711A and 20171019A. +We also performed UV, X-ray and VHE observations with the +Swift and H.E.S.S. instruments and obtained upper limits in the three +domains constraining the MWL emissions from FRB 20171019A. +The search for FRB MWL counterparts is ongoing within the +H.E.S.S. collaboration and more results will be published in future works. +Given the association of a comparatively fainter persistent +source, FRB 20190714A may potentially be a repeating FRB whose +age lies between that of FRB 20121102A and FRB 20180916A. +ACKNOWLEDGEMENTS +This paper makes use of the MeerKAT data (Project ID: SCI20190418-VC-01). The MeerKAT telescope is operated by the +South African Radio Astronomy Observatory, which is a facility +of the National Research Foundation, an agency of the Department of Science and Innovation (DSI). This work made use of the +Inter-University Institute for Data Intensive Astronomy (IDIA) visualization lab https://vislab.idia.ac.za. IDIA is a partnership of the +University of Cape Town, the University of Pretoria, the University +of the Western Cape and the South African Radio astronomy Observatory. e-MERLIN is a National Facility operated by the University +of Manchester at Jodrell Bank Observatory on behalf of STFC. +The authors acknowledge funding from the European Research +Council (ERC) under the European Union’s Horizon 2020 research +and innovation programme (grant agreement No 694745). The support of the Namibian authorities and of the University of Namibia +in facilitating the construction and operation of H.E.S.S. 
is gratefully acknowledged, as is the support by the German Ministry for MNRAS 000, 1–15 (2021) -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 13 -Figure 8. FRB 20190711A MeerKAT epoch I image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the -insert represent the beam size of MeerKAT. -Eftekhari T., Berger E., Williams P. K. G., Blanchard P. K., 2018, ApJ, 860, -73 -Evans P. A., et al., 2007, A&A, 469, 379 -Evans P. A., et al., 2009, MNRAS, 397, 1177 -Fong W.-f., et al., 2021, ApJ, 919, L23 -H.E.S.S. collaboration 2021, ApJ, 919, 106 -HI4PI Collaboration et al., 2016, A&A, 594, A116 -Heintz K. E., et al., 2020, ApJ, 903, 152 -Heywood I., 2020, oxkat: Semi-automated imaging of MeerKAT observa￾tions (ascl:2009.003) -Hickish J., et al., 2016,Journal of Astronomical Instrumentation, 5, 1641001 -Hilmarsson G. H., et al., 2021, ApJ, 908, L10 -Hu K., Baring M. G., Wadiasingh Z., Harding A. K., 2019, MNRAS, 486, -3327–3349 -Insight-HXMT 2020, SGR J1935+2154 burst list, http://hxmten.ihep. -ac.cn/bfy/331.jhtml -James C. W., et al., 2020, MNRAS, 495, 2416 -Jonas J., MeerKAT Team 2016, in MeerKAT Science: On the Pathway to -the SKA. p. 1 -Kashiyama K., Ioka K., Mészáros P., 2013, ApJ, 776, L39 -Kenyon J. S., Smirnov O. M., Grobler T. L., Perkins S. J., 2018, MNRAS, -478, 2399 -Kumar P., et al., 2019, ApJ, 887, L30 -Kumar P., et al., 2021, MNRAS, 500, 2525 -Law C., Tendulkar S., Clarke T., Aggarwal K., Bethapudy S., 2021, The -Astronomer’s Telegram, 14526, 1 -Li C. K., et al., 2021, Nature Astronomy, -Liu T., Romero G. E., Liu M.-L., Li A., 2016, ApJ, 826, 82 -Lorimer D. R., Bailes M., McLaughlin M. A., Narkevic D. J., Crawford F., -2007, Science, 318, 777 -Lyubarsky Y., 2014, MNRAS: Letters, 442, L9 -Macquart J. 
P., et al., 2020, Nature, 581, 391 -Marcote B., et al., 2017, ApJ, 834, L8 -Marcote B., et al., 2020, Nature, 577, 190 -Marcote B., et al., 2021, The Astronomer’s Telegram, 14603, 1 -Margalit B., Berger E., Metzger B. D., 2019, ApJ, 886, 110 -Mauch T., et al., 2020, ApJ, 888, 61 -McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in -Shaw R. A., Hill F., Bell D. J., eds, Astronomical Society of the Pacific -Conference Series Vol. 376, Astronomical Data Analysis Software and -Systems XVI. p. 127 -Mereghetti S., et al., 2020, ApJ, 898, L29 -Metzger B. D., Margalit B., Sironi L., 2019, MNRAS, 485, 4091 -Offringa A. R., et al., 2014, MNRAS, 444, 606 -Parsons R. D., Hinton J. A., 2014, Astroparticle Physics, 56, 26 -Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p. -arXiv:2107.10113 -Platts E., Weltman A., Walters A., Tendulkar S. P., Gordin J. E. B., Kandhai -S., 2019, Phys. Rep., 821, 1 -Popov S. B., Postnov K. A., 2013, arXiv e-prints, p. arXiv:1307.4924 -Popov S. B., Pshirkov M. S., 2016, MNRAS, 462, L16 -Popov S., Postnov K., Pshirkov M., 2018, International Journal of Modern -Physics D, 27, 1844016 -Prochaska J. X., et al., 2019, Science, 366, 231 -Ravi V., 2019, Nature Astronomy, 3, 928 -Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p. -arXiv:2010.14334 -Ricci R., Piro L., Panessa F., O’Connor B., Lotti S., Bruni G., Zhang B., -2021, The Astronomer’s Telegram, 14549, 1 -Ridnaia A., et al., 2021, Nature Astronomy, in press -Rolke W. A., López A. M., Conrad J., 2005, Nuclear Instruments and Meth￾ods in Physics Research A, 551, 493 -Roming P. W. A., et al., 2005, Space Science Reviews, 120, 95–142 -Tavani M., et al., 2021, Nature Astronomy, 5, 401–407 + Chibueze et al. +Figure 7. FRB 20171019A MeerKAT image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the insert +represent the beam size of MeerKAT. 
+Education and Research (BMBF), the Max Planck Society, the +German Research Foundation (DFG), the Helmholtz Association, +the Alexander von Humboldt Foundation, the French Ministry of +Higher Education, Research and Innovation, the Centre National +de la Recherche Scientifique (CNRS/IN2P3 and CNRS/INSU), +the Commissariat à l’énergie atomique et aux énergies alternatives (CEA), the U.K. Science and Technology Facilities Council +(STFC), the Knut and Alice Wallenberg Foundation, the National +Science Centre, Poland grant no. 2016/22/M/ST9/00382, the South +African Department of Science and Technology and National Research Foundation, the University of Namibia, the National Commission on Research, Science & Technology of Namibia (NCRST), +the Austrian Federal Ministry of Education, Science and Research +and the Austrian Science Fund (FWF), the Australian Research +Council (ARC), the Japan Society for the Promotion of Science +and by the University of Amsterdam. We appreciate the excellent +work of the technical support staff in Berlin, Zeuthen, Heidelberg, +Palaiseau, Paris, Saclay, Tübingen and in Namibia in the construction and operation of the equipment. This work benefited from +services provided by the H.E.S.S. Virtual Organisation, supported +by the national resource providers of the EGI Federation. +DATA AVAILABILITY +The data underlying this article will be shared on reasonable request +to the corresponding authors. +REFERENCES +Adámek K., Armour W., 2016, arXiv e-prints, p. arXiv:1611.09704 +Adámek K., Armour W., 2019, A GPU Implementation of the Harmonic +Sum Algorithm. p. 489 +Adámek K., Dimoudi S., Giles M., Armour W., 2017, arXiv e-prints, p. +arXiv:1711.10855 +Aharonian F., et al., 2006, A&A, 457, 899 +Alam S., et al., 2015, The Astrophysical Journal Supplement Series, 219, 12 +Ashton T., et al., 2020, arXiv e-prints, p. arXiv:2001.04510 +Bannister K. W., et al., 2019, Science, 365, 565 +Bassa C. G., et al., 2017, ApJ, 843, L8 +Beloborodov A. 
M., 2017, ApJ, 843, L26 +Berge D., Funk S., Hinton J., 2007, A&A, 466, 1219 +Bhandari S., et al., 2020, ApJ, 895, L37 +Bolmont J., et al., 2014, Nuclear Instruments and Methods in Physics Research Section A: Accelerators, Spectrometers, Detectors and Associated Equipment, 761, 46–57 +Breeveld A. A., et al., 2010, Monthly Notices of the Royal Astronomical +Society, 406, 1687 +Brun F., Piel Q., de Naurois M., Bernhard S., 2020, Astropart.Phys., 118, +102429 +Burrows D. N., et al., 2005, Space Sci. Rev., 120, 165 +Caleb M., Keane E., 2021, Universe, 7, 453 +Caleb M., Stappers B. W., Rajwade K., Flynn C., 2019, MNRAS, 484, 5500 +Caleb M., et al., 2020, MNRAS, 496, 4565 +Chatterjee S., et al., 2017, Nature, 541, 58 +Chime/FRB Collaboration 2021, The Astronomer’s Telegram, 14497, 1 +Cordes J. M., Wasserman I., 2016, MNRAS, 457, 232 +Dai Z. G., Wang J. S., Yu Y. W., 2017, ApJ, 838, L7 +Dimoudi S., Armour W., 2015, arXiv e-prints, p. arXiv:1511.07343 +Dimoudi S., Adamek K., Thiagaraj P., Ransom S. M., Karastergiou A., +Armour W., 2018, ApJS, 239, 28 MNRAS 000, 1–15 (2021) -14 Chibueze et al. -Figure 9. FRB 20190711A MeerKAT epoch II image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of -the insert represent the beam size of MeerKAT. -Tendulkar S. P., et al., 2017, ApJ, 834, L7 -Thornton D., et al., 2013, Science, 341, 53 -Totani T., 2013, PASJ, 65, L12 -Vieyro F. L., Romero G. E., Bosch-Ramon V., Marcote B., del Valle M. 
V., -2017, A&A, 602, A64 -Wharton R., et al., 2021, The Astronomer’s Telegram, 14529, 1 -Yamasaki S., Totani T., Kiuchi K., 2018, PASJ, 70, 39 -Zhang B., 2018, ApJ, 854, L21 -de Naurois M., Rolland L., 2009, Astroparticle Physics, 32, 231 -APPENDIX A: AUTHOR AFFILIATIONS -1Centre for Space Research, North-West University, Potchefstroom -2531, South Africa -2Department of Physics and Astronomy, Faculty of Physical Sci￾ences, University of Nigeria, Carver Building, 1 University Road, -Nsukka 410001, Nigeria -3 -Jodrell Bank Centre for Astrophysics, Department of Physics and -Astronomy, University of Manchester, Manchester M13 9PL, UK -4Sydney Institute for Astronomy, School of Physics, The University -of Sydney, NSW 2006, Australia -5Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, D￾53121 Bonn, Germany -6 -IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette, -France -7Department of Physics and Electronics, Rhodes University, PO -Box 94, Grahamstown 6140, South Africa -8South African Radio Astronomy Observatory, Black River Park, 2 -Fir Street, Observatory, Cape Town 7925, South Africa -9Astrophysics, Department of Physics, University of Oxford, Keble -Road, Oxford OX1 3RH, UK -10National University of Ireland Galway, University Road, Galway, -H91 TK33, Ireland -11SKA Observatory, Jodrell Bank Observatory, Macclesfield, -Cheshire SK11 9DL, UK -12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place, -Dublin 2, Ireland -13Max-Planck-Institut für Kernphysik, P.O. 
Box 103980, D 69029 -Heidelberg, Germany -14High Energy Astrophysics Laboratory, RAU, 123 Hovsep Emin -St Yerevan 0051, Armenia -15Landessternwarte, Universität Heidelberg, Königstuhl, D 69117 -Heidelberg, Germany -16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille, -France -17Laboratoire Leprince-Ringuet, École Polytechnique, CNRS, In￾stitut Polytechnique de Paris, F-91128 Palaiseau, France -18University of Namibia, Department of Physics, Private Bag -13301, Windhoek 10005, Namibia -19Instytut Fizyki Ja¸drowej PAN, ul. Radzikowskiego 152, 31-342 -Kraków, Poland -20DESY, D-15738 Zeuthen, Germany -21School of Physics, University of the Witwatersrand, 1 Jan Smuts -Avenue, Braamfontein, Johannesburg, 2050 South Africa -22Université de Paris, CNRS, Astroparticule et Cosmologie, F￾75013 Paris, France -23Department of Physics and Electrical Engineering, Linnaeus Uni￾versity, 351 95 Växjö, Sweden -24Laboratoire Univers et Théories, Observatoire de Paris, Univer￾sité PSL, CNRS, Université de Paris, 92190 Meudon, France +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 13 +Figure 8. FRB 20190711A MeerKAT epoch I image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of the +insert represent the beam size of MeerKAT. +Eftekhari T., Berger E., Williams P. K. G., Blanchard P. K., 2018, ApJ, 860, +73 +Evans P. A., et al., 2007, A&A, 469, 379 +Evans P. A., et al., 2009, MNRAS, 397, 1177 +Fong W.-f., et al., 2021, ApJ, 919, L23 +H.E.S.S. collaboration 2021, ApJ, 919, 106 +HI4PI Collaboration et al., 2016, A&A, 594, A116 +Heintz K. E., et al., 2020, ApJ, 903, 152 +Heywood I., 2020, oxkat: Semi-automated imaging of MeerKAT observations (ascl:2009.003) +Hickish J., et al., 2016,Journal of Astronomical Instrumentation, 5, 1641001 +Hilmarsson G. H., et al., 2021, ApJ, 908, L10 +Hu K., Baring M. G., Wadiasingh Z., Harding A. 
K., 2019, MNRAS, 486, +3327–3349 +Insight-HXMT 2020, SGR J1935+2154 burst list, http://hxmten.ihep. +ac.cn/bfy/331.jhtml +James C. W., et al., 2020, MNRAS, 495, 2416 +Jonas J., MeerKAT Team 2016, in MeerKAT Science: On the Pathway to +the SKA. p. 1 +Kashiyama K., Ioka K., Mészáros P., 2013, ApJ, 776, L39 +Kenyon J. S., Smirnov O. M., Grobler T. L., Perkins S. J., 2018, MNRAS, +478, 2399 +Kumar P., et al., 2019, ApJ, 887, L30 +Kumar P., et al., 2021, MNRAS, 500, 2525 +Law C., Tendulkar S., Clarke T., Aggarwal K., Bethapudy S., 2021, The +Astronomer’s Telegram, 14526, 1 +Li C. K., et al., 2021, Nature Astronomy, +Liu T., Romero G. E., Liu M.-L., Li A., 2016, ApJ, 826, 82 +Lorimer D. R., Bailes M., McLaughlin M. A., Narkevic D. J., Crawford F., +2007, Science, 318, 777 +Lyubarsky Y., 2014, MNRAS: Letters, 442, L9 +Macquart J. P., et al., 2020, Nature, 581, 391 +Marcote B., et al., 2017, ApJ, 834, L8 +Marcote B., et al., 2020, Nature, 577, 190 +Marcote B., et al., 2021, The Astronomer’s Telegram, 14603, 1 +Margalit B., Berger E., Metzger B. D., 2019, ApJ, 886, 110 +Mauch T., et al., 2020, ApJ, 888, 61 +McMullin J. P., Waters B., Schiebel D., Young W., Golap K., 2007, in +Shaw R. A., Hill F., Bell D. J., eds, Astronomical Society of the Pacific +Conference Series Vol. 376, Astronomical Data Analysis Software and +Systems XVI. p. 127 +Mereghetti S., et al., 2020, ApJ, 898, L29 +Metzger B. D., Margalit B., Sironi L., 2019, MNRAS, 485, 4091 +Offringa A. R., et al., 2014, MNRAS, 444, 606 +Parsons R. D., Hinton J. A., 2014, Astroparticle Physics, 56, 26 +Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p. +arXiv:2107.10113 +Platts E., Weltman A., Walters A., Tendulkar S. P., Gordin J. E. B., Kandhai +S., 2019, Phys. Rep., 821, 1 +Popov S. B., Postnov K. A., 2013, arXiv e-prints, p. arXiv:1307.4924 +Popov S. B., Pshirkov M. 
S., 2016, MNRAS, 462, L16 +Popov S., Postnov K., Pshirkov M., 2018, International Journal of Modern +Physics D, 27, 1844016 +Prochaska J. X., et al., 2019, Science, 366, 231 +Ravi V., 2019, Nature Astronomy, 3, 928 +Resmi L., Vink J., Ishwara-Chandra C. H., 2020, arXiv e-prints, p. +arXiv:2010.14334 +Ricci R., Piro L., Panessa F., O’Connor B., Lotti S., Bruni G., Zhang B., +2021, The Astronomer’s Telegram, 14549, 1 +Ridnaia A., et al., 2021, Nature Astronomy, in press +Rolke W. A., López A. M., Conrad J., 2005, Nuclear Instruments and Methods in Physics Research A, 551, 493 +Roming P. W. A., et al., 2005, Space Science Reviews, 120, 95–142 +Tavani M., et al., 2021, Nature Astronomy, 5, 401–407 MNRAS 000, 1–15 (2021) -MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 15 -25Sorbonne Université, Université Paris Diderot, Sorbonne Paris -Cité, CNRS/IN2P3, Laboratoire de Physique Nucléaire et de Hautes -Energies, -LPNHE, 4 Place Jussieu, F-75252 Paris, France -26Université Savoie Mont Blanc, CNRS, Laboratoire d’Annecy de -Physique des Particules - IN2P3, 74000 Annecy, France -27Astronomical Observatory, The University of Warsaw, Al. Ujaz￾dowskie 4, 00-478 Warsaw, Poland -28Friedrich-Alexander-Universität Erlangen-Nürnberg, Erlangen -Centre for Astroparticle Physics, Erwin-Rommel-Str. 1, D 91058 -Erlangen, Germany -29University of Oxford, Department of Physics, Denys Wilkinson -Building, Keble Road, Oxford OX1 3RH, UK -30Université Bordeaux, CNRS/IN2P3, Centre d’Études Nucléaires -de Bordeaux Gradignan, 33175 Gradignan, France -31Institut für Physik und Astronomie, Universität Potsdam, Karl￾Liebknecht-Strasse 24/25, D 14476 Potsdam, Germany -32Obserwatorium Astronomiczne, Uniwersytet Jagielloński, ul. 
-Orla 171, 30-244 Kraków, Poland -33Institute of Astronomy, Faculty of Physics, Astronomy and In￾formatics, Nicolaus Copernicus University, Grudziadzka 5, 87-100 -Torun, Poland -34Nicolaus Copernicus Astronomical Center, Polish Academy of -Sciences, ul. Bartycka 18, 00-716 Warsaw, Poland -35Institut für Astronomie und Astrophysik, Universität Tübingen, -Sand 1, D 72076 Tübingen, Germany -36Institut für Physik, Humboldt-Universität zu Berlin, Newtonstr. -15, D 12489 Berlin, Germany -37Laboratoire Univers et Particules de Montpellier, Université -Montpellier, CNRS/IN2P3, CC 72, Place Eugène Bataillon, F￾34095 Montpellier Cedex 5, France -38Institut für Astro- und Teilchenphysik, Leopold-Franzens￾Universität Innsbruck, A-6020 Innsbruck, Austria -39Department of Physics and Astronomy, The University of Leices￾ter, University Road, Leicester, LE1 7RH, United Kingdom -40GRAPPA, Anton Pannekoek Institute for Astronomy, University -of Amsterdam, Science Park 904, 1098 XH Amsterdam, The Nether￾lands -41School of Physical Sciences, University of Adelaide, Adelaide -5005, Australia -42Yerevan Physics Institute, 2 Alikhanian Brothers St., 375036 -Yerevan, Armenia -43Kavli Institute for the Physics and Mathematics of the Universe -(WPI), The University of Tokyo Institutes for Advanced Study -(UTIAS), -The University of Tokyo, 5-1-5 Kashiwa-no-Ha, Kashiwa, Chiba, -277-8583, Japan -44Department of Physics, Konan University, 8-9-1 Okamoto, Hi￾gashinada, Kobe, Hyogo 658-8501, Japan -45RIKEN, 2-1 Hirosawa, Wako, Saitama 351-0198, Japan -This paper has been typeset from a TEX/LATEX file prepared by the author. + Chibueze et al. +Figure 9. FRB 20190711A MeerKAT epoch II image and a zoom-in (insert) around the position of the FRB. The white ellipse on the bottom left corner of +the insert represent the beam size of MeerKAT. +Tendulkar S. P., et al., 2017, ApJ, 834, L7 +Thornton D., et al., 2013, Science, 341, 53 +Totani T., 2013, PASJ, 65, L12 +Vieyro F. L., Romero G. 
E., Bosch-Ramon V., Marcote B., del Valle M. V., +2017, A&A, 602, A64 +Wharton R., et al., 2021, The Astronomer’s Telegram, 14529, 1 +Yamasaki S., Totani T., Kiuchi K., 2018, PASJ, 70, 39 +Zhang B., 2018, ApJ, 854, L21 +de Naurois M., Rolland L., 2009, Astroparticle Physics, 32, 231 +APPENDIX A: AUTHOR AFFILIATIONS +1Centre for Space Research, North-West University, Potchefstroom +2531, South Africa +2Department of Physics and Astronomy, Faculty of Physical Sciences, University of Nigeria, Carver Building, 1 University Road, +Nsukka 410001, Nigeria +3 +Jodrell Bank Centre for Astrophysics, Department of Physics and +Astronomy, University of Manchester, Manchester M13 9PL, UK +4Sydney Institute for Astronomy, School of Physics, The University +of Sydney, NSW 2006, Australia +5Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, D53121 Bonn, Germany +6 +IRFU, CEA, Université Paris-Saclay, F-91191 Gif-sur-Yvette, +France +7Department of Physics and Electronics, Rhodes University, PO +Box 94, Grahamstown 6140, South Africa +8South African Radio Astronomy Observatory, Black River Park, 2 +Fir Street, Observatory, Cape Town 7925, South Africa +9Astrophysics, Department of Physics, University of Oxford, Keble +Road, Oxford OX1 3RH, UK +10National University of Ireland Galway, University Road, Galway, +H91 TK33, Ireland +11SKA Observatory, Jodrell Bank Observatory, Macclesfield, +Cheshire SK11 9DL, UK +12Dublin Institute for Advanced Studies, 31 Fitzwilliam Place, +Dublin 2, Ireland +13Max-Planck-Institut für Kernphysik, P.O. 
Box 103980, D 69029 +Heidelberg, Germany +14High Energy Astrophysics Laboratory, RAU, 123 Hovsep Emin +St Yerevan 0051, Armenia +15Landessternwarte, Universität Heidelberg, Königstuhl, D 69117 +Heidelberg, Germany +16Aix Marseille Université, CNRS/IN2P3, CPPM, Marseille, +France +17Laboratoire Leprince-Ringuet, École Polytechnique, CNRS, Institut Polytechnique de Paris, F-91128 Palaiseau, France +18University of Namibia, Department of Physics, Private Bag +13301, Windhoek 10005, Namibia +19Instytut Fizyki Ja¸drowej PAN, ul. Radzikowskiego 152, 31-342 +Kraków, Poland +20DESY, D-15738 Zeuthen, Germany +21School of Physics, University of the Witwatersrand, 1 Jan Smuts +Avenue, Braamfontein, Johannesburg, 2050 South Africa +22Université de Paris, CNRS, Astroparticule et Cosmologie, F75013 Paris, France +23Department of Physics and Electrical Engineering, Linnaeus University, 351 95 Växjö, Sweden +24Laboratoire Univers et Théories, Observatoire de Paris, Université PSL, CNRS, Université de Paris, 92190 Meudon, France MNRAS 000, 1–15 (2021) +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 15 +25Sorbonne Université, Université Paris Diderot, Sorbonne Paris +Cité, CNRS/IN2P3, Laboratoire de Physique Nucléaire et de Hautes +Energies, +LPNHE, 4 Place Jussieu, F-75252 Paris, France +26Université Savoie Mont Blanc, CNRS, Laboratoire d’Annecy de +Physique des Particules - IN2P3, 74000 Annecy, France +27Astronomical Observatory, The University of Warsaw, Al. Ujazdowskie 4, 00-478 Warsaw, Poland +28Friedrich-Alexander-Universität Erlangen-Nürnberg, Erlangen +Centre for Astroparticle Physics, Erwin-Rommel-Str. 
1, D 91058 +Erlangen, Germany +29University of Oxford, Department of Physics, Denys Wilkinson +Building, Keble Road, Oxford OX1 3RH, UK +30Université Bordeaux, CNRS/IN2P3, Centre d’Études Nucléaires +de Bordeaux Gradignan, 33175 Gradignan, France +31Institut für Physik und Astronomie, Universität Potsdam, KarlLiebknecht-Strasse 24/25, D 14476 Potsdam, Germany +32Obserwatorium Astronomiczne, Uniwersytet Jagielloński, ul. +Orla 171, 30-244 Kraków, Poland +33Institute of Astronomy, Faculty of Physics, Astronomy and Informatics, Nicolaus Copernicus University, Grudziadzka 5, 87-100 +Torun, Poland +34Nicolaus Copernicus Astronomical Center, Polish Academy of +Sciences, ul. Bartycka 18, 00-716 Warsaw, Poland +35Institut für Astronomie und Astrophysik, Universität Tübingen, +Sand 1, D 72076 Tübingen, Germany +36Institut für Physik, Humboldt-Universität zu Berlin, Newtonstr. +15, D 12489 Berlin, Germany +37Laboratoire Univers et Particules de Montpellier, Université +Montpellier, CNRS/IN2P3, CC 72, Place Eugène Bataillon, F34095 Montpellier Cedex 5, France +38Institut für Astro- und Teilchenphysik, Leopold-FranzensUniversität Innsbruck, A-6020 Innsbruck, Austria +39Department of Physics and Astronomy, The University of Leicester, University Road, Leicester, LE1 7RH, United Kingdom +40GRAPPA, Anton Pannekoek Institute for Astronomy, University +of Amsterdam, Science Park 904, 1098 XH Amsterdam, The Netherlands +41School of Physical Sciences, University of Adelaide, Adelaide +5005, Australia +42Yerevan Physics Institute, 2 Alikhanian Brothers St., 375036 +Yerevan, Armenia +43Kavli Institute for the Physics and Mathematics of the Universe +(WPI), The University of Tokyo Institutes for Advanced Study +(UTIAS), +The University of Tokyo, 5-1-5 Kashiwa-no-Ha, Kashiwa, Chiba, +277-8583, Japan +44Department of Physics, Konan University, 8-9-1 Okamoto, Higashinada, Kobe, Hyogo 658-8501, Japan +45RIKEN, 2-1 Hirosawa, Wako, Saitama 351-0198, Japan +This paper has been typeset from a 
TEX/LATEX file prepared by the author. +MNRAS 000, 1–15 (2021) \ No newline at end of file diff --git a/read/results/pdfium/2201.00151.txt b/read/results/pdfium/2201.00151.txt index c2ba066..2b542c7 100644 --- a/read/results/pdfium/2201.00151.txt +++ b/read/results/pdfium/2201.00151.txt @@ -1,1383 +1,1378 @@ -arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022 -Astronomy & Astrophysics manuscript no. Populations4 ©ESO 2022 -January 4, 2022 -Multiple stellar populations in Schwarzschild modeling -and the application to the Fornax dwarf -Klaudia Kowalczyk and Ewa L. Łokas -Nicolaus Copernicus Astronomical Center, Polish Academy of Sciences, Bartycka 18, 00-716 Warsaw, Poland -e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl -January 4, 2022 -ABSTRACT -Dwarf spheroidal (dSph) galaxies are believed to be strongly dark matter dominated and thus are considered perfect objects to study -dark matter distribution and test theories of structure formation. They possess resolved, multiple stellar populations that offer new -possibilities for modeling. A promising tool for the dynamical modeling of these objects is the Schwarzschild orbit superposition -method. In this work we extend our previous implementation of the scheme to include more than one population of stars and a more -general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in -the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations -by metallicity and demonstrate that in spite of larger sampling errors the increased number of constraints leads to significantly tighter -confidence regions on the recovered density and velocity anisotropy profiles. We then applied the method to the Fornax dSph galaxy -with stars similarly divided into two populations. 
In comparison with our earlier work, we find the anisotropy parameter to be slightly -increasing, rather than decreasing, with radius and more strongly constrained. We are also able to infer anisotropy for each stellar -population separately and find them to be significantly different. -Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxies: -star clusters: individual: Fornax -1. Introduction -Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo -1998; Tolstoy et al. 2009) are considered to be a perfect tool to -test our current theories of structure formation involving dark -matter in the context of near-field cosmology. The objects are -believed to be strongly dark matter dominated with mass-to-light -ratios even on the order of a few hundred solar units. Due to their -proximity they are also the only extragalactic systems where in￾dividual stars can be resolved and their velocities measured of￾fering the possibility to create interesting dynamical modeling -techniques. -The first estimates of dark matter content in dSph galaxies -were based on a single measurement of the line-of-sight velocity -dispersion of the stars and the application of the virial theorem. -As the samples of the stars with kinematic measurements grew, -it became possible to estimate the profile of the velocity disper￾sion and model it using the Jeans equation (Binney & Tremaine -2008). Since the stars in the galaxy can move on a variety -of orbits, from circular to radial, the degeneracy between the -anisotropy of the orbits and the mass distribution is inherent in -this type of modeling. The reason for this lies in the fact that -different combinations of these quantities can reproduce the ve￾locity dispersion profile equally well. -A way to overcome this issue, at least partially, is to resort to -higher order line-of-sight velocity moments, such as the kurto￾sis, and use the corresponding Jeans equations. 
Since the kurto￾sis is more sensitive to the velocity anisotropy than to the mass -distribution, useful constraints can be obtained on both. Still, the -method requires large kinematic samples to estimate the velocity -moments reliably and some assumption on the functional form -of the anisotropy (Łokas 2002; Łokas et al. 2005). -The Schwarzschild modeling technique (Schwarzschild -1979) offers a different approach to estimate the properties of -dSph galaxies without prior assumptions on the type of orbits. -It relies on building a galaxy model out of a set of best-fitting -orbits probed in the range of energy and angular momenta. In -this method, the anisotropy of the stellar orbits comes out as a -result of the modeling in the same way as the density profile. Al￾though it has been originally developed for large elliptical galax￾ies (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt et al. -2015), it has recently been adopted for use on discrete data -characteristic of dSph galaxies and applied to a number of -dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans -(Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi -2013; Breddels et al. 2013; Kowalczyk et al. 2019). -Many dSph galaxies show signs of the presence of multiple -stellar populations resulting from a few star formation episodes -(Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016; -Pace et al. 2020). This observation offers a way to improve the -modeling methods since, assuming dynamical equilibrium, all -populations are supposed to be influenced by the same under￾lying gravitational potential of the galaxy, but they have dif￾ferent distributions so more constraints can be imposed during -the modeling. This approach was first used by Battaglia et al. -(2008) to model the mass distribution in the Sculptor dSph -galaxy. 
A few attempts have also been made to constrain the -inner slope of the dark matter profile in dSph galaxies using -this technique (Walker & Peñarrubia 2011; Amorisco & Evans -2012; Hayashi et al. 2018) in order to resolve the so-called cusp￾core problem. It has been shown to be difficult, however, due +arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022 +Astronomy & Astrophysics manuscript no. Populations4 ©ESO 2022 +January 4, 2022 +Multiple stellar populations in Schwarzschild modeling +and the application to the Fornax dwarf +Klaudia Kowalczyk and Ewa L. Łokas +Nicolaus Copernicus Astronomical Center, Polish Academy of Sciences, Bartycka 18, 00-716 Warsaw, Poland +e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl +January 4, 2022 +ABSTRACT +Dwarf spheroidal (dSph) galaxies are believed to be strongly dark matter dominated and thus are considered perfect objects to study +dark matter distribution and test theories of structure formation. They possess resolved, multiple stellar populations that offer new +possibilities for modeling. A promising tool for the dynamical modeling of these objects is the Schwarzschild orbit superposition +method. In this work we extend our previous implementation of the scheme to include more than one population of stars and a more +general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in +the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations +by metallicity and demonstrate that in spite of larger sampling errors the increased number of constraints leads to significantly tighter +confidence regions on the recovered density and velocity anisotropy profiles. We then applied the method to the Fornax dSph galaxy +with stars similarly divided into two populations. 
In comparison with our earlier work, we find the anisotropy parameter to be slightly +increasing, rather than decreasing, with radius and more strongly constrained. We are also able to infer anisotropy for each stellar +population separately and find them to be significantly different. +Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxies: +star clusters: individual: Fornax +1. Introduction +Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo +1998; Tolstoy et al. 2009) are considered to be a perfect tool to +test our current theories of structure formation involving dark +matter in the context of near-field cosmology. The objects are +believed to be strongly dark matter dominated with mass-to-light +ratios even on the order of a few hundred solar units. Due to their +proximity they are also the only extragalactic systems where individual stars can be resolved and their velocities measured offering the possibility to create interesting dynamical modeling +techniques. +The first estimates of dark matter content in dSph galaxies +were based on a single measurement of the line-of-sight velocity +dispersion of the stars and the application of the virial theorem. +As the samples of the stars with kinematic measurements grew, +it became possible to estimate the profile of the velocity dispersion and model it using the Jeans equation (Binney & Tremaine +2008). Since the stars in the galaxy can move on a variety +of orbits, from circular to radial, the degeneracy between the +anisotropy of the orbits and the mass distribution is inherent in +this type of modeling. The reason for this lies in the fact that +different combinations of these quantities can reproduce the velocity dispersion profile equally well. +A way to overcome this issue, at least partially, is to resort to +higher order line-of-sight velocity moments, such as the kurtosis, and use the corresponding Jeans equations. 
Since the kurtosis is more sensitive to the velocity anisotropy than to the mass +distribution, useful constraints can be obtained on both. Still, the +method requires large kinematic samples to estimate the velocity +moments reliably and some assumption on the functional form +of the anisotropy (Łokas 2002; Łokas et al. 2005). +The Schwarzschild modeling technique (Schwarzschild +1979) offers a different approach to estimate the properties of +dSph galaxies without prior assumptions on the type of orbits. +It relies on building a galaxy model out of a set of best-fitting +orbits probed in the range of energy and angular momenta. In +this method, the anisotropy of the stellar orbits comes out as a +result of the modeling in the same way as the density profile. Although it has been originally developed for large elliptical galaxies (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt et al. +2015), it has recently been adopted for use on discrete data +characteristic of dSph galaxies and applied to a number of +dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans +(Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi +2013; Breddels et al. 2013; Kowalczyk et al. 2019). +Many dSph galaxies show signs of the presence of multiple +stellar populations resulting from a few star formation episodes +(Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016; +Pace et al. 2020). This observation offers a way to improve the +modeling methods since, assuming dynamical equilibrium, all +populations are supposed to be influenced by the same underlying gravitational potential of the galaxy, but they have different distributions so more constraints can be imposed during +the modeling. This approach was first used by Battaglia et al. +(2008) to model the mass distribution in the Sculptor dSph +galaxy. 
A few attempts have also been made to constrain the +inner slope of the dark matter profile in dSph galaxies using +this technique (Walker & Peñarrubia 2011; Amorisco & Evans +2012; Hayashi et al. 2018) in order to resolve the so-called cuspcore problem. It has been shown to be difficult, however, due Article number, page 1 of 12 -A&A proofs: manuscript no. Populations4 -Table 1. Properties of the Illustris galaxy used to create mock data. -Property Value -Subhalo ID 16960 -Number of stellar particles (N⋆) 70446 -Number of dark matter particles (NDM) 78448 -Stellar mass (M⋆) 5.74 × 1010 M⊙ -Dark matter mass (MDM) 4.91 × 1011 M⊙ -Mean mass of stellar particles 815808 M⊙ -Stellar half-mass radius 9.99 kpc -Stellar half-number radius (r1/2) 9.6 kpc -Axis ratio c/a within r1/2 0.907 -Axis ratio b/a within r1/2 0.949 -Triaxiality 0.56 -to the nonsphericity of the dwarfs that introduces biases in such -measurements (Kowalczyk et al. 2013; Genina et al. 2018). -In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we -developed the Schwarzschild technique in the form applicable to -binned velocity moments of a single tracer and verified its abil￾ity to reproduce the mass distribution and velocity anisotropy of -simulated galaxies. We have also studied biases resulting from -the nonsphericity of the modeled objects. Later, we applied the -method to model the kinematics of the Fornax dSph galaxy esti￾mating its mass and anisotropy profiles with unprecedented pre￾cision. -In this paper we extend our Schwarzschild modeling tech￾nique to include multiple stellar populations with the aim to -constrain the properties of dSph galaxies even more strongly. -We test our approach on a realistic simulated galaxy formed in -the cosmological context, originating from the Illustris project -(Vogelsberger et al. 2014a). 
Although no precise analogues of -dSph galaxies are available in this simulation because of the res￾olution, we use a more massive galaxy but with properties oth￾erwise similar to dSphs. The reliability of the modeling does not -depend on the particular value of the mass so we believe these -tests to be viable. We do not attempt to constrain the inner dark -matter density profile (which is poorly resolved anyway) but try -to put tighter limits on the estimates of the mass and anisotropy -profiles. Finally, we apply the improved method to the available -kinematic data for the distinct stellar populations of the Fornax -dSph. -This paper is organized as follows. In Section 2 we present -the data for the simulated galaxy as well as their splitting into -stellar populations and mock observations along the main axes. -Section 3 contains an overview of our modeling method, the ap￾plication of the method to all stars and to two populations, and -a comparison of the results obtained with these two approaches. -The results of the application of the method to the Fornax dSph -galaxy are presented in Section 4. We discuss our findings and -summarize the paper in Section 5. -2. Mock data -2.1. Selection of the simulated galaxy -In order to test our modeling method on realistic simulated -data, we decided to use a galaxy from the Illustris project -(Vogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al. -2015), namely the Illustris-1 cosmological simulation. This sim￾ulation follows the formation and evolution of galaxies from the -early Universe to the present by solving gravity and hydrody￾namics, as well as modeling of star formation, galactic winds, -SFR [M -⊙ yr --1 -] -t [Gyr] - 0 - 4 - 8 - 12 - 16 - 0 2 4 6 8 10 12 -Fig. 1. Star formation rate as a function of the age of the Universe in -the simulated galaxy from the Illustris project used to create mock data. 
-The black and gray vertical arrows indicate the last mergers which the -galaxy underwent, wet and dry, respectively. t [Gyr] -Z [Z⊙] - 0 - 2 - 4 - 6 - 8 - 10 - 0 1 2 3 4 5 - 0 - 2 - 4 - 6 -N [10 -2 -] -Fig. 2. Number of stars as a function of their metallicity and time of -formation (the age of the Universe) in the simulated galaxy. The vertical -line indicates the applied split into stellar populations. -magnetic fields, and the feedback from black holes. Although -dwarf galaxies that are of our interest here are not resolved in the -suite, this can be easily overcome with the appropriate choice of -the object and the treatment of data. -As the key properties of dSph galaxy equivalents we iden￾tified: the lack of gas, the lack of a black hole, a low spin, -the stellar mass much smaller than the dark matter mass and a -nearly spherical shape. The last condition was adopted in an at￾tempt to avoid any strong bias introduced by the spherical mod￾eling of a nonspherical object. Moreover, we required the galaxy -to possess a significant number of both stellar and dark mat￾ter particles (over 105 -), and a well resolved center. Due to the -large softening scale for dark matter particles in the simulation -(ǫDM = 1.42 kpc), we looked for an object in which even the -more concentrated stellar population (see Section 2.2) extended -over 43 kpc so that the region affected by the numerical artifacts -was enclosed within 2-3 innermost data bins (we used 20 linearly -spaced spatial bins, see Section 3.1). -Out of 27345 galaxies listed in the catalog of stellar circu￾larities, angular momenta, and axis ratios published by the Illus￾tris team (Genel et al. 2015) containing subhalos with the stellar -mass larger than 109 M⊙, only a few met our restrictive require￾Article number, page 2 of 12 -K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in Schwarzschild modeling --80 --40 -0 -40 -80 -POPULATION I -[kpc] -major intermediate minor - 5.3 - 5.9 - 6.5 - 7.1 - 7.7 -log(Σ) [M⊙/kpc -2 -] --80 --40 -0 -40 -POPULATION II -[kpc] --160 --80 - 0 - 80 - 160 -V [km/s] --80 --40 -0 -40 --80 -40 0 40 -POPULATION II -[kpc] -[kpc] --80 -40 0 40 -POPULATION II -[kpc] --80 -40 0 40 80 -POPULATION II -[kpc] - 0 - 30 - 60 - 90 -σ [km/s] --80 --40 -0 -40 -80 -POPULATION II -[kpc] -major intermediate minor - 5.3 - 5.9 - 6.5 - 7.1 - 7.7 -log(Σ) [M⊙/kpc -2 -] --80 --40 -0 -40 -POPULATION II -[kpc] --160 --80 - 0 - 80 - 160 -V [km/s] --80 --40 -0 -40 --80 -40 0 40 -POPULATION II -[kpc] -[kpc] --80 -40 0 40 -POPULATION II -[kpc] --80 -40 0 40 80 -POPULATION II -[kpc] - 0 - 30 - 60 - 90 -σ [km/s] -Fig. 3. Maps of the projected stellar density, mean stellar velocity, and stellar velocity dispersion (in rows) for two stellar populations: the metal￾rich population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along the principal axes determined -for all stars (in columns, along the major, the intermediate, and the minor axis, respectively). --1 --0.5 - 0 - 0.5 - 1 - 1 10 100 -β(r) -r [kpc] --1 --0.5 - 0 - 0.5 - 1 - 0 10 20 30 40 50 -β(r) -r [kpc] -all stars -pop I -pop II - 40 - 60 - 80 - 100 - 120 - 1 10 100 -σr(r) -r [kpc] - 40 - 60 - 80 - 100 - 120 - 0 10 20 30 40 50 -σr(r) -r [kpc] - 40 - 60 - 80 - 100 - 120 - 1 10 100 -σt(r) -r [kpc] - 40 - 60 - 80 - 100 - 120 - 0 10 20 30 40 50 -σt(r) -r [kpc] -Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calcu￾lated from all stars (in red), including only population I (in orange), and only population II (in blue). 
The upper row shows the profiles using the -logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used -in the modeling. -ments. We decided to use a galaxy labeled as subhalo 16960. -All the relevant properties of the galaxy are given in Table 1, -including numbers of particles and total masses for both compo￾nents, and details on the shape of the stellar component: the axis -ratios minor to major (shortest to longest) c/a, intermediate to -major b/a, and the triaxiality parameter T = (a -2 − b -2 -)/(a -2 − c -2 -). -We distinguish between the half-mass radius provided in the Il￾lustris database and the half-number radius r1/2, which we use -for further calculations in this paper. The difference between the -two comes from a small gradient in the stellar mass-to-light ratio -with the distance from the galactic center. Since in our approach -we treat stars as equal-mass particles and refer to number den￾sities (multiplied by the mean mass of a stellar particle when -needed), the application of the half-number radius is more self￾consistent. +A&A proofs: manuscript no. Populations4 +Table 1. Properties of the Illustris galaxy used to create mock data. +Property Value +Subhalo ID 16960 +Number of stellar particles (N⋆) 70446 +Number of dark matter particles (NDM) 78448 +Stellar mass (M⋆) 5.74 × 1010 M⊙ +Dark matter mass (MDM) 4.91 × 1011 M⊙ +Mean mass of stellar particles 815808 M⊙ +Stellar half-mass radius 9.99 kpc +Stellar half-number radius (r1/2) 9.6 kpc +Axis ratio c/a within r1/2 0.907 +Axis ratio b/a within r1/2 0.949 +Triaxiality 0.56 +to the nonsphericity of the dwarfs that introduces biases in such +measurements (Kowalczyk et al. 2013; Genina et al. 2018). +In our recent papers (Kowalczyk et al. 
2017, 2018, 2019) we +developed the Schwarzschild technique in the form applicable to +binned velocity moments of a single tracer and verified its ability to reproduce the mass distribution and velocity anisotropy of +simulated galaxies. We have also studied biases resulting from +the nonsphericity of the modeled objects. Later, we applied the +method to model the kinematics of the Fornax dSph galaxy estimating its mass and anisotropy profiles with unprecedented precision. +In this paper we extend our Schwarzschild modeling technique to include multiple stellar populations with the aim to +constrain the properties of dSph galaxies even more strongly. +We test our approach on a realistic simulated galaxy formed in +the cosmological context, originating from the Illustris project +(Vogelsberger et al. 2014a). Although no precise analogues of +dSph galaxies are available in this simulation because of the resolution, we use a more massive galaxy but with properties otherwise similar to dSphs. The reliability of the modeling does not +depend on the particular value of the mass so we believe these +tests to be viable. We do not attempt to constrain the inner dark +matter density profile (which is poorly resolved anyway) but try +to put tighter limits on the estimates of the mass and anisotropy +profiles. Finally, we apply the improved method to the available +kinematic data for the distinct stellar populations of the Fornax +dSph. +This paper is organized as follows. In Section 2 we present +the data for the simulated galaxy as well as their splitting into +stellar populations and mock observations along the main axes. +Section 3 contains an overview of our modeling method, the application of the method to all stars and to two populations, and +a comparison of the results obtained with these two approaches. +The results of the application of the method to the Fornax dSph +galaxy are presented in Section 4. We discuss our findings and +summarize the paper in Section 5. 
+2. Mock data +2.1. Selection of the simulated galaxy +In order to test our modeling method on realistic simulated +data, we decided to use a galaxy from the Illustris project +(Vogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al. +2015), namely the Illustris-1 cosmological simulation. This simulation follows the formation and evolution of galaxies from the +early Universe to the present by solving gravity and hydrodynamics, as well as modeling of star formation, galactic winds, +SFR [M +⊙ yr +-1 +] +t [Gyr] + 0 + 4 + 8 + 12 + 16 + 0 2 4 6 8 10 12 +Fig. 1. Star formation rate as a function of the age of the Universe in +the simulated galaxy from the Illustris project used to create mock data. +The black and gray vertical arrows indicate the last mergers which the +galaxy underwent, wet and dry, respectively. t [Gyr] +Z [Z⊙] + 0 + 2 + 4 + 6 + 8 + 10 + 0 1 2 3 4 5 + 0 + 2 + 4 + 6 +N [10 +2 +] +Fig. 2. Number of stars as a function of their metallicity and time of +formation (the age of the Universe) in the simulated galaxy. The vertical +line indicates the applied split into stellar populations. +magnetic fields, and the feedback from black holes. Although +dwarf galaxies that are of our interest here are not resolved in the +suite, this can be easily overcome with the appropriate choice of +the object and the treatment of data. +As the key properties of dSph galaxy equivalents we identified: the lack of gas, the lack of a black hole, a low spin, +the stellar mass much smaller than the dark matter mass and a +nearly spherical shape. The last condition was adopted in an attempt to avoid any strong bias introduced by the spherical modeling of a nonspherical object. Moreover, we required the galaxy +to possess a significant number of both stellar and dark matter particles (over 105 +), and a well resolved center. 
Due to the +large softening scale for dark matter particles in the simulation +(ǫDM = 1.42 kpc), we looked for an object in which even the +more concentrated stellar population (see Section 2.2) extended +over 43 kpc so that the region affected by the numerical artifacts +was enclosed within 2-3 innermost data bins (we used 20 linearly +spaced spatial bins, see Section 3.1). +Out of 27345 galaxies listed in the catalog of stellar circularities, angular momenta, and axis ratios published by the Illustris team (Genel et al. 2015) containing subhalos with the stellar +mass larger than 109 M⊙, only a few met our restrictive requireArticle number, page 2 of 1 +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling +-80 +-40 +0 +40 +80 +POPULATION I +[kpc] +major intermediate minor + 5.3 + 5.9 + 6.5 + 7.1 + 7.7 +log(Σ) [M⊙/kpc +2 +] +-80 +-40 +0 +40 +POPULATION II +[kpc] +-160 +-80 + 0 + 80 + 160 +V [km/s] +-80 +-40 +0 +40 +-80 -40 0 40 +POPULATION II +[kpc] +[kpc] +-80 -40 0 40 +POPULATION II +[kpc] +-80 -40 0 40 80 +POPULATION II +[kpc] + 0 + 30 + 60 + 90 +σ [km/s] +-80 +-40 +0 +40 +80 +POPULATION II +[kpc] +major intermediate minor + 5.3 + 5.9 + 6.5 + 7.1 + 7.7 +log(Σ) [M⊙/kpc +2 +] +-80 +-40 +0 +40 +POPULATION II +[kpc] +-160 +-80 + 0 + 80 + 160 +V [km/s] +-80 +-40 +0 +40 +-80 -40 0 40 +POPULATION II +[kpc] +[kpc] +-80 -40 0 40 +POPULATION II +[kpc] +-80 -40 0 40 80 +POPULATION II +[kpc] + 0 + 30 + 60 + 90 +σ [km/s] +Fig. 3. Maps of the projected stellar density, mean stellar velocity, and stellar velocity dispersion (in rows) for two stellar populations: the metalrich population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along the principal axes determined +for all stars (in columns, along the major, the intermediate, and the minor axis, respectively). 
+-1 +-0.5 + 0 + 0.5 + 1 + 1 10 100 +β(r) +r [kpc] +-1 +-0.5 + 0 + 0.5 + 1 + 0 10 20 30 40 50 +β(r) +r [kpc] +all stars +pop I +pop II + 40 + 60 + 80 + 100 + 120 + 1 10 100 +σr(r) +r [kpc] + 40 + 60 + 80 + 100 + 120 + 0 10 20 30 40 50 +σr(r) +r [kpc] + 40 + 60 + 80 + 100 + 120 + 1 10 100 +σt(r) +r [kpc] + 40 + 60 + 80 + 100 + 120 + 0 10 20 30 40 50 +σt(r) +r [kpc] +Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calculated from all stars (in red), including only population I (in orange), and only population II (in blue). The upper row shows the profiles using the +logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used +in the modeling. +ments. We decided to use a galaxy labeled as subhalo 16960. +All the relevant properties of the galaxy are given in Table 1, +including numbers of particles and total masses for both components, and details on the shape of the stellar component: the axis +ratios minor to major (shortest to longest) c/a, intermediate to +major b/a, and the triaxiality parameter T = (a +2 − b2 +)/(a +2 − c2 +). +We distinguish between the half-mass radius provided in the Illustris database and the half-number radius r1/2, which we use +for further calculations in this paper. The difference between the +two comes from a small gradient in the stellar mass-to-light ratio +with the distance from the galactic center. Since in our approach +we treat stars as equal-mass particles and refer to number densities (multiplied by the mean mass of a stellar particle when +needed), the application of the half-number radius is more selfconsistent. Article number, page 3 of 12 -A&A proofs: manuscript no. Populations4 -10-3 -10-1 -101 -103 - 10 100 -n⋆(R) [kpc-2 -] -R [kpc] -major - 10 100 -R [kpc] -intermediate - 10 100 -R [kpc] -minor -all stars -pop I -pop II -Fig. 5. 
Surface number density profiles of the stellar data samples for the simulated galaxy observed along different lines of sight (from the left to -the right). Different lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in -blue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. -2.2. Splitting the stars into populations -Our chosen galaxy shows a complex formation history under￾going multiple mergers which result in extended star formation -with a few star formation bursts. The last wet merger, that is a -merger with an object containing gas, happens at 6.9 Gyr from -the beginning of the simulation, whereas the last dry merger (no -gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain -dynamical equilibrium. We present the star formation rate (SFR) -as a function of time (the age of the Universe) in Fig. 1, where -these last mergers are indicated with black and gray vertical ar￾rows. In Fig. 2 we show the distribution of stars as a function of -their metallicity (in solar units) and the time of formation. In or￾der to divide the stellar sample into two populations we cut it in -half based on the metallicity index of each stellar particle. This -split is indicated in Fig. 2 with the vertical line. With satisfying -accuracy it separates the stars born before and after 4 Gyr since -the start of the simulation, which corresponds to the formation -time before and after the end of the second major star burst, as -shown in Fig. 1. We refer to the metal-rich stars as population I -and to the metal-poor as population II, following the commonly -used nomenclature in astronomy. -In Fig. 3 we present maps of the projected stellar mass den￾sity, line-of-sight velocity, and line-of-sight velocity dispersion -for both populations obtained by projecting the galaxy along its -principal axes. 
The orientation was determined from the iner￾tia tensor calculated from all stars within the half-number radius -r1/2 and therefore is the same in both panels. The two popula￾tions differ significantly in the spatial distribution and kinemat￾ics with the metal-rich (considered to be younger) population I -being more concentrated but having lower central velocity dis￾persion. Both populations show a weak rotation signal at large -distances from the center. -The velocity anisotropy parameter β(r) = 1 − (σ -2 -θ + -σ -2 -φ -)/(2σ -2 -r -), where σi are velocity dispersions in spherical coordi￾nates (Binney & Tremaine 2008), describes the orbital structure -of galaxies. It is one of the most important dynamical properties -of bound systems which cannot be inferred directly from ob￾servations and has to be recovered by dynamical modeling. The -profiles of the anisotropy parameter β as well as the radial σr -and tangential σt = [(σ -2 -θ +σ -2 -φ -)/2]1/2 velocity dispersions for our -simulated galaxy are presented in the consecutive columns of -Fig. 4. Throughout the paper we use red, orange, and blue colors -to indicate values calculated or recovered for all stars, popula￾tion I, and population II, respectively. The two rows of the figure -show the behavior of the parameters at different scales. The top -row plots the profiles with the distance from the center of the -galaxy in the logarithmic scale and shows the drop of anisotropy -at the outer edges of the object. The bottom row uses the linear -distance scale and focuses on the main body of the galaxy. -Figure 5 shows the surface number density profiles of the -stars as measured in different directions. We can see that while -the different subsamples have quite distinguishable profiles, the -difference between the lines of sight is small because the galaxy -is close to spherical. -2.3. 
Observables -We generated nine sets of mock data by observing all stars and -each population separately along the principal axes determined -from all stars. For the observables to be used in the modeling we -divided the stars into 20 bins spaced linearly in distance from -the center of the galaxy up to 50 kpc, measuring the fraction -of the total number of stars and the 2nd, 3rd, and 4th proper -moments of the line-of-sight velocity defined in Eq. 8 and 9 -of Kowalczyk et al. (2018). The profiles of these quantities are -shown in consecutive rows in Fig. 6. Columns correspond to dif￾ferent lines of sight, from the left to the right: along the major, -intermediate, and minor axis of the galaxy. For clarity of the fig￾ure, in each panel we indicate only the error bars for one of the -data sets. However, as the number of stars in a sample remains -roughly constant between the lines of sight, the error bars are -very similar among the panels in a given row. -Although in our previous studies of the reliability of -the Schwarzschild modeling and its applications to real data -(Kowalczyk et al. 2017, 2018, 2019) we approximated the den￾sity profile of the tracer with the Sérsic formula, we found that it -does not provide a good approximation of the data for the simu￾lated galaxy considered here. We therefore fit the projected den￾sity profile with the King formula (King 1962) -I(R) = I0 - - -1 -p -1 + (R/Rc) -2 -− -1 -p -1 + (Rt/Rc) -2 - - -2 -, (1) +A&A proofs: manuscript no. Populations4 +10-3 +10-1 +101 +103 + 10 100 +n⋆(R) [kpc-2 +] +R [kpc] +major + 10 100 +R [kpc] +intermediate + 10 100 +R [kpc] +minor +all stars +pop I +pop II +Fig. 5. Surface number density profiles of the stellar data samples for the simulated galaxy observed along different lines of sight (from the left to +the right). Different lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in +blue). 
Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. +2.2. Splitting the stars into populations +Our chosen galaxy shows a complex formation history undergoing multiple mergers which result in extended star formation +with a few star formation bursts. The last wet merger, that is a +merger with an object containing gas, happens at 6.9 Gyr from +the beginning of the simulation, whereas the last dry merger (no +gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain +dynamical equilibrium. We present the star formation rate (SFR) +as a function of time (the age of the Universe) in Fig. 1, where +these last mergers are indicated with black and gray vertical arrows. In Fig. 2 we show the distribution of stars as a function of +their metallicity (in solar units) and the time of formation. In order to divide the stellar sample into two populations we cut it in +half based on the metallicity index of each stellar particle. This +split is indicated in Fig. 2 with the vertical line. With satisfying +accuracy it separates the stars born before and after 4 Gyr since +the start of the simulation, which corresponds to the formation +time before and after the end of the second major star burst, as +shown in Fig. 1. We refer to the metal-rich stars as population I +and to the metal-poor as population II, following the commonly +used nomenclature in astronomy. +In Fig. 3 we present maps of the projected stellar mass density, line-of-sight velocity, and line-of-sight velocity dispersion +for both populations obtained by projecting the galaxy along its +principal axes. The orientation was determined from the inertia tensor calculated from all stars within the half-number radius +r1/2 and therefore is the same in both panels. 
The two populations differ significantly in the spatial distribution and kinematics with the metal-rich (considered to be younger) population I +being more concentrated but having lower central velocity dispersion. Both populations show a weak rotation signal at large +distances from the center. +The velocity anisotropy parameter $\beta(r) = 1 - (\sigma_\theta^2 + \sigma_\phi^2)/(2\sigma_r^2)$, where $\sigma_i$ are velocity dispersions in spherical coordinates (Binney & Tremaine 2008), describes the orbital structure +of galaxies. It is one of the most important dynamical properties +of bound systems which cannot be inferred directly from observations and has to be recovered by dynamical modeling. The +profiles of the anisotropy parameter β as well as the radial σr +and tangential $\sigma_t = [(\sigma_\theta^2 + \sigma_\phi^2)/2]^{1/2}$ velocity dispersions for our +simulated galaxy are presented in the consecutive columns of +Fig. 4. Throughout the paper we use red, orange, and blue colors +to indicate values calculated or recovered for all stars, population I, and population II, respectively. The two rows of the figure +show the behavior of the parameters at different scales. The top +row plots the profiles with the distance from the center of the +galaxy in the logarithmic scale and shows the drop of anisotropy +at the outer edges of the object. The bottom row uses the linear +distance scale and focuses on the main body of the galaxy. +Figure 5 shows the surface number density profiles of the +stars as measured in different directions. We can see that while +the different subsamples have quite distinguishable profiles, the +difference between the lines of sight is small because the galaxy +is close to spherical. +2.3. Observables +We generated nine sets of mock data by observing all stars and +each population separately along the principal axes determined +from all stars.
For the observables to be used in the modeling we +divided the stars into 20 bins spaced linearly in distance from +the center of the galaxy up to 50 kpc, measuring the fraction +of the total number of stars and the 2nd, 3rd, and 4th proper +moments of the line-of-sight velocity defined in Eq. 8 and 9 +of Kowalczyk et al. (2018). The profiles of these quantities are +shown in consecutive rows in Fig. 6. Columns correspond to different lines of sight, from the left to the right: along the major, +intermediate, and minor axis of the galaxy. For clarity of the figure, in each panel we indicate only the error bars for one of the +data sets. However, as the number of stars in a sample remains +roughly constant between the lines of sight, the error bars are +very similar among the panels in a given row. +Although in our previous studies of the reliability of +the Schwarzschild modeling and its applications to real data +(Kowalczyk et al. 2017, 2018, 2019) we approximated the density profile of the tracer with the Sérsic formula, we found that it +does not provide a good approximation of the data for the simulated galaxy considered here. We therefore fit the projected density profile with the King formula (King 1962) +$I(R) = I_0 \left[ \frac{1}{\sqrt{1 + (R/R_c)^2}} - \frac{1}{\sqrt{1 + (R_t/R_c)^2}} \right]^2$, (1) Article number, page 4 of 12 -K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling -10-3 -10-2 -10-1 -100 - 0 10 20 30 40 -M(R) -R [kpc] -major - 0 10 20 30 40 -R [kpc] -intermediate - 0 10 20 30 40 50 -R [kpc] -minor -3 -6 -9 -12 - 0 10 20 30 40 -m2(R)[10 -3(km s-1 -) -2 -] -R [kpc] - 0 10 20 30 40 -R [kpc] - 0 10 20 30 40 50 -R [kpc] --10 --5 -0 -5 -10 - 0 10 20 30 40 -m3(R)[10 -4(km s-1 -) -3 -] -R [kpc] - 0 10 20 30 40 -R [kpc] - 0 10 20 30 40 50 -R [kpc] -0 -1 -2 -3 -4 - 0 10 20 30 40 -m4(R)[10 -8(km s-1 -) -4 -] -R [kpc] - 0 10 20 30 40 -R [kpc] - 0 10 20 30 40 50 -R [kpc] -all stars -pop I -pop II -Fig. 6.
Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd, -3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present -the values obtained for all stars whereas in orange and blue those for populations I and II, respectively. For clarity of the figure, in each panel we -indicate only the error bars for one of the data sets. -where I0, Rc, and Rt are the model parameters. The profile can -be analytically deprojected to obtain the 3D density -ρ(r) = -ρ0 -z -2 -" -1 -z -arccos(z) − -p -1 − z -2 -# -, (2) -where -ρ0 = -I0 -πRc[1 + (Rt/Rc) -2 -] -3/2 -(3) -and -z = -s -r -2 + R -2 -c -R -2 -c + R -2 -t -. (4) -3. Schwarzschild modeling -In this section we briefly present our modeling method and its -application to the data sets derived for all stars and the two pop￾ulations of the simulated galaxy separately. In both cases our -aim was to recover the profiles of the total mass and the velocity -anisotropy. -3.1. Overview of the method -We follow the approach introduced in Kowalczyk et al. (2018), -namely we model the total mass profile with the mass-to-light -ratio Υ varying with radius: -logΥ(r) = -( -log(Υ0) r ≤ r0 -a(log r − log r0) -c + log(Υ0) r > r0 -(5) +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling +10-3 +10-2 +10-1 +100 + 0 10 20 30 40 +M(R) +R [kpc] +major + 0 10 20 30 40 +R [kpc] +intermediate + 0 10 20 30 40 50 +R [kpc] +minor +3 +6 +9 +12 + 0 10 20 30 40 +m2(R)[10 +3(km s-1 +) +2 +] +R [kpc] + 0 10 20 30 40 +R [kpc] + 0 10 20 30 40 50 +R [kpc] +-10 +-5 +0 +5 +10 + 0 10 20 30 40 +m3(R)[10 +4(km s-1 +) +3 +] +R [kpc] + 0 10 20 30 40 +R [kpc] + 0 10 20 30 40 50 +R [kpc] +0 +1 +2 +3 +4 + 0 10 20 30 40 +m4(R)[10 +8(km s-1 +) +4 +] +R [kpc] + 0 10 20 30 40 +R [kpc] + 0 10 20 30 40 50 +R [kpc] +all stars +pop I +pop II +Fig. 6. 
Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd, +3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present +the values obtained for all stars whereas in orange and blue those for populations I and II, respectively. For clarity of the figure, in each panel we +indicate only the error bars for one of the data sets. +where I0, Rc, and Rt are the model parameters. The profile can +be analytically deprojected to obtain the 3D density +ρ(r) = +ρ0 +z +2 +" +1 +z +arccos(z) − +p +1 − z +2 +# +, (2) +where +ρ0 = +I0 +πRc[1 + (Rt/Rc) +2 +] +3/2 +(3) +and +z = +s +r +2 + R +2 +c +R +2 +c + R +2 +t +. (4) +3. Schwarzschild modeling +In this section we briefly present our modeling method and its +application to the data sets derived for all stars and the two populations of the simulated galaxy separately. In both cases our +aim was to recover the profiles of the total mass and the velocity +anisotropy. +3.1. Overview of the method +We follow the approach introduced in Kowalczyk et al. (2018), +namely we model the total mass profile with the mass-to-light +ratio Υ varying with radius: +logΥ(r) = +( +log(Υ0) r ≤ r0 +a(log r − log r0) +c + log(Υ0) r > r0 +(5) Article number, page 5 of 12 -A&A proofs: manuscript no. Populations4 - 1 - 2 - 3 - 0 - 0.5 - 1 - 1 - 2 - 3 -ALL -Υ0 -a -c - 1 - 2 - 3 - 0 - 0.5 - 1 - 1 - 2 - 3 -POPULATIONS -Υ0 -a -c - 10 - 100 -χ -2 - 1 - 2 - 3 - 0 - 0.5 - 1 - 1 - 2 - 3 -POP I -Υ0 -a -c - 1 - 2 - 3 - 0 - 0.5 - 1 - 1 - 2 - 3 -POP II -Υ0 -a -c - 10 - 100 -χ -2 -Fig. 7. Absolute values of χ -2 obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom -right) for the observations along the major axis of the simulated galaxy. 
The results for the modeling of two populations (top right) were obtained -as an algebraic sum of values for populations I and II. To avoid large numbers in the figure, Υ0 was divided by the mean mass of a stellar particle. -where r is the distance from the center of the galaxy, r0 is a -constant, while Υ0, a, and c are the parameters of a model. We -have assumed log r0 = 0.33 which corresponds to three softening -scales for stellar particles in the Illustris simulation. -We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04 -and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the require￾ment on the total density profile to be monotonically decreasing -with radius. For each set of parameters and for each line of sight -we generated 1200 orbits using 100 values of energy (expressed -with the radius of a circular orbit) spaced logarithmically and -12 values of the relative angular momentum spaced linearly. The -outer radius of the orbit library, that is the apocenter of the most -extended orbit, was set to rout = 165 kpc in order to cover over -0.999 of the total stellar mass based on the fitted King profile -parameters. -We fit the kinematics weighted with the fraction of mass with -the constrained least squares algorithm where different values -of Υ0 were obtained with a simple transformation of velocities -given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In or￾der to smooth out the numerical artifacts, the three-dimensional -χ -2 -spaces were then interpolated with 12-order polynomials -(∼ a -4 -c -4Υ -4 -0 -) that were further used to determine the global min￾imums (identified as the best-fitting models) and 1, 2, 3σ con￾fidence levels which for three parameters correspond to ∆χ -2 = -3.53, 8.02, 14.2 (Press et al. 1992). -3.2. Application to mock data -In the following we present the direct and inferred results of -the Schwarzschild modeling of the data sets described in Sec￾tion 2.3. -First, Fig. 
7 shows the distribution of the absolute values of -the χ -2 -as a function of three parameters of the mass-to-light ra￾tio. In order to avoid unnecessary repetitions, we include only -the plot for the mock data obtained by observing the Illustris -galaxy along its major axis as the others are qualitatively similar. -The four panels refer to fits for all stars (top left), the metal-rich -population I (bottom left), the metal-poor population II (bottom -right), and the one named "populations" (top right) which is the -algebraic sum of values for both populations. -As our parametrization of the mass-to-light ratio is not intu￾itive we present its profiles explicitly in the first rows of the left￾Article number, page 6 of 12 -K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling -106 -107 -108 -109 -1010 - 10 100 -ALL -Υ(r) [M⊙/L⊙] -r [kpc] -major - 10 100 -ALL -r [kpc] -intermediate - 10 100 -r [kpc] -minor -3σ -2σ -1σ -best model -data -104 -106 -108 - 10 100 -ALL -νtot(r) [M⊙ kpc-3 -] -r [kpc] - 10 100 -ALL -r [kpc] - 10 100 -r [kpc] -1010 -1011 -1012 - 10 100 -ALL -Mtot(r) [M⊙] -r [kpc] - 10 100 -ALL -r [kpc] - 10 100 -r [kpc] --2 --1 -0 -1 - 0 10 20 30 40 -ALL -β(r) -r [kpc] - 0 10 20 30 40 -ALL -r [kpc] - 0 10 20 30 40 50 -ALL -r [kpc] -106 -107 -108 -109 -1010 - 10 100 -POPULATIONS -Υ(r) [M⊙/L⊙] -r [kpc] -major - 10 100 -POPULATIONS -r [kpc] -intermediate - 10 100 -r [kpc] -minor -3σ -2σ -1σ -best model -data -104 -106 -108 - 10 100 -POPULATIONS -νtot(r) [M⊙ kpc-3 -] -r [kpc] - 10 100 -POPULATIONS -r [kpc] - 10 100 -r [kpc] -1010 -1011 -1012 - 10 100 -POPULATIONS -Mtot(r) [M⊙] -r [kpc] - 10 100 -POPULATIONS -r [kpc] - 10 100 -r [kpc] --2 --1 -0 -1 - 0 10 20 30 40 -POPULATIONS -β(r) -r [kpc] - 0 10 20 30 40 -POPULATIONS -r [kpc] - 0 10 20 30 40 50 -POPULATIONS -r [kpc] -Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy along the principal -axes. 
In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, interme￾diate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the -1, 2, and 3σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r0 and the outer range of the -data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations. -and right-hand side panels of Fig. 8 for the results obtained for -all stars and the populations, respectively. We further calculate -the total density (second rows) and the total mass content (third -rows). We include the obtained orbit anisotropy within the mod￾eled range in the bottom rows. The consecutive columns present -the results for the observations along the major, intermediate, -and minor axis. Green lines indicate values for the best-fit mod￾els whereas the colored areas of decreasing intensity correspond -to 1, 2, and 3σ confidence regions obtained as extreme values al￾lowed by the models with χ -2 within a given region. In each panel -the true values from the simulation are presented with black lines -while thin vertical lines mark the values of r0 and the outer range -of the data sets beyond which the reliability of results drops sig￾nificantly. The true mass-to-light ratio profile was obtained by -dividing the total mass by the fitted King profiles, therefore the -drop at 100 kpc is the numerical artifact occurring at the very -outskirts of the galaxy. -Whereas in the right-hand side panels of Fig. 8 the resulting -anisotropy is obtained from the fit of all stars and uses only the -location of global minimum and confidence levels from two pop￾ulations (as in the top right panel of Fig. 7), in Fig. 9 we present -another method of calculating the anisotropy. 
In the second and -third row we show the derived profiles for population I and II -separately and combine them as stellar mass weighted average -in the top row. As in previous figures, three columns refer to the -different lines of sight whereas the narrow fourth one shows the -behavior of the true profiles outside the modeled range which, as -we noticed in our previous studies, in a limited way influences -the results. Such an impact is understandable since the stars at -larger distances from the center are still included in the line-of￾sight measurements. -3.3. Comparison of fitting results -The main strength of the two populations method comes from -tracing the underlying gravitational potential at different scales. -As can be seen in the bottom panels of Fig. 7, population I, which -is more concentrated, is also more sensitive to Υ0, but gives -weaker constraints on a or c. On the other hand, population II -attempts to reproduce the total mass content at larger distances -as well, therefore showing stronger coupling between the param￾eters. -The global minimums of the χ -2 distributions for both ap￾proaches, that is modeling one and two populations, which we -identify as the best-fitting models, closely coincide showing that -there is no internal bias in the improved method. However, sig￾nificant differences can be observed when comparing the confi￾dence levels, mainly at 1 and 3 σ. Namely, we find that using -two populations, the constraints we obtain on the density and -anisotropy profile are much stronger. -Additionally, the more accurate method allows us to study -other effects and biases, for example the consequences of the -nonsphericity of the modeled object. Whereas for the fit of all -stars the true values of the density, mass, and anisotropy profiles -are contained within 1 σ confidence regions, the results for the -populations are more or less biased depending on the axis. 
They -are well reproduced for the observation along the intermediate -axis, for which the effects of nonsphericity seem to cancel out, -and more biased for the remaining lines of sight. We notice a -trend from under- to overestimation of the anisotropy when go￾ing from the major to the minor axis. +A&A proofs: manuscript no. Populations4 + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +ALL +Υ0 +a +c + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +POPULATIONS +Υ0 +a +c + 10 + 100 +χ +2 + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +POP I +Υ0 +a +c + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +POP II +Υ0 +a +c + 10 + 100 +χ +2 +Fig. 7. Absolute values of χ +2 obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom +right) for the observations along the major axis of the simulated galaxy. The results for the modeling of two populations (top right) were obtained +as an algebraic sum of values for populations I and II. To avoid large numbers in the figure, Υ0 was divided by the mean mass of a stellar particle. +where r is the distance from the center of the galaxy, r0 is a +constant, while Υ0, a, and c are the parameters of a model. We +have assumed log r0 = 0.33 which corresponds to three softening +scales for stellar particles in the Illustris simulation. +We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04 +and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the requirement on the total density profile to be monotonically decreasing +with radius. For each set of parameters and for each line of sight +we generated 1200 orbits using 100 values of energy (expressed +with the radius of a circular orbit) spaced logarithmically and +12 values of the relative angular momentum spaced linearly. The +outer radius of the orbit library, that is the apocenter of the most +extended orbit, was set to rout = 165 kpc in order to cover over +0.999 of the total stellar mass based on the fitted King profile +parameters. 
+We fit the kinematics weighted with the fraction of mass with +the constrained least squares algorithm where different values +of Υ0 were obtained with a simple transformation of velocities +given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In order to smooth out the numerical artifacts, the three-dimensional +χ +2 +spaces were then interpolated with 12-order polynomials +(∼ a +4 +c +4Υ4 +0 +) that were further used to determine the global minimums (identified as the best-fitting models) and 1, 2, 3σ confidence levels which for three parameters correspond to ∆χ +2 = +3.53, 8.02, 14.2 (Press et al. 1992). +3.2. Application to mock data +In the following we present the direct and inferred results of +the Schwarzschild modeling of the data sets described in Section 2.3. +First, Fig. 7 shows the distribution of the absolute values of +the χ +2 +as a function of three parameters of the mass-to-light ratio. In order to avoid unnecessary repetitions, we include only +the plot for the mock data obtained by observing the Illustris +galaxy along its major axis as the others are qualitatively similar. +The four panels refer to fits for all stars (top left), the metal-rich +population I (bottom left), the metal-poor population II (bottom +right), and the one named "populations" (top right) which is the +algebraic sum of values for both populations. +As our parametrization of the mass-to-light ratio is not intuitive we present its profiles explicitly in the first rows of the leftArticle number, page 6 of 12 +K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in Schwarzschild modeling +106 +107 +108 +109 +1010 + 10 100 +ALL +Υ(r) [M⊙/L⊙] +r [kpc] +major + 10 100 +ALL +r [kpc] +intermediate + 10 100 +r [kpc] +minor +3σ +2σ +1σ +best model +data +104 +106 +108 + 10 100 +ALL +νtot(r) [M⊙ kpc-3 +] +r [kpc] + 10 100 +ALL +r [kpc] + 10 100 +r [kpc] +1010 +1011 +1012 + 10 100 +ALL +Mtot(r) [M⊙] +r [kpc] + 10 100 +ALL +r [kpc] + 10 100 +r [kpc] +-2 +-1 +0 +1 + 0 10 20 30 40 +ALL +β(r) +r [kpc] + 0 10 20 30 40 +ALL +r [kpc] + 0 10 20 30 40 50 +ALL +r [kpc] +106 +107 +108 +109 +1010 + 10 100 +POPULATIONS +Υ(r) [M⊙/L⊙] +r [kpc] +major + 10 100 +POPULATIONS +r [kpc] +intermediate + 10 100 +r [kpc] +minor +3σ +2σ +1σ +best model +data +104 +106 +108 + 10 100 +POPULATIONS +νtot(r) [M⊙ kpc-3 +] +r [kpc] + 10 100 +POPULATIONS +r [kpc] + 10 100 +r [kpc] +1010 +1011 +1012 + 10 100 +POPULATIONS +Mtot(r) [M⊙] +r [kpc] + 10 100 +POPULATIONS +r [kpc] + 10 100 +r [kpc] +-2 +-1 +0 +1 + 0 10 20 30 40 +POPULATIONS +β(r) +r [kpc] + 0 10 20 30 40 +POPULATIONS +r [kpc] + 0 10 20 30 40 50 +POPULATIONS +r [kpc] +Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy along the principal +axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, intermediate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the +1, 2, and 3σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r0 and the outer range of the +data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations. +and right-hand side panels of Fig. 8 for the results obtained for +all stars and the populations, respectively. We further calculate +the total density (second rows) and the total mass content (third +rows). 
We include the obtained orbit anisotropy within the modeled range in the bottom rows. The consecutive columns present +the results for the observations along the major, intermediate, +and minor axis. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity correspond +to 1, 2, and 3σ confidence regions obtained as extreme values allowed by the models with χ +2 within a given region. In each panel +the true values from the simulation are presented with black lines +while thin vertical lines mark the values of r0 and the outer range +of the data sets beyond which the reliability of results drops significantly. The true mass-to-light ratio profile was obtained by +dividing the total mass by the fitted King profiles, therefore the +drop at 100 kpc is the numerical artifact occurring at the very +outskirts of the galaxy. +Whereas in the right-hand side panels of Fig. 8 the resulting +anisotropy is obtained from the fit of all stars and uses only the +location of global minimum and confidence levels from two populations (as in the top right panel of Fig. 7), in Fig. 9 we present +another method of calculating the anisotropy. In the second and +third row we show the derived profiles for population I and II +separately and combine them as stellar mass weighted average +in the top row. As in previous figures, three columns refer to the +different lines of sight whereas the narrow fourth one shows the +behavior of the true profiles outside the modeled range which, as +we noticed in our previous studies, in a limited way influences +the results. Such an impact is understandable since the stars at +larger distances from the center are still included in the line-ofsight measurements. +3.3. Comparison of fitting results +The main strength of the two populations method comes from +tracing the underlying gravitational potential at different scales. +As can be seen in the bottom panels of Fig. 
7, population I, which +is more concentrated, is also more sensitive to Υ0, but gives +weaker constraints on a or c. On the other hand, population II +attempts to reproduce the total mass content at larger distances +as well, therefore showing stronger coupling between the parameters. +The global minimums of the χ +2 distributions for both approaches, that is modeling one and two populations, which we +identify as the best-fitting models, closely coincide showing that +there is no internal bias in the improved method. However, significant differences can be observed when comparing the confidence levels, mainly at 1 and 3 σ. Namely, we find that using +two populations, the constraints we obtain on the density and +anisotropy profile are much stronger. +Additionally, the more accurate method allows us to study +other effects and biases, for example the consequences of the +nonsphericity of the modeled object. Whereas for the fit of all +stars the true values of the density, mass, and anisotropy profiles +are contained within 1 σ confidence regions, the results for the +populations are more or less biased depending on the axis. They +are well reproduced for the observation along the intermediate +axis, for which the effects of nonsphericity seem to cancel out, +and more biased for the remaining lines of sight. We notice a +trend from under- to overestimation of the anisotropy when going from the major to the minor axis. Article number, page 7 of 12 -A&A proofs: manuscript no. Populations4 --1 -0 -1 - 0 10 20 30 40 -POP I + POP II -β(r) -r [kpc] -major - 0 10 20 30 40 -r [kpc] -intermediate - 0 10 20 30 40 -r [kpc] -minor - 50 60 70 80 --1 -0 -1 - 0 10 20 30 40 -POP I -β(r) -r [kpc] - 0 10 20 30 40 -r [kpc] - 0 10 20 30 40 -r [kpc] - 50 60 70 80 --1 -0 -1 - 0 10 20 30 40 -POP II -β(r) -r [kpc] - 0 10 20 30 40 -r [kpc] - 0 10 20 30 40 -r [kpc] - 50 60 70 80 -data -best model -1σ -2σ -3σ -Fig. 9. 
Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulated galaxy. In rows: -results for all stars (calculated as the superposition of two populations), population I, and population II. Colors follow the convention used in -previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines) -outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, -2, and 3σ confidence regions. -4. Modeling Fornax dSph -In this section we present the application of our Schwarzschild -modeling scheme to the observational data for the Fornax dSph -galaxy obtained by del Pino et al. (2015) and del Pino et al. -(2017). This study is a follow-up of the work of Kowalczyk et al. -(2019) and can be directly compared to the results presented -there. Moreover, we refer the reader to these previous publica￾tions for details on the origin of data and our procedures used -for cleaning the spectroscopic sample. -Similarly to the approach introduced in Section 2.2, we di￾vided all available stars into two equal-size populations based on -their metallicity and then cross-correlated the samples with the -data used in Kowalczyk et al. (2019). The metallicity histogram -of the final spectroscopic sample is shown in Fig. 10. Addition￾ally, we color-coded each bin with the population it has been -assigned to, namely orange or blue for population I or II. Inter￾estingly, the case of Fornax is similar to our simulated galaxy -as the split at [Fe/H]= −1 also captures an important feature -of the object’s star formation history, separating stars into sub￾samples older and younger than 6 Gyr, as shown in Fig. 12 of -del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). 
The -numbers of stars contained in the samples of all stars, popula￾tion I, and population II are given in Table 2, where the indices -"phot" and "spec" refer to the photometric and kinematic sam￾ples. The sum of stars in the populations is lower than in the -sample of all stars since only stars with reliable measurements -of metallicity could be included. -N -[Fe/H] -pop I -pop II - 0 - 20 - 40 - 60 - 80 - 100 --2.5 -2 -1.5 -1 -0.5 0 -Fig. 10. Metallicity histogram of the final spectroscopic sample used in -the modeling of two stellar populations in the Fornax dSph. Each bin is -color-coded according to the population it has been assigned to, orange -or blue for population I and II, respectively. -As we have shown in our earlier work, the light profile of the -Fornax dSph can be well reproduced with the three-parameter +A&A proofs: manuscript no. Populations4 +-1 +0 +1 + 0 10 20 30 40 +POP I + POP II +β(r) +r [kpc] +major + 0 10 20 30 40 +r [kpc] +intermediate + 0 10 20 30 40 +r [kpc] +minor + 50 60 70 80 +-1 +0 +1 + 0 10 20 30 40 +POP I +β(r) +r [kpc] + 0 10 20 30 40 +r [kpc] + 0 10 20 30 40 +r [kpc] + 50 60 70 80 +-1 +0 +1 + 0 10 20 30 40 +POP II +β(r) +r [kpc] + 0 10 20 30 40 +r [kpc] + 0 10 20 30 40 +r [kpc] + 50 60 70 80 +data +best model +1σ +2σ +3σ +Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulated galaxy. In rows: +results for all stars (calculated as the superposition of two populations), population I, and population II. Colors follow the convention used in +previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines) +outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, +2, and 3σ confidence regions. +4. 
Modeling Fornax dSph +In this section we present the application of our Schwarzschild +modeling scheme to the observational data for the Fornax dSph +galaxy obtained by del Pino et al. (2015) and del Pino et al. +(2017). This study is a follow-up of the work of Kowalczyk et al. +(2019) and can be directly compared to the results presented +there. Moreover, we refer the reader to these previous publications for details on the origin of data and our procedures used +for cleaning the spectroscopic sample. +Similarly to the approach introduced in Section 2.2, we divided all available stars into two equal-size populations based on +their metallicity and then cross-correlated the samples with the +data used in Kowalczyk et al. (2019). The metallicity histogram +of the final spectroscopic sample is shown in Fig. 10. Additionally, we color-coded each bin with the population it has been +assigned to, namely orange or blue for population I or II. Interestingly, the case of Fornax is similar to our simulated galaxy +as the split at [Fe/H]= −1 also captures an important feature +of the object’s star formation history, separating stars into subsamples older and younger than 6 Gyr, as shown in Fig. 12 of +del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The +numbers of stars contained in the samples of all stars, population I, and population II are given in Table 2, where the indices +"phot" and "spec" refer to the photometric and kinematic samples. The sum of stars in the populations is lower than in the +sample of all stars since only stars with reliable measurements +of metallicity could be included. +N +[Fe/H] +pop I +pop II + 0 + 20 + 40 + 60 + 80 + 100 +-2.5 -2 -1.5 -1 -0.5 0 +Fig. 10. Metallicity histogram of the final spectroscopic sample used in +the modeling of two stellar populations in the Fornax dSph. Each bin is +color-coded according to the population it has been assigned to, orange +or blue for population I and II, respectively. 
+As we have shown in our earlier work, the light profile of the +Fornax dSph can be well reproduced with the three-parameter Article number, page 8 of 12 -K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling -Table 2. Properties of the data samples for the Fornax dSph. -Property ALL POP I POP II -Number of stars (Nphot) 65 797 14 882 49 205 -Number of stars (Nspec) 3286 1136 1151 -Stars within 1.8 kpc 3268 1134 1130 -Fitted normalization (N0) [×104 -] 6.95 1.81 5.45 -Sérsic radius (RS) [kpc] 0.454 0.429 0.420 -Sérsic parameter (m) 0.808 0.807 0.898 -102 -103 -104 -105 - 0.1 0.2 0.5 1 2 -n -⋆(R) [kpc-2 -] -R [kpc] -all stars -popI -popII -Fig. 11. Surface number density profiles of the photometric data sam￾ples for the Fornax dSph: all available stars (in red), the metal-rich pop￾ulation I (in orange), and the metal-poor population II (in blue). Thin -vertical lines indicate r0 (see text) and the outer boundary of the spec￾troscopic data. -Sérsic formula (Sérsic 1968). The profiles of number density for -all stars and both populations together with the best-fitting Sérsic -profiles are presented in Fig. 11. The colors follow the conven￾tion introduced in previous sections. Thin vertical lines indicate -the innermost data point for the light profile for all stars and -the outer boundary of the kinematic sample. The former, set at -log r = −0.16, is also used as the minimum of the mass-to-light -ratio profile (r0 in Eq. 5). The fitted parameters of the profiles, -that is the normalization N0, the Sérsic radius RS, and the Sérsic -parameter m, are included in the second part of Table 2. -Figure 12 presents the profiles of the observables used in the -Schwarzschild modeling: the fraction of stars and the 2nd, 3rd, -and 4th velocity moments (top to bottom) for the three data sam￾ples: all stars, population I, and population II (in red, orange, and -blue, respectively). The error bars indicate 1 σ sampling errors. 
-The parameter space for Υ(r) has been probed as follows: -a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a -step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the -parameter c was fixed at c = 3 and now we fit it as a free pa￾rameter. As for the mock data in Section 3.2, different values of -Υ0 were obtained with the transformation of velocity moments -within the χ -2 fitting routine. The values of ∆χ -2 -for all stars and -the populations are shown in the two panels of Fig. 13 (left and -right-hand side, respectively). Due to the dense coverage of the -grid, we decided to include only the values within 3σ from the -fitted minimums (see Section 3.1). -The profiles of the mass-to-light ratio, total density, total -mass, and velocity anisotropy resulting from the χ -2 distributions -are presented in the consecutive rows of Fig. 14. The anisotropy -profile for the populations is based on the fit of all stars but using - 0 - 0.05 - 0.1 - 0.15 - 0.2 - 0.25 - 0 0.4 0.8 1.2 1.6 -M(R) -R [kpc] -all stars -pop I -pop II -0 -40 -80 -120 -160 -200 - 0 0.4 0.8 1.2 1.6 -m2(R)[(km s-1 -) -2 -] -R [kpc] --16 --8 -0 -8 -16 - 0 0.4 0.8 1.2 1.6 -m3(R)[10 -2(km s-1 -) -3 -] -R [kpc] -0 -4 -8 -12 -16 - 0 0.4 0.8 1.2 1.6 -m4(R)[10 -4(km s-1 -) -4 -] -R [kpc] -Fig. 12. Observables of the Fornax dSph used in our Schwarzschild -modeling scheme. In rows: the fraction of the total number of stars, the -2nd, 3rd, and 4th velocity moment. In red we present the values obtained -for all stars whereas in orange and blue those for populations I and II, -respectively. -the confidence levels on Υ from the fit of two populations. Green -lines indicate the values for the best-fitting models whereas the -colored areas of decreasing intensity show the 1, 2, and 3 σ con￾fidence regions. Additionally, with black dashed lines we include -the results from Kowalczyk et al. (2019) for comparison. 
-As a result of freeing the steepness of the mass-to-light -ratio profile (parameter c) with respect to the previous study +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling +Table 2. Properties of the data samples for the Fornax dSph. +Property ALL POP I POP II +Number of stars (Nphot) 65 797 14 882 49 205 +Number of stars (Nspec) 3286 1136 1151 +Stars within 1.8 kpc 3268 1134 1130 +Fitted normalization (N0) [×104] 6.95 1.81 5.45 +Sérsic radius (RS) [kpc] 0.454 0.429 0.420 +Sérsic parameter (m) 0.808 0.807 0.898 +102 +103 +104 +105 + 0.1 0.2 0.5 1 2 +n +⋆(R) [kpc-2 +] +R [kpc] +all stars +popI +popII +Fig. 11. Surface number density profiles of the photometric data samples for the Fornax dSph: all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in blue). Thin +vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. +Sérsic formula (Sérsic 1968). The profiles of number density for +all stars and both populations together with the best-fitting Sérsic +profiles are presented in Fig. 11. The colors follow the convention introduced in previous sections. Thin vertical lines indicate +the innermost data point for the light profile for all stars and +the outer boundary of the kinematic sample. The former, set at +log r = −0.16, is also used as the minimum of the mass-to-light +ratio profile (r0 in Eq. 5). The fitted parameters of the profiles, +that is the normalization N0, the Sérsic radius RS, and the Sérsic +parameter m, are included in the second part of Table 2. +Figure 12 presents the profiles of the observables used in the +Schwarzschild modeling: the fraction of stars and the 2nd, 3rd, +and 4th velocity moments (top to bottom) for the three data samples: all stars, population I, and population II (in red, orange, and +blue, respectively). The error bars indicate 1 σ sampling errors. 
+The parameter space for Υ(r) has been probed as follows: +a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a +step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the +parameter c was fixed at c = 3 and now we fit it as a free parameter. As for the mock data in Section 3.2, different values of +Υ0 were obtained with the transformation of velocity moments +within the χ +2 fitting routine. The values of ∆χ2 +for all stars and +the populations are shown in the two panels of Fig. 13 (left and +right-hand side, respectively). Due to the dense coverage of the +grid, we decided to include only the values within 3σ from the +fitted minimums (see Section 3.1). +The profiles of the mass-to-light ratio, total density, total +mass, and velocity anisotropy resulting from the χ +2 distributions +are presented in the consecutive rows of Fig. 14. The anisotropy +profile for the populations is based on the fit of all stars but using + 0 + 0.05 + 0.1 + 0.15 + 0.2 + 0.25 + 0 0.4 0.8 1.2 1.6 +M(R) +R [kpc] +all stars +pop I +pop II +0 +40 +80 +120 +160 +200 + 0 0.4 0.8 1.2 1.6 +m2(R)[(km s-1 +) +2 +] +R [kpc] +-16 +-8 +0 +8 +16 + 0 0.4 0.8 1.2 1.6 +m3(R)[10 +2(km s-1 +) +3 +] +R [kpc] +0 +4 +8 +12 +16 + 0 0.4 0.8 1.2 1.6 +m4(R)[10 +4(km s-1 +) +4 +] +R [kpc] +Fig. 12. Observables of the Fornax dSph used in our Schwarzschild +modeling scheme. In rows: the fraction of the total number of stars, the +2nd, 3rd, and 4th velocity moment. In red we present the values obtained +for all stars whereas in orange and blue those for populations I and II, +respectively. +the confidence levels on Υ from the fit of two populations. Green +lines indicate the values for the best-fitting models whereas the +colored areas of decreasing intensity show the 1, 2, and 3 σ confidence regions. Additionally, with black dashed lines we include +the results from Kowalczyk et al. (2019) for comparison. 
+As a result of freeing the steepness of the mass-to-light +ratio profile (parameter c) with respect to the previous study Article number, page 9 of 12 -A&A proofs: manuscript no. Populations4 - 0 - 0.5 - 1 - 1.5 - 0 - 0.5 - 1 - 1.5 - 2 - 3 - 4 - 5 - 6 -ALL -Υ0 -a -c - 0 - 0.5 - 1 - 1.5 - 0 - 0.5 - 1 - 1.5 - 2 - 3 - 4 - 5 - 6 -POPULATIONS -Υ0 -a -c - 0 - 3 - 6 - 9 - 12 -χ -2 --χ -2 -min -Fig. 13. Values of χ -2 -relative to the fitted minimum within the range of 3σ confidence level for all stars (left panel) and for the populations (right -panel) for the Fornax dSph. -(Kowalczyk et al. 2019), we obtained higher estimates of the en￾closed total mass at larger radii. In particular, for the mass en￾closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48 -−1.56 × 108 -M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87 -−1.13 × -108 M⊙ from the fit of populations, while previously we had -Mold(< 1.8 kpc) = 3.7 -+1.4 -−1.3 -× 108 M⊙. -Interestingly, despite the significant shift of the position of -χ -2 -min (to c = 4.2 for all stars and 3.6 for populations), the ob￾tained profile of the anisotropy parameter remains decreasing or -flat for all stars but changes to increasing from 0 to 0.5 for the -populations. Nevertheless, even in the latter case the previous -result agrees with the new finding within 1σ. -The detailed analysis of the anisotropy is shown in Fig. 15 -where the middle and bottom panels present the profiles ob￾tained for each population separately. We notice that the profile -for population I is decreasing or has a local minimum whereas -for population II is increasing (from −0.25 to 0.5 for the best￾fitting model). Since population I is more concentrated, the last -bins contain very few stars, which limits their credibility. The -top panel of Fig. 15 presents the anisotropy of all stars calcu￾lated as a weighted superposition of two populations. 
With such -approach we still obtain the increasing profile (from 0 to 0.5) but -the previous result agrees with it only within 2σ. -Since Fornax dSph is significantly elongated with the pro￾jected ellipticity of ǫ = 0.30 ± 0.01 (Irwin & Hatzidimitriou -1995), we anticipate some bias in the obtained results caused -by the spherically symmetric modeling. Kowalczyk et al. (2018) -studied such bias in an axisymmetric simulated object qualita￾tively similar to Fornax and identified differences in the system￾atic errors depending on whether the galaxy was observed along -its major or minor axis. Assuming that Fornax is observed along -the line of sight in between these extremes, we expect the total -mass profile to be slightly overestimated and the anisotropy to be -underestimated, further strengthening the likelihood of the real -anisotropy to be radial and its profile to be growing with radius -with respect to the results of Kowalczyk et al. (2019). -Both constant (like for our population I) and growing (pop￾ulation II) anisotropy profiles can arise from biased modeling -of the real growing profile by observing an object along the -minor and major axis, respectively. However, for the bias to -occur in two populations presented here, their inner orienta￾tions would need to be opposite. Since such morphological fea￾tures are not supported by the photometric studies of Fornax -(del Pino et al. 2015; Wang et al. 2019) which rather find a good -spatial alignment between the stellar populations, we conclude -that the anisotropy profiles of the two populations modeled in -this work are indeed significantly distinct. -Finally, it is worth noticing that the so-called mass-follows￾light model, that is the one following from the assumption that -the total density traces the stellar distribution, is no longer sup￾ported by the fit of the populations. 
With our parametrization, -the mass-follows-light model corresponds to a = 0 and whereas -it is enclosed within 3σ for the fit of all stars, as was the case -in Kowalczyk et al. (2019), the allowed values for the improved -method are much larger, as demonstrated by the right panel of -Fig. 13. -5. Summary and discussion -Building on the previously created implementation of the -Schwarzschild orbit superposition method focused on modeling -dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018, -2019), we improved our tool by introducing multiple stellar pop￾ulations. Such an improvement is desirable and justified since -many of the dwarfs show signs of multiple star formation bursts -or extended star formation episodes. As the different populations -trace the common underlying gravitational potential, one may -expect a significant improvement in the estimates of not only the -total mass content but also the orbit anisotropy since this robust -modeling technique reproduces the anisotropy as a by-product -of the modeling rather than taking it as an assumption. -We have tested our hypothesis by modeling mock data gener￾ated from a galaxy formed in the Illustris simulation. Due to the -limitations of the resolution, we chose a galaxy of mass a few or￾ders of magnitude larger than the estimated masses of classical -dwarfs. Still, the galaxy possessed appropriate qualitative char￾acteristics, such as the lack of gas and an almost spherical shape, +A&A proofs: manuscript no. Populations4 + 0 + 0.5 + 1 + 1.5 + 0 + 0.5 + 1 + 1.5 + 2 + 3 + 4 + 5 + 6 +ALL +Υ0 +a +c + 0 + 0.5 + 1 + 1.5 + 0 + 0.5 + 1 + 1.5 + 2 + 3 + 4 + 5 + 6 +POPULATIONS +Υ0 +a +c + 0 + 3 + 6 + 9 + 12 +χ +2 +-χ +2 +min +Fig. 13. Values of χ +2 +relative to the fitted minimum within the range of 3σ confidence level for all stars (left panel) and for the populations (right +panel) for the Fornax dSph. +(Kowalczyk et al. 2019), we obtained higher estimates of the enclosed total mass at larger radii. 
In particular, for the mass enclosed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48 +−1.56 × 108 +M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87 +−1.13 × +108 M⊙ from the fit of populations, while previously we had +Mold(< 1.8 kpc) = 3.7 ++1.4 +−1.3 +× 108 M⊙. +Interestingly, despite the significant shift of the position of +χ +2 +min (to c = 4.2 for all stars and 3.6 for populations), the obtained profile of the anisotropy parameter remains decreasing or +flat for all stars but changes to increasing from 0 to 0.5 for the +populations. Nevertheless, even in the latter case the previous +result agrees with the new finding within 1σ. +The detailed analysis of the anisotropy is shown in Fig. 15 +where the middle and bottom panels present the profiles obtained for each population separately. We notice that the profile +for population I is decreasing or has a local minimum whereas +for population II is increasing (from −0.25 to 0.5 for the bestfitting model). Since population I is more concentrated, the last +bins contain very few stars, which limits their credibility. The +top panel of Fig. 15 presents the anisotropy of all stars calculated as a weighted superposition of two populations. With such +approach we still obtain the increasing profile (from 0 to 0.5) but +the previous result agrees with it only within 2σ. +Since Fornax dSph is significantly elongated with the projected ellipticity of ǫ = 0.30 ± 0.01 (Irwin & Hatzidimitriou +1995), we anticipate some bias in the obtained results caused +by the spherically symmetric modeling. Kowalczyk et al. (2018) +studied such bias in an axisymmetric simulated object qualitatively similar to Fornax and identified differences in the systematic errors depending on whether the galaxy was observed along +its major or minor axis. 
Assuming that Fornax is observed along +the line of sight in between these extremes, we expect the total +mass profile to be slightly overestimated and the anisotropy to be +underestimated, further strengthening the likelihood of the real +anisotropy to be radial and its profile to be growing with radius +with respect to the results of Kowalczyk et al. (2019). +Both constant (like for our population I) and growing (population II) anisotropy profiles can arise from biased modeling +of the real growing profile by observing an object along the +minor and major axis, respectively. However, for the bias to +occur in two populations presented here, their inner orientations would need to be opposite. Since such morphological features are not supported by the photometric studies of Fornax +(del Pino et al. 2015; Wang et al. 2019) which rather find a good +spatial alignment between the stellar populations, we conclude +that the anisotropy profiles of the two populations modeled in +this work are indeed significantly distinct. +Finally, it is worth noticing that the so-called mass-followslight model, that is the one following from the assumption that +the total density traces the stellar distribution, is no longer supported by the fit of the populations. With our parametrization, +the mass-follows-light model corresponds to a = 0 and whereas +it is enclosed within 3σ for the fit of all stars, as was the case +in Kowalczyk et al. (2019), the allowed values for the improved +method are much larger, as demonstrated by the right panel of +Fig. 13. +5. Summary and discussion +Building on the previously created implementation of the +Schwarzschild orbit superposition method focused on modeling +dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018, +2019), we improved our tool by introducing multiple stellar populations. 
Such an improvement is desirable and justified since +many of the dwarfs show signs of multiple star formation bursts +or extended star formation episodes. As the different populations +trace the common underlying gravitational potential, one may +expect a significant improvement in the estimates of not only the +total mass content but also the orbit anisotropy since this robust +modeling technique reproduces the anisotropy as a by-product +of the modeling rather than taking it as an assumption. +We have tested our hypothesis by modeling mock data generated from a galaxy formed in the Illustris simulation. Due to the +limitations of the resolution, we chose a galaxy of mass a few orders of magnitude larger than the estimated masses of classical +dwarfs. Still, the galaxy possessed appropriate qualitative characteristics, such as the lack of gas and an almost spherical shape, Article number, page 10 of 12 -K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling -101 -103 -105 - 0.1 1 -Υ(r) [M⊙/L⊙] -r [kpc] -ALL - 0.1 1 -r [kpc] -POPULATIONS -3σ -2σ -1σ -best model -K19 -104 -106 -108 - 0.1 1 -νtot(r) [M⊙ kpc-3 -] -r [kpc] - 0.1 1 -r [kpc] -105 -107 -109 - 0.1 1 -Mtot(r) [M⊙] -r [kpc] - 0.1 1 -r [kpc] --3 --2 --1 -0 -1 - 0 0.4 0.8 1.2 1.6 -β(r) -r [kpc] - 0 0.4 0.8 1.2 1.6 -r [kpc] -Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. -In rows: derived mass-to-light ratio, total density, total mass, and -anisotropy parameter. In columns: results for all stars and the popula￾tions, respectively. Green lines indicate the values for the best-fit models -whereas the colored areas of decreasing intensity show the 1, 2, and 3σ -confidence regions. The best-fitting values obtained by Kowalczyk et al. -(2019) are shown with black dashed lines. -that made it a good test bed for modeling techniques applica￾ble to dSph galaxies. 
We applied our approach to all data and -to two stellar populations separately, comparing the accuracy of -the obtained results. Although the addition of the second tracer -seemingly increases the number of constraints twice, the incre￾ment is somewhat compromised by the sampling errors since the -number of stars in each sample is then reduced. Still, we found -strong improvements in the accuracy of the method when us￾ing two populations. The results of the modeling show that the -density and velocity anisotropy profiles are more strongly con￾strained, most importantly at the 3 σ level, that is the range of -allowed values is much narrower. -Similarly to the conclusions of Kowalczyk et al. (2018) who -explored the effects of nonsphericity using large and small -data samples, the comparison of results presented in the left￾and right-hand side panels of Fig. 8 suggests that the improved -method using two stellar populations gives more precise but less -accurate outcome. However, in both studies the apparent dete￾rioration of the reliability is a consequence of modeling of a -nonspherical object. In both cases, a simpler approach (much -smaller data samples or using one stellar population) resulted --2 --1 -0 -1 - 0 0.4 0.8 1.2 1.6 -POP I + POP II -β(r) -r [kpc] --2 --1 -0 -1 - 0 0.4 0.8 1.2 1.6 -POP I -β(r) -r [kpc] --2 --1 -0 -1 - 0 0.4 0.8 1.2 1.6 -POP II -β(r) -r [kpc] -best model -1σ -2σ -3σ -K19 -Fig. 15. Profiles of the anisotropy parameter obtained with the -Schwarzschild modeling of two stellar populations for the Fornax dSph. -In rows: results for all stars (calculated as the superposition of two pop￾ulations), population I, and population II. Color lines indicate values -for the best-fit models whereas the colored areas of decreasing intensity -show the 1, 2, and 3σ confidence regions. The dashed black line shows -the result from Kowalczyk et al. (2019) for comparison. 
-in larger final uncertainties, usually containing the true values -within 1 σ confidence region. On the other hand, the improved -methods exhibit substantially reduced uncertainties, highlighting -the underlying bias. -Our method parametrizes the total mass content with the -mass-to-light ratio varying with radius as a power-law in the log￾log scale. We made two main changes with respect to our previ￾ous work: we added a third parameter c controlling the steepness -of the mass-to-light ratio profile (previously fixed at the value of -3) and allowed for different stellar density profiles (previously -only Sérsic, now also King). These changes are of course cou￾pled since different density profiles require different exponents to -reproduce the same mass profile. It is visible also in our results -since the King profile applied in the simulated galaxy gave us -values of c lower than 3. Nevertheless, we decided to use differ￾ent density profiles to make our method more general and appli￾cable to objects, such as our Illustris galaxy, for which the Sérsic -formula does not provide a good approximation of the density -distribution. -Finally, we applied the improved method to the data for the -Fornax dSph galaxy. Due to the addition of another free param￾eter in our functional form for the mass-to-light ratio, our re￾sults for modeling all stars are slightly different from the ones +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling +101 +103 +105 + 0.1 1 +Υ(r) [M⊙/L⊙] +r [kpc] +ALL + 0.1 1 +r [kpc] +POPULATIONS +3σ +2σ +1σ +best model +K19 +104 +106 +108 + 0.1 1 +νtot(r) [M⊙ kpc-3 +] +r [kpc] + 0.1 1 +r [kpc] +105 +107 +109 + 0.1 1 +Mtot(r) [M⊙] +r [kpc] + 0.1 1 +r [kpc] +-3 +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +β(r) +r [kpc] + 0 0.4 0.8 1.2 1.6 +r [kpc] +Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. +In rows: derived mass-to-light ratio, total density, total mass, and +anisotropy parameter. 
In columns: results for all stars and the populations, respectively. Green lines indicate the values for the best-fit models +whereas the colored areas of decreasing intensity show the 1, 2, and 3σ +confidence regions. The best-fitting values obtained by Kowalczyk et al. +(2019) are shown with black dashed lines. +that made it a good test bed for modeling techniques applicable to dSph galaxies. We applied our approach to all data and +to two stellar populations separately, comparing the accuracy of +the obtained results. Although the addition of the second tracer +seemingly increases the number of constraints twice, the increment is somewhat compromised by the sampling errors since the +number of stars in each sample is then reduced. Still, we found +strong improvements in the accuracy of the method when using two populations. The results of the modeling show that the +density and velocity anisotropy profiles are more strongly constrained, most importantly at the 3 σ level, that is the range of +allowed values is much narrower. +Similarly to the conclusions of Kowalczyk et al. (2018) who +explored the effects of nonsphericity using large and small +data samples, the comparison of results presented in the leftand right-hand side panels of Fig. 8 suggests that the improved +method using two stellar populations gives more precise but less +accurate outcome. However, in both studies the apparent deterioration of the reliability is a consequence of modeling of a +nonspherical object. In both cases, a simpler approach (much +smaller data samples or using one stellar population) resulted +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +POP I + POP II +β(r) +r [kpc] +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +POP I +β(r) +r [kpc] +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +POP II +β(r) +r [kpc] +best model +1σ +2σ +3σ +K19 +Fig. 15. Profiles of the anisotropy parameter obtained with the +Schwarzschild modeling of two stellar populations for the Fornax dSph. 
+In rows: results for all stars (calculated as the superposition of two populations), population I, and population II. Color lines indicate values +for the best-fit models whereas the colored areas of decreasing intensity +show the 1, 2, and 3σ confidence regions. The dashed black line shows +the result from Kowalczyk et al. (2019) for comparison. +in larger final uncertainties, usually containing the true values +within 1 σ confidence region. On the other hand, the improved +methods exhibit substantially reduced uncertainties, highlighting +the underlying bias. +Our method parametrizes the total mass content with the +mass-to-light ratio varying with radius as a power-law in the loglog scale. We made two main changes with respect to our previous work: we added a third parameter c controlling the steepness +of the mass-to-light ratio profile (previously fixed at the value of +3) and allowed for different stellar density profiles (previously +only Sérsic, now also King). These changes are of course coupled since different density profiles require different exponents to +reproduce the same mass profile. It is visible also in our results +since the King profile applied in the simulated galaxy gave us +values of c lower than 3. Nevertheless, we decided to use different density profiles to make our method more general and applicable to objects, such as our Illustris galaxy, for which the Sérsic +formula does not provide a good approximation of the density +distribution. +Finally, we applied the improved method to the data for the +Fornax dSph galaxy. Due to the addition of another free parameter in our functional form for the mass-to-light ratio, our results for modeling all stars are slightly different from the ones Article number, page 11 of 12 -A&A proofs: manuscript no. Populations4 -obtained in Kowalczyk et al. (2019). 
However, in terms of the -total density and mass distribution the estimates obtained here -agree very well with those earlier results in the range covered -by the data. Therefore, the detailed comparison with other esti￾mates from the literature presented in Kowalczyk et al. (2019) is -still valid and we do not repeat it here. -A more significant difference with respect to these previous -estimates is seen in the results of modeling two populations in -Fornax. In this case we find the anisotropy to be slightly increas￾ing rather than decreasing with radius and, most importantly, the -confidence regions for this parameter, as well as for the den￾sity, are much narrower. We were thus able to obtain tighter con￾straints on the properties of Fornax, which means that the im￾proved method is successful. For the first time, we were also able -to deduce the velocity anisotropy profiles for each of the popula￾tions separately. We found that the more concentrated, metal-rich -population I has a decreasing anisotropy profile while the more -extended, metal-poor population II has the anisotropy increasing -with radius. This finding may partially explain the large spread -of the anisotropy values obtained in the literature and summa￾rized in Table 2 and 3 of Kowalczyk et al. (2019), which were -often based on modeling subsamples of our spectroscopic data -set. -For both studied objects we split the stars into two popula￾tions by dividing them in half based on their metallicity, Z (in -solar units), for the Illustris galaxy and [Fe/H] for Fornax. Such -a method is approximate but justified. Both galaxies have com￾plex star formation history with multiple star formation bursts, as -demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. -(2013), producing multiple stellar populations which cannot be -easily tracked as the metallicity is a good but not perfect proxy -for the stellar age. 
Moreover, the metallicity histograms for both -objects are approximately unimodal not allowing for a conve￾nient separation. More refined methods of division have been -suggested in the literature, for example in the form of the likeli￾hood function based on the position, velocity, and metallicity in￾dex (Walker & Peñarrubia 2011). However, the likelihood func￾tion requires many assumptions which introduce additional un￾certainties into the treatment of the data. On the other hand, our -approach ensures the maximization of each sample (and there￾fore minimization of sampling errors) while capturing the im￾portant features of the star formation history. -Further improvements to the Schwarzschild modeling -method are certainly possible. One way to proceed would be to -include the modeling of the proper motions of the stars. For now, -measurements of transverse velocities are available only for the -brightest stars in dSph galaxies, but even small samples of this -type could provide further constraints on the models, as demon￾strated by Strigari et al. (2007) and Massari et al. (2020). -Acknowledgements. We are grateful to Andrés del Pino for providing the data for -the Fornax dSph and to the Illustris team for making their simulations publicly -available. Useful comments from the anonymous referee are kindly appreciated. -This research was supported by the Polish National Science Center under grant -2018/28/C/ST9/00529. -References -Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184 -Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13 -Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 327, L15 -Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton Uni￾versity Press, Princeton) -Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 -Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia, -G. 2013, MNRAS, 433, 3173 -del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 
2013, MNRAS, 433, 1505 -del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996 -del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465, -3708 -Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126 -Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92 -Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40 -Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRAS, 445, 175 -Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398 -Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250 -Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354 -Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89 -Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Williams, M. J. 2013, -ApJ, 763, 91 -King, I. 1962, AJ, 67, 471 -Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431, -2796 -Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3959 -Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2918 -Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482, -5241 -Łokas, E. L., 2002, MNRAS, 333, 697 -Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918 -Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36 -Mateo, M. 1998, ARA&A, 36, 435 -Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, -12 -Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022 -Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numer￾ical Recipes in C, 2nd edn. (Cambridge University Press, Cambridge) -Schwarzschild, M. 1979, ApJ, 232, 236 -Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cor￾doba, Argentina) -Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1 -Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371 -Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 -van der Marel, R. 
P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493, -613 -Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177 -Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518 -Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20 -Wang, M. Y., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118 -Article number, page 12 of 12 +A&A proofs: manuscript no. Populations4 +obtained in Kowalczyk et al. (2019). However, in terms of the +total density and mass distribution the estimates obtained here +agree very well with those earlier results in the range covered +by the data. Therefore, the detailed comparison with other estimates from the literature presented in Kowalczyk et al. (2019) is +still valid and we do not repeat it here. +A more significant difference with respect to these previous +estimates is seen in the results of modeling two populations in +Fornax. In this case we find the anisotropy to be slightly increasing rather than decreasing with radius and, most importantly, the +confidence regions for this parameter, as well as for the density, are much narrower. We were thus able to obtain tighter constraints on the properties of Fornax, which means that the improved method is successful. For the first time, we were also able +to deduce the velocity anisotropy profiles for each of the populations separately. We found that the more concentrated, metal-rich +population I has a decreasing anisotropy profile while the more +extended, metal-poor population II has the anisotropy increasing +with radius. This finding may partially explain the large spread +of the anisotropy values obtained in the literature and summarized in Table 2 and 3 of Kowalczyk et al. (2019), which were +often based on modeling subsamples of our spectroscopic data +set. +For both studied objects we split the stars into two populations by dividing them in half based on their metallicity, Z (in +solar units), for the Illustris galaxy and [Fe/H] for Fornax. 
Such +a method is approximate but justified. Both galaxies have complex star formation history with multiple star formation bursts, as +demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. +(2013), producing multiple stellar populations which cannot be +easily tracked as the metallicity is a good but not perfect proxy +for the stellar age. Moreover, the metallicity histograms for both +objects are approximately unimodal not allowing for a convenient separation. More refined methods of division have been +suggested in the literature, for example in the form of the likelihood function based on the position, velocity, and metallicity index (Walker & Peñarrubia 2011). However, the likelihood function requires many assumptions which introduce additional uncertainties into the treatment of the data. On the other hand, our +approach ensures the maximization of each sample (and therefore minimization of sampling errors) while capturing the important features of the star formation history. +Further improvements to the Schwarzschild modeling +method are certainly possible. One way to proceed would be to +include the modeling of the proper motions of the stars. For now, +measurements of transverse velocities are available only for the +brightest stars in dSph galaxies, but even small samples of this +type could provide further constraints on the models, as demonstrated by Strigari et al. (2007) and Massari et al. (2020). +Acknowledgements. We are grateful to Andrés del Pino for providing the data for +the Fornax dSph and to the Illustris team for making their simulations publicly +available. Useful comments from the anonymous referee are kindly appreciated. +This research was supported by the Polish National Science Center under grant +2018/28/C/ST9/00529. +References +Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184 +Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13 +Bellazzini, M., Ferraro, F. R., & Pancino, E. 
2001, MNRAS, 327, L15 +Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University Press, Princeton) +Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 +Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia, +G. 2013, MNRAS, 433, 3173 +del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS, 433, 1505 +del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996 +del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465, +3708 +Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126 +Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92 +Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40 +Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRAS, 445, 175 +Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398 +Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250 +Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354 +Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89 +Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Williams, M. J. 2013, +ApJ, 763, 91 +King, I. 1962, AJ, 67, 471 +Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431, +2796 +Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3959 +Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2918 +Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482, +5241 +Łokas, E. L., 2002, MNRAS, 333, 697 +Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918 +Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36 +Mateo, M. 1998, ARA&A, 36, 435 +Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, +12 +Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022 +Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numerical Recipes in C, 2nd edn. (Cambridge University Press, Cambridge) +Schwarzschild, M. 
1979, ApJ, 232, 236 +Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba, Argentina) +Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1 +Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371 +Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 +van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493, +613 +Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177 +Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518 +Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20 +Wang, M. Y., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118 +Article number, page 12 of \ No newline at end of file diff --git a/read/results/pdfium/2201.00178.txt b/read/results/pdfium/2201.00178.txt index dcee01b..f8a31e6 100644 --- a/read/results/pdfium/2201.00178.txt +++ b/read/results/pdfium/2201.00178.txt @@ -1,1077 +1,1033 @@ -Draft version January 4, 2022 -Typeset using LATEX default style in AASTeX631 -Imaging the Sun’s near-surface flows using mode-coupling analysis -Prasad Mani , -1 Chris S. Hanson , -2 and Shravan Hanasoge 1, 2 -1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India -2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE -ABSTRACT -The technique of normal-mode coupling is a powerful tool with which to seismically image non￾axisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to -probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and -Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling -measurements to show that the resulting divergence and radial vorticity maps at supergranular length -scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre￾lation Tracking method. 
We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, -while ≥ 0.8 is obtained for the radial vorticity. -Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) -1. INTRODUCTION -Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect -on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun, -behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber -of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of -the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be -retrieved through meticulous inversions of these seismic measurements. -Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. -Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through -global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolving the neutrino problem -(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. -1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active -regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography -(Lindsey & Braun 2000). The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial -Rossby wave (L¨optien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and -the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation. 
-In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has -received attention, with many studies seeking to validate and demonstrate the importance of such a technique for -investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically -challenging, the data analysis is simple and utilizes all the information registered by the mode. Thus far, global -mode-coupling has been validated through observations of the meridional flow (Vorontsov 2011; Woodard et al. 2013), -differential rotation (Schad & Roth 2020; Kashyap et al. 2021), global-scale convection (Woodard 2014, 2016; Hanasoge -et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Mandal & Hanasoge 2020; Mandal -et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was -validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and -comparing with previous time-distance studies (Langfellner et al. 2018). -prasad.subramanian@tifr.res.in +Draft version January 4, 2022 +Typeset using LATEX default style in AASTeX631 +Imaging the Sun’s near-surface flows using mode-coupling analysis +Prasad Mani , +1 Chris S. Hanson ,2 and Shravan Hanasoge 1, 2 +1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India +2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE +ABSTRACT +The technique of normal-mode coupling is a powerful tool with which to seismically image nonaxisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to +probe steady, near-surface flows in the Sun. 
Using Doppler cubes obtained from the Helioseismic and +Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling +measurements to show that the resulting divergence and radial vorticity maps at supergranular length +scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Correlation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, +while ≥ 0.8 is obtained for the radial vorticity. +Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) +1. INTRODUCTION +Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect +on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun, +behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber +of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of +the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be +retrieved through meticulous inversions of these seismic measurements. +Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. +Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through +global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolving the neutrino problem +(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. +1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active +regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography +(Lindsey & Braun 2000). 
The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial +Rossby wave (L¨optien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and +the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation. +In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has +received attention, with many studies seeking to validate and demonstrate the importance of such a technique for +investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically +challenging, the data analysis is simple and utilizes all the information registered by the mode. Thus far, global +mode-coupling has been validated through observations of the meridional flow (Vorontsov 2011; Woodard et al. 2013), +differential rotation (Schad & Roth 2020; Kashyap et al. 2021), global-scale convection (Woodard 2014, 2016; Hanasoge +et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Mandal & Hanasoge 2020; Mandal +et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was +validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and +comparing with previous time-distance studies (Langfellner et al. 2018). +prasad.subramanian@tifr.res.in arXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 -2 Mani et al. -Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combi￾nation of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete -and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and -flows. In this state, the oscillations are considered to be uncoupled. 
The weights needed to express the solar-oscillation -eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then -reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield -cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights -(Woodard 2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial -and temporal frequencies, leaving us with measurements sensitive to different quantities of interest. -In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps -at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is -reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow -and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order -coupling (p2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. -We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. -1.1. Forward problem -In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a -complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we -denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the -solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface -that are small when compared to the solar radius. 
When imaging steady, near-surface flows in the neighbourhood -of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the -horizontal wavenumber qR ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal -wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow -perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon -& Rieutord 2018), permitting us to model the flow vector u = (ux, uy, uz) in the Cartesian domain like so (Unno et al. -1989; Woodard 2006) -u -σ = ∇×[∇×(P ez)] + ∇×(T ez), (1) -where P = P -σ -(x) and T = T -σ -(x) are poloidal and toroidal scalar functions, varying with position x and temporal -frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying -perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for -example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period -of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of -perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using -vector calculus results in -u = −∇2Pez + ∇(∂zP) + ∇hT×ez, (2) -where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the -Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a -function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and -Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline, -etc). 
This is expressed as -P ≡ Pq(z) = X -j -fj (z) Pqj , T ≡ Tq(z) = X -j -fj (z) Tqj . (3) -The flow coefficients Pqj and Tqj , represented by the discrete indices q and j, become ideal candidates for inversions, -where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be -exploited to expedite inversions. Note that Pqj = P -∗ -−qj -and Tqj = T -∗ -−qj -for the flow field to be real in the spatio￾temporal domain. + Mani et al. +Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete +and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and +flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation +eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then +reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield +cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights +(Woodard 2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial +and temporal frequencies, leaving us with measurements sensitive to different quantities of interest. +In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps +at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is +reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow +and radial vorticity are described. 
We also demonstrate signal associated with supergranular flow in a radial-order +coupling (p2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. +We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. +1.1. Forward problem +In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a +complete derivation of the forward problem. Working in the plane-parallel atmosphere (see also Woodard 2006), we +denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the +solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface +that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood +of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the +horizontal wavenumber qR ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal +wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow +perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon +& Rieutord 2018), permitting us to model the flow vector u = (ux, uy, uz) in the Cartesian domain like so (Unno et al. +1989; Woodard 2006) +u +σ = ∇×[∇×(P ez)] + ∇×(T ez), (1) +where P = P +σ +(x) and T = T +σ +(x) are poloidal and toroidal scalar functions, varying with position x and temporal +frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying +perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 
2020; Mandal & Hanasoge 2020, for +example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period +of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of +perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using +vector calculus results in +u = −∇2Pez + ∇(∂zP) + ∇hT×ez, (2) +where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the +Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a +function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and +Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline, +etc). This is expressed as +P ≡ Pq(z) = X +j +fj (z) Pqj , T ≡ Tq(z) = X +j +fj (z) Tqj . (3) +The flow coefficients Pqj and Tqj , represented by the discrete indices q and j, become ideal candidates for inversions, +where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be +exploited to expedite inversions. Note that Pqj = P +∗ +−qj +and Tqj = T +∗ +−qj +for the flow field to be real in the spatiotemporal domain. To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner -Imaging near-surface flows using mode-coupling analysis 3 -φ -ω∗ -k φ -ω -k+q -, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φ -ω∗ -k φ -ω -k+q -thus -to the flow coefficients Pqj and Tqj (see eq A7) -hφ -ω∗ -k φ -ω -k+q -i = Hω -kk0nn0 -X -j -Cqj,kPqj + Dqj,kTqj . 
(4) -The weight factor Hω (see eq A8) is a function of frequency, capturing information about the extent of coupling between -the two modes [n, k] and [n -0 -, k0 -], where n and n -0 are the radial orders of the modes, and k = |k| and k -0 = |k -0 -| = |k+q|. -The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the -Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,k and Dqj,k are poloidal -and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements -and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k = C−qj,−k and -Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj (z). -1.2. Least-squares of cross-correlation -Even though φ -ω∗ -k φ -ω -k+q -isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea￾surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the -dimension of the problem. A least-squares fit to the cross-correlation φ -ω∗ -k φ -ω -k+q -(see Woodard 2006, 2014, 2016) results -in the B-coefficients Bk,q, according to -Bk,q = -P -ω -Hω∗ -kk0nn0φ -ω∗ -k φ -ω -k+q -P -ω -|Hω -kk0nn0 | -2 -. (5) -Multiplying eq 4 on both sides by Hω∗ -kk0nn0 and substituting by eq 5 on the left-hand-side results in a concisely defined -forward problem (compare with eq 4) -Bk,q = -X -j -Cqj,kPqj + Dqj,kTqj . (6) -In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. -Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk, -|ω| ∈  -ωnk − Γnk/2, ωnk + Γnk/2 - -or -|ω| ∈  -ωn0k0 − Γn0k0/2, ωn0k0 + Γn0k0/2 - -. 
(7) -Summing over ±ω guarantees that the parity Bk,q = B∗ -−k,−q -(see Appendix A for derivation) is obeyed, thereby -ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. -Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and -−k, -B -∗ -−k,−q = -X -j -C−qj,−kP -∗ -−qj + D−qj,−kT -∗ -−qj -. (8) -Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a -least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a -by-product. -1.3. Noise model -In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from -the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the -following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian -random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. -Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a -random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes +Imaging near-surface flows using mode-coupling analysis 3 +φ +ω∗ +k φ +ω +k+q +, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φ +ω∗ +k φ +ω +k+q +thus +to the flow coefficients Pqj and Tqj (see eq A7) +hφ +ω∗ +k φ +ω +k+q +i = Hω +kk0nn0 +X +j +Cqj,kPqj + Dqj,kTqj . (4) +The weight factor Hω (see eq A8) is a function of frequency, capturing information about the extent of coupling between +the two modes [n, k] and [n +0 +, k0], where n and n +0 are the radial orders of the modes, and k = |k| and k0 = |k +0 +| = |k+q|. 
+The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the +Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,k and Dqj,k are poloidal +and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements +and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k = C−qj,−k and +Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj (z). +1.2. Least-squares of cross-correlation +Even though φ +ω∗ +k φ +ω +k+q +isolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the +dimension of the problem. A least-squares fit to the cross-correlation φ +ω∗ +k φ +ω +k+q +(see Woodard 2006, 2014, 2016) results +in the B-coefficients Bk,q, according to +Bk,q = +P +ω +Hω∗ +kk0nn0φ +ω∗ +k φ +ω +k+q +P +ω +|Hω +kk0nn0 | +2 +. (5) +Multiplying eq 4 on both sides by Hω∗ +kk0nn0 and substituting by eq 5 on the left-hand-side results in a concisely defined +forward problem (compare with eq 4) +Bk,q = +X +j +Cqj,kPqj + Dqj,kTqj . (6) +In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. +Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk, +|ω| ∈ ωnk − Γnk/2, ωnk + Γnk/2 + +or +|ω| ∈ ωn0k0 − Γn0k0/2, ωn0k0 + Γn0k0/2 + +. (7) +Summing over ±ω guarantees that the parity Bk,q = B∗ +−k,−q +(see Appendix A for derivation) is obeyed, thereby +ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. 
+Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and +−k, +B +∗ +−k,−q = +X +j +C−qj,−kP +∗ +−qj + D−qj,−kT +∗ +−qj +. (8) +Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a +least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a +by-product. +1.3. Noise model +In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from +the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the +following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian +random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. +Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a +random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters -4 Mani et al. -Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and p2 (green). The shaded -regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of -kR and ω/2π to which we have restricted ourselves in this analysis. Beyond kR of 2000, it is seen that the theoretical fitting -of mode frequencies start deviating from the observed dispersion relation for the f-mode. -such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model -as in H21, which was motivated by the above discussion, -Gk,q ≡ h|Bk,q| -2 -i, (9) -where, unlike H21, we again sum over ±ω. 
Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A -for explanation). -2. DATA ANALYSIS -In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the -Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image -is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked -at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm2 -in size, tracked for 24 hours -and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number -2197, Carrington longitude 90◦ -). This Dopplercube is considered as the physical wavefield φ(x, y;t). The Fourier-space -wavefield φ -ω -k -(and subsequently, the cross-correlation φ -ω∗ -k φ -ω -k+q -) is obtained by computing the 3D spatial and temporal -Fourier transform of the Dopplercube. -The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in -Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon -& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination -from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). -Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral -profiles of the two modes [n, k] and [n -0 -, k0 -] closely align in ω space. This implies that their mode frequencies should be -sufficiently close (|ωnk − ωn0k0 | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over -±ω is significant only over a few linewidths (, the summation parameter; see eq 7). 
We have empirically found and -tabulated δ in Table 1 for the radial order couplings n-n -0 ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only -weakly on ; we set it to 3 line widths). -Figure 1 shows that for any two adjacent ridges (adjacent n and n -0 -), mode frequencies ωnk and ωn0k become spaced -farther apart with increasing wavenumber kR . It is also known that mode linewidth Γ grows with radial orders for -a given kR . Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of -observation set the total number of modes within a range of kR (and ω/2π) that can be clearly observed, thereby -affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually -inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR at fixed -radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR ≤ 2000 and qR ≤ 300. Our + Mani et al. +Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and p2 (green). The shaded +regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of +kR and ω/2π to which we have restricted ourselves in this analysis. Beyond kR of 2000, it is seen that the theoretical fitting +of mode frequencies start deviating from the observed dispersion relation for the f-mode. +such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model +as in H21, which was motivated by the above discussion, +Gk,q ≡ h|Bk,q| +2 +i, (9) +where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A +for explanation). +2. 
DATA ANALYSIS +In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the +Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image +is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked +at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm2in size, tracked for 24 hours +and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number +2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x, y;t). The Fourier-space +wavefield φ +ω +k +(and subsequently, the cross-correlation φ +ω∗ +k φ +ω +k+q +) is obtained by computing the 3D spatial and temporal +Fourier transform of the Dopplercube. +The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in +Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon +& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination +from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). +Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral +profiles of the two modes [n, k] and [n +0 +, k0] closely align in ω space. This implies that their mode frequencies should be +sufficiently close (|ωnk − ωn0k0 | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over +±ω is significant only over a few linewidths (, the summation parameter; see eq 7). We have empirically found and +tabulated δ in Table 1 for the radial order couplings n-n +0 ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only +weakly on ; we set it to 3 line widths). 
+Figure 1 shows that for any two adjacent ridges (adjacent n and n +0 +), mode frequencies ωnk and ωn0k become spaced +farther apart with increasing wavenumber kR . It is also known that mode linewidth Γ grows with radial orders for +a given kR . Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +observation set the total number of modes within a range of kR (and ω/2π) that can be clearly observed, thereby +affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually +inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR at fixed +radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR ≤ 2000 and qR ≤ 300. Our frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). -Imaging near-surface flows using mode-coupling analysis 5 -Coupling kR range # of δ -modes -f-f [400,1000] 5240 4 -[1000,1500] 7784 1.1 -[1500,2000] 10940 0.4 -p1-p1 [400,1000] 5240 4.5 -[1000,1750] 12852 2 -p2-p2 [200,1000] 5886 3 -[1000,1300] 4280 3 -Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different -ranges of kR . -3. INVERSION -The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements -Bk,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and -leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods -complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas -SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis -functions fj (z) (J  M; see eq 3 and section 3.1), whereas SOLA scales as M2 -(see Appendix B). 
For M > 5000, -computation starts to quickly become expensive for SOLA. -Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While -f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is -present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are -interested in only surface flows, we leave higher order coupling to future work. -It bears mentioning that the slopes of the ridges in the kR -ν spectrum (Figure 1) increase with radial order. This -limits us to low-to-intermediate kR (< 1000) for these higher radial orders if we are to remain under the acoustic cut￾off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals -from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation. -Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions -separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich -helioseismic technique. -3.1. RLS -For given q, the forward problem may be stated as -KU = B, (10) -with the aim to minimize the misfit P -k -||KU − B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed -by the sensitivity kernels: {Cqj,k, Dqj,k}. U is a vector composed of the flow coefficients: {Pqj , Tqj} and B is a vector -composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and -toroidal flow. We use B-spline basis functions as our fj (z), comprising 11 knots spaced uniformly in acoustic radius, -for both poloidal and toroidal coefficients. 
Hence, for M modes (total number of k for a given q is M) and 11 basis -functions for each poloidal and toroidal, the dimensions of K, U and B are thus M ×22, 22×1, and M ×1 respectively. -Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension -M × M) and pre-multiplying by K| -, -(K|Λ -−1K)U =(K|Λ -−1 -)B, (11) -U =(K|Λ -−1K) -−1K|Λ -−1B. (12) -6 Mani et al. -Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR = [−112, −45], -at the depth zo = −0.41 Mm. Right: L-curve for the mode qR = [−112, −45]; the knee (λ = 2.48) is marked by a blue -diamond. -Since the least-squares problem is typically ill-posed, we restate the minimization as P -k -||KU − B||2 + λ||U||2 with -the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution -norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the -data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this -regularization makes the problem better conditioned and is now defined as -U = (K|Λ -−1K + λI) -−1K|Λ -−1B, (13) -where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed -by plotting ||U||2 vs ||KU − B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the -regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal -flow Pq are shown in Figure 3. -4. LCT -To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained -from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by -examining the advection of convective granules (1.2 Mm, qR ≈ 3500; Hathaway et al. 
2015) by underlying larger￾scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm), -LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. -Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 -(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob￾tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between -consecutive intensity images, which we denote as I1, I2. The LCT method selects a patch in two images each -(I1 = I1e -(x−xij ) -2/2 sigma2 -, I2 = I2e -(x−xij ) -2/2 sigma2 -) that observe the same granule at the grid point xij = (xi -, yj ). -A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance -moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in -section 1.1. The two patches I1, I2 are then cross correlated for different values of position shifts ∆x, -Cij (∆x, ∆y) = Z -dx I -∗ -1 -(−x)I2(∆x − x). (14) -The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule. -Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10 -min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the -images I1, I2 and for each consecutive pair of images in the cube. -In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. 
FLCT +Imaging near-surface flows using mode-coupling analysis 5 +Coupling kR range # of δ +modes +f-f [400,1000] 5240 4 +[1000,1500] 7784 1.1 +[1500,2000] 10940 0.4 +p1-p1 [400,1000] 5240 4.5 +[1000,1750] 12852 2 +p2-p2 [200,1000] 5886 3 +[1000,1300] 4280 3 +Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different +ranges of kR . +3. INVERSION +The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements +Bk,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and +leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods +complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas +SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis +functions fj (z) (J  M; see eq 3 and section 3.1), whereas SOLA scales as M2(see Appendix B). For M > 5000, +computation starts to quickly become expensive for SOLA. +Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While +f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is +present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are +interested in only surface flows, we leave higher order coupling to future work. +It bears mentioning that the slopes of the ridges in the kR -ν spectrum (Figure 1) increase with radial order. This +limits us to low-to-intermediate kR (< 1000) for these higher radial orders if we are to remain under the acoustic cutoff frequency of 5.3mHz. 
It also becomes imperative to use a spatially larger observation patch to gain access to signals +from low kR — too large an observation region could possibly render invalid the Cartesian geometry approximation. +Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions +separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich +helioseismic technique. +3.1. RLS +For a given q, the forward problem may be stated as +$KU = B$, (10) +with the aim to minimize the misfit $\sum_k \|KU - B\|_2$, with $\|\cdot\|_2$ denoting the L2 norm. Here, K is the matrix formed +by the sensitivity kernels: $\{C_{qj,k}, D_{qj,k}\}$. U is a vector composed of the flow coefficients: $\{P_{qj}, T_{qj}\}$ and B is a vector +composed of computed B-coefficients: $\{B_{k,q}\}$. The least-squares problem is solved simultaneously for poloidal and +toroidal flow. We use B-spline basis functions as our $f_j(z)$, comprising 11 knots spaced uniformly in acoustic radius, +for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis +functions each for poloidal and toroidal flow, the dimensions of K, U and B are thus $M \times 22$, $22 \times 1$, and $M \times 1$, respectively. +Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries $G_{k,q}$; see eq 9; dimension +$M \times M$) and pre-multiplying by $K^{\top}$, +$(K^{\top}\Lambda^{-1}K)\,U = (K^{\top}\Lambda^{-1})\,B$, (11) +$U = (K^{\top}\Lambda^{-1}K)^{-1}K^{\top}\Lambda^{-1}B$. (12) + Mani et al. +Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR = [−112, −45], +at the depth zo = −0.41 Mm. Right: L-curve for the mode qR = [−112, −45]; the knee (λ = 2.48) is marked by a blue +diamond.
+Since the least-squares problem is typically ill-posed, we restate the minimization as $\sum_k \|KU - B\|_2 + \lambda \|U\|_2$ with +the regularization parameter λ, which results in a trade-off between misfit reduction (first term) and solution +norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the +data; on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this +regularization makes the problem better conditioned, and the solution is now defined as +$U = (K^{\top}\Lambda^{-1}K + \lambda I)^{-1}K^{\top}\Lambda^{-1}B$, (13) +where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed +by plotting $\|U\|_2$ vs $\|KU - B\|_2$ for different values of λ (see right panel of Figure 2), is usually chosen as the +regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal +flow $P_q$ are shown in Figure 3. +4. LCT +To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained +from the Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by +examining the advection of convective granules (1.2 Mm, qR ≈ 3500; Hathaway et al. 2015) by underlying larger-scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm), +LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. +Time series of intensity images from HMI, with the same properties as the Dopplercubes described in section 2 +(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between +consecutive intensity images, which we denote as I1, I2.
The LCT method selects a patch in two images each +(I1 = I1e +(x−xij ) +2/2 sigma2 +, I2 = I2e +(x−xij ) +2/2 sigma2 +) that observe the same granule at the grid point xij = (xi, yj ). +A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance +moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in +section 1.1. The two patches I1, I2 are then cross correlated for different values of position shifts ∆x, +Cij (∆x, ∆y) = Zdx I +∗ +1 +(−x)I2(∆x − x). (14) +The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule. +Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10 +min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the +images I1, I2 and for each consecutive pair of images in the cube. +In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the -Imaging near-surface flows using mode-coupling analysis 7 -Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR and -qyR . Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the -mean. Total power appears to increase through the radial orders. Power is in units of m2 -/s4 -. -dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the -FLCT code. vx and vy are then computed for consecutive pairs of images and are averaged over the entire day. -5. 
MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY -For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by -substituting P and T from eq 3 into eq 2 as below - -u(q, z) = −∇2Pez + ∇(∂zP) + ∇hT×ez, -= −(0, 0, ∂2 -xP + ∂ -2 -yP + ∂ -2 -zP) + (∂x∂zP, ∂y∂zP, ∂2 -zP) + (∂yT, −∂xT, 0). (15) -Setting ∂ -2 -x + ∂ -2 -y = q -2 -, div is given by, -∇h · u(q, z) = q -2 -∂zP, (16) -and curl is given by, -h -∇ × u(q, z) -i -z -= q -2T. (17) -We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The -essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of -interest (see Figure 4), and subsequently convert it to real space. -We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate -flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The -Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to +Imaging near-surface flows using mode-coupling analysis 7 +Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR and +qyR . Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the +mean. Total power appears to increase through the radial orders. Power is in units of m2/s4. +dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the +FLCT code. vx and vy are then computed for consecutive pairs of images and are averaged over the entire day. +5. 
MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY +For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by +substituting P and T from eq 3 into eq 2 as below u(q, + z) = −∇2Pez + ∇(∂zP) + ∇hT×ez, += −(0, 0, ∂2 +xP + ∂ +2 +yP + ∂ +2 +zP) + (∂x∂zP, ∂y∂zP, ∂2 +zP) + (∂yT, −∂xT, 0). (15) +Setting ∂ +2 +x + ∂ +2 +y = q +2 +, div is given by, +∇h · u(q, z) = q +2 +∂zP, (16) +and curl is given by, +h +∇ × u(q, z) +i +z += q +2T. (17) +We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The +essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of +interest (see Figure 4), and subsequently convert it to real space. +We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate +flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The +Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to obtain a real-space steady-flow map. -8 Mani et al. -Figure 4. Left: Divergence-flow power spectrum |div| -2 -, from eqn 16, obtained from inversion using all the couplings. The -power-spectrum is then filtered with a bandpass centered around qR = 150 (middle panel). The resulting spectra is shown in -the right panel. The units of |div| -2 -are in s−2 -. For illustration, we show the action of the filter on the power-spectrum |div| -2 -since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. -For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of -smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply -computed by -div = ∂xvx + ∂yvy, (18) -curl = ∂xvy − ∂yvx. 
(19) -We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, -and transform back to real space. -Condensing all of the above, the following sequence of operations to compare flows at desired length scales are -performed for mode-coupling (M-C) and for LCT - -M-C : φ(x, y;t) -3D FFT =====⇒ φ -ω -k -, Bk,q -inversion ======⇒ P, T ∇h· -===⇒ -∇× -eqns 16, 17 Filter, -=====⇒ -2D FFT -div, curl -LCT : I1, I2 -FLCT ====⇒ vx, vy -smooth, -======⇒ -∇h· ∇× -eqns 18, 19 2D FFT, -======⇒ -Filter -Filtered, -Fourier-space -flows -2D FFT =====⇒ div, curl -6. RESULTS -Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5, -where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from -the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the -couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of -vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between -the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence -flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to -insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near -the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished -through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical -flows at different scales (qR = 100, 150) can yield extremely good agreement with LCT. 
As the length scale of the -inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases. -An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to -comment substantively on the flows at these scales. + Mani et al. +Figure 4. Left: Divergence-flow power spectrum |div| +2 +, from eqn 16, obtained from inversion using all the couplings. The +power-spectrum is then filtered with a bandpass centered around qR = 150 (middle panel). The resulting spectra is shown in +the right panel. The units of |div| +2 +are in s−2. For illustration, we show the action of the filter on the power-spectrum |div| +2 +since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. +For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of +smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply +computed by +div = ∂xvx + ∂yvy, (18) +curl = ∂xvy − ∂yvx. (19) +We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, +and transform back to real space. +Condensing all of the above, the following sequence of operations to compare flows at desired length scales are +performed for mode-coupling (M-C) and for LCT M-C + : φ(x, y;t) +3D FFT =====⇒ φ +ω +k +, Bk,q +inversion ======⇒ P, T ∇h· +===⇒ +∇× +eqns 16, 17 Filter,=====⇒ +2D FFT +div, curl +LCT : I1, I2 +FLCT ====⇒ vx, vy +smooth, +======⇒ +∇h· ∇× +eqns 18, 19 2D FFT,======⇒ +Filter +Filtered, +Fourier-space +flows +2D FFT =====⇒ div, curl +6. RESULTS +Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. 
Figure 5, +where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from +the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the +couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of +vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between +the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence +flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to +insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near +the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished +through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical +flows at different scales (qR = 100, 150) can yield extremely good agreement with LCT. As the length scale of the +inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases. +An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to +comment substantively on the flows at these scales. 6.1. Amplitudes of mode-coupling flows -Imaging near-surface flows using mode-coupling analysis 9 -(a) qR = 100, f-f + p1-p1 + p2-p2 -Figure 5. Real-space divergence flows (left column, in units of 10−5 -s -−1 -) and radial vorticity (right column, in units of 10−6 -s -−1 -) -for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around -qR = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. 
We cut edges -out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter -plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum -values. -For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated -numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward -a precise statement on them. H21 reported a 60% greater amplitude for p1-p1 over f-f coupling (Figure 3 reflects a -similar conclusion), another element to consider when combining different radial orders. The choice of regularization -(see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow -amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. -This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a -radial grid along which kernels and flows tend to be described. -Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors: -• Coupling(s) used, -• Regularization parameter in the inversion, -• Smoothing applied to LCT flows (indirectly; see below paragraph), -• The depth at which flows are inferred. -Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close -to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR , we first fix the coupling(s) +Imaging near-surface flows using mode-coupling analysis 9 +(a) qR = 100, f-f + p1-p1 + p2-p2 +Figure 5. 
Real-space divergence flows (left column, in units of 10−5s +−1 +) and radial vorticity (right column, in units of 10−6s +−1 +) +for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around +qR = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges +out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter +plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum +values. +For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated +numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward +a precise statement on them. H21 reported a 60% greater amplitude for p1-p1 over f-f coupling (Figure 3 reflects a +similar conclusion), another element to consider when combining different radial orders. The choice of regularization +(see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow +amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. +This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a +radial grid along which kernels and flows tend to be described. +Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors: +• Coupling(s) used, +• Regularization parameter in the inversion, +• Smoothing applied to LCT flows (indirectly; see below paragraph), +• The depth at which flows are inferred. +Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close +to the surface (within ±0.5 Mm from z=0). 
For a desired comparison length scale qR , we first fix the coupling(s) and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and -10 Mani et al. -(a) qR = 100, f-f (b) qR = 150, p1-p1 -Figure 6. Real-space divergence flows (left column, in units of 10−5 -s -−1 -) and radial vorticity (right column, in units of 10−6 -s -−1 -) -for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around -qR = 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR = 150. We cut edges out from the flow -maps and compare a circular region of diameter ≈175 Mm. -(a) qR = 200, f-f + p1-p1 + p2-p2 (b) qR = 250, f-f + p1-p1 + p2-p2 -Figure 7. Real-space divergence flows (left column, in units of 10−5 -s -−1 -) and radial vorticity (right column, in units of 10−6 -s -−1 -) -for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around -(a) qR = 200, and (b) qR = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. -vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained -from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation -(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired -qR . -It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line-of-sight velocity from Dopplergrams -and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes -for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history -(see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨oning et al. 2020; Korda + Mani et al. 
+(a) qR = 100, f-f (b) qR = 150, p1-p1 +Figure 6. Real-space divergence flows (left column, in units of 10−5s +−1 +) and radial vorticity (right column, in units of 10−6s +−1 +) +for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around +qR = 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR = 150. We cut edges out from the flow +maps and compare a circular region of diameter ≈175 Mm. +(a) qR = 200, f-f + p1-p1 + p2-p2 (b) qR = 250, f-f + p1-p1 + p2-p2 +Figure 7. Real-space divergence flows (left column, in units of 10−5s +−1 +) and radial vorticity (right column, in units of 10−6s +−1 +) +for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around +(a) qR = 200, and (b) qR = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. +vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained +from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation +(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired +qR . +It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line-of-sight velocity from Dopplergrams +and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes +for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history +(see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨oning et al. 2020; Korda & Svanda ˇ 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al. 
-Imaging near-surface flows using mode-coupling analysis 11 -Coupling qR div curl -f-f 100 0.97 0.87 -+ p1-p1 150 0.95 0.76 -+ p2-p2 200 0.92 0.76 -250 0.85 0.65 -f-f 100 0.96 0.85 -150 0.93 0.76 -200 0.89 0.69 -250 0.77 0.58 -p1-p1 100 0.95 0.83 -150 0.95 0.75 -200 0.92 0.75 -250 0.85 0.61 -p2-p2 100 0.94 0.7 -150 0.91 0.39 -200 0.79 0.3 -250 0.55 0.3 -Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, -respectively. -(2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes -from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see -Verma et al. 2013; L¨optien et al. 2016). Even for the case of supergranulation divergence maps obtained through -ring-diagram helioseismology, Greer et al. (2016) only report normalized amplitudes. -In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used -with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built -using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing -of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve -signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic -applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013; -Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves. -APPENDIX -A. DERIVATION OF THE FORWARD MODEL -As described in section 1.1, we seek to describe the flow u as a function of q along ez. To that end, substituting -eq 3 into eq 2, -u -σ -q -(z) = X -j - -q -2 -fjez + iq f -0 -j - -P -σ -jq + iq×ez fjT -σ -jq -. 
(A1) -For flows in the anelastic limit (u  speed of sound), we can denote the flow perturbation operator as δL -σ = -−2iωρu -σ -· ∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, -δL -σ -q = −2iω ρ (iu -σ -q -· k + u -σ -q -· ez∂z), (A2) -=−2iωρP -j - -−k · q f -0 -jP -σ -jq − k · (q×ez) fjT -σ -jq + q -2 fjP -σ -jq ∂z - +Imaging near-surface flows using mode-coupling analysis 11 +Coupling qR div curl +f-f 100 0.97 0.87 ++ p1-p1 150 0.95 0.76 ++ p2-p2 200 0.92 0.76 +250 0.85 0.65 +f-f 100 0.96 0.85 +150 0.93 0.76 +200 0.89 0.69 +250 0.77 0.58 +p1-p1 100 0.95 0.83 +150 0.95 0.75 +200 0.92 0.75 +250 0.85 0.61 +p2-p2 100 0.94 0.7 +150 0.91 0.39 +200 0.79 0.3 +250 0.55 0.3 +Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, +respectively. +(2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes +from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see +Verma et al. 2013; L¨optien et al. 2016). Even for the case of supergranulation divergence maps obtained through +ring-diagram helioseismology, Greer et al. (2016) only report normalized amplitudes. +In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used +with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built +using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing +of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve +signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic +applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013; +Hanson et al. 
2020), emerging active regions, meridional flows and Rossby waves. +APPENDIX +A. DERIVATION OF THE FORWARD MODEL +As described in section 1.1, we seek to describe the flow u as a function of q along ez. To that end, substituting +eq 3 into eq 2, +u +σ +q +(z) = X +j + +q +2 +fjez + iq f +0 +j + +P +σ +jq + iq×ez fjT +σ +jq +. (A1) +For flows in the anelastic limit (u  speed of sound), we can denote the flow perturbation operator as δL +σ = +−2iωρu +σ +· ∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, +δL +σ +q = −2iω ρ (iu +σ +q +· k + u +σ +q +· ez∂z), (A2) +=−2iωρP +j + +−k · q f +0 +jP +σ +jq − k · (q×ez) fjT +σ +jq + q +2 fjPσ +jq ∂z + . (A3) -12 Mani et al. -Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) -ξk ≡ ξnk(z) = ikˆHnk(z)ez + ˆzVnk(z), (A4) -where H and V are real-valued functions; n and n -0 are dropped for compactness of notation. Then the coupling of -two modes ξk and ξk0 (k -0 = k + q), by the flow perturbation operator δL -σ -q -, denoted by coupling integral Λk -k0 (σ), is -given by -Λ -k -k0 (σ) ≡ -Z -dx (δL -σ -q ξk -) · ξ -∗ -k0 = -Z -dx -" -− 2iωρX -j -n -q -2 -fjP -σ -jq -(kˆ · kˆ -0 -H0 -kH∗ -k0 + V -0 -kV -∗ -k0 ) -− - -k · q f -0 -jP -σ -jq + k · (q×ez) fjT -σ -jq - -(kˆ · kˆ -0 -HkH∗ -k0 + VkV -∗ -k0 ) -o -# -(A5) -We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and -toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by -Cqj,k = -Z -dz ρ h -q -2 -fj (kˆ · kˆ -0 -H0 -kH∗ -k0 + V -0 -kV -∗ -k0 ) -−k · q f -0 -j -(kˆ · kˆ -0 -HkH∗ -k0 + VkV -∗ -k0 ) -i -, -Dqj,k = k · (q×ez) -Z -dz ρ fj (kˆ · kˆ -0 -HkH∗ -k0 + VkV -∗ -k0 ). (A6) -Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. 
This coupling integral contributes to the cross-spectral -measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on -wavefield cross-correlation as -hφ -ω∗ -k φ -ω+σ -k+q -i = Hω -kk0σΛ -k -k0 (σ), (A7) -where the function H is given by -Hω -kk0σ = −2iω(Nk|R -ω -k -| -2 R -ω+σ -k0 + Nk0 |R -ω+σ -k0 | -2 R -ω∗ -k -). (A8) -We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. -The mode spectral profile R is a Lorentzian, given by -R -ω -k = -1 -ω -2 -nk − ω2 − iωγnk/2 -, (A9) -where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing -mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq -5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. -Also, the parity Hω -kk0σ = H -−ω∗ -kk0−σ -and Rω -k = R -−ω∗ -k -are established. Mode normalization N is given by -Nk = -1 -Q -X -Q -k -P -ω -|φ -ω -k -| -2 -P -ω -Rω -k -, (A10) -where the 1 -Q -P -Q -k -on the right-hand-side implies average over all [kx, ky] (Q terms in all) such that k = |k| is constant. -This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ωnk. -Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. -The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve -to establish the parity Bσ -k,q = B -∗−σ -−k,−q -. This allows for obtaining P -σ -q = P -∗−σ -−q -, and subsequently, purely real flow in -the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into -the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ -k,q = G -−σ -−k,−q -. 
-Imaging near-surface flows using mode-coupling analysis 13 -B. SOLA INVERSIONS -Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors -P -for the mode q and depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion -k -αk,zoBk,q allows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients -αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an -’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen -which the averaging kernel should resemble after performing inversion. -B.1. Kernels in the integral form -Since the kernels in eq A6 are manifest as coefficients on a basis fj (z), we first derive kernels that can be expressed -as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: -P ≡ Pq(z), p ≡ Pqj , F ≡ fj (z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for -simplicity, the same derivations hold true for toroidal flow as well) -P = F p (B11) -The size of P is thus the same as the length of the radial grid z. -Now, pre-multiply by F -T and integrate over z on both sides (drop the integral notation for compactness), -F -T P = (F -T F)p -p = (F -T F) -−1 F -T P (B12) -Now, substituting eq B12 into the forward problem eq 6, -B = Cp -= (F -T F) -−1F -T CP -= KP (B13) -where -K = (F -T F) -−1F -T C, -i.e., Kk,q(z) = X -j,j0 -h Z -dz fj (z)fj -0 (z) -i−1 -fj -0 (z)Cqj -0 -,k (B14) -B.2. Obtaining the coefficients α -Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at zo -T (z, zo) = 1 -√ -2π∆2 -expz − zo -2∆2 - -. 
(B15) -This can be achieved by solving the optimization problem -minimize X = -Z -dz -h -T (z, zo) − Θq(z, zo) -i2 -, (B16) -where we introduce the averaging kernel for mode q thus -Θq(z, zo) = X -k -αk,zoKk,q(z). (B17) -As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 + Mani et al. +Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) +ξk ≡ ξnk(z) = ikˆHnk(z)ez + ˆzVnk(z), (A4) +where H and V are real-valued functions; n and n +0 are dropped for compactness of notation. Then the coupling of +two modes ξk and ξk0 (k +0 = k + q), by the flow perturbation operator δL +σ +q +, denoted by coupling integral Λk +k0 (σ), is +given by +Λ +k +k0 (σ) ≡ +Z +dx (δL +σ +q ξk +) · ξ +∗ +k0 = +Z +dx +" +− 2iωρX +j +n +q +2 +fjP +σ +jq +(kˆ · kˆ +0 +H0 +kH∗k0 + V +0 +kV +∗ +k0 ) +− + +k · q f +0 +jP +σ +jq + k · (q×ez) fjT +σ +jq + +(kˆ · kˆ +0 +HkH∗ +k0 + VkV +∗ +k0 ) +o +# +(A5) +We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and +toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by +Cqj,k = +Z +dz ρ hq +2 +fj (kˆ · kˆ +0 +H0 +kH∗k0 + V +0 +kV +∗ +k0 ) +−k · q f +0 +j +(kˆ · kˆ +0 +HkH∗ +k0 + VkV +∗ +k0 ) +i +, +Dqj,k = k · (q×ez) +Z +dz ρ fj (kˆ · kˆ +0 +HkH∗ +k0 + VkV +∗ +k0 ). (A6) +Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. This coupling integral contributes to the cross-spectral +measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on +wavefield cross-correlation as +hφ +ω∗ +k φ +ω+σ +k+q +i = Hω +kk0σΛ +k +k0 (σ), (A7) +where the function H is given by +Hω +kk0σ = −2iω(Nk|R +ω +k +| +2 R +ω+σ +k0 + Nk0 |R +ω+σ +k0 | +2 Rω∗ +k +). (A8) +We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. 
+The mode spectral profile R is a Lorentzian, given by +R +ω +k = +1 +ω +2 +nk − ω2 − iωγnk/2 +, (A9) +where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing +mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq +5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. +Also, the parity Hω +kk0σ = H +−ω∗ +kk0−σ +and Rω +k = R +−ω∗ +k +are established. Mode normalization N is given by +Nk = +1 +Q +X +Q +k +P +ω +|φ +ω +k +| +2 +P +ω +Rω +k +, (A10) +where the 1 +Q +P +Q +k +on the right-hand-side implies average over all [kx, ky] (Q terms in all) such that k = |k| is constant. +This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ωnk. +Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. +The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve +to establish the parity Bσ +k,q = B +∗−σ +−k,−q +. This allows for obtaining P +σ +q = P +∗−σ +−q +, and subsequently, purely real flow in +the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into +the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ +k,q = G +−σ +−k,−q +. +Imaging near-surface flows using mode-coupling analysis 13 +B. SOLA INVERSIONS +Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors +P +for the mode q and depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion +k +αk,zoBk,q allows for an average value of the flow Pq(z) to be estimated at the depth zo. 
To obtain the coefficients +αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an +’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen +which the averaging kernel should resemble after performing inversion. +B.1. Kernels in the integral form +Since the kernels in eq A6 are manifest as coefficients on a basis fj (z), we first derive kernels that can be expressed +as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: +P ≡ Pq(z), p ≡ Pqj , F ≡ fj (z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for +simplicity, the same derivations hold true for toroidal flow as well) +P = F p (B11) +The size of P is thus the same as the length of the radial grid z. +Now, pre-multiply by F +T and integrate over z on both sides (drop the integral notation for compactness), +F +T P = (FT F)p +p = (F +T F)−1 FT P (B12) +Now, substituting eq B12 into the forward problem eq 6, +B = Cp += (F +T F)−1FT CP += KP (B13) +where +K = (F +T F)−1FT C, +i.e., Kk,q(z) = X +j,j0 +h Z +dz fj (z)fj +0 (z) +i−1 +fj +0 (z)Cqj +0 +,k (B14) +B.2. Obtaining the coefficients α +Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at zo +T (z, zo) = 1 +√ +2π∆2 +expz − zo +2∆2 + +. (B15) +This can be achieved by solving the optimization problem +minimize X = +Z +dz +h +T (z, zo) − Θq(z, zo) +i2 +, (B16) +where we introduce the averaging kernel for mode q thus +Θq(z, zo) = X +k +αk,zoKk,q(z). (B17) +As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 and B14. -14 Mani et al. -Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR = -[−112, −45] and kR = [−853, −157] is chosen for all the radial order couplings for comparison. 
Right: Averaging kernel -(eq B17) using SOLA, for qR = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). -Integral of the averaging kernel over z is 0.89. -Setting ∂X -∂α → 0 gives us the matrix problem to be solved -A{α} = v, -{α} = -h -A + µIi−1 -v, (B18) -where the square matrix A = -R -dz Kk,q(z)Kk0 -,q(z) and v = -R -dz Kk,q(z)T (z, zo). Here, k -0 -is just a dummy index for -denoting elements in the matrix A, (k -0 -6= k+q). In the last line of eq B18, we introduce regularization using an Identity -matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining -α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α -obtained from eq B18 into last line of eq B13, and P -k -on both sides -X -k -αk,zoB -σ -k,q = -X -k -αk,zo -Z -dz Kk,q(z)P -σ -q -(z), -= -Z -dz Θq(z, zo)P -σ -q -(z), -≈ hP -σ -q -(zo)i (B19) -Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di￾vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. -REFERENCES -Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. -1990, ApJ, 364, 699, doi: 10.1086/169452 -Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of -Modern Physics, 64, 885, -doi: 10.1103/RevModPhys.64.885 -Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, -Science Advances, 2, e1600557, -doi: 10.1126/sciadv.1600557 -Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, -A&A, 628, A37, doi: 10.1051/0004-6361/201935591 -B¨oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & -Schou, J. 2020, A&A, 635, A181, -doi: 10.1051/0004-6361/201937331 -Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, -doi: 10.1086/324323 -Christensen-Dalsgaard, J. 2002, Reviews of Modern -Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 -—. 2021, Living Reviews in Solar Physics, 18, 2, + Mani et al. 
+Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR = +[−112, −45] and kR = [−853, −157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel +(eq B17) using SOLA, for qR = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). +Integral of the averaging kernel over z is 0.89. +Setting ∂X +∂α → 0 gives us the matrix problem to be solved +A{α} = v, +{α} = +h +A + µIi−1v, (B18) +where the square matrix A = +R +dz Kk,q(z)Kk0 +,q(z) and v = +R +dz Kk,q(z)T (z, zo). Here, k +0 +is just a dummy index for +denoting elements in the matrix A, (k +0 +6= k+q). In the last line of eq B18, we introduce regularization using an Identity +matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining +α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α +obtained from eq B18 into last line of eq B13, and P +k +on both sides +X +k +αk,zoB +σ +k,q = +X +k +αk,zo +Z +dz Kk,q(z)P +σ +q +(z), += +Z +dz Θq(z, zo)P +σ +q +(z), +≈ hP +σ +q +(zo)i (B19) +Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. +REFERENCES +Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. +1990, ApJ, 364, 699, doi: 10.1086/169452 +Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of +Modern Physics, 64, 885, +doi: 10.1103/RevModPhys.64.885 +Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, +Science Advances, 2, e1600557, +doi: 10.1126/sciadv.1600557 +Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, +A&A, 628, A37, doi: 10.1051/0004-6361/201935591 +B¨oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & +Schou, J. 2020, A&A, 635, A181, +doi: 10.1051/0004-6361/201937331 +Braun, D. C., & Lindsey, C. 
2001, ApJL, 560, L189, +doi: 10.1086/324323 +Christensen-Dalsgaard, J. 2002, Reviews of Modern +Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 +—. 2021, Living Reviews in Solar Physics, 18, 2, doi: 10.1007/s41116-020-00028-3 -Imaging near-surface flows using mode-coupling analysis 15 -Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR and qyR . Right: Corresponding power-spectrum -averaged over the azimuthal angle. Shaded region shows ±1 − σ error around the mean. Power is in units of m2 -/s4 -. -Figure 10. Real-space divergence flows (in units of 10−5 -s -−1 -) for mode-coupling inversion through SOLA using f-f coupling, -and LCT, bandpass filtered around qR = 100. We cut edges out from the flow maps and compare a circular region of diameter -≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is -1.05. For demonstration, we show inversions only for poloidal flow using SOLA. -De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, -192, 351, doi: 10.1023/A:1005269001739 -De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, -doi: 10.1086/424920 -Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced -Study Institute (ASI) Series C, Vol. 169, Seismology of -the Sun and the Distant Stars, ed. D. O. Gough, 105–116 -Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & -Pomerantz, M. A. 1993, Nature, 362, 430, -doi: 10.1038/362430a0 -Fisher, G. H., & Welsch, B. T. 2008, in Astronomical -Society of the Pacific Conference Series, Vol. 383, -Subsurface and Atmospheric Influences on Solar Activity, -ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, & -G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289 -Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. -1997, Nature, 390, 52, doi: 10.1038/36294 -Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, -doi: 10.1086/423367 -Gizon, L., Cameron, R. H., Pourabdian, M., et al. 
2020, -Science, 368, 1469, doi: 10.1126/science.aaz7119 -Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, -652, L6, doi: 10.1051/0004-6361/202141462 -Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, -824, 128, doi: 10.3847/0004-637X/824/2/128 -Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32, -doi: 10.3847/2041-8213/aaff60 -Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020, -Science Advances, 6, eaba9639, -doi: 10.1126/sciadv.aba9639 -Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., & -Sreenivasan, K. R. 2017, MNRAS, 470, 1404, -doi: 10.1093/mnras/stx1298 -Hansen, P. C. 1992, SIAM review, 34, 561 -Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., & -Sreenivasan, K. R. 2020, A&A, 644, A103, -doi: 10.1051/0004-6361/202039108 -Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, -ApJ, 910, 156, doi: 10.3847/1538-4357/abe770 -Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. +Imaging near-surface flows using mode-coupling analysis 15 +Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR and qyR . Right: Corresponding power-spectrum +averaged over the azimuthal angle. Shaded region shows ±1 − σ error around the mean. Power is in units of m2/s4. +Figure 10. Real-space divergence flows (in units of 10−5s +−1 +) for mode-coupling inversion through SOLA using f-f coupling, +and LCT, bandpass filtered around qR = 100. We cut edges out from the flow maps and compare a circular region of diameter +≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is +1.05. For demonstration, we show inversions only for poloidal flow using SOLA. +De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, +192, 351, doi: 10.1023/A:1005269001739 +De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, +doi: 10.1086/424920 +Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced +Study Institute (ASI) Series C, Vol. 169, Seismology of +the Sun and the Distant Stars, ed. D. 
O. Gough, 105–116 +Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & +Pomerantz, M. A. 1993, Nature, 362, 430, +doi: 10.1038/362430a0 +Fisher, G. H., & Welsch, B. T. 2008, in Astronomical +Society of the Pacific Conference Series, Vol. 383, +Subsurface and Atmospheric Influences on Solar Activity, +ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, & +G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289 +Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. +1997, Nature, 390, 52, doi: 10.1038/36294 +Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, +doi: 10.1086/423367 +Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, +Science, 368, 1469, doi: 10.1126/science.aaz7119 +Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, +652, L6, doi: 10.1051/0004-6361/202141462 +Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, +824, 128, doi: 10.3847/0004-637X/824/2/128 +Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32, +doi: 10.3847/2041-8213/aaff60 +Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020, +Science Advances, 6, eaba9639, +doi: 10.1126/sciadv.aba9639 +Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., & +Sreenivasan, K. R. 2017, MNRAS, 470, 1404, +doi: 10.1093/mnras/stx1298 +Hansen, P. C. 1992, SIAM review, 34, 561 +Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., & +Sreenivasan, K. R. 2020, A&A, 644, A103, +doi: 10.1051/0004-6361/202039108 +Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, +ApJ, 910, 156, doi: 10.3847/1538-4357/abe770 +Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. 2015, ApJ, 811, 105, doi: 10.1088/0004-637X/811/2/105 -16 Mani et al. -Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science, -342, 1217, doi: 10.1126/science.1244682 -Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807 -Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard, -M. F., & Tromp, J. 2021, ApJS, 253, 47, -doi: 10.3847/1538-4365/abdf5e -Korda, D., & Svanda, M. 
2021, A&A, 646, A184, ˇ -doi: 10.1051/0004-6361/202039928 -Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, -A97, doi: 10.1051/0004-6361/201732471 -Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, -A67, doi: 10.1051/0004-6361/201526024 -Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical -Transactions of the Royal Society of London Series A, -339, 431, doi: 10.1098/rsta.1992.0048 -Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261, -doi: 10.1023/A:1005227200911 -L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., & -Schou, J. 2016, A&A, 587, A9, -doi: 10.1051/0004-6361/201526805 -L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature -Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x -Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, -doi: 10.3847/1538-4357/ab7227 -Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652, -A96, doi: 10.1051/0004-6361/202141044 -Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139, -doi: 10.3847/1538-4357/abb133 -—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6 -November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, -doi: 10.1086/166758 -Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 -Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., & -Stein, R. 2001, A&A, 377, L14, -doi: 10.1051/0004-6361:20011160 -Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar -Physics, 15, 6, doi: 10.1007/s41116-018-0013-5 -Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, -M. 2017, A&A, 599, A69, -doi: 10.1051/0004-6361/201629747 -Schad, A., & Roth, M. 2020, ApJ, 890, 32, -doi: 10.3847/1538-4357/ab65ec -Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh, -275, 207, doi: 10.1007/s11207-011-9834-2 -Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505, -390, doi: 10.1086/306146 -Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed. -J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389 -Sekii, T., Kosovichev, A. G., Zhao, J., et al. 
2007, PASJ, -59, S637, doi: 10.1093/pasj/59.sp3.S637 -Snodgrass, H. B. 1984, SoPh, 94, 13, -doi: 10.1007/BF00154804 -Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996, -Science, 272, 1300, doi: 10.1126/science.272.5266.1300 -Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H. -1989, Nonradial oscillations of stars -Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, -A136, doi: 10.1051/0004-6361/201321628 -Vorontsov, S. V. 2011, MNRAS, 418, 1146, -doi: 10.1111/j.1365-2966.2011.19564.x -Woodard, M. 2014, SoPh, 289, 1085, -doi: 10.1007/s11207-013-0386-5 -Woodard, M., Schou, J., Birch, A. C., & Larson, T. P. -2013, SoPh, 287, 129, doi: 10.1007/s11207-012-0075-9 -Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206 -—. 2006, ApJ, 649, 1140, doi: 10.1086/506927 -—. 2007, ApJ, 668, 1189, doi: 10.1086/521391 -—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223 -Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007, -ApJ, 659, 848, doi: 10.1086/512009 -Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., -& Duvall, T. L., J. 2012, ApJL, 749, L5, -doi: 10.1088/2041-8205/749/1/L5 + Mani et al. +Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science, +342, 1217, doi: 10.1126/science.1244682 +Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807 +Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard, +M. F., & Tromp, J. 2021, ApJS, 253, 47, +doi: 10.3847/1538-4365/abdf5e +Korda, D., & Svanda, M. 2021, A&A, 646, A184, ˇ +doi: 10.1051/0004-6361/202039928 +Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, +A97, doi: 10.1051/0004-6361/201732471 +Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, +A67, doi: 10.1051/0004-6361/201526024 +Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical +Transactions of the Royal Society of London Series A, +339, 431, doi: 10.1098/rsta.1992.0048 +Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261, +doi: 10.1023/A:1005227200911 +L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., & +Schou, J. 
2016, A&A, 587, A9, +doi: 10.1051/0004-6361/201526805 +L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature +Astronomy, 2, 568, doi: 10.1038/s41550-018-0460-x +Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, +doi: 10.3847/1538-4357/ab7227 +Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652, +A96, doi: 10.1051/0004-6361/202141044 +Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139, +doi: 10.3847/1538-4357/abb133 +—. 2021, ApJ, 920, 36, doi: 10.3847/1538-4357/ac1ad6 +November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, +doi: 10.1086/166758 +Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 +Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., & +Stein, R. 2001, A&A, 377, L14, +doi: 10.1051/0004-6361:20011160 +Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar +Physics, 15, 6, doi: 10.1007/s41116-018-0013-5 +Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, +M. 2017, A&A, 599, A69, +doi: 10.1051/0004-6361/201629747 +Schad, A., & Roth, M. 2020, ApJ, 890, 32, +doi: 10.3847/1538-4357/ab65ec +Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh, +275, 207, doi: 10.1007/s11207-011-9834-2 +Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505, +390, doi: 10.1086/306146 +Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed. +J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389 +Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ, +59, S637, doi: 10.1093/pasj/59.sp3.S637 +Snodgrass, H. B. 1984, SoPh, 94, 13, +doi: 10.1007/BF00154804 +Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996, +Science, 272, 1300, doi: 10.1126/science.272.5266.1300 +Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H. +1989, Nonradial oscillations of stars +Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, +A136, doi: 10.1051/0004-6361/201321628 +Vorontsov, S. V. 2011, MNRAS, 418, 1146, +doi: 10.1111/j.1365-2966.2011.19564.x +Woodard, M. 2014, SoPh, 289, 1085, +doi: 10.1007/s11207-013-0386-5 +Woodard, M., Schou, J., Birch, A. 
C., & Larson, T. P. +2013, SoPh, 287, 129, doi: 10.1007/s11207-012-0075-9 +Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206 +—. 2006, ApJ, 649, 1140, doi: 10.1086/506927 +—. 2007, ApJ, 668, 1189, doi: 10.1086/521391 +—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223 +Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007, +ApJ, 659, 848, doi: 10.1086/512009 +Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., +& Duvall, T. L., J. 2012, ApJL, 749, L5, +doi: 10.1088/2041-8205/749/1/L5 \ No newline at end of file diff --git a/read/results/pdfium/2201.00200.txt b/read/results/pdfium/2201.00200.txt index 2074c3d..d1a20de 100644 --- a/read/results/pdfium/2201.00200.txt +++ b/read/results/pdfium/2201.00200.txt @@ -1,536 +1,527 @@ -Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022 -January 4, 2022 -Local heating due to convective overshooting and the solar -modelling problem -I. Baraffe -1,2 -, T. Constantino1 -, J. Clarke1 -, A. Le Saux1,2 -, T. Goffrey4 -, T. Guillet1 -, J. Pratt3 -, D. G. Vlaykov1 -1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk) -2 Ecole Normale Sup ´ erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´ e de Lyon, France ´ -3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA -4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK -ABSTRACT -Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary -of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in one￾dimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a -solar model. 
This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a -local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in -the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun -inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the -speed of sound. These effects could help reduce the discrepancies between solar models and observed constraints based on seismic -inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective -penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach. -Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior -1. Introduction -Modelling the internal structure of the Sun is still a challenge. -A recent review by Christensen-Dalsgaard (2021) describes in -detail the long-standing efforts to improve solar models. The so￾lar modelling problem refers to the discrepancy between helio￾seismology and solar interior models that adopt low metallici￾ties predicted by the three-dimensional (3D) atmosphere models -of, for example, Asplund et al. (2009) and Caffau et al. (2011), -in contrast to the high metallicities based on previous litera￾ture compilations by, for example, Anders & Grevesse (1989) -and Grevesse & Noels (1993). Asplund et al. (2021) have re￾cently confirmed with state-of-the-art 3D simulations the rela￾tively low metal abundances for the Sun. Asplund et al. (2021) -consider that their study yields the most reliable solar abun￾dances available today, suggesting that the solar modelling prob￾lem is no longer a problem of abundances but rather a problem -of stellar physics. 
The treatment of mixing below the convective -zone is one of the key processes that could improve solar mod￾els. Several studies indeed reveal that the process of convective -penetration, also called overshooting, at the bottom of the con￾vective envelope could play an important role in improving the -agreement between solar models and helioseismic constraints -(see for example Christensen-Dalsgaard et al. 2011; Zhang et al. -2012; Buldgen et al. 2019b). Overshooting in solar models has -most often been treated using diffusive or instantaneous chemi￾cal mixing. A temperature gradient that sharply transitions from -a nearly adiabatic form to a radiative form is usually assumed, -as suggested by the theoretical work of Zahn (1991). Models -with a smoother transition have also been investigated. Based -on the analysis of models with different stratifications near the -Send offprint requests to: I. Baraffe -base of the convective zone, Christensen-Dalsgaard et al. (2011) -found that models that better fit the helioseismic data have a -weakly sub-adiabatic temperature gradient in the lower part of -the convective zone and a smooth transition to the radiative gra￾dient in the overshooting layer. But Christensen-Dalsgaard et al. -(2011) noted that the required temperature stratification is diffi￾cult to reconcile with existing overshooting models and numer￾ical simulations. They concluded that only non-local turbulent -convection models could produce the desired degree of smooth￾ness in the transition (see for example Zhang & Li 2012; Zhang -et al. 2012). But these non-local models remain uncertain, and -their description of overshooting under the conditions found at -the base of the solar convective zone is yet to be validated. -Zhang et al. (2019) explored the impact of overshooting by -introducing a parametrised turbulent kinetic energy flux based -on a model with parameters that are adjusted to improve the -helioseismic properties. 
They suggest that amelioration can be -obtained specifically below the convective envelope. However, -Zhang et al. (2019) find that this model cannot solve the whole -solar problem because such a flux worsens the sound-speed pro￾file in the deep radiative interior of their solar model. Given the -uncertainties regarding the temperature stratification of the over￾shooting region, solar modellers have considered these effects as -secondary and have focused their efforts on exploring the impact -of solar abundances, microphysics (opacities, equations of state, -nuclear reaction rates), and chemical mixing and diffusion (see -details and references in the review of Buldgen et al. 2019a). -Additional, more exotic effects such as early disk accretion or -solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot -2021) are also attracting increasing attention. -To reinvigorate the debate, Buldgen et al. (2019b) recently -highlighted once again how the transition of the temperature gra￾1 +Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022 +January 4, 2022 +Local heating due to convective overshooting and the solar +modelling problem +I. Baraffe +1,2 +, T. Constantino1, J. Clarke1, A. Le Saux1,2, T. Goffrey4, T. Guillet1, J. Pratt3, D. G. Vlaykov1 +1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk) +2 Ecole Normale Sup ´ erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´ e de Lyon, France ´ +3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA +4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK +ABSTRACT +Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary +of the convective envelope modify the thermal background in the overshooting layer. 
Based on these results, we implement in onedimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a +solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a +local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in +the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun +inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the +speed of sound. These effects could help reduce the discrepancies between solar models and observed constraints based on seismic +inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective +penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach. +Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior +1. Introduction +Modelling the internal structure of the Sun is still a challenge. +A recent review by Christensen-Dalsgaard (2021) describes in +detail the long-standing efforts to improve solar models. The solar modelling problem refers to the discrepancy between helioseismology and solar interior models that adopt low metallicities predicted by the three-dimensional (3D) atmosphere models +of, for example, Asplund et al. (2009) and Caffau et al. (2011), +in contrast to the high metallicities based on previous literature compilations by, for example, Anders & Grevesse (1989) +and Grevesse & Noels (1993). Asplund et al. (2021) have recently confirmed with state-of-the-art 3D simulations the relatively low metal abundances for the Sun. Asplund et al. 
(2021) +consider that their study yields the most reliable solar abundances available today, suggesting that the solar modelling problem is no longer a problem of abundances but rather a problem +of stellar physics. The treatment of mixing below the convective +zone is one of the key processes that could improve solar models. Several studies indeed reveal that the process of convective +penetration, also called overshooting, at the bottom of the convective envelope could play an important role in improving the +agreement between solar models and helioseismic constraints +(see for example Christensen-Dalsgaard et al. 2011; Zhang et al. +2012; Buldgen et al. 2019b). Overshooting in solar models has +most often been treated using diffusive or instantaneous chemical mixing. A temperature gradient that sharply transitions from +a nearly adiabatic form to a radiative form is usually assumed, +as suggested by the theoretical work of Zahn (1991). Models +with a smoother transition have also been investigated. Based +on the analysis of models with different stratifications near the +Send offprint requests to: I. Baraffe +base of the convective zone, Christensen-Dalsgaard et al. (2011) +found that models that better fit the helioseismic data have a +weakly sub-adiabatic temperature gradient in the lower part of +the convective zone and a smooth transition to the radiative gradient in the overshooting layer. But Christensen-Dalsgaard et al. +(2011) noted that the required temperature stratification is difficult to reconcile with existing overshooting models and numerical simulations. They concluded that only non-local turbulent +convection models could produce the desired degree of smoothness in the transition (see for example Zhang & Li 2012; Zhang +et al. 2012). But these non-local models remain uncertain, and +their description of overshooting under the conditions found at +the base of the solar convective zone is yet to be validated. +Zhang et al. 
(2019) explored the impact of overshooting by +introducing a parametrised turbulent kinetic energy flux based +on a model with parameters that are adjusted to improve the +helioseismic properties. They suggest that amelioration can be +obtained specifically below the convective envelope. However, +Zhang et al. (2019) find that this model cannot solve the whole +solar problem because such a flux worsens the sound-speed profile in the deep radiative interior of their solar model. Given the +uncertainties regarding the temperature stratification of the overshooting region, solar modellers have considered these effects as +secondary and have focused their efforts on exploring the impact +of solar abundances, microphysics (opacities, equations of state, +nuclear reaction rates), and chemical mixing and diffusion (see +details and references in the review of Buldgen et al. 2019a). +Additional, more exotic effects such as early disk accretion or +solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot +2021) are also attracting increasing attention. +To reinvigorate the debate, Buldgen et al. (2019b) recently +highlighted once again how the transition of the temperature gra1 arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022 -Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -dient just below the convective envelope can significantly impact -the disagreement between solar models and helioseismic con￾straints. Their results, based on a method that combines multi￾ple structural inversions, suggest that the transition in temper￾ature gradient is improperly reproduced by adopting either an -adiabatic or a radiative temperature gradient in the overshoot￾ing layer. The solution should be somewhere in between these -two extremes. Christensen-Dalsgaard et al. 
(2018) also note that -an increase in the temperature at the transition would remove -a remaining small sharp dip in the speed of sound immediately -beneath the convective zone of the model. A major difficulty is -to disentangle the effects of overshoot from the effects of opaci￾ties, which can also alter the temperature gradient in these layers. -Given the large number of parameters to deal with in order to im￾prove solar models and the current lack of strong arguments in -favour of modifying the thermal stratification in the overshoot￾ing layer, there has been no real motivation to deviate from the -traditional picture of a sharp transition as formalised by Zahn -(1991). -The present work is motivated by arguments inspired by hy￾drodynamical simulations of convection and convective penetra￾tion in solar-like models. Recent hydrodynamical simulations by -Baraffe et al. (2021, hereafter B21) highlight the process of local -heating in the overshooting region due to penetrating convective -motions across the convective boundary. In the following, we -analyse the potential impact of this feature on one-dimensional -(1D) stellar evolution structures in the context of solar models. -The hydrodynamical results of B21 are briefly summarised in -Sect. 2, and their impact on 1D models are analysed in Sect. 3 -and discussed in Sect. 4. -2. Modification of the thermal background in the -overshooting layer: Results from -two-dimensional hydrodynamical simulations -B21 performed two-dimensional (2D) fully compressible time￾implicit simulations of convection and convective penetration in -a solar-like model with the MUlti-dimensional Stellar Implicit -Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017). -The main motivation was to explore the impact of an artificial -increase in the stellar luminosity on the properties of convection -and convective penetration. This procedure is a common tactic -adopted in hydrodynamical simulations of convection (Rogers -et al. 
2006; Meakin & Arnett 2007; Brun et al. 2011; Hotta 2017; -Edelmann et al. 2019). The experiments of B21 highlight the im￾pact of penetrative downflows on the local thermal background -in the overshooting layer. They illustrate how convective down￾flows, when penetrating the region below the convective bound￾ary of the envelope, can induce a local heating and a modification -of the temperature gradient as a result of compression and shear -in the overshooting layer. This modification of the local back￾ground is connected to a local increase in the radiative flux to -counterbalance the negative enthalpy flux (or heat flux) produced -by penetrating flows. The negative peak of the enthalpy flux -and the positive bump of the radiative flux below the convective -boundary are well-known features described in many numeri￾cal works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell -et al. 2002; Brun et al. 2011; Hotta 2017; Kapyl ¨ a 2019; Cai ¨ -2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre -et al. 2019; Higl et al. 2021) have also reported a modification -of the local thermal background in the overshooting region, but -without providing a detailed description. The simulations of B21 -provide a physical explanation that links the convective penetra￾tion process to the local heating and to the radiative bump in the -overshooting layer. The solar-like star simulated in B21 is based -on a model that is not thermally relaxed. It is reasonable to as￾sume that the local heating seen in B21 is present in stars because -the negative heat flux in the overshooting layer and the bump in -the radiative flux that compensates for this feature are persistent. -These two features are also commonly observed in other hydro￾dynamical simulations, as mentioned above. An exploration of -the impact of this heating on stellar evolution models may reveal -that heating is a necessary aspect of models for overshooting. -Fig. 1. 
Radial profile of the temperature departure ∆T/T0 from -the initial profile T0 and of the sub-adiabaticity (∇−∇ad) close to -the convective boundary predicted by 2D hydrodynamical simu￾lations (B21) of solar-like models. The lower panel corresponds -to the model with a realistic stellar luminosity and the upper -panel to a model with luminosity enhanced by a factor of ten. -The dash-dotted red lines show ∆T/T0 (in %), the relative dif￾ference between the time and space averages of the temperature, -T, and the initial temperature, T0. The solid blue lines show the -time and space averages of the sub-adiabaticity (∇ − ∇ad). The -dashed black lines show the initial profile of the sub-adiabaticity, -(∇−∇ad)init. The convective boundary is indicated by the vertical -solid line (see details in B21) -The behaviour of the thermal profile below the convective -boundary found in the simulations of B21 is illustrated in Fig. -1. It is displayed for the model with a realistic stellar luminosity -(lower panel). We also show the results for a model with an artifi￾cial enhancement in the luminosity by a factor of ten because the -features are intensified in these ‘boosted’ models (upper panel). -The figure shows the local heating in the overshooting layer and -its impact on the sub-adiabaticity (∇ − ∇ad), with ∇ = -d log T -d log P -the +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +dient just below the convective envelope can significantly impact +the disagreement between solar models and helioseismic constraints. Their results, based on a method that combines multiple structural inversions, suggest that the transition in temperature gradient is improperly reproduced by adopting either an +adiabatic or a radiative temperature gradient in the overshooting layer. The solution should be somewhere in between these +two extremes. Christensen-Dalsgaard et al. 
(2018) also note that +an increase in the temperature at the transition would remove +a remaining small sharp dip in the speed of sound immediately +beneath the convective zone of the model. A major difficulty is +to disentangle the effects of overshoot from the effects of opacities, which can also alter the temperature gradient in these layers. +Given the large number of parameters to deal with in order to improve solar models and the current lack of strong arguments in +favour of modifying the thermal stratification in the overshooting layer, there has been no real motivation to deviate from the +traditional picture of a sharp transition as formalised by Zahn +(1991). +The present work is motivated by arguments inspired by hydrodynamical simulations of convection and convective penetration in solar-like models. Recent hydrodynamical simulations by +Baraffe et al. (2021, hereafter B21) highlight the process of local +heating in the overshooting region due to penetrating convective +motions across the convective boundary. In the following, we +analyse the potential impact of this feature on one-dimensional +(1D) stellar evolution structures in the context of solar models. +The hydrodynamical results of B21 are briefly summarised in +Sect. 2, and their impact on 1D models are analysed in Sect. 3 +and discussed in Sect. 4. +2. Modification of the thermal background in the +overshooting layer: Results from +two-dimensional hydrodynamical simulations +B21 performed two-dimensional (2D) fully compressible timeimplicit simulations of convection and convective penetration in +a solar-like model with the MUlti-dimensional Stellar Implicit +Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017). +The main motivation was to explore the impact of an artificial +increase in the stellar luminosity on the properties of convection +and convective penetration. This procedure is a common tactic +adopted in hydrodynamical simulations of convection (Rogers +et al. 
2006; Meakin & Arnett 2007; Brun et al. 2011; Hotta 2017; +Edelmann et al. 2019). The experiments of B21 highlight the impact of penetrative downflows on the local thermal background +in the overshooting layer. They illustrate how convective downflows, when penetrating the region below the convective boundary of the envelope, can induce a local heating and a modification +of the temperature gradient as a result of compression and shear +in the overshooting layer. This modification of the local background is connected to a local increase in the radiative flux to +counterbalance the negative enthalpy flux (or heat flux) produced +by penetrating flows. The negative peak of the enthalpy flux +and the positive bump of the radiative flux below the convective +boundary are well-known features described in many numerical works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell +et al. 2002; Brun et al. 2011; Hotta 2017; Kapyl ¨ a 2019; Cai ¨ +2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre +et al. 2019; Higl et al. 2021) have also reported a modification +of the local thermal background in the overshooting region, but +without providing a detailed description. The simulations of B21 +provide a physical explanation that links the convective penetration process to the local heating and to the radiative bump in the +overshooting layer. The solar-like star simulated in B21 is based +on a model that is not thermally relaxed. It is reasonable to assume that the local heating seen in B21 is present in stars because +the negative heat flux in the overshooting layer and the bump in +the radiative flux that compensates for this feature are persistent. +These two features are also commonly observed in other hydrodynamical simulations, as mentioned above. An exploration of +the impact of this heating on stellar evolution models may reveal +that heating is a necessary aspect of models for overshooting. +Fig. 1. 
Radial profile of the temperature departure ∆T/T0 from +the initial profile T0 and of the sub-adiabaticity (∇−∇ad) close to +the convective boundary predicted by 2D hydrodynamical simulations (B21) of solar-like models. The lower panel corresponds +to the model with a realistic stellar luminosity and the upper +panel to a model with luminosity enhanced by a factor of ten. +The dash-dotted red lines show ∆T/T0 (in %), the relative difference between the time and space averages of the temperature, +T, and the initial temperature, T0. The solid blue lines show the +time and space averages of the sub-adiabaticity (∇ − ∇ad). The +dashed black lines show the initial profile of the sub-adiabaticity, +(∇−∇ad)init. The convective boundary is indicated by the vertical +solid line (see details in B21) +The behaviour of the thermal profile below the convective +boundary found in the simulations of B21 is illustrated in Fig. +1. It is displayed for the model with a realistic stellar luminosity +(lower panel). We also show the results for a model with an artificial enhancement in the luminosity by a factor of ten because the +features are intensified in these ‘boosted’ models (upper panel). +The figure shows the local heating in the overshooting layer and +its impact on the sub-adiabaticity (∇ − ∇ad), with ∇ = +d log T +d log P +the + +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +temperature gradient and ∇ad = +d log T +d log P +|S the adiabatic gradient. +The initial stratification below the convective boundary (located +at r = 0.6734 × Rstar for this specific stellar model) is set by +the stable radiative gradient, ∇rad (see the dashed black line below the convective boundary in Fig. 1). B21 show that, as a result of the local heating below the convective boundary characterised by the bump in temperature difference ∆T/T0 displayed +in Fig. 
1, the temperature gradient becomes less sub-adiabatic +immediately below the convective boundary1. The net result is +a smoother transition just below the convective boundary with +a temperature gradient that has an intermediate value between +the radiative temperature gradient and the adiabatic one. In the +next section we analyse the impact of this local heating on 1D +solar structures by adopting a simple prescription that mimics +the behaviour of the temperature gradient suggested by hydrodynamical simulations. +3. Impact on one-dimensional solar structure +models +3.1. Helioseismic constraints +Our primary goal in this short paper is to illustrate the potential, +qualitative impact of the local heating produced by overshooting. We adopted a strategy inspired by the analysis of Buldgen +et al. (2020), who constructed a static structure of the Sun in +agreement with seismic inversions of the Ledoux discriminant +defined by +A = +1 +Γ1 +d ln P +d ln r +− +d ln ρ +d ln r +, (1) +with Γ1 = (∂ ln P/∂ ln ρ)ad. Starting from a reference evolutionary model, Buldgen et al. (2020) used an inversion procedure to iteratively reconstruct a solar model. Successive inversions of the Ledoux discriminant allowed them to obtain a +model-independent profile for this quantity. Their reconstruction +method also gives solar structures that are in excellent agreement with other structural inversions, namely the entropy, S , the +square of the speed of sound, c 2 -Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -temperature gradient and ∇ad = -d log T -d log P -|S the adiabatic gradient. -The initial stratification below the convective boundary (located -at r = 0.6734 × Rstar for this specific stellar model) is set by -the stable radiative gradient, ∇rad (see the dashed black line be￾low the convective boundary in Fig. 1). 
B21 show that, as a re￾sult of the local heating below the convective boundary charac￾terised by the bump in temperature difference ∆T/T0 displayed -in Fig. 1, the temperature gradient becomes less sub-adiabatic -immediately below the convective boundary1 -. The net result is -a smoother transition just below the convective boundary with -a temperature gradient that has an intermediate value between -the radiative temperature gradient and the adiabatic one. In the -next section we analyse the impact of this local heating on 1D -solar structures by adopting a simple prescription that mimics -the behaviour of the temperature gradient suggested by hydro￾dynamical simulations. -3. Impact on one-dimensional solar structure -models -3.1. Helioseismic constraints -Our primary goal in this short paper is to illustrate the potential, -qualitative impact of the local heating produced by overshoot￾ing. We adopted a strategy inspired by the analysis of Buldgen -et al. (2020), who constructed a static structure of the Sun in -agreement with seismic inversions of the Ledoux discriminant -defined by -A = -1 -Γ1 -d ln P -d ln r -− -d ln ρ -d ln r -, (1) -with Γ1 = (∂ ln P/∂ ln ρ)ad. Starting from a reference evolu￾tionary model, Buldgen et al. (2020) used an inversion pro￾cedure to iteratively reconstruct a solar model. Successive in￾versions of the Ledoux discriminant allowed them to obtain a -model-independent profile for this quantity. Their reconstruction -method also gives solar structures that are in excellent agree￾ment with other structural inversions, namely the entropy, S , the -square of the speed of sound, c -2 -s -, and the density, ρ. To illustrate -the convergence of their reconstruction procedure, they show -(right panels of their Figs. 3-6) the successive iterations that con￾verge to an excellent level of agreement for the four structural -inversions (A, S , c -2 -s -, ρ) starting from the initial reference model -adopted in their work. 
The differences found between the recon￾structed model and the reference model are useful as they indi￾cate the modifications of the reference model that are required to -converge towards a solar model in agreement with helioseismic -data. We recall here the major trends found by Buldgen et al. -(2020) for the four structural quantities, which are used for our -analysis in Sect. 3.2. -The first concerns the Ledoux discriminant. The major dis￾crepancy between the Sun and the reference model occurs just -below the convective boundary, with a large positive bump for -the quantity (ASun - Aref). -The second concerns the speed of sound. The same positive -bump at the same location as for the Ledoux discriminant, A, is -observed for the quantity (c -2 -s,Sun − c -2 -s,ref)/c -2 -s,ref. The corrections -applied to A during the reconstruction procedure also reduce the -discrepancy in the speed of sound in the radiative region. -The third concerns the entropy. Large discrepancies are ob￾served in both the radiative region and the convective zone. The -1 Less sub-adiabatic means that |∇ − ∇ad| decreases compared to the -initial profile. -entropy discrepancy (S Sun − S ref)/S ref has two positive peaks in -the radiative zone, one just below the overshooting region and a -larger peak deeper at ∼ 40% of the stellar radius. This discrep￾ancy is negative in the convective zone. The corrections applied -to A help reduce these entropy discrepancies in both regions. -The fourth concerns the density. The quantity (ρSun − -ρref)/ρref has a negative peak in the radiative region, at ∼ 35% -of the stellar radius, and is positive in the convective zone. -Importantly, Buldgen et al. (2020) mention that their recon￾struction procedure gives similar Ledoux discriminant profiles -for a wide range of initial reference models. 
We used these re￾sults to gauge whether the modifications of the thermal profile -predicted by B21 can help in qualitatively improving all the -structural quantities used by Buldgen et al. (2020). -3.2. Testing one-dimensional solar models -Our main motivation is to show the potential impact of the local -heating described in Sect. 2 on stellar models. We are not aim￾ing in this short work at constructing the best solar model to fit -helioseismic constraints. Using stellar evolution codes, we have -adopted two different methods that can be found in the litera￾ture to construct solar models (e.g. Zhang et al. 2012; Vinyoles -et al. 2017). Our first method relies on the thermal relaxation -of a reference model with solar radius and luminosity that is -modified to reproduce the temperature gradient in the overshoot￾ing layer suggested by hydrodynamical simulations. In this case, -the chemical abundances are not modified by nuclear reactions, -mixing, or microscopic diffusion during the relaxation process. -For these tests, we used the 1D Lyon stellar evolution code -(Baraffe et al. 1998). We repeated this experiment based on ther￾mal relaxation with the stellar evolution code MONSTAR (e.g. -Constantino et al. 2014) and obtained the same qualitative re￾sults. -The second method considers models that account for the -modification of the temperature gradient in the overshooting -layer from the zero age main sequence (ZAMS). The models -are then evolved until they reach the solar radius and luminosity. -With this approach, changes in the chemical abundances from -nuclear reactions, microscopic diffusion, and overshooting mix￾ing are also consistent with any modification of the structure -induced by the forced local heating in the overshooting layer. -These tests were performed with MONSTAR as it includes the -treatment of microscopic diffusion. -The first method allows the impact of local heating in -the overshooting layer after thermal relaxation to be isolated. 
-The second method provides evolutionary models that are self￾consistent since the effect of the modification of the temperature -gradient is accounted for during their evolution on the main se￾quence. -In the following, we adopt a modification of the local temper￾ature gradient in the overshooting layer that qualitatively repro￾duces the behaviour displayed in Fig. 1. We define an overshoot￾ing length dov = αovHP,CB, with HP,CB the pressure scale height -at the convective boundary and αov a free parameter. We also de￾fine two radial locations, rov = rCB − dov and rmid = rCB − dov/2, -with rCB the radial location of the convective boundary. The tem￾perature gradient is modified as follows. For rmid ≤ r < rCB, we -use -∇ = g(r)∇ad + (1 − g(r))∇rad, (2) -with -g(r) = sin{[(r − rmid)/(rCB − rmid)]a × π/2}. (3) -3 -Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -For rov ≤ r < rmid, we use -∇ = ∇rad − h(r)∇ad, (4) -with -h(r) = b × sin{[(rmid − r)/(rmid − rov)] × π}. (5) -Sine functions are used in Eqs. (3) and (5) to reproduce the -smooth variations in the temperature gradient below the convec￾tive boundary produced by the hydrodynamical simulations. We -have verified that the results are insensitive to the smoothness of -these variations and to the exact shape of the temperature gra￾dient radial profile.We adopted a=0.3 in Eq. (3) as it provides a -behaviour for the temperature gradient very close to the one dis￾played in Fig. 1. Results are rather insensitive to variations in the -values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. (5), -which also provides a close visual match to the hydrodynamical -results, but we note that the results are insensitive to the value of -b. -3.2.1. Thermal equilibrium models -The details of the procedure for the first method are the follow￾ing. 
We calculate the evolution of a 1 M model with an initial -helium mass fraction of 0.28, metallicity Z = 0.02, and a mix￾ing length lmix = 1.9HP. We use a reference model that is in -thermal equilibrium2 -and has the luminosity and radius of the -current Sun. Starting from this reference model, the tempera￾ture gradient is modified over a prescribed depth to mimic the -impact of overshooting according to the hydrodynamical sim￾ulations described in Sect. 2. We adopt the prescription given -by Eqs. (2)-(5) over a distance dov below the convective bound￾ary. We show the results in Fig. 2 for αov = 0.15 and αov= 0.20. -These overshooting widths are in good agreement with the maxi￾mal depth reached by downflows below the convective boundary -predicted by the hydrodynamical simulations for the solar-like -model investigated in B21. We note that the stellar model used -in B21 is slightly under-luminous compared to the Sun (see B21 -for details). B21 also mention that one should be cautious when -directly applying the overshooting depths predicted by their sim￾ulations to real stars since the final relaxed state for these simula￾tions may have different properties from non-thermally relaxed -states. We varied αov between 0.15 and 0.35 and find that the -results do not change qualitatively. However, the amplitude of -the variations in the model properties depends on dov (see be￾low). As shown below, this simple prescription implemented in -a stellar evolution code yields a local increase in the tempera￾ture below the convective boundary, similar to that observed in -the hydrodynamical simulations. We stress that Eqs. (2)-(5) have -been chosen for simplicity. They are only a rough approximation -that can mimic the thermal profile behaviour suggested in the 2D -simulations. 
-The model with a modified temperature gradient is then ther￾mally relaxed, that is to say, it is evolved over many thermal -timescales without any modification of the abundances from nu￾clear reactions until thermal equilibrium is reached. The temper￾ature gradient is modified in the overshooting layer during the -whole relaxation process, and this is referred to as a ‘forced local -heating’. This procedure ensures that the model with a modified -temperature gradient can be consistently compared to the refer￾ence model. As shown in Fig. 2, the simple prescription given -2 Thermal equilibrium means that the total nuclear energy produced -in the central regions balances the radiative losses at the surface, i.e. the -total nuclear luminosity, Lnuc, equals the total stellar luminosity, L. -by Eqs. (2)-(5) yields similar qualitative changes in the temper￾ature and the sub-adiabaticity close to the convective boundary -that was found in the hydrodynamical simulations of B21. -Fig. 2. Radial profile of the temperature difference and of the -sub-adiabaticity of a 1D solar-like structure with a modified tem￾perature gradient in the overshooting layer according to Eqs. -(2)-(5). The temperature gradient is modified over a distance -dov = αovHP,CB, with αov=0.15 in the lower panel and αov=0.20 -in the upper panel. The dash-dotted red lines show the percent￾age relative temperature difference, ∆T/Tref, with ∆T = T −Tref. -The solid blue lines correspond to the sub-adiabaticity (∇−∇ad). -The dashed black lines show the sub-adiabaticity of the refer￾ence model. The convective boundary is indicated by the vertical -solid line. The vertical dashed line in each panel is located at a -distance dov below the convective boundary. -The impact on the whole stellar structure was quantified by -comparing the four structural quantities (A, S , c -2 -s -, ρ) between the -modified and the reference model. The results are displayed in -Fig. 
3, with ∆X defined as (X−Xref) for any structural quantity X. -The forced local heating in the overshooting layer produces sim￾ilar positive peaks for ∆A, ∆S , and ∆c -2 -s -, as found for the temper￾ature. The modification thus provides the correction required to -improve the discrepancy for the Ledoux discriminant described -in the first of the trends outlined in Sect. 3.1. Unsurprisingly, -such a modification of the temperature gradient is expected to -improve the agreement with helioseismic constraints and help -4 -Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -remove the sound speed anomaly below the convective bound￾ary (second trend in Sect. 3.1), as suggested by the results of -Christensen-Dalsgaard et al. (2011). But it is also interesting to -note that such a modification yields a slight cooling of the con￾vective zone (see Fig. 2) and thus a negative difference for the -entropy (see Fig. 3). A negative difference in the convective en￾velope is in agreement with the correction required for the ref￾erence model of Buldgen et al. (2020) to better match the Sun -(see third trend in Sect. 3.1). Regarding the density, the modifi￾cation of the temperature gradient has an interesting impact in -the radiative zone, with a large decrease in the density compared -to the reference model over a broad region below the convective -boundary. The impact on the density in the convective region for -this specific model is partly in agreement with the correction re￾quired for this quantity in the Buldgen et al. (2020) study, with a -positive difference found only in the upper part of the convective -envelope (see the fourth trend in Sect. 3.1). -These trends are insensitive to the depth over which the tem￾perature gradient is modified. Increasing the depth increases the -magnitude of the differences but has no impact on their sign. 
We -find that the maximum variation in the model properties, such as -the speed of sound, ∆c -2 -s -/c -2 -s,ref, roughly scales with d -2 -ov. This scal￾ing is linked to the integrated area between the modified temper￾ature gradient curve and the one for the reference (non-modified) -temperature gradient, which roughly decreases linearly with r. -This area is proportional to the square of the overshooting depth, -and consequently, the maximum variation in the model proper￾ties is also proportional to d -2 -ov. The qualitative trends also remain -the same whether overshooting mixing in the reference model -is ignored or included using a step function (with instantaneous -mixing) or an exponential decay for the diffusion coefficient (e.g. -Freytag et al. 1996). -3.2.2. Self-consistent evolutionary models -For the tests based on the second method, we ran different sets -of models with different combinations of assumptions, including -or not microscopic diffusion and with or without overshooting -mixing. When overshooting mixing was included in the over￾shooting layer, it was based either on a step function or on an -exponential decay for the diffusion coefficient. Microscopic dif￾fusion for H and He was implemented according to Thoul et al. -(1994). For these tests, the temperature gradient was modified -according to Eqs. (2)-(5). All models start from the ZAMS and -are evolved until they reach the solar radius and luminosity at the -same age. This was achieved by making small adjustments to the -mixing length, lmix. The models with temperature gradient mod￾ifications were compared to the relevant reference model, which -has no modification of the temperature gradient but everything -else is the same (i.e. the same treatment of microscopic diffu￾sion and of overshooting mixing). The evolutionary models with -temperature gradient modifications are thus self-consistent. 
The -main difference between this approach and the one in the previ￾ous section is that these models accumulate small differences in, -for example, central H abundance when compared to their ref￾erence model. These tests produce the same trends in the over￾shooting layer as found for the tests based on the first method -(Sect. 3.2.1), independently of the treatment of overshooting -mixing and whether microscopic diffusion is included or not. -In the convective zone, all models give a positive difference for -the density between the model with a modified temperature gra￾dient and the relevant reference model. For the other quantities -(S , c -2 -s -), the differences in the convective zone are very sensitive -Fig. 3. Difference of various structural quantities between a -model with a modified temperature gradient in the overshoot￾ing layer and a reference model calculated with the Lyon stellar -evolution code. The temperature gradient in the modified model -is changed over a distance dov = αovHP,CB below the convec￾tive boundary (indicated by the vertical solid line). The lower -panel shows the results for αov = 0.15 and the upper panel for -αov = 0.20. -to the assumptions regarding whether overshooting mixing is in￾cluded or not. But at least we find solutions that are compatible -with the four trends found by Buldgen et al. (2020) for the four -structural quantities. This is illustrated in Fig. 4 with a model -that accounts for step function overshooting mixing over a dis￾tance dov = 0.15HP,CB (lower panel) and dov = 0.20HP,CB (upper -panel). -4. Conclusion -The tests performed in Sect. 3 are based on different methods -(relaxed models versus consistent evolution) that can be used to -construct solar models. 
Independently of the method used, the -tests show that a local increase in the temperature in the over￾shooting region due to convective penetration provides the quali￾tative effects required to improve the speed of sound discrepancy -below the convective boundary. This discrepancy is persistent in -5 -Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -Fig. 4. Difference of various structural quantities between a -modified model and a reference model calculated with the -MONSTAR stellar evolution code. The reference model is -evolved from the ZAMS with microscopic diffusion and step -function overshooting mixing over a distance dov = αovHP,CB be￾low the convective boundary. The lower panel shows the results -for αov = 0.15 and the upper panel for αov = 0.20. The models -with a modified temperature gradient in the overshooting layer -(same microscopic diffusion and overshooting mixing treatment -as the reference model) are evolved similarly from the ZAMS. -The convective boundary is indicated by the vertical solid line. -solar models that use low solar metal abundances. This is not -surprising because an increase in the temperature in this spe￾cific region has previously been invoked in the literature to solve -this problem, as mentioned in Sect. 1. However, the details of -the physical process responsible for this local heating have been -lacking, whereas we can now suggest an explanation based on -the B21 results. The trends that we find for the four structural -quantities (A, S , c -2 -s -, ρ) are robust below the convective bound￾ary and in a large fraction of the radiative core, independently of -the treatment of mixing and diffusion and of the method for con￾structing the models in Sects. 3.2.1 and 3.2.2. 
Our experiments -additionally show that such a local change in the temperature, -despite being made over a very limited region below the convec￾tive boundary, can also affect the density, the entropy, and the -speed of sound in the convective envelope after thermal relax￾ation or evolution on the main sequence. How these quantities -are affected in the convective envelope compared to a reference -model with no local heating depends on the strategy for building -solar models and on the treatment of overshooting mixing. This -mixing is obviously linked to the local heating given that both -result from the same dynamical process. A combined testing of -both effects in stellar models could provide more constraints on -the general process of overshooting. -Increasingly, efforts are now devoted to characterising the -process of convective boundary mixing in stellar models based -on multi-dimensional hydrodynamical simulations. More work -is required to obtain reliable determinations of an overshooting -depth and to describe quantitatively the mixing and impact on -the temperature gradient. Understanding the effects of rotation -and magnetic fields on overshooting is a significantly more dif￾ficult theoretical and numerical problem to address; however, -efforts to study these combined non-linear effects are ongoing -(Hotta 2017; Korre et al. 2021). Despite the limitations of ex￾isting hydrodynamical simulations, they are already providing -constraints on physical processes usually treated with several -free parameters in 1D stellar evolution models. They can thus -limit the degrees of freedom in a problem as complex as so￾lar modelling. Our primary goal in this work is to highlight the -potential impact of convective penetration on the thermal back￾ground in the overshooting region. The processes studied in B21 -that produce a local change in the temperature gradient are also -responsible for the mixing in this region. 
Because much observa￾tional evidence points towards the need for extra mixing at con￾vective boundaries, for example lithium depletion in solar-like -stars (Baraffe et al. 2017), the size of convective cores (Claret -& Torres 2016), and colour-magnitude diagrams (Castro et al. -2014), solar modellers often include this extra mixing in their -models. But a consistent approach should also require account￾ing for a local change in the temperature gradient. The impact of -this local heating goes in the right direction to improve not only -the discrepancies of solar models below the convective bound￾ary, but also in the convective envelope. This effect offers an in￾teresting step forward for solving the solar modelling problem. -In this exploratory work, we adopt a simple prescription for the -local heating in the overshooting layer since the main goal is -to highlight its qualitative impact on stellar models. However, -this effect should not be considered as another free parameter in -the solar modelling problem. Future multi-dimensional hydro￾dynamical simulations will enable this process, and its treatment -in 1D stellar evolution codes, to be better constrained. -5. Acknowledgements -We thank our anonymous referee for valuable comments which -helped improving the manuscript. This work is supported by the -ERC grant No. 787361-COBOM and the consolidated STFC -grant ST/R000395/1. IB thanks the Max Planck Institut fur¨ -Astrophysics (Garching) for warm hospitality during completion -of part of this work. The authors would like to acknowledge the -use of the University of Exeter High-Performance Computing -(HPC) facility ISCA and of the DiRAC Data Intensive service -at Leicester, operated by the University of Leicester IT Services, -which forms part of the STFC DiRAC HPC Facility. The equip￾ment was funded by BEIS capital funding via STFC capital -grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC -Operations grant ST/R001014/1. 
DiRAC is part of the National -e-Infrastructure. -6 -Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -References -Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197 -Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141 -Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481 -Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403 -Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 -Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 -Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825 -Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79 -Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36 -Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space -Sciences, 6, 42 -Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33 -Caffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, -Sol. Phys., 268, 255 -Cai, T. 2020, ApJ, 888, 46 -Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13 -Christensen-Dalsgaard, J. 2021, Living Reviews in Solar Physics, 18, 2 -Christensen-Dalsgaard, J., Gough, D. O., & Knudstrup, E. 2018, MNRAS, 477, -3845 -Christensen-Dalsgaard, J., Monteiro, M. J. P. F. G., Rempel, M., & Thompson, -M. J. 2011, MNRAS, 414, 1158 -Claret, A. & Torres, G. 2016, A&A, 592, A15 -Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56 -Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4 -Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497 -Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 -Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed. -N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25 -Higl, J., Muller, E., & Weiss, A. 2021, A&A, 646, A133 ¨ -Hotta, H. 2017, ApJ, 843, 52 -Hurlburt, N. E., Toomre, J., & Massaguer, J. 
M. 1986, ApJ, 311, 563 -Kapyl ¨ a, P. J. 2019, A&A, 631, A122 ¨ -Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362 -Korre, L., Garaud, P., & Brummell, N. H. 2019, MNRAS, 484, 1220 -Kunitomo, M. & Guillot, T. 2021, arXiv e-prints, arXiv:2109.06492 -Meakin, C. A. & Arnett, D. 2007, ApJ, 667, 448 -Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A, -293, 127 -Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765 -Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828 -Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86 -Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153 -Viallet, M., Meakin, C., Arnett, D., & Mocak, M. 2013, ApJ, 769, 1 ´ -Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202 -Zahn, J. P. 1991, A&A, 252, 179 -Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759, -L14 -Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50 -Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103 -7 +s +, and the density, ρ. To illustrate +the convergence of their reconstruction procedure, they show +(right panels of their Figs. 3-6) the successive iterations that converge to an excellent level of agreement for the four structural +inversions (A, S , c +2 +s +, ρ) starting from the initial reference model +adopted in their work. The differences found between the reconstructed model and the reference model are useful as they indicate the modifications of the reference model that are required to +converge towards a solar model in agreement with helioseismic +data. We recall here the major trends found by Buldgen et al. +(2020) for the four structural quantities, which are used for our +analysis in Sect. 3.2. +The first concerns the Ledoux discriminant. The major discrepancy between the Sun and the reference model occurs just +below the convective boundary, with a large positive bump for +the quantity (ASun - Aref). 
+The second concerns the speed of sound. The same positive +bump at the same location as for the Ledoux discriminant, A, is +observed for the quantity (c +2 +s,Sun − c +2 +s,ref)/c +2 +s,ref. The corrections +applied to A during the reconstruction procedure also reduce the +discrepancy in the speed of sound in the radiative region. +The third concerns the entropy. Large discrepancies are observed in both the radiative region and the convective zone. The +1 Less sub-adiabatic means that |∇ − ∇ad| decreases compared to the +initial profile. +entropy discrepancy (S Sun − S ref)/S ref has two positive peaks in +the radiative zone, one just below the overshooting region and a +larger peak deeper at ∼ 40% of the stellar radius. This discrepancy is negative in the convective zone. The corrections applied +to A help reduce these entropy discrepancies in both regions. +The fourth concerns the density. The quantity (ρSun − +ρref)/ρref has a negative peak in the radiative region, at ∼ 35% +of the stellar radius, and is positive in the convective zone. +Importantly, Buldgen et al. (2020) mention that their reconstruction procedure gives similar Ledoux discriminant profiles +for a wide range of initial reference models. We used these results to gauge whether the modifications of the thermal profile +predicted by B21 can help in qualitatively improving all the +structural quantities used by Buldgen et al. (2020). +3.2. Testing one-dimensional solar models +Our main motivation is to show the potential impact of the local +heating described in Sect. 2 on stellar models. We are not aiming in this short work at constructing the best solar model to fit +helioseismic constraints. Using stellar evolution codes, we have +adopted two different methods that can be found in the literature to construct solar models (e.g. Zhang et al. 2012; Vinyoles +et al. 2017). 
Our first method relies on the thermal relaxation +of a reference model with solar radius and luminosity that is +modified to reproduce the temperature gradient in the overshooting layer suggested by hydrodynamical simulations. In this case, +the chemical abundances are not modified by nuclear reactions, +mixing, or microscopic diffusion during the relaxation process. +For these tests, we used the 1D Lyon stellar evolution code +(Baraffe et al. 1998). We repeated this experiment based on thermal relaxation with the stellar evolution code MONSTAR (e.g. +Constantino et al. 2014) and obtained the same qualitative results. +The second method considers models that account for the +modification of the temperature gradient in the overshooting +layer from the zero-age main sequence (ZAMS). The models +are then evolved until they reach the solar radius and luminosity. +With this approach, changes in the chemical abundances from +nuclear reactions, microscopic diffusion, and overshooting mixing are also consistent with any modification of the structure +induced by the forced local heating in the overshooting layer. +These tests were performed with MONSTAR as it includes the +treatment of microscopic diffusion. +The first method allows the impact of local heating in +the overshooting layer after thermal relaxation to be isolated. +The second method provides evolutionary models that are self-consistent since the effect of the modification of the temperature +gradient is accounted for during their evolution on the main sequence. +In the following, we adopt a modification of the local temperature gradient in the overshooting layer that qualitatively reproduces the behaviour displayed in Fig. 1. We define an overshooting length $d_{\rm ov} = \alpha_{\rm ov} H_{P,{\rm CB}}$, with $H_{P,{\rm CB}}$ the pressure scale height +at the convective boundary and $\alpha_{\rm ov}$ a free parameter. We also define two radial locations, $r_{\rm ov} = r_{\rm CB} - d_{\rm ov}$ and $r_{\rm mid} = r_{\rm CB} - d_{\rm ov}/2$, +with $r_{\rm CB}$ the radial location of the convective boundary.
The temperature gradient is modified as follows. For $r_{\rm mid} \le r < r_{\rm CB}$, we +use +$\nabla = g(r)\,\nabla_{\rm ad} + (1 - g(r))\,\nabla_{\rm rad}$, (2) +with +$g(r) = \sin\{[(r - r_{\rm mid})/(r_{\rm CB} - r_{\rm mid})]^{a} \times \pi/2\}$. (3) + +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +For $r_{\rm ov} \le r < r_{\rm mid}$, we use +$\nabla = \nabla_{\rm rad} - h(r)\,\nabla_{\rm ad}$, (4) +with +$h(r) = b \times \sin\{[(r_{\rm mid} - r)/(r_{\rm mid} - r_{\rm ov})] \times \pi\}$. (5) +Sine functions are used in Eqs. (3) and (5) to reproduce the +smooth variations in the temperature gradient below the convective boundary produced by the hydrodynamical simulations. We +have verified that the results are insensitive to the smoothness of +these variations and to the exact shape of the temperature gradient radial profile. We adopted $a = 0.3$ in Eq. (3) as it provides a +behaviour for the temperature gradient very close to the one displayed in Fig. 1. Results are rather insensitive to variations in the +values of $a$ between 0.2 and 0.4. We adopted $b = 0.03$ in Eq. (5), +which also provides a close visual match to the hydrodynamical +results, but we note that the results are insensitive to the value of +$b$. +3.2.1. Thermal equilibrium models +The details of the procedure for the first method are the following. We calculate the evolution of a 1 $M_\odot$ model with an initial +helium mass fraction of 0.28, metallicity $Z = 0.02$, and a mixing length $l_{\rm mix} = 1.9 H_P$. We use a reference model that is in +thermal equilibrium$^{2}$ and has the luminosity and radius of the +current Sun. Starting from this reference model, the temperature gradient is modified over a prescribed depth to mimic the +impact of overshooting according to the hydrodynamical simulations described in Sect. 2. We adopt the prescription given +by Eqs. (2)-(5) over a distance $d_{\rm ov}$ below the convective boundary. We show the results in Fig. 2 for $\alpha_{\rm ov} = 0.15$ and $\alpha_{\rm ov} = 0.20$.
+These overshooting widths are in good agreement with the maximal depth reached by downflows below the convective boundary +predicted by the hydrodynamical simulations for the solar-like +model investigated in B21. We note that the stellar model used +in B21 is slightly under-luminous compared to the Sun (see B21 +for details). B21 also mention that one should be cautious when +directly applying the overshooting depths predicted by their simulations to real stars since the final relaxed state for these simulations may have different properties from non-thermally relaxed +states. We varied αov between 0.15 and 0.35 and find that the +results do not change qualitatively. However, the amplitude of +the variations in the model properties depends on dov (see below). As shown below, this simple prescription implemented in +a stellar evolution code yields a local increase in the temperature below the convective boundary, similar to that observed in +the hydrodynamical simulations. We stress that Eqs. (2)-(5) have +been chosen for simplicity. They are only a rough approximation +that can mimic the thermal profile behaviour suggested in the 2D +simulations. +The model with a modified temperature gradient is then thermally relaxed, that is to say, it is evolved over many thermal +timescales without any modification of the abundances from nuclear reactions until thermal equilibrium is reached. The temperature gradient is modified in the overshooting layer during the +whole relaxation process, and this is referred to as a ‘forced local +heating’. This procedure ensures that the model with a modified +temperature gradient can be consistently compared to the reference model. As shown in Fig. 2, the simple prescription given +2 Thermal equilibrium means that the total nuclear energy produced +in the central regions balances the radiative losses at the surface, i.e. the +total nuclear luminosity, Lnuc, equals the total stellar luminosity, L. +by Eqs. 
(2)-(5) yields similar qualitative changes in the temperature and the sub-adiabaticity close to the convective boundary +that was found in the hydrodynamical simulations of B21. +Fig. 2. Radial profile of the temperature difference and of the +sub-adiabaticity of a 1D solar-like structure with a modified temperature gradient in the overshooting layer according to Eqs. +(2)-(5). The temperature gradient is modified over a distance +dov = αovHP,CB, with αov=0.15 in the lower panel and αov=0.20 +in the upper panel. The dash-dotted red lines show the percentage relative temperature difference, ∆T/Tref, with ∆T = T −Tref. +The solid blue lines correspond to the sub-adiabaticity (∇−∇ad). +The dashed black lines show the sub-adiabaticity of the reference model. The convective boundary is indicated by the vertical +solid line. The vertical dashed line in each panel is located at a +distance dov below the convective boundary. +The impact on the whole stellar structure was quantified by +comparing the four structural quantities (A, S , c +2 +s +, ρ) between the +modified and the reference model. The results are displayed in +Fig. 3, with ∆X defined as (X−Xref) for any structural quantity X. +The forced local heating in the overshooting layer produces similar positive peaks for ∆A, ∆S , and ∆c +2 +s +, as found for the temperature. The modification thus provides the correction required to +improve the discrepancy for the Ledoux discriminant described +in the first of the trends outlined in Sect. 3.1. Unsurprisingly, +such a modification of the temperature gradient is expected to +improve the agreement with helioseismic constraints and help + +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +remove the sound speed anomaly below the convective boundary (second trend in Sect. 3.1), as suggested by the results of +Christensen-Dalsgaard et al. (2011). 
But it is also interesting to +note that such a modification yields a slight cooling of the convective zone (see Fig. 2) and thus a negative difference for the +entropy (see Fig. 3). A negative difference in the convective envelope is in agreement with the correction required for the reference model of Buldgen et al. (2020) to better match the Sun +(see third trend in Sect. 3.1). Regarding the density, the modification of the temperature gradient has an interesting impact in +the radiative zone, with a large decrease in the density compared +to the reference model over a broad region below the convective +boundary. The impact on the density in the convective region for +this specific model is partly in agreement with the correction required for this quantity in the Buldgen et al. (2020) study, with a +positive difference found only in the upper part of the convective +envelope (see the fourth trend in Sect. 3.1). +These trends are insensitive to the depth over which the temperature gradient is modified. Increasing the depth increases the +magnitude of the differences but has no impact on their sign. We +find that the maximum variation in the model properties, such as +the speed of sound, ∆c +2 +s +/c +2 +s,ref, roughly scales with d +2 +ov. This scaling is linked to the integrated area between the modified temperature gradient curve and the one for the reference (non-modified) +temperature gradient, which roughly decreases linearly with r. +This area is proportional to the square of the overshooting depth, +and consequently, the maximum variation in the model properties is also proportional to d +2 +ov. The qualitative trends also remain +the same whether overshooting mixing in the reference model +is ignored or included using a step function (with instantaneous +mixing) or an exponential decay for the diffusion coefficient (e.g. +Freytag et al. 1996). +3.2.2. 
Self-consistent evolutionary models +For the tests based on the second method, we ran different sets +of models with different combinations of assumptions, including +or not microscopic diffusion and with or without overshooting +mixing. When overshooting mixing was included in the overshooting layer, it was based either on a step function or on an +exponential decay for the diffusion coefficient. Microscopic diffusion for H and He was implemented according to Thoul et al. +(1994). For these tests, the temperature gradient was modified +according to Eqs. (2)-(5). All models start from the ZAMS and +are evolved until they reach the solar radius and luminosity at the +same age. This was achieved by making small adjustments to the +mixing length, lmix. The models with temperature gradient modifications were compared to the relevant reference model, which +has no modification of the temperature gradient but everything +else is the same (i.e. the same treatment of microscopic diffusion and of overshooting mixing). The evolutionary models with +temperature gradient modifications are thus self-consistent. The +main difference between this approach and the one in the previous section is that these models accumulate small differences in, +for example, central H abundance when compared to their reference model. These tests produce the same trends in the overshooting layer as found for the tests based on the first method +(Sect. 3.2.1), independently of the treatment of overshooting +mixing and whether microscopic diffusion is included or not. +In the convective zone, all models give a positive difference for +the density between the model with a modified temperature gradient and the relevant reference model. For the other quantities +(S , c +2 +s +), the differences in the convective zone are very sensitive +Fig. 3. 
Difference of various structural quantities between a +model with a modified temperature gradient in the overshooting layer and a reference model calculated with the Lyon stellar +evolution code. The temperature gradient in the modified model +is changed over a distance dov = αovHP,CB below the convective boundary (indicated by the vertical solid line). The lower +panel shows the results for αov = 0.15 and the upper panel for +αov = 0.20. +to the assumptions regarding whether overshooting mixing is included or not. But at least we find solutions that are compatible +with the four trends found by Buldgen et al. (2020) for the four +structural quantities. This is illustrated in Fig. 4 with a model +that accounts for step function overshooting mixing over a distance dov = 0.15HP,CB (lower panel) and dov = 0.20HP,CB (upper +panel). +4. Conclusion +The tests performed in Sect. 3 are based on different methods +(relaxed models versus consistent evolution) that can be used to +construct solar models. Independently of the method used, the +tests show that a local increase in the temperature in the overshooting region due to convective penetration provides the qualitative effects required to improve the speed of sound discrepancy +below the convective boundary. This discrepancy is persistent in + +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +Fig. 4. Difference of various structural quantities between a +modified model and a reference model calculated with the +MONSTAR stellar evolution code. The reference model is +evolved from the ZAMS with microscopic diffusion and step +function overshooting mixing over a distance dov = αovHP,CB below the convective boundary. The lower panel shows the results +for αov = 0.15 and the upper panel for αov = 0.20. 
The models +with a modified temperature gradient in the overshooting layer +(same microscopic diffusion and overshooting mixing treatment +as the reference model) are evolved similarly from the ZAMS. +The convective boundary is indicated by the vertical solid line. +solar models that use low solar metal abundances. This is not +surprising because an increase in the temperature in this specific region has previously been invoked in the literature to solve +this problem, as mentioned in Sect. 1. However, the details of +the physical process responsible for this local heating have been +lacking, whereas we can now suggest an explanation based on +the B21 results. The trends that we find for the four structural +quantities (A, S , c +2 +s +, ρ) are robust below the convective boundary and in a large fraction of the radiative core, independently of +the treatment of mixing and diffusion and of the method for constructing the models in Sects. 3.2.1 and 3.2.2. Our experiments +additionally show that such a local change in the temperature, +despite being made over a very limited region below the convective boundary, can also affect the density, the entropy, and the +speed of sound in the convective envelope after thermal relaxation or evolution on the main sequence. How these quantities +are affected in the convective envelope compared to a reference +model with no local heating depends on the strategy for building +solar models and on the treatment of overshooting mixing. This +mixing is obviously linked to the local heating given that both +result from the same dynamical process. A combined testing of +both effects in stellar models could provide more constraints on +the general process of overshooting. +Increasingly, efforts are now devoted to characterising the +process of convective boundary mixing in stellar models based +on multi-dimensional hydrodynamical simulations. 
More work +is required to obtain reliable determinations of an overshooting +depth and to describe quantitatively the mixing and impact on +the temperature gradient. Understanding the effects of rotation +and magnetic fields on overshooting is a significantly more difficult theoretical and numerical problem to address; however, +efforts to study these combined non-linear effects are ongoing +(Hotta 2017; Korre et al. 2021). Despite the limitations of existing hydrodynamical simulations, they are already providing +constraints on physical processes usually treated with several +free parameters in 1D stellar evolution models. They can thus +limit the degrees of freedom in a problem as complex as solar modelling. Our primary goal in this work is to highlight the +potential impact of convective penetration on the thermal background in the overshooting region. The processes studied in B21 +that produce a local change in the temperature gradient are also +responsible for the mixing in this region. Because much observational evidence points towards the need for extra mixing at convective boundaries, for example lithium depletion in solar-like +stars (Baraffe et al. 2017), the size of convective cores (Claret +& Torres 2016), and colour-magnitude diagrams (Castro et al. +2014), solar modellers often include this extra mixing in their +models. But a consistent approach should also require accounting for a local change in the temperature gradient. The impact of +this local heating goes in the right direction to improve not only +the discrepancies of solar models below the convective boundary, but also in the convective envelope. This effect offers an interesting step forward for solving the solar modelling problem. +In this exploratory work, we adopt a simple prescription for the +local heating in the overshooting layer since the main goal is +to highlight its qualitative impact on stellar models. 
However, +this effect should not be considered as another free parameter in +the solar modelling problem. Future multi-dimensional hydrodynamical simulations will enable this process, and its treatment +in 1D stellar evolution codes, to be better constrained. +5. Acknowledgements +We thank our anonymous referee for valuable comments which +helped improving the manuscript. This work is supported by the +ERC grant No. 787361-COBOM and the consolidated STFC +grant ST/R000395/1. IB thanks the Max Planck Institut fur¨ +Astrophysics (Garching) for warm hospitality during completion +of part of this work. The authors would like to acknowledge the +use of the University of Exeter High-Performance Computing +(HPC) facility ISCA and of the DiRAC Data Intensive service +at Leicester, operated by the University of Leicester IT Services, +which forms part of the STFC DiRAC HPC Facility. The equipment was funded by BEIS capital funding via STFC capital +grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC +Operations grant ST/R001014/1. DiRAC is part of the National +e-Infrastructure. + +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +References +Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197 +Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141 +Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481 +Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403 +Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 +Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 +Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825 +Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79 +Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36 +Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space +Sciences, 6, 42 +Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 
2019b, A&A, 621, A33 +Caffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, +Sol. Phys., 268, 255 +Cai, T. 2020, ApJ, 888, 46 +Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13 +Christensen-Dalsgaard, J. 2021, Living Reviews in Solar Physics, 18, 2 +Christensen-Dalsgaard, J., Gough, D. O., & Knudstrup, E. 2018, MNRAS, 477, +3845 +Christensen-Dalsgaard, J., Monteiro, M. J. P. F. G., Rempel, M., & Thompson, +M. J. 2011, MNRAS, 414, 1158 +Claret, A. & Torres, G. 2016, A&A, 592, A15 +Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56 +Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4 +Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497 +Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 +Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed. +N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25 +Higl, J., Muller, E., & Weiss, A. 2021, A&A, 646, A133 ¨ +Hotta, H. 2017, ApJ, 843, 52 +Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563 +Kapyl ¨ a, P. J. 2019, A&A, 631, A122 ¨ +Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362 +Korre, L., Garaud, P., & Brummell, N. H. 2019, MNRAS, 484, 1220 +Kunitomo, M. & Guillot, T. 2021, arXiv e-prints, arXiv:2109.06492 +Meakin, C. A. & Arnett, D. 2007, ApJ, 667, 448 +Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A, +293, 127 +Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765 +Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828 +Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86 +Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153 +Viallet, M., Meakin, C., Arnett, D., & Mocak, M. 2013, ApJ, 769, 1 ´ +Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202 +Zahn, J. P. 1991, A&A, 252, 179 +Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 
2012, ApJ, 759, +L14 +Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50 +Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103 diff --git a/read/results/pdfium/2201.00201.txt b/read/results/pdfium/2201.00201.txt index 5bb1ebd..f0f0d8e 100644 --- a/read/results/pdfium/2201.00201.txt +++ b/read/results/pdfium/2201.00201.txt @@ -1,720 +1,715 @@ -Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022 -January 19, 2022 -Letter to the Editor -The period-age relation of long-period variables -M. Trabucchi1,?, N. Mowlavi1 -Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland -December 2021 -ABSTRACT -Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this -property has hardly been studied on theoretical grounds thus far. -Aims. We aim to examine the period-age relation using the results from recent nonlinear pulsation calculations. -Methods. We combined isochrone models with theoretical periods to simulate the distribution of fundamental mode LPV pulsators, -which include Miras, in the period-age plane, and we compared it with observations of LPVs in Galactic and Magellanic Clouds’ -clusters. -Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of -the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible -width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and C￾rich models are predicted to have different slopes. We derived best-fit relations describing age and initial mass as a function of the -fundamental mode period for both O- and C-rich models. -Conclusions. 
The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich -or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection -biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to -study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age. -Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular -clusters: general – Magellanic Clouds -1. Introduction -Low- to intermediate-mass stars approach the end of their lives -through the asymptotic giant branch (AGB) evolutionary phase, -during which they exhibit pulsations with timescales up to sev￾eral hundreds of days, and they are hence known as long-period -variables (LPVs). If their V-band amplitude exceeds 2.5 mag, -they are classified as Miras, which have a rather regular periodic￾ity and they are believed to pulsate only in the radial fundamen￾tal mode (FM). If their photometric amplitude is smaller, they -are known as semi-regular variables (SRVs), which are thought -to be the progenitors of Miras. The name stems from the lesser -degree of regularity of their light curves, likely due to the fact -that they can pulsate in multiple modes simultaneously. -The notion that younger LPVs tend to display longer periods -compared to older ones, often referred to as the period-age (PA) -relation, is rooted in the empirical evidence from stellar kinemat￾ics in the solar neighborhood. The first such piece of evidence -is probably due to Merrill (1923), who pointed out that M-type -LPVs increasingly lag behind the local standard of rest (i.e., pos￾sess a higher asymmetric drift) as their period decreases. 
Later -studies (as summarized by Wyatt & Cahn 1983) confirmed this -behavior (also using proper motion data, e.g., Wilson & Mer￾rill 1942), and showed that the shorter periods are also accom￾panied by a higher velocity dispersion. Furthermore, groups of -LPVs with relatively short periods are characterized by a greater -scale height above the Galactic plane. This was shown, using for -? Corresponding author: M. Trabucchi -(michele.trabucchi@unige.ch) -the first time the radial velocity of LPVs in the southern hemi￾sphere, by Feast (1963). In this seminal paper, Feast realized -that LPVs with shorter periods must be members of older stellar -populations and emphasized their highly promising applications -for both Galactic and extra-galactic studies over a wide range -of stellar ages. It should be noted that the PA relation is con￾nected with the existence of a period-metallicity relation (Lloyd -Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast -& Whitelock 2000a, and references therein). -A number of subsequent works have corroborated the PA -relation on empirical grounds, or have exploited it to interpret -observational results. Relevant examples are studies of LPVs in -globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; White￾lock 1986), toward the galactic center and bulge (Lloyd Evans -1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic -latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of par￾ticular interest is the recent effort to extend the analysis of LPVs -to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008; -Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al. -2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013; -Menzies et al. 2015). -The Hipparcos mission provided the means to refine the re￾sults on the period-kinematics connection. 
This was done by -Feast & Whitelock (2000b), who found evidence supporting the -existence of a bar-like structure in the Bulge from the orbits of -local LPVs. A similar study dedicated to C-rich LPVs was per￾formed by Feast et al. (2006), who provided quantitative age -estimates for these stars. A summary of the main results and -prospects emerging from these Hipparcos-era studies is given by -Article number, page 1 of 9 +Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022 +January 19, 2022 +Letter to the Editor +The period-age relation of long-period variables +M. Trabucchi1,?, N. Mowlavi1 +Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland +December 2021 +ABSTRACT +Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this +property has hardly been studied on theoretical grounds thus far. +Aims. We aim to examine the period-age relation using the results from recent nonlinear pulsation calculations. +Methods. We combined isochrone models with theoretical periods to simulate the distribution of fundamental mode LPV pulsators, +which include Miras, in the period-age plane, and we compared it with observations of LPVs in Galactic and Magellanic Clouds’ +clusters. +Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of +the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible +width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and Crich models are predicted to have different slopes. We derived best-fit relations describing age and initial mass as a function of the +fundamental mode period for both O- and C-rich models. +Conclusions. 
The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich +or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection +biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to +study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age. +Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular +clusters: general – Magellanic Clouds +1. Introduction +Low- to intermediate-mass stars approach the end of their lives +through the asymptotic giant branch (AGB) evolutionary phase, +during which they exhibit pulsations with timescales up to several hundreds of days, and they are hence known as long-period +variables (LPVs). If their V-band amplitude exceeds 2.5 mag, +they are classified as Miras, which have a rather regular periodicity and they are believed to pulsate only in the radial fundamental mode (FM). If their photometric amplitude is smaller, they +are known as semi-regular variables (SRVs), which are thought +to be the progenitors of Miras. The name stems from the lesser +degree of regularity of their light curves, likely due to the fact +that they can pulsate in multiple modes simultaneously. +The notion that younger LPVs tend to display longer periods +compared to older ones, often referred to as the period-age (PA) +relation, is rooted in the empirical evidence from stellar kinematics in the solar neighborhood. The first such piece of evidence +is probably due to Merrill (1923), who pointed out that M-type +LPVs increasingly lag behind the local standard of rest (i.e., possess a higher asymmetric drift) as their period decreases. 
Later +studies (as summarized by Wyatt & Cahn 1983) confirmed this +behavior (also using proper motion data, e.g., Wilson & Merrill 1942), and showed that the shorter periods are also accompanied by a higher velocity dispersion. Furthermore, groups of +LPVs with relatively short periods are characterized by a greater +scale height above the Galactic plane. This was shown, using for +? Corresponding author: M. Trabucchi +(michele.trabucchi@unige.ch) +the first time the radial velocity of LPVs in the southern hemisphere, by Feast (1963). In this seminal paper, Feast realized +that LPVs with shorter periods must be members of older stellar +populations and emphasized their highly promising applications +for both Galactic and extra-galactic studies over a wide range +of stellar ages. It should be noted that the PA relation is connected with the existence of a period-metallicity relation (Lloyd +Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast +& Whitelock 2000a, and references therein). +A number of subsequent works have corroborated the PA +relation on empirical grounds, or have exploited it to interpret +observational results. Relevant examples are studies of LPVs in +globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; Whitelock 1986), toward the galactic center and bulge (Lloyd Evans +1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic +latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of particular interest is the recent effort to extend the analysis of LPVs +to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008; +Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al. +2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013; +Menzies et al. 2015). +The Hipparcos mission provided the means to refine the results on the period-kinematics connection. This was done by +Feast & Whitelock (2000b), who found evidence supporting the +existence of a bar-like structure in the Bulge from the orbits of +local LPVs. 
A similar study dedicated to C-rich LPVs was performed by Feast et al. (2006), who provided quantitative age +estimates for these stars. A summary of the main results and +prospects emerging from these Hipparcos-era studies is given by +Article number, page 1 of 9 arXiv:2201.00201v2 [astro-ph.SR] 17 Jan 2022 -A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Feast (2007). More recently, the study of the Galaxy with LPVs -has been stimulated by the wealth of data acquired by large-scale -surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), espe￾cially the Gaia mission (Grady et al. 2019, 2020). -It seems relevant that just a few years after the study of Feast -(1963), Kippenhahn & Smith (1969) predicted the PA relation -of classical Cepheids from stellar evolution and pulsation mod￾els. The theoretical modeling of Cepheids and of their period￾luminosity (PL) and PA relations is now an active field of re￾search (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma -et al. 2020). In contrast, when it comes to theoretical assessments -of the LPV PA relation, the literature is surprisingly scarce (espe￾cially in comparison with the significant effort put into empirical -studies). In fact, we were able to identify only two relevant stud￾ies addressing this subject (Wyatt & Cahn 1983; Eggen 1998). -The discrepancy in period predictions between linear and nonlin￾ear pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter -& Wood 2005; Trabucchi et al. 2021b), and more generally the -difficulty in modeling the structure of evolved red giants, likely -played a role in hampering the theoretical investigation of the PA -relation of LPVs. -Motivated by the release of updated AGB evolutionary mod￾els (Pastorelli et al. 2019, 2020) and the availability of new, ac￾curate model predictions for the FM period of AGB stars (Tra￾bucchi et al. 2019, 2021b), we decided to investigate the nature -of the PA relation of LPVs on theoretical grounds. 
The adopted -models and observed data are described in Sect. 2, while in -Sect. 3 we present the results, which are discussed in Sect. 4. -We summarize our conclusions in Sect. 5. -2. Methods -2.1. Models -We employed PARSEC-COLIBRI isochrones (Marigo et al. -2017) with stellar evolutionary models from Pastorelli et al. -(2019, 2020) for the thermally pulsing asymptotic giant branch -(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, ver￾sion 1.2S) for the preceding evolution. The adopted set of -isochrones covers the range 0.001 to 0.016 in initial metal￾licity (Zi), with a 0.001 step, while it spans the age interval -8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB -phase is short-lived, it only spans a small range of initial masses -for each given isochrone, of order of 10−2 M at most. -The adopted isochrones include linear pulsation periods from -Trabucchi et al. (2019) for overtone modes and nonlinear periods -computed with the period-mass-radius relation from Trabucchi -et al. (2021b) for the FM1 -. Pulsation properties were computed -along both the early-AGB and the TP-AGB. We did not extend -our analysis to red supergiant stars as the pulsation prescription -we employed are strictly valid only below 7 M . -We recall that, with the adopted nonlinear relation, the period -increases with radius (R) as a broken power law, whose exponent -decreases as soon as the “bending radius” Rb is exceeded, it and -becomes zero when the “saturation radius” Rs > Rb is reached -(i.e., the period becomes independent of radius). The exact val￾ues of Rb and Rs -, as well as of the exponents, depend on the -current mass (M). We assume that the FM is dominant if the -stellar radius is larger than the critical value Rdom,0, which we -computed from the current stellar mass using Eq. 4 of Trabucchi -et al. (2021b). -1 Hereinafter, whenever we discuss periods, it should be understood -that we refer to FM periods on which this work is focused. -2.2. 
Data -As a first set of data, we considered the cluster-LPV pairs used -by Grady et al. (2019, see their tables 1 and 2). These consist of -19 clusters in the Large Magellanic Cloud, hosting a total of 20 -potential LPV members, and eight Galactic clusters each hosting -a potential LPV member. -We expanded this list with data for LPVs in a few populous -clusters, namely the Galactic clusters NGC 362, NGC 2808, 47 -Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC -1978 and NGC 1846; and the cluster NGC 419 in the Small Mag￾ellanic Cloud (SMC). The source lists were taken from Lebzel￾ter & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010), -whose notation for the sources names is adopted here. After ex￾cluding the star LW3 in NGC 1846 and the star V129 in ω Cen, -which are unlikely cluster members (cf. Lebzelter & Wood 2007, -2016), we reached a total of 203 sources. -The aforementioned studies also provide a lot of informa￾tion, possibly including JHK photometry, one or more periods, -and a spectral type. In order to expand on the available data, -we crossmatched the selected sample with the Two Micron All￾Sky Survey (2MASS, Skrutskie et al. 2006), the all-sky data -release of the Wide-field Infrared Survey Explorer (AllWISE, -Cutri et al. 2013), the catalog of variable stars from the All￾Sky Automated Survey for SuperNovae (ASAS-SN Jayasinghe -et al. 2020), the catalogs of LPVs in the Magellanic Clouds from -the third phase of the Optical Gravitational Lensing Experiment -(OGLE-III, Soszynski et al. ´ 2009, 2011), the early third data re￾lease from the Gaia mission (Gaia EDR3, Gaia Collaboration -et al. 2021), and the catalog of LPV candidates from Gaia DR2 -(Mowlavi et al. 2018). -Following Grady et al. (2019), we took ages from -Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters -in the Galaxy and LMC, respectively, thereby ensuring that ages -would be homogeneously derived for clusters in both galaxies. 
-Age uncertainties from Baumgardt et al. (2013), provided for -each cluster, are generally around σlog(τ) ' 0.05. Kharchenko -et al. (2016) do not provide age uncertainties, but a reasonable -upper limit for their method should be σlog(τ) = 0.2 based on -the analysis of Kharchenko et al. (2005) (the same value was -adopted by Grady et al. 2019, in their Fig. 7). -As discussed by Kamath et al. (2010), the age of the SMC -cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is -consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij -et al. (2014), while it is as young as τ ' 0.89 ± 0.015 Gyr ac￾cording to Perren et al. (2017). Since an accurate estimate is not -necessary for our exploratory analysis, we took a rough average -and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846 -likely exhibit TP-AGB boosting (Girardi et al. 2013). We note -that some clusters show multiple stellar populations, whose age -spread has been estimated in some cases (e.g., Mackey & Broby -Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is con￾sistent with the age uncertainties we adopted. -Distances of Galactic clusters were also taken from -Kharchenko et al. (2016), while for the Magellanic Clouds and -their clusters we adopted the distance moduli µLMC = 18.49 ± -0.09 mag and µSMC = 18.96 ± 0.02 mag from de Grijs et al. -(2017). We searched for data on interstellar extinction from sev￾eral literature works (e.g., Nayak et al. 2016; Kharchenko et al. -2016; Perren et al. 2017), all of which suggest that extinction -in the Ks filter is smaller than ∼ 0.1 mag for most of the clus￾ters we considered, and at most as large as ∼ 0.3 mag, which is -negligible for our purposes. +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Feast (2007). More recently, the study of the Galaxy with LPVs +has been stimulated by the wealth of data acquired by large-scale +surveys (e.g., Catchpole et al. 2016; Urago et al. 
2020), especially the Gaia mission (Grady et al. 2019, 2020). +It seems relevant that just a few years after the study of Feast +(1963), Kippenhahn & Smith (1969) predicted the PA relation +of classical Cepheids from stellar evolution and pulsation models. The theoretical modeling of Cepheids and of their periodluminosity (PL) and PA relations is now an active field of research (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma +et al. 2020). In contrast, when it comes to theoretical assessments +of the LPV PA relation, the literature is surprisingly scarce (especially in comparison with the significant effort put into empirical +studies). In fact, we were able to identify only two relevant studies addressing this subject (Wyatt & Cahn 1983; Eggen 1998). +The discrepancy in period predictions between linear and nonlinear pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter +& Wood 2005; Trabucchi et al. 2021b), and more generally the +difficulty in modeling the structure of evolved red giants, likely +played a role in hampering the theoretical investigation of the PA +relation of LPVs. +Motivated by the release of updated AGB evolutionary models (Pastorelli et al. 2019, 2020) and the availability of new, accurate model predictions for the FM period of AGB stars (Trabucchi et al. 2019, 2021b), we decided to investigate the nature +of the PA relation of LPVs on theoretical grounds. The adopted +models and observed data are described in Sect. 2, while in +Sect. 3 we present the results, which are discussed in Sect. 4. +We summarize our conclusions in Sect. 5. +2. Methods +2.1. Models +We employed PARSEC-COLIBRI isochrones (Marigo et al. +2017) with stellar evolutionary models from Pastorelli et al. +(2019, 2020) for the thermally pulsing asymptotic giant branch +(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, version 1.2S) for the preceding evolution. 
The adopted set of +isochrones covers the range 0.001 to 0.016 in initial metallicity (Zi), with a 0.001 step, while it spans the age interval +8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB +phase is short-lived, it only spans a small range of initial masses +for each given isochrone, of order of 10−2 M at most. +The adopted isochrones include linear pulsation periods from +Trabucchi et al. (2019) for overtone modes and nonlinear periods +computed with the period-mass-radius relation from Trabucchi +et al. (2021b) for the FM1. Pulsation properties were computed +along both the early-AGB and the TP-AGB. We did not extend +our analysis to red supergiant stars as the pulsation prescription +we employed are strictly valid only below 7 M . +We recall that, with the adopted nonlinear relation, the period +increases with radius (R) as a broken power law, whose exponent +decreases as soon as the “bending radius” Rb is exceeded, it and +becomes zero when the “saturation radius” Rs > Rb is reached +(i.e., the period becomes independent of radius). The exact values of Rb and Rs +, as well as of the exponents, depend on the +current mass (M). We assume that the FM is dominant if the +stellar radius is larger than the critical value Rdom,0, which we +computed from the current stellar mass using Eq. 4 of Trabucchi +et al. (2021b). +1 Hereinafter, whenever we discuss periods, it should be understood +that we refer to FM periods on which this work is focused. +2.2. Data +As a first set of data, we considered the cluster-LPV pairs used +by Grady et al. (2019, see their tables 1 and 2). These consist of +19 clusters in the Large Magellanic Cloud, hosting a total of 20 +potential LPV members, and eight Galactic clusters each hosting +a potential LPV member. 
+We expanded this list with data for LPVs in a few populous +clusters, namely the Galactic clusters NGC 362, NGC 2808, 47 +Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC +1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic Cloud (SMC). The source lists were taken from Lebzelter & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010), +whose notation for the sources names is adopted here. After excluding the star LW3 in NGC 1846 and the star V129 in ω Cen, +which are unlikely cluster members (cf. Lebzelter & Wood 2007, +2016), we reached a total of 203 sources. +The aforementioned studies also provide a lot of information, possibly including JHK photometry, one or more periods, +and a spectral type. In order to expand on the available data, +we crossmatched the selected sample with the Two Micron AllSky Survey (2MASS, Skrutskie et al. 2006), the all-sky data +release of the Wide-field Infrared Survey Explorer (AllWISE, +Cutri et al. 2013), the catalog of variable stars from the AllSky Automated Survey for SuperNovae (ASAS-SN Jayasinghe +et al. 2020), the catalogs of LPVs in the Magellanic Clouds from +the third phase of the Optical Gravitational Lensing Experiment +(OGLE-III, Soszynski et al. ´ 2009, 2011), the early third data release from the Gaia mission (Gaia EDR3, Gaia Collaboration +et al. 2021), and the catalog of LPV candidates from Gaia DR2 +(Mowlavi et al. 2018). +Following Grady et al. (2019), we took ages from +Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters +in the Galaxy and LMC, respectively, thereby ensuring that ages +would be homogeneously derived for clusters in both galaxies. +Age uncertainties from Baumgardt et al. (2013), provided for +each cluster, are generally around σlog(τ) ' 0.05. Kharchenko +et al. (2016) do not provide age uncertainties, but a reasonable +upper limit for their method should be σlog(τ) = 0.2 based on +the analysis of Kharchenko et al. 
(2005) (the same value was +adopted by Grady et al. 2019, in their Fig. 7). +As discussed by Kamath et al. (2010), the age of the SMC +cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is +consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij +et al. (2014), while it is as young as τ ' 0.89 ± 0.015 Gyr according to Perren et al. (2017). Since an accurate estimate is not +necessary for our exploratory analysis, we took a rough average +and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846 +likely exhibit TP-AGB boosting (Girardi et al. 2013). We note +that some clusters show multiple stellar populations, whose age +spread has been estimated in some cases (e.g., Mackey & Broby +Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is consistent with the age uncertainties we adopted. +Distances of Galactic clusters were also taken from +Kharchenko et al. (2016), while for the Magellanic Clouds and +their clusters we adopted the distance moduli µLMC = 18.49 ± +0.09 mag and µSMC = 18.96 ± 0.02 mag from de Grijs et al. +(2017). We searched for data on interstellar extinction from several literature works (e.g., Nayak et al. 2016; Kharchenko et al. +2016; Perren et al. 2017), all of which suggest that extinction +in the Ks filter is smaller than ∼ 0.1 mag for most of the clusters we considered, and at most as large as ∼ 0.3 mag, which is +negligible for our purposes. Article number, page 2 of 9 -Trabucchi et al.: The period-age relation of LPVs -A detailed membership verification is beyond the scope of -this work, and we relied on the checks performed by authors -whose source lists we adopted. It should be kept in mind that -some sources may not be real cluster members. -For sources without a spectral type, we used the Gaia￾2MASS diagram (Lebzelter et al. 2018, 2019) to determine -whether they are O- or C-rich. 
We used the near-infrared period￾luminosity diagram to identify the most likely pulsation mode -associated with each period of each observed source. We se￾lected only FM periods and rejected long secondary periods and -periods attributed to overtone mode pulsation. The details of -these classification steps are provided in Appendix A. Out of -203 sources from the initial list, we identified 95 LPVs pulsat￾ing in the FM, consisting of 40 C-rich and 55 O-rich sources. -They consist of 29 Miras, 33 semi-regular variables, and 33 other -sources (most likely LPVs) whose variability type has not been -determined. We note that, with the exception of Gaia DR2, the -sources of variability data considered here do not report the un￾certainty associated with observed periods. However, since peri￾ods were derived in most cases from well-sampled, high-quality -variability observations, relative period uncertainties are most -likely negligible compared with those associated with age. -3. Results -Panel (a) of Fig. 1 shows a comparison between model predic￾tions and observations in the PFM–log(τ/yr) plane. The former -are displayed by a density map showing the expected number -NFM of LPVs pulsating in the FM in each period-age bin, nor￾malized to maximum. Model predictions are in good agreement -with data derived from observations (i.e., individual LPVs in -clusters, represented by symbols), and they show that the pe￾riod of LPVs pulsating in the FM decreases with increasing age. -Crosses mark the average properties of the three groups of C￾rich LPVs from Feast et al. (2006, their table 4), which fit the -general pattern with the exception of their group 3, estimated to -be older than what our models predict at P ' 650. -We also show a linear best-fit to the models distribution -(weighted by NFM), which shows a fairly good agreement with -the best-fit to observations by Grady et al. (2019, also shown). 
-However, the best-fit line does not fully capture the properties -of the predictions, nor of the observed trend. Indeed, models are -indicative of a substantial dispersion around the relation. For in￾stance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550 -days. Conversely, LPVs pulsating in the FM with a period of 350 -days are predicted to be at least ∼200 Myr old, but they can be as -old as ∼3 Gyr. Observed data are consistent with the predicted -spread, although the agreement cannot be considered as the ob￾served sample adopted is not complete. -Nonetheless, it is relevant that some clusters host multiple -LPVs, which are thus almost coeval, and they do span a wide -period range. Some of these clusters host multiple stellar popu￾lations that are believed to have formed over a time comparable -with the age uncertainties we adopted. This means that longer￾period (more massive) LPVs in these clusters probably lean to￾ward the lower age limit assumed for their host cluster, and the -opposite is true at shorter periods. This tends to strengthen the -agreement between models and observations. -Our data set samples the intermediate-age range (NGC 419 -and NGC 1846) relatively well as well as old ages (ω Cen, 47 -Tuc, NGC 362, and NGC 2808). This provides us with the op￾portunity to study the period distribution at these ages, and for -a more detailed comparison between models and observations. -On the basis of the average age of these two groups of clus￾ters and the associated uncertainty, and taking the discrete age -sampling of the isochrones into account, we considered the age -ranges log(τ/yr) = 9.15±0.10 and log(τ/yr) = 10.10±0.20. Pe￾riod distributions at those ages are displayed in panels (b) and (c) -of Fig. 1, respectively, showing good agreement between model -predictions and observations. We note that in both cases, the dis￾tribution is skewed toward short periods, which seems to be true -at all ages for O-rich stars. 
This can be seen in panel (a) of Fig. 2, -which is a version of the PA plane limited to an O-rich compo￾sition2 -. Indeed, although at τ . 5 Gyr the observed sample is -very scarce, it appears to be consistent with models predicting a -more densely populated region in the shorter-period half of the -PA distribution. -The case of C-stars, shown in panel (b) of Fig. 2, is differ￾ent. They only form over a restricted range of initial masses -and ages, so their occurrence in a given stellar population is an -age indicator on its own. Toward the low-mass (old age) side -of the C-star regime, the behavior is similar to the O-rich case -with a concentration around relatively short periods. C-rich mod￾els tend to have a lower surface temperature and larger radii, -at a given mass, compared to O-rich models, and thus they at￾tain longer periods more easily. This occurs in particular toward -higher masses, so that younger C-rich models are more concen￾trated at longer periods, leading to a steeper PA relation com￾pared with the O-rich case. These predictions agree with ob￾servations on the old side of the period distribution, while the -scarcity of C stars at τ ' 0.6 Gyr prevents us from performing a -comparison at younger ages. -In appendix B, we provide analytic PA relations by fitting the -high-density parts of the O- and C-rich models’ distribution. We -emphasize that, because of the large scatter of the relation, ages -estimated in this way for individual LPVs are bound to be highly -uncertain. As a way to assess the error in age determination, we -also provide analytic best-fit relations to the boundaries of the -PA distribution of the models in the appendix. These relations -are displayed in Fig. 2. -4. Discussion -In general agreement with observations, models confirm that -LPVs pulsating predominantly in the FM follow a PA relation, -which exhibits a non-negligible dispersion. 
Thanks to the newly -available nonlinear period predictions, we were able to better ex￾amine the nature of this relation and the origin of its scatter. -The PA relation is intimately connected with the PL relation, -both patterns emerging because of the prominent role of mass in -shaping stellar structure and evolution. Indeed, stellar mass de￾termines the lifetimes of the main evolutionary stages, and thus -the age of stars in the AGB phase. Pulsation models (Trabuc￾chi et al. 2021b) show that the radius Rdom,0 (and correspond￾ing luminosity) at the onset of dominant FM pulsation (DFMP) -increases with mass, so that the most massive FM-dominated -LPVs are brighter. They also have longer periods, as this in￾creases with radius. In other words, the period, luminosity, and -age near the tip of the AGB are all functions of initial stellar -mass (at least to a good approximation). -We note that this would not be the case if the FM were dom￾inant along the entire AGB, as the large change in radius during -this phase would result in a wide range of periods at a given age. -It is the very fact that DFMP occurs only during the final portion -2 A further version of the PA plane highlighting both chemical types -can be found in Fig. A.2 of appendix A.1. +Trabucchi et al.: The period-age relation of LPVs +A detailed membership verification is beyond the scope of +this work, and we relied on the checks performed by authors +whose source lists we adopted. It should be kept in mind that +some sources may not be real cluster members. +For sources without a spectral type, we used the Gaia2MASS diagram (Lebzelter et al. 2018, 2019) to determine +whether they are O- or C-rich. We used the near-infrared periodluminosity diagram to identify the most likely pulsation mode +associated with each period of each observed source. We selected only FM periods and rejected long secondary periods and +periods attributed to overtone mode pulsation. 
The details of +these classification steps are provided in Appendix A. Out of +203 sources from the initial list, we identified 95 LPVs pulsating in the FM, consisting of 40 C-rich and 55 O-rich sources. +They consist of 29 Miras, 33 semi-regular variables, and 33 other +sources (most likely LPVs) whose variability type has not been +determined. We note that, with the exception of Gaia DR2, the +sources of variability data considered here do not report the uncertainty associated with observed periods. However, since periods were derived in most cases from well-sampled, high-quality +variability observations, relative period uncertainties are most +likely negligible compared with those associated with age. +3. Results +Panel (a) of Fig. 1 shows a comparison between model predictions and observations in the PFM–log(τ/yr) plane. The former +are displayed by a density map showing the expected number +NFM of LPVs pulsating in the FM in each period-age bin, normalized to maximum. Model predictions are in good agreement +with data derived from observations (i.e., individual LPVs in +clusters, represented by symbols), and they show that the period of LPVs pulsating in the FM decreases with increasing age. +Crosses mark the average properties of the three groups of C-rich LPVs from Feast et al. (2006, their table 4), which fit the +general pattern with the exception of their group 3, estimated to +be older than what our models predict at P ≃ 650. +We also show a linear best-fit to the models’ distribution +(weighted by NFM), which shows a fairly good agreement with +the best-fit to observations by Grady et al. (2019, also shown). +However, the best-fit line does not fully capture the properties +of the predictions, nor of the observed trend. Indeed, models are +indicative of a substantial dispersion around the relation. For instance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550 +days. 
Conversely, LPVs pulsating in the FM with a period of 350 +days are predicted to be at least ∼200 Myr old, but they can be as +old as ∼3 Gyr. Observed data are consistent with the predicted +spread, although the agreement cannot be considered as the observed sample adopted is not complete. +Nonetheless, it is relevant that some clusters host multiple +LPVs, which are thus almost coeval, and they do span a wide +period range. Some of these clusters host multiple stellar populations that are believed to have formed over a time comparable +with the age uncertainties we adopted. This means that longerperiod (more massive) LPVs in these clusters probably lean toward the lower age limit assumed for their host cluster, and the +opposite is true at shorter periods. This tends to strengthen the +agreement between models and observations. +Our data set samples the intermediate-age range (NGC 419 +and NGC 1846) relatively well as well as old ages (ω Cen, 47 +Tuc, NGC 362, and NGC 2808). This provides us with the opportunity to study the period distribution at these ages, and for +a more detailed comparison between models and observations. +On the basis of the average age of these two groups of clusters and the associated uncertainty, and taking the discrete age +sampling of the isochrones into account, we considered the age +ranges log(τ/yr) = 9.15±0.10 and log(τ/yr) = 10.10±0.20. Period distributions at those ages are displayed in panels (b) and (c) +of Fig. 1, respectively, showing good agreement between model +predictions and observations. We note that in both cases, the distribution is skewed toward short periods, which seems to be true +at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2, +which is a version of the PA plane limited to an O-rich composition2 +. Indeed, although at τ . 
5 Gyr the observed sample is +very scarce, it appears to be consistent with models predicting a +more densely populated region in the shorter-period half of the +PA distribution. +The case of C-stars, shown in panel (b) of Fig. 2, is different. They only form over a restricted range of initial masses +and ages, so their occurrence in a given stellar population is an +age indicator on its own. Toward the low-mass (old age) side +of the C-star regime, the behavior is similar to the O-rich case +with a concentration around relatively short periods. C-rich models tend to have a lower surface temperature and larger radii, +at a given mass, compared to O-rich models, and thus they attain longer periods more easily. This occurs in particular toward +higher masses, so that younger C-rich models are more concentrated at longer periods, leading to a steeper PA relation compared with the O-rich case. These predictions agree with observations on the old side of the period distribution, while the +scarcity of C stars at τ ' 0.6 Gyr prevents us from performing a +comparison at younger ages. +In appendix B, we provide analytic PA relations by fitting the +high-density parts of the O- and C-rich models’ distribution. We +emphasize that, because of the large scatter of the relation, ages +estimated in this way for individual LPVs are bound to be highly +uncertain. As a way to assess the error in age determination, we +also provide analytic best-fit relations to the boundaries of the +PA distribution of the models in the appendix. These relations +are displayed in Fig. 2. +4. Discussion +In general agreement with observations, models confirm that +LPVs pulsating predominantly in the FM follow a PA relation, +which exhibits a non-negligible dispersion. Thanks to the newly +available nonlinear period predictions, we were able to better examine the nature of this relation and the origin of its scatter. 
+The PA relation is intimately connected with the PL relation, +both patterns emerging because of the prominent role of mass in +shaping stellar structure and evolution. Indeed, stellar mass determines the lifetimes of the main evolutionary stages, and thus +the age of stars in the AGB phase. Pulsation models (Trabucchi et al. 2021b) show that the radius Rdom,0 (and corresponding luminosity) at the onset of dominant FM pulsation (DFMP) +increases with mass, so that the most massive FM-dominated +LPVs are brighter. They also have longer periods, as this increases with radius. In other words, the period, luminosity, and +age near the tip of the AGB are all functions of initial stellar +mass (at least to a good approximation). +We note that this would not be the case if the FM were dominant along the entire AGB, as the large change in radius during +this phase would result in a wide range of periods at a given age. +It is the very fact that DFMP occurs only during the final portion +2 A further version of the PA plane highlighting both chemical types +can be found in Fig. A.2 of appendix A.1. Article number, page 3 of 9 -A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on -a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape -indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of -galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit -to models and the best-fit by Grady et al. (2019), respectively. 
Period distributions at selected ages are compared in panels (b) and (c) and marked -in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is -suppressed in panel (a). -Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while -dashed lines are best fits to the edges of the model distribution (see the text for more details). -of the AGB that limits the range of periods a FM-pulsating LPV -can have at a given age. Yet, the DFMP part of the AGB is long -enough for significant variations in radius to occur, which result -in the dispersion of the PA relation seen in Fig. 1. -At a given initial metallicity Zi -, the shape of the period dis￾tribution primarily results from the fact that, throughout the TP￾AGB (the stage during which the FM is normally excited), the -envelope expansion accelerates, while the period becomes pro￾gressively less sensitive to changes in radius (see Appendix C). -In particular, the slope of the period-radius relation decreases -sharply at Pb = P(Rb). The FM period distribution is roughly -symmetric around that value, but at its short-period side, the FM -is not dominant. Therefore, when only FM-dominated LPVs are -considered, as is done here, the observed period distribution ap￾pears skewed toward short periods. -This feature is strengthened when a set of isochrones is con￾sidered which spans a range of initial metallicities because the -adopted criterion for the onset of DFMP does not depend on -metallicity, but the FM period does as metal-poor LPVs are -warmer and have smaller radii compared with metal-rich ones. -As a consequence, the bulk of the period distribution of metal￾poor LPVs is at periods shorter than Pb, so they only contribute -to the global distribution (i.e., at all Zi at a given age) over a -small period range at P & Pb. 
In contrast, metal-rich LPVs have -periods well beyond Pb, so they contribute both at that value and -at longer periods. The result is an excess of FM-dominated LPVs -near Pb, that is to say on the short side of the overall period dis￾tribution. -We note that, in contrast with the prescription we adopted, -the onset of DFMP in reality is probably sensitive to metallic￾Article number, page 4 of 9 -Trabucchi et al.: The period-age relation of LPVs -ity. While the good degree of agreement with observations sug￾gests that the dependence is weak at most, it is possible for -any discrepancy to be smeared out by the fact that our set of -isochrone implicitly assumes a flat star-formation rate with no -age-metallicity relation, so it is not an accurate representation of -any realistic stellar environment. In this sense, the PA relation is -environment-dependent, and it is not necessarily universal. -A further point of uncertainty stems from the fact that the -prescription we adopted assumes that the FM period only de￾pends upon the mass and radius, and that it is affected by a -change in composition only through the effect that such a varia￾tion has on the radius. While this is true to a good approximation, -linear models show a small dependence of periods on metallic￾ity at a fixed mass and radius, but the quantitative impact in the -nonlinear case is unknown. We can only estimate, based on the -results of Trabucchi et al. (2019), an uncertainty of ±10% at most -with respect to the prescriptions adopted here. -Qualitatively, a realistic age-metallicity relation and the -metallicity dependence of the period and of the onset of DFMP -are all expected to result in a steeper PA relation than the one -we predict, but it is difficult to assess the relative importance of -these effects. In this sense, the composition probably affects the -shape of the PA relation more than its dispersion. 
The latter is -likely affected by the composition indirectly through mass loss, -the analysis of which is beyond the scope of this study. How￾ever, we point out that mass loss represents a source of scatter in -combination with the occurrence of thermal pulses, because it re￾duces the minimum radius for the onset of DFMP. Thus, during -the luminosity dips associated with thermal pulses, a LPV can -have a period shorter than the one it had when it first entered the -DFMP regime (see Appendix C). An additional source of uncer￾tainty, which we disregarded, is rotation (or other processes that -induce extra mixing in the core) which causes a spread in ages -at a given initial mass (cf. Anderson et al. 2016, for the case of -classical Cepheids). -The fairly good agreement between models and observations -encourages the use of LPVs as age indicators, but the scatter of -the PA relation hampers this application. We attempted to reduce -the scatter through corrections involving photometric properties, -as is customarily done for classical Cepheids with a color term -(e.g., Bono et al. 2005), but with unsatisfactory results. A correc￾tion dependent on the photometric amplitude of variability rep￾resents a promising alternative, but it cannot be pursued at the -moment. Indeed, for computational efficiency, current pulsation -models include only a crude treatment of the atmospheric layers -as they do not affect pulsation periods. On the other hand, the -atmosphere is crucial in determining the spectral energy distri￾bution and its variation throughout the pulsation cycle, and hence -the amplitude of variability. At the same time, the observational -sample adopted here is too heterogeneous for a self-consistent -investigation of amplitude, but this kind of study could be made -possible by the upcoming data release 3 of the Gaia mission -(Gaia Collaboration et al. 2021) and the future Legacy Survey -of Space and Time (LSST, Ivezic et al. ´ 2019) of the Vera Rubin -Observatory. 
-It is worth noting that our analysis applies to Miras as well -as SRVs, provided that they predominantly pulsate in the FM. -The limitation of PA relation studies to Miras, as has mainly -been done in literature so far, undoubtedly has some advan￾tages: to begin with, the fact that Miras are typically easier to -detect than SRVs, and their light curves are easier to process -as they tend to be more regular. Moreover, Miras represent the -end-point of AGB evolution, so in principle they correspond to a -smaller range of stellar parameters compared to the full extent of -the DFMP regime, and they display a smaller range of periods -at a given age (cf. Feast & Whitelock 2000b). In other words, -they should exhibit a relatively narrow PA relation (even though, -based on the observational data set we adopted, there is no con￾clusive evidence that considering only Miras reduces the scatter -of the PA relation). -Nonetheless, we caution against this approach as it is prone -to introducing uncontrolled biases, as the traditional distinction -between SRVs and Miras is arbitrary (see Trabucchi et al. 2021a, -and references therein). As such, it disregards the physical pro￾cesses at the origin of the range of amplitudes characterizing -LPVs. In particular, photometric amplitudes are largely deter￾mined by the formation and dissociation of molecules in the stel￾lar atmosphere, and they are likely to be metallicity-dependent. -It is therefore reasonable to assume that metal-poor (old) Mira -analogs might be classified as SRVs, thereby undermining the -potential application of the PA relation if restricted to Miras. -This seems to be supported by the fact that the bulk of old LPVs -in our sample are classified as SRVs. Therefore, studies involv￾ing PA relations of LPVs would advantageously include both -Miras and FM-pulsating SRVs. 
-The challenge associated with SRVs stems from the fact that -they are often multiperiodic (even when predominantly pulsat￾ing in the FM), a property that complicates the light curve anal￾ysis and period extraction. At the same time, this feature could -potentially improve age determinations as overtone modes are -expected to display a PA relation as well. -5. Conclusions -We used the results from recent nonlinear pulsation calculations -and combined them with state-of-the-art isochrone models to in￾vestigate the PA relation of FM-dominated LPVs, finding good -agreement with the distribution of observed LPVs in star clus￾ters. The theoretical PA relation displays a non-negligible scat￾ter, whose origin we identified due to the fact that, despite being -very brief, the portion of AGB evolution during which the FM -becomes dominant shows a relatively large range in mass and -radius at a given age. -The theoretical distribution of FM periods is roughly sym￾metric, but the FM is not dominant at the shortest periods. As a -result, models predict that the distribution of dominant FM peri￾ods at a given age is skewed toward short periods, in agreement -with observations. Depending on stellar populations, metallicity -may enhance this feature as metal-poor LPVs, which tend to be -warmer and more compact, only contribute near short periods. -We provide the best-fit PA relation separately for O-rich and -C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA -relation because of their lower surface temperatures, which allow -them to reach longer periods more easily. -Our analysis concerns all LPVs predominantly pulsating in -the FM, regardless of whether they are classified as Miras or -SRVs. We discourage such a distinction in that it is arbitrary and -prone to selection biases that risk compromising the use of LPVs -as age indicators. -The main limitation in the use of the PA relation for age de￾terminations of individual LPVs stems from its relatively large -scatter. 
We suggest that corrective terms, involving the ampli￾tude of variability, might help to reduce this scatter and antici￾pate that upcoming data from ongoing and future surveys dedi￾cated to time-domain astronomy will be highly valuable to probe -this possibility. A study of the impact of metallicity on nonlinear -pulsation is highly desirable to pursue this line of investigation, +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on +a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape +indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of +galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit +to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked +in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is +suppressed in panel (a). +Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while +dashed lines are best fits to the edges of the model distribution (see the text for more details). +of the AGB that limits the range of periods a FM-pulsating LPV +can have at a given age. Yet, the DFMP part of the AGB is long +enough for significant variations in radius to occur, which result +in the dispersion of the PA relation seen in Fig. 1. 
+At a given initial metallicity Zi, the shape of the period distribution primarily results from the fact that, throughout the TP-AGB (the stage during which the FM is normally excited), the +envelope expansion accelerates, while the period becomes progressively less sensitive to changes in radius (see Appendix C). +In particular, the slope of the period-radius relation decreases +sharply at Pb = P(Rb). The FM period distribution is roughly +symmetric around that value, but at its short-period side, the FM +is not dominant. Therefore, when only FM-dominated LPVs are +considered, as is done here, the observed period distribution appears skewed toward short periods. +This feature is strengthened when a set of isochrones is considered which spans a range of initial metallicities because the +adopted criterion for the onset of DFMP does not depend on +metallicity, but the FM period does as metal-poor LPVs are +warmer and have smaller radii compared with metal-rich ones. +As a consequence, the bulk of the period distribution of metal-poor LPVs is at periods shorter than Pb, so they only contribute +to the global distribution (i.e., at all Zi at a given age) over a +small period range at P ≳ Pb. In contrast, metal-rich LPVs have +periods well beyond Pb, so they contribute both at that value and +at longer periods. The result is an excess of FM-dominated LPVs +near Pb, that is to say on the short side of the overall period distribution. +We note that, in contrast with the prescription we adopted, +the onset of DFMP in reality is probably sensitive to metallicArticle number, page 4 of 9 +Trabucchi et al.: The period-age relation of LPVs +ity. 
While the good degree of agreement with observations suggests that the dependence is weak at most, it is possible for +any discrepancy to be smeared out by the fact that our set of +isochrone implicitly assumes a flat star-formation rate with no +age-metallicity relation, so it is not an accurate representation of +any realistic stellar environment. In this sense, the PA relation is +environment-dependent, and it is not necessarily universal. +A further point of uncertainty stems from the fact that the +prescription we adopted assumes that the FM period only depends upon the mass and radius, and that it is affected by a +change in composition only through the effect that such a variation has on the radius. While this is true to a good approximation, +linear models show a small dependence of periods on metallicity at a fixed mass and radius, but the quantitative impact in the +nonlinear case is unknown. We can only estimate, based on the +results of Trabucchi et al. (2019), an uncertainty of ±10% at most +with respect to the prescriptions adopted here. +Qualitatively, a realistic age-metallicity relation and the +metallicity dependence of the period and of the onset of DFMP +are all expected to result in a steeper PA relation than the one +we predict, but it is difficult to assess the relative importance of +these effects. In this sense, the composition probably affects the +shape of the PA relation more than its dispersion. The latter is +likely affected by the composition indirectly through mass loss, +the analysis of which is beyond the scope of this study. However, we point out that mass loss represents a source of scatter in +combination with the occurrence of thermal pulses, because it reduces the minimum radius for the onset of DFMP. Thus, during +the luminosity dips associated with thermal pulses, a LPV can +have a period shorter than the one it had when it first entered the +DFMP regime (see Appendix C). 
An additional source of uncertainty, which we disregarded, is rotation (or other processes that +induce extra mixing in the core) which causes a spread in ages +at a given initial mass (cf. Anderson et al. 2016, for the case of +classical Cepheids). +The fairly good agreement between models and observations +encourages the use of LPVs as age indicators, but the scatter of +the PA relation hampers this application. We attempted to reduce +the scatter through corrections involving photometric properties, +as is customarily done for classical Cepheids with a color term +(e.g., Bono et al. 2005), but with unsatisfactory results. A correction dependent on the photometric amplitude of variability represents a promising alternative, but it cannot be pursued at the +moment. Indeed, for computational efficiency, current pulsation +models include only a crude treatment of the atmospheric layers +as they do not affect pulsation periods. On the other hand, the +atmosphere is crucial in determining the spectral energy distribution and its variation throughout the pulsation cycle, and hence +the amplitude of variability. At the same time, the observational +sample adopted here is too heterogeneous for a self-consistent +investigation of amplitude, but this kind of study could be made +possible by the upcoming data release 3 of the Gaia mission +(Gaia Collaboration et al. 2021) and the future Legacy Survey +of Space and Time (LSST, Ivezić et al. 2019) of the Vera Rubin +Observatory. +It is worth noting that our analysis applies to Miras as well +as SRVs, provided that they predominantly pulsate in the FM. +The limitation of PA relation studies to Miras, as has mainly +been done in the literature so far, undoubtedly has some advantages: to begin with, the fact that Miras are typically easier to +detect than SRVs, and their light curves are easier to process +as they tend to be more regular. 
Moreover, Miras represent the +end-point of AGB evolution, so in principle they correspond to a +smaller range of stellar parameters compared to the full extent of +the DFMP regime, and they display a smaller range of periods +at a given age (cf. Feast & Whitelock 2000b). In other words, +they should exhibit a relatively narrow PA relation (even though, +based on the observational data set we adopted, there is no conclusive evidence that considering only Miras reduces the scatter +of the PA relation). +Nonetheless, we caution against this approach as it is prone +to introducing uncontrolled biases, as the traditional distinction +between SRVs and Miras is arbitrary (see Trabucchi et al. 2021a, +and references therein). As such, it disregards the physical processes at the origin of the range of amplitudes characterizing +LPVs. In particular, photometric amplitudes are largely determined by the formation and dissociation of molecules in the stellar atmosphere, and they are likely to be metallicity-dependent. +It is therefore reasonable to assume that metal-poor (old) Mira +analogs might be classified as SRVs, thereby undermining the +potential application of the PA relation if restricted to Miras. +This seems to be supported by the fact that the bulk of old LPVs +in our sample are classified as SRVs. Therefore, studies involving PA relations of LPVs would advantageously include both +Miras and FM-pulsating SRVs. +The challenge associated with SRVs stems from the fact that +they are often multiperiodic (even when predominantly pulsating in the FM), a property that complicates the light curve analysis and period extraction. At the same time, this feature could +potentially improve age determinations as overtone modes are +expected to display a PA relation as well. +5. 
Conclusions +We used the results from recent nonlinear pulsation calculations +and combined them with state-of-the-art isochrone models to investigate the PA relation of FM-dominated LPVs, finding good +agreement with the distribution of observed LPVs in star clusters. The theoretical PA relation displays a non-negligible scatter, whose origin we traced to the fact that, despite being +very brief, the portion of AGB evolution during which the FM +becomes dominant shows a relatively large range in mass and +radius at a given age. +The theoretical distribution of FM periods is roughly symmetric, but the FM is not dominant at the shortest periods. As a +result, models predict that the distribution of dominant FM periods at a given age is skewed toward short periods, in agreement +with observations. Depending on stellar populations, metallicity +may enhance this feature as metal-poor LPVs, which tend to be +warmer and more compact, only contribute near short periods. +We provide the best-fit PA relation separately for O-rich and +C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA +relation because of their lower surface temperatures, which allow +them to reach longer periods more easily. +Our analysis concerns all LPVs predominantly pulsating in +the FM, regardless of whether they are classified as Miras or +SRVs. We discourage such a distinction in that it is arbitrary and +prone to selection biases that risk compromising the use of LPVs +as age indicators. +The main limitation in the use of the PA relation for age determinations of individual LPVs stems from its relatively large +scatter. We suggest that corrective terms, involving the amplitude of variability, might help to reduce this scatter and anticipate that upcoming data from ongoing and future surveys dedicated to time-domain astronomy will be highly valuable to probe +this possibility. 
A study of the impact of metallicity on nonlinear +pulsation is highly desirable to pursue this line of investigation, Article number, page 5 of 9 -A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -as would be a theoretical investigation of the dependence of pho￾tometric amplitudes upon global stellar parameters. -Acknowledgements. M.T. and N.M. acknowledge the support provided by the -Swiss National Science Foundation through grant Nr. 188697. We are grateful -to the anonymous referee for the constructive comments that helped improving -this paper, and to Léo Girardi for helping with the computation and interpre￾tation of isochrones. This research has made use of: data from the OGLE-III -Catalog of Variable Stars; data products from the Two Micron All Sky Sur￾vey, which is a joint project of the University of Massachusetts and the In￾frared Processing and Analysis Center/California Institute of Technology, funded -by the National Aeronautics and Space Administration and the National Sci￾ence Foundation; data from the European Space Agency (ESA) mission Gaia -(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Process￾ing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ -gaia/dpac/consortium). Funding for the DPAC has been provided by na￾tional institutions, in particular the institutions participating in the Gaia Multi￾lateral Agreement. This research has made use of the following free/open source -software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and -Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver -et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy -(Virtanen et al. 2020), matplotlib (a Python library for publication quality graph￾ics, Hunter 2007), and Astropy (a community-developed core Python package -for Astronomy, Astropy Collaboration et al. 2018). 
This research has made use of -NASA’s Astrophysics Data System Bibliographic Services, and of the following -services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue -access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky -atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012; -Pineau et al. 2020). -References -Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A, -591, A8 -Astropy Collaboration, Price-Whelan, A. M., Sipocz, B. M., et al. 2018, AJ, 156, ˝ -123 -Battinelli, P. & Demers, S. 2012, A&A, 544, A10 -Battinelli, P. & Demers, S. 2013, A&A, 553, A93 -Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 2013, MNRAS, 430, -676 -Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pa￾cific Conference Series, Vol. 461, Astronomical Data Analysis Software and -Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291 -Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33 -Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966 -Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127 -Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455, -2216 -Cutri, R. M., Wright, E. L., Conrow, T., et al. 2013, Explanatory Supplement -to the AllWISE Data Release Products, Explanatory Supplement to the All￾WISE Data Release Products -de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 2017, Space Sci. Rev., -212, 1743 -De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039 -Eggen, O. J. 1998, AJ, 115, 2435 -Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol. -378, Why Galaxies Care About AGB Stars: Their Importance as Actors and -Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479 -Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library, -Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. 
Gio￾vannelli, 229 -Feast, M. W. 1963, MNRAS, 125, 367 -Feast, M. W. 1966, The Observatory, 86, 120 -Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical -Processes in Red Giants, ed. J. Iben, I. & A. Renzini, 193–204 -Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227 -Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460 -Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791 -Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1 -Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142 -Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35 -Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022 -Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128 -Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357 -Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90 -Ivezic, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 ´ -Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13 -Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36 -Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105 -Kamath, D., Wood, P. R., Soszynski, I., & Lebzelter, T. 2010, MNRAS, 408, 522 ´ -Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D. -2005, A&A, 438, 1163 -Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D. -2016, A&A, 585, A101 -Kippenhahn, R. & Smith, L. 1969, A&A, 1, 142 -Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power -in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides & -B. Scmidt (Netherlands: IOS Press), 87–90 -Lebzelter, T., Mowlavi, N., Marigo, P., et al. 2018, A&A, 616, L13 -Lebzelter, T., Trabucchi, M., Mowlavi, N., et al. 2019, A&A, 631, A24 -Lebzelter, T. & Wood, P. R. 2005, A&A, 441, 1117 -Lebzelter, T. & Wood, P. R. 2007, A&A, 475, 643 -Lebzelter, T. & Wood, P. R. 
2011, A&A, 529, A137 -Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111 -Lloyd Evans, T. 1976, MNRAS, 174, 169 -Lloyd Evans, T. 1983a, MNRAS, 204, 985 -Lloyd Evans, T. 1983b, MNRAS, 204, 961 -Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Li￾brary, Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in -Related Systems, ed. J. D. Fernie, 151 -Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151 -Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77 -Menzies, J., Feast, M., Tanabé, T., Whitelock, P., & Nakada, Y. 2002, MNRAS, -335, 923 -Menzies, J., Feast, M., Whitelock, P., et al. 2008, MNRAS, 385, 1045 -Menzies, J. W., Feast, M. W., Whitelock, P. A., & Matsunaga, N. 2011, MNRAS, -414, 3492 -Menzies, J. W., Whitelock, P. A., & Feast, M. W. 2015, MNRAS, 452, 910 -Menzies, J. W., Whitelock, P. A., Feast, M. W., & Matsunaga, N. 2010, MNRAS, -406, 86 -Merrill, P. W. 1923, ApJ, 58, 215 -Mowlavi, N., Lecoeur-Taïbi, I., Lebzelter, T., et al. 2018, A&A, 618, A58 -Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016, -MNRAS, 463, 1446 -Ochsenbein, F., Bauer, P., & Marcout, J. 2000, A&AS, 143, 23 -Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283 -Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666 -Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21 -Perren, G. I., Piatti, A. E., & Vázquez, R. A. 2017, A&A, 602, A89 -Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical So￾ciety of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis -Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Short￾ridge, 125 -Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10 -Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163 -Soszynski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22 ´ -Soszynski, I., Udalski, A., Szyma ´ nski, M. K., et al. 
2009, Acta Astron., 59, 239 ´ -Soszynski, I., Udalski, A., Szyma ´ nski, M. K., et al. 2011, Acta Astron., 61, 217 ´ -Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se￾ries, Vol. 351, Astronomical Data Analysis Software and Systems XV, ed. -C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666 -Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66 -Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2017, ApJ, 847, 139 -Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2019, MNRAS, 482, 929 -Trabucchi, M., Wood, P. R., Mowlavi, N., et al. 2021b, MNRAS, 500, 1575 -Urago, R., Omodaka, T., Nagayama, T., et al. 2020, ApJ, 891, 50 -Villanova, S., Geisler, D., Gratton, R. G., & Cassisi, S. 2014, ApJ, 791, 107 -Virtanen, P., Gommers, R., Oliphant, T. E., et al. 2020, Nature Methods, 17, 261 -Wenger, M., Ochsenbein, F., Egret, D., et al. 2000, A&AS, 143, 9 -Whitelock, P., Feast, M., & Catchpole, R. 1991, MNRAS, 248, 276 -Whitelock, P., Menzies, J., Feast, M., et al. 1994, MNRAS, 267, 711 -Whitelock, P. A. 1986, MNRAS, 219, 525 -Whitelock, P. A., Menzies, J. W., Feast, M. W., et al. 2009, MNRAS, 394, 795 -Whitelock, P. A., Menzies, J. W., Feast, M. W., Nsengiyumva, F., & Matsunaga, -N. 2013, MNRAS, 428, 2216 -Wilson, R. E. & Merrill, P. W. 1942, ApJ, 95, 248 -Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225 -Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350 +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +as would be a theoretical investigation of the dependence of photometric amplitudes upon global stellar parameters. +Acknowledgements. M.T. and N.M. acknowledge the support provided by the +Swiss National Science Foundation through grant Nr. 188697. We are grateful +to the anonymous referee for the constructive comments that helped improving +this paper, and to Léo Girardi for helping with the computation and interpretation of isochrones. 
This research has made use of: data from the OGLE-III +Catalog of Variable Stars; data products from the Two Micron All Sky Survey, which is a joint project of the University of Massachusetts and the Infrared Processing and Analysis Center/California Institute of Technology, funded +by the National Aeronautics and Space Administration and the National Science Foundation; data from the European Space Agency (ESA) mission Gaia +(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Processing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ +gaia/dpac/consortium). Funding for the DPAC has been provided by national institutions, in particular the institutions participating in the Gaia Multilateral Agreement. This research has made use of the following free/open source +software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and +Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver +et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy +(Virtanen et al. 2020), matplotlib (a Python library for publication quality graphics, Hunter 2007), and Astropy (a community-developed core Python package +for Astronomy, Astropy Collaboration et al. 2018). This research has made use of +NASA’s Astrophysics Data System Bibliographic Services, and of the following +services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue +access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky +atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012; +Pineau et al. 2020). +References +Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A, +591, A8 +Astropy Collaboration, Price-Whelan, A. M., Sipocz, B. M., et al. 2018, AJ, 156, ˝ +123 +Battinelli, P. & Demers, S. 2012, A&A, 544, A10 +Battinelli, P. & Demers, S. 2013, A&A, 553, A93 +Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 
2013, MNRAS, 430, +676 +Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pacific Conference Series, Vol. 461, Astronomical Data Analysis Software and +Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291 +Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33 +Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966 +Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127 +Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455, +2216 +Cutri, R. M., Wright, E. L., Conrow, T., et al. 2013, Explanatory Supplement +to the AllWISE Data Release Products, Explanatory Supplement to the AllWISE Data Release Products +de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 2017, Space Sci. Rev., +212, 1743 +De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039 +Eggen, O. J. 1998, AJ, 115, 2435 +Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol. +378, Why Galaxies Care About AGB Stars: Their Importance as Actors and +Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479 +Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library, +Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Giovannelli, 229 +Feast, M. W. 1963, MNRAS, 125, 367 +Feast, M. W. 1966, The Observatory, 86, 120 +Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical +Processes in Red Giants, ed. J. Iben, I. & A. Renzini, 193–204 +Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227 +Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460 +Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791 +Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1 +Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142 +Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35 +Grady, J., Belokurov, V., & Evans, N. W. 
2019, MNRAS, 483, 3022 +Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128 +Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357 +Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90 +Ivezić, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 +Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13 +Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36 +Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105 +Kamath, D., Wood, P. R., Soszyński, I., & Lebzelter, T. 2010, MNRAS, 408, 522 +Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D. +2005, A&A, 438, 1163 +Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D. +2016, A&A, 585, A101 +Kippenhahn, R. & Smith, L. 1969, A&A, 1, 142 +Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power +in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides & +B. Schmidt (Netherlands: IOS Press), 87–90 +Lebzelter, T., Mowlavi, N., Marigo, P., et al. 2018, A&A, 616, L13 +Lebzelter, T., Trabucchi, M., Mowlavi, N., et al. 2019, A&A, 631, A24 +Lebzelter, T. & Wood, P. R. 2005, A&A, 441, 1117 +Lebzelter, T. & Wood, P. R. 2007, A&A, 475, 643 +Lebzelter, T. & Wood, P. R. 2011, A&A, 529, A137 +Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111 +Lloyd Evans, T. 1976, MNRAS, 174, 169 +Lloyd Evans, T. 1983a, MNRAS, 204, 985 +Lloyd Evans, T. 1983b, MNRAS, 204, 961 +Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Library, Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in +Related Systems, ed. J. D. Fernie, 151 +Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151 +Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77 +Menzies, J., Feast, M., Tanabé, T., Whitelock, P., & Nakada, Y. 2002, MNRAS, +335, 923 +Menzies, J., Feast, M., Whitelock, P., et al. 2008, MNRAS, 385, 1045 +Menzies, J. W., Feast, M. W., Whitelock, P. A., & Matsunaga, N. 
2011, MNRAS, +414, 3492 +Menzies, J. W., Whitelock, P. A., & Feast, M. W. 2015, MNRAS, 452, 910 +Menzies, J. W., Whitelock, P. A., Feast, M. W., & Matsunaga, N. 2010, MNRAS, +406, 86 +Merrill, P. W. 1923, ApJ, 58, 215 +Mowlavi, N., Lecoeur-Taïbi, I., Lebzelter, T., et al. 2018, A&A, 618, A58 +Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016, +MNRAS, 463, 1446 +Ochsenbein, F., Bauer, P., & Marcout, J. 2000, A&AS, 143, 23 +Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283 +Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666 +Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21 +Perren, G. I., Piatti, A. E., & Vázquez, R. A. 2017, A&A, 602, A89 +Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical Society of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis +Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Shortridge, 125 +Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10 +Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163 +Soszynski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22 ´ +Soszynski, I., Udalski, A., Szyma ´ nski, M. K., et al. 2009, Acta Astron., 59, 239 ´ +Soszynski, I., Udalski, A., Szyma ´ nski, M. K., et al. 2011, Acta Astron., 61, 217 ´ +Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Series, Vol. 351, Astronomical Data Analysis Software and Systems XV, ed. +C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666 +Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66 +Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2017, ApJ, 847, 139 +Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2019, MNRAS, 482, 929 +Trabucchi, M., Wood, P. R., Mowlavi, N., et al. 2021b, MNRAS, 500, 1575 +Urago, R., Omodaka, T., Nagayama, T., et al. 2020, ApJ, 891, 50 +Villanova, S., Geisler, D., Gratton, R. G., & Cassisi, S. 
2014, ApJ, 791, 107 +Virtanen, P., Gommers, R., Oliphant, T. E., et al. 2020, Nature Methods, 17, 261 +Wenger, M., Ochsenbein, F., Egret, D., et al. 2000, A&AS, 143, 9 +Whitelock, P., Feast, M., & Catchpole, R. 1991, MNRAS, 248, 276 +Whitelock, P., Menzies, J., Feast, M., et al. 1994, MNRAS, 267, 711 +Whitelock, P. A. 1986, MNRAS, 219, 525 +Whitelock, P. A., Menzies, J. W., Feast, M. W., et al. 2009, MNRAS, 394, 795 +Whitelock, P. A., Menzies, J. W., Feast, M. W., Nsengiyumva, F., & Matsunaga, +N. 2013, MNRAS, 428, 2216 +Wilson, R. E. & Merrill, P. W. 1942, ApJ, 95, 248 +Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225 +Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350 Article number, page 6 of 9 -Trabucchi et al.: The period-age relation of LPVs -Fig. A.1. Absolute-Ks Gaia-2MASS diagram for the stars with or with￾out a spectral type (left and right panels, respectively) in the selected -sample. Symbol colors and shapes indicate the spectral type and host -cluster described in the legend, respectively, which also reports the num￾ber of sources displayed (i.e., having both optical and NIR photometry). -The dashed line marks the separation between O- and C-rich sources -according to Lebzelter et al. (2018). An arrow marks the source MSX -LMC 124 in NGC 1830 that, having WBP,RP−WJ,Ks = 9.73 mag, lies out￾side the plot area. Background dots are LPVs in the LMC from OGLE￾III (light gray) and Mowlavi et al. (2018) (darker gray). -Appendix A: Classification of observed LPVs -Appendix A.1: Spectral type -We adopted the spectral types provided by Lebzelter & Wood -(2007) and Kamath et al. (2010) for 52 of the LPVs they studied -in NGC 1846, NGC 1978, and NGC 419. The only exception -is the star 5-3 in NGC 419, for which we adopted the S-type as -reported by Lloyd Evans (1983a). -We also searched the SIMBAD astronomical database -(Wenger et al. 2000) for spectral type information, which we -found for 26 more stars. We used the Gaia-2MASS diagram of -Lebzelter et al. 
(2018) to confirm the chemical type classification -taken from literature and to characterize the surface chemistry of -sources of an unknown spectral type (see Fig. A.1). Among the -latter, we identified 13 C-rich stars and 106 O-rich sources. -Three of the sources without a spectral type lack Gaia pho￾tometry, so they cannot be classified with the Gaia-2MASS. Two -of them (LW5 and LW22 in 47 Tuc) have no match in Gaia -EDR3, but they have NIR data and are probably O-rich based on -their position in the J − Ks versus Ks color-magnitude diagram. -The third source is one of the two stars in NGC 1903 from the -list of Grady et al. (2019), which we identified with the 2MASS -source J05171633-6920298. It is likely C-rich according to the -NIR color-magnitude diagram. -Finally, the sources V138 in ω Cen, LW15 in NGC 2808, -and LW4 in NGC 362 lack NIR data. They cannot be placed in -the NIR PL diagram, upon which we relied to assign pulsation -modes to periods, so we excluded them from the sample. The -distribution of O- and C-rich sources in the period-age diagram -is shown in Fig. A.2. -Appendix A.2: Variability -For variability information, we complemented the data from -Lebzelter & Wood and Kamath et al. (2010) with the catalogs -from OGLE-III, ASAS-SN, and Gaia DR2. Combining these -data sets, we found at least one period for each of the 176 sources -in our sample. -In order to identify the pulsation mode most likely respon￾sible for periods in a given source, we assumed that the second -overtone mode is associated with sequence A, the first overtone -mode with sequences B and C0 -, and the fundamental mode with -sequence C (e.g., Trabucchi et al. 2017). We excluded long sec￾ondary periods on sequence D as they are not due to stellar pul￾sation (Soszynski et al. ´ 2021, and references therein), and we -used the pattern of PL sequences in the LMC as a reference to -guide the mode identification (cf. Trabucchi et al. 2021a). 
-We performed this classification separately for periods com￾ing from each distinct data set. If two or more periods from dif￾ferent data sets were assigned to the same pulsation mode, we -retained only one of those periods, with priority to the values -from Lebzelter & Wood and Kamath et al. (2010). If the latter -authors do not provide this information, we adopted the period -from OGLE-III if available, and otherwise from ASAS-SN or -from Gaia DR2. -For some sources, the periods reported in different catalogs -were assigned to the same mode through this procedure. In most -cases, these periods are reasonably similar to each other. Only -in a few cases were they significantly different, but this did not -alter our conclusions. -When available, the variability type was taken from OGLE￾III or ASAS-SN. We note that we are only interested in whether -a star is classified as a Mira or semi-regular variable. In many -cases, this type is not given or the star is simply considered, for -instance, as an LPV or AGB in SIMBAD, in which case we con￾sidered the variability type as undetermined. -Appendix B: Fitting relations -We obtained analytic expressions for the PA relations separately -for O- and C-rich stars, proceeding as follows. For each bin of -log(τ/yr), we modeled the period distribution with a Gaussian -kernel density estimator (KDE) and identified the peak of the -distribution. To describe the boundaries of the PA relation, we -adopted, at each age, the values of the period at which the dis￾tribution equals 25% of its maximum. We selected this arbitrary -value upon visual inspection of the PA plane. We modeled the -central trend of the PA relation, as well as its short- and long￾period edges, with linear or quadratic functions in the form -log(τ/yr) = a0 + a1 (P/P˜) + a2 (P/P˜) -2 -, (B.1) -(where P˜ = 350 days) and employed a Lenvenberg-Marquardt -nonlinear regression algorithm3 -to derive the best-fit coefficients, -which are listed in Table B.1. 
We remark that these best-fit ex￾pressions are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3 -and 20 < P/days < 700 for O-rich composition, and within -3 We made use of the Python library SciPy to perform Gaussian KDE -modeling and best-fit, respectively, by means of the gaussian_kde -tool from the stats module and the curve_fit function from the -optimize module. +Trabucchi et al.: The period-age relation of LPVs +Fig. A.1. Absolute-Ks Gaia-2MASS diagram for the stars with or without a spectral type (left and right panels, respectively) in the selected +sample. Symbol colors and shapes indicate the spectral type and host +cluster described in the legend, respectively, which also reports the number of sources displayed (i.e., having both optical and NIR photometry). +The dashed line marks the separation between O- and C-rich sources +according to Lebzelter et al. (2018). An arrow marks the source MSX +LMC 124 in NGC 1830 that, having WBP,RP−WJ,Ks = 9.73 mag, lies outside the plot area. Background dots are LPVs in the LMC from OGLEIII (light gray) and Mowlavi et al. (2018) (darker gray). +Appendix A: Classification of observed LPVs +Appendix A.1: Spectral type +We adopted the spectral types provided by Lebzelter & Wood +(2007) and Kamath et al. (2010) for 52 of the LPVs they studied +in NGC 1846, NGC 1978, and NGC 419. The only exception +is the star 5-3 in NGC 419, for which we adopted the S-type as +reported by Lloyd Evans (1983a). +We also searched the SIMBAD astronomical database +(Wenger et al. 2000) for spectral type information, which we +found for 26 more stars. We used the Gaia-2MASS diagram of +Lebzelter et al. (2018) to confirm the chemical type classification +taken from literature and to characterize the surface chemistry of +sources of an unknown spectral type (see Fig. A.1). Among the +latter, we identified 13 C-rich stars and 106 O-rich sources. 
+Three of the sources without a spectral type lack Gaia photometry, so they cannot be classified with the Gaia-2MASS diagram. Two +of them (LW5 and LW22 in 47 Tuc) have no match in Gaia +EDR3, but they have NIR data and are probably O-rich based on +their position in the J − Ks versus Ks color-magnitude diagram. +The third source is one of the two stars in NGC 1903 from the +list of Grady et al. (2019), which we identified with the 2MASS +source J05171633-6920298. It is likely C-rich according to the +NIR color-magnitude diagram. +Finally, the sources V138 in ω Cen, LW15 in NGC 2808, +and LW4 in NGC 362 lack NIR data. They cannot be placed in +the NIR PL diagram, upon which we relied to assign pulsation +modes to periods, so we excluded them from the sample. The +distribution of O- and C-rich sources in the period-age diagram +is shown in Fig. A.2. +Appendix A.2: Variability +For variability information, we complemented the data from +Lebzelter & Wood and Kamath et al. (2010) with the catalogs +from OGLE-III, ASAS-SN, and Gaia DR2. Combining these +data sets, we found at least one period for each of the 176 sources +in our sample. +In order to identify the pulsation mode most likely responsible for periods in a given source, we assumed that the second +overtone mode is associated with sequence A, the first overtone +mode with sequences B and C′, and the fundamental mode with +sequence C (e.g., Trabucchi et al. 2017). We excluded long secondary periods on sequence D as they are not due to stellar pulsation (Soszyński et al. 2021, and references therein), and we +used the pattern of PL sequences in the LMC as a reference to +guide the mode identification (cf. Trabucchi et al. 2021a). +We performed this classification separately for periods coming from each distinct data set. If two or more periods from different data sets were assigned to the same pulsation mode, we +retained only one of those periods, with priority to the values +from Lebzelter & Wood and Kamath et al. 
(2010). If the latter +authors do not provide this information, we adopted the period +from OGLE-III if available, and otherwise from ASAS-SN or +from Gaia DR2. +For some sources, the periods reported in different catalogs +were assigned to the same mode through this procedure. In most +cases, these periods are reasonably similar to each other. Only +in a few cases were they significantly different, but this did not +alter our conclusions. +When available, the variability type was taken from OGLE-III or ASAS-SN. We note that we are only interested in whether +a star is classified as a Mira or semi-regular variable. In many +cases, this type is not given or the star is simply considered, for +instance, as an LPV or AGB in SIMBAD, in which case we considered the variability type as undetermined. +Appendix B: Fitting relations +We obtained analytic expressions for the PA relations separately +for O- and C-rich stars, proceeding as follows. For each bin of +log(τ/yr), we modeled the period distribution with a Gaussian +kernel density estimator (KDE) and identified the peak of the +distribution. To describe the boundaries of the PA relation, we +adopted, at each age, the values of the period at which the distribution equals 25% of its maximum. We selected this arbitrary +value upon visual inspection of the PA plane. We modeled the +central trend of the PA relation, as well as its short- and long-period edges, with linear or quadratic functions in the form +$\log(\tau/\mathrm{yr}) = a_0 + a_1\,(P/\tilde{P}) + a_2\,(P/\tilde{P})^2$, (B.1) +(where $\tilde{P} = 350$ days) and employed a Levenberg-Marquardt +nonlinear regression algorithm$^{3}$ to derive the best-fit coefficients, +which are listed in Table B.1. 
We remark that these best-fit expressions are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3 +and 20 < P/days < 700 for O-rich composition, and within +3 We made use of the Python library SciPy to perform Gaussian KDE +modeling and best-fit, respectively, by means of the gaussian_kde +tool from the stats module and the curve_fit function from the +optimize module. Article number, page 7 of 9 -A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). -Table B.1. Best-fit coefficients for the PA relation and its boundaries in -the form given in Eq. B.1. -Sp. type relation a0 a1 a2 -O-rich -center 10.78 -2.660 0.5953 -lower edge 10.46 -2.818 0.6578 -upper edge 10.54 -0.8187 -0.2335 -C-rich -center 9.755 -0.7532 -lower edge 9.982 -1.698 -upper edge 8.498 -1.827 -0.9959 -8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich -case. -Because of the connection between age and initial mass, the -PA relation can be translated into a period-initial mass relation, -which we derived using the same approach described above, and -assuming the form -log(Mi/M ) = b0 + b1 (P/P˜) + b2 (P/P˜) -2 -. (B.2) -The resulting best-fit lines are displayed in Fig. B.1, and the co￾efficients are given in Table B.2. -We remark that both the PA and the period-initial mass rela￾tions depend on model assumptions, in particular mass loss and -mixing, as well as on the properties of the population of LPVs, -namely the star-formation history and age-metallicity relation. -Appendix C: The shape of the period distribution -As an example case, we consider an isochrone of age log(τ/yr) = -8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have -initial masses Mi ' 3.85 M over a small range of ∼ 10−3 M . -The relation between period and initial mass is displayed in -panel (a) of Fig. 
C.1, where isochrone portions undergoing -Table B.2. Best-fit coefficients for the period-initial mass relation and -its boundaries in the form given in Eq. B.2. -Sp. type relation b0 b1 b2 -O-rich -center -0.2790 0.8958 -0.1828 -lower edge -0.1772 0.9975 -0.2203 -upper edge -0.1740 0.2783 0.8247 -C-rich -center -0.0304 0.2885 -lower edge -0.0131 0.5752 -upper edge -0.2245 -0.2720 0.2343 -DFMP are indicated by solid lines. Panel (b) shows the period -distributions for a few different cases. -It is instructive, to begin with, to ignore the effect of thermal -pulses and consider only the quiescent evolution (green lines in -Fig. C.1). The smallest initial mass corresponds to a star that just -entered the TP-AGB, when the FM has a period of ∼ 240 days -but is not dominant. It only becomes dominant above a threshold -radius Rdom,0, that is for periods longer than a (mass-dependent) -critical period Pdom,0 (the solid gray line in Fig. C.1). The least -evolved (quiescent) model with dominant FM has PFM ' 360 -days (green circle and horizontal line), corresponding to a sharp -cut in the period distribution shown in panel (b) of Fig. C.1. -As a star evolves along the AGB it expands, and its period be￾comes longer in response to the increase in radius. Models with -a higher initial mass are more evolved, hence they have a larger -radius and a longer period. The rate at which a period increases -with radius is not fixed, but rather decreases with evolution. Ac￾cording to the prescription of Trabucchi et al. (2021b), a period -grows with radius as a broken power-law with exponent α ' 1.8 -if R < Rb, and with α ' 1.25 at larger radii. -This is equivalent to saying that the period grows more -slowly after it exceeds a critical value Pb = P(Rb), marked by -the gray dotted line in Fig. C.1. The isochrone reaches it at +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Fig. A.2. Similar to Fig. 
1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). +Table B.1. Best-fit coefficients for the PA relation and its boundaries in +the form given in Eq. B.1. +Sp. type relation a0 a1 a2 +O-rich +center 10.78 -2.660 0.5953 +lower edge 10.46 -2.818 0.6578 +upper edge 10.54 -0.8187 -0.2335 +C-rich +center 9.755 -0.7532 +lower edge 9.982 -1.698 +upper edge 8.498 -1.827 -0.9959 +8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich +case. +Because of the connection between age and initial mass, the +PA relation can be translated into a period-initial mass relation, +which we derived using the same approach described above, and +assuming the form +log(Mi/M ) = b0 + b1 (P/P˜) + b2 (P/P˜) +2 +. (B.2) +The resulting best-fit lines are displayed in Fig. B.1, and the coefficients are given in Table B.2. +We remark that both the PA and the period-initial mass relations depend on model assumptions, in particular mass loss and +mixing, as well as on the properties of the population of LPVs, +namely the star-formation history and age-metallicity relation. +Appendix C: The shape of the period distribution +As an example case, we consider an isochrone of age log(τ/yr) = +8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have +initial masses Mi ' 3.85 M over a small range of ∼ 10−3 M . +The relation between period and initial mass is displayed in +panel (a) of Fig. C.1, where isochrone portions undergoing +Table B.2. Best-fit coefficients for the period-initial mass relation and +its boundaries in the form given in Eq. B.2. +Sp. type relation b0 b1 b2 +O-rich +center -0.2790 0.8958 -0.1828 +lower edge -0.1772 0.9975 -0.2203 +upper edge -0.1740 0.2783 0.8247 +C-rich +center -0.0304 0.2885 +lower edge -0.0131 0.5752 +upper edge -0.2245 -0.2720 0.2343 +DFMP are indicated by solid lines. Panel (b) shows the period +distributions for a few different cases. 
+It is instructive, to begin with, to ignore the effect of thermal +pulses and consider only the quiescent evolution (green lines in +Fig. C.1). The smallest initial mass corresponds to a star that just +entered the TP-AGB, when the FM has a period of ∼ 240 days +but is not dominant. It only becomes dominant above a threshold +radius Rdom,0, that is for periods longer than a (mass-dependent) +critical period Pdom,0 (the solid gray line in Fig. C.1). The least +evolved (quiescent) model with dominant FM has PFM ' 360 +days (green circle and horizontal line), corresponding to a sharp +cut in the period distribution shown in panel (b) of Fig. C.1. +As a star evolves along the AGB it expands, and its period becomes longer in response to the increase in radius. Models with +a higher initial mass are more evolved, hence they have a larger +radius and a longer period. The rate at which a period increases +with radius is not fixed, but rather decreases with evolution. According to the prescription of Trabucchi et al. (2021b), a period +grows with radius as a broken power-law with exponent α ' 1.8 +if R < Rb, and with α ' 1.25 at larger radii. +This is equivalent to saying that the period grows more +slowly after it exceeds a critical value Pb = P(Rb), marked by +the gray dotted line in Fig. C.1. The isochrone reaches it at Article number, page 8 of 9 -Trabucchi et al.: The period-age relation of LPVs -Fig. B.1. Similar to Fig. 2, but showing initial mass Mi -in place of age. The best-fit lines to the most populated band and edges of the theoretical -PFM – Mi relation are shown. -Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows -period as a function of initial mass (current mass on the top axis) on the -TP-AGB for a ∼ 200 Myr old isochrone with Zi = 0.006. Red lines -show full thermal pulses, while blue lines ignore luminosity spikes and -green lines show only the quiescent evolution. 
The same color code -is used for the period distributions (normalized to their maximum) on -panel (b). Solid lines indicate that the FM is dominant. Circles indi￾cate the earliest onset of DFMP accounting for (red) or ignoring (green) -luminosity spikes, and the shortest period of the dominant FM (blue). -Gray lines mark the critical values of periods at which the FM becomes -dominant (solid line), less sensitive to radius (dotted line, which occurs -at the vertical line for this specific isochrone), and independent of radius -(dashed line). -Mi ' 3.8524 M (vertical gray line), when PFM ' 420 days. In -models with a smaller initial mass, the period is still increasing -at a relatively large rate as the envelope expands, while in more -massive models the period has already become less sensitive to -changes in radius. This is reflected by a slight inflection of the -green curve, which corresponds to the maximum in the period -distribution shown in panel (b) of Fig. C.1. The period distri￾bution of the full TP-AGB range is roughly symmetric around -this maximum, while limiting the selection to DFMP, produces -a distribution skewed toward short periods, as found in Sect. 3. -If the luminosity dips following thermal pulses are taken -into account (blue lines), the corresponding envelope contrac￾tion causes the period to decrease, and the cut at ∼ 360 days -becomes less sharp. Because of mass loss, the threshold period -Pdom,0 is lowered, so that the shortest period associated with -DFMP does not correspond to the least evolved model (green -circle), but rather to the luminosity dip of a thermal pulse (blue -circle). -To be precise, the earliest occurrence of DFMP is on the left￾most luminosity spike (red circle), whose duration is so short that -it is unlikely to be observed. Indeed, the inclusion of luminosity -spikes alters the period distribution at long periods very little. 
-Luminosity spikes are relevant only for relatively massive and -young TP-AGB stars, and they give rise to the poorly populated -portion of the PA relation at the longest periods, as seen in panel -(a) of Fig. 2. -Article number, page 9 of 9 +Trabucchi et al.: The period-age relation of LPVs +Fig. B.1. Similar to Fig. 2, but showing initial mass Miin place of age. The best-fit lines to the most populated band and edges of the theoretical +PFM – Mi relation are shown. +Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows +period as a function of initial mass (current mass on the top axis) on the +TP-AGB for a ∼ 200 Myr old isochrone with Zi = 0.006. Red lines +show full thermal pulses, while blue lines ignore luminosity spikes and +green lines show only the quiescent evolution. The same color code +is used for the period distributions (normalized to their maximum) on +panel (b). Solid lines indicate that the FM is dominant. Circles indicate the earliest onset of DFMP accounting for (red) or ignoring (green) +luminosity spikes, and the shortest period of the dominant FM (blue). +Gray lines mark the critical values of periods at which the FM becomes +dominant (solid line), less sensitive to radius (dotted line, which occurs +at the vertical line for this specific isochrone), and independent of radius +(dashed line). +Mi ' 3.8524 M (vertical gray line), when PFM ' 420 days. In +models with a smaller initial mass, the period is still increasing +at a relatively large rate as the envelope expands, while in more +massive models the period has already become less sensitive to +changes in radius. This is reflected by a slight inflection of the +green curve, which corresponds to the maximum in the period +distribution shown in panel (b) of Fig. C.1. The period distribution of the full TP-AGB range is roughly symmetric around +this maximum, while limiting the selection to DFMP, produces +a distribution skewed toward short periods, as found in Sect. 3. 
+If the luminosity dips following thermal pulses are taken +into account (blue lines), the corresponding envelope contraction causes the period to decrease, and the cut at ∼ 360 days +becomes less sharp. Because of mass loss, the threshold period +Pdom,0 is lowered, so that the shortest period associated with +DFMP does not correspond to the least evolved model (green +circle), but rather to the luminosity dip of a thermal pulse (blue +circle). +To be precise, the earliest occurrence of DFMP is on the leftmost luminosity spike (red circle), whose duration is so short that +it is unlikely to be observed. Indeed, the inclusion of luminosity +spikes alters the period distribution at long periods very little. +Luminosity spikes are relevant only for relatively massive and +young TP-AGB stars, and they give rise to the poorly populated +portion of the PA relation at the longest periods, as seen in panel +(a) of Fig. 2. +Article number, page 9 of \ No newline at end of file diff --git a/read/results/pdfium/2201.00214.txt b/read/results/pdfium/2201.00214.txt index 85a597e..b1bf082 100644 --- a/read/results/pdfium/2201.00214.txt +++ b/read/results/pdfium/2201.00214.txt @@ -1,953 +1,940 @@ -arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022 -Temperature Analysis of Flaring -(AR11283) and non-Flaring (AR12194) -Coronal Loops -N. Fathalian1 -, S. S. Hosseini Rad2 -, N. Alipour2 -, H. Safari2 -1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran. -2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran. -e-mail: narges_fathalian@alum.sharif.edu -January 4, 2022 -Abstract -Here, we study the temperature structure of flaring and non-flaring coronal loops, using extracted -loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging -Assembly (AIA)/ Solar Dynamic Observatory (SDO). 
We use data for loops of X2.1-class-flaring active -region (AR11283) during 22:10UT till 23:00UT, on 2011, September 6; and non-flaring active region -(AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized -Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops. -We apply the Lomb-Scargle method to compute the oscillations periods for the temperature series of each -strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4 -min. These temperature oscillations show very close behavior to the slow-mode oscillation. We observe -that the temperature oscillations in the flaring loops are started at least around 10 minutes before the -transverse oscillations and continue for a long time duration even after the transverse oscillations are -ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops. -The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min,but -their significances are less (below 0.5) in comparison with the flaring ones (near to one). Hence the -detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the -flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the -flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the -non-flaring ones. More accurate commentary in this respect requires more extensive statistical research -and broader observations. -Coronal Loops,Temperature Analysis, Temperature Oscillations,Flaring and non-Flaring Active Regions -I. Introduction -Analyzing the thermal structure of coronal loops is of considerable interest, especially as these -magnetic loops have an essential role in heating the solar chromosphere and corona. 
Such anal￾ysis can help to describe how the process of solar flaring is correlated with the loop’s thermal -structure. -Detections of coronal waves have a historical preview and have been reported for several times -(e.g., Aschwanden et al. (1999); Nakariakov et al. (1999);Wang et al. (2003); Wang & Solanki (2004); -Berghmans & Clette (1999); De Moortel et al. (2000), Verwichte et al. (2004), De Moortel & Brady -(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed widely by -1 -De Moortel (2005), Nakariakov & Verwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and -De Moortel & Nakariakov (2012). Along with the development of the observations, transverse -and longitudinal oscillations have also been studied theoretically (e.g., Gruszecki et al. (2006), -Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010). Coronal -seismology techniques help to elicit the information from observations of oscillatory phenomena -and the results to be interpreted by using theoretical models (see for e.g., Roberts et al. (1984); -Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were -interesting and subject of investigations from different approaches (e.g., Nakariakov et al. (2010), -Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). As -we know the transverse loops oscillations usually occur in response to a close filament or flare -(Wills-Davey & Thompson (1999)). -Rapidly decaying long-period oscillations are mostly interpreted as global (or fundamen￾tal mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and Wang -(2011), also see Ofman & Wang (2002), and for slow-mode observed in fan-loops see Pant et al. -(2017)). 
They often occur in hot coronal loops of active regions, associated with tiny (or micro-) -flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected -in intensity plots of solar and stellar flares are possibly caused by standing slow-mode waves (see -reviews by Van Doorsselaere et al. (2016), and McLaughlin et al. (2018)).Excitation, propagation, -and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al. -(2007); Wang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov -et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); Wang & Ofman (2019)). To have -a complete overview of slow-mode magnetoacoustic waves in coronal loops see the review by -Wang et al. (2021). -Investigating and comparing the thermal structures and oscillations of coronal loops in loops -of flaring and non-flaring active regions could help us in better understanding the loops’ material -oscillations and the flare impact on them. Several different methods have been developed to in￾vestigate the thermal structure of the coronal loops and loop strands. The thermal stability of the -coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited -therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric -coronal temperature perturbation. They concluded that coronal loops are impartially stable in -the case of uniform heating. Van Doorsselaere et al. (2011) used spectroscopic line ratios to obtain -the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona. -The dependence of coronal loop temperature on loop length and magnetic field strength is also -a favorite topic. For instance, Dahlburg et al. 
(2018) probed the temperature properties of solar -coronal loops over a wide range of lengths and magnetic field strengths via numerical simula￾tions and observed a very high correlation between magnetic field strength and a maximum of -the temperature. The effect of temperature inhomogeneity on the periods and the damping times -of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al. -(2012)). Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA re￾sponse functions in different wavelengths. Different emission measure (DEM) computations and -methods have been developed to estimate the temperature in the corona, which led to various -discussions. Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August -3, by AIA. They took some differential emission measure (DEM) curves, claiming a multithermal -rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). Af￾ter that, Aschwanden & Boerner (2011) criticized the method of background subtraction which -Schmelz et al. had applied. They claimed that the background subtraction method caused their -inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred +arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022 +Temperature Analysis of Flaring +(AR11283) and non-Flaring (AR12194) +Coronal Loops +N. Fathalian1, S. S. Hosseini Rad2, N. Alipour2, H. Safari2 +1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran. +2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran. +e-mail: narges_fathalian@alum.sharif.edu +January 4, 2022 +Abstract +Here, we study the temperature structure of flaring and non-flaring coronal loops, using extracted +loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging +Assembly (AIA)/ Solar Dynamic Observatory (SDO). 
We use data for loops of X2.1-class-flaring active +region (AR11283) during 22:10UT till 23:00UT, on 2011, September 6; and non-flaring active region +(AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized +Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops. +We apply the Lomb-Scargle method to compute the oscillations periods for the temperature series of each +strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4 +min. These temperature oscillations show very close behavior to the slow-mode oscillation. We observe +that the temperature oscillations in the flaring loops are started at least around 10 minutes before the +transverse oscillations and continue for a long time duration even after the transverse oscillations are +ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops. +The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min,but +their significances are less (below 0.5) in comparison with the flaring ones (near to one). Hence the +detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the +flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the +flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the +non-flaring ones. More accurate commentary in this respect requires more extensive statistical research +and broader observations. +Coronal Loops,Temperature Analysis, Temperature Oscillations,Flaring and non-Flaring Active Regions +I. Introduction +Analyzing the thermal structure of coronal loops is of considerable interest, especially as these +magnetic loops have an essential role in heating the solar chromosphere and corona. 
Such analysis can help to describe how the process of solar flaring is correlated with the loop’s thermal +structure. +Detections of coronal waves have a historical preview and have been reported for several times +(e.g., Aschwanden et al. (1999); Nakariakov et al. (1999);Wang et al. (2003); Wang & Solanki (2004); +Berghmans & Clette (1999); De Moortel et al. (2000), Verwichte et al. (2004), De Moortel & Brady +(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed widely by + +De Moortel (2005), Nakariakov & Verwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and +De Moortel & Nakariakov (2012). Along with the development of the observations, transverse +and longitudinal oscillations have also been studied theoretically (e.g., Gruszecki et al. (2006), +Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010). Coronal +seismology techniques help to elicit the information from observations of oscillatory phenomena +and the results to be interpreted by using theoretical models (see for e.g., Roberts et al. (1984); +Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were +interesting and subject of investigations from different approaches (e.g., Nakariakov et al. (2010), +Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). As +we know the transverse loops oscillations usually occur in response to a close filament or flare +(Wills-Davey & Thompson (1999)). +Rapidly decaying long-period oscillations are mostly interpreted as global (or fundamental mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and Wang +(2011), also see Ofman & Wang (2002), and for slow-mode observed in fan-loops see Pant et al. +(2017)). 
They often occur in hot coronal loops of active regions, associated with tiny (or micro-) +flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected +in intensity plots of solar and stellar flares are possibly caused by standing slow-mode waves (see +reviews by Van Doorsselaere et al. (2016), and McLaughlin et al. (2018)).Excitation, propagation, +and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al. +(2007); Wang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov +et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); Wang & Ofman (2019)). To have +a complete overview of slow-mode magnetoacoustic waves in coronal loops see the review by +Wang et al. (2021). +Investigating and comparing the thermal structures and oscillations of coronal loops in loops +of flaring and non-flaring active regions could help us in better understanding the loops’ material +oscillations and the flare impact on them. Several different methods have been developed to investigate the thermal structure of the coronal loops and loop strands. The thermal stability of the +coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited +therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric +coronal temperature perturbation. They concluded that coronal loops are impartially stable in +the case of uniform heating. Van Doorsselaere et al. (2011) used spectroscopic line ratios to obtain +the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona. +The dependence of coronal loop temperature on loop length and magnetic field strength is also +a favorite topic. For instance, Dahlburg et al. 
(2018) probed the temperature properties of solar +coronal loops over a wide range of lengths and magnetic field strengths via numerical simulations and observed a very high correlation between magnetic field strength and a maximum of +the temperature. The effect of temperature inhomogeneity on the periods and the damping times +of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al. +(2012)). Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA response functions in different wavelengths. Different emission measure (DEM) computations and +methods have been developed to estimate the temperature in the corona, which led to various +discussions. Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August +3, by AIA. They took some differential emission measure (DEM) curves, claiming a multithermal +rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). After that, Aschwanden & Boerner (2011) criticized the method of background subtraction which +Schmelz et al. had applied. They claimed that the background subtraction method caused their +inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian -DEM model. In this regard, some attention was paid to the instrumental limitations and abil￾ity of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission -measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovemen￾tioned controversy of whether the cross-field temperatures of coronal loops are multithermal or -isothermal, continued by Schmelz et al. (2013) (similar to Schmelz et al. (2011)). They analyzed -twelve loops to understand the cross-field temperature distributions of them and reveal the loops’ -substructure. 
Based on their achievements, the warmer loops entail broader DEMs. Thereafter, -Schmelz et al. (2014) found indications of a relationship between the DEM weighted-temperature -and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have -narrower DEM widths. This could imply that fewer strands are seen emitting in the later cool￾ing phase, which they claim could potentially resolve the abovementioned controversy. In this -subject, Aschwanden et al. (2015) (as well as 2013 (Aschwanden, 2013)) developed a method to -extract the loop temperature which is based on Gaussian fit for Differential Emission Measure, -named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter). -This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and -non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows: -In section II, data, we introduce the considered flaring and non-flaring active regions and describe -the data employed and the time and properties of the flare, occurred in the active region. In -section III, we explain the method we use to analyze the time-series of temperatures in different -strips of the loops. Section IV is specified to our results, obtained related to flaring and non￾flaring regions. In section V we briefly state a summary of this work. -II. Data -We investigate the thermal structure and treatment of loops in a flaring region to see if it follows -the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time. -For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops -of it have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength -171 in two coronal loops of this region and detected obvious transverse oscillation with periods -of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. 
To see -the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active -region, extract its loops and analyze their thermal treatment. Then we compare the temperature -treatment of the loops at the flaring region with the loops of the non-flaring region to see the -differences. -The temperature analysis done here uses EUV images from the AIA onboard the SDO. AIA -has ten different wavelength channels, three in white light and UV, and the other seven in EUV -channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper￾atures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015). -Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 -). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK. -The two below data sets are finally selected to study thermal variations and coronal loops -oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions. -Finally, these loops are chosen: -– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely -happening around the loops with the specification we are looking for. So this selected LOS -X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA +DEM model. In this regard, some attention was paid to the instrumental limitations and ability of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission +measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovementioned controversy of whether the cross-field temperatures of coronal loops are multithermal or +isothermal, continued by Schmelz et al. (2013) (similar to Schmelz et al. (2011)). They analyzed +twelve loops to understand the cross-field temperature distributions of them and reveal the loops’ +substructure. 
Based on their achievements, the warmer loops entail broader DEMs. Thereafter, +Schmelz et al. (2014) found indications of a relationship between the DEM weighted-temperature +and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have +narrower DEM widths. This could imply that fewer strands are seen emitting in the later cooling phase, which they claim could potentially resolve the abovementioned controversy. In this +subject, Aschwanden et al. (2015) (as well as 2013 (Aschwanden, 2013)) developed a method to +extract the loop temperature which is based on Gaussian fit for Differential Emission Measure, +named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter). +This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and +non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows: +In section II, data, we introduce the considered flaring and non-flaring active regions and describe +the data employed and the time and properties of the flare, occurred in the active region. In +section III, we explain the method we use to analyze the time-series of temperatures in different +strips of the loops. Section IV is specified to our results, obtained related to flaring and nonflaring regions. In section V we briefly state a summary of this work. +II. Data +We investigate the thermal structure and treatment of loops in a flaring region to see if it follows +the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time. +For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops +of it have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength +171 in two coronal loops of this region and detected obvious transverse oscillation with periods +of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. 
To see +the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active +region, extract its loops and analyze their thermal treatment. Then we compare the temperature +treatment of the loops at the flaring region with the loops of the non-flaring region to see the +differences. +The temperature analysis done here uses EUV images from the AIA onboard the SDO. AIA +has ten different wavelength channels, three in white light and UV, and the other seven in EUV +channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temperatures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015). +Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 +). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK. +The two below data sets are finally selected to study thermal variations and coronal loops +oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions. +Finally, these loops are chosen: +– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely +happening around the loops with the specification we are looking for. So this selected LOS +X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA AR 11283, in the time period of 22:10UT till 23:00UT of 2011 September 6 with the cadence -of 12 sec. This period of time is selected since no other flare is happening during it. A -few distinct loops are visible and follow-able here during this period. Loop shapes in our -active region change permanently; therefore, it is difficult or impossible to follow a loop -over a very long time. Hence, it is not useful to extend the time interval of this region -to the time before the flare. The transverse oscillations of two loops in this region were -analyzed before by Jain et al. (2015). 
We mark these loops by A and B in Figure 1 b. They -detected fundamental mode oscillation with periods of roughly 2 minutes and decay time -of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any) -or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area, -indicated by the white box is featured in a zoom-in view in Figure 1.b (right) and the five -selected parts of the center of the three chosen loops are shown by red lines (the movie of -the region is available in this link). As it is clear in the movie, these three loops oscillate -together and their oscillations decay simultaneously. The center of figure 1.a is coordinated -at (230, 165) arcsec and its width and height are 450′′ -× 456′′ /750 × 775 pixels. The flare -occurring in this active region is an X2.1 class flare located close to the disk center at latitude -14◦ north and longitude 18◦ west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT, -ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection -(CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with -the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for -more details look at LASCO CME catalogue.) 1 -– Three loops of non-flaring active region 12194: As a blind test, we select three loops of the -non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till -09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec -and its width and height are 615′′ -× 615′′ /1025 × 1025 pixels. We consider the images of -the selected area with the cadence of 12 sec in the same six wavelengths mentioned above. -These loops are relatively motionless and do not show any transversal oscillation (see the -region’s movie in the link). 
We select the loops in such a way that they do not have any -crossing over the neighbor loops (in our perspective) during this time. In figure 2 the -selected loops are distinguished in red in the mentioned active region. The size of the final -cut of non-flaring region (represented in the right) is 351 × 401 pixels. -The data set are primarily downloaded at level 1 with a pixel resolution of 0.6 arcsec. We use -the standard aia_prep.pro subroutine available in SDO package SolarSoftWare library to adjust -the screen scale between the four arms of the AIA. This pre-processing step increases the data -level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series. -We also used drot_map.pro subroutine to correct the differential rotation effect. According to the -movie made by pre-processed images, the most obvious loops (marked in the abovementioned -figures) are selected in each region (with obvious transversal oscillations in the case of the flaring -active region). -III. Temperature Analysis Method -We extract the selected loop segment pixels, for each loop, and calculate the normal vectors -to each point of the loop’s direction. Then by using these data, we straighten each loop in a -considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available -empty area around each loop and the distance to the neighbor loop). The area around the -loop is needed for calculations of background subtraction. The selected loop segment is cut in +of 12 sec. This period of time is selected since no other flare is happening during it. A +few distinct loops are visible and follow-able here during this period. Loop shapes in our +active region change permanently; therefore, it is difficult or impossible to follow a loop +over a very long time. Hence, it is not useful to extend the time interval of this region +to the time before the flare. 
The transverse oscillations of two loops in this region were +analyzed before by Jain et al. (2015). We mark these loops by A and B in Figure 1 b. They +detected fundamental mode oscillation with periods of roughly 2 minutes and decay time +of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any) +or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area, +indicated by the white box is featured in a zoom-in view in Figure 1.b (right) and the five +selected parts of the center of the three chosen loops are shown by red lines (the movie of +the region is available in this link). As it is clear in the movie, these three loops oscillate +together and their oscillations decay simultaneously. The center of figure 1.a is coordinated +at (230, 165) arcsec and its width and height are 450′′× 456′′ /750 × 775 pixels. The flare +occurring in this active region is an X2.1 class flare located close to the disk center at latitude +14◦ north and longitude 18◦ west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT, +ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection +(CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with +the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for +more details look at LASCO CME catalogue.) 1 +– Three loops of non-flaring active region 12194: As a blind test, we select three loops of the +non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till +09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec +and its width and height are 615′′× 615′′ /1025 × 1025 pixels. We consider the images of +the selected area with the cadence of 12 sec in the same six wavelengths mentioned above. 
+These loops are relatively motionless and do not show any transversal oscillation (see the +region’s movie in the link). We select the loops in such a way that they do not have any +crossing over the neighbor loops (in our perspective) during this time. In figure 2 the +selected loops are distinguished in red in the mentioned active region. The size of the final +cut of non-flaring region (represented in the right) is 351 × 401 pixels. +The data set are primarily downloaded at level 1 with a pixel resolution of 0.6 arcsec. We use +the standard aia_prep.pro subroutine available in SDO package SolarSoftWare library to adjust +the screen scale between the four arms of the AIA. This pre-processing step increases the data +level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series. +We also used drot_map.pro subroutine to correct the differential rotation effect. According to the +movie made by pre-processed images, the most obvious loops (marked in the abovementioned +figures) are selected in each region (with obvious transversal oscillations in the case of the flaring +active region). +III. Temperature Analysis Method +We extract the selected loop segment pixels, for each loop, and calculate the normal vectors +to each point of the loop’s direction. Then by using these data, we straighten each loop in a +considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available +empty area around each loop and the distance to the neighbor loop). The area around the +loop is needed for calculations of background subtraction. The selected loop segment is cut in 1Based on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/ -all wavelengths and at the same considered box from the images set. These loop images are -necessary entrances for our thermal analysis process. 
Then the loop is divided into different -strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we -use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden -et al. (2015). -The images in the above six wavelength filters are considered to calculate the temperature in -each strip of the loop. The DEM function is considered a single-Gaussian function relative to the -temperature determined by the forward fitting method. To obtain the temperature for each loop, -we divided the loop into narrow strips, and then the intensity flux was averaged over each strip. -The number of each strip is displayed with the index i. One of the usual methods to subtract -the background from observed data is fitting a single-Gaussian cospatial function with a linear -function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM -in terms of the logarithm of the temperature, which has three free parameters (Aschwanden & -Boerner, 2011): -DEMi = -dEMi -dT = EMp,i exp (− -[log (T) − log (Tp,i -) -2σ -2 -T,i -). (1) -In which, Tp,i -is the DEM peak temperature, EMp,i -is the peak EM function, and σT,i -is the -logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes -(for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below): -F0λ = -Z -dEM(T) -dT Rλ(T)dT = ∑ -k -EM(Tk -)Rλ(Tk -). (2) -Here, Rλ(T) is the instrumental temperature response function of each wavelength filter λ, which -is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA -response functions calibration has partly changed. Here, we use the updated calibration of the -temperature response functions, for each of the AIA temperature filters, according to the CHI￾ANTI Version 2019 code available in the Solar SoftWare (SSW). 
After forward-fitting the Gaussian -DEM to the background-subtracted observed fluxes in multiple wavelengths, the three-fitting pa￾rameters, temperature width (σT,i -), peak of temperature (Tp,i -), and peak emission measure (EMp,i -) -are found by minimizing χ -2 -i -. -Our data sample is uneven because of omitting some damaged images in between. There￾fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is -developed to use the technique periodogram, in the case where the observation times are un￾evenly spaced (Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases where -the periodicity of data treatment is not immediately apparent. This method allows efficient com￾putation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in -an intuitive means of determining the period of oscillation (VanderPlas, 2018). Therefore we use -Lomb-Scargle Periodogram to evaluate and estimate the efficient periods of temperature oscilla￾tions in our loops. We select the first period related to the highest power frequency, which is -obtained by this method.We considered the achieved periods with the highest significances and -amplitudes. The most significant (highest) periods observed in temperature (minute) for flaring -and non-flaring loops are listed in Tables 1 and 2, respectively. To estimate the significance of -the periods, we computed the probability values (p-values). In the Lomb-Scargle method, the -significance returned here is the false alarm probability of the null hypothesis, i.e., as the data -is composed of independent Gaussian random variables. Accordingly, low probability values +all wavelengths and at the same considered box from the images set. These loop images are +necessary entrances for our thermal analysis process. Then the loop is divided into different +strips and its best division in terms of pixel intervals is considered. 
To do thermal analysis, we +use the spatially-synthesized Gaussian DEM forward-fitting method developed by Aschwanden +et al. (2015). +The images in the above six wavelength filters are considered to calculate the temperature in +each strip of the loop. The DEM function is considered a single-Gaussian function relative to the +temperature determined by the forward fitting method. To obtain the temperature for each loop, +we divided the loop into narrow strips, and then the intensity flux was averaged over each strip. +The number of each strip is displayed with the index i. One of the usual methods to subtract +the background from observed data is fitting a single-Gaussian cospatial function with a linear +function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM +in terms of the logarithm of the temperature, which has three free parameters (Aschwanden & +Boerner, 2011): +$$\mathrm{DEM}_i = \frac{dEM_i}{dT} = EM_{p,i} \exp\left(-\frac{[\log(T) - \log(T_{p,i})]^2}{2\sigma_{T,i}^2}\right). \qquad (1)$$ +In which, $T_{p,i}$ is the DEM peak temperature, $EM_{p,i}$ is the peak emission measure, and $\sigma_{T,i}$ is the +logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes +(for each strip) we use Eq. 6 of Aschwanden & Boerner (2011), given below: +$$F_{0\lambda} = \int \frac{dEM(T)}{dT} R_\lambda(T)\,dT = \sum_k EM(T_k) R_\lambda(T_k). \qquad (2)$$ +Here, $R_\lambda(T)$ is the instrumental temperature response function of each wavelength filter λ, which +is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA +response functions calibration has partly changed. Here, we use the updated calibration of the +temperature response functions, for each of the AIA temperature filters, according to the CHIANTI Version 2019 code available in the Solar SoftWare (SSW).
After forward-fitting the Gaussian +DEM to the background-subtracted observed fluxes in multiple wavelengths, the three fitting parameters, temperature width ($\sigma_{T,i}$), peak temperature ($T_{p,i}$), and peak emission measure ($EM_{p,i}$), +are found by minimizing $\chi_i^2$. +Our data sample is uneven because of omitting some damaged images in between. Therefore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is +developed to use the technique periodogram, in the case where the observation times are unevenly spaced (Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases where +the periodicity of data treatment is not immediately apparent. This method allows efficient computation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in +an intuitive means of determining the period of oscillation (VanderPlas, 2018). Therefore we use +Lomb-Scargle Periodogram to evaluate and estimate the efficient periods of temperature oscillations in our loops. We select the first period related to the highest power frequency, which is +obtained by this method. We considered the achieved periods with the highest significances and +amplitudes. The most significant (highest) periods observed in temperature (minute) for flaring +and non-flaring loops are listed in Tables 1 and 2, respectively. To estimate the significance of +the periods, we computed the probability values (p-values). In the Lomb-Scargle method, the +significance returned here is the false alarm probability of the null hypothesis, i.e., that the data +are composed of independent Gaussian random variables. Accordingly, low probability values (p-value less than 0.05) indicate a high degree of significance in the associated periodic signal. -IV. Results -i. Temperature Analysis of Flaring Active Region Loops -Thenceforth the temperature time-series of different strips of the selected loops are calculated -using the method described in section 3.
In the following figures, the vertical axis shows the -logarithm of the temperature and the horizontal axis shows the time duration. To be comparable -by eyes, all the forthcoming figures (which show the loops temperature oscillations) have been co￾scaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature -map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 12, and 6 strips, respectively. Each -strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’ -temperature oscillations are presented here. Figure 3 displays the time-series of temperature -oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the -errors for each point (temperature) but removed in the presentation to avoid overcrowding of the -figures. As we observe in Figures 3 and 4), the temperature oscillations are started and increase -around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended -(22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain -et al. (2015). As Jain et al. reported, LoopA and B have a transverse oscillation with periods -of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time -(23:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in -these flaring loops happen before the start of their transverse oscillations and are continuing even -in the time interval after the transverse oscillations decay. Although the temperature oscillations -do not decay as rapid as the transverse oscillations do, and conversely, the loop temperature -increases at the end of the oscillating mode (see Fig.4, the temperature map of the loop A, for -instance) -We calculate the temperature oscillations periods, using Lomb-Scargle method. 
We consider -the thermal oscillations periods with the highest significances. As this method shows, the most -powerful period in the range of data time-series (listed in Table1) are from 7 to 28.4 minutes -observed in the strips of the marked loops of this flaring region. These loops of flaring region -also show some short periods in temperature oscillations which some are less than 10 minutes -(listed in Table1). These short periods are more frequently observed in the loops of the flaring -active region. Such short periods are very scarce for the loops of the non-flaring active region -(compare Tables1 and 2). -The first column in Table1 is the number of every strip along the loop. The second column is -the period of the most powerful frequency observed for the loop strips, calculated by the Lomb￾Scargle method. The third column shows the maximum of log(T) minus its minimum in each -strip. The columns of Table2 are exactly the same as Table1; the only difference is that Table2 is -for the non-flaring loops. -The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop -marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A -is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided -into 11 strips, has the length of 20.24 (Mm). The mean of (Max(log T)-Min(log T)) and the mean -of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8 -strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean -of (Max(log T)-Min(log T)) is 0.81 through this loop segment. The loops C1 and C2, divided into -12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22, -and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively. +IV. Results +i. 
Temperature Analysis of Flaring Active Region Loops +Thenceforth the temperature time-series of different strips of the selected loops are calculated +using the method described in section III. In the following figures, the vertical axis shows the +logarithm of the temperature and the horizontal axis shows the time duration. To be comparable +by eye, all the forthcoming figures (which show the loops’ temperature oscillations) have been co-scaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature +map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 12, and 6 strips, respectively. Each +strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’ +temperature oscillations are presented here. Figure 3 displays the time-series of temperature +oscillations for the first 3 strips of Loop A, and first 2 strips of Loop B1. We calculated the +errors for each point (temperature) but omitted them from the presentation to avoid overcrowding of the +figures. As we observe in Figures 3 and 4, the temperature oscillations start and increase +around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended +(22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain +et al. (2015). As Jain et al. reported, Loops A and B have a transverse oscillation with periods +of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time +(22:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in +these flaring loops happen before the start of their transverse oscillations and are continuing even +in the time interval after the transverse oscillations decay.
Although the temperature oscillations +do not decay as rapid as the transverse oscillations do, and conversely, the loop temperature +increases at the end of the oscillating mode (see Fig.4, the temperature map of the loop A, for +instance) +We calculate the temperature oscillations periods, using Lomb-Scargle method. We consider +the thermal oscillations periods with the highest significances. As this method shows, the most +powerful period in the range of data time-series (listed in Table1) are from 7 to 28.4 minutes +observed in the strips of the marked loops of this flaring region. These loops of flaring region +also show some short periods in temperature oscillations which some are less than 10 minutes +(listed in Table1). These short periods are more frequently observed in the loops of the flaring +active region. Such short periods are very scarce for the loops of the non-flaring active region +(compare Tables1 and 2). +The first column in Table1 is the number of every strip along the loop. The second column is +the period of the most powerful frequency observed for the loop strips, calculated by the LombScargle method. The third column shows the maximum of log(T) minus its minimum in each +strip. The columns of Table2 are exactly the same as Table1; the only difference is that Table2 is +for the non-flaring loops. +The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop +marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A +is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided +into 11 strips, has the length of 20.24 (Mm). The mean of (Max(log T)-Min(log T)) and the mean +of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8 +strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean +of (Max(log T)-Min(log T)) is 0.81 through this loop segment. 
The loops C1 and C2, divided into +12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22, +and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively. We observe that despite the temperature oscillations, the flaring loops show a temperature -rise at the end of the considered time interval (figure3). As their temperature maps also show, -the oscillations follow with a relatively sensible rise in the final temperature of the loop segments -(Figures 4). Although in the case of the transverse oscillations, the loops oscillate as the flare -occurs and then the oscillations decay and stop, in the case of temperature oscillations, the tem￾peratures of the various strips of the loops oscillate and at the end of the flare occurrence, they -get to a relatively higher value of temperature in average. -Figure 4 shows the temperature maps of the flaring loops A, B1, B2, C1, and C2, respectively -as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and -the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each -separated grid part on the map is standing for one strip. Figure 4 shows that the temperature -for most of the strips increased, bypassing a few oscillations. Before the end of the time duration, -some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at -the early times of the duration and becomes hotter at the middle and end times with a swing -to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle -times (the red and green stripes) while at the end the strips temperatures are smoother with less -fluctuations. 
The temperature map of the loop segment B2 (Fig.4) shows that at the beginning of -the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end -times this pattern is reversed in this loop segment. In loop segment C1 (Fig.4), the temperature -fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time -(23:00) the temperature is much higher than the beginning. The temperature is increasing after -the flare time (22:24) for the loop C2 either (see Fig.4). This happens with some oscillations in -the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation -in most of the flaring loops’ strips after the flare time. According to these temperature maps, -the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20 -minutes after that. -We expect the flaring loops to cool down as a result of heat conduction and radiative cooling. -Hence this relative temperature increase should be scrutinized. As we probed, this temperature -rise is also followed in intensity time-series. As the intensity time-series show, the related intensity -in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the -authors also checked the wavelength of Fe XV I I I which has a peak formation temperature of -7 × 106 ◦K (Ugarte-Urra & Warren (2014)). By using the method developed by Warren et al. (2012) -the contribution of the Fe XV I I I emission line can be isolated from the AIA 94 , to analyze the -evolution of hot plasma in the loops. We do it to omit the contamination from the cooler plasma -(mostly around 1MK) which also contributes to this AIA channel Boerner et al. (2012). This is -done by subtracting the contaminating warm (i.e., around 1MK) component to the bandpass. 
-This warm contribution is calculated from a weighted combination of the emission from the AIA -171 and 193 channels dominated by Fe X and Fe X I I emission, respectively. This intensity -analysis is done directly and it has not gone through any other process like the thermal analysis. -For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show -the intensity map, and the mean intensity variation of the wavelength Fe XV I I I, for Loop A of -the flaring region, respectively. As these plots show, this intensity is also higher at the end of -the time duration in respect of the flare time. It seems to us that the expected cooling has not -occurred in these flaring loops yet, even after the flare occurrence in the probed duration due to -some plausible reasons. We consider that the mentioned simultaneous CME (see sectionII) which -this flare is associated with could cause this increase in temperature. We can be sure that the -source of this CME is AR 11283 (Romano et al. (2015)). This CME is in our flare region, hence -the loops receive energy even after the flare occurrence and it is probably the reason why the +rise at the end of the considered time interval (figure3). As their temperature maps also show, +the oscillations follow with a relatively sensible rise in the final temperature of the loop segments +(Figures 4). Although in the case of the transverse oscillations, the loops oscillate as the flare +occurs and then the oscillations decay and stop, in the case of temperature oscillations, the temperatures of the various strips of the loops oscillate and at the end of the flare occurrence, they +get to a relatively higher value of temperature in average. +Figure 4 shows the temperature maps of the flaring loops A, B1, B2, C1, and C2, respectively +as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and +the horizontal axis shows time. 
The color bar (in the left) shows the temperature range. Each +separated grid part on the map is standing for one strip. Figure 4 shows that the temperature +for most of the strips increased, bypassing a few oscillations. Before the end of the time duration, +some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at +the early times of the duration and becomes hotter at the middle and end times with a swing +to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle +times (the red and green stripes) while at the end the strips temperatures are smoother with less +fluctuations. The temperature map of the loop segment B2 (Fig.4) shows that at the beginning of +the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end +times this pattern is reversed in this loop segment. In loop segment C1 (Fig.4), the temperature +fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time +(23:00) the temperature is much higher than the beginning. The temperature is increasing after +the flare time (22:24) for the loop C2 either (see Fig.4). This happens with some oscillations in +the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation +in most of the flaring loops’ strips after the flare time. According to these temperature maps, +the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20 +minutes after that. +We expect the flaring loops to cool down as a result of heat conduction and radiative cooling. +Hence this relative temperature increase should be scrutinized. As we probed, this temperature +rise is also followed in intensity time-series. As the intensity time-series show, the related intensity +in the Loop A of the flaring AR increases at the end of the time duration. 
To be assured, the +authors also checked the wavelength of Fe XVIII which has a peak formation temperature of +$7 \times 10^{6}$ K (Ugarte-Urra & Warren (2014)). By using the method developed by Warren et al. (2012) +the contribution of the Fe XVIII emission line can be isolated from the AIA 94 Å channel, to analyze the +evolution of hot plasma in the loops. We do it to omit the contamination from the cooler plasma +(mostly around 1MK) which also contributes to this AIA channel (Boerner et al. (2012)). This is +done by subtracting the contaminating warm (i.e., around 1MK) component from the bandpass. +This warm contribution is calculated from a weighted combination of the emission from the AIA +171 Å and 193 Å channels dominated by Fe X and Fe XII emission, respectively. This intensity +analysis is done directly and it has not gone through any other process like the thermal analysis. +For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show +the intensity map, and the mean intensity variation of the wavelength Fe XVIII, for Loop A of +the flaring region, respectively. As these plots show, this intensity is also higher at the end of +the time duration in respect of the flare time. It seems to us that the expected cooling has not +occurred in these flaring loops yet, even after the flare occurrence in the probed duration due to +some plausible reasons. We consider that the mentioned simultaneous CME (see section II) which +this flare is associated with could cause this increase in temperature. We can be sure that the +source of this CME is AR 11283 (Romano et al. (2015)). This CME is in our flare region, hence +the loops receive energy even after the flare occurrence and it is probably the reason why the expected cooling does not occur.
-The thermal oscillations periods obtained the Lomb-Scargle method, do not have the same -significance in all strips of the loops, but for most strips of the flaring loops, the significances are -very near to one. To be assured about these oscillations, we probed the intensity time-series for -each strip of the loops and we observed that this loop’s intensities shows intensity oscillations -too (i.e., alongside the loop). The most probable dominant periods observed in intensity, for -wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A, 16.7, and 18.22 min for strips of -F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods -are in the same order of the observed thermal oscillation periods. The intensity in this time series -has not passed any thermal process but still shows oscillation periods close to thermal ones. So -we think these results confirm the observation of thermal oscillations. -ii. Temperature Analysis of non-Flaring Active Region Loops -The temperature time-series for different strips of the selected loops of the non-flaring active -region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. 6), -the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time -duration. Figure 6 displays the time-series of temperature variations for the first two strips of -the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the -logarithm of temperature (like the flaring loops range). The most powerful periods, observed in -most of these non-flaring loops’ strips (listed in Table2) are from 8.5 min. to 30 min. Comparing -the periods of the loops in the flaring region (Table1) with the non-flaring one (Table2), we see -that the temperature periods of the flaring loops have lower values on average and have more -diversity than the non-flaring ones. 
As Tables 1 and 2 show, the mean temperatures of nonf￾loops are lower in comparison with the f-loops, a fact we also expected from common sense. -The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loops’ -strips. -Nonf-loop A, divided into 11 strips, has the length of 19.91 (Mm) which is the length of the -selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips -of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10. -Nonf-Loop B, divided into 6 strips, has the length of 11.11 (Mm), and the mean temperature (log), -and the mean of (Max(log T)-Min(log T)) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonf￾loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of -5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56. -The first highest period observed for the temperature oscillations of these non-flaring loops’ -strips is reported in Table2. As we observe the temperature periods in these non-flaring loops -are mostly longer than those of the flaring loops (compare the values listed in Table1 and Table2). -Therefore the temperature oscillations of these loops are a little slower than the flaring ones. -Figure 7 shows the temperature maps of the non-flaring loops A, B, and C, respectively as a -time series. In each plot, the vertical axis is the distance along the loop in Mm, and the horizontal -axis is the time. The color bar in the left shows the colors considered for the temperature range. -Each separated colored part in the map is one strip. These color maps are plotted totally at the -same color range of the loops of the flaring region either. -As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature -fluctuations and are smoother in comparison with the flaring ones (Fig. 4). 
Furthermore, that -much increase in the temperatures of the strips, which was obvious in the loops of the flaring -region toward the end times, is not observed here. The temperatures are also totally lower in the -nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the +The thermal oscillations periods obtained the Lomb-Scargle method, do not have the same +significance in all strips of the loops, but for most strips of the flaring loops, the significances are +very near to one. To be assured about these oscillations, we probed the intensity time-series for +each strip of the loops and we observed that this loop’s intensities shows intensity oscillations +too (i.e., alongside the loop). The most probable dominant periods observed in intensity, for +wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A, 16.7, and 18.22 min for strips of +F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods +are in the same order of the observed thermal oscillation periods. The intensity in this time series +has not passed any thermal process but still shows oscillation periods close to thermal ones. So +we think these results confirm the observation of thermal oscillations. +ii. Temperature Analysis of non-Flaring Active Region Loops +The temperature time-series for different strips of the selected loops of the non-flaring active +region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. 6), +the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time +duration. Figure 6 displays the time-series of temperature variations for the first two strips of +the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the +logarithm of temperature (like the flaring loops range). The most powerful periods, observed in +most of these non-flaring loops’ strips (listed in Table2) are from 8.5 min. 
to 30 min. Comparing +the periods of the loops in the flaring region (Table 1) with the non-flaring one (Table 2), we see +that the temperature periods of the flaring loops have lower values on average and have more +diversity than the non-flaring ones. As Tables 1 and 2 show, the mean temperatures of nonf-loops are lower in comparison with the f-loops, a fact we also expected from common sense. +The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loops’ +strips. +Nonf-loop A, divided into 11 strips, has the length of 19.91 (Mm) which is the length of the +selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips +of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10. +Nonf-Loop B, divided into 6 strips, has the length of 11.11 (Mm), and the mean temperature (log), +and the mean of (Max(log T)-Min(log T)) for this loop are 5.99 ± 0.13 and 0.62 respectively. Nonf-loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of +5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56. +The first highest period observed for the temperature oscillations of these non-flaring loops’ +strips is reported in Table 2. As we observe the temperature periods in these non-flaring loops +are mostly longer than those of the flaring loops (compare the values listed in Table 1 and Table 2). +Therefore the temperature oscillations of these loops are a little slower than the flaring ones. +Figure 7 shows the temperature maps of the non-flaring loops A, B, and C, respectively as a +time series. In each plot, the vertical axis is the distance along the loop in Mm, and the horizontal +axis is the time. The color bar on the left shows the colors considered for the temperature range. +Each separated colored part in the map is one strip. These color maps are plotted with exactly the +same color range as the loops of the flaring region.
+As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature +fluctuations and are smoother in comparison with the flaring ones (Fig. 4). Furthermore, that +much increase in the temperatures of the strips, which was obvious in the loops of the flaring +region toward the end times, is not observed here. The temperatures are also totally lower in the +nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the non-flaring loops have relatively more similar temperature fluctuations. -As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the -flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes, -and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of -the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the -transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison -with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature -range, or the difference between maximum and minimum of the temperature value (max(log(T))- -min(log(T))), is much higher on average for the loops’ strips of the flaring AR in comparison with -the loops’ strips of the non-flaring one. -V. Summery -We reported the temperature oscillations of coronal loops of a flaring active region. We selected -the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This -region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been -analyzed before by Jain et al. (2015). 
They analyzed intensity variations in the wavelength 171 -in two coronal loops of this region and detected obvious transverse oscillation with periods of -roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure.1b) -at the flare time. We were curious to know if the temperature variations follow the transverse -oscillations of the loops, or there is any relation or correlation between them. We also wanted to -investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal -properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three -segments of its loops and analyzed their thermal treatment. Then we compared the temperature -treatment of the loops at the flaring region with the loops of the non-flaring region to see the -differences. We were eager to observe the probable discrepancies between flaring and non-flaring -loops in this respect. -Here we used data of three loops of the flaring active region (AR11283) around the time of the -Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring -active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures -1 and 2). To calculate the time series of the loop temperature values, we first extracted the loop -pixels in each image and then displayed the loop straightly for all the images in the time series -of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian -DEM forward-fitting method founded by Aschwanden et al. (2015). We calculated the peak -temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze -temperature oscillations of the time-series for each strip of the loops. -We observed temperature oscillations which are following the transverse loop oscillations -observed by Jain et al. (2015) for the flaring loops. 
Furthermore, the temperature oscillations in -these flaring loops happen before the transverse oscillations start and continue even in the time -duration after the transverse oscillations decay. As observed, the temperature oscillations do not -decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase -at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures -of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the -flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in -the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly -2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of -roughly 10 to 28.5 minutes in different segments of this loop. And as the transverse oscillation -decays in this interval, no special definite decay is observed in its temperature oscillations. +As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the +flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes, +and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of +the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the +transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison +with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature +range, or the difference between maximum and minimum of the temperature value (max(log(T))-min(log(T))), + is much higher on average for the loops’ strips of the flaring AR in comparison with +the loops’ strips of the non-flaring one. +V. Summary +We reported the temperature oscillations of coronal loops of a flaring active region.
We selected +the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This +region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been +analyzed before by Jain et al. (2015). They analyzed intensity variations in the wavelength 171 +in two coronal loops of this region and detected obvious transverse oscillation with periods of +roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure.1b) +at the flare time. We were curious to know if the temperature variations follow the transverse +oscillations of the loops, or there is any relation or correlation between them. We also wanted to +investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal +properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three +segments of its loops and analyzed their thermal treatment. Then we compared the temperature +treatment of the loops at the flaring region with the loops of the non-flaring region to see the +differences. We were eager to observe the probable discrepancies between flaring and non-flaring +loops in this respect. +Here we used data of three loops of the flaring active region (AR11283) around the time of the +Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring +active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures +1 and 2). To calculate the time series of the loop temperature values, we first extracted the loop +pixels in each image and then displayed the loop straightly for all the images in the time series +of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian +DEM forward-fitting method founded by Aschwanden et al. (2015). We calculated the peak +temperatures for each strip of the loops. 
Then we applied the Lomb-Scargle method to analyze +temperature oscillations of the time-series for each strip of the loops. +We observed temperature oscillations which are following the transverse loop oscillations +observed by Jain et al. (2015) for the flaring loops. Furthermore, the temperature oscillations in +these flaring loops happen before the transverse oscillations start and continue even in the time +duration after the transverse oscillations decay. As observed, the temperature oscillations do not +decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase +at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures +of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the +flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in +the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly +2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of +roughly 10 to 28.5 minutes in different segments of this loop. And as the transverse oscillation +decays in this interval, no special definite decay is observed in its temperature oscillations. The temperature periods of the flaring loops are rather shorter than the temperature periods -of the non-flaring loops. The loops of the flaring region show some short temperature oscillations -periods in which some are less than 10 minutes (Table1). These kind of short periods are more -frequently observed for the loops of the flaring active region and in the case of the non-flaring -ones, are very scarce. We observed that the periods of the flaring loops have more diversity -than those of the non-flaring ones. Based on our confined observations, the non-flaring loops’ -periods are longer and their temperatures’ values are totally lower. 
So our research showed that -thermal structures of the flaring loops differ from the non-flaring ones in the ways described -above. As temperature maps show, the temperature fluctuations are increasing at the flaring time -and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’ -temperature. Conversely, it seems that different strips of the non-flaring loops have relatively -more similar temperature fluctuations. The temperatures are either higher in average in the flar￾ing loops’ segments as expected. The significances of the periods, obtained by the Lomb-Scargle -method, are calculated for each strip of each loop and the results show that these significances -for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of -the non-flaring region are less than 0.5. Hence the detected periods in the flaring loops’ strips -have high significances (near to one) and are oscillations. Whereas the detected periods in the -non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe -they are just fluctuations. -Using this method for the coronal loops showed that the oscillation modes obtained for the -temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of -the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These -kind of oscillations often occur in hot coronal loops (log(T) > 6) of active regions especially the -ones associated with small (or micro-) flares (Wang et al. (2021)). The loops of our flaring active -region are also hot loops with the mean temperature above this range. They also show intensity -oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring -loops. 
The temperature of the non-flaring loops are lower (log(T) < 6) and as discussed above, -we believe that the observed oscillation-like periods in non-flaring loops should be more probably -related to the high amplitude fluctuations. -Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes -of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the -FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respec￾tively. And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for -nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T)- -Min(log T)) for these non-flaring loops show a difference from the flaring ones and are lower. -Loops of the non-flaring active region 12194 have a relatively uniform temperature at the -beginning of the time interval, which rises slightly at its end. As the Solar Monitor reports in the -neighborhood of this region, the flaring active region 12192 exists of which between its multiple -flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a possible suggestion -that the abovementioned slight temperature rise in the loops of AR 12194 (in the time interval -8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions -exist in the AR 12192. -Hence as our study shows, the temperature of coronal loops of flaring AR changes in an -oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher tem￾peratures on average and higher oscillation periods with higher peaks and deeper valleys. More -accurate commentary in this respect requires more extensive statistical research and broader ob￾servations. 
-arcsec -arcsec -79 154 229 304 379 454 -−68 -25 -118 -211 -304 -397 -a -arcsec -arcsec -114.6 171.2 227.8 284.4 341 -171.4 -206.3 -241.2 -276.1 -311 -Loop B1 Loop A -Loop C2 -Loop C1 -b -Loop B2 -Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view -of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are -the same loops studied by Jain et al. (2015) (see Fig.3a in Jain et al. (2015)). -arcsec -arcsec -−154 0 154 308 -−572 -−418 -−264 -−110 -44 -a -arcsec -arcsec -−202 −134 −66 2 70 -−396 -−338 -−280 -−221 -−162 -nonf−LoopA -nonf−LoopB -nonf−LoopC -b -Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO. (b) Zoom-in +of the non-flaring loops. The loops of the flaring region show some short temperature oscillations +periods in which some are less than 10 minutes (Table1). These kind of short periods are more +frequently observed for the loops of the flaring active region and in the case of the non-flaring +ones, are very scarce. We observed that the periods of the flaring loops have more diversity +than those of the non-flaring ones. Based on our confined observations, the non-flaring loops’ +periods are longer and their temperatures’ values are totally lower. So our research showed that +thermal structures of the flaring loops differ from the non-flaring ones in the ways described +above. As temperature maps show, the temperature fluctuations are increasing at the flaring time +and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’ +temperature. Conversely, it seems that different strips of the non-flaring loops have relatively +more similar temperature fluctuations. The temperatures are either higher in average in the flaring loops’ segments as expected. 
The significances of the periods, obtained by the Lomb-Scargle +method, are calculated for each strip of each loop and the results show that these significances +for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of +the non-flaring region are less than 0.5. Hence the detected periods in the flaring loops’ strips +have high significances (near to one) and are oscillations. Whereas the detected periods in the +non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe +they are just fluctuations. +Using this method for the coronal loops showed that the oscillation modes obtained for the +temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of +the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These +kind of oscillations often occur in hot coronal loops (log(T) > 6) of active regions especially the +ones associated with small (or micro-) flares (Wang et al. (2021)). The loops of our flaring active +region are also hot loops with the mean temperature above this range. They also show intensity +oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring +loops. The temperature of the non-flaring loops are lower (log(T) < 6) and as discussed above, +we believe that the observed oscillation-like periods in non-flaring loops should be more probably +related to the high amplitude fluctuations. +Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes +of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the +FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively. And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for +nonfloopA, B, and C respectively. 
Therefore the values of the quantity mean of (Max(log T)-Min(log + T)) for these non-flaring loops show a difference from the flaring ones and are lower. +Loops of the non-flaring active region 12194 have a relatively uniform temperature at the +beginning of the time interval, which rises slightly at its end. As the Solar Monitor reports, in the +neighborhood of this region the flaring active region 12192 exists, and among its multiple +flares there is a C4.6 class flare occurring at 9:44UT. Therefore, it could be a possible suggestion +that the abovementioned slight temperature rise in the loops of AR 12194 (in the time interval +8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions +that exist in AR 12192. +Hence as our study shows, the temperature of coronal loops of flaring AR changes in an +oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher temperatures on average and higher oscillation periods with higher peaks and deeper valleys. More +accurate commentary in this respect requires more extensive statistical research and broader observations. +arcsec +arcsec +79 154 229 304 379 454 +−68 +25 +118 +211 +304 +397 +a +arcsec +arcsec +114.6 171.2 227.8 284.4 341 +171.4 +206.3 +241.2 +276.1 +311 +Loop B1 Loop A +Loop C2 +Loop C1 +b +Loop B2 +Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view +of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are +the same loops studied by Jain et al. (2015) (see Fig.3a in Jain et al. (2015)). +arcsec +arcsec +−154 0 154 308 +−572 +−418 +−264 +−110 +44 +a +arcsec +arcsec +−202 −134 −66 2 70 +−396 +−338 +−280 +−221 +−162 +nonf−LoopA +nonf−LoopB +nonf−LoopC +b +Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO.
(b) Zoom-in view of the area, marked by a box in the left, the loops are distinguished in red. -5.8 -6 -6.2 -6.4 -6.6 -6.8 -LogT -F−LoopA -5.8 -6 -6.2 -6.4 -6.6 -6.8 -LogT -22:10 22:20 22:30 22:40 22:50 23:00 -5.8 -6 -6.2 -6.4 -6.6 -6.8 -time -LogT -5.8 -6 -6.2 -6.4 -6.6 -6.8 -LogT -F−LoopB1 -22:10 22:20 22:30 22:40 22:50 23:00 -5.8 -6 -6.2 -6.4 -6.6 -6.8 -time -LogT -Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to -3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the +5.8 +6 +6.2 +6.4 +6.6 +6.8 +LogT +F−LoopA +5.8 +6 +6.2 +6.4 +6.6 +6.8 +LogT +22:10 22:20 22:30 22:40 22:50 23:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT +5.8 +6 +6.2 +6.4 +6.6 +6.8 +LogT +F−LoopB1 +22:10 22:20 22:30 22:40 22:50 23:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT +Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to +3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1. -22:10 22:20 22:30 22:40 22:50 23:00 -0 -11 -21 -32 -42 -F−loopA -Time -Loop Length(Mm) -5.8 -6 -6.2 -6.4 -6.6 -6.8 -22:10 22:20 22:30 22:40 22:50 23:00 -0 -5 -10 -15 -20 -F−loopB1 -Time -Loop Length(Mm) -6 -6.05 -6.1 -6.15 -6.2 -6.25 -6.3 -6.35 -6.4 -6.45 -6.5 -22:10 22:20 22:30 22:40 22:50 23:00 -0 -4 -8 -12 -16 -F−loopB2 -Time -Loop Length(Mm) -5.8 -6 -6.2 -6.4 -6.6 -6.8 -22:10 22:20 22:30 22:40 22:50 23:00 -0 -6 -11 -17 -22 -F−loopC1 -Time -Loop Length(Mm) -5.6 -5.8 -6 -6.2 -6.4 -6.6 -6.8 -22:10 22:20 22:30 22:40 22:50 23:00 -0 -3 -6 -8 -11 -F−loopC2 -Time -Loop Length(Mm) -5.8 -6 -6.2 -6.4 -6.6 -6.8 -Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. 
The vertical -axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows +22:10 22:20 22:30 22:40 22:50 23:00 +0 +11 +21 +32 +42 +F−loopA +Time +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +5 +10 +15 +20 +F−loopB1 +Time +Loop Length(Mm) +6 +6.05 +6.1 +6.15 +6.2 +6.25 +6.3 +6.35 +6.4 +6.45 +6.5 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +4 +8 +12 +16 +F−loopB2 +Time +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +6 +11 +17 +22 +F−loopC1 +Time +Loop Length(Mm) +5.6 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +3 +6 +8 +11 +F−loopC2 +Time +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical +axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows the colors considered for the temperature range. -Table 1: The properties observed for the loop segments of the flaring AR. 
-FLoopA -(Strip Number) -The highest -Temp.’s period -observed -Max(log(T))- -Min(log(T)) -FLoopB2 -(Strip Number) -The highest -Temp.’s period -observed -Max(log(T))- -Min(log(T)) -1 9.94 1.09 1 18.07 0.68 -2 16.57 0.79 2 24.85 0.83 -3 8.46 0.65 3 24.85 0.85 -4 28.4 1.11 4 7.36 0.84 -5 28.4 0.75 5 8.64 0.85 -6 24.85 0.76 6 8.28 0.93 -7 22.09 0.58 7 18.07 0.84 -8 18.07 1.55 8 28.4 0.73 -9 18.07 1.6 FLoopC1 - - -10 12.42 1.57 1 28.4 1.46 -11 12.42 1.42 2 22.09 1.34 -12 24.85 1.56 3 16.57 1.36 -13 19.88 1.6 4 28.04 1.49 -14 19.88 1.24 5 24.85 1.6 -15 18.07 1.58 6 24.85 1.42 -16 19.88 1.45 7 15.29 1.6 -17 16.57 0.7 8 13.25 1.56 -18 7.36 1.6 9 13.25 1.6 -19 8.64 0.95 10 16.57 1.6 -20 16.57 1.54 11 16.57 1.6 -21 7.36 1.18 12 9.46 1.13 -22 7.36 1.51 FLoopC1 - - -23 18.07 1.58 1 18.07 0.88 -24 22.09 1.33 2 28.4 0.8 -25 24.85 0.72 3 15.29 0.87 -FLoopB1 - - 4 16.57 0.93 -1 18.07 1.43 5 18.07 1.22 -2 15.29 0.76 6 28.4 0.58 -3 18.07 0.76 -4 18.07 0.75 -5 18.07 0.59 -6 19.88 0.8 -7 19.88 0.91 -8 19.88 1.36 -9 11.04 1.6 -10 18.07 1.6 +Table 1: The properties observed for the loop segments of the flaring AR. 
+FLoopA +(Strip Number) +The highest +Temp.’s period +observed +Max(log(T))Min(log(T)) + +FLoopB2 +(Strip Number) +The highest +Temp.’s period +observed +Max(log(T))Min(log(T)) + +1 9.94 1.09 1 18.07 0.68 +2 16.57 0.79 2 24.85 0.83 +3 8.46 0.65 3 24.85 0.85 +4 28.4 1.11 4 7.36 0.84 +5 28.4 0.75 5 8.64 0.85 +6 24.85 0.76 6 8.28 0.93 +7 22.09 0.58 7 18.07 0.84 +8 18.07 1.55 8 28.4 0.73 +9 18.07 1.6 FLoopC1 - 10 + 12.42 1.57 1 28.4 1.46 +11 12.42 1.42 2 22.09 1.34 +12 24.85 1.56 3 16.57 1.36 +13 19.88 1.6 4 28.04 1.49 +14 19.88 1.24 5 24.85 1.6 +15 18.07 1.58 6 24.85 1.42 +16 19.88 1.45 7 15.29 1.6 +17 16.57 0.7 8 13.25 1.56 +18 7.36 1.6 9 13.25 1.6 +19 8.64 0.95 10 16.57 1.6 +20 16.57 1.54 11 16.57 1.6 +21 7.36 1.18 12 9.46 1.13 +22 7.36 1.51 FLoopC1 - 23 + 18.07 1.58 1 18.07 0.88 +24 22.09 1.33 2 28.4 0.8 +25 24.85 0.72 3 15.29 0.87 +FLoopB1 - - 4 16.57 0.93 +1 18.07 1.43 5 18.07 1.22 +2 15.29 0.76 6 28.4 0.58 +3 18.07 0.76 +4 18.07 0.75 +5 18.07 0.59 +6 19.88 0.8 +7 19.88 0.91 +8 19.88 1.36 +9 11.04 1.6 +10 18.07 1.6 11 18.07 1.6 -Table 2: The properties observed for the loop segments of the non flaring AR. -Nonf-LoopA -(Strip Number) -The highest -Temp.’s period -observed -Max(log(T))- -Min(log(T)) -1 24 0.61 -2 30 0.95 -3 30 0.81 -4 20 1.51 -5 20 0.77 -6 20 0.81 -7 11.42 0.71 -8 12 0.73 -9 30 0.72 -10 30 0.77 -11 30 0.61 -Nonf-LoopB -(Strip Number) -The highest -Temp.’s period -observed -Max(log(T))- -Min(log(T)) -1 26.66 0.36 -2 26.66 0.64 -3 10.43 0.45 -4 12 0.62 -5 30 0.98 -6 8.57 0.67 -Nonf-LoopC -(Strip Number) -The highest -Temp.’s period -observed -Max(log(T))- -Min(log(T)) -1 26.66 0.76 -2 26.66 0.75 -3 26.66 0.26 -4 30 0.27 +Table 2: The properties observed for the loop segments of the non flaring AR. 
+Nonf-LoopA +(Strip Number) +The highest +Temp.’s period +observed +Max(log(T))Min(log(T)) + +1 24 0.61 +2 30 0.95 +3 30 0.81 +4 20 1.51 +5 20 0.77 +6 20 0.81 +7 11.42 0.71 +8 12 0.73 +9 30 0.72 +10 30 0.77 +11 30 0.61 +Nonf-LoopB +(Strip Number) +The highest +Temp.’s period +observed +Max(log(T))Min(log(T)) + +1 26.66 0.36 +2 26.66 0.64 +3 10.43 0.45 +4 12 0.62 +5 30 0.98 +6 8.57 0.67 +Nonf-LoopC +(Strip Number) +The highest +Temp.’s period +observed +Max(log(T))Min(log(T)) + +1 26.66 0.76 +2 26.66 0.75 +3 26.66 0.26 +4 30 0.27 5 30 0.8 -22:10 22:20 22:30 22:40 22:50 23:00 -0 -11 -22 -32 -43 -Int−Fe−LoopA -Time -Loop Length(Mm) -0 -0.02 -0.04 -0.06 -0.08 -0.1 -0.12 -0.14 -0.16 -0.18 -0.2 -22:10 22:20 22:30 22:40 22:50 23:00 -0 -0.1 -0.2 -0.3 -0.4 -0.5 -0.6 -0.7 -0.8 -0.9 -1 -Int−Fe−LoopA -Time -Normalized Intensity Fe XVIII -Figure 5: Normalized intensity map of the flaring loop A for the wavelength Fe XV I I I, and mean intensity of Fe -XV I I I (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and -normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors -considered for the Intensity range. -VI. acknowledgements -The author Narges Fathalian wishes to also express her thanks for the technical support and -comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regarding to this +22:10 22:20 22:30 22:40 22:50 23:00 +0 +11 +22 +32 +43 +Int−Fe−LoopA +Time +Loop Length(Mm) +0 +0.02 +0.04 +0.06 +0.08 +0.1 +0.12 +0.14 +0.16 +0.18 +0.2 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 +0.7 +0.8 +0.9 +1 +Int−Fe−LoopA +Time +Normalized Intensity Fe XVIII +Figure 5: Normalized intensity map of the flaring loop A for the wavelength Fe XV I I I, and mean intensity of Fe +XV I I I (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and +normalized intensity for the second. 
The horizontal axis is the time. The colorbar in the left shows the colors +considered for the Intensity range. +VI. acknowledgements +The author Narges Fathalian wishes to also express her thanks for the technical support and +comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regarding to this work. -5.8 -6 -6.2 -6.4 -6.6 -6.8 -LogT -NonF−LoopA -8:00 8:10 8:20 8:30 8:40 8:50 9:00 -5.8 -6 -6.2 -6.4 -6.6 -6.8 -time -LogT -5.8 -6 -6.2 -6.4 -6.6 -6.8 -LogT -NonF−LoopB -8:00 8:10 8:20 8:30 8:40 8:50 9:00 -5.8 -6 -6.2 -6.4 -6.6 -6.8 -time -LogT -Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non￾flaring Loops A and B. Horizontal axis is the time and the vertical axis is the logarithm of the temperature. -8:10 8:20 8:30 8:40 8:50 9:00 -0 -5 -10 -15 -20 -NonF−loopA -Time -Loop Length(Mm) -5.8 -6 -6.2 -6.4 -6.6 -6.8 -8:10 8:20 8:30 8:40 8:50 9:00 -0 -5 -9 -14 -18 -NonF−loopB -Time -Loop Length(Mm) -5.8 -6 -6.2 -6.4 -6.6 -6.8 -8:10 8:20 8:30 8:40 8:50 9:00 -0 -3 -5 -8 -10 -NonF−loopC -Time -Loop Length(Mm) -5.8 -6 -6.2 -6.4 -6.6 -6.8 -Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis -is the distance along the loop in Mm, and the horizontal axis is the time. The color-bar in the left shows the +5.8 +6 +6.2 +6.4 +6.6 +6.8 +LogT +NonF−LoopA +8:00 8:10 8:20 8:30 8:40 8:50 9:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT +5.8 +6 +6.2 +6.4 +6.6 +6.8 +LogT +NonF−LoopB +8:00 8:10 8:20 8:30 8:40 8:50 9:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT +Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the nonflaring Loops A and B. Horizontal axis is the time and the vertical axis is the logarithm of the temperature. 
+8:10 8:20 8:30 8:40 8:50 9:00 +0 +5 +10 +15 +20 +NonF−loopA +Time +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +8:10 8:20 8:30 8:40 8:50 9:00 +0 +5 +9 +14 +18 +NonF−loopB +Time +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +8:10 8:20 8:30 8:40 8:50 9:00 +0 +3 +5 +8 +10 +NonF−loopC +Time +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis +is the distance along the loop in Mm, and the horizontal axis is the time. The color-bar in the left shows the colors considered for the temperature range. -6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 -0 -0.05 -0.1 -0.15 -0.2 -0.25 -0.3 -0.35 -0.4 -Temp. Period (min) -Percentage of Temp. Periods -Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non￾flaring (red bars) ARs. The horizontal axis shows the temperature periods in minute. -0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 -0 -2 -4 -6 -8 -10 -12 -max(log(T))−min(log(T)) -Number -Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) +6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 +0 +0.05 +0.1 +0.15 +0.2 +0.25 +0.3 +0.35 +0.4 +Temp. Period (min) +Percentage of Temp. Periods +Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and nonflaring (red bars) ARs. The horizontal axis shows the temperature periods in minute. +0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 +0 +2 +4 +6 +8 +10 +12 +max(log(T))−min(log(T)) +Number +Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) and non-flaring (red bars) ARs. -References -Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 280 -Anfinogentov, S., Nakariakov, V. 
M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F. -2013, ApJ, 773, 156 -Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5 -Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364, -417 -Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81 -Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53 -Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexander, D. 1999, ApJ, 520, 880 -Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13 -Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3 -Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207 -Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41 -Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116 -De Moortel, I. 2005, Philosophical Transactions of the Royal Society of London Series A, 363, 2743 -De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210 -De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23 -De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of -London Series A, 370, 3193 -Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369 -Fathalian, N., & Safari, H. 2010, ApJ, 724, 411 -Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403 -Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233 -Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A, 460, 887 -Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203, 25 -Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203, 26 -Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113 -Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103 -Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19 +References +Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 280 +Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. 
F. +2013, ApJ, 773, 156 +Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5 +Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364, +417 +Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81 +Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53 +Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexander, D. 1999, ApJ, 520, 880 +Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13 +Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3 +Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207 +Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41 +Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116 +De Moortel, I. 2005, Philosophical Transactions of the Royal Society of London Series A, 363, 2743 +De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210 +De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23 +De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of +London Series A, 370, 3193 +Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369 +Fathalian, N., & Safari, H. 2010, ApJ, 724, 411 +Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403 +Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233 +Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A, 460, 887 +Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203, 25 +Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203, 26 +Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113 +Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103 +Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19 Jess, D. B., Reznikova, V. E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179 -Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 2019, A&A, 628, A133 -Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 
2019, Frontiers in Astronomy and Space -Sciences, 6, 57 -Li, L. P., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109 -Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 -Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371 -McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834 -McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelínek, P., & Takasao, S. 2018, Space -Science Reviews volume, 214, 45 -Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2017, ApJ, 849, 62 -Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010, Plasma Physics and Controlled Fusion, -52, 124009 -Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862 -Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3 -Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 552, A57 -Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 2017, A&A, 600, A37 -Ofman, L., & Wang, T. 2002, ApJ, 580, L85 -Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5 -Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165 -Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, ApJ, 884, 131 -Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857 -Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55 -Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 581, A8 -Scargle, J. D. 1982, ApJ, 263, 835 -Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ, 731, 49 -Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ, 725, L34 -Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 -Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 2013, ApJ, 764, 53 -Ugarte-Urra, I., & Warren, H. P. 2014, ApJ, 783, 12 -Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143 +Kolotkov, D. Y., Nakariakov, V. 
M., & Zavershinskii, D. I. 2019, A&A, 628, A133 +Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space +Sciences, 6, 57 +Li, L. P., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109 +Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 +Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371 +McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834 +McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelínek, P., & Takasao, S. 2018, Space +Science Reviews volume, 214, 45 +Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2017, ApJ, 849, 62 +Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010, Plasma Physics and Controlled Fusion, +52, 124009 +Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862 +Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3 +Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 552, A57 +Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 2017, A&A, 600, A37 +Ofman, L., & Wang, T. 2002, ApJ, 580, L85 +Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5 +Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165 +Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, ApJ, 884, 131 +Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857 +Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55 +Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 581, A8 +Scargle, J. D. 1982, ApJ, 263, 835 +Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ, 731, 49 +Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ, 725, L34 +Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 +Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 2013, ApJ, 764, 53 +Ugarte-Urra, I., & Warren, H. P. 
2014, ApJ, 783, 12 +Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143 Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32 -VanderPlas, J. T. 2018, ApJ, 236, 16 -Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77 -Wang, T. 2011, Space Science Reviews, 158, 397–419 -Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 -Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33 -Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17 -Wang, T., & Ofman, L. 2019, ApJ, 886, 2 -Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13 -Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217 -Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 759, 141 -Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 190, 467 +VanderPlas, J. T. 2018, ApJ, 236, 16 +Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77 +Wang, T. 2011, Space Science Reviews, 158, 397–419 +Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 +Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33 +Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17 +Wang, T., & Ofman, L. 2019, ApJ, 886, 2 +Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13 +Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217 +Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 759, 141 +Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 190, 467 \ No newline at end of file diff --git a/read/results/pdfium/GeoTopo-book.txt b/read/results/pdfium/GeoTopo-book.txt index 407fed2..62b1487 100644 --- a/read/results/pdfium/GeoTopo-book.txt +++ b/read/results/pdfium/GeoTopo-book.txt @@ -1,6443 +1,6123 @@ -Einführung in die -Geometrie und Topologie +Einführung in die +Geometrie und Topologie 0. Auflage, 31. 
Dezember 2016 Martin Thoma -Vorwort -Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet -die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen -und Tutorien. -Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (Schwarz￾Weiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). -Danksagungen -An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen -gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich -basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze -Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre -Inhalte in diesem Skript einbauen zu dürfen! -Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsauf￾gaben und Lösungen zu benutzen. -Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen -Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per -E-Mail und nach dem Tutorium beantwortet. Danke! -Was ist Topologie? -Die Kugeloberfläche S -2 -lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche -oder der Oberfläche einer Pyramide verformen, aber nicht zum R -2 oder zu einem Torus T -2 -. Für -den R -2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein -Loch machen. -Erforderliche Vorkenntnisse -Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \, ∅, R,P(M)) -und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider￾spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag, -Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem -in „Analysis I“ vermittelt. 
-Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, -der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare +Vorwort +Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet +die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen +und Tutorien. +Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (SchwarzWeiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). +Danksagungen +An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen +gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich +basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze +Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre +Inhalte in diesem Skript einbauen zu dürfen! +Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsaufgaben und Lösungen zu benutzen. +Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen +Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per +E-Mail und nach dem Tutorium beantwortet. Danke! +Was ist Topologie? +Die Kugeloberfläche S +2 +lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche +oder der Oberfläche einer Pyramide verformen, aber nicht zum R +2 oder zu einem Torus T2 +. Für +den R +2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein +Loch machen. +Erforderliche Vorkenntnisse +Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \, ∅, R,P(M)) +und ganz allgemein formaler Schreibweise vorausgesetzt. 
Auch die Beweisführung mittels Widerspruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag, +Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem +in „Analysis I“ vermittelt. +Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, +der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. -iii -(a) S -2 -(b) Würfel (c) Pyramide -y -x -(d) R -2 -(e) T -2 -Abbildung 0.1: Beispiele für verschiedene Formen -Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und + +(a) S +2 +(b) Würfel (c) Pyramide +y +x +(d) R +2 +(e) T +2 +Abbildung 0.1: Beispiele für verschiedene Formen +Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und Zahlentheorie“ gehört zu haben. -Inhaltsverzeichnis -1 Topologische Grundbegriffe 2 -1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 -1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 -1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 -1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 -1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22 -2 Mannigfaltigkeiten und Simplizialkomplexe 24 -2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24 -2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29 -2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . 34 -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43 -3 Fundamentalgruppe und Überlagerungen 44 -3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44 -3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47 -3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 -3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61 -4 Euklidische und nichteuklidische Geometrie 64 -4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 -4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74 -4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74 -4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77 -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86 -5 Krümmung 87 -5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 -5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89 -5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 -5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 94 -Lösungen der Übungsaufgaben 99 -Bildquellen 105 -Abkürzungsverzeichnis 106 -Ergänzende Definitionen und Sätze 107 +Inhaltsverzeichnis +1 Topologische Grundbegriffe 2 +1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 +1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 +1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 +1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
11 +1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 +1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22 +2 Mannigfaltigkeiten und Simplizialkomplexe 24 +2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24 +2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29 +2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +3 Fundamentalgruppe und Überlagerungen 44 +3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44 +3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47 +3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 +3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61 +4 Euklidische und nichteuklidische Geometrie 64 +4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 +4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74 +4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74 +4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77 +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86 +5 Krümmung 87 +5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 +5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89 +5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
91 +5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 94 +Lösungen der Übungsaufgaben 99 +Bildquellen 105 +Abkürzungsverzeichnis 106 +Ergänzende Definitionen und Sätze 107 Symbolverzeichnis 108 -2 Inhaltsverzeichnis + Inhaltsverzeichnis Stichwortverzeichnis 111 -1 Topologische Grundbegriffe -1.1 Topologische Räume -Definition 1 -Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P(X) -mit folgenden Eigenschaften -(i) ∅, X ∈ T -(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T -(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist [ -i∈I -Ui ∈ T -Die Elemente von T heißen offene Teilmengen von X. -A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. -Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es -Mengen, die sowohl abgeschlossen als auch offen sind. -Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) -Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }. -Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \ X = ∅ ∈ T und -X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen.  -Beispiel 1 (Topologien) -1) X = R -n mit der von der euklidischen Metrik erzeugten Topologie TEuklid: -U ⊆ R -n -offen ⇔ für jedes x ∈ U gibt es r > 0, -sodass Br(x) = { y ∈ R -n -| d(x, y) < r } ⊆ U -Diese Topologie wird auch „Standardtopologie des R -n -“ genannt. Sie beinhaltet unter -anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli￾chem Mittelpunkt (vgl. Definition 1.ii). -2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum. -3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie. 
-4) X := R, TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie -Beobachtungen: -• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 } +1 Topologische Grundbegriffe +1.1 Topologische Räume +Definition 1 +Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P(X) +mit folgenden Eigenschaften +(i) ∅, X ∈ T +(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T +(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist [ +i∈I +Ui ∈ T +Die Elemente von T heißen offene Teilmengen von X. +A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. +Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es +Mengen, die sowohl abgeschlossen als auch offen sind. +Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) +Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }. +Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \ X = ∅ ∈ T und +X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen.  +Beispiel 1 (Topologien) +1) X = R +n mit der von der euklidischen Metrik erzeugten Topologie TEuklid: +U ⊆ R +n +offen ⇔ für jedes x ∈ U gibt es r > 0, +sodass Br(x) = { y ∈ R +n +| d(x, y) < r } ⊆ U +Diese Topologie wird auch „Standardtopologie des R +n +“ genannt. Sie beinhaltet unter +anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedlichem Mittelpunkt (vgl. Definition 1.ii). +2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum. +3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie. +4) X := R, TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie +Beobachtungen: +• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 } • Es gibt keine disjunkten offenen Mengen in TZ. -4 1.1. TOPOLOGISCHE RÄUME -5) X := R -n -, TZ = {U ⊆ R -n -|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass -R -n \ U = V (f1, . . . , fr)} -6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum. 
-∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. -Definition 2 -Sei (X, T) ein topologischer Raum und x ∈ X. -Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und -U0 ⊆ U. -Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. -Definition 3 -Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge. -a) M◦ -:= { x ∈ M | M ist Umgebung von x } = -[ -U⊆M -U∈T -U heißt Inneres oder offener -Kern von M. -b) M := \ -M⊆A -A abgeschlossen -A heißt abgeschlossene Hülle oder Abschluss von M. -c) ∂M := M \ M◦ heißt Rand von M. -d) M heißt dicht in X, wenn M = X ist. -Beispiel 2 -1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅ -2) Sei X = R und M = (a, b). Dann gilt: M = [a, b] -3) Sei X = R, T = TZ und M = (a, b). Dann gilt: M = R -Definition 4 -Sei (X, T) ein topologischer Raum. -a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen -aus B ist. -b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen -Durchschnitten von Elementen aus S ist. -Beispiel 3 (Basis und Subbasis) -1) Jede Basis ist auch eine Subbasis, z.B. -S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als -auch Subbasis. -2) Gegeben sei X = R -n mit euklidischer Topologie T. Dann ist -B = { Br(x) | r ∈ Q>0, x ∈ Q -n -} -ist eine abzählbare Basis von T. -3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }. + 1.1. TOPOLOGISCHE RÄUME +5) X := R +n +, TZ = {U ⊆ R +n +|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass +R +n \ U = V (f1, . . . , fr)} +6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum. +∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. +Definition 2 +Sei (X, T) ein topologischer Raum und x ∈ X. +Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und +U0 ⊆ U. 
+Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. +Definition 3 +Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge. +a) M◦ +:= { x ∈ M | M ist Umgebung von x } = +[ +U⊆M +U∈T +U heißt Inneres oder offener +Kern von M. +b) M := \ +M⊆A +A abgeschlossen +A heißt abgeschlossene Hülle oder Abschluss von M. +c) ∂M := M \ M◦ heißt Rand von M. +d) M heißt dicht in X, wenn M = X ist. +Beispiel 2 +1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅ +2) Sei X = R und M = (a, b). Dann gilt: M = [a, b] +3) Sei X = R, T = TZ und M = (a, b). Dann gilt: M = R +Definition 4 +Sei (X, T) ein topologischer Raum. +a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen +aus B ist. +b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen +Durchschnitten von Elementen aus S ist. +Beispiel 3 (Basis und Subbasis) +1) Jede Basis ist auch eine Subbasis, z.B. +S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als +auch Subbasis. +2) Gegeben sei X = R +n mit euklidischer Topologie T. Dann ist +B = { Br(x) | r ∈ Q>0, x ∈ Q +n +} +ist eine abzählbare Basis von T. +3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }. Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt: -5 1.1. TOPOLOGISCHE RÄUME -• S ⊆ T -• ∅, { 0, 1 } und { 0, 2 } ∈ S -• { 0 } = { 0, 1 } ∩ { 0, 2 } -• X = { 0, 1 } ∪ { 0, 2 } -Allerings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen -aus S erzeugt werden kann. -Bemerkung 2 -Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S -Subbasis ist. -Definition 5 -Sei (X, T) ein topologischer Raum und Y ⊆ X. -TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . -TY heißt Teilraumtopologie und (Y, TY ) heißt ein Teilraum von (X, T). 
-Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. -Definition 6 -Seien X1, X2 topologische Räume. -U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit -i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt. -T = { U ⊆ X1 × X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie. -B = { U1 × U2 | Ui offen in Xi -, i = 1, 2 } ist eine Basis von T. -U -x -x2 -x1 -U2 -U1 -X1 -X2 -Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U -Beispiel 4 (Produkttopologien) -1) X1 = X2 = R mit euklidischer Topologie. -⇒ Die Produkttopologie auf R × R = R -2 -stimmt mit der euklidischen Topologie auf -R -2 überein. -2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R -2 -: U1 × U2 + 1.1. TOPOLOGISCHE RÄUME +• S ⊆ T +• ∅, { 0, 1 } und { 0, 2 } ∈ S +• { 0 } = { 0, 1 } ∩ { 0, 2 } +• X = { 0, 1 } ∪ { 0, 2 } +Allerings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen +aus S erzeugt werden kann. +Bemerkung 2 +Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S +Subbasis ist. +Definition 5 +Sei (X, T) ein topologischer Raum und Y ⊆ X. +TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . +TY heißt Teilraumtopologie und (Y, TY ) heißt ein Teilraum von (X, T). +Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. +Definition 6 +Seien X1, X2 topologische Räume. +U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit +i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt. +T = { U ⊆ X1 × X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie. +B = { U1 × U2 | Ui offen in Xi, i = 1, 2 } ist eine Basis von T. +U +x +x2 +x1 +U2 +U1 +X1 +X2 +Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U +Beispiel 4 (Produkttopologien) +1) X1 = X2 = R mit euklidischer Topologie. +⇒ Die Produkttopologie auf R × R = R +2 +stimmt mit der euklidischen Topologie auf +R +2 überein. 
+2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R +2 +: U1 × U2 (Siehe Abbildung 1.2) -6 1.1. TOPOLOGISCHE RÄUME -U1 = R \ N -U2 = -R \ N -Abbildung 1.2: Zariski-Topologie auf R -2 -Definition 7 -Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ sei die Menge -der Äquivalenzklassen, π : X → X, x 7→ [x]∼. -TX -:=  -U ⊆ X - - π -−1 -(U) ∈ TX - -(X, TX -) heißt Quotiententopologie. -Beispiel 5 -X = R, a ∼ b :⇔ a − b ∈ Z --1 0 1 2 3 4 5 R -0 -a -U -π a -−1 -(u) -0 ∼ 1, d. h. [0] = [1] -Beispiel 6 -Sei X = R -2 und (x1, y1) ∼ (x2, y2) ⇔ x1 − x2 ∈ Z und y1 − y2 ∈ Z. Dann ist X/∼ ein Torus. -Beispiel 7 (Projektiver Raum) -X = R -n+1 \ { 0 } , x ∼ y ⇔ ∃λ ∈ R -× mit y = λx -⇔ x und y liegen auf der gleichen -Ursprungsgerade -X = P -n + 1.1. TOPOLOGISCHE RÄUME +U1 = R \ N +U2 = +R \ N +Abbildung 1.2: Zariski-Topologie auf R +2 +Definition 7 +Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ sei die Menge +der Äquivalenzklassen, π : X → X, x 7→ [x]∼. +TX +:=  +U ⊆ X + + π +−1 +(U) ∈ TX + +(X, TX +) heißt Quotiententopologie. +Beispiel 5 +X = R, a ∼ b :⇔ a − b ∈ Z +-1 0 1 2 3 4 5 R +0 +a +U +π a +−1 +(u) +0 ∼ 1, d. h. [0] = [1] +Beispiel 6 +Sei X = R +2 und (x1, y1) ∼ (x2, y2) ⇔ x1 − x2 ∈ Z und y1 − y2 ∈ Z. Dann ist X/∼ ein Torus. +Beispiel 7 (Projektiver Raum) +X = R +n+1 \ { 0 } , x ∼ y ⇔ ∃λ ∈ R× mit y = λx +⇔ x und y liegen auf der gleichen +Ursprungsgerade +X = P +n (R) -7 1.2. METRISCHE RÄUME -Also für n = 1: -−4 −2 2 4 6 8 -−4 -−2 -2 -4 -1.2 Metrische Räume -Definition 8 -Sei X eine Menge. Eine Abbildung d : X × X → R -+ -0 -heißt Metrik, wenn gilt: -(i) Definitheit: d(x, y) = 0 ⇔ x = y ∀x, y ∈ X -(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X -(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X -Das Paar (X, d) heißt ein metrischer Raum. 
-Bemerkung 3 -Sei (X, d) ein metrischer Raum und -Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R -+ -B = { Br(x) ⊆ P(X) | x ∈ X, r ∈ R -+ } ist Basis einer Topologie auf X. -Definition 9 -Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit -∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2)) -Dann heißt ϕ eine Isometrie von X nach Y . -Beispiel 8 (Skalarprodukt erzeugt Metrik) -Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt h·, ·i. Dann wird V -durch d(x, y) := p -hx − y, x − yi zum metrischen Raum. -Beispiel 9 (diskrete Metrik) -Sei X eine Menge. Dann heißt -d(x, y) = ( -0 falls x = y -1 falls x 6= y + 1.2. METRISCHE RÄUME +Also für n = 1: +−4 −2 2 4 6 8 +−4 +−2 +2 +4 +1.2 Metrische Räume +Definition 8 +Sei X eine Menge. Eine Abbildung d : X × X → R ++ +0 +heißt Metrik, wenn gilt: +(i) Definitheit: d(x, y) = 0 ⇔ x = y ∀x, y ∈ X +(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X +(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X +Das Paar (X, d) heißt ein metrischer Raum. +Bemerkung 3 +Sei (X, d) ein metrischer Raum und +Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R ++ +B = { Br(x) ⊆ P(X) | x ∈ X, r ∈ R ++ } ist Basis einer Topologie auf X. +Definition 9 +Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit +∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2)) +Dann heißt ϕ eine Isometrie von X nach Y . +Beispiel 8 (Skalarprodukt erzeugt Metrik) +Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt h·, ·i. Dann wird V +durch d(x, y) := phx − y, x − yi zum metrischen Raum. +Beispiel 9 (diskrete Metrik) +Sei X eine Menge. Dann heißt +d(x, y) = ( +0 falls x = y +1 falls x 6= y die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. -8 1.2. METRISCHE RÄUME -Beispiel 10 -X = R -2 und d ((x1, y1),(x2, y2)) := max(kx1 − x2k, ky1 − y2k) ist Metrik. -Beobachtung: d erzeugt die euklidische Topologie. -Br(0) = -r r -r -r -(a) Br(0) (b) Euklidische Topologie + 1.2. 
METRISCHE RÄUME +Beispiel 10 +X = R +2 und d ((x1, y1),(x2, y2)) := max(kx1 − x2k, ky1 − y2k) ist Metrik. +Beobachtung: d erzeugt die euklidische Topologie. +Br(0) = +r r +r +r +(a) Br(0) (b) Euklidische Topologie Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10 -9 1.2. METRISCHE RÄUME -Beispiel 11 (SNCF-Metrik1 -) -X = R -2 -−4 −2 2 4 6 8 -−4 -−2 -2 -4 -Definition 10 -Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x =6 y in X -Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅. -Bemerkung 4 (Trennungseigenschaft) -Metrische Räume sind hausdorffsch, wegen -d(x, y) > 0 ⇒ ∃ε > 0 : Bε(x) ∩ Bε(y) = ∅ -Beispiel 12 (Topologische Räume und Hausdorff-Räume) -1) (R, TZ) ist ein topologischer Raum, der nicht hausdorffsch ist. -2) (R, TEuklid) ist ein topologischer Hausdorff-Raum. -Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) -Seien X, X1, X2 Hausdorff-Räume. -a) Jeder Teilraum von X ist hausdorffsch. -b) X1 × X2 ist hausdorffsch (vgl. Abbildung 1.4). -Definition 11 -Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder -Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle -n ≥ n0. -Bemerkung 6 -Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. -Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge. -Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls -x 6= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0 -⇒ x = y  + 1.2. METRISCHE RÄUME +Beispiel 11 (SNCF-Metrik1) +X = R +2 +−4 −2 2 4 6 8 +−4 +−2 +2 +4 +Definition 10 +Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x =6 y in X +Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅. 
+Bemerkung 4 (Trennungseigenschaft) +Metrische Räume sind hausdorffsch, wegen +d(x, y) > 0 ⇒ ∃ε > 0 : Bε(x) ∩ Bε(y) = ∅ +Beispiel 12 (Topologische Räume und Hausdorff-Räume) +1) (R, TZ) ist ein topologischer Raum, der nicht hausdorffsch ist. +2) (R, TEuklid) ist ein topologischer Hausdorff-Raum. +Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) +Seien X, X1, X2 Hausdorff-Räume. +a) Jeder Teilraum von X ist hausdorffsch. +b) X1 × X2 ist hausdorffsch (vgl. Abbildung 1.4). +Definition 11 +Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder +Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle +n ≥ n0. +Bemerkung 6 +Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. +Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge. +Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls +x 6= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0 +⇒ x = y  1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. -10 1.3. STETIGKEIT -(x1, y1) (x2, y2) -x1 x2 -U1 × X2 U2 × X2 -X1 -X2 -Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 × X2 -1.3 Stetigkeit -Definition 12 -Seien (X, TX),(Y, TY ) topologische Räume und f : X → Y eine Abbildung. -a) f heißt stetig :⇔ ∀U ∈ TY : f -−1 -(U) ∈ TX. -b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : -Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY . -Bemerkung 72 -Seien X, Y metrische Räume und f : X → Y eine Abbildung. -Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für -alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε. -Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)). -Dann ist U offen in Y . -Def. 12.a =====⇒ f -−1 -(U) ist offen in X. Dann ist x ∈ f -−1 -(U). -⇒ ∃δ > 0, sodass Bδ(x) ⊆ f -−1 -(U) -⇒ f(Bδ(x)) ⊆ U -⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh. 
-„⇐“: Sei U ⊆ Y offen, X ∈ f -−1 -(U). -Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U -Vor. ==⇒ Es gibt δ > 0, sodass f(Bδ(x)) ⊆ Bε(f(x))) -⇒ Bδ(x) ⊆ f -−1 -(Bε(f(x))) ⊆ f -−1 -(U)  -Bemerkung 8 -Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: -f ist stetig -⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f -−1 -(A) ⊆ X ist abgeschlossen. -Beispiel 13 (Stetige Abbildungen und Homöomorphismen) -1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus. + 1.3. STETIGKEIT +(x1, y1) (x2, y2) +x1 x2 +U1 × X2 U2 × X2 +X1 +X2 +Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 × X2 +1.3 Stetigkeit +Definition 12 +Seien (X, TX),(Y, TY ) topologische Räume und f : X → Y eine Abbildung. +a) f heißt stetig :⇔ ∀U ∈ TY : f +−1 +(U) ∈ TX. +b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : +Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY . +Bemerkung 72 +Seien X, Y metrische Räume und f : X → Y eine Abbildung. +Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für +alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε. +Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)). +Dann ist U offen in Y . +Def. 12.a =====⇒ f +−1 +(U) ist offen in X. Dann ist x ∈ f +−1 +(U). +⇒ ∃δ > 0, sodass Bδ(x) ⊆ f +−1 +(U) +⇒ f(Bδ(x)) ⊆ U +⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh. +„⇐“: Sei U ⊆ Y offen, X ∈ f +−1 +(U). +Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U +Vor. ==⇒ Es gibt δ > 0, sodass f(Bδ(x)) ⊆ Bε(f(x))) +⇒ Bδ(x) ⊆ f +−1 +(Bε(f(x))) ⊆ f +−1 +(U)  +Bemerkung 8 +Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: +f ist stetig +⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f +−1 +(A) ⊆ X ist abgeschlossen. +Beispiel 13 (Stetige Abbildungen und Homöomorphismen) +1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus. 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. -11 1.3. 
STETIGKEIT -2) Ist (Y, TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung -f : X → Y stetig. -3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen -Raum Y und jede Abbildung f. -4) Sei X = [0, 1), Y = S -1 = { z ∈ C | kzk = 1 } und f(t) = e -2πit -. -0 1 R -0 -f -g -Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist. -Die Umkehrabbildung g ist nicht stetig, da g -−1 -(U) nicht offen ist (vgl. Abbildung 1.5). -Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) -Seien X, Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. -Dann ist g ◦ f : X → Z stetig. -X -f -/ -g◦f - -Y -g - -Z -Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f) -−1 -(U) = f -−1 -(g -−1 -(U)). g -−1 -(U) ist offen in Y weil g stetig -ist, f -−1 -(g -−1 -(U)) ist offen in X, weil f stetig ist.  -Bemerkung 10 -a) Für jeden topologischen Raum X ist -Homöo(X) := { f : X → X | f ist Homöomorphismus } -eine Gruppe. -b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. -c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für -jeden metrischen Raum X. -Bemerkung 11 (Projektionen sind stetig) -Seien X, Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen -πX : (x, y) 7→ x und πY : (x, y) 7→ y -Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig. -Beweis: Sei U ⊆ X offen -⇒ π -−1 -X (U) = U × Y ist offen in X × Y .  -Bemerkung 12 -Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum -versehen mit der Quotiententopologie, π : X → X, x 7→ [x]∼. + 1.3. STETIGKEIT +2) Ist (Y, TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung +f : X → Y stetig. +3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen +Raum Y und jede Abbildung f. +4) Sei X = [0, 1), Y = S +1 = { z ∈ C | kzk = 1 } und f(t) = e2πit +. 
+0 1 R +0 +f +g +Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist. +Die Umkehrabbildung g ist nicht stetig, da g +−1 +(U) nicht offen ist (vgl. Abbildung 1.5). +Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) +Seien X, Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. +Dann ist g ◦ f : X → Z stetig. +X +f +/ +g◦f + +Y +g + +Z +Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f) +−1 +(U) = f +−1 +(g +−1 +(U)). g +−1 +(U) ist offen in Y weil g stetig +ist, f +−1 +(g +−1 +(U)) ist offen in X, weil f stetig ist.  +Bemerkung 10 +a) Für jeden topologischen Raum X ist +Homöo(X) := { f : X → X | f ist Homöomorphismus } +eine Gruppe. +b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. +c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für +jeden metrischen Raum X. +Bemerkung 11 (Projektionen sind stetig) +Seien X, Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen +πX : (x, y) 7→ x und πY : (x, y) 7→ y +Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig. +Beweis: Sei U ⊆ X offen +⇒ π +−1 +X (U) = U × Y ist offen in X × Y .  +Bemerkung 12 +Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum +versehen mit der Quotiententopologie, π : X → X, x 7→ [x]∼. Dann ist π stetig. -12 1.4. ZUSAMMENHANG -Beweis: Nach Definition ist U ⊆ X offen ⇔ π -−1 -(U) ⊆ X offen.  -Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. -Beispiel 14 (Stereographische Projektion) -R -n und S -n \ { N } sind homöomorph für beliebiges N ∈ S -n -. Es gilt: -S -n = - -x ∈ R -n+1 - kxk = 1 -= -( -x ∈ R -n+1 - - - - - -nX -+1 -i=1 -x -2 -i = 1 ) -O. B. d. A. sei N = - - -0 -. -. -. -0 -1 - - -. Die Gerade durch N und P schneidet die Ebene H in genau -einem Punkt Pˆ. P wird auf Pˆ abgebildet. 
-f :S -n -\ { N } → R -n -P 7→ -genau ein Punkt -z }| { -LP ∩ H -wobei R -n = H = - - - - - -x1 -. -. -. -xn+1 - - -∈ R -n+1 - - - - - - - -xn+1 = 0 - - - -und LP die Gerade in R -n+1 durch N -und P ist. -Sei P = - - -x1 -. -. -. -xn+1 - -, so ist xn+1 < 1, also ist LP nicht parallel zu H. Also schneiden sich LP -und H in genau einem Punkt Pˆ. -Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. -1.4 Zusammenhang -Definition 13 -a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen -U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X. -b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit + 1.4. ZUSAMMENHANG +Beweis: Nach Definition ist U ⊆ X offen ⇔ π +−1 +(U) ⊆ X offen.  +Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. +Beispiel 14 (Stereographische Projektion) +R +n und Sn \ { N } sind homöomorph für beliebiges N ∈ Sn +. Es gilt: +S +n = + +x ∈ R +n+1 + kxk = 1 += +( +x ∈ R +n+1 + + + + + +nX+1 +i=1 +x +2 +i = 1 ) +O. B. d. A. sei N = + + +0 +. +. +. +0 +1 + + +. Die Gerade durch N und P schneidet die Ebene H in genau +einem Punkt Pˆ. P wird auf Pˆ abgebildet. +f :S +n +\ { N } → R +n +P 7→ +genau ein Punkt +z }| { +LP ∩ H +wobei R +n = H = + + + + + +x1 +. +. +. +xn+1 + + +∈ R +n+1 + + + + + + + +xn+1 = 0 + + + +und LP die Gerade in R +n+1 durch N +und P ist. +Sei P = + + +x1 +. +. +. +xn+1 + +, so ist xn+1 < 1, also ist LP nicht parallel zu H. Also schneiden sich LP +und H in genau einem Punkt Pˆ. +Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. +1.4 Zusammenhang +Definition 13 +a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen +U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X. +b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit der Teilraumtopologie zusammenhängend ist. -13 1.4. 
ZUSAMMENHANG -x -y -z -N -Pˆ -0 -P -Abbildung 1.6: Visualisierung der stereographischen Projektion -Bemerkung 13 -X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2 -mit A1 ∩ A2 = ∅ und A1 ∪ A2 = X. -Beispiel 15 (Zusammenhang von Räumen) -1) (R -n -, TEuklid) ist zusammenhängend, denn: -Annahme: R -n = U1 ∪˙ U2 mit ∅ 6= U1, U2 ∈ TEuklid existieren. -Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun -betrachten wir V ( Rn als (metrischen) Teilraum mit der Teilraumtopologie TV . -Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie. -Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder -Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen. -2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0 -3) R -2 \ { 0 } ist zusammenhängend. -4) Q ( R ist nicht zusammenhängend, da (Q ∩ R< -√ -2 -) ∪ (Q ∩ R> -√ -2 -) = Q -5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist. -6) R mit Zariski-Topologie ist zusammenhängend. -Bemerkung 14 -Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen￾hängend. -14 1.4. ZUSAMMENHANG -Beweis: durch Widerspruch -Annahme: A = A1 ∪ A2, Ai abgeschlossen, Ai 6= ∅, A1 ∩ A2 = ∅ -⇒ A = (A ∩ A1) -| {z } -abgeschlossen -∪˙ (A ∩ A2) -| {z } -abgeschlossen -| {z } -disjunkt -Wäre A ∩ A1 = ∅ -⇒ A ⊆ A = A1 ∪˙ A2 -⇒ A ⊆ A2 ⇒ A ⊆ A2 -⇒ A1 = ∅ -⇒ Widerspruch zu A1 6= ∅ -⇒ A ∩ A1 6= ∅ und analog A ∩ A2 6= ∅ -⇒ Widerspruch zu A ist zusammenhängend.  -Bemerkung 15 -Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend. -Ist A ∩ B 6= ∅, dann ist A ∪ B zusammenhängend. -Beweis: Sei A ∪ B = U1 ∪˙ U2, Ui 6= ∅ offen -o. B. d. A. ======⇒ A = (A ∩ U1) ∪˙ (A ∩ U2) offen -A zhgd. -====⇒ A ∩ U1 = ∅ -A∩B6=∅ -====⇒ U1 ⊆ B -B = (B ∩ U1) -| {z } -=U1 -∪ (B ∩ U2) -| {z } -=∅ -ist unerlaubte Zerlegung. - -Definition 14 -Sei X ein topologischer Raum. 
-Für x ∈ X sei Z(x) ⊆ X definiert durch -Z(x) := [ -A⊆Xzhgd. -x∈A -A -Z(x) heißt Zusammenhangskomponente. -Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) -Sei X ein topologischer Raum. Dann gilt: -a) Z(x) ist die größte zusammenhängende Teilmenge von X, die x enthält. -b) Z(x) ist abgeschlossen. -c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. + 1.4. ZUSAMMENHANG +x +y +z +N +Pˆ +0 +P +Abbildung 1.6: Visualisierung der stereographischen Projektion +Bemerkung 13 +X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2 +mit A1 ∩ A2 = ∅ und A1 ∪ A2 = X. +Beispiel 15 (Zusammenhang von Räumen) +1) (R +n +, TEuklid) ist zusammenhängend, denn: +Annahme: R +n = U1 ∪˙ U2 mit ∅ 6= U1, U2 ∈ TEuklid existieren. +Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun +betrachten wir V ( Rn als (metrischen) Teilraum mit der Teilraumtopologie TV . +Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie. +Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder +Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen. +2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0 +3) R +2 \ { 0 } ist zusammenhängend. +4) Q ( R ist nicht zusammenhängend, da (Q ∩ R< +√ +2 +) ∪ (Q ∩ R> +√ +2 +) = Q +5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist. +6) R mit Zariski-Topologie ist zusammenhängend. +Bemerkung 14 +Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammenhängend. + 1.4. ZUSAMMENHANG +Beweis: durch Widerspruch +Annahme: A = A1 ∪ A2, Ai abgeschlossen, Ai 6= ∅, A1 ∩ A2 = ∅ +⇒ A = (A ∩ A1) +| {z } +abgeschlossen +∪˙ (A ∩ A2) +| {z } +abgeschlossen +| {z } +disjunkt +Wäre A ∩ A1 = ∅ +⇒ A ⊆ A = A1 ∪˙ A2 +⇒ A ⊆ A2 ⇒ A ⊆ A2 +⇒ A1 = ∅ +⇒ Widerspruch zu A1 6= ∅ +⇒ A ∩ A1 6= ∅ und analog A ∩ A2 6= ∅ +⇒ Widerspruch zu A ist zusammenhängend.  
+Bemerkung 15 +Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend. +Ist A ∩ B 6= ∅, dann ist A ∪ B zusammenhängend. +Beweis: Sei A ∪ B = U1 ∪˙ U2, Ui 6= ∅ offen +o. B. d. A. ======⇒ A = (A ∩ U1) ∪˙ (A ∩ U2) offen +A zhgd. +====⇒ A ∩ U1 = ∅ +A∩B6=∅ +====⇒ U1 ⊆ B +B = (B ∩ U1) +| {z } +=U1 +∪ (B ∩ U2) +| {z } +=∅ +ist unerlaubte Zerlegung. + +Definition 14 +Sei X ein topologischer Raum. +Für x ∈ X sei Z(x) ⊆ X definiert durch +Z(x) := [ +A⊆Xzhgd. +x∈A +A +Z(x) heißt Zusammenhangskomponente. +Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) +Sei X ein topologischer Raum. Dann gilt: +a) Z(x) ist die größte zusammenhängende Teilmenge von X, die x enthält. +b) Z(x) ist abgeschlossen. +c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. Beweis: -15 1.5. KOMPAKTHEIT -a) Sei Z(x) = A1 ∪˙ A2 mit Ai 6= ∅ abgeschlossen. -O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammehängenden Teilmenge A, -die auch x enthält. ⇒ A = (A ∩ A1) -| {z } -3x -∪ (A ∩ A2) -| {z } -3y -ist unerlaubte Zerlegung. -b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x) -c) Ist Z(y) ∩ Z(x) 6= ∅ -Bem. 15 =====⇒ Z(y) ∪ Z(x) ist zusammenhängend. -⇒ Z(x) ∪ Z(y) ⊆ Z(x) ⇒ Z(y) ⊆ Z(x) -⊆ Z(y) ⇒ Z(x) ⊆ Z(y) - -Bemerkung 17 -Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend. -Beweis: Sei f(A) = U1 ∪ U2, Ui 6= ∅, offen, disjunkt. -⇒ f -−1 -(f(A)) = f -−1 -(U1) ∪ f -−1 -(U2) -⇒ A = (A ∩ f -−1 -(U1)) -| {z } -6=∅ -∪ (A ∩ f -−1 -(U2)) -| {z } -6=∅ - -1.5 Kompaktheit -Definition 15 -Sei X eine Menge und U ⊆ P(X). -U heißt eine Überdeckung von X, wenn gilt: -∀x ∈ X : ∃M ∈ U : x ∈ M -Definition 16 -Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X -U = { Ui }i∈I mit Ui offen in X -eine endliche Teilüberdeckung -[ -i∈J⊆I -Ui = X mit |J| ∈ N -besitzt. -Bemerkung 18 -Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. -Beweis: Sei (Ui)i∈J eine offene Überdeckung von I. 
-Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in + 1.5. KOMPAKTHEIT +a) Sei Z(x) = A1 ∪˙ A2 mit Ai 6= ∅ abgeschlossen. +O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammehängenden Teilmenge A, +die auch x enthält. ⇒ A = (A ∩ A1) +| {z } +3x +∪ (A ∩ A2) +| {z } +3y +ist unerlaubte Zerlegung. +b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x) +c) Ist Z(y) ∩ Z(x) 6= ∅ +Bem. 15 =====⇒ Z(y) ∪ Z(x) ist zusammenhängend. +⇒ Z(x) ∪ Z(y) ⊆ Z(x) ⇒ Z(y) ⊆ Z(x) +⊆ Z(y) ⇒ Z(x) ⊆ Z(y) + +Bemerkung 17 +Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend. +Beweis: Sei f(A) = U1 ∪ U2, Ui 6= ∅, offen, disjunkt. +⇒ f +−1 +(f(A)) = f +−1 +(U1) ∪ f +−1 +(U2) +⇒ A = (A ∩ f +−1 +(U1)) +| {z } +6=∅ +∪ (A ∩ f +−1 +(U2)) +| {z } +6=∅ + +1.5 Kompaktheit +Definition 15 +Sei X eine Menge und U ⊆ P(X). +U heißt eine Überdeckung von X, wenn gilt: +∀x ∈ X : ∃M ∈ U : x ∈ M +Definition 16 +Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X +U = { Ui }i∈I mit Ui offen in X +eine endliche Teilüberdeckung +[ +i∈J⊆I +Ui = X mit |J| ∈ N +besitzt. +Bemerkung 18 +Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. +Beweis: Sei (Ui)i∈J eine offene Überdeckung von I. +Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in einem der Ui enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle -16 1.5. KOMPAKTHEIT -der Länge δ unterteilen und alle Ui -in die endliche Überdeckung aufnehmen, die Teilintervalle -enthalten. -Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1] -der Länge 1/n sodass In ( Ui für alle i ∈ J. -Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann -gibt es i ∈ J mit x ∈ Ui -. Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui -. 
-Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : |x − xn| < ε/2, also -In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N. -4 -⇒ Widerspruch -Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in -Uij enthalten. -⇒ Uj1 -, . . . , Ujd -ist endliche Teilüberdeckung von U.  -Beispiel 16 (Kompakte Räume) -1) R ist nicht kompakt. -2) (0, 1) ist nicht kompakt. -Un = (1/n, 1 − 1/n) ⇒ -S -n∈N Un = (0, 1) -3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. -Bemerkung 19 -Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. -Beweis: Sei (Vi)i∈I offene Überdeckung von A. -Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩ A. -⇒ A ⊆ -[ -i∈I -Ui -⇒ U = { Ui -| i ∈ I } ∪ { X \ A } ist offene Überdeckung von X -X kompakt -=======⇒ es gibt i1, . . . , in ∈ I, sodass [n -j=1 -Uij ∪ (X \ A) = X -⇒ - - -[n -j=1 -Uij ∪ (X \ A) - - ∩ A = A -⇒ -[n -j=1 -(Uij ∩ A) -| {z } -=Vij -∪ ((X \ A) ∩ A) -| {z } -=∅ -= A -⇒ Vi1 -, . . . , Vin überdecken A. - -Bemerkung 20 -Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie -kompakt. -Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es -offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆ Wi -. -3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. + 1.5. KOMPAKTHEIT +der Länge δ unterteilen und alle Uiin die endliche Überdeckung aufnehmen, die Teilintervalle +enthalten. +Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1] +der Länge 1/n sodass In ( Ui für alle i ∈ J. +Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann +gibt es i ∈ J mit x ∈ Ui. Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui. 
+Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : |x − xn| < ε/2, also +In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N. +4 +⇒ Widerspruch +Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in +Uij enthalten. +⇒ Uj1, . . . , Ujdist endliche Teilüberdeckung von U.  +Beispiel 16 (Kompakte Räume) +1) R ist nicht kompakt. +2) (0, 1) ist nicht kompakt. +Un = (1/n, 1 − 1/n) ⇒ +S +n∈N Un = (0, 1) +3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. +Bemerkung 19 +Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. +Beweis: Sei (Vi)i∈I offene Überdeckung von A. +Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩ A. +⇒ A ⊆ +[ +i∈I +Ui +⇒ U = { Ui| i ∈ I } ∪ { X \ A } ist offene Überdeckung von X +X kompakt +=======⇒ es gibt i1, . . . , in ∈ I, sodass [n +j=1 +Uij ∪ (X \ A) = X +⇒ + + +[n +j=1 +Uij ∪ (X \ A) + + ∩ A = A +⇒ +[n +j=1 +(Uij ∩ A) +| {z } +=Vij +∪ ((X \ A) ∩ A) +| {z } +=∅ += A +⇒ Vi1, . . . , Vin überdecken A. + +Bemerkung 20 +Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie +kompakt. +Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es +offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆ Wi. +3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4Sogar für unendlich viele. -17 1.5. KOMPAKTHEIT -Wi -x -y -x -Vx,y -Ux,y -Y -X -Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen -Die offenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × y. Da Y -kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit Sm(x0) -i=1 Ux0,yi × -Vx0,yi ⊇ { x0 } × Y . -Sei Ux0 -:= Tm(x) -i=1 Ux0,yi -. Da X kompakt ist, gibt es x1, . . . 
, xn ∈ X mit Sn -j=1 Uxj = X -⇒ -Sk -j=1 -Sm(xj ) -i=1 ￾ -Uxj ,yi × Vxj ,yi - -| {z } -Ein grün-oranges Kästchen -⊇ X × Y -⇒ -S -j -S -i Wi(xj , yi) = X × Y  -Bemerkung 21 -Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. -Beweis: z. Z.: Komplement ist offen -Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \ K. Für jedes x ∈ K seien -Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅. -Xi -K -x -y -Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass Sm -i=1 Uxi ⊇ K. -Sei V := \n -i=1 -Vx -18 1.6. WEGE UND KNOTEN -⇒ V ∩ - [n -i=1 -Uxi -! -= ∅ -⇒ V ∩ K = ∅ -⇒ V ist Überdeckung von y, die ganz in X \ K enthalten ist. -⇒ X \ K ist offen -Damit ist K abgeschlossen.  -Bemerkung 22 -Seien X, Y topologische Räume, f : X → Y stetig. -Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt. -Beweis: Sei (Vi)i∈I offene Überdeckung von f(K) -f stetig -====⇒ (f -−1 -(Vi))i∈I ist offene Überdeckung von K -Kompakt -=====⇒ es gibt i1, . . . , in, sodass f -−1 -(Vi1 -), . . . , f −1 -(Vin -) Überdeckung von K ist. -⇒ f(f -−1 -(Vi1 -)), . . . , f(f -−1 -(Vin -)) überdecken f(K). -Es gilt: f(f -−1 -(V )) = V ∩ f(X)  -Satz 1.1 (Heine-Borel) -Eine Teilmenge von R -n oder C -n -ist genau dann kompakt, wenn sie beschränkt und -abgeschlossen ist. -Beweis: „⇒“: Sei K ⊆ R -n -(oder C -n -) kompakt. -Da R -n und C -n hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset￾zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist -beschränkt. -„⇐“ Sei A ⊆ R -n -(oder C -n -) beschränkt und abgeschlossen. -Dann gibt es einen Würfel W = [−N, N] × · · · × [−N, N] -| {z } -n mal -mit A ⊆ W bzw. „Polyzylinder“ -Z = { (z1, . . . , zn) ∈ C -n -| zi ≤ N für i = 1, . . . , n } -Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch -kompakt. Genauso ist Z kompakt, weil -{ z ∈ C k z| ≤ 1 } -homöomorph zu - -(x, y) ∈ R -2 - - k(x, y)k ≤ 1 - -ist.  -1.6 Wege und Knoten -Definition 17 + 1.5. 
KOMPAKTHEIT +Wi +x +y +x +Vx,y +Ux,y +Y +X +Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen +Die offenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × y. Da Y +kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit Sm(x0) +i=1 Ux0,yi × +Vx0,yi ⊇ { x0 } × Y . +Sei Ux0:= Tm(x) +i=1 Ux0,yi +. Da X kompakt ist, gibt es x1, . . . , xn ∈ X mit Sn +j=1 Uxj = X +⇒ +Sk +j=1 +Sm(xj ) +i=1 +Uxj ,yi × Vxj ,yi + +| {z } +Ein grün-oranges Kästchen +⊇ X × Y +⇒ +S +j +S +i Wi(xj , yi) = X × Y  +Bemerkung 21 +Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. +Beweis: z. Z.: Komplement ist offen +Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \ K. Für jedes x ∈ K seien +Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅. +Xi +K +x +y +Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass Sm +i=1 Uxi ⊇ K. +Sei V := \n +i=1 +Vxi + 1.6. WEGE UND KNOTEN +⇒ V ∩ + [n +i=1 +Uxi +! += ∅ +⇒ V ∩ K = ∅ +⇒ V ist Überdeckung von y, die ganz in X \ K enthalten ist. +⇒ X \ K ist offen +Damit ist K abgeschlossen.  +Bemerkung 22 +Seien X, Y topologische Räume, f : X → Y stetig. +Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt. +Beweis: Sei (Vi)i∈I offene Überdeckung von f(K) +f stetig +====⇒ (f +−1 +(Vi))i∈I ist offene Überdeckung von K +Kompakt +=====⇒ es gibt i1, . . . , in, sodass f +−1 +(Vi1), . . . , f −1(Vin +) Überdeckung von K ist. +⇒ f(f +−1 +(Vi1)), . . . , f(f +−1 +(Vin)) überdecken f(K). +Es gilt: f(f +−1 +(V )) = V ∩ f(X)  +Satz 1.1 (Heine-Borel) +Eine Teilmenge von R +n oder Cn +ist genau dann kompakt, wenn sie beschränkt und +abgeschlossen ist. +Beweis: „⇒“: Sei K ⊆ R +n +(oder C +n +) kompakt. +Da R +n und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Voraussetzung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist +beschränkt. +„⇐“ Sei A ⊆ R +n +(oder C +n +) beschränkt und abgeschlossen. 
+Dann gibt es einen Würfel W = [−N, N] × · · · × [−N, N] +| {z } +n mal +mit A ⊆ W bzw. „Polyzylinder“ +Z = { (z1, . . . , zn) ∈ C +n +| zi ≤ N für i = 1, . . . , n } +Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch +kompakt. Genauso ist Z kompakt, weil +{ z ∈ C k z| ≤ 1 } +homöomorph zu + +(x, y) ∈ R +2 + + k(x, y)k ≤ 1 + +ist.  +1.6 Wege und Knoten +Definition 17 +Sei X ein topologischer Raum. + 1.6. WEGE UND KNOTEN +a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X. +b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. +c) γ heißt einfach, wenn γ|[0,1) injektiv ist. +Beispiel 17 +Ist X diskret, so ist jeder Weg konstant, d. h. von der Form +∀x ∈ [0, 1] : γ(x) = c, c ∈ X +Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ. +Definition 18 +Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten +x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ(0) = x und γ(1) = y. +Bemerkung 23 Sei X ein topologischer Raum. -19 1.6. WEGE UND KNOTEN -a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X. -b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. -c) γ heißt einfach, wenn γ|[0,1) injektiv ist. -Beispiel 17 -Ist X diskret, so ist jeder Weg konstant, d. h. von der Form -∀x ∈ [0, 1] : γ(x) = c, c ∈ X -Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ. -Definition 18 -Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten -x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ(0) = x und γ(1) = y. -Bemerkung 23 -Sei X ein topologischer Raum. -a) X ist wegzusammenhängend ⇒ X ist zusammenhängend -b) X ist wegzusammenhängend 6⇐ X ist zusammenhängend -Beweis: -a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte, -abgeschlossene Teilmengen von X mit A1 ∪ A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1] → X -ein Weg von x nach y. -Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. 
-C = (C ∩ A1) -| {z } -3x -∪ (C ∩ A2) -| {z } -3y -ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch -b) Sei X = -n -(x, y) ∈ R -2 - - - -x -2 + y -2 = 1 ∨ y = 1 + 2 · e -− 1 -10 x -o -. -Abbildung 1.8a veranschaulicht diesen Raum. -Sei U1 ∪ U2 = X, U1 6= U2 = ∅, Ui offen. X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2, -weil C und S zusammenhängend sind. -Also ist C = U1 und S = U2 (oder umgekehrt). -Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist. -Aber: Bε(y) ∩ S 6= ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht -wegzusammenhängend.  -Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbil￾dung 1.9 dargestellte Hilbert-Kurve. -Definition 19 -Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : -[0, 1] → C ⊆ X bzw. γ : S +a) X ist wegzusammenhängend ⇒ X ist zusammenhängend +b) X ist wegzusammenhängend 6⇐ X ist zusammenhängend +Beweis: +a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte, +abgeschlossene Teilmengen von X mit A1 ∪ A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1] → X +ein Weg von x nach y. +Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. +C = (C ∩ A1) +| {z } +3x +∪ (C ∩ A2) +| {z } +3y +ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch +b) Sei X = +n +(x, y) ∈ R +2 + + + +x +2 + y2 = 1 ∨ y = 1 + 2 · e +− 1 +10 x +o +. +Abbildung 1.8a veranschaulicht diesen Raum. +Sei U1 ∪ U2 = X, U1 6= U2 = ∅, Ui offen. X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2, +weil C und S zusammenhängend sind. +Also ist C = U1 und S = U2 (oder umgekehrt). +Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist. +Aber: Bε(y) ∩ S 6= ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht +wegzusammenhängend.  +Beispiel 18 (Hilbert-Kurve) +Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. 
Ein Beispiel ist die in Abbildung 1.9 dargestellte Hilbert-Kurve. +Definition 19 +Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : +[0, 1] → C ⊆ X bzw. γ : S 1 → C ⊆ X, wobei C := Bild γ. -20 1.6. WEGE UND KNOTEN -(a) Spirale S mit Kreis C -0.1 1 -−1 -0 -1 -X -Y -{(x,sin( 1 -x -)) ∈ X × Y } -(−1, 1) ⊆ Y -(b) Sinus -Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend -sind. -(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5 -Abbildung 1.9: Hilbert-Kurve -Jede Jordankurve ist also ein einfacher Weg. -Satz 1.2 (Jordanscher Kurvensatz) -Ist C = γ([0, 1]) eine geschlossene Jordankurve in R -2 -, so hat R -2 \ C genau zwei -Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. -außen -innen -Jordankurve -Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die be￾schränkte äußeres genannt. -Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie: -Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265) -nachgelesen werden. + 1.6. WEGE UND KNOTEN +(a) Spirale S mit Kreis C +0.1 1 +−1 +0 +1 +X +Y +{(x,sin( 1 +x +)) ∈ X × Y } +(−1, 1) ⊆ Y +(b) Sinus +Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend +sind. +(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5 +Abbildung 1.9: Hilbert-Kurve +Jede Jordankurve ist also ein einfacher Weg. +Satz 1.2 (Jordanscher Kurvensatz) +Ist C = γ([0, 1]) eine geschlossene Jordankurve in R +2 +, so hat R +2 \ C genau zwei +Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. +außen +innen +Jordankurve +Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die beschränkte äußeres genannt. +Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie: +Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 
301f (ISBN 978-3519122265) +nachgelesen werden. Idee: Ersetze Weg C durch Polygonzug. -21 1.6. WEGE UND KNOTEN -Definition 20 -Eine geschlossene Jordankurve in R -3 heißt Knoten. -Beispiel 19 (Knoten) -(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 62-Knoten -Abbildung 1.11: Beispiele für verschiedene Knoten -Definition 21 -Zwei Knoten γ1, γ2 : S -1 → R -3 heißen äquivalent, wenn es eine stetige Abbildung -H : S -1 × [0, 1] → R -3 -gibt mit -H(z, 0) = γ1(z) ∀z ∈ S -1 -H(z, 1) = γ2(z) ∀z ∈ S -1 -und für jedes feste t ∈ [0, 1] ist -Hz : S -1 → R -3 -, z 7→ H(z, t) -ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2. -Definition 22 -Sei γ : [0, 1] → R -3 -ein Knoten, E eine Ebene und π : R -3 → E eine Projektion auf E. -π heißt Knotendiagramm von γ, wenn gilt: - - π -−1 -(x) - - ≤ 2 ∀x ∈ π(γ) -Ist (π|γ([0,1])) -−1 -(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt: -∃λ > 1 : (y1 − x) = λ(y2 − x) -Satz 1.3 (Satz von Reidemeister) -Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie + 1.6. WEGE UND KNOTEN +Definition 20 +Eine geschlossene Jordankurve in R +3 heißt Knoten. +Beispiel 19 (Knoten) +(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 62-Knoten +Abbildung 1.11: Beispiele für verschiedene Knoten +Definition 21 +Zwei Knoten γ1, γ2 : S +1 → R3 heißen äquivalent, wenn es eine stetige Abbildung +H : S +1 × [0, 1] → R3 +gibt mit +H(z, 0) = γ1(z) ∀z ∈ S +1 +H(z, 1) = γ2(z) ∀z ∈ S +1 +und für jedes feste t ∈ [0, 1] ist +Hz : S +1 → R3 +, z 7→ H(z, t) +ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2. +Definition 22 +Sei γ : [0, 1] → R +3 +ein Knoten, E eine Ebene und π : R +3 → E eine Projektion auf E. 
+π heißt Knotendiagramm von γ, wenn gilt: + + π +−1 +(x) + + ≤ 2 ∀x ∈ π(γ) +Ist (π|γ([0,1])) +−1 +(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt: +∃λ > 1 : (y1 − x) = λ(y2 − x) +Satz 1.3 (Satz von Reidemeister) +Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. -22 1.6. WEGE UND KNOTEN -(a) Ω1 (b) Ω2 -(c) Ω3 -Abbildung 1.12: Reidemeister-Züge -Beweis: Durch sorgfältige Fallunterscheidung.5 -Definition 23 -Ein Knotendiagramm heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt -werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben -auftreten. -Abbildung 1.13: Ein 3-gefärber Kleeblattknoten + 1.6. WEGE UND KNOTEN +(a) Ω1 (b) Ω2 +(c) Ω3 +Abbildung 1.12: Reidemeister-Züge +Beweis: Durch sorgfältige Fallunterscheidung.5 +Definition 23 +Ein Knotendiagramm heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt +werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben +auftreten. +Abbildung 1.13: Ein 3-gefärber Kleeblattknoten 5Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177. -23 1.6. WEGE UND KNOTEN -Übungsaufgaben -Aufgabe 1 (Sierpińskiraum) -Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. -(a) Beweisen Sie, dass (X, TX) ein topologischer Raum ist. -(b) Ist (X, TX) hausdorffsch? -(c) Ist TX von einer Metrik erzeugt? -Aufgabe 2 -Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie -versehen. -Zeigen Sie: -(a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen. -(b) { −1, 1 } ist nicht offen. -(c) Es gibt unendlich viele Primzahlen. -Aufgabe 3 (Cantorsches Diskontinuum) -Für jedes i ∈ N sei Pi -:= { 0, 1 } mit der diskreten Topologie. Weiter Sei P := Q -i∈N Pi -. -(a) Wie sehen die offenen Mengen von P aus? -(b) Was können Sie über den Zusammenhang von P sagen? 
-Aufgabe 4 (Kompaktheit) -(a) Ist GLn(R) = { A ∈ R -n×n -| det(A) 6= 0 } kompakt? -(b) Ist SLn(R) = { A ∈ R -n×n -| det(A) = 1 } kompakt? -(c) Ist P(R) kompakt? -Aufgabe 5 (Begriffe) -Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. -Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, -begründen Sie warum. -1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist, + 1.6. WEGE UND KNOTEN +Übungsaufgaben +Aufgabe 1 (Sierpińskiraum) +Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. +(a) Beweisen Sie, dass (X, TX) ein topologischer Raum ist. +(b) Ist (X, TX) hausdorffsch? +(c) Ist TX von einer Metrik erzeugt? +Aufgabe 2 +Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie +versehen. +Zeigen Sie: +(a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen. +(b) { −1, 1 } ist nicht offen. +(c) Es gibt unendlich viele Primzahlen. +Aufgabe 3 (Cantorsches Diskontinuum) +Für jedes i ∈ N sei Pi:= { 0, 1 } mit der diskreten Topologie. Weiter Sei P := Q +i∈N Pi +. +(a) Wie sehen die offenen Mengen von P aus? +(b) Was können Sie über den Zusammenhang von P sagen? +Aufgabe 4 (Kompaktheit) +(a) Ist GLn(R) = { A ∈ R +n×n +| det(A) 6= 0 } kompakt? +(b) Ist SLn(R) = { A ∈ R +n×n +| det(A) = 1 } kompakt? +(c) Ist P(R) kompakt? +Aufgabe 5 (Begriffe) +Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. +Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, +begründen Sie warum. +1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist, 2) ein Homomorphismus, der kein Homöomorphismus ist, -24 1.6. WEGE UND KNOTEN -3) ein Homöomorphismus, der kein Homomorphismus ist -Aufgabe 6 (Begriffe) + 1.6. WEGE UND KNOTEN +3) ein Homöomorphismus, der kein Homomorphismus ist +Aufgabe 6 (Begriffe) Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“. 
-2 Mannigfaltigkeiten und -Simplizialkomplexe -2.1 Topologische Mannigfaltigkeiten -Definition 24 -Sei (X, T) ein topologischer Raum und n ∈ N. -a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V -Homöomorphismus von U auf eine offene Teilmenge V ⊆ R -n -. -b) Ein n-dimensionaler Atlas A auf X ist eine Familie (Ui -, ϕi)i∈I von Karten auf X, -sodass S -i∈I Ui = X. -c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, -eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. -Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem R -n ähnlich. -Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) -Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R. -Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ R -n -, wobei -V offen und ϕ ein Homöomorphismus ist, eine Karte auf X. -Da jede offene Teilmenge des R -n genauso mächtig ist wie der R -n -, ϕ als Homöomorphismus -insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig -sind, ist U genauso mächtig wie der R -n -. Da jede Mannigfaltigkeit mindestens eine Karte -hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der R -n -.  -Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können -beliebig viele Elemente haben. -Bemerkung 25 -a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1] -b) Für n =6 m sind R -n und R -m nicht homöomorph. Zum Beweis benutzt man den „Satz -von der Gebietstreue“ (Brouwer): -Ist U ⊆ R -n offen und f : U → R -n -stetig und injektiv, so ist f(U) offen. -Ist n < m und R -m homöomorph zu R -n -, so wäre -f : R -n → R -m → R -n -, (x1, . . . , xn) 7→ (x1, x2, . . . , xn, 0, . . . , 0) -eine stetige injektive Abbildung. 
Also müsste f(R -n +2 Mannigfaltigkeiten und +Simplizialkomplexe +2.1 Topologische Mannigfaltigkeiten +Definition 24 +Sei (X, T) ein topologischer Raum und n ∈ N. +a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V +Homöomorphismus von U auf eine offene Teilmenge V ⊆ R +n +. +b) Ein n-dimensionaler Atlas A auf X ist eine Familie (Ui, ϕi)i∈I von Karten auf X, +sodass S +i∈I Ui = X. +c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, +eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. +Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem R +n ähnlich. +Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) +Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R. +Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ R +n +, wobei +V offen und ϕ ein Homöomorphismus ist, eine Karte auf X. +Da jede offene Teilmenge des R +n genauso mächtig ist wie der Rn +, ϕ als Homöomorphismus +insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig +sind, ist U genauso mächtig wie der R +n +. Da jede Mannigfaltigkeit mindestens eine Karte +hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der R +n +.  +Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können +beliebig viele Elemente haben. +Bemerkung 25 +a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1] +b) Für n 6= m sind R +n und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz +von der Gebietstreue“ (Brouwer): +Ist U ⊆ R +n offen und f : U → Rn +stetig und injektiv, so ist f(U) offen. +Ist n < m und R +m homöomorph zu Rn +, so wäre +f : R +n → Rm → Rn +, (x1, . . . , xn) 7→ (x1, x2, . . . , xn, 0, . . . , 0) +eine stetige injektive Abbildung. Also müsste f(R +n ) offen sein ⇒ Widerspruch -26 2.1. 
TOPOLOGISCHE MANNIGFALTIGKEITEN -Beispiel 20 (Mannigfaltigkeiten) -1) Jede offene Teilmenge U ⊆ R -n -ist eine n-dimensionale Mannigfaltigkeit mit einem -Atlas aus einer Karte. -2) C -n -ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: -(z1, . . . , zn) 7→ (<(z1), =(z1), . . . , <(zn), =(zn)) -3) P -n -(R) = (R -n+1 \ { 0 })/∼ = S -n/∼ und P -n -(C) sind Mannigfaltigkeiten der Dimension -n bzw. 2n, da gilt: -Sei Ui -:= { (x0 : · · · : xn) ∈ Pn -(R) | xi 6= 0 } ∀i ∈ 0, . . . , n. Dann ist P -n -(R) = Sn -i=0 Ui -und die Abbildung -Ui → R -n -(x0 : · · · : xn) 7→ - -x0 -xi -, . . . , ✁ -✁ -✁ -xi -xi -, . . . , -xn -xi - -(y1 : · · · : yi−1 : 1 : yi -: · · · : yn) 7→(y1, . . . , yn) -ist bijektiv. -Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: -x = (1 : 0 : 0) ∈ U0 → R -2 x 7→ (0, 0) -y = (0 : 1 : 1) ∈ U2 → R -2 -y 7→ (0, 1) -Umgebung: B1(0, 1) → { (1 : u : v) | k(u, v)k < 1 } = V1 -Umgebung: B1(0, 1) → - -(w : z : 1) - - w -2 + z -2 < 1 - -= V2 -V1 ∩ V2 = ∅? -(a : b : c) ∈ V1 ∩ V2 -⇒ a 6= 0 und ( -b -a -) -2 + ( c -a -) -2 < 1 ⇒ c -a < 1 -⇒ c 6= 0 und ( -a -c -) -2 + ( b -c -) -2 < 1 ⇒ a -c < 1 -⇒ Widerspruch -4) S -n = - -x ∈ R -n+1 - kxk = 1 -ist n-dimensionale Mannigfaltigkeit. -Karten: -Di -:= {(x1, . . . , xn+1) ∈ S -n -|xi > 0} → B1(0, . . . , 0 -| {z } -∈Rn -) -Ci -:= {(x1, . . . , xn+1) ∈ S -n -|xi < 0} → B1(0, . . . , 0) -(x1, . . . , xn+1) 7→ (x1, . . . ,✚xi -, . . . , xn+1) -1 -(x1, . . . , xn) 7→ (x1, . . . , xi−1, -q -1 − -Pn -k=1 x -2 -k -, xi -, . . . , xn), oder − -q -1 − -Pn -k=1 x -2 -k -für Ci -S -n = -Sn+1 -i=1 (Ci ∪ Di) -Als kompakte Mannigfaltigkeit wird S -n auch „geschlossene Mannigfaltigkeit“ genannt. -5) [0, 1] ist keine Mannigfaltigkeit, denn: -Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall -ist. + 2.1. 
TOPOLOGISCHE MANNIGFALTIGKEITEN +Beispiel 20 (Mannigfaltigkeiten) +1) Jede offene Teilmenge U ⊆ R +n +ist eine n-dimensionale Mannigfaltigkeit mit einem +Atlas aus einer Karte. +2) C +n +ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: +(z1, . . . , zn) 7→ (<(z1), =(z1), . . . , <(zn), =(zn)) +3) P +n +(R) = (R +n+1 \ { 0 })/∼ = Sn/∼ und Pn +(C) sind Mannigfaltigkeiten der Dimension +n bzw. 2n, da gilt: +Sei Ui:= { (x0 : · · · : xn) ∈ Pn(R) | xi 6= 0 } ∀i ∈ 0, . . . , n. Dann ist P +n +(R) = Sn +i=0 Ui +und die Abbildung +Ui → R +n +(x0 : · · · : xn) 7→ + +x0 +xi +, . . . , ✁ +✁ +✁ +xi +xi +, . . . , +xn +xi + +(y1 : · · · : yi−1 : 1 : yi: · · · : yn) 7→(y1, . . . , yn) +ist bijektiv. +Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: +x = (1 : 0 : 0) ∈ U0 → R +2 x 7→ (0, 0) +y = (0 : 1 : 1) ∈ U2 → R +2 +y 7→ (0, 1) +Umgebung: B1(0, 1) → { (1 : u : v) | k(u, v)k < 1 } = V1 +Umgebung: B1(0, 1) → + +(w : z : 1) + + w +2 + z2 < 1 + += V2 +V1 ∩ V2 = ∅? +(a : b : c) ∈ V1 ∩ V2 +⇒ a 6= 0 und ( +b +a +) +2 + ( c +a +) +2 < 1 ⇒ c +a < 1 +⇒ c 6= 0 und ( +a +c +) +2 + ( b +c +) +2 < 1 ⇒ a +c < 1 +⇒ Widerspruch +4) S +n = + +x ∈ R +n+1 + kxk = 1 +ist n-dimensionale Mannigfaltigkeit. +Karten: +Di:= {(x1, . . . , xn+1) ∈ S +n +|xi > 0} → B1(0, . . . , 0 +| {z } +∈Rn +) +Ci:= {(x1, . . . , xn+1) ∈ S +n +|xi < 0} → B1(0, . . . , 0) +(x1, . . . , xn+1) 7→ (x1, . . . ,✚xi, . . . , xn+1) +1 +(x1, . . . , xn) 7→ (x1, . . . , xi−1, +q +1 − +Pn +k=1 x +2 +k +, xi, . . . , xn), oder − +q +1 − +Pn +k=1 x +2 +k +für Ci +S +n = +Sn+1 +i=1 (Ci ∪ Di) +Als kompakte Mannigfaltigkeit wird S +n auch „geschlossene Mannigfaltigkeit“ genannt. +5) [0, 1] ist keine Mannigfaltigkeit, denn: +Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall +ist. 1xi wird rausgenommen -27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN -6) V1 = - -(x, y) ∈ R -2 - - x · y = 0 -ist keine Mannigfaltigkeit. -Das Problem ist (0, 0). 
Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 -Zusammenhangskomponenten. Jeder R -n -zerfällt jedoch in höchstens zwei Zusammen￾hangskomponenten, wenn man einen Punkt entfernt. -7) V2 = - -(x, y) ∈ R -2 - - x -3 = y -2 - -ist eine Mannigfaltigkeit. -8) X = (R \ { 0 }) ∪ (01, 02) -U ⊆ X offen ⇔ -( -U offen in R \ { 0 } , falls 01 ∈/ U, 02 ∈ U -∃ε > 0 : (−ε, ε) ⊆ U falls 01 ∈ U, 02 ∈ U -Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph -zu R. -Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01 -und 02. -9) GLn(R) ist eine Mannigfaltigkeit der Dimension n -2 -, weil offene Teilmengen von R -n -2 -eine Mannigfaltigkeit bilden. -Definition 25 -Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho￾möomorphismus Z = (X ∪˙ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation -und der von ∼ induzierten Quotiententopologie. -Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen -Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. -Bemerkung 26 -Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit -der Dimension n + m. -Beweis: Produkte von Karten sind Karten.  -Beispiel 21 -Mannigfaltigkeiten mit Dimension 1: -1) Offene Intervalle, R, (0, 1) sind alle homöomorph -2) S -1 -Mannigfaltigkeiten mit Dimension 2: -1) R -2 -2) S -2 -(0 Henkel) -3) T -2 -(1 Henkel) -4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1 -Bemerkung 27 -Sei n ∈ N, F : R -n → R stetig differenzierbar und X = V (F) := { x ∈ R -n -| F(x) = 0 } das -„vanishing set“. + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +6) V1 = + +(x, y) ∈ R +2 + + x · y = 0 +ist keine Mannigfaltigkeit. +Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 +Zusammenhangskomponenten. Jeder R +n +zerfällt jedoch in höchstens zwei Zusammenhangskomponenten, wenn man einen Punkt entfernt. 
+7) V2 = + +(x, y) ∈ R +2 + + x +3 = y2 + +ist eine Mannigfaltigkeit. +8) X = (R \ { 0 }) ∪ (01, 02) +U ⊆ X offen ⇔ +( +U offen in R \ { 0 } , falls 01 ∈/ U, 02 ∈ U +∃ε > 0 : (−ε, ε) ⊆ U falls 01 ∈ U, 02 ∈ U +Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph +zu R. +Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01 +und 02. +9) GLn(R) ist eine Mannigfaltigkeit der Dimension n +2 +, weil offene Teilmengen von R +n +2 +eine Mannigfaltigkeit bilden. +Definition 25 +Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Homöomorphismus Z = (X ∪˙ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation +und der von ∼ induzierten Quotiententopologie. +Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen +Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. +Bemerkung 26 +Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit +der Dimension n + m. +Beweis: Produkte von Karten sind Karten.  +Beispiel 21 +Mannigfaltigkeiten mit Dimension 1: +1) Offene Intervalle, R, (0, 1) sind alle homöomorph +2) S +1 +Mannigfaltigkeiten mit Dimension 2: +1) R +2 +2) S +2 +(0 Henkel) +3) T +2 +(1 Henkel) +4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1 +Bemerkung 27 +Sei n ∈ N, F : R +n → R stetig differenzierbar und X = V (F) := { x ∈ Rn +| F(x) = 0 } das +„vanishing set“. Dann gilt: -28 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN -Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. -a) X ist abgeschlossen in R -n -b) Ist grad(F)(X) 6= 0 ∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n − 1. -Beweis: -a) Sei y ∈ R -n \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆ Bε(F(y)) mit -ε = -1 -2 -kF(y)k. Folgt Bδ(y) ∩ V (F) = ∅ ⇒ R -n \ V (F) ist offen. -b) Sei x ∈ X mit grad(F)(x) 6= 0, also o. B. d. A. ∂F -∂X1 -(x) 6= 0, x = (x1, . . . 
, xn), -x -0 -:= (x2, . . . , xn) ∈ R -n−1 -. Der Satz von der impliziten Funktion liefert nun: Es -gibt Umgebungen U von x -0 und differenzierbare Funktionen g : U → R, sodass -G : U → R -n -, u 7→ (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x -in X ist. - -Beispiel 22 -1) F : R -3 → R, (x, y, z) 7→ x -2+y -2+z -2−1, V (F) = S -2 -, grad(F) = (2x, 2y, 2z) -Bem. 27.b ======⇒ -S -n -ist n-dimensionale Mannigfaltigkeit in R -n+1 -2) F : R -2 → R, (x, y) 7→ y -2 −x -3 Es gilt: grad(F) = (−3x -2 -, 2y). Also: grad(0, 0) = (0, 0). -−5 −4 −3 −2 −1 -0 -1 -2 -3 -4 -5 -−4 -−2 -0 -2 -4 -−100 -0 -100 -x -y -z -−100 -0 -100 -f(x, y) -(a) F(x, y) = y -2 − x -3 -2 4 6 8 10 12 -−10 -−5 -5 -10 -x -y -a = -1 -3 -a = 1 -a = 2 -(b) y -2 − ax3 = 0 -Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. -Daher ist Bemerkung 27.b nicht anwendbar, aber V (F) ist trotzdem eine 1-dimensionale + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. +a) X ist abgeschlossen in R +n +b) Ist grad(F)(X) 6= 0 ∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n − 1. +Beweis: +a) Sei y ∈ R +n \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆ Bε(F(y)) mit +ε = +1 +2 +kF(y)k. Folgt Bδ(y) ∩ V (F) = ∅ ⇒ R +n \ V (F) ist offen. +b) Sei x ∈ X mit grad(F)(x) 6= 0, also o. B. d. A. ∂F +∂X1 +(x) 6= 0, x = (x1, . . . , xn), +x +0 +:= (x2, . . . , xn) ∈ R +n−1 +. Der Satz von der impliziten Funktion liefert nun: Es +gibt Umgebungen U von x +0 und differenzierbare Funktionen g : U → R, sodass +G : U → R +n +, u 7→ (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x +in X ist. + +Beispiel 22 +1) F : R +3 → R, (x, y, z) 7→ x2+y2+z2−1, V (F) = S2 +, grad(F) = (2x, 2y, 2z) +Bem. 27.b ======⇒ +S +n +ist n-dimensionale Mannigfaltigkeit in R +n+1 +2) F : R +2 → R, (x, y) 7→ y2 −x3 Es gilt: grad(F) = (−3x2 +, 2y). Also: grad(0, 0) = (0, 0). 
+−5 −4 −3 −2 −1 +0 +1 +2 +3 +4 +5 +−4 +−2 +0 +2 +4 +−100 +0 +100 +x +y +z +−100 +0 +100 +f(x, y) +(a) F(x, y) = y +2 − x3 +2 4 6 8 10 12 +−10 +−5 +5 +10 +x +y +a = +1 +3 +a = 1 +a = 2 +(b) y +2 − ax3 = 0 +Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. +Daher ist Bemerkung 27.b nicht anwendbar, aber V (F) ist trotzdem eine 1-dimensionale topologische Mannigfaltigkeit. -29 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN -Definition 26 -Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale -Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui -, ϕi) gibt, wobei Ui ⊆ Xi offen und -ϕi ein Homöomorphismus auf eine offene Teilmenge von -R -n -+,0 -:= { (x1, . . . , xn) ∈ R -n -| xn ≥ 0 } -ist. -R -n -+,0 -ist ein „Halbraum“. -Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. -∼= -(a) Halbraum -∼ -= -(b) Pair of pants -∼= -(c) Sphäre mit einem Loch -Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand -Definition 27 -Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt -∂X := [ -(U,ϕ)∈A -{ x ∈ U | ϕ(x) = 0 } -Rand von X. -∂X ist eine Mannigfaltigkeit der Dimension n − 1. -Definition 28 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui -, ϕi)i∈I -Für i, j ∈ I mit Ui ∩ Uj 6= ∅ heißt -ϕij := ϕj ◦ ϕ -−1 -i -ϕi(Ui ∩ Uj ) → ϕj (Ui ∩ Uj ) + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +Definition 26 +Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale +Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui, ϕi) gibt, wobei Ui ⊆ Xi offen und +ϕi ein Homöomorphismus auf eine offene Teilmenge von +R +n ++,0 +:= { (x1, . . . , xn) ∈ R +n +| xn ≥ 0 } +ist. +R +n ++,0 +ist ein „Halbraum“. +Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. +∼= +(a) Halbraum +∼ += +(b) Pair of pants +∼= +(c) Sphäre mit einem Loch +Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand +Definition 27 +Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. 
Dann heißt +∂X := [ +(U,ϕ)∈A +{ x ∈ U | ϕ(x) = 0 } +Rand von X. +∂X ist eine Mannigfaltigkeit der Dimension n − 1. +Definition 28 +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I +Für i, j ∈ I mit Ui ∩ Uj 6= ∅ heißt +ϕij := ϕj ◦ ϕ +−1 +i +ϕi(Ui ∩ Uj ) → ϕj (Ui ∩ Uj ) Kartenwechsel oder Übergangsfunktion. -30 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN -R -n R -n -Ui Uj -Vi Vj -X -ϕi ϕj -Abbildung 2.4: Kartenwechsel -2.2 Differenzierbare Mannigfaltigkeiten -Definition 29 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui -, ϕi)i∈I . -a) X heißt differenzierbare Mannigfaltigkeit der Klasse C -k -, wenn jede Karten￾wechselabbildung ϕij , i, j ∈ I k-mal stetig differenzierbar ist. -b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig￾faltigkeit der Klasse C∞ ist. -Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt. -Definition 30 -Sei X eine differenzierbare Mannigfaltigkeit der Klasse C -k -(k ∈ N ∪ { ∞ }) mit Atlas -A = (Ui -, ϕi)i∈I . -a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ -−1 -i -und ϕi ◦ ϕ -−1 -(i ∈ I mit Ui ∩ U 6= ∅) differenzierbar von Klasse C -k -sind. -b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der -Klasse C -k -. Er heißt C -k --Struktur auf X. -Eine C∞-Struktur heißt auch differenzierbare Struktur auf X. -Bemerkung 28 -Für n ≥ 4 gibt es auf S -n mehrere verschiedene differenzierbare Strukturen, die sogenannten -„exotische Sphären“. -Definition 31 -Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X. -a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C -k -), wenn -es Karten (U, ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass -ψ ◦ f ◦ ϕ -−1 -stetig differenzierbar von Klasse C -k -in ϕ(x) ist. -b) f heißt differenzierbar (von Klasse C -k -), wenn f in jedem x ∈ X differenzierbar ist. 
-c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine -differenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +R +n Rn +Ui Uj +Vi Vj +X +ϕi ϕj +Abbildung 2.4: Kartenwechsel +2.2 Differenzierbare Mannigfaltigkeiten +Definition 29 +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I . +a) X heißt differenzierbare Mannigfaltigkeit der Klasse C +k +, wenn jede Kartenwechselabbildung ϕij , i, j ∈ I k-mal stetig differenzierbar ist. +b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannigfaltigkeit der Klasse C∞ ist. +Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt. +Definition 30 +Sei X eine differenzierbare Mannigfaltigkeit der Klasse C +k +(k ∈ N ∪ { ∞ }) mit Atlas +A = (Ui, ϕi)i∈I . +a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ +−1 +i +und ϕi ◦ ϕ +−1 +(i ∈ I mit Ui ∩ U 6= ∅) differenzierbar von Klasse C +k +sind. +b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der +Klasse C +k +. Er heißt C +k +-Struktur auf X. +Eine C∞-Struktur heißt auch differenzierbare Struktur auf X. +Bemerkung 28 +Für n ≥ 4 gibt es auf S +n mehrere verschiedene differenzierbare Strukturen, die sogenannten +„exotische Sphären“. +Definition 31 +Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X. +a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C +k +), wenn +es Karten (U, ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass +ψ ◦ f ◦ ϕ +−1 +stetig differenzierbar von Klasse C +k +in ϕ(x) ist. +b) f heißt differenzierbar (von Klasse C +k +), wenn f in jedem x ∈ X differenzierbar ist. +c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine +differenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und f ◦ g = idY . -31 2.2. 
DIFFERENZIERBARE MANNIGFALTIGKEITEN -Bemerkung 29 -Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. -Beweis: Seien (U -0 -, ϕ0 -) und (V -0 -, ψ0 -) Karten von X bzw. Y um x bzw. f(x) mit f(U -0 -) ⊆ V -0 -. -⇒ ψ -0 ◦ f ◦ (ϕ -0 -) -−1 -= ψ -0 ◦ (ψ -−1 ◦ ψ) ◦ f ◦ (ϕ -−1 ◦ ϕ) ◦ (ϕ -0 -) -−1 -ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ -−1 differenzierbar ist. -Beispiel 23 -f : R → R, x 7→ x -3 -ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := √3 x -gilt: f ◦ g = idR, g ◦ f = idR -Bemerkung 30 -Sei X eine glatte Mannigfaltigkeit. Dann ist -Diffeo(X) := { f : X → X | f ist Diffeomorphismus } -eine Untergruppe von Homöo(X). -Definition 32 -S ⊆ R -3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R -3 ∃U ⊆ R -2 offen: -∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 ∀u ∈ U. -F heißt (lokale) reguläre Parametrisierung von S. -F(u, v) = (x(u, v), y(u, v), z(u, v)) -JF (u, v) = - - -∂x -∂u(p) -∂x -∂v (p) -∂y -∂u(p) -∂y -∂v (p) -∂z -∂u(p) -∂z -∂v (p) - - -Beispiel 24 -1) Rotationsflächen: Sei r : R → R>0 eine differenzierbare Funktion. -F : R -2 → R -3 -(u, v) 7→ (r(u) cos(u), r(v) sin(u), v) -JF (u, v) = - - -−r(v) sin u r0 -(v) cos u -r(v) cos u r0 -(v) sin u -0 1 - - -hat Rang 2 für alle (u, v) ∈ R -2 -. -2) Kugelkoordinaten: F : R -2 → R -3 -, -(u, v) 7→ (R cos v cos u, R cos v sin u, R sin v) -Es gilt: F(u, v) ∈ S -2 -R -, denn -R -2 -cos2 -(v) cos2 -(u) + R -2 -cos2 -(v) sin2 -(u) + R -2 -sin2 -(v) -=R -2 -(cos2 -(v) cos2 -(u) + cos2 -(v) sin2 -(u) + sin2 -(v)) -=R -2 -￾ -cos2 -(v)(cos2 -(u) + sin2 -(u)) + sin2 -(v) - -=R -2 -￾ -cos2 -(v) + sin2 -(v) - -=R -32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN -N -S -v u -(a) Kugelkoordinaten -−1 -0 -1 -2−2 -−1 -0 -1 -2 -0.6 -0.8 -1 -(b) Rotationskörper -π -2 -π 3π -2 -2π -−1 -−0.5 -0.5 -1 -x -y -sin x -cos x + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +Bemerkung 29 +Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. 
+Beweis: Seien (U +0 +, ϕ0) und (V +0 +, ψ0) Karten von X bzw. Y um x bzw. f(x) mit f(U +0 +) ⊆ V +0 +. +⇒ ψ +0 ◦ f ◦ (ϕ0 +) +−1 += ψ +0 ◦ (ψ−1 ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ0 +) +−1 +ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ +−1 differenzierbar ist. +Beispiel 23 +f : R → R, x 7→ x +3 +ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := √3 x +gilt: f ◦ g = idR, g ◦ f = idR +Bemerkung 30 +Sei X eine glatte Mannigfaltigkeit. Dann ist +Diffeo(X) := { f : X → X | f ist Diffeomorphismus } +eine Untergruppe von Homöo(X). +Definition 32 +S ⊆ R +3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 ∃U ⊆ R2 offen: +∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 ∀u ∈ U. +F heißt (lokale) reguläre Parametrisierung von S. +F(u, v) = (x(u, v), y(u, v), z(u, v)) +JF (u, v) = + + +∂x +∂u(p) +∂x +∂v (p) +∂y +∂u(p) +∂y +∂v (p) +∂z +∂u(p) +∂z +∂v (p) + + +Beispiel 24 +1) Rotationsflächen: Sei r : R → R>0 eine differenzierbare Funktion. +F : R +2 → R3 +(u, v) 7→ (r(u) cos(u), r(v) sin(u), v) +JF (u, v) = + + +−r(v) sin u r0(v) cos u +r(v) cos u r0(v) sin u +0 1 + + +hat Rang 2 für alle (u, v) ∈ R +2 +. +2) Kugelkoordinaten: F : R +2 → R3 +, +(u, v) 7→ (R cos v cos u, R cos v sin u, R sin v) +Es gilt: F(u, v) ∈ S +2 +R +, denn +R +2 +cos2(v) cos2(u) + R +2 +cos2(v) sin2(u) + R +2 +sin2(v) +=R +2 +(cos2(v) cos2(u) + cos2(v) sin2(u) + sin2(v)) +=R +2 + +cos2(v)(cos2(u) + sin2(u)) + sin2(v) + +=R +2 + +cos2(v) + sin2(v) + +=R +2 + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +N +S +v u +(a) Kugelkoordinaten +−1 +0 +1 +2−2 +−1 +0 +1 +2 +0.6 +0.8 +1 +(b) Rotationskörper +π +2 +π 3π +2 +2π +−1 +−0.5 +0.5 +1 +x +y +sin x +cos x (c) Sinus und Kosinus haben keine gemeinsame Nullstelle -33 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN -Die Jacobi-Matrix -JF (u, v) = - - -−R cos v sin u −R sin v cos u -R cos v cos u −R sin v sin u -0 R cos v - - -hat Rang 2 für cos v 6= 0. In N und S ist cos v = 0. 
-Bemerkung 31 -Jede reguläre Fläche S ⊆ R -3 -ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. -Beweis: -S ⊆ R -3 -ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -regulären Flächen folgt direkt, dass Karten (Ui -, Fi) und (Uj ⊆ R -2 -, Fj : R -2 → R -3 -) von S mit -Ui ∩ Uj 6= ∅ existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind. -z.Z.: F -−1 -j -◦ Fi -ist ein Diffeomorphismus. -Ui Uj -S -s -Fi Fj -F -−1 -j -◦Fi -Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 -Idee: Finde differenzierbare Funktion Fg−1 -j -in Umgebung W von s, sodass Fg−1 -j -|S∩W = F -−1 -j -. -Ausführung: Sei u0 ∈ Ui -, v0 ∈ Uj mit Fi(u0) = s = Fj (v0). -Da Rg(JFj -(v0)) = 2 ist, ist o. B. d. A. -det ∂x -∂u -∂x -∂v -∂y -∂u -∂y -∂v - -(v0) 6= 0 -und Fj (u, v) = (x(u, v), y(u, v), z(u, v)). -Definiere Ffj : Uj × R → R -3 durch -Ffj (u, v, t) := (x(u, v), y(u, v), z(u, v) + t) -Offensichtlich: Ffj |Uj×{ 0 } = Fj -JFfj -= - - -∂x -∂u -∂x -∂v 0 -∂y -∂u -∂y -∂v 0 -∂z -∂u -∂z -∂v 1 - - ⇒ det JFfj -(v0, 0) 6= 0 -Analysis II -======⇒ Es gibt Umgebungen W von Fj von Ffj (v0, 0) = Fj (v0) = s, sodass Ffj auf W eine -differenzierbar Inverse F -−1 -j + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +Die Jacobi-Matrix +JF (u, v) = + + +−R cos v sin u −R sin v cos u +R cos v cos u −R sin v sin u +0 R cos v + + +hat Rang 2 für cos v 6= 0. In N und S ist cos v = 0. +Bemerkung 31 +Jede reguläre Fläche S ⊆ R +3 +ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. +Beweis: +S ⊆ R +3 +ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von +regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆ R +2 +, Fj : R +2 → R3 +) von S mit +Ui ∩ Uj 6= ∅ existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind. +z.Z.: F +−1 +j +◦ Fiist ein Diffeomorphismus. 
+Ui Uj +S +s +Fi Fj +F +−1 +j +◦Fi +Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 +Idee: Finde differenzierbare Funktion Fg−1 +j +in Umgebung W von s, sodass Fg−1 +j +|S∩W = F +−1 +j +. +Ausführung: Sei u0 ∈ Ui, v0 ∈ Uj mit Fi(u0) = s = Fj (v0). +Da Rg(JFj +(v0)) = 2 ist, ist o. B. d. A. +det ∂x +∂u +∂x +∂v +∂y +∂u +∂y +∂v + +(v0) 6= 0 +und Fj (u, v) = (x(u, v), y(u, v), z(u, v)). +Definiere Ffj : Uj × R → R +3 durch +Ffj (u, v, t) := (x(u, v), y(u, v), z(u, v) + t) +Offensichtlich: Ffj |Uj×{ 0 } = Fj +JFfj += + + +∂x +∂u +∂x +∂v 0 +∂y +∂u +∂y +∂v 0 +∂z +∂u +∂z +∂v 1 + + ⇒ det JFfj +(v0, 0) 6= 0 +Analysis II +======⇒ Es gibt Umgebungen W von Fj von Ffj (v0, 0) = Fj (v0) = s, sodass Ffj auf W eine +differenzierbar Inverse F +−1 +j hat. -34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN -Weiter gilt: -Ffj -−1 -|W∩S = F -−1 -j -|W∩S -⇒ F -−1 -j -◦ Fi -|F -−1 -i -(W∩S) = F -−1 -j -◦ Fi -|F -−1 -i -(W∩S) -ist differenzierbar. -Definition 33 -Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. -a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G → G und ι : G → G -definiert durch -g ◦ h := g · h und ι(g) := g -−1 -stetig sind. -b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und -(G, ι) differenzierbar sind. -Beispiel 25 (Lie-Gruppen) -1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. -2) GLn(R) -3) (R -×, ·) -4) (R>0, ·) -5) (R -n -, +), denn A · B(i, j) = Pn -k=1 aikbkj ist nach allen Variablen differenzierbar -(A−1 -)(i, j) = det(Aij ) -det A -Aij = - - -ai1 . . . ain -. -. -. -. -. -. -. -. -. -an1 . . . ann - - -∈ R -(n−1)×(n−1) -ist differenzierbar. -det Aij kann 0 werden, da: - -1 1 -−1 0 -6) SLn(R) = { A ∈ GLn(R) | det(A) = 1 } -Bemerkung 32 -Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung -lg : G → G -h 7→ g · h + 2.2. 
DIFFERENZIERBARE MANNIGFALTIGKEITEN +Weiter gilt: +Ffj +−1 +|W∩S = F +−1 +j +|W∩S +⇒ F +−1 +j +◦ Fi|F +−1 +i +(W∩S) = F +−1 +j +◦ Fi|F +−1 +i +(W∩S) +ist differenzierbar. +Definition 33 +Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. +a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G → G und ι : G → G +definiert durch +g ◦ h := g · h und ι(g) := g +−1 +stetig sind. +b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und +(G, ι) differenzierbar sind. +Beispiel 25 (Lie-Gruppen) +1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. +2) GLn(R) +3) (R +×, ·) +4) (R>0, ·) +5) (R +n +, +), denn A · B(i, j) = Pn +k=1 aikbkj ist nach allen Variablen differenzierbar +(A−1)(i, j) = det(Aij ) +det A +Aij = + + +ai1 . . . ain +. +. +. +. +. +. +. +. +. +an1 . . . ann + + +∈ R +(n−1)×(n−1) +ist differenzierbar. +det Aij kann 0 werden, da: + +1 1 +−1 0 +6) SLn(R) = { A ∈ GLn(R) | det(A) = 1 } +Bemerkung 32 +Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung +lg : G → G +h 7→ g · h ein Diffeomorphismus. -35 2.3. SIMPLIZIALKOMPLEX -2.3 Simplizialkomplex -Definition 34 -Seien v0, . . . , vk ∈ R -n Punkte. -a) v0, . . . , vk sind in allgemeiner Lage -⇔ es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält -⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig. -b) conv(v0, . . . , vk) := n Pk -i=0 λivi - - - -λi ≥ 0, -Pk -i=0 λi = 1 o -heißt die konvexe Hülle von -v0, . . . , vk. -Definition 35 -a) Sei ∆n = conv(e0, . . . , en) ⊆ R -n+1 die konvexe Hülle der Standard-Basisvektoren -e0, . . . , en. -Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. -b) Für Punkte v0, . . . , vk im R -n -in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk) -ein k-Simplex in R -n -. -c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir -:= -conv(vi0 -, . . . , vir -) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. 
-(a) 0-Simplex ∆0 -1 2 3 -1 -2 -3 -e0 -e1 -(b) 1-Simplex ∆1 -1 2 3 -1 -2 -3 -e0 -e1 -e2 -(c) 2-Simplex ∆2 -e0 e1 -e2 -e3 -(d) 3-Simplex ∆3 -Abbildung 2.6: Beispiele für k-Simplexe -Definition 36 -a) Eine endliche Menge K von Simplizes im R -n heißt (endlicher) Simplizialkomplex, -wenn gilt: -(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K. -(ii) Für ∆1, ∆2 ∈ K ist ∆1 ∩ ∆2 leer oder ein Teilsimplex von ∆1 und von ∆2. -b) |K| := S -∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. + 2.3. SIMPLIZIALKOMPLEX +2.3 Simplizialkomplex +Definition 34 +Seien v0, . . . , vk ∈ R +n Punkte. +a) v0, . . . , vk sind in allgemeiner Lage +⇔ es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält +⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig. +b) conv(v0, . . . , vk) := n Pk +i=0 λivi + + + +λi ≥ 0, +Pk +i=0 λi = 1 o +heißt die konvexe Hülle von +v0, . . . , vk. +Definition 35 +a) Sei ∆n = conv(e0, . . . , en) ⊆ R +n+1 die konvexe Hülle der Standard-Basisvektoren +e0, . . . , en. +Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. +b) Für Punkte v0, . . . , vk im R +n +in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk) +ein k-Simplex in R +n +. +c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir:= +conv(vi0, . . . , vir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. +(a) 0-Simplex ∆0 +1 2 3 +1 +2 +3 +e0 +e1 +(b) 1-Simplex ∆1 +1 2 3 +1 +2 +3 +e0 +e1 +e2 +(c) 2-Simplex ∆2 +e0 e1 +e2 +e3 +(d) 3-Simplex ∆3 +Abbildung 2.6: Beispiele für k-Simplexe +Definition 36 +a) Eine endliche Menge K von Simplizes im R +n heißt (endlicher) Simplizialkomplex, +wenn gilt: +(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K. +(ii) Für ∆1, ∆2 ∈ K ist ∆1 ∩ ∆2 leer oder ein Teilsimplex von ∆1 und von ∆2. +b) |K| := S +∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K. 
-36 2.3. SIMPLIZIALKOMPLEX -(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex -(ohne untere Fläche!) -(c) 2D Simplizialkomplex -(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex -P -(f) P ist kein Teilsimplex, da Eigen￾schaft Punkt b.ii verletzt ist -P -(g) Simplizialkomplex -Abbildung 2.7: Beispiele für Simplizialkomplexe -Definition 37 -Seien K, L Simplizialkomplexe. Eine stetige Abbildung -f : |K| → |L| -heißt simplizial, wenn für jedes ∆ ∈ K gilt: -a) f(∆) ∈ L -b) f|∆ : ∆ → f(∆) ist eine affine Abbildung. -Beispiel 26 (Simpliziale Abbildungen) -1) ϕ(e1) := b1, ϕ(e2) := b2 + 2.3. SIMPLIZIALKOMPLEX +(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex +(ohne untere Fläche!) +(c) 2D Simplizialkomplex +(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex +P +(f) P ist kein Teilsimplex, da Eigenschaft Punkt b.ii verletzt ist +P +(g) Simplizialkomplex +Abbildung 2.7: Beispiele für Simplizialkomplexe +Definition 37 +Seien K, L Simplizialkomplexe. Eine stetige Abbildung +f : |K| → |L| +heißt simplizial, wenn für jedes ∆ ∈ K gilt: +a) f(∆) ∈ L +b) f|∆ : ∆ → f(∆) ist eine affine Abbildung. +Beispiel 26 (Simpliziale Abbildungen) +1) ϕ(e1) := b1, ϕ(e2) := b2 ϕ ist eine eindeutig bestimmte lineare Abbildung -37 2.3. SIMPLIZIALKOMPLEX -0 e2 -e1 -0 b1 -b2 -ϕ -2) Folgende Abbildung ϕ : ∆n → ∆n−1 -ist simplizial: -ϕ -3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) -M M -a -a -a -b -b -b -c -c -c -d -d -d -M -a -b -c -d -b b b -b b b -b b b -b -b -b -b -b -b -b b -b -b b -b b -b b -b -b -b -b -Abbildung 2.8: Abbildung eines Torus auf eine Sphäre -Definition 38 -Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in -K. -Dann heißt -χ(K) := -dim -X -K -n=0 -(−1)n -an(K) -Eulerzahl (oder Euler-Charakteristik) von K. 
-Beispiel 27 -1) χ(∆1 -) = 2 − 1 = 1 -χ(∆2 -) = 3 − 3 + 1 = 1 -χ(∆3 -) = 4 − 6 + 4 − 1 = 1 -2) χ(Oktaeder-Oberfläche) = 6 − 12 + 8 = 2 -χ(Rand des Tetraeders) = 2 -χ(Ikosaeder) = 12 − 30 + 20 = 2 -3) χ(Würfel) = 8 − 12 + 6 = 2 -χ(Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 -Bemerkung 33 -χ(∆n -) = 1 für jedes n ∈ N0 -38 2.3. SIMPLIZIALKOMPLEX -Beweis: ∆n -ist die konvexe Hülle von (e0, . . . , en) in R -n+1. Jede (k + 1)-elementige Teilmenge -von { e0, . . . , en } definiert ein k-Simplex. -⇒ ak(∆n -) = ￾ -n+1 -k+1 -, k = 0, . . . , n -⇒ χ(∆n -) = Pn -k=0(−1)k -￾ -n+1 -k+1 -f(x) = (x + 1)n+1 -Binomischer -Lehrsatz = -Pn+1 -k=0 ￾ -n+1 -k - -x -k -⇒ 0 = Pn+1 -k=0 ￾ -n+1 -k - -(−1)k = χ(∆n -) − 1 -⇒ χ(∆n -) = 1  -Definition 39 -a) Ein 1D-Simplizialkomplex heißt Graph. -b) Ein Graph, der homöomorph zu S -1 -ist, heißt Kreis. -c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. -(a) Dies wird häufig auch als -Multigraph bezeichnet. -(b) Planare Einbettung des Te￾traeders -(c) K5 (d) K3,3 -Abbildung 2.9: Beispiele für Graphen -Bemerkung 34 -Für jeden Baum T gilt χ(T) = 1. -Beweis: Induktion über die Anzahl der Ecken. -Bemerkung 35 -a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ -enthält.2 -b) Ist n = a1(Γ) − a1(T), so ist χ(Γ) = 1 − n. -Beweis: -a) Siehe „Algorithmus von Kruskal“. -2T wird „Spannbaum“ gena -39 2.3. SIMPLIZIALKOMPLEX -b) χ(Γ) = a0(Γ) − a1(Γ) -= a0(Γ) − (n + a1(T)) -= a0(T) − a1(T) − n -= χ(T) − n -= 1 − n -Bemerkung 36 -Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ R -n -. Sei K der Simplizialkomplex, der aus ∆ durch -„Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1. -(a) K (b) ∆, das aus K durch Unter￾teilung entsteht -Abbildung 2.10: Beispiel für Bemerkung 36. 
-Beweis: χ(K) = χ(∆) − (−1)n -| {z } -n-Simplex -+ -Xn -k=0 -(−1)k - -n + 1 -k - -| {z } -(1+(−1))n+1 -= χ(∆)  -Definition 40 -Sei X ein topologischer Raum, K ein Simplizialkomplex und -h : |K| → X -ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine -Triangulierung von X. -Beispiel 28 (Triangulierung des Torus) -Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für -fehlerhafte „Triangulierungen“ sind in Beispiel 28 zu sehen. Korrekte Triangulierungen sind -in Beispiel 28. -Satz 2.1 (Eulersche Polyederformel) -Sei P ein konvexes Polyeder in R -3 -, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex, -sodass gilt: -∀x, y ∈ ∂P : [x, y] ⊆ P -Dann ist χ(∂P) = 2. -Beweis: -1) Die Aussage ist richtig für den Tetraeder. -2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S -2 -. -Erhalte Triangulierung von S -2 + 2.3. SIMPLIZIALKOMPLEX +0 e2 +e1 +0 b1 +b2 +ϕ +2) Folgende Abbildung ϕ : ∆n → ∆n−1ist simplizial: +ϕ +3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) +M M +a +a +a +b +b +b +c +c +c +d +d +d +M +a +b +c +d +b b b +b b b +b b b +b +b +b +b +b +b +b b +b +b b +b b +b b +b +b +b +b +Abbildung 2.8: Abbildung eines Torus auf eine Sphäre +Definition 38 +Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in +K. +Dann heißt +χ(K) := +dim +X +K +n=0 +(−1)nan(K) +Eulerzahl (oder Euler-Charakteristik) von K. +Beispiel 27 +1) χ(∆1) = 2 − 1 = 1 +χ(∆2) = 3 − 3 + 1 = 1 +χ(∆3) = 4 − 6 + 4 − 1 = 1 +2) χ(Oktaeder-Oberfläche) = 6 − 12 + 8 = 2 +χ(Rand des Tetraeders) = 2 +χ(Ikosaeder) = 12 − 30 + 20 = 2 +3) χ(Würfel) = 8 − 12 + 6 = 2 +χ(Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 +Bemerkung 33 +χ(∆n) = 1 für jedes n ∈ N0 + 2.3. SIMPLIZIALKOMPLEX +Beweis: ∆nist die konvexe Hülle von (e0, . . . , en) in R +n+1. Jede (k + 1)-elementige Teilmenge +von { e0, . . . , en } definiert ein k-Simplex. 
+⇒ ak(∆n) = +n+1 +k+1 +, k = 0, . . . , n +⇒ χ(∆n) = Pn +k=0(−1)k + +n+1 +k+1 +f(x) = (x + 1)n+1 +Binomischer +Lehrsatz = +Pn+1 +k=0 +n+1 +k + +x +k +⇒ 0 = Pn+1 +k=0 +n+1 +k + +(−1)k = χ(∆n) − 1 +⇒ χ(∆n) = 1  +Definition 39 +a) Ein 1D-Simplizialkomplex heißt Graph. +b) Ein Graph, der homöomorph zu S +1 +ist, heißt Kreis. +c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. +(a) Dies wird häufig auch als +Multigraph bezeichnet. +(b) Planare Einbettung des Tetraeders +(c) K5 (d) K3,3 +Abbildung 2.9: Beispiele für Graphen +Bemerkung 34 +Für jeden Baum T gilt χ(T) = 1. +Beweis: Induktion über die Anzahl der Ecken. +Bemerkung 35 +a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ +enthält.2 +b) Ist n = a1(Γ) − a1(T), so ist χ(Γ) = 1 − n. +Beweis: +a) Siehe „Algorithmus von Kruskal“. +2T wird „Spannbaum“ genannt. + 2.3. SIMPLIZIALKOMPLEX +b) χ(Γ) = a0(Γ) − a1(Γ) += a0(Γ) − (n + a1(T)) += a0(T) − a1(T) − n += χ(T) − n += 1 − n +Bemerkung 36 +Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ R +n +. Sei K der Simplizialkomplex, der aus ∆ durch +„Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1. +(a) K (b) ∆, das aus K durch Unterteilung entsteht +Abbildung 2.10: Beispiel für Bemerkung 36. +Beweis: χ(K) = χ(∆) − (−1)n +| {z } +n-Simplex ++ +Xn +k=0 +(−1)k + +n + 1 +k + +| {z } +(1+(−1))n+1 += χ(∆)  +Definition 40 +Sei X ein topologischer Raum, K ein Simplizialkomplex und +h : |K| → X +ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine +Triangulierung von X. +Beispiel 28 (Triangulierung des Torus) +Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für +fehlerhafte „Triangulierungen“ sind in Beispiel 28 zu sehen. Korrekte Triangulierungen sind +in Beispiel 28. +Satz 2.1 (Eulersche Polyederformel) +Sei P ein konvexes Polyeder in R +3 +, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex, +sodass gilt: +∀x, y ∈ ∂P : [x, y] ⊆ P +Dann ist χ(∂P) = 2. 
+Beweis: +1) Die Aussage ist richtig für den Tetraeder. +2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S +2 +. +Erhalte Triangulierung von S +2 . -40 2.3. SIMPLIZIALKOMPLEX -(a) Die beiden markierten Dreiecke schneiden sich im -Mittelpunkt und in einer Seite. -(b) Die beiden markierten Dreiecke schneiden sich im -Mittelpunkt und außen. -Abbildung 2.11: Fehlerhafte Triangulierungen -(a) Einfache Triangulierung (b) Minimale Triangulierung + 2.3. SIMPLIZIALKOMPLEX +(a) Die beiden markierten Dreiecke schneiden sich im +Mittelpunkt und in einer Seite. +(b) Die beiden markierten Dreiecke schneiden sich im +Mittelpunkt und außen. +Abbildung 2.11: Fehlerhafte Triangulierungen +(a) Einfache Triangulierung (b) Minimale Triangulierung Abbildung 2.12: Triangulierungen des Torus -41 2.3. SIMPLIZIALKOMPLEX -3) Sind P1 und P2 konvexe Polygone und T1, T2 die zugehörigen Triangulierungen von -S -2 -, so gibt es eine Triangulierung T, die sowohl um T1 als auch um T2 Verfeinerung -ist (vgl. Abbildung 2.13). -T1 -T2 -T -Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist. -Nach Bemerkung 36 ist χ(∂P1) = χ(T1) = χ(T) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A. -P2 ein Tetraeder ist. -Bemerkung 37 (Der Rand vom Rand ist 0) -Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V . -Sei An die Menge der n-Simplizes in K, d. h. -An(K) := { σ ∈ K | dim(σ) = n } für n = 0, . . . , d = dim(K) -und Cn(K) der R-Vektorraum mit Basis An(K), d. h. -Cn(K) = - - - -X -σ∈An(K) -cσ · σ - - - - - - -cσ ∈ R - - - -Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn. -Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , xˆi -P -, . . . , xn) die i-te Seite von σ und dσ = dnσ := -i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare -Abbildung. -Dann gilt: dn−1 ◦ dn = 0 -a b -c -σ -e3 -e2 e1 -Abbildung 2.14: Simplizialkomplex mit Totalordnung -Beispiel 29 -Sei a < b < c. 
Dann gilt: -d2σ = e1 − e2 + e3 + 2.3. SIMPLIZIALKOMPLEX +3) Sind P1 und P2 konvexe Polygone und T1, T2 die zugehörigen Triangulierungen von +S +2 +, so gibt es eine Triangulierung T, die sowohl um T1 als auch um T2 Verfeinerung +ist (vgl. Abbildung 2.13). +T1 +T2 +T +Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist. +Nach Bemerkung 36 ist χ(∂P1) = χ(T1) = χ(T) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A. +P2 ein Tetraeder ist. +Bemerkung 37 (Der Rand vom Rand ist 0) +Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V . +Sei An die Menge der n-Simplizes in K, d. h. +An(K) := { σ ∈ K | dim(σ) = n } für n = 0, . . . , d = dim(K) +und Cn(K) der R-Vektorraum mit Basis An(K), d. h. +Cn(K) = + + + +X +σ∈An(K) +cσ · σ + + + + + + +cσ ∈ R + + + +Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn. +Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , xˆi +P +, . . . , xn) die i-te Seite von σ und dσ = dnσ := +i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare +Abbildung. +Dann gilt: dn−1 ◦ dn = 0 +a b +c +σ +e3 +e2 e1 +Abbildung 2.14: Simplizialkomplex mit Totalordnung +Beispiel 29 +Sei a < b < c. Dann gilt: +d2σ = e1 − e2 + e3 d1(e1 − e2 + e3) = (c − b) − (c − a) + (b − a) -42 2.3. SIMPLIZIALKOMPLEX -= 0 -Sei a < b < c < d. Dann gilt für Tetraeder: -d3(∆(a, b, c, d)) = ∆(b, c, d) − ∆(a, c, d) + ∆(a, b, d) − ∆(a, b, c), wobei: -d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c) -d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c) -d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b) -d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b) -⇒ d2(d3(∆(a, b, c, d))) = 0 -Beweis: Sei σ ∈ An. 
Dann gilt: -dn−1(dnσ) = dn−1( -Xn -i=0 -(−1)i -∂iσ) -= -Xn -i=0 -(−1)i -dn−1(∂iσ) -= -Xn -i=0 -(−1)i -nX−1 -j=0 -∂i(∂jσ)(−1)j -= -X -0≤i≤j≤n−1 -(−1)i+j -∂j (∂i(σ)) +X -0≤j d(P, C) = d(P, B) + d(B, C) = d(P, A) + d(B, C) ⇒ -d(A, C) > d(B, C) ⇒ Widerspruch zu Punkt (i) -b) C liegt zwischen P und B -d(P, C) + d(C, A) > d(P, A) = d(P, B) = d(P, C) + d(C, B) -⇒ d(C, A) > d(C, B) -⇒ Widerspruch zu Punkt (i) -2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. P A. -Dann liegen A und Q in derselben Halbebene bzgl. P B. -Tausche A und B ⇒ Fall 1  -Bemerkung 63 -Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P 6= Q und ϕ eine Isometrie -mit ϕ(P) = P und ϕ(Q) = Q. -Dann gilt ϕ(S) = S ∀S ∈ P Q. -Beweis: -O. B. d. A. sei S ∈ P Q 2 -⇔ d(P, Q) = d(P, S) + d(S, Q) -ϕ∈Iso(X) ⇒ d(ϕ(P), ϕ(Q)) = d(ϕ(P), ϕ(S)) + d(ϕ(S), ϕ(Q)) -P,Q∈Fix(ϕ) ⇒ d(P, Q) = d(P, ϕ(S)) + d(ϕ(S), Q) -⇒ ϕ(S) liegt zwischen P und Q -⇒ d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S)) -3(i) ⇒ ϕ(S) = S - -Proposition 4.2 -In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P0 -, Q, Q0 mit d(P, Q) = d(P -0 -, Q0 -) -höchstens zwei Isometrien mit ϕ(P) = P + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +P Q +B +C +A +(a) 1. Fall +P +Q +B A +(b) 2. Fall +Abbildung 4.4: Fallunterscheidung aus Bemerkung 62 +(ii) a) B liegt zwischen P und C. +d(P, A) + d(A, C) > d(P, C) = d(P, B) + d(B, C) = d(P, A) + d(B, C) ⇒ +d(A, C) > d(B, C) ⇒ Widerspruch zu Punkt (i) +b) C liegt zwischen P und B +d(P, C) + d(C, A) > d(P, A) = d(P, B) = d(P, C) + d(C, B) +⇒ d(C, A) > d(C, B) +⇒ Widerspruch zu Punkt (i) +2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. P A. +Dann liegen A und Q in derselben Halbebene bzgl. P B. +Tausche A und B ⇒ Fall 1  +Bemerkung 63 +Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P 6= Q und ϕ eine Isometrie +mit ϕ(P) = P und ϕ(Q) = Q. +Dann gilt ϕ(S) = S ∀S ∈ P Q. +Beweis: +O. B. d. A. 
sei S ∈ P Q 2⇔ d(P, Q) = d(P, S) + d(S, Q) +ϕ∈Iso(X) ⇒ d(ϕ(P), ϕ(Q)) = d(ϕ(P), ϕ(S)) + d(ϕ(S), ϕ(Q)) +P,Q∈Fix(ϕ) ⇒ d(P, Q) = d(P, ϕ(S)) + d(ϕ(S), Q) +⇒ ϕ(S) liegt zwischen P und Q +⇒ d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S)) +3(i) ⇒ ϕ(S) = S + +Proposition 4.2 +In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P0, Q, Q0 mit d(P, Q) = d(P +0 +, Q0) +höchstens zwei Isometrien mit ϕ(P) = P 0 und ϕ(Q) = Q0 -70 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit -ϕi(P) = P -0 und ϕi(Q) = Q0 gibt. -Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P) = P -0 -, ϕi(Q) = Q0 mit i = 1, 2, 3. -Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: -(Teil i) ∃R ∈ X \ P Q mit ϕ1(R) = ϕ2(R). -(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX. -Aus (Teil i) und (Teil ii) folgt, dass ϕ -−1 -2 -◦ ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem -Fall Fixpunkte sind. -Nun zu den Beweisen der Teilaussagen: -(Teil i) Sei R ∈ X \ P Q. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben -Halbebene bzgl. P -0Q0 = ϕi(P Q). -O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene. -Es gilt: d(P -0 -, ϕ1(R)) = d(ϕ1(P), ϕ1(R)) -= d(P, R) -= d(ϕ2(P), ϕ2(R)) -= d(P -0 -, ϕ2(R)) -und analog d(Q0 -, ϕ1(R)) = d(Q0 -, ϕ2(R)) -(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR. Sei B ∈ -P Q \ { P, Q }. Dann ist ϕ(B) = B wegen Bemerkung 63. -Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ -Bem. 63 =====⇒ ϕ(A) = A. -P B Q -C -R -A -Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR -Ist R /∈ AB, so ist AB ∩ P R 6= ∅ oder AB ∈ RQ 6= ∅ nach Satz 4.1. Der Schnittpunkt -C ist dann Fixpunkt von ϕ -0 nach Bemerkung 63 ⇒ ϕ(A) = A. -Bemerkung 64 (SWS-Kongruenzsatz) -Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C -0 -Dreiecke, für die gilt: -(i) d(A, B) = d(A0 -, B0 -) -(ii) ∠CAB ∼= ∠C + 4.1. 
AXIOME FÜR DIE EUKLIDISCHE EBENE +Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit +ϕi(P) = P +0 und ϕi(Q) = Q0 gibt. +Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P) = P +0 +, ϕi(Q) = Q0 mit i = 1, 2, 3. +Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: +(Teil i) ∃R ∈ X \ P Q mit ϕ1(R) = ϕ2(R). +(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX. +Aus (Teil i) und (Teil ii) folgt, dass ϕ +−1 +2 +◦ ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem +Fall Fixpunkte sind. +Nun zu den Beweisen der Teilaussagen: +(Teil i) Sei R ∈ X \ P Q. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben +Halbebene bzgl. P +0Q0 = ϕi(P Q). +O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene. +Es gilt: d(P +0 +, ϕ1(R)) = d(ϕ1(P), ϕ1(R)) += d(P, R) += d(ϕ2(P), ϕ2(R)) += d(P +0 +, ϕ2(R)) +und analog d(Q0, ϕ1(R)) = d(Q0, ϕ2(R)) +(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR. Sei B ∈ +P Q \ { P, Q }. Dann ist ϕ(B) = B wegen Bemerkung 63. +Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ +Bem. 63 =====⇒ ϕ(A) = A. +P B Q +C +R +A +Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR +Ist R /∈ AB, so ist AB ∩ P R 6= ∅ oder AB ∈ RQ 6= ∅ nach Satz 4.1. Der Schnittpunkt +C ist dann Fixpunkt von ϕ +0 nach Bemerkung 63 ⇒ ϕ(A) = A. +Bemerkung 64 (SWS-Kongruenzsatz) +Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C +0 +Dreiecke, für die gilt: +(i) d(A, B) = d(A0, B0) +(ii) ∠CAB ∼= ∠C +0A0B0 + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +(iii) d(A, C) = d(A0, C0) +Dann ist 4ABC kongruent zu 4A0B0C +0 +. +Beweis: Sei ϕ die Isometrie mit ϕ(A0 +) = A, ϕ(A0C +0+) = AC+ und ϕ(A0B0+) = AB+. Diese +Isometrie existiert wegen Punkt §4. +⇒ C ∈ ϕ(A0C +0+) und B ∈ ϕ(A0B0+). 
+d(A0, C0) = d(ϕ(A0), ϕ(C +0 +)) = d(A, ϕ(C +0 +)) 3(i)==⇒ ϕ(C +0 +) = C +d(A0, B0) = d(ϕ(A0), ϕ(B0)) = d(A, ϕ(B0)) 3(i)==⇒ ϕ(B0) = B +Also gilt insbesondere ϕ(4A0B0C +0 +) = 4ABC.  +Bemerkung 65 (WSW-Kongruenzsatz) +Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C +0 +Dreiecke, für die gilt: +(i) d(A, B) = d(A0, B0) +(ii) ∠CAB ∼= ∠C 0A0B0 -71 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -(iii) d(A, C) = d(A0 -, C0 -) -Dann ist 4ABC kongruent zu 4A0B0C -0 -. -Beweis: Sei ϕ die Isometrie mit ϕ(A0 -) = A, ϕ(A0C -0+) = AC+ und ϕ(A0B0+) = AB+. Diese -Isometrie existiert wegen Punkt §4. -⇒ C ∈ ϕ(A0C -0+) und B ∈ ϕ(A0B0+). -d(A0 -, C0 -) = d(ϕ(A0 -), ϕ(C -0 -)) = d(A, ϕ(C -0 -)) 3(i) -==⇒ ϕ(C -0 -) = C -d(A0 -, B0 -) = d(ϕ(A0 -), ϕ(B0 -)) = d(A, ϕ(B0 -)) 3(i) -==⇒ ϕ(B0 -) = B -Also gilt insbesondere ϕ(4A0B0C -0 -) = 4ABC.  -Bemerkung 65 (WSW-Kongruenzsatz) -Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A0B0C -0 -Dreiecke, für die gilt: -(i) d(A, B) = d(A0 -, B0 -) -(ii) ∠CAB ∼= ∠C -0A0B0 -(iii) ∠ABC ∼= ∠A0B0C -0 -Dann ist 4ABC kongruent zu 4A0B0C -0 -. -Beweis: Sei ϕ die Isometrie mit ϕ(A0 -) = A, ϕ(B0 -) = B und ϕ(C -0 -) liegt in der selben Halbebene -bzgl. AB wie C. Diese Isometrie existiert wegen §4. -Aus ∠CAB = ∠C -0A0B0 = ∠ϕ(C -0 -)ϕ(A0 -)ϕ(B0 -) = ∠ϕ(C -0 -)AB folgt, dass ϕ(C -0 -) ∈ AC+. -Analog folgt aus ∠ABC = ∠A0B0C -0 = ∠ϕ(A0 -)ϕ(B0 -)ϕ(C -0 -) = ∠ABϕ(C -0 -), dass ϕ(C -0 -) ∈ -BC+. -Dann gilt ϕ(C -0 -) ∈ AC ∩ BC = { C } ⇒ ϕ(C -0 -) = C. -Es gilt also ϕ(4A0B0C -0 -) = 4ABC.  -Definition 61 -a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P. -Man schreibt: ∠R1P R2 bzw. ∠R2P R1 -2 -b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den -anderen abbildet. -c) ∠R0 -1P -0R0 -2 heißt kleiner als ∠R1P R2, wenn es eine Isometrie ϕ gibt, mit ϕ(P -0 -) = P, -ϕ(P -0R -0+ -1 -) = P R+ -1 -und ϕ(R0 -2 -) liegt in der gleichen Halbebene bzgl. 
P R1 wie R2 und in -der gleichen Halbebene bzgl. P R2 wie R1 -d) Im Dreieck 4P QR gibt es Innenwinkel und Außenwinkel. -Bemerkung 66 -In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. -Beweis: Zeige ∠P RQ < ∠RQP0 -. -Sei M der Mittelpunkt der Strecke QR und P -0 ∈ P Q+ \ P Q. Sei A ∈ MP − mit d(P, M) = -d(M, A). -2Für dieses Skript gilt: ∠R1P R2 = ∠R2P R1. Also sind insbesondere alle Winkel ≤ 180◦ +(iii) ∠ABC ∼= ∠A0B0C +0 +Dann ist 4ABC kongruent zu 4A0B0C +0 . -72 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -P R0 -1 R1 -R0 -2 -R2 -(a) ∠R -0 -1P -0R -0 -2 ist kleiner als ∠R1P R2, -vgl. Definition 61.c -P -Q R -(b) Innenwinkel und Außenwin￾kel in 4P QR, vgl. Definiti￾on 61.d -Abbildung 4.6: Situation aus Definition 61 -Q M -A -P -R -(a) Parallelogramm AQPR -α -β -R -Q P -(b) Innen- und Außenwin￾kel von 4P QR -Abbildung 4.7: Situation aus Bemerkung 66 -Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒ 4MRQ -ist kongruent zu 4AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet, -bildet R auf Q und P auf A ab. -⇒ ∠MQA = ∠MRP = ∠QRP = ∠P RQ. -Noch zu zeigen: ∠MQA < ∠RQP0 -, denn A liegt in der selben Halbebene bzgl. P Q wie M. -Proposition 4.3 (Existenz der Parallelen) -Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4. -Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine -Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. -Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P -0 ∈ f mit -d(P, P0 -) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält. -73 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -Q -h -f -g -P -Abbildung 4.8: Situation aus Proposition 4.3 -Annahme: ϕ(g) ∩ g 6= ∅ -⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g. -Dann ist ∠RQP = ∠RQP0 < ∠RP P0 nach Bemerkung 66 und ∠RQP = ∠RP P0 -, weil -ϕ(∠RQP) = ∠RP P0 -. -⇒ Widerspruch -⇒ ϕ(g) ∩ g = ∅  -Folgerung 4.4 -Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. -D. h. 
es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP +) = P R+, sodass ϕ(R) in der gleichen -Halbebene bzgl. P Q liegt wie R. -Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die -beiden Halbgeraden bilden eine Gerade. -Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, -Dreiecke mit drei 90◦ --Winkeln. -Proposition 4.5 -In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der +Beweis: Sei ϕ die Isometrie mit ϕ(A0) = A, ϕ(B0) = B und ϕ(C +0 +) liegt in der selben Halbebene +bzgl. AB wie C. Diese Isometrie existiert wegen §4. +Aus ∠CAB = ∠C +0A0B0 = ∠ϕ(C0 +)ϕ(A0)ϕ(B0) = ∠ϕ(C +0 +)AB folgt, dass ϕ(C +0 +) ∈ AC+. +Analog folgt aus ∠ABC = ∠A0B0C +0 = ∠ϕ(A0 +)ϕ(B0)ϕ(C +0 +) = ∠ABϕ(C +0 +), dass ϕ(C +0 +) ∈ +BC+. +Dann gilt ϕ(C +0 +) ∈ AC ∩ BC = { C } ⇒ ϕ(C +0 +) = C. +Es gilt also ϕ(4A0B0C +0 +) = 4ABC.  +Definition 61 +a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P. +Man schreibt: ∠R1P R2 bzw. ∠R2P R1 +2 +b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den +anderen abbildet. +c) ∠R0 +1P +0R0 +2 heißt kleiner als ∠R1P R2, wenn es eine Isometrie ϕ gibt, mit ϕ(P +0 +) = P, +ϕ(P +0R +0+ +1 +) = P R+ +1 +und ϕ(R0 +2 +) liegt in der gleichen Halbebene bzgl. P R1 wie R2 und in +der gleichen Halbebene bzgl. P R2 wie R1 +d) Im Dreieck 4P QR gibt es Innenwinkel und Außenwinkel. +Bemerkung 66 +In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. +Beweis: Zeige ∠P RQ < ∠RQP0. +Sei M der Mittelpunkt der Strecke QR und P +0 ∈ P Q+ \ P Q. Sei A ∈ MP − mit d(P, M) = +d(M, A). +2Für dieses Skript gilt: ∠R1P R2 = ∠R2P R1. Also sind insbesondere alle Winkel ≤ 180◦ +. + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +P R0 +1 R1 +R0 +2 +R2 +(a) ∠R +0 +1P +0R0 +2 ist kleiner als ∠R1P R2, +vgl. Definition 61.c +P +Q R +(b) Innenwinkel und Außenwinkel in 4P QR, vgl. 
Definition 61.d +Abbildung 4.6: Situation aus Definition 61 +Q M +A +P +R +(a) Parallelogramm AQPR +α +β +R +Q P +(b) Innen- und Außenwinkel von 4P QR +Abbildung 4.7: Situation aus Bemerkung 66 +Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒ 4MRQ +ist kongruent zu 4AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet, +bildet R auf Q und P auf A ab. +⇒ ∠MQA = ∠MRP = ∠QRP = ∠P RQ. +Noch zu zeigen: ∠MQA < ∠RQP0, denn A liegt in der selben Halbebene bzgl. P Q wie M. +Proposition 4.3 (Existenz der Parallelen) +Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4. +Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine +Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. +Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P +0 ∈ f mit +d(P, P0) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält. + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +Q +h +f +g +P +Abbildung 4.8: Situation aus Proposition 4.3 +Annahme: ϕ(g) ∩ g 6= ∅ +⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g. +Dann ist ∠RQP = ∠RQP0 < ∠RP P0 nach Bemerkung 66 und ∠RQP = ∠RP P0, weil +ϕ(∠RQP) = ∠RP P0. +⇒ Widerspruch +⇒ ϕ(g) ∩ g = ∅  +Folgerung 4.4 +Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. +D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP +) = P R+, sodass ϕ(R) in der gleichen +Halbebene bzgl. P Q liegt wie R. +Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die +beiden Halbgeraden bilden eine Gerade. +Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, +Dreiecke mit drei 90◦-Winkeln. +Proposition 4.5 +In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der Innenwinkel ≤ π. -74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -Sei im Folgenden „IWS“ die „Innenwinkelsumme“. 
-Beweis: Sei 4 ein Dreieck mit IWS(4) = π + ε -α -β -γ -P -(a) Summe der Winkel α, β und γ -α1 -α2 β -γ -M -A B -C A0 -α -(b) Situation aus Proposition 4.5 -Abbildung 4.10: Situation aus Proposition 4.5 -Sei α ein Innenwinkel von 4. -Beh.: Es gibt ein Dreieck 40 mit IWS(40 -) = IWS(4) und einem Innenwinkel α -0 ≤ -α -2 -. -Dann gibt es für jedes n ein 4n mit IWS(4n) = IWS(4) und Innenwinkel α -0 ≤ -α -2n . Für -α -2n < ε ist dann die Summe der beiden Innenwinkel um 4n größer als π ⇒ Widerspruch -zu Folgerung 4.4. -Beweis: Es seien A, B, C ∈ X und 4 das Dreieck mit den Eckpunkten A, B, C und α sei -der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C. -Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM. -Sei weiter A0 ∈ MA− mit d(A0 -, M) = d(A, M). -Die Situation ist in Abbildung 4.10b skizziert. -⇒ 4(MA0C) und 4(MAB) sind kongruent. ⇒ ∠ABM = ∠A0CM und ∠MA0C = -∠MAB. ⇒ α+β +γ = IWS(4ABC) = IWS(4AA0C) und α1+α2 = α, also o. B. d. A. -α1 ≤ -α -2 -Bemerkung 67 -In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. -α -0 -α -00 -α β -β -0 -γ -A B -C -g -Abbildung 4.11: Situation aus Bemerkung 67 -Beweis: Sei g eine Parallele von AB durch C. -• Es gilt α -0 = α wegen Proposition 4.3. -• Es gilt β -0 = β wegen Proposition 4.3. -• Es gilt α -00 = α -0 wegen Aufgabe 8. -75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -⇒ IWS(4ABC) = γ + α -00 + β -0 = π -Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich -π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. -4.2 Weitere Eigenschaften einer euklidischen Ebene -Satz 4.6 (Strahlensatz) -In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. -x -y -−1 0 1 2 3 4 -0 -1 -2 -3 -z -x -λ -2 -z -λ -2x -Abbildung 4.12: Strahlensatz -Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. 
-A B0 -C -0 -B -C -c -b a -c -0 -b -0 -a -0 -Abbildung 4.13: Die Dreiecke 4ABC und 4AB0C -0 -sind ähnlich. -4.2.1 Flächeninhalt -Definition 62 -„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +Sei im Folgenden „IWS“ die „Innenwinkelsumme“. +Beweis: Sei 4 ein Dreieck mit IWS(4) = π + ε +α +β +γ +P +(a) Summe der Winkel α, β und γ +α1 +α2 β +γ +M +A B +C A0 +α +(b) Situation aus Proposition 4.5 +Abbildung 4.10: Situation aus Proposition 4.5 +Sei α ein Innenwinkel von 4. +Beh.: Es gibt ein Dreieck 40 mit IWS(40) = IWS(4) und einem Innenwinkel α +0 ≤ +α +2 +. +Dann gibt es für jedes n ein 4n mit IWS(4n) = IWS(4) und Innenwinkel α +0 ≤ +α +2n . Für +α +2n < ε ist dann die Summe der beiden Innenwinkel um 4n größer als π ⇒ Widerspruch +zu Folgerung 4.4. +Beweis: Es seien A, B, C ∈ X und 4 das Dreieck mit den Eckpunkten A, B, C und α sei +der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C. +Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM. +Sei weiter A0 ∈ MA− mit d(A0, M) = d(A, M). +Die Situation ist in Abbildung 4.10b skizziert. +⇒ 4(MA0C) und 4(MAB) sind kongruent. ⇒ ∠ABM = ∠A0CM und ∠MA0C = +∠MAB. ⇒ α+β +γ = IWS(4ABC) = IWS(4AA0C) und α1+α2 = α, also o. B. d. A. +α1 ≤ +α +2 +Bemerkung 67 +In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. +α +0 +α +00 +α β +β +0 +γ +A B +C +g +Abbildung 4.11: Situation aus Bemerkung 67 +Beweis: Sei g eine Parallele von AB durch C. +• Es gilt α +0 = α wegen Proposition 4.3. +• Es gilt β +0 = β wegen Proposition 4.3. +• Es gilt α +00 = α0 wegen Aufgabe 8. + 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +⇒ IWS(4ABC) = γ + α +00 + β0 = π +Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich +π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. 
+4.2 Weitere Eigenschaften einer euklidischen Ebene +Satz 4.6 (Strahlensatz) +In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. +x +y +−1 0 1 2 3 4 +0 +1 +2 +3 +z +x +λ +2 +z +λ +2x +Abbildung 4.12: Strahlensatz +Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. +A B0 +C +0 +B +C +c +b a +c +0 +b +0 +a +0 +Abbildung 4.13: Die Dreiecke 4ABC und 4AB0C +0 +sind ähnlich. +4.2.1 Flächeninhalt +Definition 62 +„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in kongruente Dreiecke zerlegen lassen. -76 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Drei￾ecke -Abbildung 4.14: Flächengleichheit -Der Flächeninhalt eines Dreiecks ist 1/2 · Grundseite · Höhe. -A B -C -LC -hc -c -(a) 1/2 · |AB| · |hc| -· -A B -C -LA -ha -c -(b) 1/2 · |BC| · |ha| -Abbildung 4.15: Flächenberechnung im Dreieck -Zu zeigen: Unabhängigkeit von der gewählten Grundseite. -α -α -γ -γ -A B -C -LA -LC -Abbildung 4.16: 4ABLa und 4CLCB sind ähnlich, weil IWS = π -Strahlensatz =======⇒ a -hc -= -c -ha → a · ha = c · hc -Satz 4.7 (Satz des Pythagoras) -Im rechtwinkligen Dreieck gilt a -2 + b -2 = c -2 -, wobei c die Hypotenuse und a, b die beiden -Katheten sind. -Beweis: (a + b) · (a + b) = a -2 + 2ab + b -2 = c -2 + 4 · ( -1 -2 + 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Dreiecke +Abbildung 4.14: Flächengleichheit +Der Flächeninhalt eines Dreiecks ist 1/2 · Grundseite · Höhe. +A B +C +LC +hc +c +(a) 1/2 · |AB| · |hc| +· +A B +C +LA +ha +c +(b) 1/2 · |BC| · |ha| +Abbildung 4.15: Flächenberechnung im Dreieck +Zu zeigen: Unabhängigkeit von der gewählten Grundseite. 
+α +α +γ +γ +A B +C +LA +LC +Abbildung 4.16: 4ABLa und 4CLCB sind ähnlich, weil IWS = π +Strahlensatz =======⇒ a +hc += +c +ha → a · ha = c · hc +Satz 4.7 (Satz des Pythagoras) +Im rechtwinkligen Dreieck gilt a +2 + b2 = c2 +, wobei c die Hypotenuse und a, b die beiden +Katheten sind. +Beweis: (a + b) · (a + b) = a +2 + 2ab + b2 = c2 + 4 · ( +1 +2 · a · b) -77 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -c -b a -A B -C -· -(a) a, b sind Katheten und c ist die Hypo￾tenuse -b a -b -a -a b -b -a -· -· · -· -γ -(b) Beweisskizze -Abbildung 4.17: Satz des Pythagoras -Satz 4.8 -Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d, G), nämlich X = R -2 -, -d = euklidischer Abstand, G = Menge der üblichen Geraden. -Beweis: -(i) (R -2 -, dEuklid) ist offensichtlich eine euklidische Ebene. -(ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0 -im rechten Winkel schneiden. -Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl. -Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2. -Sei xP := d(PX, 0) und yP := d(PY , 0). -In Abbildung 4.19 wurde die Situation skizziert. -Sei h : X → R -2 -eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem -Quadranten definiert, in dem P liegt, d. h. -∀Q ∈ X mit P Q ∩ g1 = ∅ = P Q ∩ g2 -Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. -Im Folgenden werden zwei Aussagen gezeigt: -(i) h ist surjektiv -(ii) h ist eine Isometrie -Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist. + 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +c +b a +A B +C +· +(a) a, b sind Katheten und c ist die Hypotenuse +b a +b +a +a b +b +a +· +· · +· +γ +(b) Beweisskizze +Abbildung 4.17: Satz des Pythagoras +Satz 4.8 +Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d, G), nämlich X = R +2 +, +d = euklidischer Abstand, G = Menge der üblichen Geraden. +Beweis: +(i) (R +2 +, dEuklid) ist offensichtlich eine euklidische Ebene. 
+(ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0 +im rechten Winkel schneiden. +Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl. +Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2. +Sei xP := d(PX, 0) und yP := d(PY , 0). +In Abbildung 4.19 wurde die Situation skizziert. +Sei h : X → R +2 +eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem +Quadranten definiert, in dem P liegt, d. h. +∀Q ∈ X mit P Q ∩ g1 = ∅ = P Q ∩ g2 +Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. +Im Folgenden werden zwei Aussagen gezeigt: +(i) h ist surjektiv +(ii) h ist eine Isometrie +Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist. Nun zu den Beweisen der Teilaussagen: -78 4.3. HYPERBOLISCHE GEOMETRIE -· -g1 -g2 -P -X -(a) Schritt 1 -· -g1 -g2 -xP -yP -P -0 PX -PY -X -(b) Schritt 2 -Abbildung 4.18: Beweis zu Satz 4.8 -(i) Sei (x, y) ∈ R -2 -, z. B. x ≥ 0, y ≥ 0. Sei P -0 ∈ g1 mit d(0, P0 -) = x und P -0 auf der -gleichen Seite von g2 wie P. -g1 -g2 -xP -yP -P -Q -0 -R -X -Abbildung 4.19: Beweis zu Satz 4.8 -(ii) Zu Zeigen: d(P, Q) = d(h(P), h(Q)) -d(P, Q) -2 Pythagoras -= d(P, R) -2 + d(R, Q) -2 = (yQ − yP ) -2 + (xQ − xP ) -2 -. -h(Q) = (xQ, yQ) -4.3 Hyperbolische Geometrie -Definition 63 -Sei -H := { z ∈ C | =(z) > 0 } = - -(x, y) ∈ R -2 - - y > 0 - -79 4.3. HYPERBOLISCHE GEOMETRIE -die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪ G2 mit -G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z − m| = r } } -G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : <(z) = x } } -Die Elemente aus G heißen hyperbolische Geraden. -Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) -Die hyperbolischen Geraden erfüllen. . . -a) . . . die Inzidenzaxiome §1 -b) . . . das Anordnungsaxiom §3 (ii) -c) . . . nicht das Parallelenaxiom §5 -Beweis: -a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. 
Für §1 (i) gilt: -Gegeben z1, z2 ∈ H -Existenz: -Fall 1 <(z1) = <(z2) -⇒ z1 und z2 liegen auf -g = { z ∈ C | <(z) = <(z1) ∧ H } -Siehe Abbildung 4.20a. -Fall 2 <(z1) 6= <(z2) -Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech￾te zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten -zu z1 und z2 sind gleich weit von z1 und z2 entfernt. Daher ist der Schnittpunkt mit -der x-Achse der Mittelpunkt eines Kreises durch z1 und z2 (vgl. Abbildung 4.20b) -x -y -−1 0 1 2 3 4 5 -0 -1 -2 -3 -4 -Z1 -Z2 -<(Z1) -(a) Fall 1 -x -y -−1 0 1 2 3 4 5 -0 -1 -2 -3 -4 -Z1 -Z2 -(b) Fall 2 -Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer -Geraden + 4.3. HYPERBOLISCHE GEOMETRIE +· +g1 +g2 +P +X +(a) Schritt 1 +· +g1 +g2 +xP +yP +P +0 PX +PY +X +(b) Schritt 2 +Abbildung 4.18: Beweis zu Satz 4.8 +(i) Sei (x, y) ∈ R +2 +, z. B. x ≥ 0, y ≥ 0. Sei P +0 ∈ g1 mit d(0, P0 +) = x und P +0 auf der +gleichen Seite von g2 wie P. +g1 +g2 +xP +yP +P +Q +0 +R +X +Abbildung 4.19: Beweis zu Satz 4.8 +(ii) Zu Zeigen: d(P, Q) = d(h(P), h(Q)) +d(P, Q) +2 Pythagoras += d(P, R) +2 + d(R, Q)2 = (yQ − yP )2 + (xQ − xP )2 +. +h(Q) = (xQ, yQ) +4.3 Hyperbolische Geometrie +Definition 63 +Sei +H := { z ∈ C | =(z) > 0 } = + +(x, y) ∈ R +2 + + y > 0 + + 4.3. HYPERBOLISCHE GEOMETRIE +die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪ G2 mit +G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z − m| = r } } +G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : <(z) = x } } +Die Elemente aus G heißen hyperbolische Geraden. +Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) +Die hyperbolischen Geraden erfüllen. . . +a) . . . die Inzidenzaxiome §1 +b) . . . das Anordnungsaxiom §3 (ii) +c) . . . nicht das Parallelenaxiom §5 +Beweis: +a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. 
Für §1 (i) gilt: +Gegeben z1, z2 ∈ H +Existenz: +Fall 1 <(z1) = <(z2) +⇒ z1 und z2 liegen auf +g = { z ∈ C | <(z) = <(z1) ∧ H } +Siehe Abbildung 4.20a. +Fall 2 <(z1) 6= <(z2) +Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten +zu z1 und z2 sind gleich weit von z1 und z2 entfernt. Daher ist der Schnittpunkt mit +der x-Achse der Mittelpunkt eines Kreises durch z1 und z2 (vgl. Abbildung 4.20b) +x +y +−1 0 1 2 3 4 5 +0 +1 +2 +3 +4 +Z1 +Z2 +<(Z1) +(a) Fall 1 +x +y +−1 0 1 2 3 4 5 +0 +1 +2 +3 +4 +Z1 +Z2 +(b) Fall 2 +Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer +Geraden b) Sei g ∈ G1 ∪˙ G2 eine hyperbolische Gerade. -80 4.3. HYPERBOLISCHE GEOMETRIE -Es existieren disjunkte Zerlegungen von H \ g: -Fall 1: g = { z ∈ H k z − m| = r } ∈ G1 -Dann gilt: -H = { z ∈ H k z − m| < r } -| {z } -=:H1 (Kreisinneres) -∪ { ˙ z ∈ H k z − m| > r } -| {z } -=:H2 (Kreisäußeres) -Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer. -Fall 2: g = { z ∈ H | x } -| {z } -=:H2 (Rechts) -Zu zeigen: ∀A ∈ Hi -, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g 6= ∅ ⇔ i 6= j -„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g 6= ∅ -Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand -von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der -größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige -Abbildung f : R → R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g 6= ∅ -„⇒“: A ∈ Hi -, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g 6= ∅ ⇒ i 6= j -Sei h die Gerade, die durch A und B geht. -Da A, B /∈ g, aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen -unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt -schneiden. Sei C dieser Punkt. -Aus A, B /∈ g folgt: C 6= A und C 6= B. Also liegt C zwischen A und B. Daraus folgt, -dass A und B bzgl. 
g in verschiedenen Halbebenen liegen. -c) Siehe Abbildung 4.21. -x -y -−5 −4 −3 −2 −1 0 1 2 3 4 5 6 -0 -1 -2 -3 -4 -5 + 4.3. HYPERBOLISCHE GEOMETRIE +Es existieren disjunkte Zerlegungen von H \ g: +Fall 1: g = { z ∈ H k z − m| = r } ∈ G1 +Dann gilt: +H = { z ∈ H k z − m| < r } +| {z } +=:H1 (Kreisinneres) +∪ { ˙ z ∈ H k z − m| > r } +| {z } +=:H2 (Kreisäußeres) +Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer. +Fall 2: g = { z ∈ H | x } +| {z } +=:H2 (Rechts) +Zu zeigen: ∀A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g 6= ∅ ⇔ i 6= j +„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g 6= ∅ +Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand +von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der +größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige +Abbildung f : R → R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g 6= ∅ +„⇒“: A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g 6= ∅ ⇒ i 6= j +Sei h die Gerade, die durch A und B geht. +Da A, B /∈ g, aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen +unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt +schneiden. Sei C dieser Punkt. +Aus A, B /∈ g folgt: C 6= A und C 6= B. Also liegt C zwischen A und B. Daraus folgt, +dass A und B bzgl. g in verschiedenen Halbebenen liegen. +c) Siehe Abbildung 4.21. +x +y +−5 −4 −3 −2 −1 0 1 2 3 4 5 6 +0 +1 +2 +3 +4 +5 Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. -81 4.3. HYPERBOLISCHE GEOMETRIE -Definition 64 -Es seien a, b, c, d ∈ R mit ad − bc 6= 0 und σ : C → C eine Abbildung definiert durch -σ(z) := az + b -cz + d -σ heißt Möbiustransformation. -Proposition 4.9 -a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation -σ(z) :=  -a b -c d -◦ z := -az + b -cz + d -b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H. -c) PSL2(R) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h. 
-zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1, -σ(x∞) = ∞. -d) SL2(R) wird von den Matrizen - -λ 0 -0 λ -−1 - -| {z } -=:Aλ -, - -1 t -0 1 -| {z } -=:Bt -und  -0 1 -−1 0 -| {z } -=:C -mit t, λ ∈ R -× -erzeugt. -e) PSL2(R) operiert auf G. -Beweis: -a) Sei z = x + iy ∈ H, d. h. y > 0 und σ = - -a b -c d -∈ SL2(R) -⇒ σ(z) = a(x + iy) + b -c(x + iy) + d -= -(ax + b) + iay -(cx + d) + icy -· -(cx + d) − icy -(cx + d) − icy -= -(ax + b)(cx + d) + aycy -(cx + d) -2 + (cy) -2 -+ i -ay(cx + d) − (ax + b)cy -(cx + d) -2 + (cy) -2 -= -axcx + axd + bcx + bd + aycy -(cx + d) -2 + (cy) -2 -+ i -(ad − bc)y -(cx + d) -2 + (cy) -2 -SL2(R) -= -ac(x -2 + y -2 -) + adx + bcx + bd -(cx + d) -2 + (cy) -2 -+ i -y -(cx + d) -2 + (cy) -2 -⇒ =(σ(z)) = y -(cx+d) -2+(cy) -2 > 0 -Die Abbildung bildet also nach H ab. Außerdem gilt: - -1 0 -0 1 -◦ z = -x + iy -1 + 4.3. HYPERBOLISCHE GEOMETRIE +Definition 64 +Es seien a, b, c, d ∈ R mit ad − bc 6= 0 und σ : C → C eine Abbildung definiert durch +σ(z) := az + b +cz + d +σ heißt Möbiustransformation. +Proposition 4.9 +a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation +σ(z) :=  +a b +c d +◦ z := +az + b +cz + d +b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H. +c) PSL2(R) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h. +zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1, +σ(x∞) = ∞. +d) SL2(R) wird von den Matrizen + +λ 0 +0 λ +−1 + +| {z } +=:Aλ +, + +1 t +0 1 +| {z } +=:Bt +und  +0 1 +−1 0 +| {z } +=:C +mit t, λ ∈ R +× +erzeugt. +e) PSL2(R) operiert auf G. +Beweis: +a) Sei z = x + iy ∈ H, d. h. 
y > 0 und σ = + +a b +c d +∈ SL2(R) +⇒ σ(z) = a(x + iy) + b +c(x + iy) + d += +(ax + b) + iay +(cx + d) + icy +· +(cx + d) − icy +(cx + d) − icy += +(ax + b)(cx + d) + aycy +(cx + d) +2 + (cy)2 ++ i +ay(cx + d) − (ax + b)cy +(cx + d) +2 + (cy)2 += +axcx + axd + bcx + bd + aycy +(cx + d) +2 + (cy)2 ++ i +(ad − bc)y +(cx + d) +2 + (cy)2 +SL2(R) += +ac(x +2 + y2 +) + adx + bcx + bd +(cx + d) +2 + (cy)2 ++ i +y +(cx + d) +2 + (cy)2 +⇒ =(σ(z)) = y +(cx+d) +2+(cy)2 > 0 +Die Abbildung bildet also nach H ab. Außerdem gilt: + +1 0 +0 1 +◦ z = +x + iy +1 = x + iy = z -82 4.3. HYPERBOLISCHE GEOMETRIE -und - -a b -c d -◦ -a -0 -b -0 -c -0 d -0 - -◦ z - -= - -a b -c d -◦ -a -0 -z + b -0 -c -0z + d -0 -= -a -a -0z+b -0 -c -0z+d -0 + b -c -a -0z+b -0 -c -0z+d -0 + d -= -a(a -0z+b -0 -)+b(c -0z+d -0 -) -c -0z+d -0 -c(a -0z+b -0)+d(c -0z+d -0) -c -0z+d -0 -= -a(a -0 -z + b -0 -) + b(c -0 -z + d -0 -) -c(a -0z + b -0) + d(c -0z + d -0) -= -(aa0 + bc0 -)z + ab0 + bd0 -(ca0 + db0)z + cb0 + dd0 -= - -aa0 + bc0 ab0 + bd0 -ca0 + db0 -cb0 + dd0 - -◦ z -= -a b -c d -· - -a -0 -b -0 -c -0 d -0 - ◦ z -b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H. -c) Ansatz: σ = - -a b -c d -σ(x0) = ax0+b -cx0+d -!= 0 ⇒ ax0 + b = 0 ⇒ b = −ax0 -σ(x∞) = ∞ ⇒ cx∞ + d = 0 ⇒ d = −cx∞ -σ(x1) = 1 ⇒ ax1 + b = cx1 + d -a(x1 − x0) = c(x1 − x∞) ⇒ c = a -x1−x0 -x1−x∞ -⇒ −a -2 -· x∞ -x1−x0 -x1−x∞ -+ a -2x0 -x1−x0 -x1−x∞ -= 1 -⇒ a -2 x1−x0 -x0−x∞ -(x0 − x∞) = 1 ⇒ a -2 = -x1−x∞ -(x1−x∞)(x1−x0) -d) Es gilt: -A -−1 -λ = A1 -λ -B -−1 -t = B−t -C -−1 = C -3 -Daher genügt es zu zeigen, dass man mit Aλ, Bt und C alle Matrizen aus SL2(R) -erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit -Matrizen der Form Aλ, Bt und C die Einheitsmatrix zu generieren. -Sei also -M = - -a b -c d -∈ SL2(R) -beliebig. -Fall 1: a = 0 -Da M ∈ SL2(R) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c 6= 0. Es -folgt: - -0 1 -−1 0 -· - -a b -c d -= - -c d -−a −b + 4.3. 
HYPERBOLISCHE GEOMETRIE +und + +a b +c d +◦ +a +0 +b +0 +c +0 d0 + +◦ z + += + +a b +c d +◦ +a +0 +z + b +0 +c +0z + d0 += +a +a +0z+b0 +c +0z+d0 + b +c +a +0z+b0 +c +0z+d0 + d += +a(a +0z+b0 +)+b(c +0z+d0 +) +c +0z+d0 +c(a +0z+b0)+d(c0z+d0) +c +0z+d0 += +a(a +0 +z + b +0 +) + b(c +0 +z + d +0 +) +c(a +0z + b0) + d(c0z + d0) += +(aa0 + bc0)z + ab0 + bd0 +(ca0 + db0)z + cb0 + dd0 += + +aa0 + bc0 ab0 + bd0 +ca0 + db0cb0 + dd0 + +◦ z += +a b +c d +· + +a +0 +b +0 +c +0 d0 + ◦ z +b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H. +c) Ansatz: σ = + +a b +c d +σ(x0) = ax0+b +cx0+d +!= 0 ⇒ ax0 + b = 0 ⇒ b = −ax0 +σ(x∞) = ∞ ⇒ cx∞ + d = 0 ⇒ d = −cx∞ +σ(x1) = 1 ⇒ ax1 + b = cx1 + d +a(x1 − x0) = c(x1 − x∞) ⇒ c = a +x1−x0 +x1−x∞ +⇒ −a +2 +· x∞ +x1−x0 +x1−x∞ ++ a +2x0 +x1−x0 +x1−x∞ += 1 +⇒ a +2 x1−x0 +x0−x∞ +(x0 − x∞) = 1 ⇒ a +2 = +x1−x∞ +(x1−x∞)(x1−x0) +d) Es gilt: +A +−1 +λ = A1 +λ +B +−1 +t = B−t +C +−1 = C3 +Daher genügt es zu zeigen, dass man mit Aλ, Bt und C alle Matrizen aus SL2(R) +erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit +Matrizen der Form Aλ, Bt und C die Einheitsmatrix zu generieren. +Sei also +M = + +a b +c d +∈ SL2(R) +beliebig. +Fall 1: a = 0 +Da M ∈ SL2(R) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c 6= 0. Es +folgt: + +0 1 +−1 0 +· + +a b +c d += + +c d +−a −b  -83 4.3. HYPERBOLISCHE GEOMETRIE -Gehe zu Fall 2. -Fall 2: a 6= 0 -Nun wird in M durch M · A1 -a -an der Stelle von a eine 1 erzeugt: - -a b -c d -· -1 -a -0 -0 a - -= - -1 ab -c -a -ad -Gehe zu Fall 3. -Fall 3: a = 1 - -1 b -c d -· - -1 −b -0 1  -= - -1 0 -c d − bc -Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M2,2 = 1. -Gehe zu Fall 4. -Fall 4: a = 1, b = 0, d = 1 -A−1CBcC - -1 0 -c 1 - -= - -1 0 -0 1 -Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R.  -e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. -• σ = - -λ 0 -0 λ -−1 - -, also σ(z) = λ -2 -z. 
Daraus ergeben sich die Situationen, die in -Abbildung 4.22a und Abbildung 4.22b dargestellt sind. -x -y -−1 0 1 2 3 4 5 6 7 -0 -1 -2 -3 -m λ -2m -m + ir -λ -2m + iλ2r -m + 1 -(a) Fall 1 -x -y -−1 0 1 2 3 4 -0 -1 -2 -3 -z -x -λ -2 -z -λ -2x -(b) Fall 2 (Strahlensatz) -Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix -• Offensichtlich gilt die Aussage für σ = - -1 a -0 1 -• Sei nun σ = - -0 1 -−1 0 -, also σ(z) = − -1 -z -Bemerkung 69 + 4.3. HYPERBOLISCHE GEOMETRIE +Gehe zu Fall 2. +Fall 2: a 6= 0 +Nun wird in M durch M · A1 +a +an der Stelle von a eine 1 erzeugt: + +a b +c d +· +1 +a +0 +0 a + += + +1 ab +c +a +ad +Gehe zu Fall 3. +Fall 3: a = 1 + +1 b +c d +· + +1 −b +0 1  += + +1 0 +c d − bc +Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M2,2 = 1. +Gehe zu Fall 4. +Fall 4: a = 1, b = 0, d = 1 +A−1CBcC + +1 0 +c 1 + += + +1 0 +0 1 +Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R.  +e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. +• σ = + +λ 0 +0 λ +−1 + +, also σ(z) = λ +2 +z. Daraus ergeben sich die Situationen, die in +Abbildung 4.22a und Abbildung 4.22b dargestellt sind. +x +y +−1 0 1 2 3 4 5 6 7 +0 +1 +2 +3 +m λ +2m +m + ir +λ +2m + iλ2r +m + 1 +(a) Fall 1 +x +y +−1 0 1 2 3 4 +0 +1 +2 +3 +z +x +λ +2 +z +λ +2x +(b) Fall 2 (Strahlensatz) +Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix +• Offensichtlich gilt die Aussage für σ = + +1 a +0 1 +• Sei nun σ = + +0 1 +−1 0 +, also σ(z) = − +1 +z +Bemerkung 69 Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2. -84 4.3. HYPERBOLISCHE GEOMETRIE -· -x -y -−1 0 1 -0 -1 -z = r · e -iϕ -1 -z = -1 -r -· e -iϕ -Abbildung 4.23: Inversion am Kreis -Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert -σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. -Definition 65 -Seien z1, z2, z3, z4 ∈ C paarweise verschieden. 
-Dann heißt -DV(z1, z2, z3, z4) := -z1−z4 -z1−z2 -z3−z4 -z3−z2 -= -(z1 − z4) · (z3 − z2) -(z1 − z2) · (z3 − z4) -Doppelverhältnis von z1, . . . , z4. -Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a) DV(z1, . . . , z4) ∈ C \ { 0, 1 } -b) DV(z1, z4, z3, z2) = 1 -DV(z1,z2,z3,z4) -c) DV(z3, z2, z1, z4) = 1 -DV(z1,z2,z3,z4) -d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind. -e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen). -f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ { ∞ } ist -DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) -und für σ(z) = 1 -z -gilt -DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) -g) DV(z1, z2, z3, z4) ∈ R ∪ { ∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden. -Beweis: -a) DV(z1, . . . , z4) 6= 0, da zi paarweise verschieden -DV(z1, . . . , z4) 6= 1, da: -Annahme: DV(z1, . . . , z4) = 1 + 4.3. HYPERBOLISCHE GEOMETRIE +· +x +y +−1 0 1 +0 +1 +z = r · e +iϕ +1 +z = +1 +r +· e +iϕ +Abbildung 4.23: Inversion am Kreis +Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert +σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. +Definition 65 +Seien z1, z2, z3, z4 ∈ C paarweise verschieden. +Dann heißt +DV(z1, z2, z3, z4) := +z1−z4 +z1−z2 +z3−z4 +z3−z2 += +(z1 − z4) · (z3 − z2) +(z1 − z2) · (z3 − z4) +Doppelverhältnis von z1, . . . , z4. +Bemerkung 70 (Eigenschaften des Doppelverhältnisses) +a) DV(z1, . . . , z4) ∈ C \ { 0, 1 } +b) DV(z1, z4, z3, z2) = 1 +DV(z1,z2,z3,z4) +c) DV(z3, z2, z1, z4) = 1 +DV(z1,z2,z3,z4) +d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind. +e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen). +f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ { ∞ } ist +DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) +und für σ(z) = 1 +z +gilt +DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) +g) DV(z1, z2, z3, z4) ∈ R ∪ { ∞ } ⇔ z1, . . . 
, z4 liegen auf einer hyperbolischen Geraden. +Beweis: +a) DV(z1, . . . , z4) 6= 0, da zi paarweise verschieden +DV(z1, . . . , z4) 6= 1, da: +Annahme: DV(z1, . . . , z4) = 1 ⇔ (z1 − z2)(z3 − z4) = (z1 − z4)(z3 − z2) -85 4.3. HYPERBOLISCHE GEOMETRIE -⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4 -⇔ z2z3 + z1z4 = z3z4 + z1z2 -⇔ z2z3 − z3z4 = z1z2 − z1z4 -⇔ z3(z2 − z4) = z1(z2 − z4) -⇔ z3 = z1 oder z2 = z4 -Alle zi sind paarweise verschieden ⇒ Widerspruch  -b) DV(z1, z4, z3, z2) = (z1−z2)·(z3−z4) -(z1−z4)·(z3−z2) = -1 -DV(z1,z2,z3,z4) -c) DV(z3, z2, z1, z4) = (z3−z4)·(z1−z2) -(z3−z2)·(z1−z4) = -1 -DV(z1,z2,z3,z4) -d) Zwei der zi dürfen gleich sein, da: -Fall 1 z1 = z4 oder z3 = z2 -In diesem Fall ist DV(z1, . . . , z4) = 0 -Fall 2 z1 = z2 oder z3 = z4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞ gilt. -Fall 3 z1 = z3 oder z2 = z4 -Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1. -Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞ -e) DV(0, 1, ∞, z4) = (0−z4)·(∞−1) -(0−1)·(∞−z4) = -z4·(∞−1) -∞−z4 -= z4 -f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man -drei Parameter von σ wählen darf. -Bem. 70.f -⇒ DV(z1, . . . , z4) = DV(0, 1, ∞, σ(z4)) -⇒ DV(z1, . . . , z4) ∈ R ∪ { ∞ } -⇔ σ(z4) ∈ R ∪ { ∞ } -Behauptung folgt, weil σ -−1 -(R ∪ ∞) ein Kreis oder eine Gerade in C ist. -Definition 66 -Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die -„Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }. -Dann sei dH(z1, z2) := 1 -2 -| ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik. -Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 -die „Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }. 
-Dann gilt: -1 -2 -| ln DV(a1, z1, a2, z2)| = -1 -2 -| ln DV(a2, z1, a1, z2)| -Beweis: Wegen Bemerkung 70.c gilt: -DV(a1, z1, a2, z2) = 1 -DV(a2, z1, a1, z2) -Außerdem gilt: -ln 1 -x -= ln x + 4.3. HYPERBOLISCHE GEOMETRIE +⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4 +⇔ z2z3 + z1z4 = z3z4 + z1z2 +⇔ z2z3 − z3z4 = z1z2 − z1z4 +⇔ z3(z2 − z4) = z1(z2 − z4) +⇔ z3 = z1 oder z2 = z4 +Alle zi sind paarweise verschieden ⇒ Widerspruch  +b) DV(z1, z4, z3, z2) = (z1−z2)·(z3−z4) +(z1−z4)·(z3−z2) = +1 +DV(z1,z2,z3,z4) +c) DV(z3, z2, z1, z4) = (z3−z4)·(z1−z2) +(z3−z2)·(z1−z4) = +1 +DV(z1,z2,z3,z4) +d) Zwei der zi dürfen gleich sein, da: +Fall 1 z1 = z4 oder z3 = z2 +In diesem Fall ist DV(z1, . . . , z4) = 0 +Fall 2 z1 = z2 oder z3 = z4 +Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞ gilt. +Fall 3 z1 = z3 oder z2 = z4 +Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1. +Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞ +e) DV(0, 1, ∞, z4) = (0−z4)·(∞−1) +(0−1)·(∞−z4) = +z4·(∞−1) +∞−z4 += z4 +f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man +drei Parameter von σ wählen darf. +Bem. 70.f +⇒ DV(z1, . . . , z4) = DV(0, 1, ∞, σ(z4)) +⇒ DV(z1, . . . , z4) ∈ R ∪ { ∞ } +⇔ σ(z4) ∈ R ∪ { ∞ } +Behauptung folgt, weil σ +−1 +(R ∪ ∞) ein Kreis oder eine Gerade in C ist. +Definition 66 +Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die +„Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }. +Dann sei dH(z1, z2) := 1 +2 +| ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik. +Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 +die „Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }. 
+Dann gilt: +1 +2 +| ln DV(a1, z1, a2, z2)| = +1 +2 +| ln DV(a2, z1, a1, z2)| +Beweis: Wegen Bemerkung 70.c gilt: +DV(a1, z1, a2, z2) = 1 +DV(a2, z1, a1, z2) +Außerdem gilt: +ln 1 +x += ln x −1 = (−1) · ln x = − ln x -86 4.3. HYPERBOLISCHE GEOMETRIE -Da der ln im Betrag steht, folgt direkt: -1 -2 -| ln DV(a1, z1, a2, z2)| = -1 -2 -| ln DV(a2, z1, a1, z2)| -Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver￾hältnis genutzt werden.  -Beh.: Die hyperbolische Metrik ist eine Metrik auf H. -Beweis: Wegen Bemerkung 70.f ist -d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) = ∞ -d. h. σ(gz1,z2 -) = iR (imaginäre Achse). -also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b. -2d(ia, ib) =| ln DV(0, ia, ∞, ib) | -=| ln (0 − ib)(∞ − ia) -(0 − ia)(∞ − ib) -| -=| ln b -a -| -= ln b − ln a -Also: d(z1, z2) ≥ 0, d(z1, z2) = 0 ⇔ z1 = z2 -2d(z2, z1) =| ln DV(a2, z2, a1, z1) | -=| ln DV(∞, ib, 0, ia) | -Bem. 70.b = | ln DV(0, ib, ∞, ia) | -= 2d(z1, z2) -Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) = -d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt). -Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die -Vorlesung „Hyperbolische Geometrie“ verwiesen. -Satz 4.10 -Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen -Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt, + 4.3. HYPERBOLISCHE GEOMETRIE +Da der ln im Betrag steht, folgt direkt: +1 +2 +| ln DV(a1, z1, a2, z2)| = +1 +2 +| ln DV(a2, z1, a1, z2)| +Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelverhältnis genutzt werden.  +Beh.: Die hyperbolische Metrik ist eine Metrik auf H. +Beweis: Wegen Bemerkung 70.f ist +d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) = ∞ +d. h. σ(gz1,z2) = iR (imaginäre Achse). +also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b. 
+2d(ia, ib) =| ln DV(0, ia, ∞, ib) | +=| ln (0 − ib)(∞ − ia) +(0 − ia)(∞ − ib) +| +=| ln b +a +| += ln b − ln a +Also: d(z1, z2) ≥ 0, d(z1, z2) = 0 ⇔ z1 = z2 +2d(z2, z1) =| ln DV(a2, z2, a1, z1) | +=| ln DV(∞, ib, 0, ia) | +Bem. 70.b = | ln DV(0, ib, ∞, ia) | += 2d(z1, z2) +Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) = +d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt). +Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die +Vorlesung „Hyperbolische Geometrie“ verwiesen. +Satz 4.10 +Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen +Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt, aber Axiom §5 ist verletzt. -87 4.3. HYPERBOLISCHE GEOMETRIE -Übungsaufgaben -Aufgabe 8 -Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels -∠P QR ist der Winkel, der aus den Halbgeraden QP − und QR− gebildet wird. Die -Nebenwinkel von ∠P QR sind die von QP + und QR− bzw. QP − und QR+ gebildeten -Winkel. -Zeigen Sie: -(a) Die beiden Nebenwinkel von ∠P QR sind gleich. -(b) Der Winkel ∠P QR ist gleich seinem Scheitelwinkel. -Aufgabe 9 -Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von -Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y . -Zeigen Sie: -(a) Ist 4ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die -Winkel ∠ABC und ∠BCA gleich. -(b) Ist 4ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel -gegenüber und umgekehrt. -(c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit -P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g -und der Schnittpunkt des Lots mit g heißt Lotfußpunkt. -Aufgabe 10 -Seien f, g, h ∈ G und paarweise verschieden. -Zeigen Sie: f k g ∧ g k h ⇒ f k h -Aufgabe 11 + 4.3. 
HYPERBOLISCHE GEOMETRIE +Übungsaufgaben +Aufgabe 8 +Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels +∠P QR ist der Winkel, der aus den Halbgeraden QP − und QR− gebildet wird. Die +Nebenwinkel von ∠P QR sind die von QP + und QR− bzw. QP − und QR+ gebildeten +Winkel. +Zeigen Sie: +(a) Die beiden Nebenwinkel von ∠P QR sind gleich. +(b) Der Winkel ∠P QR ist gleich seinem Scheitelwinkel. +Aufgabe 9 +Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von +Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y . +Zeigen Sie: +(a) Ist 4ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die +Winkel ∠ABC und ∠BCA gleich. +(b) Ist 4ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel +gegenüber und umgekehrt. +(c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit +P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g +und der Schnittpunkt des Lots mit g heißt Lotfußpunkt. +Aufgabe 10 +Seien f, g, h ∈ G und paarweise verschieden. +Zeigen Sie: f k g ∧ g k h ⇒ f k h +Aufgabe 11 Beweise den Kongruenzsatz SSS. -5 Krümmung -Definition 67 -Sei f : [a, b] → R -n -eine eine Funktion aus C∞. Dann heißt f Kurve. -5.1 Krümmung von Kurven -Definition 68 -Sei γ : I = [a, b] → R -n -eine Kurve. -a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: -kγ -0 -(t)k2 = 1 ∀t ∈ I -Dabei ist γ -0 -(t) = (γ -0 -1 -(t), γ0 -2 -(t), . . . , γ0 -n -(t)). -b) l(γ) = R b -a -kγ -0 -(t)kdt heißt Länge von γ. -Bemerkung 71 (Eigenschaften von Kurven I) -Sei γ : I = [a, b] → R -n -eine C∞-Funktion. -a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b − a. -b) Ist γ durch Bogenlänge parametrisiert, so ist γ -0 -(t) orthogonal zu γ -00(t) für alle t ∈ I. -Beweis: -a) l(γ) = R b -a -kγ -0 -(t)kdt = -R b -a -1dt = b − a. -b) Im Folgenden wird die Aussage nur für γ : [a, b] → R -2 bewiesen. 
Allerdings funktioniert -der Beweis im R -n analog. Es muss nur die Ableitung angepasst werden. -1 = kγ -0 -(t)k = kγ -0 -(t)k -2 = hγ -0 -(t), γ0 -(t)i -⇒ 0 = -d -dt -hγ -0 -(t), γ0 -(t)i -= -d -dt -(γ -0 -1 -(t)γ -0 -1 -(t) + γ -0 -2 -(t)γ -0 -2 -(t)) -= 2 · (γ -00 -1 -(t) · γ -0 -1 -(t) + γ -00 -2 -(t) · γ -0 -2 -(t)) -= 2 · hγ -00(t), γ0 -(t)i -Definition 69 -Sei γ : I → R -2 -eine durch Bogenlänge parametrisierte Kurve. -a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt: -hn(t), γ0 -(t)i = 0, kn(t)k = 1 und det((γ -0 +5 Krümmung +Definition 67 +Sei f : [a, b] → R +n +eine eine Funktion aus C∞. Dann heißt f Kurve. +5.1 Krümmung von Kurven +Definition 68 +Sei γ : I = [a, b] → R +n +eine Kurve. +a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: +kγ +0 +(t)k2 = 1 ∀t ∈ I +Dabei ist γ +0 +(t) = (γ +0 +1 +(t), γ0 +2 +(t), . . . , γ0 +n +(t)). +b) l(γ) = R b +a +kγ +0 +(t)kdt heißt Länge von γ. +Bemerkung 71 (Eigenschaften von Kurven I) +Sei γ : I = [a, b] → R +n +eine C∞-Funktion. +a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b − a. +b) Ist γ durch Bogenlänge parametrisiert, so ist γ +0 +(t) orthogonal zu γ +00(t) für alle t ∈ I. +Beweis: +a) l(γ) = R b +a +kγ +0 +(t)kdt = +R b +a +1dt = b − a. +b) Im Folgenden wird die Aussage nur für γ : [a, b] → R +2 bewiesen. Allerdings funktioniert +der Beweis im R +n analog. Es muss nur die Ableitung angepasst werden. +1 = kγ +0 +(t)k = kγ +0 +(t)k +2 = hγ0 +(t), γ0(t)i +⇒ 0 = +d +dt +hγ +0 +(t), γ0(t)i += +d +dt +(γ +0 +1 +(t)γ +0 +1 +(t) + γ +0 +2 +(t)γ +0 +2 +(t)) += 2 · (γ +00 +1 +(t) · γ +0 +1 +(t) + γ +00 +2 +(t) · γ +0 +2 +(t)) += 2 · hγ +00(t), γ0 +(t)i +Definition 69 +Sei γ : I → R +2 +eine durch Bogenlänge parametrisierte Kurve. +a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt: +hn(t), γ0(t)i = 0, kn(t)k = 1 und det((γ +0 (t), n(t))) = +1 -89 5.1. KRÜMMUNG VON KURVEN -b) Seit κ : I → R so, dass gilt: -γ -00(t) = κ(t) · n(t) -Dann heißt κ(t) Krümmung von γ in t. 
-Da n(t) und γ -00(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). -Beispiel 45 -Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt: -γ(t) =  -r · cos -t -r -, r · sin -t -r - -für t ∈ [0, 2πr] -ist parametrisiert durch Bogenlänge, da gilt: -γ -0 -(t) =  -(r · -1 -r -)(− sin -t -r -), r -1 -r -cos -t -r - -= - -− sin -t -r -, cos -t -r - -Der Normalenvektor von γ in t ist -n(t) =  -− cos -t -r -, − sin -t -r - -da gilt: -hn(t), γ0 -(t)i = -− cos t -r -− sin t -r - -, - -− sin t -r -cos t -r - -= (− cos -t -r -) · (− sin -t -r -) + (− sin -t -r -) · (cos t -r -) -= 0 -kn(t)k = - - - - -(− cos -t -r -, − sin -t -r -) - - - - -= (− cos -t -r -) -2 + (− sin -t -r -) -2 -= 1 -det(γ -0 -1 -(t), n(t)) = - - - - - -− sin t -r − cos t -r -cos t -r − sin t -r - - - - -= (− sin -t -r -) -2 − (− cos -t -r -) · cos -t -r -= 1 -Die Krümmung ist für jedes t konstant 1 -r -, da gilt: -γ -00(t) =  -− -1 -r -cos -t -r -, − -1 -r -sin -t -r - -= -1 -r -· - -− cos -t -r -, − sin -t -r - -⇒ κ(t) = 1 + 5.1. KRÜMMUNG VON KURVEN +b) Seit κ : I → R so, dass gilt: +γ +00(t) = κ(t) · n(t) +Dann heißt κ(t) Krümmung von γ in t. +Da n(t) und γ +00(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). +Beispiel 45 +Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. 
Es gilt: +γ(t) = r · cos +t +r +, r · sin +t +r + +für t ∈ [0, 2πr] +ist parametrisiert durch Bogenlänge, da gilt: +γ +0 +(t) = (r · +1 +r +)(− sin +t +r +), r +1 +r +cos +t +r + += + +− sin +t +r +, cos +t +r + +Der Normalenvektor von γ in t ist +n(t) = − cos +t +r +, − sin +t +r + +da gilt: +hn(t), γ0(t)i = +− cos t +r +− sin t +r + +, + +− sin t +r +cos t +r + += (− cos +t +r +) · (− sin +t +r +) + (− sin +t +r +) · (cos t +r +) += 0 +kn(t)k = + + + + +(− cos +t +r +, − sin +t +r +) + + + + += (− cos +t +r +) +2 + (− sin +t +r +) +2 += 1 +det(γ +0 +1 +(t), n(t)) = + + + + + +− sin t +r − cos tr +cos t +r − sin tr + + + + += (− sin +t +r +) +2 − (− cos +t +r +) · cos +t +r += 1 +Die Krümmung ist für jedes t konstant 1 +r +, da gilt: +γ +00(t) =  +− +1 +r +cos +t +r +, − +1 +r +sin +t r -90 5.2. TANGENTIALEBENE -Definition 70 -Sei γ : I → R -3 -eine durch Bogenlänge parametrisierte Kurve. -a) Für t ∈ I heißt κ(t) := kγ -00(t)k die Krümmung von γ in t. -b) Ist für t ∈ I die Ableitung γ -00(t) 6= 0, so heißt γ -00(t) -kγ -00(t)k Normalenvektor an γ in t. -c) b(t) sei ein Vektor, der γ -0 -(t), n(t) zu einer orientierten Orthonormalbasis von R -3 -ergänzt. -Also gilt: -det(γ -0 -(t), n(t), b(t)) = 1 -b(t) heißt Binormalenvektor, die Orthonormalbasis - -γ -0 -(t), n(t), b(t) - -heißt begleitendes Dreibein. -Bemerkung 72 (Eigenschaften von Kurven II) -Sei γ : I → R -3 durch Bogenlänge parametrisierte Kurve. -a) n(t) ist orthogonal zu γ -0 -(t). -b) b(t) aus Definition 70.c ist eindeutig. -5.2 Tangentialebene -Erinnerung Sie sich an Definition 32 „reguläre Fläche“. -Äquivalent dazu ist: S ist lokal von der Form -V (f) =  -x ∈ R -3 - - f(x) = 0 -für eine C∞-Funktion f : R -3 → R. 
-Definition 71 -Sei S ⊆ R -3 -eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um -s ∈ V : -(u, v) 7→ (x(u, v), y(u, v), z(u, v)) -Für p = F -−1 -(s) ∈ U sei -JF (p) = - - -∂x -∂u(p) -∂x -∂v (p) -∂y -∂u(p) -∂y -∂v (p) -∂z -∂u(p) -∂z -∂v (p) - - -und DpF : R -2 → R -3 die durch JF (p) definierte lineare Abbildung. -Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈ S. -Bemerkung 73 (Eigenschaften der Tangentialebene) -a) TsS ist 2-dimensionaler Untervektorraum von R -3 -. -b) TsS = hu, ˜ v˜i, wobei u, ˜ v˜ die Spaltenvektoren der Jacobi-Matrix JF (p) sind. + += +1 +r +· + +− cos +t +r +, − sin +t +r + +⇒ κ(t) = 1 +r + 5.2. TANGENTIALEBENE +Definition 70 +Sei γ : I → R +3 +eine durch Bogenlänge parametrisierte Kurve. +a) Für t ∈ I heißt κ(t) := kγ +00(t)k die Krümmung von γ in t. +b) Ist für t ∈ I die Ableitung γ +00(t) 6= 0, so heißt γ +00(t) +kγ +00(t)k Normalenvektor an γ in t. +c) b(t) sei ein Vektor, der γ +0 +(t), n(t) zu einer orientierten Orthonormalbasis von R +3 +ergänzt. +Also gilt: +det(γ +0 +(t), n(t), b(t)) = 1 +b(t) heißt Binormalenvektor, die Orthonormalbasis + +γ +0 +(t), n(t), b(t) + +heißt begleitendes Dreibein. +Bemerkung 72 (Eigenschaften von Kurven II) +Sei γ : I → R +3 durch Bogenlänge parametrisierte Kurve. +a) n(t) ist orthogonal zu γ +0 +(t). +b) b(t) aus Definition 70.c ist eindeutig. +5.2 Tangentialebene +Erinnerung Sie sich an Definition 32 „reguläre Fläche“. +Äquivalent dazu ist: S ist lokal von der Form +V (f) = x ∈ R +3 + + f(x) = 0 +für eine C∞-Funktion f : R +3 → R. +Definition 71 +Sei S ⊆ R +3 +eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um +s ∈ V : +(u, v) 7→ (x(u, v), y(u, v), z(u, v)) +Für p = F +−1 +(s) ∈ U sei +JF (p) = + + +∂x +∂u(p) +∂x +∂v (p) +∂y +∂u(p) +∂y +∂v (p) +∂z +∂u(p) +∂z +∂v (p) + + +und DpF : R +2 → R3 die durch JF (p) definierte lineare Abbildung. +Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈ S. 
+Bemerkung 73 (Eigenschaften der Tangentialebene) +a) TsS ist 2-dimensionaler Untervektorraum von R +3 +. +b) TsS = hu, ˜ v˜i, wobei u, ˜ v˜ die Spaltenvektoren der Jacobi-Matrix JF (p) sind. c) TsS hängt nicht von der gewählten Parametrisierung ab. -91 5.2. TANGENTIALEBENE -d) Sei S = V (f) eine reguläre Fläche in R -3 -, also f : V → R eine C∞-Funktion, V ⊆ R -3 -offen, grad(f)(x) 6= 0 für alle x ∈ S. -Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S. -Beweis: -a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist -eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein -Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2. -b) Hier kann man wie in Punkt a) argumentieren -c) TsS = {x ∈ R -3 -|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ(0) = -s und γ -0 -(0) = x} -Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -d) Sei x ∈ TsS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ -0 -(0) = s, -sodass γ -0 -(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 -⇒ 0 = (f ◦ γ) -0 -(0) = hgrad(f)(γ(0)), γ0 -(0)i -⇒ TsS ⊆ grad(f)(s) -⊥ -dim=2 ====⇒ TsS = (grad(f)(s))⊥ -Definition 72 -a) Ein Normalenfeld auf der regulären Fläche S ⊆ R -3 -ist eine Abbildung n : S → S -2 ⊆ -R -3 mit n(s) ∈ TsS -⊥ für jedes s ∈ S. -b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. -Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. -Im Folgenden werden diese Begriffe jedoch synonym benutzt. -Bemerkung 74 (Eigenschaften von Normalenfeldern) -a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞). -b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R -3 von s und eine lokale Parametrisierung -F : U → V von S um s, sodass auf F(U) = V ∩ S ein stetiges Normalenfeld existiert. 
-c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen -Parametrisierungen Fi -: Ui → Vi -, i ∈ I gibt, sodass für alle i, j ∈ F und alle -s ∈ Vi ∩ Vj ∩ S gilt: -det(Ds -Vi→Vj -z }| { -Fj ◦ F -−1 -i -| {z } -∈R3×3 -) > 0 -Beweis: Wird hier nicht geführt. -Beispiel 46 (Normalenfelder) -1) S = S -2 -, n1 = idS2 ist ein stetiges Normalenfeld. -Auch n2 = −idS2 ist ein stetiges Normalenfeld. -2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma￾lenfeld, aber kein stetiges Normalenfeld. -92 5.3. GAUSS-KRÜMMUNG -Abbildung 5.1: Möbiusband -5.3 Gauß-Krümmung -Bemerkung 75 -Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, kxk = 1. -Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R -3 -. -Dann gibt es eine Umgebung V ⊆ R -3 von s, sodass -C := (s + E) ∩ S ∩ V -das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] → S enthält mit γ(0) = s -und γ -0 -(0) = x. -Beweis: „Satz über implizite Funktionen“1 -Definition 73 -In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in der Ebene -(s + E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ -0 -(0). -Man schreibt: κNor(s, x) := κγ(0) -Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. -Beispiel 47 (Gauß-Krümmung) -1) S = S -2 = V (X2 + Y -2 + Z -2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id, -s = (0, 0, 1), x = (1, 0, 0) -⇒ E = R · x + R · n(s) (x, z-Ebene) -C = E ∩ S ist Kreislinie -κNor(s, x) = 1 -r = 1 -2) S = V (X2 + Z -2 − 1) ⊆ R -3 -ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0) -x1 = (0, 1, 0) ⇒ E1 = R · e1 + R · e2 (x, y-Ebene) -S ∩ E1 = V (X2 + Y -2 − 1) ∩ E, Kreislinie in E -⇒ κNor(s, x1) = ±1 -x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene) + 5.2. TANGENTIALEBENE +d) Sei S = V (f) eine reguläre Fläche in R +3 +, also f : V → R eine C∞-Funktion, V ⊆ R +3 +offen, grad(f)(x) 6= 0 für alle x ∈ S. 
+Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S. +Beweis: +a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist +eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein +Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2. +b) Hier kann man wie in Punkt a) argumentieren +c) TsS = {x ∈ R +3 +|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ(0) = +s und γ +0 +(0) = x} +Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +d) Sei x ∈ TsS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ +0 +(0) = s, +sodass γ +0 +(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 +⇒ 0 = (f ◦ γ) +0 +(0) = hgrad(f)(γ(0)), γ0(0)i +⇒ TsS ⊆ grad(f)(s) +⊥ +dim=2 ====⇒ TsS = (grad(f)(s))⊥ +Definition 72 +a) Ein Normalenfeld auf der regulären Fläche S ⊆ R +3 +ist eine Abbildung n : S → S +2 ⊆ +R +3 mit n(s) ∈ TsS⊥ für jedes s ∈ S. +b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. +Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. +Im Folgenden werden diese Begriffe jedoch synonym benutzt. +Bemerkung 74 (Eigenschaften von Normalenfeldern) +a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞). +b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R +3 von s und eine lokale Parametrisierung +F : U → V von S um s, sodass auf F(U) = V ∩ S ein stetiges Normalenfeld existiert. +c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen +Parametrisierungen Fi: Ui → Vi, i ∈ I gibt, sodass für alle i, j ∈ F und alle +s ∈ Vi ∩ Vj ∩ S gilt: +det(Ds +Vi→Vj +z }| { +Fj ◦ F +−1 +i +| {z } +∈R3×3 +) > 0 +Beweis: Wird hier nicht geführt. +Beispiel 46 (Normalenfelder) +1) S = S +2 +, n1 = idS2 ist ein stetiges Normalenfeld. +Auch n2 = −idS2 ist ein stetiges Normalenfeld. +2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, aber kein stetiges Normalenfeld. 
+ 5.3. GAUSS-KRÜMMUNG +Abbildung 5.1: Möbiusband +5.3 Gauß-Krümmung +Bemerkung 75 +Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, kxk = 1. +Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R +3 +. +Dann gibt es eine Umgebung V ⊆ R +3 von s, sodass +C := (s + E) ∩ S ∩ V +das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] → S enthält mit γ(0) = s +und γ +0 +(0) = x. +Beweis: „Satz über implizite Funktionen“1 +Definition 73 +In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in der Ebene +(s + E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ +0 +(0). +Man schreibt: κNor(s, x) := κγ(0) +Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. +Beispiel 47 (Gauß-Krümmung) +1) S = S +2 = V (X2 + Y2 + Z2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id, +s = (0, 0, 1), x = (1, 0, 0) +⇒ E = R · x + R · n(s) (x, z-Ebene) +C = E ∩ S ist Kreislinie +κNor(s, x) = 1 +r = 1 +2) S = V (X2 + Z +2 − 1) ⊆ R3 +ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0) +x1 = (0, 1, 0) ⇒ E1 = R · e1 + R · e2 (x, y-Ebene) +S ∩ E1 = V (X2 + Y +2 − 1) ∩ E, Kreislinie in E +⇒ κNor(s, x1) = ±1 +x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene) 1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II -93 5.3. GAUSS-KRÜMMUNG -V ∩ E2 ∩ S = - -(1, 0, z) ∈ R -3 - - z ∈ R - -ist eine Gerade -⇒ κNor(s, x2) = 0 -3) S = V (X2 − Y -2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) -x1 = (1, 0, 0), n(s) = (0, 0, 1) -x2 = (0, 1, 0) -κNor(s, x1) = 2 -κNor(s, x2) = −2 -−1.5 −1 −0.5 -0 -0.5 -1 -1.5 -−1 -0 -1 -0 -1 -2 -3 -4 -5 -x -y -z -(a) S = V (X -2 + Z -2 − 1) -−2 −1.5 −1 −0.5 -0 -0.5 -1 -1.5 -2 -−2 -−1 -0 -1 -2 -−2 -0 -2 -x -y -z −4 -−2 -0 -2 -4 -f(x, y) -(b) S = V (X -2 − Y -2 − Z) -Abbildung 5.2: Beispiele für reguläre Flächen -Definition 74 -Sei S ⊆ R -3 -eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. 
-γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und -γ -00(0) 6= 0. -Sei n(0) := γ -00(0) -kγ -00(0)k -. Zerlege -n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS) -⊥ -Dann ist n(0)⊥ = hn(0), n(s)i · n(s) -κNor(s, γ) := hγ -00(0), n(s)i die Normalkrümmung. -Bemerkung 76 -Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ). -Beweis: γ -00(0) = γ -00(0), da γ -0 -(0) = −γ -0 -(0). -Es gilt: κNor(s, γ) hängt nur von |γ -0 -(0)| ab und ist gleich κNor(s, γ0 -(0)). -Bemerkung 77 -Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. -Sei T -1 -s S = { x ∈ TsS | kxk = 1 } ∼= S -1 -. Dann ist -κ -n -Nor(s) : T -1 -s S → R, x 7→ κNor(s, x) -eine glatte Funktion und Bild κ -n -Nor(s) ist ein abgeschlossenes Intervall. -Definition 75 + 5.3. GAUSS-KRÜMMUNG +V ∩ E2 ∩ S = + +(1, 0, z) ∈ R +3 + + z ∈ R + +ist eine Gerade +⇒ κNor(s, x2) = 0 +3) S = V (X2 − Y +2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) +x1 = (1, 0, 0), n(s) = (0, 0, 1) +x2 = (0, 1, 0) +κNor(s, x1) = 2 +κNor(s, x2) = −2 +−1.5 −1 −0.5 +0 +0.5 +1 +1.5 +−1 +0 +1 +0 +1 +2 +3 +4 +5 +x +y +z +(a) S = V (X +2 + Z2 − 1) +−2 −1.5 −1 −0.5 +0 +0.5 +1 +1.5 +2 +−2 +−1 +0 +1 +2 +−2 +0 +2 +x +y +z −4 +−2 +0 +2 +4 +f(x, y) +(b) S = V (X +2 − Y2 − Z) +Abbildung 5.2: Beispiele für reguläre Flächen +Definition 74 +Sei S ⊆ R +3 +eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. +γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und +γ +00(0) 6= 0. +Sei n(0) := γ +00(0) +kγ +00(0)k +. Zerlege +n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS) +⊥ +Dann ist n(0)⊥ = hn(0), n(s)i · n(s) +κNor(s, γ) := hγ +00(0), n(s)i die Normalkrümmung. +Bemerkung 76 +Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ). +Beweis: γ +00(0) = γ00(0), da γ0 +(0) = −γ +0 +(0). +Es gilt: κNor(s, γ) hängt nur von |γ +0 +(0)| ab und ist gleich κNor(s, γ0(0)). 
+Bemerkung 77 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. -94 5.3. GAUSS-KRÜMMUNG -a) κ -n -1 -(s) : = min  -κ -n -Nor(s, x) - - x ∈ T -1 -s S - -und -κ -n -2 -(s) : = max  -κ -n -Nor(s, x) - - x ∈ T -1 -s S - -heißen Hauptkrümmungen von S in s. -b) K(s) := κ -n -1 -(s) · κ -n -2 -(s) heißt Gauß-Krümmung von S in s. -Bemerkung 78 -Ersetzt man n durch −n, so gilt: -κ -−n -Nor(s, x) = −κ -n -Nor(x) ∀x ∈ T -1 -s S -⇒ κ -−n -1 -(s) = −κ -n -2 -(s) -κ -−n -2 -(s) = −κ -n -1 -(s) -und K−n -(s) = Kn -(s) =: K(s) -Beispiel 48 -1) S = S -2 -. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S -2 -⇒ K(s) = 1 -2) Zylinder: -κ1(s) = 0, κ2(s) = 1 ⇒ K(s) = 0 -3) Sattelpunkt auf hyperbolischem Paraboloid: -κ1(s) < 0, κ2(s) = 0 → K(s) < 0 -4) S = Torus. Siehe Abbildung 5.3 -s1 -s2 -s3 -Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0 -Bemerkung 79 +Sei T +1 +s S = { x ∈ TsS | kxk = 1 } ∼= S +1 +. Dann ist +κ +n +Nor(s) : T +1 +s S → R, x 7→ κNor(s, x) +eine glatte Funktion und Bild κ +n +Nor(s) ist ein abgeschlossenes Intervall. +Definition 75 +Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. + 5.3. GAUSS-KRÜMMUNG +a) κ +n +1 +(s) : = min κ +n +Nor(s, x) + + x ∈ T +1 +s S + +und +κ +n +2 +(s) : = max κ +n +Nor(s, x) + + x ∈ T +1 +s S + +heißen Hauptkrümmungen von S in s. +b) K(s) := κ +n +1 +(s) · κ +n +2 +(s) heißt Gauß-Krümmung von S in s. +Bemerkung 78 +Ersetzt man n durch −n, so gilt: +κ +−n +Nor(s, x) = −κ +n +Nor(x) ∀x ∈ T +1 +s S +⇒ κ +−n +1 +(s) = −κ +n +2 +(s) +κ +−n +2 +(s) = −κ +n +1 +(s) +und K−n(s) = Kn(s) =: K(s) +Beispiel 48 +1) S = S +2 +. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S +2 +⇒ K(s) = 1 +2) Zylinder: +κ1(s) = 0, κ2(s) = 1 ⇒ K(s) = 0 +3) Sattelpunkt auf hyperbolischem Paraboloid: +κ1(s) < 0, κ2(s) = 0 → K(s) < 0 +4) S = Torus. Siehe Abbildung 5.3 +s1 +s2 +s3 +Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0 +Bemerkung 79 Sei S eine reguläre Fläche, s ∈ S ein Punkt. -95 5.4. 
ERSTE UND ZWEITE FUNDAMENTALFORM -a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s. -b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s. -5.4 Erste und zweite Fundamentalform -Sei S ⊆ R -3 -eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine -lokale Parametrisierung von S um s. Weiter sei p := F -−1 -(s). -Definition 76 -Sei IS ∈ R -2×2 definiert als -IS : =  -g1,1(s) g1,2(s) -g1,2(s) g2,2(s) - -= - -E(s) F(s) -F(s) G(s) - -mit gi,j = gs(DpF(ei), DpF(ej )) -= h -∂F -∂ui -(p), -∂F -∂uj -(p)i i, j ∈ { 1, 2 } -Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F. -Bemerkung 80 -a) Die Einschränkung des Standardskalarproduktes des R -3 auf TsS macht TsS zu einem -euklidischen Vektorraum. -b) { DpF(e1), DpF(e2) } ist eine Basis von TsS. -c) Bzgl. der Basis { DpF(e1), DpF(e2) } hat das Standardskalarprodukt aus Bemer￾kung 80.a die Darstellungsmatrix IS. -d) gi,j (s) ist eine differenzierbare Funktion von s. -Bemerkung 81 -det(IS) = - - - - -∂F -∂u1 -(p) × -∂F -∂u2 -(p) - - - - -2 -Beweis: Sei ∂F -∂u1 -(p) = - - -x1 -x2 -x3 - - , -∂F -∂u2 -(p) = - - -y1 -y2 -y3 - - -Dann ist ∂F -∂u1 -(p) × -∂F -∂u2 -(p) = - - -z1 -z2 -z3 - - mit -z1 = x2y3 − x3y2 -z2 = x3y1 − x1y3 -z3 = x1y2 − x2y1 -⇒ k ∂F -∂u1 -(p) × -∂F -∂u2 -(p)k = z -2 -1 + z -2 -2 + z -2 + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s. +b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s. +5.4 Erste und zweite Fundamentalform +Sei S ⊆ R +3 +eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine +lokale Parametrisierung von S um s. Weiter sei p := F +−1 +(s). 
+Definition 76 +Sei IS ∈ R +2×2 definiert als +IS : =  +g1,1(s) g1,2(s) +g1,2(s) g2,2(s) + += + +E(s) F(s) +F(s) G(s) + +mit gi,j = gs(DpF(ei), DpF(ej )) += h +∂F +∂ui +(p), +∂F +∂uj +(p)i i, j ∈ { 1, 2 } +Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F. +Bemerkung 80 +a) Die Einschränkung des Standardskalarproduktes des R +3 auf TsS macht TsS zu einem +euklidischen Vektorraum. +b) { DpF(e1), DpF(e2) } ist eine Basis von TsS. +c) Bzgl. der Basis { DpF(e1), DpF(e2) } hat das Standardskalarprodukt aus Bemerkung 80.a die Darstellungsmatrix IS. +d) gi,j (s) ist eine differenzierbare Funktion von s. +Bemerkung 81 +det(IS) = + + + + +∂F +∂u1 +(p) × +∂F +∂u2 +(p) + + + + +2 +Beweis: Sei ∂F +∂u1 +(p) = + + +x1 +x2 +x3 + + , +∂F +∂u2 +(p) = + + +y1 +y2 +y3 + + +Dann ist ∂F +∂u1 +(p) × +∂F +∂u2 +(p) = + + +z1 +z2 +z3 + + mit +z1 = x2y3 − x3y2 +z2 = x3y1 − x1y3 +z3 = x1y2 − x2y1 +⇒ k ∂F +∂u1 +(p) × +∂F +∂u2 +(p)k = z +2 +1 + z +2 +2 + z +2 +3 + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +det(IS) = g1,1g2,2 − g +2 +1,2 += +* + +x1 +x2 +x3 + + , + + +x1 +x2 +x3 + + ++ * + +y1 +y2 +y3 + + , + + +y1 +y2 +y3 + + ++ +− +* + +x1 +x2 +x3 + + , + + +y1 +y2 +y3 + + ++ +2 += (x +2 +1 + x +2 +2 + x +2 +3 +)(y +2 +1 + y +2 +2 + y +2 +3 +) − (x1y1 + x2y2 + x3y3) +2 +Definition 77 +a) Das Differential dA = +p +det(I)du1du2 heißt Flächenelement von S bzgl. der Parametrisierung F. +b) Für eine Funktion f : V → R heißt +Z +V +fdA := Z +U +f(F(u1, u2) +| {z } +=:s +) +p +det I(s)du1du2 +der Wert des Integrals von f über V , falls das Integral rechts existiert. +Bemerkung 82 +a) R +V +fdA ist unabhängig von der gewählten Parametrisierung. +b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. +Dann ist R +S +fdA wohldefiniert, falls (z. B.) S kompakt ist. +Etwa: +Z +S +fdA = +Xn +i=1 +Z +Vi +fdA +− +X +i6=j +Z +Vi∩Vj +fdA ++ +X +i,j,k +Z +Vi∩Vj∩Vk +fdA +− . . . +Beweis: +a) Mit Transformationsformel. 
+b) Ist dem Leser überlassen. +Proposition 5.1 +Sei S ⊆ R 3 -96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -det(IS) = g1,1g2,2 − g -2 -1,2 -= -* - -x1 -x2 -x3 - - , - - -x1 -x2 -x3 - - -+ * - -y1 -y2 -y3 - - , - - -y1 -y2 -y3 - - -+ -− -* - -x1 -x2 -x3 - - , - - -y1 -y2 -y3 - - -+ -2 -= (x -2 -1 + x -2 -2 + x -2 -3 -)(y -2 -1 + y -2 -2 + y -2 -3 -) − (x1y1 + x2y2 + x3y3) -2 -Definition 77 -a) Das Differential dA = -p -det(I)du1du2 heißt Flächenelement von S bzgl. der Para￾metrisierung F. -b) Für eine Funktion f : V → R heißt -Z -V -fdA := Z -U -f(F(u1, u2) -| {z } -=:s -) -p -det I(s)du1du2 -der Wert des Integrals von f über V , falls das Integral rechts existiert. -Bemerkung 82 -a) R -V -fdA ist unabhängig von der gewählten Parametrisierung. -b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. -Dann ist R -S -fdA wohldefiniert, falls (z. B.) S kompakt ist. -Etwa: -Z -S -fdA = -Xn -i=1 -Z -Vi -fdA -− -X -i6=j -Z -Vi∩Vj -fdA -+ -X -i,j,k -Z -Vi∩Vj∩Vk -fdA -− . . . -Beweis: -a) Mit Transformationsformel. -b) Ist dem Leser überlassen. -Proposition 5.1 -Sei S ⊆ R -3 -eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S -2 -. -Dann gilt: -a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S -2 durch -dsn(x) = d -dt -n(s„+“tx -| {z } -Soll auf Fläche S bleiben -) - - - -t=0 +eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S +2 +. +Dann gilt: +a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S +2 durch +dsn(x) = d +dt +n(s„+“tx +| {z } +Soll auf Fläche S bleiben +) + + + +t=0 Die Abbildung dsn heißt Weingarten-Abbildung -97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -b) Tn(s)S -2 = TsS. -c) dsn ist ein Endomorphismus von TsS. -d) dsn ist selbstadjungiert bzgl. des Skalarproduktes IS. + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +b) Tn(s)S +2 = TsS. +c) dsn ist ein Endomorphismus von TsS. +d) dsn ist selbstadjungiert bzgl. des Skalarproduktes IS. 
Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. -98 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -Beweis: -a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -b) Tn(S)S -2 = hn(s)i -⊥ = TsS -c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus. -d) Zu zeigen: ∀x, y ∈ IsS : hx, dsn(y)i = hdsn(x), yi -Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die -Basisvektoren zu zeigen. -Sei xi = DpF(ei) = ∂F -∂ui -(p) i = 1, 2 -Beh.: hxi -, dsn(xj )i = h -∂ -2F -∂ui∂uj -(p), dsn(xi)i -⇒ h ∂ -2F -∂ui∂uj -(p), dsn(xi)i = hxj , dsn(xi)i -Bew.: 0 = h -∂F -∂u (p + tej ), n(p + tej )i -⇒ 0 = -d -dt - -h -∂F -∂u (p + tej ), n(p + tej )i - - - -t=0 -= h -d -dt -∂F -∂ui -(p + tej ) -| {z } -∂2F -∂uj∂ui -(p) - - - -t=0 -, n(s)i + hxi -, dsn DpF(ej ) -| {z } -xj -i -Definition 78 -Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamental￾form von S in s bzgl. F. -Man schreibt: IIs(x, y) = h−dsn(x), yi = Is(−dsn(x), y) -Bemerkung 83 -Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix -(h -(s) -i,j )i,j=1,2 mit hi,j (s) = h -∂ -2F -∂ui∂uj -(p), n(s)i -Proposition 5.2 -Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: -κNor(s, γ) = IIs(γ -0 -(0), γ0 -(0)) -Beweis: Nach Definition 74 ist κNor(s, γ) = hγ -00(0), n(s)i. Nach Voraussetzung gilt -n(γ(t)) ⊥ γ -0 -(t) ⇔ hγ -00(0), n(s)i = 0 -Die Ableitung nach t ergibt -0 = -d -dt -(hn(γ(t)), γ0 -(t)) -= - -d -dt -n(γ(t)) - - - -t=0 -, γ0 -(0) -+ hn(s), γ00(0)i -99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -= hdsn(γ -0 -(0)), γ0 -(0)i + κNor(s, γ) -= −IIs(γ -0 -(0), γ0 -(0)) + κNor(s, γ) -Folgerung 5.3 -Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: -κNor(s, γ) = κNor(s, γ0 -(0)) -Satz 5.4 -Sei S ⊆ R -3 -eine reguläre, orientierbare Fläche und s ∈ S. -a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs. 
-b) Für die Gauß-Krümmung gilt: K(s) = det(IIs) -Beweis: -a) IIs ist symmetrisch, IsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von -IIs. Ist x ∈ TsS, kxk = 1, so gibt es ϕ ∈ [0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2. -Seien λ1, λ2 die Eigenwerte von IIs, also IIs(yi -, yi) = λi -. Dann gilt: -IIs(x, x) = cos2 ϕλ1 + sin2 ϕλ2 -= (1 − sin2 ϕ)λ1 + sin2 ϕλ2 -= λ1 + sin2 ϕ(λ2 − λ1) ≥ λ1 -= cos2 ϕ + (1 − cos2 ϕ)λ2 -= λ2 − cos2 ϕ(λ2 − λ1) ≤ λ2 -Prop. 5.2 -=====⇒ λ1 = min  -κNor(s, x) - - x ∈ T -1 -s S - -λ2 = max  -κNor(s, x) - - x ∈ T -1 -s S - -Satz 5.5 (Satz von Gauß-Bonnet) -Sei S ⊆ R -3 -eine kompakte orientierbare reguläre Fläche. Dann gilt: -Z -S -K(s)dA = 2πχ(S) -Dabei ist χ(S) die Euler-Charakteristik von S. -Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +Beweis: +a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +b) Tn(S)S +2 = hn(s)i⊥ = TsS +c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus. +d) Zu zeigen: ∀x, y ∈ IsS : hx, dsn(y)i = hdsn(x), yi +Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die +Basisvektoren zu zeigen. +Sei xi = DpF(ei) = ∂F +∂ui +(p) i = 1, 2 +Beh.: hxi, dsn(xj )i = h +∂ +2F +∂ui∂uj +(p), dsn(xi)i +⇒ h ∂ +2F +∂ui∂uj +(p), dsn(xi)i = hxj , dsn(xi)i +Bew.: 0 = h +∂F +∂u (p + tej ), n(p + tej )i +⇒ 0 = +d +dt + +h +∂F +∂u (p + tej ), n(p + tej )i + + + +t=0 += h +d +dt +∂F +∂ui +(p + tej ) +| {z } +∂2F +∂uj∂ui +(p) + + + +t=0 +, n(s)i + hxi, dsn DpF(ej ) +| {z } +xj +i +Definition 78 +Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamentalform von S in s bzgl. F. 
+Man schreibt: IIs(x, y) = h−dsn(x), yi = Is(−dsn(x), y) +Bemerkung 83 +Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix +(h +(s) +i,j )i,j=1,2 mit hi,j (s) = h +∂ +2F +∂ui∂uj +(p), n(s)i +Proposition 5.2 +Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: +κNor(s, γ) = IIs(γ +0 +(0), γ0(0)) +Beweis: Nach Definition 74 ist κNor(s, γ) = hγ +00(0), n(s)i. Nach Voraussetzung gilt +n(γ(t)) ⊥ γ +0 +(t) ⇔ hγ +00(0), n(s)i = 0 +Die Ableitung nach t ergibt +0 = +d +dt +(hn(γ(t)), γ0(t)) += + +d +dt +n(γ(t)) + + + +t=0 +, γ0(0)+ hn(s), γ00(0)i + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM += hdsn(γ +0 +(0)), γ0(0)i + κNor(s, γ) += −IIs(γ +0 +(0), γ0(0)) + κNor(s, γ) +Folgerung 5.3 +Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: +κNor(s, γ) = κNor(s, γ0(0)) +Satz 5.4 +Sei S ⊆ R +3 +eine reguläre, orientierbare Fläche und s ∈ S. +a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs. +b) Für die Gauß-Krümmung gilt: K(s) = det(IIs) +Beweis: +a) IIs ist symmetrisch, IsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von +IIs. Ist x ∈ TsS, kxk = 1, so gibt es ϕ ∈ [0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2. +Seien λ1, λ2 die Eigenwerte von IIs, also IIs(yi, yi) = λi. Dann gilt: +IIs(x, x) = cos2 ϕλ1 + sin2 ϕλ2 += (1 − sin2 ϕ)λ1 + sin2 ϕλ2 += λ1 + sin2 ϕ(λ2 − λ1) ≥ λ1 += cos2 ϕ + (1 − cos2 ϕ)λ2 += λ2 − cos2 ϕ(λ2 − λ1) ≤ λ2 +Prop. 5.2 +=====⇒ λ1 = min κNor(s, x) + + x ∈ T +1 +s S + +λ2 = max κNor(s, x) + + x ∈ T +1 +s S + +Satz 5.5 (Satz von Gauß-Bonnet) +Sei S ⊆ R +3 +eine kompakte orientierbare reguläre Fläche. Dann gilt: +Z +S +K(s)dA = 2πχ(S) +Dabei ist χ(S) die Euler-Charakteristik von S. +Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. -Lösungen der Übungsaufgaben -Lösung zu Aufgabe 1 -Teilaufgabe a) Es gilt: -(i) ∅, X ∈ TX. 
-(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈ -TX : U1 ∩ U2 ∈ TX. -(iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine -beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : -S -i∈I Ui ∈ TX -Also ist (X, TX) ein topologischer Raum. -Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x 6= y und die einzige Umgebung von x -ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. -(X, TX) ist also nicht hausdorffsch. -Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, TX) nach -(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, TX) -kein metrischer Raum sein kann. -Lösung zu Aufgabe 2 -Teilaufgabe a) -Beh.: ∀a ∈ Z : { a } ist abgeschlossen. -Sei a ∈ Z beliebig. Dann gilt: -Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de -schicken. -Teilaufgabe b) -Beh.: { −1, 1 } ist nicht offen -Bew.: durch Widerspruch -Annahme: { −1, 1 } ist offen. -Dann gibt es T ⊆ B, sodass S -M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele -Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente -⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist -nicht offen.  -Teilaufgabe c) +Lösungen der Übungsaufgaben +Lösung zu Aufgabe 1 +Teilaufgabe a) Es gilt: +(i) ∅, X ∈ TX. +(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈ +TX : U1 ∩ U2 ∈ TX. +(iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine +beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : +S +i∈I Ui ∈ TX +Also ist (X, TX) ein topologischer Raum. +Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x 6= y und die einzige Umgebung von x +ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. +(X, TX) ist also nicht hausdorffsch. 
+Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, TX) nach +(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, TX) +kein metrischer Raum sein kann. +Lösung zu Aufgabe 2 +Teilaufgabe a) +Beh.: ∀a ∈ Z : { a } ist abgeschlossen. +Sei a ∈ Z beliebig. Dann gilt: +Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de +schicken. +Teilaufgabe b) +Beh.: { −1, 1 } ist nicht offen +Bew.: durch Widerspruch +Annahme: { −1, 1 } ist offen. +Dann gibt es T ⊆ B, sodass S +M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele +Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente +⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist +nicht offen.  +Teilaufgabe c) Beh.: Es gibt unendlich viele Primzahlen. -101 Lösungen der Übungsaufgaben -Bew.: durch Widerspruch -Annahme: Es gibt nur endlich viele Primzahlen p ∈ P -Dann ist -Z \ { −1, +1 } -FS d. Arithmetik = -[ -p∈P -U0,p -endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich.  -Lösung zu Aufgabe 3 -(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form -Y -j∈J -Uj × -Y -i∈N,i6=j -Pi -wobei J ⊆ N endlich und Uj ⊆ Pj offen ist. -Beweis: Nach Definition der Produkttopologie bilden Mengen der Form -Y -i∈J -Uj × -Y -i∈N\J -Pi -wobei J ⊆ N endlich und Uj ⊆ Pj offen ∀j ∈ J eine Basis der Topologie. -Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen -Form.  -(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. -Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangs￾komponente Z ⊆ P. Da Z zusammenhängend ist und ∀i ∈ I : pi -: P → Pi -ist -stetig, ist pi(Z) ⊆ Pi zusammenhängend für alle i ∈ N. Die zusammenhängenden -Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder -pi(Z) ⊆ { 0 } oder pi(Z) ⊆ { 1 }. 
Es sei zi ∈ { 0, 1 } so, dass pi(Z) ⊆ { zi } für -alle i ∈ N. Dann gilt also: -pi(x) -| {z } -=xi -= zi = pi(y) -| {z } -=yi -∀i ∈ N -Somit folgt: x = y  -Lösung zu Aufgabe 4 -(a) Beh.: GLn(R) ist nicht kompakt. -Bew.: det : GLn(R) → R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 } -nicht kompakt. 22 -⇒ GLn(R) ist nicht kompakt.  -(b) Beh.: SL1(R) ist nicht kompakt, für n > 1 ist SLn(R) kompakt. -Bew.: Für SL1(R) gilt: SL1(R) =  -A ∈ R -1×1 - - det A = 1 -= -￾ -1 - ∼= { 1 }. -22 -⇒ SL1(R) -ist kompakt -102 Lösungen der Übungsaufgaben -SLn(R) ⊆ GLn(R) lässt sich mit einer Teilmenge des R -n -2 -identifizieren. Nach Satz 1.1 -sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere -nun für für n ∈ N≥2, m ∈ N: -Am = diagn -(m, -1 -m -, . . . , 1) -Dann gilt: det Am = 1, d. h. Am ∈ SLn(R), und Am ist unbeschränkt, da kAmk∞ = -m −−−−→ m→∞ -∞.  -(c) Beh.: P(R) ist kompakt. -Bew.: P(R) ∼= S -n/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil￾dung stetig. Da S -n als abgeschlossene und beschränkte Teilmenge des R -n+1 kompakt -ist 22⇒ P(R) ist kompakt.  -Lösung zu Aufgabe 5 -Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. -Definition 79 -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. -ϕ heißt Homomorphismus, wenn -∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2) -gilt. -Es folgt direkt: -1) Sei X = R mit der Standarttopologie und ϕ1 : idR und R = (R, +). Dann ist ϕ1 ein -Gruppenhomomorphismus und ein Homöomorphismus. -2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G → H, x 7→ x mod 3 ein -Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomor￾phismus. -3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine -Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup￾penhomomorphismus. -Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten -verwendet. 
-Lösung zu Aufgabe 6 -Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf -Seite 6. -Definition 80 -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. -ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. -Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen + Lösungen der Übungsaufgaben +Bew.: durch Widerspruch +Annahme: Es gibt nur endlich viele Primzahlen p ∈ P +Dann ist +Z \ { −1, +1 } +FS d. Arithmetik = +[ +p∈P +U0,p +endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich.  +Lösung zu Aufgabe 3 +(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form +Y +j∈J +Uj × +Y +i∈N,i6=j +Pi +wobei J ⊆ N endlich und Uj ⊆ Pj offen ist. +Beweis: Nach Definition der Produkttopologie bilden Mengen der Form +Y +i∈J +Uj × +Y +i∈N\J +Pi +wobei J ⊆ N endlich und Uj ⊆ Pj offen ∀j ∈ J eine Basis der Topologie. +Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen +Form.  +(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. +Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangskomponente Z ⊆ P. Da Z zusammenhängend ist und ∀i ∈ I : pi +: P → Piist +stetig, ist pi(Z) ⊆ Pi zusammenhängend für alle i ∈ N. Die zusammenhängenden +Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder +pi(Z) ⊆ { 0 } oder pi(Z) ⊆ { 1 }. Es sei zi ∈ { 0, 1 } so, dass pi(Z) ⊆ { zi } für +alle i ∈ N. Dann gilt also: +pi(x) +| {z } +=xi += zi = pi(y) +| {z } +=yi +∀i ∈ N +Somit folgt: x = y  +Lösung zu Aufgabe 4 +(a) Beh.: GLn(R) ist nicht kompakt. +Bew.: det : GLn(R) → R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 } +nicht kompakt. 22⇒ GLn(R) ist nicht kompakt.  +(b) Beh.: SL1(R) ist nicht kompakt, für n > 1 ist SLn(R) kompakt. +Bew.: Für SL1(R) gilt: SL1(R) = A ∈ R +1×1 + + det A = 1 += + +1 + ∼= { 1 }. +22 +⇒ SL1(R) +ist kompakt. 
+ Lösungen der Übungsaufgaben +SLn(R) ⊆ GLn(R) lässt sich mit einer Teilmenge des R +n +2 +identifizieren. Nach Satz 1.1 +sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere +nun für für n ∈ N≥2, m ∈ N: +Am = diagn(m, +1 +m +, . . . , 1) +Dann gilt: det Am = 1, d. h. Am ∈ SLn(R), und Am ist unbeschränkt, da kAmk∞ = +m −−−−→ m→∞∞.  +(c) Beh.: P(R) ist kompakt. +Bew.: P(R) ∼= S +n/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung stetig. Da S +n als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt +ist 22⇒ P(R) ist kompakt.  +Lösung zu Aufgabe 5 +Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. +Definition 79 +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. +ϕ heißt Homomorphismus, wenn +∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2) +gilt. +Es folgt direkt: +1) Sei X = R mit der Standarttopologie und ϕ1 : idR und R = (R, +). Dann ist ϕ1 ein +Gruppenhomomorphismus und ein Homöomorphismus. +2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G → H, x 7→ x mod 3 ein +Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomorphismus. +3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine +Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Gruppenhomomorphismus. +Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten +verwendet. +Lösung zu Aufgabe 6 +Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf +Seite 6. +Definition 80 +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. +ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. +Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. -103 Lösungen der Übungsaufgaben -Lösung zu Aufgabe 7 -(a) Vor.: Sei M eine topologische Mannigfaltigkeit. 
-Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend -Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung -direkt aus Bemerkung 23. -„⇐“: Seien x, y ∈ M und -Z := { z ∈ M | ∃Weg von x nach z } -Es gilt: -(i) Z 6= ∅, da M lokal wegzusammenhängend ist -(ii) Z ist offen, da M lokal wegzusammenhängend ist -(iii) Z -C := { z˜ ∈ M | @Weg von x nach z˜ } ist offen -Da M eine Mannigfaltigkeit ist, existiert zu jedem z˜ ∈ Z -C eine offene und -wegzusammenhängende Umgebung Uz˜ ⊆ M. -Es gilt sogar Uz˜ ⊆ Z -C, denn gäbe es ein Uz˜ 3 z ∈ Z, so gäbe es Wege γ2 : -[0, 1] → M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M, γ1(0) = z, γ ˜ 1(1) = z. -Dann wäre aber -γ : [0, 1] → M, -γ(x) = ( -γ1(2x) falls 0 ≤ x ≤ -1 -2 -γ2(2x − 1) falls 1 -2 < x ≤ 1 -ein stetiger Weg von z˜ nach x ⇒ Widerspruch. -Da M zusammenhängend ist und M = Z -|{z} -offen -∪ Z -C -|{z} -offen -, sowie Z 6= ∅ folgt Z -C = ∅. -Also ist M = Z wegzusammenhängend.  -(b) Beh.: X ist wegzusammenhängend. -Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R. -Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte -01 und 02. -Da (R\ { 0 })∪ { 01 } homöomorph zu R ist, exisitert ein Weg γ1 von 01 zu einem -beliebigen Punkt a ∈ R \ { 0 }. -Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein -Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach -02.  -Lösung zu Aufgabe 9 + Lösungen der Übungsaufgaben +Lösung zu Aufgabe 7 +(a) Vor.: Sei M eine topologische Mannigfaltigkeit. +Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend +Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung +direkt aus Bemerkung 23. 
+„⇐“: Seien x, y ∈ M und +Z := { z ∈ M | ∃Weg von x nach z } +Es gilt: +(i) Z 6= ∅, da M lokal wegzusammenhängend ist +(ii) Z ist offen, da M lokal wegzusammenhängend ist +(iii) Z +C := { z˜ ∈ M | @Weg von x nach z˜ } ist offen +Da M eine Mannigfaltigkeit ist, existiert zu jedem z˜ ∈ Z +C eine offene und +wegzusammenhängende Umgebung Uz˜ ⊆ M. +Es gilt sogar Uz˜ ⊆ Z +C, denn gäbe es ein Uz˜ 3 z ∈ Z, so gäbe es Wege γ2 : +[0, 1] → M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M, γ1(0) = z, γ ˜ 1(1) = z. +Dann wäre aber +γ : [0, 1] → M, +γ(x) = ( +γ1(2x) falls 0 ≤ x ≤ +1 +2 +γ2(2x − 1) falls 1 +2 < x ≤ 1 +ein stetiger Weg von z˜ nach x ⇒ Widerspruch. +Da M zusammenhängend ist und M = Z +|{z} +offen +∪ Z +C +|{z} +offen +, sowie Z 6= ∅ folgt Z +C = ∅. +Also ist M = Z wegzusammenhängend.  +(b) Beh.: X ist wegzusammenhängend. +Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R. +Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte +01 und 02. +Da (R\ { 0 })∪ { 01 } homöomorph zu R ist, exisitert ein Weg γ1 von 01 zu einem +beliebigen Punkt a ∈ R \ { 0 }. +Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein +Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach +02.  +Lösung zu Aufgabe 9 Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈ X und 4ABC ein Dreieck. -104 Lösungen der Übungsaufgaben -(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB -Bew.: Sei AB ∼= AC. -⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. -⇒ ϕ(∠ABC) = ∠ACB -⇒ ∠ABC ∼= ∠ACB  -(b) Beh.: Der längeren Seite von 4ABC liegt der größere Winkel gegenüber und umge￾kehrt. -Bew.: Sei d(A, C) > d(A, B). Nach §3 (i) gibt es C -0 ∈ AC+ mit d(A, C0 -) = d(A, B) -⇒ C -0 -liegt zwischen A und C. -Es gilt ]ABC0 < ]ABC und aus Aufgabe 9 (a) folgt: ]ABC0 = ]AC0B. -∠BC0A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66 =====⇒ ]BC0A > ]BCA -⇒ ]BCA < ]BC0A = ]ABC0 < ]ABC Sei umgekehrt ]ABC > ]BCA, kann -wegen 1. 
Teil von Aufgabe 9 (b) nicht d(A, B) > d(A, C) gelten. -Wegen Aufgabe 9 (a) kann nicht d(A, B) = d(A, C) gelten. -⇒ d(A, B) < d(A, C)  -(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g -Beh.: ∃! Lot -Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden -Halbebenen bzgl. g. -⇒ ϕ(P)P schneidet g in F. -Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g -⇒ ϕ(P)P schneidet g in F. -Sei A ∈ g \ { F }. Dann gilt ϕ(∠AF P) = ∠AF ϕ(P) = π ⇒ ∠AF P ist rechter Winkel. -Gäbe es nun G ∈ g \ { F }, so dass P G weiteres Lot von P auf g ist, wäre 4P F G -ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). -· -· -A -G -P -F -g -Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P -Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π -⇒ G gibt es nicht.  -Lösung zu Aufgabe 10 -Sei f k h und o. B. d. A. f k g. -f ∦ h ⇒ f ∩ h =6 ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele + Lösungen der Übungsaufgaben +(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB +Bew.: Sei AB ∼= AC. +⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. +⇒ ϕ(∠ABC) = ∠ACB +⇒ ∠ABC ∼= ∠ACB  +(b) Beh.: Der längeren Seite von 4ABC liegt der größere Winkel gegenüber und umgekehrt. +Bew.: Sei d(A, C) > d(A, B). Nach §3 (i) gibt es C +0 ∈ AC+ mit d(A, C0 +) = d(A, B) +⇒ C +0 +liegt zwischen A und C. +Es gilt ]ABC0 < ]ABC und aus Aufgabe 9 (a) folgt: ]ABC0 = ]AC0B. +∠BC0A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66 =====⇒ ]BC0A > ]BCA +⇒ ]BCA < ]BC0A = ]ABC0 < ]ABC Sei umgekehrt ]ABC > ]BCA, kann +wegen 1. Teil von Aufgabe 9 (b) nicht d(A, B) > d(A, C) gelten. +Wegen Aufgabe 9 (a) kann nicht d(A, B) = d(A, C) gelten. +⇒ d(A, B) < d(A, C)  +(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g +Beh.: ∃! Lot +Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden +Halbebenen bzgl. g. +⇒ ϕ(P)P schneidet g in F. +Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. 
g +⇒ ϕ(P)P schneidet g in F. +Sei A ∈ g \ { F }. Dann gilt ϕ(∠AF P) = ∠AF ϕ(P) = π ⇒ ∠AF P ist rechter Winkel. +Gäbe es nun G ∈ g \ { F }, so dass P G weiteres Lot von P auf g ist, wäre 4P F G +ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). +· +· +A +G +P +F +g +Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P +Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π +⇒ G gibt es nicht.  +Lösung zu Aufgabe 10 +Sei f k h und o. B. d. A. f k g. +f ∦ h ⇒ f ∩ h 6= ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele zu g durch x, da x /∈ g. Diese ist f, da x ∈ f und f k g. Da aber x ∈ h, kann h nicht -105 Lösungen der Übungsaufgaben -parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f 6= h). ⇒ g ∦ h  -Lösung zu Aufgabe 11 -Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem 4ABC und 4A0B0C -0 -Dreiecke, für die gilt: -d(A, B) = d(A -0 -, B0 -) -d(A, C) = d(A -0 -, C0 -) -d(B, C) = d(B -0 -, C0 -) -Sei ϕ die Isometrie mit ϕ(A) = A0 -, ϕ(B) = B0 und ϕ(C -0 -) liegt in der selben Halbebene -bzgl. AB wie C. Diese Isometrie existiert wegen §4. -Es gilt d(A, C) = d(A0 -, C0 -) = d(ϕ(A0 -), ϕ(C -0 -)) = d(A, ϕ(C -0 -)) und d(B, C) = d(B0 -, C0 -) = -d(ϕ(B0 -), ϕ(C -0 -)) = d(B, ϕ(C -0 -)). -Bem. 62 =====⇒ C = ϕ(C). -Es gilt also ϕ(4A0B0C -0 + Lösungen der Übungsaufgaben +parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f =6 h). ⇒ g ∦ h  +Lösung zu Aufgabe 11 +Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem 4ABC und 4A0B0C +0 +Dreiecke, für die gilt: +d(A, B) = d(A +0 +, B0) +d(A, C) = d(A +0 +, C0) +d(B, C) = d(B +0 +, C0) +Sei ϕ die Isometrie mit ϕ(A) = A0, ϕ(B) = B0 und ϕ(C +0 +) liegt in der selben Halbebene +bzgl. AB wie C. Diese Isometrie existiert wegen §4. +Es gilt d(A, C) = d(A0, C0) = d(ϕ(A0), ϕ(C +0 +)) = d(A, ϕ(C +0 +)) und d(B, C) = d(B0, C0) = +d(ϕ(B0), ϕ(C +0 +)) = d(B, ϕ(C +0 +)). +Bem. 62 =====⇒ C = ϕ(C). 
+Es gilt also ϕ(4A0B0C +0 ) = 4ABC.  -Bildquellen -Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. -Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. -Abb. 0.1a S -2 -: Tom Bombadil, tex.stackexchange.com/a/42865 -Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069 -Abb. 0.1e T -2 -: Jake, tex.stackexchange.com/a/70979/5645 -Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections -Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie: -– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png -– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png -– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png -– 62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png -Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3) -Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring. -png -Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration. -png -Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014. -Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 -Abb. 3.10 Überlagerung von S -1 mit R: Alex, tex.stackexchange.com/a/149706/5645 -Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint, -commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png -Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645 +Bildquellen +Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. +Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. +Abb. 0.1a S +2 +: Tom Bombadil, tex.stackexchange.com/a/42865 +Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069 +Abb. 0.1e T +2 +: Jake, tex.stackexchange.com/a/70979/5645 +Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections +Abb. 
1.11 Knoten von Jim.belk aus der „Blue knots“-Serie: +– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png +– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png +– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png +– 62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png +Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3) +Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring. +png +Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration +png +Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014. +Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 +Abb. 3.10 Überlagerung von S +1 mit R: Alex, tex.stackexchange.com/a/149706/5645 +Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint, +commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png +Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645 Abb. 5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 -Abkürzungsverzeichnis -Beh. Behauptung -Bew. Beweis -bzgl. bezüglich -bzw. beziehungsweise -ca. circa -d. h. das heißt -Def. Definition -etc. et cetera -ex. existieren -Hom. Homomorphismus -o. B. d. A. ohne Beschränkung der Allgemeinheit -Prop. Proposition -sog. sogenannte -Vor. Voraussetzung -vgl. vergleiche -z. B. zum Beispiel -zhgd. zusammenhängend +Abkürzungsverzeichnis +Beh. Behauptung +Bew. Beweis +bzgl. bezüglich +bzw. beziehungsweise +ca. circa +d. h. das heißt +Def. Definition +etc. et cetera +ex. existieren +Hom. Homomorphismus +o. B. d. A. ohne Beschränkung der Allgemeinheit +Prop. Proposition +sog. sogenannte +Vor. Voraussetzung +vgl. vergleiche +z. B. zum Beispiel +zhgd. zusammenhängend z. z. zu zeigen -Ergänzende Definitionen und Sätze -Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle -benötigten Begriffe definiert und erklärt werden. 
Die folgenden Begriffe wurden zwar verwendet, -aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra -und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. -Definition 81 -Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 } -mit xn → x0. -Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra -entnommen: -Definition 82 -Es seien V und W K-Vektorräume und A(V ) und A(W) die zugehörigen affinen Räume. -Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1 -gilt: -f(λa + µb) = λf(a) + µf(b) -Definition 83 -Sei V ein Vektorraum und S ⊆ V eine Teilmenge. -S heißt eine Orthonormalbasis von V , wenn gilt: -(i) S ist eine Basis von V -(ii) ∀v ∈ S : kvk = 1 -(iii) ∀v1, v2 ∈ S : v1 6= v2 ⇒ hv1, v2i = 0 -Satz (Zwischenwertsatz) -Sei a < b und f ∈ C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder -f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0. -Definition 84 -Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. -v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv. -Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f. -Satz (Binomischer Lehrsatz) -Sei x, y ∈ R. Dann gilt: -(x + y) -n = -Xn -k=0 - -n -k - -x -n−k -y -k ∀n ∈ N0 -Definition 85 -Seien a, b ∈ R -3 Vektoren. -a × b := - - -a1 -b3 -a3 - - × - - -a1 -b3 -a3 - - = - - -a2b3 − a3b2 -a3b1 − a1b3 -a1b2 − a2b1 - +Ergänzende Definitionen und Sätze +Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle +benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet, +aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra +und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. 
+Definition 81 +Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 } +mit xn → x0. +Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra +entnommen: +Definition 82 +Es seien V und W K-Vektorräume und A(V ) und A(W) die zugehörigen affinen Räume. +Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1 +gilt: +f(λa + µb) = λf(a) + µf(b) +Definition 83 +Sei V ein Vektorraum und S ⊆ V eine Teilmenge. +S heißt eine Orthonormalbasis von V , wenn gilt: +(i) S ist eine Basis von V +(ii) ∀v ∈ S : kvk = 1 +(iii) ∀v1, v2 ∈ S : v1 6= v2 ⇒ hv1, v2i = 0 +Satz (Zwischenwertsatz) +Sei a < b und f ∈ C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder +f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0. +Definition 84 +Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. +v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv. +Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f. +Satz (Binomischer Lehrsatz) +Sei x, y ∈ R. Dann gilt: +(x + y) +n = +Xn +k=0 + +n +k + +x +n−k +y +k ∀n ∈ N0 +Definition 85 +Seien a, b ∈ R +3 Vektoren. +a × b := + + +a1 +b3 +a3 + + × + + +a1 +b3 +a3 + + = + + +a2b3 − a3b2 +a3b1 − a1b3 +a1b2 − a2b1 +  -Symbolverzeichnis -Mengenoperationen -Seien A, B und M Mengen. -AC Komplement von A -P(M) Potenzmenge von M -M Abschluss von M -∂M Rand der Menge M -M◦ -Inneres der Menge M -A × B Kreuzprodukt -A ⊆ B Teilmengenbeziehung -A ( B echte Teilmengenbeziehung -A \ B Differenzmenge -A ∪ B Vereinigung -A ∪˙ B Disjunkte Vereinigung -A ∩ B Schnitt -Geometrie -AB Gerade durch die Punkte A und -B -AB Strecke mit Endpunkten A und B -4ABC Dreieck mit Eckpunkten A, B, C -AB ∼= CD Die Strecken AB und CD sind -isometrisch -|K| Geometrische Realisierung des -Simplizialkomplexes K -Gruppen -Sei X ein topologischer Raum und K ein Kör￾per. 
-Homöo(X) Homöomorphismengruppe -Iso(X) Isometriengruppe -GLn(K) Allgemeine lineare Gruppe (von -General Linear Group) -SLn(K) Spezielle lineare Gruppe -PSLn(K) Projektive lineare Gruppe -Perm(X) Permutationsgruppe -Sym(X) Symmetrische Gruppe -Wege -Sei γ : I → X ein Weg. -[γ] Homotopieklasse von γ -γ1 ∗ γ2 Zusammenhängen von Wegen -γ1 ∼ γ2 Homotopie von Wegen -γ(x) Inverser Weg, also γ(x) := γ(1 − x) -C Bild eines Weges γ, also C := -γ([0, 1]) -Weiteres -B Basis einer Topologie -Bδ(x) δ-Kugel um x -S Subbasis einer Topologie -T Topologie -A Atlas -P Projektiver Raum -h·, ·i Skalarprodukt -X/∼ X modulo ∼ -[x]∼ Äquivalenzklassen von x bzgl. ∼ -kxk Norm von x -|x| Betrag von x -hai Erzeugnis von a -S -n Sphäre -T -n Torus -f ◦ g Verkettung von f und g -πX Projektion auf X -f|U f eingeschränkt auf U -f -−1 -(M) Urbild von M -Rg(M) Rang von M +Symbolverzeichnis +Mengenoperationen +Seien A, B und M Mengen. +AC Komplement von A +P(M) Potenzmenge von M +M Abschluss von M +∂M Rand der Menge M +M◦Inneres der Menge M +A × B Kreuzprodukt +A ⊆ B Teilmengenbeziehung +A ( B echte Teilmengenbeziehung +A \ B Differenzmenge +A ∪ B Vereinigung +A ∪˙ B Disjunkte Vereinigung +A ∩ B Schnitt +Geometrie +AB Gerade durch die Punkte A und +B +AB Strecke mit Endpunkten A und B +4ABC Dreieck mit Eckpunkten A, B, C +AB ∼= CD Die Strecken AB und CD sind +isometrisch +|K| Geometrische Realisierung des +Simplizialkomplexes K +Gruppen +Sei X ein topologischer Raum und K ein Körper. +Homöo(X) Homöomorphismengruppe +Iso(X) Isometriengruppe +GLn(K) Allgemeine lineare Gruppe (von +General Linear Group) +SLn(K) Spezielle lineare Gruppe +PSLn(K) Projektive lineare Gruppe +Perm(X) Permutationsgruppe +Sym(X) Symmetrische Gruppe +Wege +Sei γ : I → X ein Weg. 
+[γ] Homotopieklasse von γ +γ1 ∗ γ2 Zusammenhängen von Wegen +γ1 ∼ γ2 Homotopie von Wegen +γ(x) Inverser Weg, also γ(x) := γ(1 − x) +C Bild eines Weges γ, also C := +γ([0, 1]) +Weiteres +B Basis einer Topologie +Bδ(x) δ-Kugel um x +S Subbasis einer Topologie +T Topologie +A Atlas +P Projektiver Raum +h·, ·i Skalarprodukt +X/∼ X modulo ∼ +[x]∼ Äquivalenzklassen von x bzgl. ∼ +kxk Norm von x +|x| Betrag von x +hai Erzeugnis von a +S +n Sphäre +T +n Torus +f ◦ g Verkettung von f und g +πX Projektion auf X +f|U f eingeschränkt auf U +f +−1 +(M) Urbild von M +Rg(M) Rang von M χ(K) Euler-Charakteristik von K -110 Symbolverzeichnis -∆k Standard-Simplex -X#Y Verklebung von X und Y -dn Lineare Abbildung aus Bemer￾kung 37 -A ∼= B A ist isometrisch zu B -f∗ Abbildung zwischen Fundamental￾gruppen (vgl. Seite 49) -111 Symbolverzeichnis -Zahlenmengen -N = { 1, 2, 3, . . . } Natürliche Zahlen -Z = N ∪ { 0, −1, −2, . . . } Ganze Zahlen -Q = Z ∪ - 1 -2 -, -1 -3 -, -2 -3 - -= - z -n mit z ∈ Z und n ∈ Z \ { 0 } - -Rationale Zahlen -R = Q ∪ - √ -2, − -√3 -3, . . . -Reele Zahlen -R+ Echt positive reele Zahlen -R -n -+,0 -:= { (x1, . . . , xn) ∈ R -n -| xn ≥ 0 } Halbraum -R -× = R \ { 0 } Einheitengruppe von R -C = { a + ib | a, b ∈ R } Komplexe Zahlen -P = { 2, 3, 5, 7, . . . } Primzahlen -H = { z ∈ C | =z > 0 } obere Halbebene -I = [0, 1] ( R Einheitsintervall -f : S -1 -,→ R -2 Einbettung der Kreislinie in die Ebene -π1(X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X -Fix(f) Menge der Fixpunkte der Abbildung f -k · k2 2-Norm; Euklidische Norm -κ Krümmung -κNor Normalenkrümmung -V (f) Nullstellenmenge von f -2 -Krümmung -DpF : R -2 → R -3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) -TsS Tangentialebene an S ⊆ R -3 durch s ∈ S -dsn(x) Weingarten-Abbildung -2 + Symbolverzeichnis +∆k Standard-Simplex +X#Y Verklebung von X und Y +dn Lineare Abbildung aus Bemerkung 37 +A ∼= B A ist isometrisch zu B +f∗ Abbildung zwischen Fundamentalgruppen (vgl. 
Seite 49) + Symbolverzeichnis +Zahlenmengen +N = { 1, 2, 3, . . . } Natürliche Zahlen +Z = N ∪ { 0, −1, −2, . . . } Ganze Zahlen +Q = Z ∪ + 1 +2 +, +1 +3 +, +2 +3 + += + z +n mit z ∈ Z und n ∈ Z \ { 0 } + +Rationale Zahlen +R = Q ∪ + √ +2, − +√3 +3, . . . Reele Zahlen +R+ Echt positive reele Zahlen +R +n ++,0 +:= { (x1, . . . , xn) ∈ R +n +| xn ≥ 0 } Halbraum +R +× = R \ { 0 } Einheitengruppe von R +C = { a + ib | a, b ∈ R } Komplexe Zahlen +P = { 2, 3, 5, 7, . . . } Primzahlen +H = { z ∈ C | =z > 0 } obere Halbebene +I = [0, 1] ( R Einheitsintervall +f : S +1 +,→ R +2 Einbettung der Kreislinie in die Ebene +π1(X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X +Fix(f) Menge der Fixpunkte der Abbildung f +k · k2 2-Norm; Euklidische Norm +κ Krümmung +κNor Normalenkrümmung +V (f) Nullstellenmenge von f +2 +Krümmung +DpF : R +2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) +TsS Tangentialebene an S ⊆ R +3 durch s ∈ S +dsn(x) Weingarten-Abbildung +2 von Vanishing Set -Stichwortverzeichnis -Abbildung -affine, 107 -differenzierbare, 29 -homotope, 50 -offene, 53 -simpliziale, 35 -stetige, 9 -Abschluss, 3 -Abstand, 86 -Abstandsaxiom, 65 -Achterknoten, 20 -Aktion, siehe Gruppenoperation -Anordnungsaxiome, 66 -Atlas, 24 -Außenwinkel, 70 -Axiom, 64 -Axiomensystem, 64 -Basis, 3 -Baum, 37 -Betti-Zahl, 41 -Bewegungsaxiom, 66 -Binormalenvektor, 89 -Cantorsches Diskontinuum, 22 -C -k --Struktur, 29 -Decktransformation, 59 -Decktransformationsgruppe, 59 -Deformationsretrakt, 47 -dicht, 3 -Diffeomorphismus, 29 -Dimension, 34 -diskret, 53 -Doppelverhältnis, 83 -Dreibein -begleitendes, 89 -Ebene -euklidische, 64 -Eigenvektor, 107 -Eigenwert, 107 -einfach zusammenhängend, 49 -Einheitsnormalenfeld, 90 -Euler-Charakteristik, siehe Eulerzahl -Eulersche Polyederformel, 38 -Eulerzahl, 36 -Färbbarkeit, 21 -Faser, siehe Urbild -Fläche -orientierbare, 90 -reguläre, 30 -Flächenelement, 95 -Formoperator, siehe Weingarten-Abbildung -Fundamentalform -erste, 94 -zweite, 97 
-Fundamentalgruppe, 47 -Gauß-Krümmung, 92, 91–94 -Geometrie, 64 -Gerade, 64 -hyperbolische, 77 -Graph, 37 -Grenzwert, 8 -Gruppe -allgemeine lineare, 22, 26 -spezielle lineare, 22 -topologische, 33 -Gruppe operiert durch Homöomorphismen, -61 -Gruppenaktion, siehe Gruppenoperation -Gruppenoperation, 60, 60–63 -stetige, 61 -Häufungspunkt, 107 -Hülle -konvexe, 34 -Halbebene, 66 -Halbgerade, 65 -Halbraum, 28 -Hauptkrümmung, 92 +Stichwortverzeichnis +Abbildung +affine, 107 +differenzierbare, 29 +homotope, 50 +offene, 53 +simpliziale, 35 +stetige, 9 +Abschluss, 3 +Abstand, 86 +Abstandsaxiom, 65 +Achterknoten, 20 +Aktion, siehe Gruppenoperation +Anordnungsaxiome, 66 +Atlas, 24 +Außenwinkel, 70 +Axiom, 64 +Axiomensystem, 64 +Basis, 3 +Baum, 37 +Betti-Zahl, 41 +Bewegungsaxiom, 66 +Binormalenvektor, 89 +Cantorsches Diskontinuum, 22 +C +k +-Struktur, 29 +Decktransformation, 59 +Decktransformationsgruppe, 59 +Deformationsretrakt, 47 +dicht, 3 +Diffeomorphismus, 29 +Dimension, 34 +diskret, 53 +Doppelverhältnis, 83 +Dreibein +begleitendes, 89 +Ebene +euklidische, 64 +Eigenvektor, 107 +Eigenwert, 107 +einfach zusammenhängend, 49 +Einheitsnormalenfeld, 90 +Euler-Charakteristik, siehe Eulerzahl +Eulersche Polyederformel, 38 +Eulerzahl, 36 +Färbbarkeit, 21 +Faser, siehe Urbild +Fläche +orientierbare, 90 +reguläre, 30 +Flächenelement, 95 +Formoperator, siehe Weingarten-Abbildung +Fundamentalform +erste, 94 +zweite, 97 +Fundamentalgruppe, 47 +Gauß-Krümmung, 92, 91–94 +Geometrie, 64 +Gerade, 64 +hyperbolische, 77 +Graph, 37 +Grenzwert, 8 +Gruppe +allgemeine lineare, 22, 26 +spezielle lineare, 22 +topologische, 33 +Gruppe operiert durch Homöomorphismen, +61 +Gruppenaktion, siehe Gruppenoperation +Gruppenoperation, 60, 60–63 +stetige, 61 +Häufungspunkt, 107 +Hülle +konvexe, 34 +Halbebene, 66 +Halbgerade, 65 +Halbraum, 28 +Hauptkrümmung, 92 Hilbert-Kurve, 19, 19 -113 Stichwortverzeichnis -Homöomorphismengruppe, 10 -Homöomorphismus, 9 -Homologiegruppe, 41 -Homomorphismus, 101 -Homotopie, 44 
-Homotopieklasse, 47 -Inklusionsabbildung, 47 -Innenwinkel, 70 -Inneres, 3 -Inzidenzaxiome, 64 -Isometrie, 6, 10 -Isometriegruppe, 10 -Isomorphismus, 101 -Isotopie, 20 -Jordankurve, 19 -geschlossene, 19 -Karte, 24 -Kartenwechsel, 28 -Kern -offener, 3 -Kleeblattknoten, 20 -Klumpentopologie, siehe triviale Topologie -Knoten, 20, 17–21 -äquivalente, 20 -trivialer, 20 -Knotendiagramm, 20 -kollinear, 65 -kongruent, siehe isometrisch -Kongruenz, siehe Isometrie -Kongruenzsatz -SSS, 104 -SWS, 69 -SWW, 74 -WSW, 70 -Krümmung, 88, 89 -Kreis, 37 -Kreuzprodukt, 107 -Kurve, 87 -Länge einer, 87 -Lage -allgemeine, 34 -Lehrsatz -Binomischer, 107 -Lie-Gruppe, 33 -liegt zwischen, 65 -Liftung, 54 -Limes, 8 -lokal, 3 -Lot, 86 -Lotfußpunkt, 86 -Möbiusband, 91 -Möbiustransformation, 80 -Mannigfaltigkeit, 24 -differenzierbare, 29 -geschlossene, 25 -glatte, 29 -mit Rand, 28 -Menge -abgeschlossene, 2 -offene, 2 -zusammenhängende, 11 -Metrik, 6 -diskrete, 6 -hyperbolische, 84 -SNCF, 8 -Nebenwinkel, 86 -Neilsche Parabel, 27 -Normalenfeld, 90 -Normalenvektor, 87, 89 -Normalkrümmung, 91, 92, 98 -Oktaeder, 34 -Orthonormalbasis, 107 -Paraboloid -hyperbolisches, 92 -Parallele, 66 -Parallelenaxiom, 64 -parametrisiert -durch Bogenlänge, 87 -Parametrisierung -reguläre, 30 -Polyzylinder, 17 -Produkttopologie, 4 -Projektion -stereographische, 11 -Punkt, 34 -Quotiententopologie, 5, 10, 11 -Rand, 3, 28 -Raum -hausdorffscher, 8 -kompakter, 14 -metrischer, 6 + Stichwortverzeichnis +Homöomorphismengruppe, 10 +Homöomorphismus, 9 +Homologiegruppe, 41 +Homomorphismus, 101 +Homotopie, 44 +Homotopieklasse, 47 +Inklusionsabbildung, 47 +Innenwinkel, 70 +Inneres, 3 +Inzidenzaxiome, 64 +Isometrie, 6, 10 +Isometriegruppe, 10 +Isomorphismus, 101 +Isotopie, 20 +Jordankurve, 19 +geschlossene, 19 +Karte, 24 +Kartenwechsel, 28 +Kern +offener, 3 +Kleeblattknoten, 20 +Klumpentopologie, siehe triviale Topologie +Knoten, 20, 17–21 +äquivalente, 20 +trivialer, 20 +Knotendiagramm, 20 +kollinear, 65 +kongruent, siehe 
isometrisch +Kongruenz, siehe Isometrie +Kongruenzsatz +SSS, 104 +SWS, 69 +SWW, 74 +WSW, 70 +Krümmung, 88, 89 +Kreis, 37 +Kreuzprodukt, 107 +Kurve, 87 +Länge einer, 87 +Lage +allgemeine, 34 +Lehrsatz +Binomischer, 107 +Lie-Gruppe, 33 +liegt zwischen, 65 +Liftung, 54 +Limes, 8 +lokal, 3 +Lot, 86 +Lotfußpunkt, 86 +Möbiusband, 91 +Möbiustransformation, 80 +Mannigfaltigkeit, 24 +differenzierbare, 29 +geschlossene, 25 +glatte, 29 +mit Rand, 28 +Menge +abgeschlossene, 2 +offene, 2 +zusammenhängende, 11 +Metrik, 6 +diskrete, 6 +hyperbolische, 84 +SNCF, 8 +Nebenwinkel, 86 +Neilsche Parabel, 27 +Normalenfeld, 90 +Normalenvektor, 87, 89 +Normalkrümmung, 91, 92, 98 +Oktaeder, 34 +Orthonormalbasis, 107 +Paraboloid +hyperbolisches, 92 +Parallele, 66 +Parallelenaxiom, 64 +parametrisiert +durch Bogenlänge, 87 +Parametrisierung +reguläre, 30 +Polyzylinder, 17 +Produkttopologie, 4 +Projektion +stereographische, 11 +Punkt, 34 +Quotiententopologie, 5, 10, 11 +Rand, 3, 28 +Raum +hausdorffscher, 8 +kompakter, 14 +metrischer, 6 projektiver, 5, 22, 25, 52 -114 Stichwortverzeichnis -topologischer, 2 -zusammenhängender, 11 -Realisierung -geometrische, 34 -Retraktion, 47 -Satz von -Gauß-Bonnet, 98 -Scheitelwinkel, 86 -Seite, 34 -Sierpińskiraum, 3, 22 -Simplex, 34 -Simplizialkomplex, 34 -Simplizialkomplexe -flächengleiche, 74 -Sphäre -exotische, 29 -Standard-Simplex, 34 -Standardtopologie, 2 -sternförmig, 48 -Stetigkeit, 9–11 -Strecke, 65 -Struktur -differenzierbare, 29 -Subbasis, 3 -Tangentialebene, 89, 89–90 -Teilraum, 4 -Teilraumtopologie, 4 -Teilsimplex, 34 -Topologie -diskrete, 2, 6 -euklidische, 2 -feinste, 11 -triviale, 2 -Zariski, 2, 12, 15 -Torus, iii, 5, 38, 51, 93 -Total Unzusammenhängend, 100 -Triangulierung, 38 -Überdeckung, 14 -Übergangsfunktion, siehe Kartenwechsel -Überlagerung, 51, 51–60 -reguläre, 59 -universelle, 57 -Umgebung, 3 -Umgebungsbasis, 58 -vanishing set, 26 -Vektorprodukt, siehe Kreuzprodukt -Verklebung, 26 -verträglich, 29 -Würfel, 34 -Weg, 17 -einfacher, 17 
-geschlossener, 17 -homotope, 44 -inverser, 48 -zusammengesetzter, 46 -Wegzusammenhang, 18 -Weingarten-Abbildung, 95 -Winkel, 70 -Zusammenhang, 11–14 -Zusammenhangskomponente, 13 -Zwischenwertsatz, 107 + Stichwortverzeichnis +topologischer, 2 +zusammenhängender, 11 +Realisierung +geometrische, 34 +Retraktion, 47 +Satz von +Gauß-Bonnet, 98 +Scheitelwinkel, 86 +Seite, 34 +Sierpińskiraum, 3, 22 +Simplex, 34 +Simplizialkomplex, 34 +Simplizialkomplexe +flächengleiche, 74 +Sphäre +exotische, 29 +Standard-Simplex, 34 +Standardtopologie, 2 +sternförmig, 48 +Stetigkeit, 9–11 +Strecke, 65 +Struktur +differenzierbare, 29 +Subbasis, 3 +Tangentialebene, 89, 89–90 +Teilraum, 4 +Teilraumtopologie, 4 +Teilsimplex, 34 +Topologie +diskrete, 2, 6 +euklidische, 2 +feinste, 11 +triviale, 2 +Zariski, 2, 12, 15 +Torus, iii, 5, 38, 51, 93 +Total Unzusammenhängend, 100 +Triangulierung, 38 +Überdeckung, 14 +Übergangsfunktion, siehe Kartenwechsel +Überlagerung, 51, 51–60 +reguläre, 59 +universelle, 57 +Umgebung, 3 +Umgebungsbasis, 58 +vanishing set, 26 +Vektorprodukt, siehe Kreuzprodukt +Verklebung, 26 +verträglich, 29 +Würfel, 34 +Weg, 17 +einfacher, 17 +geschlossener, 17 +homotope, 44 +inverser, 48 +zusammengesetzter, 46 +Wegzusammenhang, 18 +Weingarten-Abbildung, 95 +Winkel, 70 +Zusammenhang, 11–14 +Zusammenhangskomponente, 13 +Zwischenwertsatz, 107 \ No newline at end of file diff --git a/read/results/pdfminer/1602.06541.txt b/read/results/pdfminer/1602.06541.txt index 175bca7..202d224 100644 --- a/read/results/pdfminer/1602.06541.txt +++ b/read/results/pdfminer/1602.06541.txt @@ -1969,12 +1969,12 @@ J. 13 -no. - 30, 10, +no. + Analysis 1699–1712, @@ -2046,10 +2046,10 @@ Dec. 2, -no. - pp. +no. + Jun. 
of diff --git a/read/results/pdfminer/1707.09725.txt b/read/results/pdfminer/1707.09725.txt index 55f0dc1..87a0bfc 100644 --- a/read/results/pdfminer/1707.09725.txt +++ b/read/results/pdfminer/1707.09725.txt @@ -6219,10 +6219,10 @@ Softmax 87.92 % σ = 0.40 -84.70 % σ = 0.15 - 79.67 % σ = 4.85 +84.70 % σ = 0.15 + 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 @@ -6240,14 +6240,14 @@ ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 -88.93 % σ = 0.46 85.35 % σ = 0.21 - 88.42 % σ = 0.29 85.16 % σ = 0.15 -84.46 % σ = 0.23 +88.93 % σ = 0.46 85.35 % σ = 0.21 84.46 % σ = 0.27 +84.46 % σ = 0.23 + 88.61 % σ = 0.41 88.00 % σ = 0.47 @@ -6324,16 +6324,16 @@ ELU 75.5 -80.1 +83.2 78.8 -83.2 - -68.9 +80.1 67.2 +68.9 + Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on STL-10. For LReLU, α = 0.3 was chosen. @@ -8596,11 +8596,11 @@ Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3- S. E. Fahlman, “An empirical study of learning speed in back-propagation http://repository.cmu.edu/cgi/ -[Online]. Available: - networks,” viewcontent.cgi?article=2799&context=compsci +[Online]. Available: + 1988. L. Fei-Fei, R. Fergus, and P. Perona, diff --git a/read/results/pdfminer/GeoTopo-book.txt b/read/results/pdfminer/GeoTopo-book.txt index 40a246b..151eec7 100644 --- a/read/results/pdfminer/GeoTopo-book.txt +++ b/read/results/pdfminer/GeoTopo-book.txt @@ -51,10 +51,10 @@ in „Analysis I“ vermittelt. ), Mengenschreibweisen ( , -∪ +∩ , -∩ +∪ P @@ -273,10 +273,10 @@ sind als Komplement offener Mengen abgeschlossen. X = -∅ - \ +∅ + T und (cid:4) @@ -1385,10 +1385,10 @@ x = y Ux -∈ - ∩ +∈ + Uy = ∅ Uy für alle n @@ -7537,10 +7537,10 @@ R) → -→ - (cid:55)→ +→ + × I @@ -8855,10 +8855,10 @@ z Abbildungen. -(cid:107) - } +(cid:107) + ∈ 1) f1 := idR ist eine offene und stetige Abbildung. 2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5). @@ -10188,10 +10188,10 @@ von y. ⊆ -Y - U ein Homöomorphismus. 
Dann ist W := f −1(V ) +Y + × ∈ @@ -10234,10 +10234,10 @@ f (y0) = g(y0), so ist (g−1 − -} - 1 +} + schon Fix(f ) = Y , also f = idY . = @@ -10797,10 +10797,10 @@ b) Ist X ein topologischer Raum, so entsprechen dabei die Gruppenoperationen dur → -} - { +} + | Homöomorphismus den Gruppenhomomorphismen G @@ -11440,8 +11440,6 @@ Diese Teilmengen Hi heißen Halbebenen bzgl. g. für alle A -gilt: AB - Hi, B ∈ { @@ -11457,6 +11455,8 @@ Hi, B H2 in zwei nichtleere Teilmengen H1, H2, sodass = +gilt: AB + = j. g @@ -13660,18 +13660,18 @@ Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 1 − -2 - 1 -3 - 0 -4 +3 + +2 5 +4 + x 6 @@ -16703,10 +16703,10 @@ y3 (cid:43) - -  + +  (cid:42) @@ -17590,12 +17590,12 @@ zi } -} - 0 } +} + { ⊆ { @@ -17773,10 +17773,10 @@ phismus. H, x -→ - (cid:55)→ +→ + 3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup- penhomomorphismus. @@ -18038,12 +18038,12 @@ R \ { -0 - } 0 +0 + ) Lösung zu Aufgabe 9 diff --git a/read/results/pdfplumber/1601.03642.txt b/read/results/pdfplumber/1601.03642.txt index c7d8790..8c74671 100644 --- a/read/results/pdfplumber/1601.03642.txt +++ b/read/results/pdfplumber/1601.03642.txt @@ -127,40 +127,11 @@ are learned. This is usually done by an optimization technique called gradient descent. The gradient descent algorithm takes a function which has to be derivable, starts at any point of the surface of this error function and -a -r -X -i -v -: -1 -6 -0 -1 -. -0 -3 -6 -4 -2 -v -1 -[ -c -s -. -C -V -] -1 -2 -J -a -n -2 -0 -1 -6 +arXiv:1601.03642v1 +[cs.CV] +12 +Jan +2016 2 makes a step in the direction which goes downwards. Hence it tries to find a minimum of this high-dimensional function. 
diff --git a/read/results/pdfplumber/1602.06541.txt b/read/results/pdfplumber/1602.06541.txt index 6a34a91..a5be9a3 100644 --- a/read/results/pdfplumber/1602.06541.txt +++ b/read/results/pdfplumber/1602.06541.txt @@ -94,39 +94,11 @@ tion algorithms, there is a publication about multiple class affiliation segmentation [LRAL08]. Similarly, recent publications in pixel-level object segmentation used layered models [YHRF12]. -a -r -X -i -v -: -1 -6 -0 -2 -. -0 -6 -5 -4 -1 -v -2 -[ -c s -. -C -V -] -1 -1 -M -a -y -2 -0 -1 -6 +arXiv:1602.06541v2 +[cs.CV] +11 +May +2016 2 C. Input Data The available data which can be used for the diff --git a/read/results/pdfplumber/1707.09725.txt b/read/results/pdfplumber/1707.09725.txt index c3dca81..5dfd974 100644 --- a/read/results/pdfplumber/1707.09725.txt +++ b/read/results/pdfplumber/1707.09725.txt @@ -12,40 +12,11 @@ Second reviewer: Prof. Dr.–Ing. J. M. Zöllner Advisor: Dipl.–Inform. Michael Weber Research Period: 03. May 2017 – 03. August 2017 KIT–UniversityoftheStateofBaden-WuerttembergandNationalResearchCenteroftheHelmholtzAssociation www.kit.edu -a -r -X -i -v -: -1 -7 -0 -7 -. -0 -9 -7 -2 -5 -v -1 -[ -c -s -. -C -V -] -3 -1 -J -u -l -2 -0 -1 -7 +arXiv:1707.09725v1 +[cs.CV] +31 +Jul +2017 Analysis and Optimization of Convolutional Neural Network Architectures @@ -435,20 +406,7 @@ n feature maps n filters of size k×k×3 widthw widthw -h -e -i -g -h -t -h -h -e -i -g -h -t -h +heighth heighth neural network data @@ -1802,18 +1760,7 @@ make any statement about the ordering of m and m(cid:48) in epoch i+1. 
0.6 0.7 epoch -v -a l i -d a -t i -o -n -a c -c -u -r -a -c y +validationaccuracy maximum validation accuracy minimum validation accuracy 1.5 @@ -1823,9 +1770,7 @@ minimum validation accuracy 3.5 4 4.5 -l -o s -s +loss maximum validation accuracy minimum validation accuracy mean loss diff --git a/read/results/pdfplumber/2201.00021.txt b/read/results/pdfplumber/2201.00021.txt index 0085371..74f39a7 100644 --- a/read/results/pdfplumber/2201.00021.txt +++ b/read/results/pdfplumber/2201.00021.txt @@ -91,45 +91,11 @@ beenidentifiedasmasers,includingthe(5,3),(5,4),(6,1),(6,2), (6,4),(6,5),(7,3),(7,4),(7,5)(7,6),(8,3),(8,4),(8,5),(8,6),(9,3), (9,4),(9,5),(9,7),(9,8),(10,7),(10,8),(10,9),and(11,9)transi- Articlenumber,page1of10 -a -r -X -i -v -: -2 -2 -0 -1 -. -0 -0 -0 -2 -1 -v -3 -[ -a -s -t -r -o -- -p -h -. -G -A -] +arXiv:2201.00021v3 +[astro-ph.GA] 9 -A -p -r -2 -0 -2 -2 +Apr +2022 A&Aproofs:manuscriptno.mainArxiv tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; Henkel et al. 2013; Mei et al. 2020). Except for the NH diff --git a/read/results/pdfplumber/2201.00022.txt b/read/results/pdfplumber/2201.00022.txt index 2d9662e..4dbb0e7 100644 --- a/read/results/pdfplumber/2201.00022.txt +++ b/read/results/pdfplumber/2201.00022.txt @@ -71,45 +71,11 @@ lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to sur- vive galaxy evolution and mergers to present day (e.g., -a -r -X -i -v -: -2 -2 -0 -1 -. -0 -0 -0 -2 -2 -v -2 -[ -a -s -t -r -o -- -p -h -. -G -A -] +arXiv:2201.00022v2 +[astro-ph.GA] 6 -J -u -l -2 -0 -2 -2 +Jul +2022 2 Rose et al. 
Rashkov&Madau2014),withsignificanteffectsontheir stellarandevendarkmattersurroundings(e.g.,Bertone diff --git a/read/results/pdfplumber/2201.00037.txt b/read/results/pdfplumber/2201.00037.txt index 35b3bae..f149c3c 100644 --- a/read/results/pdfplumber/2201.00037.txt +++ b/read/results/pdfplumber/2201.00037.txt @@ -12,46 +12,11 @@ cores into a common precession motion. proaches that expected for a rigid planet. Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca –1– -a -r -X -i -v -: -2 -2 -0 -1 -. -0 -0 -0 -3 -7 -v -1 -[ -a -s -t -r -o -- -p -h -. -E -P -] -3 -1 -D -e -c -2 -0 -2 -1 +arXiv:2201.00037v1 +[astro-ph.EP] +31 +Dec +2021 Confidential manuscript submitted to JGR-Planets Abstract We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core @@ -1438,31 +1403,39 @@ Confidential manuscript submitted to JGR-Planets 0 200 400 -600 800 1000 +600 +800 +1000 1200 1400 -p e r i o d ( y r ) +period +(yr) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 3000 4000 -5000 6000 +5000 +6000 7000 8000 -d e n s i t y ( k g / m 3 ) +density +(kg/m +3) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 2000 2020 -2040 2060 +2040 +2060 2080 2100 -F l -u i -d c o r e r a d i u s ( k -m ) +Fluid +core +radius +(km) fluid core density -CMB radius +CMB +radius FICN FCNint mantle density @@ -1636,27 +1609,40 @@ Confidential manuscript submitted to JGR-Planets 2.038 2.040 2.042 -2.044 2.046 2.048 +2.044 +2.046 +2.048 2.050 -O b l i q u i t -y -a n g l e ( a r c m i n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 1.5 2.0 2.5 -3.0 3.5 +3.0 +3.5 4.0 4.5 -O b l i q u i t -y -a n g l e ( a r c m i n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) -crustal thickness 16 km 36 km 26 km crustal thickness 16 km 36 km 26 km ε m ε g -for a rigid planet ε m -m f n s(x100) +crustal thickness 16 km +36 km +26 km +crustal thickness 16 km +36 km +26 km +ε m ε +g +for a 
rigid planet +ε +m +m f +n s(x100) a b Figure 4. a) Obliquity of the mantle (˜ ε m, solid lines) and of the principal moment of inertia (˜ ε g, dashed line) b) ˜ m @@ -1979,8 +1965,10 @@ cosity that we have identified above (i.e ν ≈ 5×10−4 m2 s−1), the influe Confidential manuscript submitted to JGR-Planets ε m ε g -m f -n s +m +f +n +s 2.038 2.040 2.042 @@ -1988,31 +1976,24 @@ n s 2.046 2.048 2.050 -O -b l i q u i t -y a n g l e -( -a r c -m i -n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 0.0 0.5 1.0 1.5 -2.0 2.5 +2.0 +2.5 3.0 3.5 4.0 4.5 -O -b l i q u i t -y a n g l e -( -a r c -m i -n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1 @@ -2351,15 +2332,9 @@ Confidential manuscript submitted to JGR-Planets 2.046 2.048 2.050 -O -b l i q -u i t -y a n -g l e -( -a r -c -m i n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 0.0 @@ -2372,26 +2347,23 @@ Inner core radius (km) 3.5 4.0 4.5 -O -b l i q -u i t -y a n -g l e -( -a r -c -m i n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) Br at ICB: 1 mT 0.01 mT 0.03 mT 0.1 mT 0.3 mT ε m -ε g +ε +g m f n s a b -for a rigid planet ε m +for a rigid planet +ε +m Figure 6. 
a) Obliquity of the mantle (˜ ε m, solid lines) and gravity field (˜ ε g, dashed lines) b) ˜ m f (solid lines) and ˜ n @@ -2452,14 +2424,9 @@ Confidential manuscript submitted to JGR-Planets 2.046 2.048 2.050 -O b l i q -u i t y a -n -g l e -( -a r -c -m i n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 0.0 @@ -2472,14 +2439,9 @@ Inner core radius (km) 3.5 4.0 4.5 -O b l i q -u i t y a -n -g l e -( -a r -c -m i n ) +Obliquity +angle +(arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) for a rigid planet @@ -2487,10 +2449,14 @@ for a rigid planet m a b α 3: 0.20 0.01 0.05 0.10 0.15 ρ s = 8800 kg m-3 -m f -n s +m +f +n +s +ε +m ε -m ε g +g Figure 7. a) Obliquity of the mantle (˜ ε m, solid lines) and gravity field (˜ ε g, dashed lines) b) ˜ m f (solid lines) and ˜ n diff --git a/read/results/pdfplumber/2201.00069.txt b/read/results/pdfplumber/2201.00069.txt index 04f435e..e23a0fa 100644 --- a/read/results/pdfplumber/2201.00069.txt +++ b/read/results/pdfplumber/2201.00069.txt @@ -35,45 +35,11 @@ M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20 S. Zouari,22 N. Żywucka,1 AcceptedXXX.ReceivedYYY;inoriginalformZZZ MNRAS000,1–15(2021) -a -r -X i -v -: -2 -2 -0 -1 -. -0 -0 -0 -6 -9 -v -1 -[ -a -s -t -r -o -- -p -h -. -H -E -] -3 -1 -D -e -c -2 -0 -2 -1 +arXiv:2201.00069v1 +[astro-ph.HE] +31 +Dec +2021 MNRAS000,1–15(2021) Preprint4January2022 CompiledusingMNRASLATEXstylefilev3.0 ABSTRACT Wereportonasearchforpersistentradioemissionfromtheone-offFastRadioBurst(FRB) diff --git a/read/results/pdfplumber/2201.00151.txt b/read/results/pdfplumber/2201.00151.txt index ae8363e..f5a51ce 100644 --- a/read/results/pdfplumber/2201.00151.txt +++ b/read/results/pdfplumber/2201.00151.txt @@ -1,41 +1,8 @@ -a -r -X -i -v -: -2 -2 -0 -1 . -0 -0 -1 -5 -1 -v +arXiv:2201.00151v1 +[astro-ph.GA] 1 -[ -a -s -t -r -o -- -p -h -. 
-G -A -] -1 -J -a -n -2 -0 -2 -2 +Jan +2022 Astronomy&Astrophysicsmanuscriptno.Populations4 ©ESO2022 January4,2022 Multiple stellar populations in Schwarzschild modeling @@ -182,16 +149,10 @@ data, we decided to use a galaxy from the Illustris project ulationfollowstheformationandevolutionofgalaxiesfromthe early Universe to the present by solving gravity and hydrody- namics, as well as modeling of star formation, galactic winds, -S -F -R -[ -M -⊙ -y -r - -1 -] +SFR +[M⊙ +yr +-1] t [Gyr] 0 4 @@ -204,9 +165,7 @@ thesimulatedgalaxyfromtheIllustrisprojectusedtocreatemockdata. Theblackandgrayverticalarrowsindicatethelastmergerswhichthe galaxyunderwent,wetanddry,respectively. t -[ -G y -r ] +[Gyr] Z [Z ⊙] 0 2 @@ -219,9 +178,9 @@ Z [Z ⊙] 2 4 6 -N [ -1 0 -2 ] +N +[10 +2] Fig. 2. Number of stars as a function of their metallicity and time of formation(theageoftheUniverse)inthesimulatedgalaxy.Thevertical lineindicatestheappliedsplitintostellarpopulations. @@ -253,40 +212,47 @@ Articlenumber,page2of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -80 -40 -0 40 +0 +40 80 POPULATION I -[ k p c ] +[kpc] major POPULATION I intermediate POPULATION I minor 5.3 -5.9 6.5 7.1 +5.9 +6.5 +7.1 7.7 -l o g ( -Σ ) [ M ⊙ / k p c 2 ] +log( +Σ) +[M⊙ +/kpc +2] -80 -40 0 40 POPULATION II -[ k p c ] +[kpc] POPULATION II POPULATION II -160 -80 0 80 160 -V [ k m / s ] +V +[km/s] -80 -40 0 40 -80 -40 0 40 POPULATION II -[ k p c ] +[kpc] [kpc] -80 -40 0 40 POPULATION II @@ -298,43 +264,51 @@ POPULATION II 30 60 90 -σ [ k m / s ] +σ +[km/s] -80 -40 -0 40 +0 +40 80 POPULATION II -[ k p c ] +[kpc] major POPULATION II intermediate POPULATION II minor 5.3 -5.9 6.5 7.1 +5.9 +6.5 +7.1 7.7 -l o g ( -Σ ) [ M ⊙ / k p c 2 ] +log( +Σ) +[M⊙ +/kpc +2] -80 -40 0 40 POPULATION II -[ k p c ] +[kpc] POPULATION II POPULATION II -160 -80 0 80 160 -V [ k m / s ] +V +[km/s] -80 -40 0 40 -80 -40 0 40 POPULATION II -[ k p c ] +[kpc] [kpc] -80 -40 0 40 POPULATION II @@ -346,7 +320,8 @@ POPULATION II 
30 60 90 -σ [ k m / s ] +σ +[km/s] Fig.3.Mapsoftheprojectedstellardensity,meanstellarvelocity,andstellarvelocitydispersion(inrows)fortwostellarpopulations:themetal- richpopulationI(left-handsidepanels)andthemetal-poorpopulationII(right-handside),andobservationsalongtheprincipalaxesdetermined forallstars(incolumns,alongthemajor,theintermediate,andtheminoraxis,respectively). @@ -356,7 +331,7 @@ forallstars(incolumns,alongthemajor,theintermediate,andtheminoraxis,respectively 0.5 1 1 10 100 -β ( r ) +β(r) r [kpc] -1 -0.5 @@ -364,7 +339,7 @@ r [kpc] 0.5 1 0 10 20 30 40 50 -β ( r ) +β(r) r [kpc] all stars pop I @@ -375,7 +350,8 @@ pop II 100 120 1 10 100 -σ r ( r ) +σr +(r) r [kpc] 40 60 @@ -383,7 +359,8 @@ r [kpc] 100 120 0 10 20 30 40 50 -σ r ( r ) +σr +(r) r [kpc] 40 60 @@ -391,7 +368,8 @@ r [kpc] 100 120 1 10 100 -σ t ( r ) +σt +(r) r [kpc] 40 60 @@ -399,7 +377,8 @@ r [kpc] 100 120 0 10 20 30 40 50 -σ t ( r ) +σt +(r) r [kpc] Fig.4.Profilesofthevelocityanisotropyparameter,radialvelocitydispersion,andtangentialvelocitydispersion(inconsecutivecolumns)calcu- latedfromallstars(inred),includingonlypopulationI(inorange),andonlypopulationII(inblue).Theupperrowshowstheprofilesusingthe @@ -427,15 +406,10 @@ A&Aproofs:manuscriptno.Populations4 101 103 10 100 -n -⋆ ( -R -) -[ -k -p -c - 2 -] +n⋆ +(R) +[kpc +-2] R [kpc] major 10 100 @@ -551,8 +525,7 @@ K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling 10-1 100 0 10 20 30 40 -M ( -R ) +M(R) R [kpc] major 0 10 20 30 40 @@ -564,17 +537,12 @@ minor 369 12 0 10 20 30 40 -m -2 ( -R ) [ -1 -0 -3 ( -k -m -s - -1 ) -2 ] +m2 +(R)[10 +3(km +s +-1) +2] R [kpc] 0 10 20 30 40 R [kpc] @@ -584,17 +552,12 @@ R [kpc] -505 10 0 10 20 30 40 -m -3 ( -R ) [ -1 -0 -4 ( -k -m -s - -1 ) -3 ] +m3 +(R)[10 +4(km +s +-1) +3] R [kpc] 0 10 20 30 40 R [kpc] @@ -602,17 +565,12 @@ R [kpc] R [kpc] 01234 0 10 20 30 40 -m -4 ( -R ) [ -1 -0 -8 ( -k -m -s - -1 ) -4 ] +m4 +(R)[10 +8(km +s +-1) +4] R [kpc] 0 10 20 30 40 R [kpc] @@ -784,17 +742,16 
@@ Asourparametrizationofthemass-to-lightratioisnotintu- itivewepresentitsprofilesexplicitlyinthefirstrowsoftheleft- Articlenumber,page6of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -106 107 +106 +107 108 109 1010 10 100 ALL -Υ ( r ) [ -M -⊙ / -L -⊙ ] +Υ(r) +[M⊙ +/L⊙ ] r [kpc] major 10 100 @@ -814,11 +771,11 @@ data 108 10 100 ALL -ν t o t ( r ) -[ -M ⊙ -k -p c - 3 ] +νtot +(r) +[M⊙ +kpc +-3] r [kpc] 10 100 ALL @@ -826,13 +783,14 @@ r [kpc] 10 100 ALL r [kpc] -1010 1011 +1010 +1011 1012 10 100 ALL -M t o t ( r ) [ -M -⊙ ] +Mtot +(r) +[M⊙ ] r [kpc] 10 100 ALL @@ -845,7 +803,7 @@ r [kpc] 01 0 10 20 30 40 ALL -β ( r ) +β(r) r [kpc] 0 10 20 30 40 ALL @@ -853,17 +811,16 @@ r [kpc] 0 10 20 30 40 50 ALL r [kpc] -106 107 +106 +107 108 109 1010 10 100 POPULATIONS -Υ ( r ) [ -M -⊙ / -L -⊙ ] +Υ(r) +[M⊙ +/L⊙ ] r [kpc] major 10 100 @@ -883,11 +840,11 @@ data 108 10 100 POPULATIONS -ν t o t ( r ) -[ -M ⊙ -k -p c - 3 ] +νtot +(r) +[M⊙ +kpc +-3] r [kpc] 10 100 POPULATIONS @@ -895,13 +852,14 @@ r [kpc] 10 100 POPULATIONS r [kpc] -1010 1011 +1010 +1011 1012 10 100 POPULATIONS -M t o t ( r ) [ -M -⊙ ] +Mtot +(r) +[M⊙ ] r [kpc] 10 100 POPULATIONS @@ -914,7 +872,7 @@ r [kpc] 01 0 10 20 30 40 POPULATIONS -β ( r ) +β(r) r [kpc] 0 10 20 30 40 POPULATIONS @@ -993,20 +951,12 @@ A&Aproofs:manuscriptno.Populations4 -1 01 0 10 20 30 40 -P -O -P +POP I + -P -O -P -I -I -β -( -r -) +POP +II +β(r) r [kpc] major 0 10 20 30 40 @@ -1019,12 +969,9 @@ minor -1 01 0 10 20 30 40 -P -O -P +POP I -β ( -r ) +β(r) r [kpc] 0 10 20 30 40 r [kpc] @@ -1034,12 +981,9 @@ r [kpc] -1 01 0 10 20 30 40 -P -O -P I -I -β ( -r ) +POP +II +β(r) r [kpc] 0 10 20 30 40 r [kpc] @@ -1115,13 +1059,10 @@ Sérsicparameter(m) 0.808 0.807 0.898 104 105 0.2 0.5 2 0.1 1 -n -⋆ ( -R ) -[ -k -p -c - 2 ] +n⋆ +(R) +[kpc +-2] R [kpc] all stars popI @@ -1140,7 +1081,9 @@ tionintroducedinprevioussections.Thinverticallinesindicate the innermost data point for the light profile for all stars and the 
outerboundaryof the kinematic sample. The former,set at logr = −0.16,isalsousedastheminimumofthemass-to-light -ratio profile (r 0 in Eq.5). The fitted parameters of the profiles, +ratio profile (r +0 +in Eq.5). The fitted parameters of the profiles, thatisthenormalizationN 0,theSérsicradiusR S,andtheSérsic parameterm,areincludedinthesecondpartofTable2. Figure12presentstheprofilesoftheobservablesusedinthe @@ -1172,9 +1115,7 @@ profileforthepopulationsisbasedonthefitofallstarsbutusing 0.2 0.25 0 0.4 0.8 1.2 1.6 -M -( R -) +M(R) R [kpc] all stars pop I @@ -1186,55 +1127,34 @@ pop II 160 200 0 0.4 0.8 1.2 1.6 -m -2 ( -R ) [ -( -k -m -s - 1 ) -2 -] +m2 +(R)[(km +s +-1) +2] R [kpc] -16 -8 08 16 0 0.4 0.8 1.2 1.6 -m -3 -( -R -) -[ -1 -0 -2 -( -k -m -s - -1 -) -3 -] +m3 +(R)[10 +2(km +s +-1) +3] R [kpc] 048 12 16 0 0.4 0.8 1.2 1.6 -m -4 ( -R ) -[ -1 -0 4 ( -k -m -s - -1 ) -4 -] +m4 +(R)[10 +4(km +s +-1) +4] R [kpc] Fig. 12. Observables of the Fornax dSph used in our Schwarzschild modelingscheme.Inrows:thefractionofthetotalnumberofstars,the @@ -1287,11 +1207,10 @@ c 6 9 12 -χ 2 - χ -2 -m i -n +2- +χ +2min Fig.13.Valuesofχ2relativetothefittedminimumwithintherangeof3σconfidencelevelforallstars(leftpanel)andforthepopulations(right panel)fortheFornaxdSph. 
(Kowalczyketal.2019),weobtainedhigherestimatesoftheen- @@ -1377,28 +1296,27 @@ K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling 103 105 0.1 1 -Υ ( r ) [ M -⊙ / L ⊙ ] +Υ(r) +[M⊙ +/L⊙ ] r [kpc] ALL 0.1 1 r [kpc] POPULATIONS 3σ -2σ 1σ best model +2σ 1σ +best model K19 104 106 108 0.1 1 -ν t o t ( r ) -[ -M -⊙ -k -p -c - -3 ] +νtot +(r) +[M⊙ +kpc +-3] r [kpc] 0.1 1 r [kpc] @@ -1406,10 +1324,8 @@ r [kpc] 107 109 0.1 1 -M t o t ( r ) -[ -M -⊙ ] +Mtot (r) +[M⊙ ] r [kpc] 0.1 1 r [kpc] @@ -1418,7 +1334,7 @@ r [kpc] -1 01 0 0.4 0.8 1.2 1.6 -β ( r ) +β(r) r [kpc] 0 0.4 0.8 1.2 1.6 r [kpc] @@ -1454,35 +1370,34 @@ smaller data samples or using one stellar population) resulted -1 01 0 0.4 0.8 1.2 1.6 -P O P I -+ P O P -I +POP I -β -( r ) ++ +POP +II +β(r) r [kpc] -2 -1 01 0 0.4 0.8 1.2 1.6 -P -O -P +POP I -β ( -r ) +β(r) r [kpc] -2 -1 01 0 0.4 0.8 1.2 1.6 -P O P I -I -β ( r ) +POP +II +β(r) r [kpc] -best model 1σ +best model +1σ 2σ -3σ K19 +3σ +K19 Fig. 15. Profiles of the anisotropy parameter obtained with the SchwarzschildmodelingoftwostellarpopulationsfortheFornaxdSph. Inrows:resultsforallstars(calculatedasthesuperpositionoftwopop- diff --git a/read/results/pdfplumber/2201.00178.txt b/read/results/pdfplumber/2201.00178.txt index 8b78181..11d5958 100644 --- a/read/results/pdfplumber/2201.00178.txt +++ b/read/results/pdfplumber/2201.00178.txt @@ -41,44 +41,11 @@ et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formu validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and comparing with previous time-distance studies (Langfellner et al. 2018). prasad.subramanian@tifr.res.in -a -r -X -i -v -: -2 -2 -0 -1 -. -0 -0 -1 -7 -8 -v +arXiv:2201.00178v1 +[astro-ph.SR] 1 -[ -a -s -t -r -o -- -p -h . -S -R -] -1 -J -a -n -2 -0 -2 -2 +Jan +2022 2 Mani et al. 
Normal-modecouplingreferstotheconceptofexpressingsolar-oscillationeigenfunctionsasalinearweightedcombi- nationofmodel-eigenfunctions(e.g.,ModelSChristensen-Dalsgaard2021). Themodeleigenfunctionsformacomplete diff --git a/read/results/pdfplumber/2201.00200.txt b/read/results/pdfplumber/2201.00200.txt index 483c0fb..1d9e0e0 100644 --- a/read/results/pdfplumber/2201.00200.txt +++ b/read/results/pdfplumber/2201.00200.txt @@ -83,44 +83,11 @@ solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot To reinvigorate the debate, Buldgen et al. (2019b) recently highlightedonceagainhowthetransitionofthetemperaturegra- 1 -a -r -X i -v -: -2 -2 -0 +arXiv:2201.00200v1 +[astro-ph.SR] 1 -. -0 -0 -2 -0 -0 -v -1 -[ -a -s -t -r -o -- -p -h -. -S -R -] -1 -J -a -n -2 -0 -2 -2 +Jan +2022 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem dientjustbelowtheconvectiveenvelopecansignificantlyimpact the disagreement between solar models and helioseismic con- diff --git a/read/results/pdfplumber/2201.00201.txt b/read/results/pdfplumber/2201.00201.txt index 3180bca..ea6303a 100644 --- a/read/results/pdfplumber/2201.00201.txt +++ b/read/results/pdfplumber/2201.00201.txt @@ -81,46 +81,11 @@ formed by Feast et al. (2006), who provided quantitative age estimates for these stars. A summary of the main results and prospectsemergingfromtheseHipparcos-erastudiesisgivenby Articlenumber,page1of9 -a -r -X -i -v -: -2 -2 -0 -1 -. -0 -0 -2 -0 -1 -v -2 -[ -a -s -t -r -o -- -p -h -. 
-S -R -] -1 -7 -J -a -n -2 -0 -2 -2 +arXiv:2201.00201v2 +[astro-ph.SR] +17 +Jan +2022 A&Aproofs:manuscriptno.trabucchi_etal_2022_period_age_relation_of_lpvs Feast(2007).Morerecently,thestudyoftheGalaxywithLPVs hasbeenstimulatedbythewealthofdataacquiredbylarge-scale diff --git a/read/results/pdfplumber/2201.00214.txt b/read/results/pdfplumber/2201.00214.txt index 13da42f..ac66774 100644 --- a/read/results/pdfplumber/2201.00214.txt +++ b/read/results/pdfplumber/2201.00214.txt @@ -1,42 +1,8 @@ -a -r -X -i -v -: -2 -2 -0 -1 -. -0 -0 -2 -1 -4 -v -1 -[ -a -s -t -r -o -- -p -h -. -S -R -] +arXiv:2201.00214v1 +[astro-ph.SR] 1 -J -a -n -2 -0 -2 -2 +Jan +2022 Temperature Analysis of Flaring (AR11283) and non-Flaring (AR12194) Coronal Loops @@ -530,7 +496,7 @@ peraturesonaverageandhigher oscillation periodswith higher peaksanddeepervalleys accurate commentary in this respect requiresmore extensive statistical researchand broader ob- servations. arcsec -a r c s e c +arcsec 79 154 229 304 379 454 −68 25 @@ -540,7 +506,7 @@ a r c s e c 397 a arcsec -a r c s e c +arcsec 114.6 171.2 227.8 284.4 341 171.4 206.3 @@ -556,7 +522,7 @@ Figure1: (a)AIAimageoftheAR11283on2011September6,22:10UTasseeninthe171 filter. ( oftheareamarkedbyaboxintheleft. Theselectedloopsaredistinguishedinred. TheloopsAandBare thesameloopsstudiedbyJainetal.(2015)(seeFig.3ainJainetal.(2015)). arcsec -a r c s e c +arcsec −154 0 154 308 −572 −418 @@ -565,7 +531,7 @@ a r c s e c 44 a arcsec -a r c s e c +arcsec −202 −134 −66 2 70 −396 −338 @@ -584,8 +550,7 @@ viewofthearea,markedbyaboxintheleft,theloopsaredistinguishedinred. 
6.4 6.6 6.8 -L o -g T +LogT F−LoopA 5.8 6 @@ -593,8 +558,7 @@ F−LoopA 6.4 6.6 6.8 -L o -g T +LogT 22:10 22:20 22:30 22:40 22:50 23:00 5.8 6 @@ -603,23 +567,24 @@ g T 6.6 6.8 time -L o -g T +LogT 5.8 6 -6.2 6.4 +6.2 +6.4 6.6 6.8 -L o g T +LogT F−LoopB1 22:10 22:20 22:30 22:40 22:50 23:00 5.8 6 -6.2 6.4 +6.2 +6.4 6.6 6.8 time -L o g T +LogT Figure3: Fromuptodown: Thetime-seriesofthetemperatureoscillationsforthefirst3stripsofLoopA(strip1to 3fromtoptodown),andthefirst2stripsofLoopB1. Horizontalaxisisthetimeandtheverticalaxisisthe logarithmofthetemperature. Theredlinesmarktheinitialandfinaltimeoftheflarex2.1. @@ -631,10 +596,11 @@ logarithmofthetemperature. Theredlinesmarktheinitialandfinaltimeoftheflarex2.1. 42 F−loopA Time -L o o p L e n g t h ( M -m ) +Loop +Length(Mm) 5.8 -66.2 6.4 +66.2 +6.4 6.6 6.8 22:10 22:20 22:30 22:40 22:50 23:00 @@ -645,14 +611,14 @@ m ) 20 F−loopB1 Time -L o o p -L e n g t h ( M -m ) +Loop +Length(Mm) 66.05 6.1 6.15 6.2 -6.25 6.3 +6.25 +6.3 6.35 6.4 6.45 @@ -665,10 +631,11 @@ m ) 16 F−loopB2 Time -L o o p L e n g t h ( M -m ) +Loop +Length(Mm) 5.8 -66.2 6.4 +66.2 +6.4 6.6 6.8 22:10 22:20 22:30 22:40 22:50 23:00 @@ -679,9 +646,8 @@ m ) 22 F−loopC1 Time -L o o p -L e n g t h ( -M m ) +Loop +Length(Mm) 5.6 5.8 66.2 @@ -696,10 +662,11 @@ M m ) 11 F−loopC2 Time -L o o p L e n g t h ( M -m ) +Loop +Length(Mm) 5.8 -66.2 6.4 +66.2 +6.4 6.6 6.8 Figure4: TemperaturemapoftheflaringloopsA,B1,B2,C1,andC2(fromtoptodown)asatimeseries. Thevertical @@ -809,9 +776,8 @@ Min(log(T)) 43 Int−Fe−LoopA Time -L o o p -L e n g t h ( -M m ) +Loop +Length(Mm) 00.02 0.04 0.06 @@ -826,16 +792,20 @@ M m ) 0 0.1 0.2 -0.3 0.4 0.5 0.6 +0.3 +0.4 +0.5 +0.6 0.7 0.8 0.9 1 Int−Fe−LoopA Time -N o r m a l i z e d I n t e n s i t y -F e -X V I I I +Normalized +Intensity +Fe +XVIII Figure5: Normalized intensity map of the flaring loop A for the wavelength Fe XVIII, and mean intensity of Fe XVIII (from top to down). 
The vertical axis is the distance along the loop in Mm for the first plot, and normalizedintensityforthesecond. Thehorizontalaxisisthetime. Thecolorbarintheleftshowsthecolors @@ -847,34 +817,38 @@ comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regardi work. 5.8 6 -6.2 6.4 +6.2 +6.4 6.6 6.8 -L o g T +LogT NonF−LoopA 8:00 8:10 8:20 8:30 8:40 8:50 9:00 5.8 6 -6.2 6.4 +6.2 +6.4 6.6 6.8 time -L o g T +LogT 5.8 6 -6.2 6.4 +6.2 +6.4 6.6 6.8 -L o g T +LogT NonF−LoopB 8:00 8:10 8:20 8:30 8:40 8:50 9:00 5.8 6 -6.2 6.4 +6.2 +6.4 6.6 6.8 time -L o g T +LogT Figure6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non- flaringLoopsAandB.Horizontalaxisisthetimeandtheverticalaxisisthelogarithmofthetemperature. 8:10 8:20 8:30 8:40 8:50 9:00 @@ -885,9 +859,8 @@ flaringLoopsAandB.Horizontalaxisisthetimeandtheverticalaxisisthelogarithmofthete 20 NonF−loopA Time -L o o p -L e n g t h ( M -m ) +Loop +Length(Mm) 5.8 66.2 6.4 @@ -901,9 +874,8 @@ m ) 18 NonF−loopB Time -L o o p -L e n g t h ( M -m ) +Loop +Length(Mm) 5.8 66.2 6.4 @@ -917,9 +889,8 @@ m ) 10 NonF−loopC Time -L o o p -L e n g t h ( M -m ) +Loop +Length(Mm) 5.8 66.2 6.4 @@ -931,15 +902,18 @@ colorsconsideredforthetemperaturerange. 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 0 0.05 -0.1 0.15 +0.1 +0.15 0.2 -0.25 0.3 +0.25 +0.3 0.35 0.4 Temp. Period (min) -P e r c e n t a g e -o f T e -m p . P e r i o d s +Percentage +of +Temp. +Periods Figure8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non- flaring(redbars)ARs. Thehorizontalaxisshowsthetemperatureperiodsinminute. 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 @@ -947,8 +921,7 @@ flaring(redbars)ARs. Thehorizontalaxisshowsthetemperatureperiodsinminute. 
10 12 max(log(T))−min(log(T)) -N u -m b e r +Number Figure9: Hisogramoftheparameterof(max(log(T))-min(log(T)))foreachstripoftheloopsoftheflaring(bluebars) andnon-flaring(redbars)ARs. References diff --git a/read/results/pdfplumber/GeoTopo-book.txt b/read/results/pdfplumber/GeoTopo-book.txt index 78adc17..f79353c 100644 --- a/read/results/pdfplumber/GeoTopo-book.txt +++ b/read/results/pdfplumber/GeoTopo-book.txt @@ -425,9 +425,11 @@ R2 überein. U 1 = R \N -U2 +U +2 = -R\ +R +\ N Abbildung 1.2: Zariski-Topologie auf R2 Definition 7 @@ -2145,7 +2147,7 @@ i : ··· : y n) -(cid:55)→ +(cid:55) → (y 1,...,y n) ist bijektiv. Die U diff --git a/read/results/playa/1601.03642.txt b/read/results/playa/1601.03642.txt new file mode 100644 index 0000000..7d39186 --- /dev/null +++ b/read/results/playa/1601.03642.txt @@ -0,0 +1,632 @@ +1 +Creativity in Machine Learning +Martin Thoma +E-Mail: info@martin-thoma.de +Abstract —Recent machine learning techniques can be modified +to produce creative results. Those results did not exist before; it +is not a trivial combination of the data which was fed into the +machine learning system. The obtained results come in multiple +forms: As images, as text and as audio. +This paper gives a high level overview of how they are created +and gives some examples. It is meant to be a summary of the +current work and give people who are new to machine learning +some starting points. + I. I NTRODUCTION +According to [Gad06] creativity is “the ability to use your +imagination to produce new ideas, make things etc.” and +imagination is “the ability to form pictures or ideas in your +mind”. +Recent advances in machine learning produce results which the +author would intuitively call creative. A high-level overview +over several of those algorithms are described in the following. 
+This paper is structured as follows: Section II introduces the +reader on a very simple and superficial level to machine +learning, Section III gives examples of creativity with images, +Section IV gives examples of machines producing textual +content, and Section V gives examples of machine learning +and music. A discussion follows in Section VI. +II. B ASICS OF M ACHINE L EARNING +The traditional approach of solving problems with software +is to program machines to do so. The task is divided in as +simple sub-tasks as possible, the subtasks are analyzed and the +machine is instructed to process the input with human-designed +algorithms to produce the desired output. However, for some +tasks like object recognition this approach is not feasible. There +are way to many different objects, different lighting situations, +variations in rotation and the arrangement of a scene for a +human to think of all of them and model them. But with the +internet, cheap computers, cameras, crowd-sourcing platforms +like Wikipedia and lots of Websites, services like Amazon +Mechanical Turk and several other changes in the past decades +a lot of data has become available. The idea of machine learning +is to make use of this data. +A formal definition of the field of Machine Learning is given +by Tom Mitchel [Mit97]: +A computer program is said to learn from experi- +ence E with respect to some class of tasks T and +performance measure P , if its performance at tasks +in T , as measured by P , improves with experience E . Σ ϕx + 0 +x + 1 +x + 2 +x + 3 +x +n w + 0 +w +1 +w +2 +w +3 +w +n. +. +. +(a) Example of an artificial neuron unit. +x + i are the input signals and w +i are +weights which have to get learned. +Each input signal gets multiplied +with its weight, everything gets +summed up and the activation func- +tion ϕ is applied. (b) A visualization of a simple feed- +forward neural network. 
The 5 in- +put nodes are red, the 2 bias nodes +are gray, the 3 hidden units are +green and the single output node +is blue. +Fig. 1: Neural networks are based on simple units which get +combined to complex networks. +This means that machine learning programs adjust internal +parameters to fit the data they are given. Those computer +programs are still developed by software developers, but the +developer writes them in a way which makes it possible to +adjust them without having to re-program everything. Machine +learning programs should generally improve when they are fed +with more data. +The field of machine learning is related to statistics. Some +algorithms directly try to find models which are based on well- +known distribution assumptions of the developer, others are +more general. +A common misunderstanding of people who are not related +in this field is that the developers don’t understand what their +machine learning program is doing. It is understood very well +in the sense that the developer, given only a pen, lots of paper +and a calculator could calculate the same result as the machine +does when he gets the same data. And lots of time, of course. It +is not understood in the sense that it is hard to make predictions +how the algorithm behaves without actually trying it. However, +this is similar to expecting from an electrical engineer to +explain how a computer works. The electrical engineer could +probably get the knowledge he needs to do so, but the amount +of time required to understand such a complex system from +basic building blocks is a time-intensive and difficult task. +An important group of machine learning algorithms was +inspired by biological neurons and are thus called artificial +neural networks . Those networks are based on mathematical +functions called artificial neurons which take n ∈ N num- +bers x + 1 , . . . , x + n ∈ R as input, multiply them with weights +w + 1 , . . . 
, w + n ∈ R , add them and apply a so called activation +function ϕ as visualized in Figure 1(a). One example of such +an activation function is the sigmoid function ϕ( x ) = 1 +1+e − x . +Those functions act as building blocks for more complex +systems as they can be chained and grouped in layers as +visualized in Figure 1(b). The interesting question is how +the parameters w +i are learned. This is usually done by an +optimization technique called gradient descent . The gradient +descent algorithm takes a function which has to be derivable, +starts at any point of the surface of this error function andarXiv:1601.03642v1 [cs.CV] 12 Jan 2016 +2 +makes a step in the direction which goes downwards. Hence +it tries to find a minimum of this high-dimensional function. +There is, of course, a lot more to say about machine learning. +The interested reader might want to read the introduction given +by Mitchell [Mit97]. + III. I MAGE D ATA +Applying a simple neural network on image data directly can +work, but the number of parameters gets extraordinary large. +One would take one neuron per pixel and channel. This means +for 500 px × 500 px RGB images one would get 750 ,000 input +signals. To approach this problem, so called Convolutional +Neural Networks (CNNs) were introduced. Instead of learning +the full connection between the input layer and the first +hidden layer, those networks make use of convolution layers. +Convolution layers learn a convolution; this means they learn +the weights of an image filter. An additional advantage is that +CNNs make use of spacial relationships of the pixels instead +of flattening the image to a stream of single numbers. +An excellent introduction into CNNs is given by [Nie15]. +A. Google DeepDream +The gradient descent algorithm which optimizes most of the +parameters in neural networks is well-understood. However, the +effect it has on the recognition system is difficult to estimate. 
+[MOT15] proposes a technique to analyze the weights learned +by such a network. A similar idea was applied by [VKMT13]. +For example, consider a neural network which was trained to +recognize various images like bananas. This technique turns +the network upside down and starts with random noise. To +analyze what the network considers bananas to look like, the +random noise image is gradually tweaked so that it generates +the output “banana”. Additionally, the changes can be restricted +in a way that the statistics of the input image have to be similar +to natural images. One example of this is that neighboring +pixels are correlated. +Another technique is to amplify the output of layers. This was +described in [MOT15]: +We ask the network: “Whatever you see there, I want +more of it!” This creates a feedback loop: if a cloud +looks a little bit like a bird, the network will make +it look more like a bird. This in turn will make the +network recognize the bird even more strongly on +the next pass and so forth, until a highly detailed +bird appears, seemingly out of nowhere. +The name “Inceptionism” in the title of [MOT15] comes from +the science-fiction movie “Inception” (2010). One reason it +might be chosen is because neural networks are structured +in layers. Recent publications tend to have more and more +layers [HZRS15]. The used jargon is to say they get “deeper”. +As this technique as published by Google engineers, the +technique is called Google DeepDream . Fig. 2: Aurelia aurita +Fig. 3: DeepDream impression of Aurelia aurita +It has become famous in the internet [Red]. Usually, the images +are generated in iterations and in each iteration it is zoomed +into the image. +Images and videos published by the Google engineers can be +seen at [goo15]. Figure 2 shows the original image from which +Figure 3 was created with the deep dream algorithm. +B. 
Artistic Style Imitation +A key idea of neural networks is that they learn different +representations of the data in each layer. In the case of +CNNs, this can easily be visualized as it was done in various +papers [ZF14]. Usually, one finds that the network learned +to build edge detectors in the first layer and more complex +structures in the upper layers. +Gatys, Ecker and Bethge showed in [GEB15] that with a clever +choice of features it is possible to separate the general style of +an image in terms of local image appearance from the content +of an image. They support their claim by applying the style of +different artists to an arbitrary image of their choice. +3 +(a) Original Image (b) Style image +(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph +of a Scottish Highland Cattle. +Fig. 4: The algorithm takes both, the original image and the +style image to produce the result. +This artistic style imitation can be seen itself as creative work. +An example is given by Figure 4. The code which created this +example is available under [Joh16]. +Something similar was done by [SPB+ + 14], where the style of +a portrait photograph was transferred to another photograph. +A demo can be seen on [Shi14]. +C. Drawing Robots +Patrick Tresset and Frdric Fol Leymarie created a system called +AIKON (Automatic IKONic drawing) which can automatically +generated sketches for portraits [TL05]. AIKON takes a digital +photograph, detects faces on them and sketches them with a +pen-plotter. +Tresset and Leymaire use k -means clustering [KMN + + 02] to +segment regions of the photograph with similar color which, +in turn, will get a similar shading. +Such a drawing robot could apply machine learning techniques +known from computer vision for detecting the human. It +could apply self-learning techniques to draw results most +similar to the artists impression of the image. 
However, the +system described in [TL05] seems not to be a machine +learning computer program according to the definition by Tom +Mitchell [Mit97]. IV. T EXT D ATA +Digital text is the first form of natural communication which +involved computers. It is used in the form of chats, websites, +on collaborative projects like Wikipedia, in scientific literature. +Of course, it was used in pre-digital times, too: In newspaper, +in novels, in dramas, in religious texts like the bible, in books +for education, in notes from conversations. +This list could be continued and most of these kinds of texts +are now available in digital form. This digital form can be +used to teach machines to generate similar texts. +The most simple language model which is of use is ann -gram +model. This model makes use of sequences of the lengthn to +model language. It can be used to get the probability of a third +word, given the previous two words. This way, a complete text +can be generated word by word. Refinements and extensions +to this model are discussed in the field of Natural Language +Processing (NLP). +However, there are much more sophisticated models. One +of those are character predictors based on Recurrent Neural +Networks (RNNs). Those character predictors take a sequence +of characters as input and predict the next character. In that +sense they are similar to the n -gram model, but operate on +a lower level. Using such a predictor, one can generate texts +character by character. If the model is good, the text can have +the correct punctuation. This would not be possible with a +word predictor. +Character predictors can be implemented with RNNs. In con- +trast to standard feed-forward neural networks like multilayer +Perceptrons (MLPs) which was shown in Figure 1(b), those +networks are trained to take their output at some point as well as +the normal input. This means they can keep some information +over time. 
One of the most common variant to implement +RNNs is by using so called Long short-term memory (LSTM) +cells [HS97]. +Recurrent networks apply two main ideas in order to learn: The +first is called unrolling and means that an recurrent network +is imagined to be an infinite network over time. At each time +step the recurrent neurons get duplicated. The second idea is +weight sharing which means that those unrolled neurons share +the same weight. +A. Similar Texts Generation +Karpathy trained multiple character RNNs on different datasets +and gave an excellent introduction [Kar15b]. He trained it on +Paul Graham’s essays, all the works of Shakespeare, the Hutter +Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw +LA +T +EX source file of a book about algebraic stacks and geometry +and Linux C code. +With that training data, the models can generate similar texts. +New works which look like Shakespeare plays, new Wikipedia +articles, new Linux code and new papers about algebraic +geometry can thus automatically be generated. At a first +4 +glance, they do look authentic. The syntax was mostly used +correctly, the formatting looks as expected, the sentences are +grammatically correct. However, when one looks at the broader +context it is easy to recognize that the algorithm has no insight +in what it is doing. It does match patterns really well, but it +fails to follow a central theme. In the context of C code this +means that new variables are introduced, but not used. At the +same time, variables which were not declared are used. In +the context of Shakespear plays this means that a lot of new +characters are introduced, but they don’t speak with each other +or about each other. +The code used to generate these examples is available and +ready to use through [Kar15a]. A couple of examples are +in Section A. +B. Chatbots +Chatbots are computer programs which participate in chat +rooms as autonomous agents. 
This means they have similar +permissions and possibilities as usual human users have, but +users can trigger a set of commands to make the bot give them +valuable information or features. +A special category of chatbots are such bots which actively +participate in the conversation, which is usually not the case. +One of the earliest programs in this category is ELIZA, a bot +created by Joseph Weizenbaum in the 1960s [Wei76]. This +program had a set of patterns implemented to which it would +reply in a seemingly smart way in a psychologists fashion. +This means quite often the program would simply repeat the +last sentence and add something meaningless like “How do +you feel about it?”. According to [Cur14], Weizenbaum once +found his secretary — who was aware of the fact that this is +a computer program — chatting with the machine. When he +looked over her shoulder, she asked him “would you mind +leaving the room”. +Today, much more sophisticated chatbots exist. They make use +of the vast amount of data which is available by millions of +Wikipedia articles, chat protocols, websites, help desk protocols, +subtitles of movies as well as the astonishing increase in +computing power to train RNNs and language models similar +to the ones described before. +Interesting results like the following were obtained by [VL15]: +Human: what is the purpose of life ? +Machine : to serve the greater good . +Human: what is the purpose of living ? +Machine : to live forever . +V. A UDIO D ATA +Common machine learning tasks which involve audio data +are speech recognition, speaker identification, identification of +songs. This leads to some less-common, but interesting topics: +The composition of music, the synthesizing of audio as art. +While the composition might be considered in Section IV, we will now investigate the work which was done in audio +synthesization. +A. 
Emily Howell +David Cope created a project called “Experiments in Musical +Intelligence” (short: EMI or Emmy) in 1984 [Cop87]. He +introduces the idea of seeing music as a language which +can be analyzed with natural language processing (NLP) +methods. Cope mentions that EMI was more useful to him, +when he used the system to “create small phrase-size textures +as next possibilities using its syntactic dictionary and rule +base” [Cop87]. +In 2003, Cope started a new project which was based on EMI: +Emily Howell [Cop13]. This program is able to “creat[e] both +highly authentic replications and novel music compositions”. +The reader might want to listen to [Cop12] to get an impression +of the beauty of the created music. +According to Cope, an essential part of music is “a set of +instructions for creating different, but highly related self- +replications”. Emmy was programmed to find this set of +instructions. It tries to find the “signature” of a composer, +which Cope describes as “contiguous patterns that recur in two +or more works of the composer”. +The new feature of Emily Howell compared to Emmy is that +Emily Howell does not necessarily remain in a single, already +known style. +Emily Howell makes use of association network. Cope empha- +sizes that this is not a form of a neural network. However, it +is not clear from [Cop13] how exactly an association network +is trained. Cope mentions that Emily Howell is explained in +detail in [Cop05]. +B. GRUV +Recurrent neural networks — LSTM networks, to be exact +— are used in [NV15] together with Gated Recurrent Units +(GRU) to build a network which can be trained to generate +music. Instead of taking notes directly or MIDI files, Nayebi +and Vitelli took raw audio waveforms as input. Those audio +waveforms are feature vectors given for time steps0, 1 , . . . , t − +1 , t. The network is given those feature vectors X +1 , . . . , X + t +and has to predict the following feature vector X +t +1 . 
This +means it continues the music. As the input is continuous, the +problem was modeled as a regression task. Discrete Fourier +Transformation (DFT) was used on chunks of lengthN of the +music to obtain features in the frequency domain. +An implementation can be found at [VN15] and a demonstration +can be found at [Vit15]. +C. Audio Synthesization +Audio synthesization is generating new audio files. This can +either be music or speech. With the techniques described before, +5 +neural networks can be trained to generate music note by note. +However, it is desirable to allow multiple notes being played +at the same time. +This idea and some others were applied by Daniel Johnson. He +wrote a very good introduction into neural networks for music +composition which explains those ideas [Joh15b]. Example +compositions are available there, too. He also made the code for +his Biaxial Recurrent Neural Network available under [Joh15a]. +VI. D ISCUSSION +What does these examples mean for our understanding of +creativity? Does it influence how much we value art? Could +we define art and creativity better after having those and similar +results? +I think we might readjust our understanding of creativity just +like we adjusted our understanding of algorithmically hard +problems after Deep Blue won against the reigning world +chess champion Garry Kasparov in 1997. +However, by now it is obvious that machine learning algorithms +cannot compete with human artists. Today’s state of the art +algorithms which are purely based on machine learning don’t +follow a central theme. They lack the ability to plan. Although +clever algorithms were implemented for composing music, it +seems as if there is still a lot of supervision involved. +R EFERENCES +[Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. +[Online]. Available: http://hdl.handle.net/2027/spo.bbp2372.1987. +025 +[Cop05] ——, Computer models of musical creativity . MIT Press +Cambridge, 2005. 
+[Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. +Available: https://www.youtube.com/watch?v=jLR- c uCwI +[Cop13] ——, “The well-programmed clavier: Style in computer music +composition,” XRDS: Crossroads, The ACM Magazine for +Students , vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: +http://dl.acm.org/citation.cfm?id=2460444 +[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On- +line]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ +78691781- c9b7-30a0- 9a0a-3ff76e8bfe58 +[Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson +Education Limited, 2006. +[GEB15] + L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of +artistic style,” arXiv preprint arXiv:1508.06576 , 2015. [Online]. +Available: http://arxiv.org/abs/1508.06576 +[goo15] “Inceptionism: Going deeper into neural networks,” Google +Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw +[HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,” +Neural computation , vol. 9, no. 8, pp. 1735–1780, 1997. +[Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs all.jsp? +arnumber=6795963 +[hut] + “50’000 euro prize for compressing human knowledge.” [Online]. +Available: http://prize.hutter1.net/ +[HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning +for image recognition,” arXiv preprint arXiv:1512.03385 , 2015. +[Online]. Available: http://arxiv.org/abs/1512.03385 [Joh15a] D. Johnson, “Biaxial recurrent neural network for music +composition,” GitHub, Aug. 2015. [Online]. Available: https: +//github.com/hexahedria/biaxial-rnn- music-composition +[Joh15b] ——, “Composing music with recurrent neu- +ral networks,” Personal Blog, Aug. 2015. [On- +line]. Available: http://www.hexahedria.com/2015/08/03/ +composing-music- with-recurrent- neural-networks/ +[Joh16] + J. Johnson, “neural-style,” GitHub, Jan. 2016. [Online]. Available: +https://github.com/jcjohnson/neural-style +[Kar15a] A. Karpathy, “char-rnn,” GitHub, Nov. 
2015. [Online]. Available: +https://github.com/karpathy/char-rnn +[Kar15b] + ——, “The unreasonable effectiveness of recurrent neural +networks,” Personal Blog, May 2015. [Online]. Available: +http://karpathy.github.io/2015/05/21/rnn-effectiveness/ +[KMN + + 02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman, +and A. Wu, “An efficient k-means clustering algorithm: analysis +and implementation,” Pattern Analysis and Machine Intelligence, +IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002. +[Mit97] T. M. Mitchell, Machine learning , ser. McGraw Hill series in +computer science. McGraw-Hill, 1997. +[MOT15] + A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going +deeper into neural networks,” googleresearch.blogspot.co.uk, +Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/ +2015/06/inceptionism-going- deeper-into- neural.html +[Nie15] M. A. Nielsen, Neural Networks and Deep Learn- +ing . Determination Press, 2015. [Online]. Avail- +able: http://neuralnetworksanddeeplearning.com/chap6.html# +introducing convolutional networks +[NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation +using recurrent neural networks,” 2015. [Online]. Available: +http://cs224d.stanford.edu/reports/NayebiAran.pdf +[Red] “Deepdream,” Reddit. [Online]. Available: https://www.reddit. +com/r/deepdream/ +[Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun. +2014. [Online]. Available: https://www.youtube.com/watch?v= +Hj5lGFzlubU +[SPB + + 14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, +“Style transfer for headshot portraits,” ACM Transactions on +Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available: +http://dl.acm.org/citation.cfm?id=2601137 +[TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in +Proceedings of VSMM, 2005, pp. 739–748. +[Vit15] M. Vitelli, “Algorithmic music generation with recurrent +neural networks,” YouTube, Jun. 2015. [Online]. 
Available: +https://youtu.be/0VTI1BBLydE +[VKMT13] C. Vondrick, A. Khosla, T. Malisiewicz, and A. Torralba, +“Hoggles: Visualizing object detection features,” in Computer +Vision (ICCV), 2013 IEEE International Conference on. IEEE, +2013, pp. 1–8. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs all.jsp?arnumber=6751109 +[VL15] O. Vinyals and Q. Le, “A neural conversational model,” +arXiv preprint arXiv:1506.05869, Jul. 2015. [Online]. Available: +http://arxiv.org/abs/1506.05869v2 +[VN15] M. Vitelli and A. Nayebi, “GRUV,” Aug. 2015. [Online]. +Available: https://github.com/MattVitelli/GRUV +[Wei76] J. Weizenbaum, Computer Power and Human Reason: From +Judgement to Calculation. W.H.Freeman & Co Ltd, 1976. +[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding con- +volutional networks,” in Computer Vision–ECCV 2014. Springer, +2014, pp. 818–833. +6 +A PPENDIX A +A UTOM ATICALLY GENERATED T EXTS +The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for +convenience and the case that the website is not reachable. +A. Shakespeare +PANDARUS: +Alas, I think he shall be come approached and the day +When little srain would be attain’d into being never fed, +And who is but a chain and subjects of his death, +I should not sleep. +Second Senator: +They are away this miseries, produced upon my soul, +Breaking and strongly should be buried, when I perish +The earth and thoughts of many states. +DUKE VINCENTIO: +Well, your wit is in the care of side and that. +Second Lord: +They would be ruled after this chamber, and +my fair nues begun out of the fact, to be conveyed, +Whose noble souls I’ll have the heart of the wars. +Clown: +Come, sir, I will make did behold your worship. +VIOLA: +I’ll drink it. +B. 
Wikipeda +Naturalism and decision for the majority of Arab countries’ capitalide was grounded +by the Irish language by [[John Clair]], [[An Imperial Japanese Revolt]], associated +with Guangzham’s sovereignty. His generals were the powerful ruler of the Portugal +in the [[Protestant Immineners]], which could be said to be directly in Cantonese +Communication, which followed a ceremony and set inspired prison, training. The +emperor travelled back to [[Antioch, Perth, October 25|21]] to note, the Kingdom +of Costa Rica, unsuccessful fashioned the [[Thrales]], [[Cynth’s Dajoard]], known +in western [[Scotland]], near Italy to the conquest of India with the conflict. +Copyright was the succession of independence in the slop of Syrian influence that +was a famous German movement based on a more popular servicious, non-doctrinal +and sexual power post. Many governments recognize the military housing of the +[[Civil Liberalization and Infantry Resolution 265 National Party in Hungary]], +that is sympathetic to be to the [[Punjab Resolution]] +(PJS)[http://www.humah.yahoo.com/guardian. +cfm/7754800786d17551963s89.htm Official economics Adjoint for the Nazism, Montgomery +was swear to advance to the resources for those Socialism’s rule, +was starting to signing a major tripad of aid exile.]] +C. Linux Code, 1 +/ +* +7 +* Increment the size file of the new incorrect UI_FILTER group information +* of the size generatively. +* / +static int indicate_policy(void) +{ + int error; +if (fd == MARN_EPT) { +/ + * +* The kernel blank will coeld it to userspace. 
+*/ +if (ss->segment < mem_total) +unblock_graph_and_set_blocked(); +else +ret = 1; +goto bail; +} +segaddr = in_SB(in.addr); +selector = seg / 16; +setup_works = true; +for (i = 0; i < blocks; i++) { +seq = buf[i++]; +bpf = bd->bd.next + i + * search; +if (fd) { +current = blocked; +} +} +rw->name = "Getjbbregs"; +bprm_self_clearl(&iv->version); +regs->new = blocks[(BPF_STATS << info->historidac)] | PFMR_CLOBATHINC_SECONDS << 12; +return segtable; +} +D. Linux Code, 2 +/ +* +* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved. +* +* This program is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 as published by +* the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+* / +#include +#include +#include +#include +#include +8 +#include +#include +#include +#include +#include +#include +#include +#define REG_PG vesa_slot_addr_pack +#define PFM_NOCOMP AFSR(0, load) +#define STACK_DDR(type) (func) +#define SWAP_ALLOCATE(nr) (e) +#define emulate_sigs() arch_get_unaligned_child() +#define access_rw(TST) asm volatile("movd %%esp, %0, %3" : : "r" (0)); \ +if (__type & DO_READ) +static void stat_PC_SEC __read_mostly offsetof(struct seq_argsqueue, \ +pC>[1]); +static void +os_prefix(unsigned long sys) +{ +#ifdef CONFIG_PREEMPT +PUT_PARAM_RAID(2, sel) = get_state_state(); +set_pid_sum((unsigned long)state, current_state_str(), +(unsigned long)-1->lr_full; low; +} diff --git a/read/results/playa/1602.06541.txt b/read/results/playa/1602.06541.txt new file mode 100644 index 0000000..9184247 --- /dev/null +++ b/read/results/playa/1602.06541.txt @@ -0,0 +1,1782 @@ +1 +A Survey of Semantic Segmentation +Martin Thoma +info@martin-thoma.de +Abstract —This survey gives an overview over different +techniques used for pixel-level semantic segmentation. +Metrics and datasets for the evaluation of segmenta- +tion algorithms and traditional approaches for segmen- +tation such as unsupervised methods, Decision Forests +and SVMs are described and pointers to the relevant +papers are given. Recently published approaches with +convolutional neural networks are mentioned and typical +problematic situations for segmentation algorithms are +examined. A taxonomy of segmentation algorithms is +given. + I. I NTRODUCTION +Semantic segmentation is the task of clustering +parts of images together which belong to the same +object class. This type of algorithm has several use- +cases such as detecting road signs [ MBLAGJ+ + 07], +detecting tumors [ MBVLG02 ], detecting medical in- +struments in operations [WAH97 ], colon crypts segmen- +tation [ CRSS14], land use and land cover classifica- +tion [HDT02 ]. 
In contrast, non-semantic segmentation +only clusters pixels together based on general character- +istics of single objects. Hence the task of non-semantic +segmentation is not well-defined, as many different +segmentations might be acceptable. +Several applications of segmentation in medicine are +listed in [PXP00]. +Object detection, in comparison to semantic seg- +mentation, has to distinguish different instances of the +same object. While having a semantic segmentation +is certainly a big advantage when trying to get object +instances, there are a couple of problems: neighboring +pixels of the same class might belong to different object +instances and regions which are not connected may +belong to the same object instance. For example, a +tree in front of a car may visually divide the car into +two parts. +This paper is organized as follows: It begins by giving +a taxonomy of segmentation algorithms in Section II. +A summary of quality measures and datasets which are +used for semantic segmentation follows in Section III. +A summary of traditional segmentation algorithms and +their characteristics follows in Section V, as well as a +brief, non-exhaustive summary of recently published +semantic segmentation algorithms which are based on +neural networks in Section VI. Finally, Section VII +informs the reader about typical problematic cases for +segmentation algorithms. II. TAXONOMY OF SEGMENTATION ALGORITHMS +The computer vision community has published a +wide range of segmentation algorithms so far. Those +algorithms can be grouped by the kind of data they +operate on and the kind of segmentation they are able +to produce. +The following subsections will give four different +criteria by which segmentation algorithms can be +classified.
+This survey describes fixed-class (see Section II-A), +single-class affiliation (see Section II-B ) algorithms +which work on grayscale or colored single pixel images +(see Section II-C ) in a completely automated, passive +fashion (see Section II-D). +A. Allowed classes +Semantic segmentation is a classification task. As +such, the choice of classes on which the algorithm is trained is a +central design decision. +Most algorithms work with a fixed set of classes; +some even only work on binary classes like fore- +ground vs background [ RM07], [ CS10 ] or street vs +no street [BKTT15]. +However, there are also unsupervised segmentation +algorithms which do not distinguish classes at all (see +Section V-B) as well as segmentation algorithms which +are able to recognize when they don’t know a class. +For example, in [ GRC+ + 08 ] a void class was added +for classes which were not in the training set. Such +a void class was also used in the MSRCv2 dataset +(see Section III-B2) to make it possible to make more +coarse segmentations and thus to spend less +time annotating the image. +B. Class affiliation of pixels +Humans do an incredible job when looking at the +world. For example, when we see a glass of water +standing on a table we can automatically say that there +is the glass and behind it the table, even if we only had a +single image and were not allowed to move. This means +we simultaneously assign two labels to the coordinates of the +glass: Glass and table. Although there is much more +work being done on single class affiliation segmenta- +tion algorithms, there is a publication about multiple +class affiliation + segmentation [ LRAL08 ]. Similarly, +recent publications in pixel-level object segmentation +used layered models [YHRF12]. arXiv:1602.06541v2 [cs.CV] 11 May 2016 +2 +C. Input Data +The available data which can be used for the +inference of a segmentation varies by application.
+• Grayscale vs colored: Grayscale images are +commonly used in medical imaging such as +magnetic resonance (MR) imaging or ultrasonog- +raphy whereas colored photographs are obviously +widespread. +• Excluding or including depth data: RGB-D, +sometimes also called range [ HJBJ + + 96 ], is avail- +able in robotics, autonomous cars and recently +also in consumer electronics such as Microsoft +Kinect [Zha12]. +• Single image vs stereo images vs co- +segmentation: Single image segmentation is the +most wide-spread kind of segmentation, but using +stereo images was already tried in [BVZ01]. It can +be seen as a more natural way of segmentation as +most mammals have two eyes. It can also be seen +as being related to having depth data. +Co-segmentation as in [ RMBK06], [ CXGS12 ] is +the problem of finding a consistent segmentation +for multiple images. This problem can be seen +in two ways: On the one hand, it can be seen +as the problem of finding common objects in at +least two images. On the other hand, every image +after the first can be used as an additional source +of information to find a meaningful segmentation. +This idea can be extended to time series such as +videos. +• 2D vs 3D + : Segmenting images is a 2D segmenta- +tion task where the smallest unit is called a pixel. +In 3D data, such as volumetric X-ray CT images +as they were used in [ HHR01 ], the smallest unit +is called a voxel. +D. Operation state +The operation state of the classifying machine can +either be active as in [SUM + + 11 ], [SSA12] where robots +can move objects to find a segmentation or passive, +where the received image cannot be influenced. Among +the passive algorithms, some segment in a completely +automatic fashion, others work in an interactive mode. +One example would be a system where the user clicks +on the background or marks a coarse segmentation and +the algorithm finds a fine-grained segmentation.
[BJ00], +[ RKB04 ], [ PS07] describe systems which work in an +interactive mode. (a) Example Scene (b) Visualization of a found seg- +mentation +Figure 1: An example of a scene and a possible visu- +alization of a found segmentation. +III. EVALUATION AND DATASETS +A. Quality measures for evaluation +A performance measure is a crucial part of any +machine learning system. As users of a semantic +segmentation system expect correct results, the accuracy +is the most commonly used performance measure, but +there are other measures of quality which matter when +segmentation algorithms are compared. This section +gives an overview of those quality measures. +1) Accuracy: Showing the correctness of the segmen- +tation hypotheses is done in most publications about +semantic segmentation. However, there are a couple +of different ways how this accuracy can be displayed. +One way to give readers a first qualitative impression +of the obtained segmentations is by showing examples +such as Figure 1. +However, this can only support the explanation of +particular problems or showcase special situations. For +meaningful information about the overall accuracy, there +are a couple of metrics how accuracy can be defined. +For this section, let $k \in \mathbb{N}$ be the number of classes, +$n_{ij} \in \mathbb{N}_0$ with $i, j \in \{1, \dots, k\}$ be the number of pixels +which belong to class $i$ and were labeled as class $j$. +$(n_{ij})$ is called a confusion matrix. Let $t_i = \sum_{j=1}^{k} n_{ij}$ +be the total number of pixels of class $i$. +One way to compare segmentation algorithms is by +the pixel-wise accuracy of the predicted segmentation +as done in many publications [ SWRC06], [ CP08], +[ LSD14 ]. This is also called per-pixel rate and de- +fined as $\frac{\sum_{i=1}^{k} n_{ii}}{\sum_{i=1}^{k} t_i}$. Taking the pixel-wise classification +accuracy has two major drawbacks: +P1 Tasks like segmenting images for autonomous cars +have large regions which have one class.
This +makes achieving classification accuracies of more +than 30 % with a priori knowledge only possible. +For example, a system might learn that a certain +position of the image is most of the time “sky” +while another position is most of the time “road”. +3 +P2 The manually labeled images could have a more +coarse labeling. For example, a human classifier +could have labeled a region as “car” and the +algorithm could have split that region into the +general “car” and the more specific “wheel of a +car”. +Three accuracy metrics which do not suffer from +problem P1 are used in [LSD14]: +• mean accuracy: $\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i} \in [0, 1]$ +• mean intersection over union: $\frac{1}{k} \cdot \sum_{i=1}^{k} \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0, 1]$ +• frequency weighted intersection over union: $\left(\sum_{i=1}^{k} t_i\right)^{-1} \sum_{i=1}^{k} t_i \cdot \frac{n_{ii}}{t_i - n_{ii} + \sum_{j=1}^{k} n_{ji}} \in [0, 1]$ +Another problem might be pixels which cannot be +assigned to one of the known classes. For this reason, +[ SWRC06 ] makes use of a void class. This class gets +completely ignored for all quality measures. Hence the +total number of pixels is assumed to be width · height − +number of void pixels. +One way to deal with problem P1 and problem P2 +is giving the confusion matrix as done in [ SWRC06 ]. +However, this approach is not feasible if many classes +are given. +The F -measure is useful for binary classifica- +tion tasks such as the KITTI road segmentation +benchmark [ FKG13 ] or crypt segmentation as done +by [ CRSS14]. It is calculated as “the harmonic mean +of the precision and recall” [PH05]: +$F_\beta = \frac{(1 + \beta^2) \cdot tp}{(1 + \beta^2) \cdot tp + \beta^2 \cdot fn + fp}$ +where $\beta = 1$ is chosen in most cases and tp means +true positive , fn means false negative and fp means +false positive. +Finally, it should be noted that a lot of other measures +for the accuracy of segmentations were proposed for +non-semantic segmentation.
One of those accuracy +measures is Normalized Probabilistic Rand (NPR) +index which was introduced in [ UPH05 ] and eval- +uated in [ CSI + + 09 ] on dermoscopy images. Other +non-semantic segmentation measures were introduced +in [MFTM01], but the reason for creating them seems to +be to deal with the under-defined task description of non- +semantic segmentation. These accuracy measures try to +deal with different levels of coarsity of the segmentation. +This is much less of a problem in semantic segmentation +and thus those measures are not explained here. +2) Speed: A maximum upper bound on the execution +time for the inference on a single image is a hard +requirement for some applications. For example, in the +case of autonomous cars an algorithm which classifies +pixel as street or no-street and thus makes a semantic segmentation, every image needs to be processed within +20 ms [BKTT15]. This time is called latency. +Most papers do not give exact values for the time +their application needs. One reason might be that this is +very hardware, implementation and in some cases even +data specific. For example, [HJBJ + + 96 ] notes that their +algorithm needs 10 s on a Sun SparcStation 20. The +fastest CPU ever produced for this system had200 MHz. +Comparing this directly with results which were ob- +tained using an Intel i7-4820K with 3.9 GHz would not +be meaningful. +However, it does still make sense to mention the +execution time as well as the hardware in individual +papers. This gives the interested reader the possibility to +estimate how difficult it might be to adjust the algorithm +to work in the required time-constraints. +Besides the latency, the throughput is another +relevant characteristic of algorithms and implementa- +tions for semantic segmentation. For example, for the +automatic description of images in order to enable text +search the throughput is of much higher importance +than latency. 
+3) Stability: A reasonable requirement on semantic +segmentation algorithms is the stability of a segmen- +tation over slight changes in the input image. When +the image data is slightly blurred by smoke such as +in Figure 4(c), the segmentation should not change. +Also, two images which show a slight change in +perspective should also only result in slight changes in +the segmentation [PH05]. +4) Memory usage: Peak memory usage matters +when segmentation algorithms are used in devices like +smartphones or cameras, or when the algorithms have +to finish in a given time frame, run on the graphics +processing unit (GPU) and consume so much memory +for single image segmentation that only the latest +graphics cards can be used. However, no publications +were available mentioning the peak memory usage. +B. Datasets +The computer vision community produced a couple +of different datasets which are publicly available. In +the following, only the most widely used ones as well +as three medical databases are described. An overview +over the quantity and the kind of data is given by +Table I. +1) PASCAL VOC: The PASCAL 1 + VOC 2 + challenge +was organized eight times with different datasets: +Once every year from 2005 to 2012 [ EVGW+ + b ]. +1 + p + attern analysis, statistical modelling and computational l earning, +an EU network of excellence +2 + V isual Object Classes +4 +Beginning with 2007, a segmentation challenge was +added [EVGW + + a]. +The dataset consists of annotated photographs from +www.flickr.com, a photo sharing website. There are +multiple challenges for PASCAL VOC. The 2012 +competition had five challenges of which one is a +segmentation challenge where a single class label was +given for each pixel. The classes are: aeroplane, bicycle, +bird, boat, bottle, bus, car, cat, chair, cow, dining table, +dog, horse, motorbike, person, potted plant, sheep, sofa, +train, tv/monitor.
+Although no new competitions will be held, new +algorithms can be evaluated on the 2010, 2011 and +2012 data via http://host.robots.ox.ac.uk:8080/ +The PASCAL VOC segmentation challenges use the +intersection over union criterion (see Section III-A). +2) MSRCv2: Microsoft Research has published a +database of 591 photographs with pixel-level annotation +of 21 classes: aeroplane, bike, bird, boat, body, book, +building, car, cat, chair, cow, dog, face, flower, grass, +road, sheep, sign, sky, tree, water. Additionally, there +is a void label for pixels which do not belong to +any of the 21 classes or which are close to the +segmentation boundary. This allows a “rough and quick +hand-segmentation which does not align exactly with +the object boundaries” [SWRC06]. +3) Medical Databases: The Warwick-QU Dataset +consists of 165 images with pixel-level annotation of +5 classes: “healthy, adenomatous, moderately differen- +tiated, moderately-to-poorly differentiated, and poorly +differentiated” [ CSM09]. This dataset is part of the +Gland Segmentation (GlaS) challenge. +The DIARETDB1 [KKV + + 14 ] is a dataset of 89 +fundus images. Those images show the interior +surface of the eye. Fundus images can be used to detect +diabetic retinopathy. The images have four classes of +coarse annotations: hard and soft exudates, hemorrhages +and red small dots. +20 test and additionally 20 training retinal fun- +dus images are available through the DRIVE data +set [ SAN+ + 04 ]. The vessels were annotated. Addition- +ally, [AP11] added vascular features. +The Open-CAS Endoscopic Datasets [ MHMK+ + 14] +are 60 images taken from laparoscopic adrenalectomies +and 60 images taken from laparoscopic pancreatic +resections. Those are from 3 surgical procedures each. +Half of the data was annotated by a medical expert for +“medical instrument” and “no medical instrument”. All +images were labeled by anonymous untrained workers +to which they refer as knowledge workers (KWs).
+One crowd annotation was obtained for each image by +a majority vote on a pixel basis of 10 segmentations +given by 10 different KWs. Figure 2: A typical segmentation pipeline gets raw +pixel data, applies preprocessing techniques +like scaling and feature extraction like HOG +features. For training, data augmentation +techniques such as image rotation can be +applied. For every single image, patches of +the image called windows are extracted and +those windows are classified. The resulting +semantic segmentation can be refined by +simple morphologic operations or by more +complex approaches such as Markov Random +Fields (MRFs). +IV. S EGMENTATION P IPELINE +Typically, semantic segmentation is done with a +classifier which operates on fixed-size feature inputs +and a sliding-window approach [ DT05 ], [ YBCK10], +[SCZ08]. This means a classifier is trained on images +of a fixed size. The trained classifier is then fed with +rectangular regions of the image which are called win- +dows. Although the classifier gets an image patch of e.g. +51 px × 51 px of the environment, it might only classify +the center pixel or a subset of the complete window. +This segmentation pipeline is visualized in Figure 2. +This approach was taken by [BKTT15 ] and a major- +ity of the VOC2007 participants [ EVGW + + a]. As this +approach has to apply the patch classifier 512 · 512 = +262 144 times for images of size 512 px × 512 px, there +are techniques for speeding it up such as applying a +stride and interpolating the results. +Neural networks are able to apply the sliding window +approach in a very efficient way by handling a trained +network as a convolution and applying the convolution +on the complete image. +However, there are alternatives. Namely MRFs and +Conditional Random Fields (CRFs) which take the +information of the complete image and segment it in +an holistic approach. +5 +V. 
T RADITIONAL A PPROACHES +Image segmentation algorithms which use traditional +approaches, hence don’t apply neural networks and +make heavy use of domain knowledge, are wide-spread +in the computer vision community. Features which can +be used for segmentation are described in SectionV-A , +a very brief overview of unsupervised, non-semantic +segmentation is given in SectionV-B, Random Decision +Forests are described in Section V-C, Markov Random +Fields in Section V-E and Support Vector Machines +(SVMs) in Section V-D. Postprocessing is covered in +Section V-G. +It should be noted that algorithms can use combina- +tion of methods. For example, [TNL14] makes use of a +combination of a SVM and a MRF. Also, auto-encoders +can be used to learn features which in turn can be used +by any classifier. +A. Features and Preprocessing methods +The choice of features is very important in traditional +approaches. The most commonly used local and global +features are explained in the following as well as feature +dimensionality reduction algorithms. +1) Pixel Color: Pixel color in different image spaces +(e.g. 3 features for RGB, 3 features for HSV, 1 feature +for the gray-value) are the most widely used features. A +typical image is in the RGB color space, but depending +on the classifier and the problem another color space +might result in better segmentations. RGB, YcBcr, HSL, +Lab and YIQ are some examples used by [ CRSS14]. +No single color space has been proven to be superior +to all others in all contexts [ CJSW01]. However, the +most common choices seem to be RGB and HSI. +Reasons for choosing RGB is simplicity and the support +by programming languages, whereas the choice of +the HSI color space might make it simpler for the +classifier to become invariant to illumination. One +reason for choosing CIE-L*a*b* color space is that it +approximates human perception of brightness [ KP92 ]. 
+It follows that choosing the L*a*b color space helps +algorithms to detect structures which are seen by +humans. Another way of improving the structure within +an image is histogram equalization, which can be +applied to improve contrast [PAA+ + 87], [RM07]. +2) Histogram of oriented Gradients: Histogram of +oriented gradients (HOG) features interpret the image +as a discrete function I : N 2 + → { 0, . . . , 255 } which +maps the position (x, y ) to a color. For each pixel, there +are two gradients: The partial derivative of x and y . +Now the original image is transformed to two feature +maps of equal size which represents the gradient. These +feature maps are splitted into patches and a histogram of the directions is calculated for each patch. HOG features +were proposed in [DT05 ] and are used in [BMBM10], +[FGMR10] for segmentation tasks. +3) SIFT: Scale-invariant feature transform (SIFT) +feature descriptors describe keypoints in an image. The +image patch of the size 16 × 16 around the keypoint +is taken. This patch is divided in 16 distinct parts of +the size 4 × 4 . For each of those parts a histogram of +8 orientations is calculated similar as for HOG features. +This results in a 128-dimensional feature vector for +each keypoint. +It should be emphasized that SIFT is a global feature +for a complete image. +SIFT is described in detail in [Low04 ] and are used +in [PTN09]. +4) BOV: Bag-of-visual-words (BOV), also called +bag of keypoints , is based on vector quantization. +Similar to HOG features, BOV features are histograms +which count the number of occurrences of certain +patterns within a patch of the image. BOV are described +in [ CDF+ + 04 ] and used in combination with SIFT +feature descriptors in [CP08]. +5) Poselets: Poselets rely on manually added extra +keypoints such as “right shoulder”, “left shoulder”, +“right knee” and “left knee”. They were originally +used for human pose estimation. 
Finding those extra +keypoints is easily possible for well-known image +classes like humans. However, it is difficult for classes +like airplanes, ships, organs or cells where the human +annotators do not know the keypoints. Additionally, the +keypoints have to be chosen for every single class. There +are strategies to deal with those problems like viewpoint- +dependent keypoints. Poselets were used in [BMBM10] +to detect people and in [BBMM11] for general object +detection of the PASCAL VOC dataset. +6) Textons: A texton is the minimal building block +of vision. The computer vision literature does not give a +strict definition for textons, but edge detectors could be +one example. One might argue that deep learning tech- +niques with Convolution Neuronal Networks (CNNs) +learn textons in the first filters. +An excellent explanation of textons can be found +in [ZGWX05]. +7) Dimensionality Reduction: High-resolution im- +ages have a lot of pixels. Having one or more feature per +pixel results in well over a million features. This makes +training difficult while the higher resolution might not +contain much more information. A simple approach +to deal with this is downsampling the high-resolution +image to a low-resolution variant. Another way of +doing dimensionality reduction is principal component +analysis (PCA), which is applied by [ COWR11 ]. The +idea behind PCA is to find a hyperplane on which all +6 +feature vectors can be projected with a minimal loss +of information. A detailed description of PCA is given +by [Smi02]. +One problem of PCA is the fact that it does not +distinguish different classes. This means it can happen +that a perfectly linearly separable set of feature vectors +becomes not separable at all after applying PCA. +There are many other techniques for dimensionality +reduction. An overview and a comparison over some +of them is given by [vdMPvdH09]. +B. 
Unsupervised Segmentation +Unsupervised segmentation algorithms can be used +in supervised segmentation as another source of infor- +mation or to refine a segmentation. While unsupervised +segmentation algorithms can never be semantic, they are +well-studied and deserve at least a very brief overview. +Semantic segmentation algorithms store information +about the classes they were trained to segment while +non-semantic segmentation algorithms try to detect +consistent regions or region boundaries. +1) Clustering Algorithms: Clustering algorithms can +directly be applied on the pixels, when one gives a +feature vector per pixel. Two clustering algorithms are +k -means and the mean-shift algorithm. +The k -means algorithm is a general-purpose cluster- +ing algorithm which requires the number of clusters to +be given beforehand. Initially, it places the k centroids +randomly in the feature space. Then it assigns each +data point to the nearest centroid, moves the centroid +to the center of the cluster and continues the process +until a stopping criterion is reached. A faster variant is +described in [Har75]. +k -means was applied by [CLP98] for medical image +segmentation. +Another clustering algorithm is the mean-shift algo- +rithm which was introduced by [ CM02] for segmen- +tation tasks. The algorithm finds the cluster centers +by initializing centroids at random seed points and +iteratively shifting them to the mean coordinate within +a certain range. Instead of taking a hard range constraint, +the mean can also be calculated by using any kernel. +This effectively applies a weight to the coordinates +of the points. The mean shift algorithm finds cluster +centers at positions with a highest local density of +points. +2) Graph Based Image Segmentation: Graph-based +image segmentation algorithms typically interpret pixels +as vertices and an edge weight is a measure of +dissimilarity such as the difference in color [ FH04 ], +[ Fel]. 
There are several different candidates for edges. The 4-neighborhood (north, east, south, west) or an 8- +neighborhood (north, north-east, east, south-east, south, +south-west, west, north-west) are plausible choices. +One way to cut the edges is by building a minimum +spanning tree and removing edges above a threshold. +This threshold can either be constant, adapted to the +graph or adjusted by the user. After the edge-cutting +step, the connected components are the segments. +A graph-based method which ranked 2nd in the +Pascal VOC 2010 challenge [EVGW+10] is described +in [CS10]. The system makes heavy use of the multi- +cue contour detector globalPb [MAFM08] and needs +about 10 GB of main memory [CS11]. +3) Random Walks: Random walks belong to the +graph-based image segmentation algorithms. Random +walk image segmentation usually works as follows: +Seed points are placed on the image for the different +objects in the image. From every single pixel, the +probability to reach the different seed points by a +random walk is calculated. This is done by taking +image gradients as described in Section V-A for HOG +features. The class of the pixel is the class of which a +seed point will be reached with the highest probability. At +first, this is an interactive segmentation method, but it +can be extended to be non-interactive by using another +segmentation method's output as seed points. +4) Active Contour Models: Active contour models +(ACMs) are algorithms which segment images roughly +along edges, but also try to find a border which is +smooth. This is done by defining a so-called energy +function which will be minimized. They were initially +described in [KWT88]. ACMs can be used to segment +an image or to refine segmentation as it was done +in [AM98] for brain MR images. +5) Watershed Segmentation: The watershed algo- +rithm takes a grayscale image and interprets it as a +height map.
Low values are catchment basins and +the higher values between two neighboring catchment +basins is the watershed. The catchment basins should +contain what the developer wants to capture. This +implies that those areas must be dark on grayscale +images. The algorithm starts to fill the basins from +the lowest point. When two basins are connected, a +watershed is found. The algorithm stops when the +highest point is reached. +A detailed description of the watershed segmentation +algorithm is given in [RM00]. +The watershed segmentation was used in [JLD03] to +segment white blood cells. As the authors describe, +the segmentation by watershed transform has two +flaws: Over-segmentation due to local minima and thick +watersheds due to plateaus. +7 +C. Random Decision Forests +Random Decision Forests were first proposed +in [ Ho95 ]. This type of classifier applies techniques +called ensemble learning , where multiple classifiers +are trained and a combination of their hypotheses is +used. One ensemble learning technique is the random +subspaces method where each classifier is trained +on a random subspace of the feature space. Another +ensemble learning technique is bagging , which is +training the trees on random subsets of the training set. +In the case of Random Decision Forests, the classifiers +are decision trees. A decision tree is a tree where each +inner node uses one or more features to decide in which +branch to descend. Each leaf is a class. +One strength of Random Decision Forests compared +to many other classifiers like SVMs and neural networks +is that the scale of measure of the features (nominal, +ordinal, interval, ratio) can be arbitrary. Another advan- +tage of Random Decision Forests compared to SVMs, +for example, is the speed of training and classification. +Decision trees were extensively studied in the past +20 years and a multitude of training algorithms have +been proposed (e.g. ID3 in [ Qui86], C4.5 in [ Qui93 ]). 
+Possible training hyperparameters are the measure to +evaluate the “goodness of split” [Min89], the number of +decision trees being used, and whether the depth of the trees +is restricted. Typically in the context of classification, +decision trees are trained by adding new nodes until +each leaf contains only nodes of a single class or until it +is not possible to split further. This is called a stopping +criterion. +There are two typical training modes: Central axis +projection and perceptron training. In training, for +each node a hyperplane is searched which is optimal +according to an error function. +Random Decision Forests with texton features (see +Section V-A6) are applied in [SJC08] for segmentation. +In the [MSC] dataset, they report a per-pixel accuracy +rate of 66.9 % for their best system. This system +requires 415 ms for the segmentation of 320 px × 213 px +images on a single 2.7 GHz core. On the Pascal +VOC 2007 dataset, they report an average per-pixel +accuracy for their best segmentation system of 42 %. +An excellent introduction to Random Decision +Forests for semantic segmentation is given by [SCZ08]. +D. SVMs +SVMs are well-studied binary classifiers which can +be described by five central ideas. For those ideas, the +training data is represented as $(x_i, y_i)$ where $x_i$ is the +feature vector and $y_i \in \{-1, 1\}$ the binary label for +training example $i \in \{1, \dots, m\}$. 1) If data is linearly separable, it can be separated +by a hyperplane. There is one hyperplane which +maximizes the distance to the next datapoints +(support vectors). This hyperplane should be taken: +$$\underset{w, b}{\text{minimize}} \quad \frac{1}{2} \|w\|^2$$ +$$\text{s.t.} \quad \forall_{i=1}^{m} \; y_i \cdot \underbrace{(\langle w, x_i \rangle + b)}_{\text{sgn applied to this gives the classification}} \geq 1$$ +2) Even if the underlying process which generates the +features for the two classes is linearly separable, +noise can make the data not separable. The intro- +duction of slack variables to relax the requirement +of linear separability solves this problem.
The +trade-off between accepting some errors and a +more complex model is weighted by a parameter +$C \in \mathbb{R}_0^+$. The bigger $C$, the more heavily errors are +penalized, i.e. the fewer errors are accepted. The new optimization problem is: +$$\underset{w}{\text{minimize}} \quad \frac{1}{2} \|w\|^2 + C \cdot \sum_{i=1}^{m} \xi_i$$ +$$\text{s.t.} \quad \forall_{i=1}^{m} \; y_i \cdot (\langle w, x_i \rangle + b) \geq 1 - \xi_i$$ +Note that $0 \leq \xi_i \leq 1$ means that the data point +is within the margin, whereas $\xi_i \geq 1$ means it is +misclassified. An SVM with $C > 0$ is also called +a soft-margin SVM. +3) The primal problem is to find the normal vector +$w$ and the bias $b$. The dual problem is to express +$w$ as a linear combination of the training data $x_i$: +$$w = \sum_{i=1}^{m} \alpha_i y_i x_i$$ +where $y_i \in \{-1, 1\}$ represents the class of the +training example and $\alpha_i$ are Lagrange multipliers. +The usage of Lagrange multipliers is explained +with some examples in [Smi04]. The usage of the +Lagrange multipliers $\alpha_i$ makes the optimization +problem depend on the $\alpha_i$, which are weights for +the feature vectors. It turns out that most $\alpha_i$ will +be zero. The non-zero weighted vectors are called +support vectors. +The optimization problem is now, according +to [Bur98]: +$$\underset{\alpha_i}{\text{maximize}} \quad \sum_{i=1}^{m} \alpha_i - \frac{1}{2} \sum_{i=1}^{m} \sum_{j=1}^{m} \alpha_i \alpha_j y_i y_j \langle x_i, x_j \rangle$$ +$$\text{s.t.} \quad \forall_{i=1}^{m} \; 0 \leq \alpha_i \leq C$$ +$$\text{s.t.} \quad \sum_{i=1}^{m} \alpha_i y_i = 0$$ +8 +4) Not every dataset is linearly separable. This prob- +lem is approached by transforming the feature +vectors $x$ with a non-linear mapping $\Phi$ into +a higher dimensional (probably $\infty$-dimensional) +space. As the feature vectors $x$ are only used +within the scalar product $\langle x_i, x_j \rangle$, it is not necessary +to do the transformation. It is enough to do the +calculation +$$K(x_i, x_j) = \langle x_i, x_j \rangle$$ +This function $K$ is called a kernel. The idea of +never explicitly transforming the vectors $x_i$ to the +higher dimensional space is called the kernel trick.
+Common kernels include the polynomial kernel +$$K_P(x_i, x_j) = (\langle x_i, x_j \rangle + r)^p$$ +of degree $p$ and coefficient $r$, the Gaussian radial +basis function (RBF) kernel +$$K_{\text{Gauss}}(x_i, x_j) = e^{\frac{-\gamma \|x_i - x_j\|^2}{2 \sigma^2}}$$ +and the sigmoid kernel +$$K_{\tanh}(x_i, x_j) = \tanh(\gamma \langle x_i, x_j \rangle - r)$$ +where the parameter $\gamma$ determines how much +influence single training examples have. +5) The described SVMs can only distinguish between +two classes. Common strategies to expand those +binary classifiers to multi-class classification are +the one-vs-all and the one-vs-one strategy. In the +one-vs-all strategy $n$ classifiers have to be trained +which can distinguish one of the $n$ classes against +all other classes. In the one-vs-one strategy $\frac{n^2 - n}{2}$ +classifiers are trained; one classifier for each pair +of classes. +A detailed description of SVMs can be found +in [Bur98]. +SVMs are used by [YHRF12] on the 2009 and 2010 +PASCAL segmentation challenge [EVGW+10]. They +did not hand their classifier in to the challenge itself, +but calculated an average rank of 7 among the different +categories. +[FGMR10] also used an SVM based method with +HOG features and achieved the 7th rank in the 2010 +PASCAL segmentation challenge by mean accuracy. It +needs about 2 s on a 2.8 GHz 8-core Intel processor. +E. Markov Random Fields +MRFs are undirected probabilistic graphical models +which are a widespread model in computer vision. The +overall idea of MRFs is to assign a random variable for +each feature and a random variable for each pixel which $x_1$ $x_2$ $x_3$ $x_4$ $x_5$ $x_6$ $x_7$ $x_8$ $x_9$ +$y_1$ $y_2$ $y_3$ $y_4$ $y_5$ $y_6$ $y_7$ $y_8$ $y_9$ +$x_1$ $x_2$ $x_3$ $x_4$ $x_5$ $x_6$ $x_7$ $x_8$ $x_9$ +$y_1$ $y_2$ $y_3$ $y_4$ $y_5$ $y_6$ $y_7$ $y_8$ $y_9$ +Figure 3: CRF with 4-neighborhood. Each node $x_i$ +represents a pixel and each node $y_i$ represents +a label. +gets labeled as shown in Figure 3.
For example, an MRF +which is trained on images of the size 224 px × 224 px +and gets the raw RGB values as features has +$$\underbrace{224 \cdot 224 \cdot 3}_{\text{input}} + \underbrace{224 \cdot 224}_{\text{output}} = 200\,704$$ +random variables. Those random variables are condi- +tionally independent, given their local neighborhood. +These (in)dependencies can be expressed with a graph. +Let $G = (V, E)$ be the associated undirected graph +of an MRF and $C$ be the set of all maximal cliques in +that graph. Nodes represent random variables $x$, $y$ and +edges represent conditional dependencies. Just like in +graph-based image segmentation, the 4-neighborhood [SWRC06] and the 8-neighborhood +are reasonable choices for constructing the graph. +Typically, random variables $y$ represent the class of a +single pixel, random variables $x$ represent pixel values +and edges represent pixel neighborhood in computer +vision segmentation problems where MRFs +are used. Accordingly, the random variables $y$ live +on $1, \dots,$ number of classes and the random variables $x$ +typically live on $0, \dots, 255$ or $[0, 1]$. +The probability of $x$, $y$ can be expressed as +$$P(x, y) = \frac{1}{Z} e^{-E(x, y)}$$ +where $Z = \sum_{x, y} e^{-E(x, y)}$ is a normalization term +called the partition function and $E$ is called the energy +function. A common choice for the energy function is +$$E(x, y) = \sum_{c \in C} \psi_c(x, y)$$ +where $\psi$ is called a clique potential. One choice for +cliques of size two $x, y = (x_1, x_2)$ is [KP06] +$$\psi_c(x_1, x_2) = w \delta(x_1, x_2) = \begin{cases} w & \text{if } x_1 \neq x_2 \\ -w & \text{if } x_1 = x_2 \end{cases}$$ +According to [Mur12], the most common way of +inference over the posterior MRF in computer vision +problems is Maximum A Posteriori (MAP) estimation. +9 +Detailed introductions to MRFs are given by +[BKR11], [Mur12]. MRFs are used by [ZBS01] and +[MSB12] for image segmentation. +F. Conditional Random Fields +CRFs are MRFs where all clique potentials are +conditioned on input features [Mur12].
This means, +instead of learning the distribution $P(y, x)$, the task +is reformulated to learn the distribution $P(y | x)$. One +consequence of this reformulation is that CRFs need +far fewer parameters as the distribution of $x$ does +not have to be estimated. Another advantage of CRFs +compared to MRFs is that no distribution assumption +about $x$ has to be made. +A CRF has the partition function $Z$: +$$Z(x) = \sum_{y} P(x, y)$$ +and joint probability distribution +$$P(y | x) = \frac{1}{Z(x)} \prod_{c \in C} \psi_c(y_c | x)$$ +The simplest way to define the clique potentials $\psi$ is +the count of the class $y_c$ given $x$ added with a positive +smoothing constant to prevent the complete term from +getting zero. +CRFs as described in [LRKT09] have reached top +performance in PASCAL VOC 2010 [VOC10] and +are also used in [HZCP04], [SWRC06] for semantic +segmentation. +A method similar to CRFs was proposed +in [GBVdW+10]. The system of Gonfaus et al. +ranked 1st by mean accuracy in the segmentation task +of the PASCAL VOC 2010 challenge [EVGW+10]. +An introduction to CRFs is given by [SM11]. +G. Post-processing methods +Post-processing methods refine a found segmentation and +remove obvious errors. For example, the morphological +operations opening and closing can remove noise. The +opening operation is an erosion followed by a dilation. +This removes tiny segments. The closing operation is a +dilation followed by an erosion. This removes tiny gaps +in otherwise filled regions. They were used in [CLP98] +for biomedical image segmentation. +Another way of refinement of the found segmentation +is by adjusting the segmentation to match close edges. +This was used in [BBMM11] with an ultra-metric +contour map [AMFM09]. +Active contour models are another example of a +post-processing method [KWT88]. VI. NEURAL NETWORKS FOR SEMANTIC +SEGMENTATION +Artificial neural networks are classifiers which are +inspired by biological neurons.
Every single artificial +neuron has some inputs which are weighted and summed +up. Then, the neuron applies a so-called activation +function to the weighted sum and gives an output. Those +neurons can take either a feature vector as input or the +output of other neurons. In this way, they build up +feature hierarchies. +The parameters they learn are the weights $w \in \mathbb{R}$. +They are learned by gradient descent. To do so, an error +function — usually cross-entropy or mean squared error +— is necessary. For the gradient descent algorithm, one +sees the labeled training data as given, the weights +as variables and the error function as a surface in +this weight-space. Minimizing the error function in the +weight space adapts the neural network to the problem. +There are lots of ideas around neural networks like +regularization, better optimization algorithms, automat- +ically building up architectures, design choices for +activation functions. This is not explained in detail here, +but some of the major breakthroughs are outlined. +CNNs are neural networks which learn image filters. +They drastically reduce the number of parameters which +have to be learned while being still general enough for +the problem domain of images. This was shown by Alex +Krizhevsky et al. in [KSH12]. One major idea was a +clever regularization called dropout training, which sets +the output of neurons while training randomly to zero. +Another contribution was the usage of an activation +function called rectified linear unit: +$$\varphi_{\text{ReLU}}(x) = \max(0, x)$$ +Those are much faster to train than the commonly used +sigmoid activation functions +$$\varphi_{\text{Sigmoid}}(x) = \frac{1}{e^{-x} + 1}$$ +Krizhevsky et al. implemented those ideas and partici- +pated in the ImageNet Large-Scale Visual Recognition +Challenge (ILSVRC). The best other system, which +used SIFT features and Fisher Vectors, had a perfor- +mance of about 25.7 % while the network by Alex +Krizhevsky et al.
got 17 .0 % error rate on the ILSVRC- +2010 dataset. As a preprocessing step, they downsam- +pled all images to a fixed size of 256 px × 256 px before +they fed the features into their network. This network +is commonly known as AlexNet. +Since AlexNet was developed, a lot of different +neural networks have been proposed. One interesting +example is [PC13 ], where a recurrent CNN for semantic +segmentation is presented. +10 +Another notable paper is [ LSD14 ]. The algorithm +presented there makes use of a classifying network such +as AlexNet, but applies the complete network as an +image filter. This way, each pixel gets a probability +distribution for each of the trained classes. By taking +the most likely class, a semantic segmentation can be +done with arbitrary image sizes. +A very recent publication by Dai et al. [ DHS15] +showed that segmentation with much deeper networks +is possible and achieves better results. +More detailed explanations to neural networks for +visual recognition is given by [LKJ15]. +VII. P OSSIBLE P ROBLEMS IN THE D ATA FOR +S EGMENTATION ALGORITHMS +Different segmentation workflows have different +problems. However, there are a couple of special cases +which should be tested. Those cases might not occur +often in the training data, but it could still happen in +the productive system. +I am not aware of any systematic work which exam- +ined the influence of problems such as the following. +A. Lens Flare +Lens flare is the effect of light getting scattered in +the lens system of the camera. The testing data set of +the KITTI road evaluation benchmark [ FKG13] has a +couple of photos with this problem. Figure 4(a) shows +an extreme example of lens flare. +B. Vignetting +Vignetting is the effect of a photograph getting darker +in the corners. This can have many reasons, for example +filters on the camera blocking light at the corners. +C. Blurred images +Images can be blurred for a couple of reasons. 
A +problem with the lenses mechanics, focusing on the +wrong point, too quick movement, smoke or foam. One +example of a blurred image is Figure 4(c), which was +taken during an in vivo porcine procedure of diaphragm +dissection. The smoke was caused by cauterization. +D. Other Problems +If the following effects can occur at all and if they +are problems depends heavily on the problem domain +and the used model. +1) Partial Occlusions: Segmentation systems which +employ a model of the objects which should be +segmented might suffer from partial occlusions. (a)Lens Flare +Image by [Hus07] (b)Vignetting +Image by [Man12] +(c)Smoke by cauterization +Image by [GVSY13] (d)Camouflage +Image by [Kaf07] +(e) Transparency (f) Viewpoint +Figure 4: Examples of images which might cause +semantic segmentation systems to fail. +2) Camouflage: Some objects, like animals in the +wild, actively try to hide (see Figure 4(d) as an example). +In other cases it might just be bad luck that objects +are hard for humans to detect. This problem has two +interesting aspects: On the one hand, the segmenting +system might suffer from the same problems as humans +do. On the other hand, the segmenting system might be +better than humans are, but it is forced to learn from +images labeled by humans. If the labels are wrong, the +system is forced to learn something wrong. +3) Semi-transparent Occlusion: Some objects like +drinking glasses can be visible and still leave the object +behind them visible as shown in Figure 4(e). This is +mainly a definition problem: Is the seen pixel the glass +label or the smartphone label? +4) Viewpoints: Changes in viewpoints can be a +problem, if they don’t occur in the training data. For +example, an image captioning system which was trained +on photographs of professional photographers might +not have photos from the point of view of a child. This +is visualized in Figure 4(f). +11 +VIII. D ISCUSSION +Ohta et al. wrote [ OKS78 ] 38 years ago. 
It is one +of the first papers mentioning semantic segmentation. +In this time, a lot of work was done and many +different directions have been explored. Different kinds +of semantic segmentation have emerged. +This paper presents a taxonomy of those kinds +of semantic segmentation and a brief overview of +completely automatic, passive, semantic segmentation +algorithms. +Future work includes a comparative study of +those algorithms on publicly available dataset such +as the ones presented in Table I. Another open +question is the influence of the problems described +in Section VII. This could be done using a subset of the +thousands of images of Wikipedia Commons, such as +https://commons.wikimedia.org/wiki/Category:Blurring +for blurred images. +A combination of different classifiers in an ensemble +would be an interesting option to explore in order to +improve accuracy. Another direction which is currently +studied is combining classifiers such as neural networks +with CRFs [ZJRP + + 15]. R EFERENCES +[AM98] M. S. Atkins and B. T. Mackiewich, “Fully +automatic segmentation of the brain in +mri,” Medical Imaging, IEEE Transactions +on , vol. 17, no. 1, pp. 98–107, Feb. 1998. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=668699 +[AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and +J. Malik, “From contours to regions: An +empirical evaluation,” in Computer Vision and +Pattern Recognition, 2009. CVPR 2009. IEEE +Conference on . IEEE, Jun. 2009, pp. 2294–2301. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=5206707 +[AP11] G. Azzopardi and N. Petkov, “Detection of +retinal vascular bifurcations by trainable v4-like +filters,” in Computer Analysis of Images and +Patterns . Springer, 2011, pp. 451–459. [Online]. +Available: http://www. cs. rug. nl/~imaging/databases/ +retina_database/retinalfeatures_database. html +[BBMM11] T. Brox, L. Bourdev, S. Maji, and J. 
Malik, +“Object segmentation by alignment of poselet +activations to image contours,” in Computer Vision +and Pattern Recognition (CVPR), 2011 IEEE +Conference on . IEEE, Jun. 2011, pp. 2225–2232. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=5995659 +[BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ +segmentation using graph cuts,” in Medical Image +Computing and Computer-Assisted Intervention– +MICCAI 2000 . Springer, 2000, pp. 276– +286. [Online]. Available: http://link . springer . com/ +chapter/10 . 1007/978- 3-540- 40899-4_28 +[BKR11] A. Blake, P. Kohli, and C. Rother, Markov random +fields for vision and image processing. Mit Press, +2011. +[BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma, +“Pixel-wise segmentation of street with neural +networks,” arXiv preprint arXiv:1511.00513, 2015. +[Online]. Available: http://arxiv. org/abs/1511. 00513 +[BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik, +“Detecting people using mutually consistent +poselet activations,” in Computer Vision–ECCV +2010 . Springer, 2010, pp. 168–181. [Online]. +Available: http://link. springer. com/chapter/10. 1007/ +978- 3-642- 15567-3_13#page- 1 +[Bur98] C. J. Burges, “A tutorial on support vector machines +for pattern recognition,” Data mining and knowledge +discovery , vol. 2, no. 2, pp. 121–167, 1998. +[BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast +approximate energy minimization via graph cuts,” +Pattern Analysis and Machine Intelligence, IEEE +Transactions on , vol. 23, no. 11, pp. 1222–1239, +2001. [Online]. Available: http://ieeexplore. ieee. org/ +xpls/abs_all. jsp?arnumber=969114 +[CDF + + 04] G. Csurka, C. Dance, L. Fan, J. Willamowski, +and C. Bray, “Visual categorization with bags of +keypoints,” in Workshop on statistical learning in +computer vision, ECCV, vol. 1, no. 1-22. Prague, +2004, pp. 1–2. +[CJSW01] H.-D. Cheng, X. Jiang, Y. Sun, and J. 
Wang, +“Color image segmentation: advances and prospects,” +Pattern recognition, vol. 34, no. 12, pp. 2259–2281, +2001. +[CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image +segmentation via adaptive k-mean clustering and +knowledge-based morphological operations with +biomedical applications,” Image Processing, IEEE +Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec. +12 +1998. [Online]. Available: http://ieeexplore. ieee. org/ +xpls/abs_all. jsp?arnumber=730379 +[CM02] D. Comaniciu and P. Meer, “Mean shift: A +robust approach toward feature space analysis,” +Pattern Analysis and Machine Intelligence, IEEE +Transactions on, vol. 24, no. 5, pp. 603–619, 2002. +[Online]. Available: http://ieeexplore . ieee . org/xpl/ +login . jsp?tp=&arnumber=1000236 +[COWR11] + C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, +“A pixel classification system for segmenting +biomedical images using intensity neighborhoods +and dimension reduction,” in Biomedical Imaging: +From Nano to Macro, 2011 IEEE International +Symposium on . IEEE, 2011, pp. 1649–1652. +[Online]. Available: https://www . andrew . cmu . edu/ +user/gustavor/chen_isbi_11. pdf +[CP08] G. Csurka and F. Perronnin, “A simple high +performance approach to semantic segmentation.” +in BMVC , 2008, pp. 1–10. [Online]. Avail- +able: http://www . xrce . xerox . com/layout/set/print/ +content/download/16654/118653/file/2008-023 . pdf +[CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and +E. Sabo, “Colon crypt segmentation website.” [On- +line]. Available: http://mis . haifa . ac . il/~ishimshoni/ +SegmentCrypt/Download. htm +[CRSS14] ——, “Memory based active contour algorithm +using pixel-level classified images for colon crypt +segmentation,” Computerized Medical Imaging +and Graphics , Nov. 2014. [Online]. Available: +http://mis . haifa . ac . il/~ishimshoni/SegmentCrypt/ +Active%20contour%20based%20on%20pixel- +level%20classified%20image%20for%20colon% +20crypts%20segmentation. pdf +[CS10] J. Carreira and C. 
Sminchisescu, “Constrained +parametric min-cuts for automatic object segmenta- +tion,” in Computer Vision and Pattern Recognition +(CVPR), 2010 IEEE Conference on . IEEE, 2010, +pp. 3241–3248. +[CS11] ——, “Cpmc: Constrained parametric min-cuts for +automatic object segmentation,” Feb. 2011. [Online]. +Available: http://www . maths . lth . se/matematiklth/ +personal/sminchis/code/cpmc/ +[CSI+ + 09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V. +Stoecker, J. M. Malters, and J. M. Grichnik, “An +improved objective evaluation measure for border +detection in dermoscopy images,” Skin Research +and Technology, vol. 15, no. 4, pp. 444–450, 2009. +[Online]. Available: http://arxiv. org/abs/1009. 1020 +[CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear +segmentation in microscope cell images: a hand- +segmented dataset and comparison of algorithms,” +in Biomedical Imaging: From Nano to Macro, +2009. ISBI’09. IEEE International Symposium on . +IEEE, 2009, pp. 518–521. [Online]. Available: +http://murphylab. web. cmu. edu/data +[CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh, +“Random walks based multi-image segmentation: +Quasiconvexity results and gpu-based solutions,” +in Computer Vision and Pattern Recognition +(CVPR), 2012 IEEE Conference on . IEEE, +2012, pp. 1656–1663. [Online]. Available: http: +//pages. cs. wisc. edu/~jiaxu/pub/rwcoseg. pdf +[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware seman- +tic segmentation via multi-task network cascades,” +arXiv preprint arXiv:1512.04412, 2015. +[DT05] N. Dalal and B. Triggs, “Histograms of oriented +gradients for human detection,” in Computer +Vision and Pattern Recognition, 2005. CVPR +2005. IEEE Computer Society Conference on , vol. 1, June 2005, pp. 886–893 vol. 1. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=1467360 +[EVGW + + a] M. Everingham, L. Van Gool, C. K. I. +Williams, J. Winn, and A. 
Zisserman, “The +PASCAL Visual Object Classes Challenge +2007 (VOC2007) Results,” http://www.pascal- +network.org/challenges/VOC/voc2007/workshop/index.html. +[Online]. Available: http://host . robots . ox . ac . uk: +8080/pascal/VOC/voc2007/index . html +[EVGW + + b] ——, “The PASCAL Visual Object Classes Chal- +lenge 2012 (VOC2012) Results,” http://www.pascal- +network.org/challenges/VOC/voc2012/workshop/index.html. +[Online]. Available: http://host . robots . ox . ac . uk: +8080/pascal/VOC/voc2012/index . html +[EVGW + + 10] M. Everingham, L. Van Gool, C. K. Williams, +J. Winn, and A. Zisserman, “The pascal visual object +classes (voc) challenge,” International journal of +computer vision, vol. 88, no. 2, pp. 303–338, 2010. +[EVGW + + 12] M. Everingham, L. Van Gool, C. K. I. Williams, +J. Winn, and A. Zisserman, “Visual object +classes challenge 2012 (voc2012),” 2012. [Online]. +Available: http://host . robots . ox . ac . uk:8080/pascal/ +VOC/voc2012/index. html +[Fel] P. F. Felzenszwalb, “Graph based im- +age segmentation.” [Online]. Available: http: +//cs . brown. edu/~pff/segment/ +[FGMR10] + P. F. Felzenszwalb, R. B. Girshick, D. McAllester, +and D. Ramanan, “Object detection with discrimina- +tively trained part-based models,” Pattern Analysis +and Machine Intelligence, IEEE Transactions on , +vol. 32, no. 9, pp. 1627–1645, 2010. +[FH04] P. F. Felzenszwalb and D. P. Huttenlocher, +“Efficient graph-based image segmentation,” +International Journal of Computer Vision , +vol. 59, no. 2, pp. 167–181, 2004. [Online]. +Available: http://link . springer . com/article/10 . 1023/ +B:VISI . 0000022288 . 19776. 77 +[FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A +new performance measure and evaluation +benchmark for road detection algorithms,” in +International Conference on Intelligent Transporta- +tion Systems (ITSC) , 2013. [Online]. Available: +http://www . cvlibs. net/datasets/kitti/eval_road. php +[GBVdW + + 10] + J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. 
+Bagdanov, J. Serrat, and J. Gonzalez, “Harmony po- +tentials for joint classification and segmentation,” in +Computer Vision and Pattern Recognition (CVPR), +2010 IEEE Conference on. IEEE, 2010, pp. 3280– +3287. +[GRC + + 08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and +D. Koller, “Multi-class segmentation with relative +location prior,” International Journal of Computer +Vision , vol. 80, no. 3, pp. 300–316, Apr. 2008. +[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.- +Z. Yang, “Probabilistic tracking of affine-invariant +anisotropic regions,” Pattern Analysis and Machine +Intelligence, IEEE Transactions on , vol. 35, no. 1, +pp. 130–143, 2013. +[Har75] J. A. Hartigan, Clustering algorithms. John Wiley +& Sons, Inc., 1975. +[HDT02] C. Huang, L. Davis, and J. Townshend, “An +assessment of support vector machines for land +cover classification,” International Journal of remote +sensing , vol. 23, no. 4, pp. 725–749, 2002. +[HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic +lung segmentation for accurate quantitation of +volumetric x-ray ct images,” Medical Imaging, IEEE +13 +Transactions on , vol. 20, no. 6, pp. 490–498, Jun. +2001. +[HJBJ+ + 96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. +Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, +D. W. Eggert, A. Fitzgibbon, and R. B. +Fisher, “An experimental comparison of range +image segmentation algorithms,” Pattern Analysis +and Machine Intelligence, IEEE Transactions +on , vol. 18, no. 7, pp. 673–689, Jul. 1996. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=506791 +[Ho95] T. K. Ho, “Random decision forests,” in +Document Analysis and Recognition, 1995., +Proceedings of the Third International Conference +on , vol. 1. IEEE, 1995, pp. 278–282. +[Online]. Available: http://ect . bell-labs . com/who/ +tkh/publications/papers/odt. pdf +[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia +Commons, Nov. 2007. [Online]. Avail- +able: https://commons . wikimedia . 
org/wiki/File: +CCTV_Lens_flare. jpg +[HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, +“Multiscale conditional random fields for image +labeling,” in Computer Vision and Pattern +Recognition, 2004. CVPR 2004. Proceedings +of the 2004 IEEE Computer Society Conference +on , vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. +[Online]. Available: http://ieeexplore . ieee . org/xpl/ +login . jsp?tp=&arnumber=1315232 +[JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white +blood cell segmentation scheme using scale-space +filtering and watershed clustering,” in Machine +Learning and Cybernetics, 2003 International +Conference on , vol. 5, Nov 2003, pp. 2820–2825 +Vol.5. [Online]. Available: http://ieeexplore. ieee. org/ +xpl/login. jsp?tp=&arnumber=1260033 +[Kaf07] L. Kaffer, “File:great male leopard in south afrika- +jd.jpg,” Wikipedia Commons, Jul. 2007. [Online]. +Available: https://commons. wikimedia. org/wiki/File: +Great_male_Leopard_in_South_Afrika-JD . JPG +[KKV+ + 14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen, +J. Pietilä, H. Kälviäinen, and H. Uusitalo, +“Diaretdb1 diabetic retinopathy database and +evaluation protocol,” 2014. [Online]. Available: +http://www2 . it. lut. fi/project/imageret/diaretdb1/ +[KP92] J. M. Kasson and W. Plouffe, “An analysis of +selected computer interchange color spaces,” ACM +Transactions on Graphics (TOG), vol. 11, no. 4, pp. +373–405, 1992. +[KP06] Z. Kato and T.-C. Pong, “A markov random +field image segmentation model for color +textured images,” Image and Vision Computing , +vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. +Available: http://www . sciencedirect . com/science/ +article/pii/S0262885606001223 +[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, +“Imagenet classification with deep convolutional +neural networks,” in Advances in neural information +processing systems , 2012, pp. 1097–1105. +[KWT88] M. Kass, A. Witkin, and D. 
Terzopoulos, +“Snakes: Active contour models,” International +journal of computer vision , vol. 1, no. 4, pp. +321–331, Jan. 1988. [Online]. Available: http: +//link . springer. com/article/10. 1007/BF00133570 +[LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, +“CS231n: Convolutional neural networks for +visual recognition,” 2015. [Online]. Available: +http://cs231n . stanford. edu/ +[Low04] D. Lowe, “Distinctive image features from scale- invariant keypoints,” International Journal of +Computer Vision, vol. 60, no. 2, pp. 91–110, 2004. +[Online]. Available: http://dx . doi . org/10 . 1023/B% +3AVISI . 0000029664 . 99615. 94 +[LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, +“Spectral matting,” Pattern Analysis and +Machine Intelligence, IEEE Transactions on , +vol. 30, no. 10, pp. 1699–1712, 2008. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=4547428 +[LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr, +“Associative hierarchical crfs for object class image +segmentation,” in Computer Vision, 2009 IEEE 12th +International Conference on , 2009, pp. 739–746. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=5459248 +[LSD14] + J. Long, E. Shelhamer, and T. Darrell, “Fully +convolutional networks for semantic segmentation,” +arXiv preprint arXiv:1411.4038 , 2014. [Online]. +Available: http://arxiv. org/abs/1411. 4038 +[MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and +J. Malik, “Using contours to detect and localize +junctions in natural images,” in Computer Vision +and Pattern Recognition, 2008. CVPR 2008. +IEEE Conference on , June 2008, pp. 1–8. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=4587420 +[Man12] M. Manske, “File:randabschattung mikroskop +kamera 6.jpg,” Wikipedia Com- +mons, Dec. 2012. [Online]. Avail- +able: https://commons . wikimedia . org/wiki/File: +Randabschattung_Mikroskop_Kamera_6. JPG +[MBLAGJ + + 07] S. Maldonado-Bascon, S. 
Lafuente-Arroyo, P. Gil- +Jimenez, H. Gomez-Moreno, and F. Lopez- +Ferreras, “Road-sign detection and recognition +based on support vector machines,” Intelligent +Transportation Systems, IEEE Transactions on , +vol. 8, no. 2, pp. 264–278, Jun. 2007. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=4220659 +[MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig, +“Automatic brain and tumor segmentation,” inMed- +ical Image Computing and Computer-Assisted In- +tervention—MICCAI 2002 . Springer, 2002, pp. +372–379. +[MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik, +“A database of human segmented natural +images and its application to evaluating +segmentation algorithms and measuring ecological +statistics,” in Computer Vision, 2001. ICCV +2001. Proceedings. Eighth IEEE International +Conference on , vol. 2. IEEE, 2001, pp. 416–423. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=937655 +[MHMK + + 14] L. Maier-Hein, S. Mersmann, D. Kondermann, +S. Bodenstedt, A. Sanchez, C. Stock, H. G. +Kenngott, M. Eisenmann, and S. Speidel, “Can +masses of non-experts train highly accurate +image classifiers?” in Medical Image Computing +and Computer-Assisted Intervention–MICCAI 2014. +Springer, 2014, pp. 438–445. [Online]. Available: +http://opencas. webarchiv. kit. edu/?q=node/26 +[Min89] J. Mingers, “An empirical comparison of selection +measures for decision-tree induction,” Machine +Learning , vol. 3, no. 4, pp. 319–342, 1989. +[Online]. Available: http://dx . doi . org/10 . 1023/A% +3A1022645801436 +[MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson, +“Markov random field models for supervised land +14 +cover classification from very high resolution +multispectral remote sensing images,” in Advances +in Radar and Remote Sensing (TyWRRS), 2012 +Tyrrhenian Workshop on . IEEE, 2012, pp. 235– +242. [Online]. Available: http://ieeexplore. ieee. org/ +xpl/login. 
jsp?tp=&arnumber=6381135 +[MSC] “Object class recognition image database.” +[Online]. Available: http://research . microsoft . com/ +vision/cambridge/recognition/ +[MSR] “Image understanding - research data,” +Microsoft Research. [Online]. Avail- +able: http://research . microsoft . com/en-us/projects/ +objectclassrecognition/ +[Mur12] K. P. Murphy, Machine learning: a probabilistic +perspective. MIT press, 2012. +[OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis +system for scenes containing objects with substruc- +tures,” in Proceedings of the Fourth International +Joint Conference on Pattern Recognitions, 1978, pp. +752–754. +[PAA + + 87] S. M. Pizer, E. P. Amburn, J. D. Austin, +R. Cromartie, A. Geselowitz, T. Greer, B. ter +Haar Romeny, J. B. Zimmerman, and K. Zuiderveld, +“Adaptive histogram equalization and its variations,” +Computer vision, graphics, and image processing , +vol. 39, no. 3, pp. 355–368, 1987. [Online]. +Available: http://www . sciencedirect . com/science/ +article/pii/S0734189X8780186X +[PC13] P. H. Pinheiro and R. Collobert, “Recurrent +convolutional neural networks for scene parsing,” +arXiv preprint arXiv:1306.2795 , 2013. [Online]. +Available: http://arxiv. org/abs/1306. 2795v1 +[PH05] C. Pantofaru and M. Hebert, “A +comparison of image segmentation algorithms,” +Robotics Institute , p. 336, 2005. [Online]. +Available: http://riweb-backend . ri . cmu . edu/ +pub _files/pub4/pantofaru _caroline _ 2005 _1/ +pantofaru_caroline_2005_1 . pdf +[PS07] A. Protiere and G. Sapiro, “Interactive +image segmentation via adaptive weighted +distances,” Image Processing, IEEE Transactions +on , vol. 16, no. 4, pp. 1046–1057, 2007. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=4130436 +[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multi- +class image segmentation using conditional random +fields and global classification,” in Proceedings +of the 26th Annual International Conference on +Machine Learning. 
ACM, 2009, pp. 817–824. +[PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A +survey of current methods in medical image +segmentation,” Annual Review of Biomedical +Engineering , vol. 2, no. 1, pp. 315–337, 2000, +pMID: 11701515. [Online]. Available: http:// +dx. doi. org/10. 1146/annurev . bioeng . 2. 1. 315 +[Qui86] + J. R. Quinlan, “Induction of decision trees,” +Machine learning , vol. 1, no. 1, pp. 81–106, +Aug. 1986. [Online]. Available: http://dx . doi . org/ +10 . 1023/A%3A1022643204877 +[Qui93] ——, C4.5: Programs for Machine Learning, P. Lan- +gley, Ed. Morgan Kaufmann Publishers, Inc., 1993. +[RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut: +Interactive foreground extraction using iterated +graph cuts,” ACM Transactions on Graphics +(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online]. +Available: http://delivery. acm. org/10. 1145/1020000/ +1015720/p309- rother. pdf +[RM00] J. B. Roerdink and A. Meijster, “The watershed +transform: Definitions, algorithms and paralleliza- tion strategies,” Fundam. Inform. , vol. 41, no. 1-2, +pp. 187–228, 2000. +[RM07] J. Reynolds and K. Murphy, “Figure-ground +segmentation using a hierarchical conditional +random field,” in Computer and Robot +Vision, 2007. CRV ’07. Fourth Canadian +Conference on , May 2007, pp. 175–182. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=4228537 +[RMBK06] + C. Rother, T. Minka, A. Blake, and V. Kolmogorov, +“Cosegmentation of image pairs by histogram +matching - incorporating a global constraint +into mrfs,” in Computer Vision and Pattern +Recognition, 2006 IEEE Computer Society +Conference on , vol. 1, June 2006, pp. 993– +1000. [Online]. Available: http://ieeexplore. ieee. org/ +xpls/abs_all. jsp?arnumber=1640859 +[SAN + + 04] J. Staal, M. D. Abràmoff, M. Niemeijer, +M. Viergever, B. Van Ginneken et al., “Ridge-based +vessel segmentation in color images of the retina,” +Medical Imaging, IEEE Transactions on , vol. 23, +no. 4, pp. 501–509, 2004. 
[Online]. Available: +http://www . isi . uu. nl/Research/Databases/DRIVE/ +[SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, +“Object class segmentation using random +forests.” in BMVC , 2008, pp. 1–10. [On- +line]. Available: http://research. microsoft. com/pubs/ +72423/Criminisi_bmvc2008. pdf +[SJC08] + J. Shotton, M. Johnson, and R. Cipolla, +“Semantic texton forests for image categorization +and segmentation,” in Computer vision and +pattern recognition, 2008. CVPR 2008. IEEE +Conference on . IEEE, Jun. 2008, pp. 1–8. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=4587503 +[SM11] C. Sutton and A. McCallum, “An introduction +to conditional random fields,” Machine Learning , +vol. 4, no. 4, pp. 267–373, 2011. [Online]. +Available: http://homepages . inf . ed . ac . uk/csutton/ +publications/crftutv2 . pdf +[Smi02] L. I. Smith, “A tutorial on principal components +analysis,” Cornell University, USA , vol. 51, p. 52, +2002. +[Smi04] B. T. Smith, “Lagrange multipliers tutorial in the +context of support vector machines,” Memorial Uni- +versity of Newfoundland St. John’s, Newfoundland, +Canada , Jun. 2004. +[SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery, +segmentation and reactive grasping of unknown +objects.” in Humanoids , 2012, pp. 71–77. [On- +line]. Available: http://h2t . anthropomatik . kit . edu/ +pdf/Schiebener2012. pdf +[SUM+ + 11] D. Schiebener, A. Ude, J. Morimotot, +T. Asfour, and R. Dillmann, “Segmentation +and learning of unknown objects through physical +interaction,” in Humanoid Robots (Humanoids), +2011 11th IEEE-RAS International Conference +on . IEEE, 2011, pp. 500–506. [Online]. +Available: http://ieeexplore. ieee. org/ielx5/6086637/ +6100798/06100843 . pdf +[SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, +“Textonboost: Joint appearance, shape and context +modeling for multi-class object recognition and +segmentation,” in Computer Vision–ECCV 2006 . +Springer, 2006, pp. 1–15. [Online]. 
Available: http: +//link . springer. com/chapter/10 . 1007/11744023_1 +[TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, +“Scene parsing with object instances and +occlusion ordering,” in Computer Vision and +15 +Pattern Recognition (CVPR), 2014 IEEE +Conference on . IEEE, 2014, pp. 3748–3755. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=6909874 +[UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, +“A measure for objective evaluation of +image segmentation algorithms,” in Computer +Vision and Pattern Recognition-Workshops, 2005. +CVPR Workshops. IEEE Computer Society +Conference on . IEEE, 2005, pp. 34–34. +[Online]. Available: http://repository . cmu . edu/cgi/ +viewcontent. cgi?article=1365&context=robotics +[vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. +van den Herik, “Dimensionality reduction: A com- +parative review,” Journal of Machine Learning +Research, vol. 10, no. 1-41, pp. 66–71, 2009. +[VOC10] “Voc2010 preliminary results,” 2010. [Online]. +Available: http://host . robots . ox . ac . uk/pascal/VOC/ +voc2010/results/index. html +[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic +tracking of laparoscopic instruments by color +coding,” in CVRMed-MRCAS’97 , ser. Lecture +Notes in Computer Science, J. Troccaz, E. Grimson, +and R. Mösges, Eds. Springer Berlin Heidelberg, +1997, vol. 1205, pp. 357–366. [Online]. Available: +http://dx . doi . org/10. 1007/BFb0029257 +[YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell +segmentation in microscopy imagery using a +bag of local bayesian classifiers,” in Biomedical +Imaging: From Nano to Macro, 2010 IEEE +International Symposium on , Apr. 2010, pp. 125– +128. [Online]. Available: http://ieeexplore. ieee. org/ +xpls/abs_all. jsp?arnumber=5490399 +[YHRF12] Y. Yang, S. Hallman, D. Ramanan, and +C. C. Fowlkes, “Layered object models for +image segmentation,” Pattern Analysis and +Machine Intelligence, IEEE Transactions on , +vol. 34, no. 9, pp. 
1731–1743, Sep. 2012. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=6042883 +[ZBS01]Y. Zhang, M. Brady, and S. Smith, “Segmentation +of brain MR images through a hidden Markov +random field model and the expectation- +maximization algorithm,” Medical Imaging, IEEE +Transactions on , vol. 20, no. 1, pp. 45–57, 2001. +[Online]. Available: http://ieeexplore . ieee . org/xpls/ +abs_all. jsp?arnumber=906424 +[ZGWX05] + S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What +are textons?” International Journal of Computer +Vision , vol. 62, no. 1-2, pp. 121–143, 2005. +[Zha12] + Z. Zhang, “Microsoft kinect sensor and its effect,” +MultiMedia, IEEE , vol. 19, no. 2, pp. 4–10, Feb. +2012. +[ZJRP+ + 15] S. Zheng, S. Jayasumana, B. Romera-Paredes, +V. Vineet, Z. Su, D. Du, C. Huang, and +P. H. Torr, “Conditional random fields as +recurrent neural networks,” in Proceedings +of the IEEE International Conference on +Computer Vision , 2015, pp. 1529–1537. [Online]. +Available: http://www . robots . ox . ac . uk/~szheng/ +papers/CRFasRNN. pdf G LOSSARY +ACM active contour model. 6 +BOV bag-of-visual-words. 5 +CNN Convolution Neuronal Network. 5, 9 +CRF Conditional Random Field. 4, 8, 9, 11 +GPU graphics processing unit. 3 +HOG histogram of oriented gradients. 5, 6, 8 +ILSVRC ImageNet Large-Scale Visual Recognition +Challenge. 9 +MAP Maximum A Posteriori. 8 +MR magnetic resonance. 2, 6 +MRF Markov Random Field. 4, 8 +PCA principal component analysis. 5 +RBF radial basis function. 8 +SIFT scale-invariant feature transform. 5 +SVM Support Vector Machine. 
4, 6–8 +16 +APPENDIX A +TABLES +Database Image Resolution (width × height) Number +of +Images Number +of +Classes Channels Data source +Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS] +DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+14] +KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13] +MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR] +MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR] +Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+14] +PASCAL VOC 2012 (142 px − 500 px) × (71 px − 500 px) 2913 20 3 [EVGW+12] +Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] +Table I: An overview of publicly available image databases with a semantic segmentation ground truth. diff --git a/read/results/playa/1707.09725.txt b/read/results/playa/1707.09725.txt new file mode 100644 index 0000000..8a18faf --- /dev/null +++ b/read/results/playa/1707.09725.txt @@ -0,0 +1,4293 @@ +Analysis and Optimization of +Convolutional Neural Network +Architectures +Master Thesis of +Martin Thoma +Department of Computer Science +Institute for Anthropomatics +and +FZI Research Center for Information Technology +Reviewer: Prof. Dr.–Ing. R. Dillmann +Second reviewer: Prof. Dr.–Ing. J. M. Zöllner +Advisor: Dipl.–Inform. Michael Weber +Research Period: 03. May 2017 – 03. August 2017 +KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association + www.kit.edu arXiv:1707.09725v1 [cs.CV] 31 Jul 2017 + +Analysis and Optimization of Convolutional Neural +Network Architectures +by +Martin Thoma +Master Thesis +August 2017 +Master Thesis, FZI +Department of Computer Science, 2017 +Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. 
Zöllner +Abteilung Technisch Kognitive Assistenzsysteme +FZI Research Center for Information Technology +Affirmation +Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfs- +mittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus +Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde. +Karlsruhe, Martin Thoma +August 2017 + v + +Abstract +Convolutional Neural Networks (CNNs) have dominated various computer vision tasks since +Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error +from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many +aspects of CNNs are examined in various publications, but literature about the analysis +and construction of neural network architectures is rare. This work is one step to close this +gap. A comprehensive overview of existing techniques for CNN analysis and topology +construction is provided. A novel way to visualize classification errors with confusion +matrices was developed. Based on this method, hierarchical classifiers are described and +evaluated. Additionally, some results are confirmed and quantified for CIFAR-100, for +example the positive impact of smaller batch sizes, averaging ensembles, data augmentation +and test-time transformations on the accuracy. Other results, such as the positive impact of +learned color transformation on the test accuracy, could not be confirmed. A model which +has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and +which beats the state of the art on the benchmark datasets Asirra, GTSRB, HASYv2 and +STL-10 was developed. 
+ vii +Zusammenfassung +Modelle, welche auf Convolutional Neural Networks (CNNs) basieren, sind in verschiedenen +Aufgaben der Computer Vision dominant, seit Alex Krizhevsky gezeigt hat, dass diese +effektiv trainiert werden können, und er den Top-5-Fehler in dem ImageNet large scale visual +recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte +von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleich- +sweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen +geschrieben. Diese Masterarbeit stellt einen Schritt dar, um diese Lücke zu schließen. Ein +umfassender Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein +neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde +entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt +und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtun- +gen wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der +Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die In- +varianzbildung durch künstliche Transformationen zur Test-Zeit (Test-time transformations) +experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss +gelernter Farbraumtransformationen, konnten nicht bestätigt werden. Ein Modell, welches +weniger als eine Million Parameter nutzt und auf den Benchmark-Datensätzen Asirra, +GTSRB, HASYv2 und STL-10 den Stand der Technik neu definiert, wurde entwickelt. +Acknowledgment +I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring +conversations we had about various topics, including machine learning. +I also want to thank my father for the support he gave me. He made it possible for me to +study without having to worry about anything besides my studies. Thank you! 
+Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading +my masters thesis and Stephan Gocht for giving me access to a GTX 1070. + ix +This work can be cited the following way: +@MastersThesis{Thoma:2017, +Title = {Analysis and Optimization of Convolutional Neural Network +Architectures}, +Author = {Martin Thoma}, +School = {Karlsruhe Institute of Technology}, +Year = {2017}, +Address = {Karlsruhe, Germany}, +Month = jun, +Type = {Masters’s Thesis}, +Keywords = {machine learning; artificial neural networks; +classification; supervised learning; CNNs}, +Url = {https://martin-thoma.com/msthesis/} +} +A DVD with a digital version of this master thesis and the source code as well as the used +data is part of this work. +Contents +1 Introduction 1 +2 Convolutional Neural Networks 3 +2.1 Linear Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 +2.2 CNN Layer Types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 +2.2.1 Convolutional Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . 5 +2.2.2 Pooling Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7 +2.2.3 Dropout . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 +2.2.4 Normalization Layers . . . . . . . . . . . . . . . . . . . . . . . . . . 9 +2.3 CNN Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +2.3.1 Residual Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +2.3.2 Aggregation Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 +2.3.3 Dense Blocks . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 +2.4 Transition Layers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 +2.5 Analysis Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15 +2.5.1 Qualitative Analysis by Example . . . . . . . . . . . . . . . . . . . . 15 +2.5.2 Confusion Matrices . . . . . . . . . . . . . . . 
. . . . . . . . . . . . 16 +2.5.3 Validation Curves: Accuracy, loss and other metrics . . . . . . . . . 16 +2.5.4 Learning Curves . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 +2.5.5 Input-feature based model explanations . . . . . . . . . . . . . . . . 21 +2.5.6 Argmax Method . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 22 +2.5.7 Feature Map Reconstructions . . . . . . . . . . . . . . . . . . . . . . 22 +2.5.8 Filter comparison . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23 +2.5.9 Weight update tracking . . . . . . . . . . . . . . . . . . . . . . . . . 23 +2.6 Accuracy boosting techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 24 +3 Topology Learning 27 +3.1 Growing approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27 +3.1.1 Cascade-Correlation . . . . . . . . . . . . . . . . . . . . . . . . . . . 27 +3.1.2 Meiosis Networks . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 +3.1.3 Automatic Structure Optimization . . . . . . . . . . . . . . . . . . . . 29 +3.2 Pruning approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29 +3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 +3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 +xi +3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31 +4 Hierarchical Classification 33 +4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34 +4.2 Clustering classes . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 +5 Experimental Evaluation 37 +5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38 +5.1.1 Baseline Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 +5.1.2 Weight distribution . . . . . . . . . . . . . . . . . . . . . . . . . . . . 41 +5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. 45 +5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48 +5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 +5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 +5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54 +5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55 +5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56 +5.8 Batch Normalization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 57 +5.9 Batch size . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +5.10 Bias . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +5.11 Learned Color Space Transformation . . . . . . . . . . . . . . . . . . . . . . 60 +5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 +5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 +5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 +5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 +5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68 +5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68 +6 Conclusion and Outlook 71 +A Figures, Tables and Algorithms 75 +B Hyperparameters 79 +B.1 Preprocessing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 79 +B.2 Data augmentation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80 +B.3 Initialization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81 +B.4 Objective function . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 81 +B.5 Optimization Techniques . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
82 +B.6 Network Design . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84 +B.7 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 +C Calculating Network Characteristics 87 +C.1 Parameter Numbers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 +C.2 FLOPs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 +C.3 Memory Footprint . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88 +D Common Architectures 89 +D.1 LeNet-5 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 +D.2 AlexNet . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +D.3 VGG-16 D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 +D.4 GoogleNet, Inception v2 and v3 . . . . . . . . . . . . . . . . . . . . . . . . . 94 +D.5 Inception-v4 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 +E Datasets 97 +F List of Tables 99 +G List of Figures 101 +H Bibliography 103 +I Glossary 119 + +1. Introduction +Computer vision is the academic field which aims to gain a high-level understanding of the +low-level information given by raw pixels from digital images. +Robots, search engines, self-driving cars, surveillance agencies and many others have +applications which include one of the following six problems in computer vision as sub- +problems: +• Classification:1 + The algorithm is given an image and k possible classes. The task is +to decide which of the k classes the image belongs to. For example, an image from +a self-driving cars on-board camera contains either paved road , unpaved road or +no road : Which of those given three classes is in the image? +• Localization: The algorithm is given an image and one class k . The task is to find +bounding boxes for all instances of k . +• Detection: Given an image and k classes, find bounding boxes for all instances of +those classes. 
+• Semantic Segmentation: Given an image and k classes, classify each pixel. +• Instance segmentation: Given an image and k classes, classify each pixel as one of +the k classes, but distinguish different instances of the classes. +• Content-based Image Retrieval: Given an image x and n images in a database, +find the top u images which are most similar to x. +There are many techniques to approach those problems, but since AlexNet [KSH12] was +published, all of those problems have high-quality solutions which make use of Convolutional +Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15]. +Today, most neural networks are constructed by rules of thumb and gut feeling. The +architectures evolved and got deeper, and more hyperparameters were added. Although there +are methods for analyzing CNNs, those methods are not enough to determine all steps in +the development of network architectures without gut feeling. A detailed introduction to +CNNs as well as nine methods for analysis of CNNs is given in Chapter 2. +1 + Classification is also called identification if the classes are humans. Another name is object recognition, +although the classes can be humans and animals as well. + 1 +1. Introduction +Despite the fact that most researchers and developers do not use topology learning, a couple +of algorithms have been proposed for this task. Five classes of topology learning algorithms +are introduced in Chapter 3. +When datasets and the number of classes are large, evaluating a single idea of how to improve +the network can take several weeks just for the training. Hence, the idea of building a +hierarchy of classifiers, which allows splitting the classification task into various sub-tasks +that can easily be combined, is evaluated in Chapter 4. +Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters +and label smoothing are evaluated in Chapter 5. 
+This work focuses on classification problems to keep the presented ideas as pure and +simple as possible. The described techniques are relevant to all six described computer +vision problems due to the fact that Encoder-Decoder architectures are one component of +state-of-the-art algorithms for all six of them. +2 +2. Convolutional Neural Networks +In the following, it is assumed that the reader knows what a multilayer perceptron (MLP) +is and how it is designed for classification problems, what activation functions are and +how gradient descent works. In case the reader needs a refresher on any of those topics, I +recommend chapters 4.3 and 4.4 of [Tho14a] as well as [LBH15]. +This chapter introduces linear image filters in Section 2.1, then standard layer types of +CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, +transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. +2.1. Linear Image Filters +A linear image filter (also called a filter bank or a kernel) is an element $F \in \mathbb{R}^{k_w \times k_h \times d}$, +where $k_w$ represents the filter’s width, $k_h$ the filter’s height and $d$ the number of input +channels. The filter $F$ is convolved with the image $I \in \mathbb{R}^{w \times h \times d}$ to produce a new image $I'$. +The output image $I'$ has only one channel. Each pixel $I'(x, y)$ of the output image gets +calculated by point-wise multiplication of one filter element with one element of the original +image $I$: +$$I'(x, y) = \sum_{i_x = 1 - \lceil k_w / 2 \rceil}^{\lfloor k_w / 2 \rfloor} \; \sum_{i_y = 1 - \lceil k_h / 2 \rceil}^{\lfloor k_h / 2 \rfloor} \; \sum_{i_c = 1}^{d} I(x + i_x, y + i_y, i_c) \cdot F(i_x, i_y, i_c)$$ +This procedure is explained by Figure 2.1. 
It is essentially a discrete convolution.I ∈ R 7 ×7 + Filter kernel +F ∈ R3× 3 Result of point-wise +multiplication I + ∈ R 7 ×7 +104 116 116 112 58 47 47 +109 97 114 116 105 110 45 +116 104 111 109 97 46 100 +101 47 109 97 115 116 101 +114 47 99 97 116 99 97 +116 99 97 116 46 112 104 +112 63 118 61 49 46 48 9 -3 -1 +-6 5 3 +2 -8 0 936 -333 -109 +-282 545 291 +94 -792 0 -4 -254 -498 -662 -849 -642 187 +-520 45 240 211 388 215 -861 +-340 559 -105 185 -138 -180 503 +-718 429 350 173 251 268 -655 +-567 -53 -75 80 571 -128 24 +-408 596 -550 368 26 976 156 +302 647 879 223 811 54 660 +Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the +output image, k 2 + multiplications and k 2 + additions of the products have to be calculated. +3 +2. Convolutional Neural Networks +One important detail is how boundaries are treated. There are four common ways of +boundary treatment: +• don’t compute: The image I + will be smaller than the original image. I + ∈ +R (w − k +w +1)× (h− k +h +1)× d +3 + , to be exact. +• zero padding + : The image I is padded by zeros where the filter would access elements +which do not exist. This will result in edges being detected at the border if the border +pixels are not black, but doesn’t need any computation. +• nearest: Repeat the pixel which is closest to the boundary. +• reflect: Reflect the image at the boundaries. +Common tasks that can be done with linear filters include edge detection, corner detection, +smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples. +Please note that the result of a filtering operation is again an image. This means filters +can be applied successively. While each pixel after one filtering operation with a 3 × 3 +filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3 +filters increase the area of the original image which influenced the output. The output is +then influenced by 25 pixel. 
This is called the receptive field. The kind of pattern which is +detected by a filter is called a feature . The bigger the receptive field is, the more complex +can features get as they are able to consider more of the original image. Instead of taking +one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters +with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering +operations compared to the two 3 × 3 filters, but the relevance of this technique will become +clear in Section 2.2. +2.2. CNN Layer Types +While the idea behind deep MLPs is that feature hierarchies capture the important parts +of the input more easily, CNNs are inspired by the idea of translational invariance : Many +features in an image are translationally invariant. For example, if a car is developed, one +could try to detect it by its parts [FGMR10 ]. But then there are many positions at which +the wheels could be. Combining those, it is desirable to capture low-level, translationally +invariant features at lower layers of an artificial neural network (ANN) and in higher layers +high-level features which are combinations of the low-level features. +Also, models should utilize the fact that the pixels of images are ordered. One way to use +this is by learning image filters in so called convolutional layers . +While MLPs vectorize the input, the input of a layer in a CNN arefeature maps. A feature +map is a matrix m ∈ R w ×h + , but typically the width equals the height (w = h). For an RGB +4 +2.2. CNN Layer Types +input image, the number of feature maps is d = 3. Each color channel is a feature map. +Since AlexNet [ KSH12] almost halved the error in the ImageNet challenge, CNNs are +state-of-the-art in various computer vision tasks. 
+Traditional CNNs have three important building tools: +• Convolutional layers with a non-linear activation function as described in Section 2.2.1, +• pooling layers as described in Section 2.2.2 and +• normalization layers as described in Section 2.2.4. +2.2.1. Convolutional Layers +Convolutional layers take several feature maps as input and produce n feature maps 1 + as +output, where n is the number of filters in the convolution layer. The filter weights of +the linear convolutions are the parameters which are adapted to the training data. The +number n of filters as well as the filter’s size k +w × k +h are hyperparameters of convolutional +layers. Sometimes, it is denoted as n @k +w × k + h . Although the filter depth is usually omitted +in the notation, the filters are of dimensionk + w × k + h × d(i − 1) + , where d(i − 1) + is the number of +feature maps of the input layer ( i − 1). +Another hyperparameter of convolution layers is the stride s ∈ N + ≥ 1 and the padding. +Padding (usually zero-padding [SCL12, SEZ+ + 13, HZRS15a]) is used to make sure that the +size of the feature maps doesn’t change. +The hyperparameters of convolutional layers are +• the number of filters n ∈ N +≥ 1 , +• k + w , k +h ∈ N + ≥1 of the filter size k +w × k +h × d( i −1) + , +• the activation function of the layer (see Table B.3) and +• the stride s ∈ N + ≥ 1 +Typical choices are n ∈ { 32, 64, 128 }, k +w = k +h = k ∈ { 1 , 3 , 5 , 11 } such as in [ KSH12, +SZ14, SLJ + + 15], rectified linear unit (ReLU) activation and s = 1. +The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH + + 89]. +With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just +like MLPs. In fact, every CNN has an equivalent MLP which computes the same function +if only the flattened output is compared. +1 + also called activation maps or channels + 5 +2. 
Convolutional Neural Networks +This is easier to see when the filtering operation is denoted formally: +o (i ) + (x ) = b + k + +j =1 w + ij · x + j with i ∈ { 1 , . . . , w } × { 1, . . . , h } × { 1, . . . , d } [2.1] +o (x,y,z ) + (I ) = b + k +w +2 + +i + x =1− k +w +2 k + h +2 + +i + y =1− k +h +2 d + +i + c =1 F +z (i + x , i +y , i +c ) · I (x + i + x , y + i + y , i +c ) [2.2] +with a bias b ∈ R , x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } +One can see that most weights of the equivalent MLP are zero and many weights are +equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. +The effect of fewer parameters is that less training data is necessary to get suitable +estimations for those. This means a MLP which is able to compute the same functions as a +CNN will likely have worse results on the same dataset, if a CNN architecture is suitable +for the dataset. +See Figure 2.2 for a visualization of the application of a convolutional layer. +3 feature maps +(e.g. RGB) n feature mapsn filters of +size k × k × 3 +width w width wheight h height hneural +network +data apply + . . . +. . . +. . .. . . +. . . +. . . +Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride +s = 1 to input data of size width × height with three channels. +6 +2.2. CNN Layer Types +A convolutional layer with n filters of size k +w × k +h and SAME padding after d(i − 1) + feature +maps of size s + x × s + y has n · d(i − 1) + · (k +w · k +h ) parameters if no bias is used. In contrast, a fully +connected layer which produces the same output size and does not use a bias would have +n · d( i −1) + · ( s +x × s +y ) 2 + parameters. This means a convolutional layer has drastically fewer +parameters. One the one hand, this means it can learn less complex decision boundaries. 
On +the other hand, it means fewer parameters have to be learned and hence the optimization +procedure needs fewer examples and the optimization objective is simpler. +It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does +learn a linear combination of the d input feature maps. This can be used for dimensionality +reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps. +Another insight recently became important: Every fully connected layer has an equivalent +convolutional layer which has the same weights. 2 + This way, one can use the complete +classification network as a very complex non-linear image filter which can be used for +semantic segmentation. +A fully connected layer with $d \in \mathbb{N}_{\geq 1}$ inputs and $n \in \mathbb{N}_{\geq 1}$ nodes can be interpreted as a +convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will +produce an output of shape 1 × 1 × n. Every single output is connected to all of the inputs. +When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize +the feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output, +it is completely equivalent to a fully connected layer. However, the vectorization can be +omitted if a convolution layer without padding and a filter size equal to the feature map +size is applied. This was used by [LSD15]. +2.2.2. Pooling Layers +Pooling summarizes a p × p area of the input feature map. Just like convolutional layers, +pooling can be used with a stride of $s \in \mathbb{N}_{>1}$. As s ≥ 2 is the usual choice, pooling layers +are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as +for AlexNet [KSH12] and VGG-16 [SZ14]. +The type of summary for the set of activations A varies between the functions listed +in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling +functions as introduced in [LGT16].
+2 + But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1 +7 +2. Convolutional Neural Networks +Name Definition Used by +Max pooling max { a ∈ A } [BPL10, KSH12] +Average / mean pooling 1 +| A | +a∈ A a LeNet-5 [LBBH98] and [KSlB + + 10] + +2 pooling + + a∈A a 2 + [Le13] +Stochastic pooling * [ZF13] +Table 2.1.: Pooling types for a set A of activations a ∈ R . +(*) For stochastic pooling, each of thep × p activation values a +i in the pooling region gets +picked with probability p +i = a +i + +a + j ∈ A a +j . This assumes the activations a +i are non-negative. +Pooling is applied for three reasons: To get local translational invariance, to get invariance +against minor local changes and, most important, for data reduction to1 +s 2 th of the data by +using strides of s > 1 . +See Figure 2.3 for a visualization of max pooling. +7 9 3 5 9 40 7 0 0 9 05 0 9 3 7 59 2 9 6 4 3 + 2 × 2 max pooling + 9 5 99 9 72 2 +Figure 2.3.: 2 × 2 max pooling applied to a feature map of size6 × 4 with stride s = 2 and padding. +Average pooling of p × p areas with stride s can be replaced by a convolutional layer. If +the input of the pooling layer are d(i −1) + feature maps, the convolutional layer has to have +d(i −1) + filters of size p × p and stride s . The i th filter has the values + + + + 1 +p 2 . . . 1 +p 2 +. +. +. . + . + . . +. +. +1 +p 2 . . . 1 +p 2  + + + +for the dimension i and the zero matrix + + + + 0 . . . 0 +. +. +. . + . + . . +. +. +0 . . . 0 + + + +for all other dimensions i = 1, . . . , d ( i −1) + . +8 +2.2. CNN Layer Types +2.2.3. Dropout +Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting +the output of any neuron to zero with probabilityp. It was introduced in [HSK+ + 12] and is +well-described in [SHK + + 14]. 
+A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of +the same shape $D \in \{0, 1\}^s$ is sampled, where each element $d_i$ is sampled independently +from a Bernoulli distribution. The results are element-wise multiplied to calculate the +output out of the Dropout layer: +$\text{out} = D \odot \text{in}$ with $d_i \sim B(1, p)$ +where $\odot$ is the Hadamard product +$(A \odot B)_{i,j} := (A)_{i,j} \, (B)_{i,j}$ +Hence every value of the input gets set to zero with a dropout probability of p. Typically, +Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob- +ability than later layers. In order to keep the expected output at the same value, the +output of a dropout layer is multiplied with $\frac{1}{1-p}$ when dropout is enabled [Las17, tf-16b]. +At inference time, dropout is disabled. +Dropout is usually only applied after fully connected layers, but not after convolutional +layers as it usually increases the test error as pointed out in [GG16]. +Models which use Dropout can be interpreted as an ensemble of models with different +numbers of neurons in each layer, but also with weight sharing. +Conceptually similar are DropConnect and networks with stochastic depth. DropCon- +nect [WZZ+13] is a generalization of Dropout, which sets weights to zero in contrast to +setting the output of a neuron to zero. Networks with stochastic depth as introduced +in [HSL+16] drop out only complete layers. This can be done by having Residual networks +which have one identity connection and one residual feature connection. Hence the residual +features can be dropped out and the identity connection remains. +2.2.4. Normalization Layers +One problem when training deep neural networks is internal covariate shift : While the +parameters of layers close to the output are adapted to some input produced by lower layers, +those lower layers' parameters are also adapted. This leads to the parameters in the upper +layers being worse.
A very low learning rate has to be chosen to adjust for the fact that the +input features might drastically change over time. + 9 +2. Convolutional Neural Networks +One way to approach this problem is by normalizing mini-batches as described in [IS15]. The +d-dimensional input $x = (x^{(1)}, \dots, x^{(d)})$ of a Batch Normalization layer is first normalized +point-wise to +$\hat{x}^{(k)} = \frac{x^{(k)} - \bar{x}^{(k)}}{\sqrt{s[x^{(k)}]^2 + \varepsilon}}$ +with $\bar{x}^{(k)} = \frac{1}{m} \sum_{i=1}^{m} x_i^{(k)}$ being the sample mean and $s[x^{(k)}]^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i^{(k)} - \bar{x}^{(k)})^2$ the +sample variance, where $m \in \mathbb{N}_{\geq 1}$ is the number of training samples per mini-batch, $\varepsilon > 0$ +is a small constant to prevent division by zero and $x_i^{(k)}$ is the activation of neuron k for +training sample i. +Additionally, for each activation $x^{(k)}$ two parameters $\gamma^{(k)}, \beta^{(k)}$ are introduced which scale +and shift the feature: +$y^{(k)} = \gamma^{(k)} \cdot \hat{x}^{(k)} + \beta^{(k)}$ +In the case of fully connected layers, this is applied to the activation, before the non-linearity +is applied. If it is applied after the activation, it harms the training in early stages. For +convolution, only one γ and one β is learned per feature map. +One important special case is $\gamma^{(k)} = \sqrt{s[x^{(k)}]^2 + \varepsilon}$ and $\beta^{(k)} = \bar{x}^{(k)}$, which would make the +Batch Normalization layer an identity layer. +During evaluation time, 3 + the expected value and the variance are calculated once for the +complete dataset. An unbiased estimate of the empirical variance is used. +The question where Batch Normalization layers (BN) should be applied and for which +reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the +activation function. Considering this, the possible options for the order are: +1.CONV / FC → BN → activation function → Dropout → . . . +2.CONV / FC → activation function → BN → Dropout → . . . +3.CONV / FC → activation function → Dropout → BN → . . .
+4.CONV / FC → Dropout → BN → activation function → . . . +The authors of [IS15] suggest using Batch Normalization before the activation function +as in Items 1 and 4. Batch Normalization after the activation led to better results in +https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md +Another normalization layer is Local Response Normalization as described in [KSH12], +which includes $\ell_2$ normalization as described in [WWQ13]. Those two normalization layers, +however, are superseded by Batch Normalization. +3 + also called inference time +10 +2.3. CNN Blocks +2.3. CNN Blocks +This section describes more complex building blocks than simple layers. CNN blocks act +similarly to a layer, but they are themselves composed of layers. +2.3.1. Residual Blocks +Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They +enabled the computer vision community to go from about 16 layers as in VGG 16-D (see +Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets) +as introduced in [HZRS15a] is to add an identity connection which skips two layers. This +identity connection adds the feature maps onto the other feature maps and thus requires +the output of the input layer of the residual block to be of the same dimension as the last layer +of the residual block. +Formally, it can be described as follows. If $x_i$ are the feature maps after layer i and $x_0$ is +the input image, H is a non-linear transformation of feature maps, then +y = H (x ) +describes a traditional CNN. Note that this could be multiple layers. A residual block as +visualized in Figure 2.4 is described by +y = H (x ) + x +In [HZRS15a], they only used residual skip connections to skip two layers.
Hence, if +conv + i (x + i ) describes the application of the convolutional layer i to the input x + i without the +nonlinearity, then such a residual block is +x + i +2 = conv + i +1 (ReLU(conv + i (x + i ))) + x + i +Figure 2.4.: ResNet module +Image source: [HZRS15a] +[HM16] provides some insights why deep residual networks are successful. + 11 +2. Convolutional Neural Networks +2.3.2. Aggregation Blocks +Two common ways to add more parameters to neural networks are increasing their depth +by adding more layers or increasing their width by adding more neurons / filters. Inception +blocks [AM15] implicitly started a new idea which was explicitly described in [XGD + + 16] as +“ResNeXt block”: Increasing the cardinality C ∈ N + ≥1 . By cardinality, the authors describe +the concept of having C small convolutional networks with the same topology but different +weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not +combine aggregation blocks with residual blocks as the authors did. +256-d in +concatenate total 32 +groups +. . . +128-d out4 @ 1 × 1 × 256 +4 @ 3 × 3 × 4 4 @ 1 × 1 × 256 +4 @ 3 × 3 × 4 4 @ 1 × 1 × 256 +4 @ 3 × 3 × 4 +Figure 2.5.: Aggregation block with a cardinality of C = 32 . Each of the 32 groups is a 2-layer +convolutional network. The first layer receives 256 feature maps and applies four1 × 1 +filters to it. The second layer applies four 3 × 3 filters. Although every group has +the same topology, the learned weights are different. The outputs of the groups are +concatenated. +The hyperparameters of an aggregation block are: +• The topology of the group members. +• The cardinality C ∈ N + ≥1 . Note that a cardinality of C = 1 is equivalent in every +aspect to using the group network without an aggregation block. +12 +2.3. CNN Blocks +2.3.3. Dense Blocks +Dense blocks are collections of convolutional layers which are introduced in [HLW16]. 
The +idea is to connect each convolutional layer directly to subsequent convolutional layers. +Traditional CNNs with L layers and one input layer have L connections between layers, +but dense blocks have $\frac{L(L+1)}{2}$ connections between layers. The input feature maps are +concatenated in depth. According to the authors, this prevents features from being re- +learned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 +have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors +used only on the order of 12 feature maps per layer. +A dense block is visualized in Figure 2.6. + 256 -d in +k @ 3 × 3 +concatenate +k @ 3 × 3 +concatenate256 -d +k -d +(256 + k )-d +k -d +(256 + L · k )-d out +Figure 2.6.: Dense block with L = 2 layers and a growth rate of k . +Dense blocks have four hyperparameters: +• The activation function being used. The authors use ReLU. +• The size $k_w \times k_h$ of filters. The authors use $k_w = k_h = 3$. +• The number of layers L, where L = 2 is a simple convolutional layer. +• The number k of filters added per layer (called growth rate in the paper) +It might be necessary to use 1 × 1 convolutions to reduce the number of L · k feature maps. +13 +2. Convolutional Neural Networks +2.4. Transition Layers +Transition layers are used to overcome constraints imposed by resource limitations or +architectural design choices. One constraint is the number of feature maps (see Appendix C.3 +for details). In order to reduce the number of feature maps while still keeping as much +relevant information as possible in the network, a convolutional layer i with $k_i$ filters of +the shape $1 \times 1 \times k_{i-1}$ is added. The number of filters $k_i$ directly controls the number of +generated feature maps. +In order to reduce the dimensionality (width and height) of the feature maps, one typically +applies pooling. +Global pooling is another type of transition layer.
It applies pooling over the complete +feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one +network to have different input sizes. +14 +2.5. Analysis Techniques +2.5. Analysis Techniques +CNNs have dozens of hyperparameters and ways to tune them. Although there are +automatic methods like random search [BB12], grid search [LBOM98], gradient-based +hyperparameter optimization [MDA15] and Hyperband [LJD+16], some actions need a +manual investigation to improve the model’s quality. For this reason, analysis techniques +which guide developers and researchers to the important hyperparameters are necessary. In +the following, nine diagnostic techniques are explained. +A machine learning developer has the following choices to improve the model’s quality: +(I1) Change the problem definition (e.g., the classes which are to be distinguished) +(I2) Get more training data +(I3) Clean the training data +(I4) Change the preprocessing (see Appendix B.1) +(I5) Augment the training data set (see Appendix B.2) +(I6) Change the training setup (see Appendices B.3 to B.5) +(I7) Change the model (see Appendices B.6 and B.7) +The preprocessing is usually not changed in modern architectures. However, this still leaves +six very different ways to improve the classifier. Changing the training setup and the model +each has too many possible choices to explore them completely. Thus, techniques are +necessary to guide the developer to changes which are most promising to improve the model. +For all of the following methods, it is important to use only the training set and the +validation set, so that the test set remains an unbiased estimate of the final model's quality. +2.5.1. Qualitative Analysis by Example +The most basic analysis technique which should always be used is looking at examples +which the network correctly predicted with a high certainty and examples which the classifier got +wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08]. +On the one hand, this might reveal errors in the training data.
Most of the time, training +data is manually labeled by humans who make mistakes. If a model is fit to those errors, +its quality decreases. +On the other hand, this can show differences in the distribution of validation data which +are not covered by the training set and thus indicate the need to collect more data. + 15 +2. Convolutional Neural Networks +2.5.2. Confusion Matrices +A confusion matrix is a matrix $(c)_{ij} \in \mathbb{N}_{\geq 0}^{K \times K}$, where $K \in \mathbb{N}_{\geq 2}$ is the number of classes, +which contains all correct and wrong classifications. The item $c_{ij}$ is the number of times +items of class i were classified as class j . This means the correct classifications are on the +diagonal $c_{ii}$ and all wrong classifications are off the diagonal. The sum $\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}$ is the +total number of samples which were evaluated and $\frac{\sum_{i=1}^{K} c_{ii}}{\sum_{i=1}^{K} \sum_{j=1}^{K} c_{ij}}$ is the accuracy. +The sums $r(i) = \sum_{j=1}^{K} c_{ij}$ of each class i are worth being investigated as they show if the +classes are skewed. If the number of samples of one class dominates the data set, then the +classifier can get a high accuracy by simply always predicting the most common class. If +the accuracy of the classifier is close to the a priori probability of the most common class, +techniques to deal with skewed classes might help. +An automatic criterion to check for this problem is +$\text{accuracy} \leq \frac{\max(\{ r(i) \mid i = 1, \dots, K \})}{\sum_{i=1}^{K} r(i)} + \varepsilon$ +where ε is a small value to compensate for the fact that some examples might be correct just +by chance. +Other values which should be checked are the class-wise sensitivities: +$s(k) = \frac{\#\,\text{correctly identified instances of class } k}{\#\,\text{instances of class } k} = \frac{c_{kk}}{r(k)} \in [0, 1]$ +If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is +necessary for class i. +The class-wise confusion +$f_{\text{confusability}}(k_1, k_2) = \frac{c_{k_1 k_2}}{\sum_{j=1}^{K} c_{k_1 j}}$ +indicates if class $k_1$ often gets classified as class $k_2$.
The highest values here can indicate +if two classes should be merged or a specialized model for separating those classes could +improve the overall system. +2.5.3. Validation Curves: Accuracy, loss and other metrics +Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal +axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are +typical quality metrics. Other quality metrics can be found in [OHIL16]. +In case that the number of training epochs are used as the examined hyperparameter, +validation curves give an indicator if training longer improves the model’s performance. By +16 +2.5. Analysis Techniques +plotting the error on the training set as well as the error on a validation set, one can also +estimate if overfitting might become a problem. See Figure 2.7 for an example. +10 20 30 40 50 60 70 80 90 1000 .20 .40 .60 .8 + overfitting + EpochsError + Training set +Validation set +Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs +and the quality metric is the error (1 − accuracy ) . The longer the network is trained, +the better it gets on the training set. At some point the network is fit too well to the +training data and loses its capability to generalize. At this point the quality curve of +the training set and the validation set diverge. While the classifier is still improving on +the training set, it gets worse on the validation and the test set. +When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the opti- +mization process did not improve for several epochs. Three possible ways to reduce the +problem of plateaus are(i)to change weight initialization if the plateau was at the beginning, +(ii)regularizing the model or(iii)changing the optimization algorithm. 
+Loss functions +The loss function (also called error function or cost function ) is a function which assigns a +real value to a complex event like the predicted class of a feature vector. It is used to define +the objective function. For classification problems the loss function is typically cross-entropy +with + 1 or +2 regularization, as it was described in [NH92]: +E +C E (W ) = − +x ∈ X K + +k =1 [ tx +k log(o x +k ) + (1 − tx +k ) log(1 − o x +k )] + + +cross-entropy data loss + λ +1 · +1 + + +w ∈W |w | +λ + 2 · +2 + + +w ∈ W w 2 + +model complexity loss +where W are the weights, X is the training data set, K ∈ N + ≥ 0 is the number of classes and +tx +k indicates if the training example x is of class k . o x +k is the output of the classification +algorithm which depends on the weights. λ + 1 , λ +2 ∈ [0, ∞) weights the regularization and is +typically smaller than 0 .1 . + 17 +2. Convolutional Neural Networks +Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange +curve is smoothed, but the non-smoothed curve is also plotted in light orange. +The data loss is positive whenever the classification is not correct, whereas the model +complexity loss is higher for more complex models. The model complexity loss exists due +to the intuition of Occam’s razor: If two models explain the same data with an accuracy of +100 %, the simpler model is to be preferred. +A reason to show the loss for the validation curve technique instead of other quality metrics +is that it contains more information about the quality of the model. A reason against the +loss is that it has no upper bound like the accuracy and can be hard to interpret. The +loss only shows relative learning progress whereas the accuracy shows absolute progress to +human readers. +There are three observations in the loss validation curve which can help to improve the +network: +• If the loss does not decrease for several epochs, the learning rate might be too low. 
+The optimization process might also be stuck in a local minimum. +• Loss being NaN might be due to too high learning rates. Another reason is division +by zero or taking the logarithm of zero. In both cases, adding a small constant like +$10^{-7}$ fixes the problem. +• If the loss-epoch validation curve has a plateau at the beginning, the weight initializa- +tion might be bad. +18 +2.5. Analysis Techniques +Quality criteria +There are several quality criteria for classification models. Most quality criteria are based on +the confusion matrix c which denotes at $c_{ij}$ the number of times the real class was i and j +was predicted. This means the diagonal contains the number of correct predictions. For +the following, let $t_i = \sum_{j=1}^{k} c_{ij}$ be the number of training samples for class i. The most +common quality criterion is accuracy: +$\text{accuracy}(c) = \frac{\sum_{i=1}^{k} c_{ii}}{\sum_{i=1}^{k} t_i} \in [0, 1]$ +One problem of accuracy as a quality criterion is skewed classes. If one class is by far +more common than all other classes, then the simplest way to achieve a high score is to +always classify everything as the most common class. +In order to fix this problem, one can use the mean accuracy: +$\text{mean-accuracy}(c) = \frac{1}{k} \cdot \sum_{i=1}^{k} \frac{c_{ii}}{t_i} \in [0, 1]$ +For two-class problems there are many other metrics like precision, recall and $F_\beta$-score. +Quality criteria for semantic segmentation are explained in [Tho16]. +Besides the quality of the classification result, several other quality criteria are important +in practice: +• Speed of evaluation for new images, +• latency, +• power consumption, +• robustness against (non)random perturbations in the training data (see [SZS+13, +PMW+15]), +• robustness against (non)random perturbations in the training labels (see [NDRT13, +XXE12]), +• model size +As reducing the floating point accuracy allows processing more data on a given device [Har15], +analysis under this aspect is also highly relevant in some scenarios.
+However, the following focuses on the quality of the classification result. + 19 +2. Convolutional Neural Networks +2.5.4. Learning Curves +A learning curve is a plot where the horizontal axis displays the number of training samples +given to the network and the vertical axis displays the error. Two curves are plotted: The +error on the training set (of which the size is given by the horizontal axis) and the error on +the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the +validation set is an indicator if more training data without any other changes will improve +the networks performance. Having the training set’s learning curve, it is possible to estimate +if the capacity of the model to fit the data is high enough for the desired classification error. +The error on the validation set should never be expected to be significantly lower than the +error on the training set. If the error on the training set is too high, then more data will +not help. Instead, the model or the training algorithm need to be adjusted. +If the training set’s learning curve is significantly higher than the validation set’s learning +curve, then removing features (e.g., by decreasing the images resolution), more training +samples or more regularization will help. +10 20 30 40 50 60 70 80 90 1000 .20 .40 .6 + avoidable biasvariance + human-le vel error + Training samplesError + Validation set +Training set +Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given +architecture will make to fit the given training data. At the same time, it is expected +that the training data gets more similar to the true distribution of the data which +should be captured by the test data. At some point, the error on the training and +test set should be about the same. The term “avoidable bias” was coined by Andrew +Ng [ Ng16]. In some cases it is not possible to classify data correctly by the given +features. 
If humans can classify the data given the features correctly, however, then +the bias is avoidable by building a better classifier. +The ma jor drawback of this analysis technique is its computational intensity. In order to +get one point on the training curve and one point on the testing curve, a complete training +has to be executed. On the full data set, this can be several days on high-end computers. +20 +2.5. Analysis Techniques +2.5.5. Input-feature based model explanations +Understanding which clues the model took to come to its prediction is crucial to check if +the model actually learns what the developer thinks it learns. For example, a model which +has to distinguish sled dogs from Chihuahuas might simply look at the background and +check if there is snow. Depending on the training and test data, this works exceptionally +well. However, it is not the desired solution. +For classification problems in computer vision, there are two types of visualizations which +help to diagnose such problems. Both color superpixels of the original image to convey +information how the model used those superpixels: +• Correct class heatmap : The probability of the correct class is encoded to give a +heat map which superpixels are important for the correct class. This can also be done +by setting the opacity accordingly. +• Most-likely class image + : Each of the most likely classes for all superpixels is +represented by a color. The colored image thus gives clues why different predictions +were assigned a high probability. +Two methods to generate such images are explained in the following. +Occlusion Sensitivity Analysis +Occlusion sensitivity analysis is described in [ ZF14]. The idea is to occlude a part of the +image by something. This could be a gray square as in [ ZF14] or a black superpixel as +in [RSG16]. Then the classifier is run on the image again. 
This is done for each region (e.g., +superpixel or position of the square) and the regions are then colored to generate either a +correct class heatmap of the most-likely class image. It is important to note that the color +at region r +i denotes the result if r +i is occluded. +Both visualizations are shown in Figure 2.10. One can see that the network makes sensible +predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan +Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. +Gradient-based approaches +In [ SVZ13], a gradient-based approach was used to generate image-specific class saliency +maps . The authors describe the problem as a ranking problem, where each pixel of the +image I + 0 is assigned a score S +c (I + 0 ) for a class c of interest. CNNs are non-linear functions, +but they can be approximated by the first order Taylor expansion S +c (I ) ≈ w T + I + b where +w is the derivative of S + c at I + 0 . + 21 +2. Convolutional Neural Networks +2.5.6. Argmax Method +The argmax method has two variants: +• Fixed class argmax : Propagate all elements of a given class through the network +and analyze which neurons are activated most often / have the highest activation. +• Fixed neuron argmax: Propagate the data through the network and find the n +data elements which cause the highest activation for a given neuron. +Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an +image I is calculated by applying F to I and calculating the element-wise sum of the result. +Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides +showing the 9 images which caused the highest activation, they also trained a deconvolutional +neural network to pro ject the activation of the filter back into pixel space. +The fixed neuron argmax can be used qualitatively to get an impression of the kind of +features which are learned. 
This is useful to diagnose problems, for example in [AM15] it is +described that the network recognized the class “dumbbell” only if a hand was present, too. +Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters +being shared between classes or how many parameters are mainly assigned to which classes. +Going one step further from the fixed neuron argmax method is using an optimization +algorithm to change an initial image minimally in such a way that any desired class gets +predicted. This is called caricaturization in [MV16]. +2.5.7. Feature Map Reconstructions +Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights +into the learned features. This shows what the network emphasizes. However, it is not +necessarily the case that the feature maps allow direct and easy conclusions about the +learned features. This technique is called inversion in [MV16]. +A key idea of feature map visualizations is to reconstruct a layers input, given its activation. +This makes it possible find which inputs would cause neurons to activate with extremely +high or low values. +More recent work like [ NYC16] tries to make the reconstructions appearance look more +natural. +22 +2.5. Analysis Techniques +2.5.8. Filter comparison +One question which might lead to some insight is how robust the features are which +are learned. If the same network is trained with the same data, but different weight +initializations, the learned weights should still be comparable. +If the set of learned filters changes with initialization, this might be an indicator for too +little capacity of that layer. Hence adding more filters to that layer could improve the +performance. 
+Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: +$\rho_k(W_i, W_j) = \max_{(x,y) \in \{-k,\dots,k\}^2 \setminus (0,0)} \frac{\langle W_i, T(W_j, x, y) \rangle_f}{\|W_i\|_2 \, \|W_j\|_2} \in [-1, 1]$, +where $T(\cdot, x, y)$ denotes the translation of the first operand by $(x, y)$, with zero padding at +the borders to keep the shape. $\langle \cdot, \cdot \rangle_f$ denotes the flattened inner product, where the two +operands are flattened into column vectors before applying the standard inner product. The +closer the absolute value of the k-translation correlation is to one, the more similar two filters +$W_i, W_j$ are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and +VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found +this by comparing the averaged maximum k-translational correlation of the networks with +Gaussian-distributed initialized filters. The averaged maximum k-translational correlation +is defined as +$\bar{\rho}_k(W) = \frac{1}{N} \sum_{i=1}^{N} \max_{j=1, j \neq i}^{N} \rho_k(W_i, W_j)$ +where $N$ is the number of filters in the layer $W$ and $W_i$ denotes the $i$th filter. +2.5.9. Weight update tracking +Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if +the learning rate is well-chosen. He suggests that the weight update should be in the order +of $10^{-3}$. If the weight update is too high, then the learning rate has to be decreased. If the +weight update is too low, then the learning rate has to be increased. +The order of the weight updates as well as possible implications highly depend on the model +and the training algorithm. See Appendix B.5 for a short overview of training algorithms +for neural networks. + 23 +2. Convolutional Neural Networks +2.6. 
Accuracy boosting techniques +There are techniques which can almost always be applied to improve accuracy of CNN +classifiers: +• Ensembles [CMS12] +• Training-time augmentation (see Appendix B.2) +• Test-time transformations [DDFK16, How13, HZRS15b] +• Pre-training and fine-tuning [ZDGD14, GDDM14] +One of the most simple ensemble techniques which was introduced in [CMS12] is averaging +the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly +the same training setup by reducing variance. +Data augmentation techniques give the optimizer the possibility to take invariances like +rotation into account by generating artificial training samples from real training samples. +Data augmentation hence reduces bias and variance with no cost at inference time. +Data augmentation at inference time reduces the variance of the classifier. Similar to using +an ensemble, it increases the computational cost of inference. +Pretraining the classifier on another dataset to obtain start from a good position or finetuning +a model which was originally created for another task is also a common technique. +24 +2.6. Accuracy boosting techniques +Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images, +where a gray square occluded a part of the image. This gray squares center(x, y ) was +moved over the complete image and the classifier was run on each of the occluded +images. The probability of the correct class, depending on the gray squares position, +is showed in the middle column. One can see that the predicted probability of the +correct class “Pomeranian” drops if the face of the dog is occluded. The last image +gives the class with the highest predicted probability. In the case of the Pomeranian, +it always predicts the correct class if the head is visible. However, if the head of the +dog is occluded, it predicts other classes. + 25 +2. 
Convolutional Neural Networks +Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature +maps which caused the highest activation are displayed. +26 +3. Topology Learning +The topology of a neural network is crucial for the number of parameters, the number +of floating point operations (FLOPs), the required memory, as well as the features being +learned. The choice of the topology, however, is still mainly done by trial-and-error. +This chapter introduces four general approaches to automatic topology learning: growing a +network from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches +in Section 3.3 and reinforcement learning approaches in Section 3.4. +3.1. Growing approaches +Growing approaches for topology learning start with a minimal network, which only has +the necessary number of input nodes and the number of output nodes which are determined +by the application and the features of the input. They then apply a criterion to insert new +layers / neurons into the network. +In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Opti- +mization are introduced. +3.1.1. Cascade-Correlation +Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which +is similar to the dense block described in Section 2.3.3. +Cascade-Correlation works as follows: +1. Initialization: The number of input nodes and the number of output nodes are +defined by the problem. Create a minimal, fully connected network for those. +2. Training: Train the network until the error no longer decreases. +3. Candidate Generation: Generate candidate nodes. Each candidate node is con- +nected to all inputs. They are not connected to other candidate nodes and not +connected to the output nodes. + 27 +3. Topology Learning +4. 
Correlation Maximization: Train the weights of the candidates by maximizing $S$, +the correlation between the candidate's output value $V$ and the network's residual error: +$S = \sum_{o \in O} \left| \sum_{p \in T} (V_p - \bar{V}) (E_{p,o} - \bar{E}_o) \right|$ +where $O$ is the set of output nodes, $T$ is the training set, $V_p$ is the candidate neuron's +activation for a training pattern $p$. $E_{p,o}$ is the residual output error at node $o$ for +pattern $p$. $\bar{V}$ and $\bar{E}_o$ are averaged values over all elements of $T$. This step is finished +when the correlation no longer increases. +5. Candidate selection: Keep the candidate node with the highest correlation, freeze +its incoming weights and add connections to the output nodes. +6. Continue: If the error is higher than desired, continue with step 2. +One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1. +1 +Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray) +to the left, three hidden nodes (green) in the middle and two output nodes in the upper +right corner. The black squares represent frozen weights which are found by correlation +maximization whereas the white squares are trainable weights. +3.1.2. Meiosis Networks +Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where +weights are deterministic and fixed at prediction time, each weight $w_{ij}$ in Meiosis networks +follows a normal distribution: +$w_{ij} \sim \mathcal{N}(\mu_{ij}, \sigma_{ij}^2)$ +28 +3.2. Pruning approaches +Hence every connection has two learned parameters: $\mu_{ij}$ and $\sigma_{ij}^2$. +The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell +division. A node $j$ is split when the random part dominates the value of the sampled +weights: +$\frac{\sum_i \sigma_{ij}}{\sum_i \mu_{ij}} > 1$ and $\frac{\sum_k \sigma_{jk}}{\sum_k \mu_{jk}} > 1$ +The mean of the new nodes is sampled around the old mean, half the variance is assigned +to the new connections. 
+Hence Meiosis networks only change the number of neurons per layer. They do not add +layers or add skip connections. +3.1.3. Automatic Structure Optimization +Automatic Structure Optimization (ASO) was introduced in [ BM93] for the task of on- +line handwriting recognition. It makes use of the confusion matrix C = ( c +ij ) ∈ Nk × k +≥ 0 +(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix +S with s + i j = s +j i = c +ij · c +ji . The maximum of S defines where the ASO algorithm adds +more parameters. The details how the resources are added are not transferable to CNNs. +3.2. Pruning approaches +Pruning approaches start with a network which is bigger than necessary and prune it. The +motivation to prune a network which has the desired accuracy is to save storage for easier +model sharing, memory for easier deployment and FLOPs to reduce inference time and +energy consumption. Especially for embedded systems, deployment is a challenge and low +energy consumption is important. +Pruning generally works as follows: +1.Train a given network until a reasonable solution is obtained, +2.prune weights according to a pruning criterion and +3.retrain the pruned network. +This procedure can be repeated. +One family of pruning criterions uses the Hessian matrix . For example, Optimal Brain +Damage (OBD) as introduced in [LDS+ + 89]. For every single parameter k , OBD calculates +the effect on the ob jective function of deletingk . The authors call the effect of the deletion +29 +3. Topology Learning +of parameter k the saliency s + k . The parameters with the lowest saliency are deleted, which +means they are set to 0 and are not updated anymore. +A follow-up method called Optimal Brain Surgeon [ HSW93] claims to choose the weights +in a much better way. This requires, however, to calculate the inverse Hessian matrix +H −1 + ∈ R n×n + where n ∈ N is typically n > 106 + . 
+A much simpler and computationally cheaper pruning criterion is the weight magnitude. +[HPTD15] prunes all weights $w$ whose magnitude is below a threshold $\theta$: +$w \leftarrow \begin{cases} w & \text{if } |w| \geq \theta \\ 0 & \text{otherwise} \end{cases}$ +3.3. Genetic approaches +The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which +can recombine themselves via crossover and inversion. An introduction to such algorithms +is given in [ES03]. +Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its +successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10]. +The results, however, are of unacceptable quality: On MNIST (see Appendix E), where +random chance gives 10 % accuracy, even simple topologies trained with SGD achieve +about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+13], the HyperNEAT +algorithm achieves only 23.9 % accuracy [VH13]. +Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy +on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer +so that not more than three hidden layers could be trained. At the same time, VGG- +19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers +in [HZRS15a]. +[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and +SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is +at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100, +but the state of the art is 17.18 % [HLW16]. +3.4. Reinforcement Learning +Reinforcement learning is a sub-field of machine learning, which focuses on the question +how to choose actions that lead to high rewards. +30 +3.5. Convolutional Neural Fabrics +One can think of the search for good neural network topologies as a reinforcement learning +problem. The agent is a recurrent neural network which can generate bitstrings. Those +variable-length bitstrings encode neural network topologies. 
+In 2016, this approach was applied to construct neural networks for computer vision. +In [BGNR16], Q-learning with an ε -greedy exploration was applied. +In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models +for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous +amounts of computational resources were used to obtain those results. +3.5. Convolutional Neural Fabrics +Convolutional Neural Fabrics are introduced in [ SV16]. They side-step hard decisions +about topologies by learning an ensemble of different CNN architectures. The idea is to +define a single architecture as a trellis through a 3D grid of nodes. Each node represents a +convolutional layer. One dimension is the index of the layer, the other two dimensions are +the amount of filters and the feature size. Each node is connected to nine other nodes and +thus represents nine possible choices of convolutional layers: +• Resolution + :(i)convolution with stride=1 or(ii)convolution with stride=2 or +(iii)deconvolution (doubling the resolution) +• Channels:(i)half the number of filters than the layer before(ii)the same number +of filters as the layer before(iii)double the number of filters than the layer before +They always use ReLU as an activation function and they always use filters of size 3 × 3 . +They don’t use pooling at all. + 31 +3. Topology Learning +32 +4. Hierarchical Classification +Designing a classifier for a new dataset is hard for two main reasons: Many design choices are +not clearly superior to others and evaluating one design choice takes much time. Especially +CNNs are known to take several days [ KSH12, SLJ + + 15] or even weeks [ SZ14] to train. +Additionally, some methods for analyzing a dataset become harder to use with more classes +and more training samples. Examples are t-SNE, the manual inspection of errors and +confusion matrices, and the argmax method. 
+One idea to approach this problem is by building a hierarchy of classifiers. The root +classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single +classes. Figure 4.1 gives an example for an hierarchy of classifiers. +Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. +The root classifier C + 0 has to distinguish six coarse classes (pedestrian, four+ + -wheelers, +traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C + 0 predicts a +pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C +0 +predicts traffic sign, then another classifier has to predict if it is a speed limit, a +sign indicating danger or something else. If C +0 , however, predicts road, then no other +classifier will become active. +In this example, the problem has 17 classes. The hierarchical approach introduces +7 clusters of classes and thus uses 8 classifiers. +Such a hierarchy of classifiers needs clusters of classes. + 33 +4. Hierarchical Classification +4.1. Advantages of classifier hierarchies +Having a classifier hierarchy has five advantages: +• Division of labor : Different teams can work together. Instead of having a monolithic +task, the solutions can be combined. +• Guarantees : Changing a classifier will only change the prediction of itself and its +children. Siblings are not affected. In the example from Figure 4.1, the classifier +which distinguishes traffic signs can be changed while the classification aspedestrian , +four + + -wheelers , traffic sign , street , other will not be affected. Also, the +classification between speed limits, danger signs and other signs will not change. +• Faster training : Except for the root classifier C + 0 , each other classifier will have +less than the total amount of training data. Depending on the combined classes, the +models could also be simpler. Hence the training time is reduced. 
+• Weighting of errors: In practice, some errors are more severe than others. For +example, it could be acceptable if the two-wheelers classifier has an error rate of +40 %. But it is not acceptable if the speed limit classifier has such a high error rate. +• Post-hoc explanations: The simpler a model is, the easier it is to explain why a +classification is made the way it is made. +4.2. Clustering classes +There are two ways to cluster classes: By similarity or by semantics. While semantic +clustering needs either additional information or manual work, the similarity can be +automatically inferred from the data. As pointed out in [XZY+14], semantically similar +classes are often also visually similar. For example, in the ImageNet dataset most dogs +are semantically and visually more similar to each other than to non-dogs. An example +where this is obviously not the case are symbols: The summation symbol \sum is identical +in appearance to the Greek letter \Sigma , but semantically much closer to the addition +operator + . +One approach to cluster classes by similarity is to train a classifier and examine its +predictions. Each class is represented in the confusion matrix by one row. Those rows +can be clustered directly with standard clustering algorithms such as k-means, DBSCAN [EKS+96], +OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral +clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of +them do not allow a human to improve the found clustering manually. +The confusion matrix $(c)_{ij} \in \mathbb{N}^{k \times k}$ states how often class $i$ was present and class $j$ was +34 +4.2. Clustering classes +predicted. The more often this confusion happens, the more similar those two classes are to +the classifier. Based on the confusion matrix, the classes can be clustered as explained in +the following. 
+[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains +diminish after a critical point of classes is reached. Hence a binary tree might not be a +good choice. As an alternative, an approach which allows building arbitrarily many clusters +is proposed. +The proposed algorithm has two main ideas: +• The order of columns and rows in the confusion matrix is arbitrary. This means one +can swap rows and columns. If row i and j are swapped, then the columns i and j +have to be swapped too in order to keep the same confusion matrix. +• If two classes are confused often, then they are similar to the classifier. +Hence the order of the classes is permuted in such a way that the highest errors are close +to the diagonal. One possible objective function to be minimized is +$f(C) = \sum_{i=1}^{n} \sum_{j=1}^{n} C_{ij} \cdot |i - j|$ (4.1) +which punishes errors linearly with the distance to the diagonal. This method is called CMO +in the following. +As pointed out by Tobias Ribizel (personal communication), this optimization problem +is a weighted version of the Optimal Linear Arrangement problem. That problem is NP- +complete [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, +produces reasonable clusterings as well as visually appealing confusion matrices. The +algorithm works as follows: First, decide with probability 0.5 if only two random rows are +swapped or a block is swapped. If two rows are swapped, choose both of them randomly. +If a block is swapped, then choose the start randomly and the end of the block randomly +after the start. The insert position has to be a valid position considering the block length, +but besides that it is also chosen uniformly at random. +Simple row-swapping can exploit local improvements. For example, in the context of +ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier +and both dog classes Dalmatian and Greyhound next to each other. 
Both the two clusters +of dog breeds could be separated by car and bus due to random chance. Moving any single +class increases the score, but moving either one of the dog breed clusters or the vehicle +cluster decreases the score. Hence it is beneficial to implement block moving. +One advantage of permutating the classes in order to minimize Equation (4.1) in comparison +to spectral clustering as used in [ XZY+ + 14] is that the adjusted confusion matrix can be +35 +4. Hierarchical Classification +split into many much smaller matrices along the diagonal. In the case of many classes (e.g., +1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to +visualize the types of errors made. If the errors are systematic due to visual similarity, many +confusions are not made and thus many elements of the confusion matrix are close to 0. +Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1). +Once a permutation of the classes is found which has a low score Equation (4.1), the clusters +can either be made by hand by deciding why classes should not be in one clusters. With +such a permutation, only n − 1 binary decisions have to be made and hence only the list of +classes has to be read. Alternatively, one can calculate the confusions C +i,i +1 + C +i +1,i for +each pair of classes which are neighbors in the confusion matrix. The higher this value, the +more similar are the classes according to the classifier. Hence a thresholdθ can be applied. +θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) +or semi-automatically by asking the user for information if two classes belong to the same +cluster. Such an approach only needs log (n ) binary decisions from the user where n is the +number of classes. +Please note that CMO only works if the classifier is neither too bad nor too good. 
A classifier +which does not solve the task at all might just give almost uniform predictions whereas the +confusion matrix of an extremely good classifier is almost diagonal and thus contains no +information about the similarity of classes. One possible solution to this problem is to take +the prediction of the class in contrast to using only the argmax in order to find a useful +permutation. +36 +5. Experimental Evaluation +All experiments are implemented using Keras 2.0 [ Cho15] with Tensorflow 1.0 [ AAB+ + 16] +and cuDNN 5.1 [CWV + + 14] as the backend. The experiments were run on different machines +with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce +GTX 970 and GeForce 940MX. +The GTSRB [SSSI12], SVHN [NWC + + 11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], +HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are +used as their size is small enough to be trained within a day. Other classification datasets +which were considered are listed in Appendix E. +CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color +images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer, +dog, frog, horse, ship, truck. The state of the art achieves an accuracy of96. 54 % [HLW16]. +According to [Kar11], human accuracy is at about 94 %. +CIFAR-100 is a 100-class dataset of color images of the size32 px × 32 px. Its 100 classes +are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles +and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain +the class airplane . The state of the art achieves an accuracy of 82.82 % [HLW16]. +GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. +The 51 839 images are in color and of a minimum size of25 px × 25 px up to 266 px × 232 px. 
+The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [ SL11]. +According to [SSSI], human performance is at 98.84 %. +HASYv2 + (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images +of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows, +mathematical symbols. The state of the art achieves an accuracy of 82 .00 % [Tho17a]. +STL-10 + (self-taught learning 10) is a 10-class dataset of color images of the size96 px × 96 px. +Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state +of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images +for unsupervised training and 500 images per class for supervised training. +SVHN (Street View House Numbers) exists in two formats. For the following experiments, +the cropped digit format was used. It contains the 10 digits cropped from photos of Google +Street View. The images are in color and of size 32 px × 32 px. The state of the art +37 +5. Experimental Evaluation +achieves an accuracy of 98. 41 % [ HLW16]. According to [ NWC + + 11a], human performance +is at 98.0 %. +As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0 , 1] . +For GTSRB, the training and test data was scaled to 32 px × 32 px. +5.1. Baseline Model and Training setup +The baseline model is trained with Adam [KB14], an initial learning rate of 10− 4 + , a batch +size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation +depends on the dataset: +• CIFAR-10 , CIFAR-100 and STL-10: Random width and height shift by at most +±3 pixels in either direction; Random horizontal flip. +• GTSRB +, MNIST: Random width and height shift by at most ±5 pixels in either +direction; random rotation by at most ±15 degrees; random channel shift; random +zoom in [0.5 , 1 .5]; random shear by at most 6 degrees. 
+• HASYv2: Random width and height shift by at most ±5 pixels in either direction; +random rotation by at most ±5 degree. +• SVHN: No data augmentation. +If the dataset does not define a training/test set, a stratified67 % / 33 % split is applied. If +the dataset does not define a validation set, the training set is split in a stratified manner +into 90 % training set / 10 % test set. +Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of +10 epochs is applied. After this, the model is trained without data augmentation for at most +1000 epochs with early stopping and the validation accuracy as a stopping criterion and a +patience of 10 epochs. Kernel weights are initialized according to the uniform initialization +scheme of He [HZRS15b] (see Appendix B.3). +The architecture of the baseline model uses a pattern of +Conv-Block (n ) = (Convolution − Batch Normalization − Activation)n + − Pooling +The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for +the last layer where softmax is used. Before the last two convolutional layer, a dropout +layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1. +Please note that the number of input- and output channels of the network depends on +the dataset. If the input image is larger than 32 px × 32 px, for each power of two a +Conv-Block (2) is added at the input. For MNIST, the images are bilinearly upsampled to +32 px × 32 px. +38 +5.1. 
Baseline Model and Training setup +# Type Filters @ +Patch size / stride Parameters FLOPs Output size +Input 0 0 3 @ 32 × 32 +1 Convolution 32 @ 3 × 3 × 3 / 1 896 1 736 704 32 @ 32 × 32 +2 BN + ELU 64 163 904 32 @ 32 × 32 +3 Convolution 32 @ 3 × 3 × 32 / 1 9 248 18 841 600 32 @ 32 × 32 +4 BN + ELU 64 163 904 32 @ 32 × 32 +Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16 +5 Convolution 64 @ 3 × 3 × 32 / 1 18 496 9 420 800 64 @ 16 × 16 +6 BN + ELU 128 82 048 64 @ 16 × 16 +7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16 +8 BN + ELU 128 82 048 64 @ 16 × 16 +Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8 +9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8 +10 BN + ELU 128 20 608 64 @ 8 × 8 +Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4 +11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 800 1 048 064 512 @ 1 × 1 +12 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +13 Convolution 512 @ 1 × 1 × 512 / 1 262 656 523 776 512 @ 1 × 1 +14 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1 +Global avg Pooling 1 × 1 0 k k @ 1 × 1 +16 BN + Softmax 2 k 7k k @ 1 × 1 + + 515 k ++892 512 1032k ++55 729 664 103 424+2 k +Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32 . All convolutional layers +use SAME padding, except for layer 11 which used VALID padding in order to decrease +the feature map size to 1 × 1 . If the input feature map is bigger than 32 × 32 , for +each power of two there are two Convolution + BN + ELU blocks and one Max pooling +block added. This is the framed part in the table.32 × 32Input +C 32@3 × 3 / 1 +BN + ELU +C 32@3 × 3 / 1 +BN + ELU 16 × 16max pooling 2 × 2 /2 +C 64@3 × 3 /1 +BN + ELU +C 64@3 × 3 /1 +BN + ELU 8 × 8max pooling 2 × 2 /2 +C 64@3 × 3 /1 +BN + ELU 4 × 4max pooling 2 × 2 /2 +C 512@4 × 4 /1 (V) +BN + ELU +Dropout, p = 0. 5 1 × 1C 512@1 × 1 /1 +BN + ELU +Dropout, p = 0. 
5 +C k @1 × 1/ 1 +Global AVG pooling +BN + Softmax +Figure 5.1.: Architecture of the baseline model. C 32@3 × 3/ 1 is a convolutional layer with 32 filters +of kernel size 3 × 3 with stride 1. + 39 +5. Experimental Evaluation +5.1.1. Baseline Evaluation +The results for the baseline model evaluated on eight datasets are given in Table 5.2. The +speed for inference for different GPUs is given in Table 5.3. +Dataset Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set +Asirra 94.22 % σ = 3. 49 94.37 % σ = 3.47 97 .07 % 97. 37 % +CIFAR-10 91.23 % σ = 1. 10 85.84 % σ = 0.87 92 .36 % 86.75 % +CIFAR-100 76.64 % σ = 1.48 63. 38 % σ = 0.55 78 .30 % 64.70 % +GTSRB 100 .00 % σ = 0.00 99. 18 % σ = 0.11 100 .00 % 99.46 % +HASYv2 89.49 % σ = 0.42 85. 35 % σ = 0.10 89 .94 % 86.03 % +MNIST 99.93 % σ = 0.07 99. 53 % σ = 0.06 99 .99 % 99.58 % +STL-10 94.12 % σ = 0.87 75. 67 % σ = 0.34 96 .35 % 77.62 % +SVHN 99.02 % σ = 0.07 96. 28 % σ = 0.10 99 .42 % 97.20 % +Table 5.2.: Baseline model accuracy on eight datasets. The single model actuary is the 10 models +used in the ensemble. The empirical standard deviation σ of the accuracy is also given. +CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the +models uses unlabeled data or data from other datasets. For HASYv2 no test time +transformations are used. +Network GPU Tensorflow Inference per Training +1 Image 128 images time / epoch +Baseline Default Intel i7-4930K 3 ms 244 ms 231. 0 s +Baseline Optimized Intel i7-4930K 2 ms 143 ms 149. 0 s +Baseline Default GeForce 940MX 4 ms 120 ms 145. 6 s +Baseline Default GTX 970 6 ms 32 ms 25.0 s-26. 3 s +Baseline Default GTX 980 3 ms 24 ms 20.5 s-21. 1 s +Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22. 1 s +Baseline Default GTX 1070 2 ms 15 ms 14 . 4 s- 14 .5 s +Baseline Default Titan Black 4 ms 25 ms 28.1 s-28. 1 s +Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24. 
4 s +DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms — +Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on +six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17]. +Weights of the baseline model can be found at [Tho17b]. The optimized Tensorflow build +makes use of SSE4.X, AVX, AVX2 and FMA instructions. +40 +5.1. Baseline Model and Training setup +5.1.2. Weight distribution +The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution +of bias weights by layer is shown in Figure 5.3. Although both figures only show the +distribution for one specific model trained on CIFAR-100, the following observed patterns +are consistent for 70 models (7 datasets and 10 models per dataset): +• The empirical [0.5-percentile, 99.5-percentile] interval which contains 99 % of the +filter weights is almost symmetric around zero. The same is true for the bias weights. +• The farther away a layer is from the input, the smaller the 99-percentile interval is, +except for the last layer (see Table A.1). +• The 99-percentile interval of the first layer's filter weights is about [−0.5, +0.5], except +for MNIST and HASYv2 where it is in [−0.8, 0.8]. +• The 99-percentile interval of the first layer's bias weights is always in [−0.2, 0.2]. +• The distribution of filter weights of the last convolutional layer is not symmetric. In +some cases the distribution is also not unimodal. +• The bias weights of the last three layers are very close to zero. The absolute value of +most of them is smaller than $10^{-2}$. +Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of +Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases, +the Batch Normalization layer equals the identity and thus is only relevant for the training.
+While γ and β do not show as clear patterns as the filter and bias weights of convolutional +layers, some observations are also consistent through all models even for different datasets: +• γ of the last layer (layer 16) is bigger than 1.3. +• The 99-percentile interval for β of the last layer is longer than the other 99-percentile +intervals. +• The 99-percentile interval for β of the fourth-last layer (layer 14 for STL-10, layer 10 for +all other models) is more negative than that of all other layers. +Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional +layer. The ranges are calculated for each channel and filter separately. The smaller the +values are, the less information is lost if the filters are replaced by smaller filters. + 41 +5. Experimental Evaluation +Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR- +100. The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4]. +With every layer the interval which contains 95 % of the weights and is centered around +the mean becomes smaller, especially with layer 11 where the feature maps are of +size 1 × 1. In contrast to the other layers, the last convolutional layer has a bimodal +distribution. +This plot indicates that the network might benefit from bigger filters in the first layer, +whereas the filters in layers 7 – 11 could potentially be smaller. +Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. +While the first layer's biases are in [−0.1, +0.1], after each max-pooling layer the interval +which contains 95 % of the weights and is centered around the mean becomes smaller. +In the last three convolutional layers, most bias weights are in [−0.005, +0.005]. +42 +5.1. Baseline Model and Training setup +Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a +baseline model trained on CIFAR-100.
+Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model +trained on CIFAR-100. + 43 +5. Experimental Evaluation +Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For +each filter, the range of values is recorded by channel. The smaller this range is, the +less information is lost if an n × n filter is replaced by an (n − 1) × (n − 1) filter. +44 +5.1. Baseline Model and Training setup +5.1.3. Training behavior +Due to early stopping, the number of epochs for which a model was trained differs. The number +of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard +deviation of 17.3 epochs for CIFAR-100. +Figure 5.7 shows the worst and the best validation accuracy during the training with +augmented data. Different initializations lead to very similar validation accuracies during +training. The image might lead to the wrong conclusion that models which are better at +the start are also better at the end. In order to check this hypothesis, the relative order of +validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering +stays approximately the same, then it can be considered to run the first few epochs many +times and only train the best models to the end. For 10 models, there can be $\frac{10^2 - 10}{2} = 45$ +pair-wise changes in the ordering at maximum if the relative order of validation accuracies +is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred +on average for each pair of epochs (i, i + 1). This means if one knows only the relative order +of the validation accuracy of two models m and m′ in epoch i, it is doubtful if one can +make any statement about the ordering of m and m′ in epoch i + 1.
+0 + 10 20 30 40 50 60 70 80 90 + 100 110 120 130 1400 .20 .30 .40 .50 .60 .7 + epochvalidation accuracy + maximum validation accuracy +minimum validation accuracy 1 .5 +2 +2 .5 +3 +3 .5 +4 +4 .5 loss +maximum validation accuracy +minimum validation accuracy +mean loss +Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The +differences do not exceed 1 % and does not increase by training epoch. Four models +stopped the first training stage at epoch 133 which causes the shift in the loss and the +maximum validation accuracy. +Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was +expected that the absolute value of weight updates during epochs (sum, max, and mean) +decrease in later training stages. The intuition was that weights need to be adjusted in a +coarse way first. After that, the intuition was that only slight modifications are applied by +45 +5. Experimental Evaluation +the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as +displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest +change happens as expected in the first epoch after the weights are initialized. The change +from augmented training to non-augmented training was at epoch 156 to epoch 157 +It can be observed, that layers which receive more input feature maps get larger weight +updates in mean. As layers which are closer to the output take more input feature maps, +their weight updates are larger. This pattern does not occur when SGD is used as the +optimizer. +Figure 5.8.: Mean weight updates of the baseline model between epochs by layer. +46 +5.1. Baseline Model and Training setup +Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer. +Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer. + 47 +5. Experimental Evaluation +5.2. 
Confusion Matrix Ordering +The visualization of the confusion matrix can give valuable information about which part +of the task is hard. For more than about 10 classes, however, it becomes hard to visualize +and read. +For CIFAR-10, the proposed method groups the four object classes and the six animal +classes together (see Figure 5.11a). +(a) CIFAR-10 Test set (b) Random +Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal +elements are set to 0 in order to make other elements easier to see. +Figure 5.11b shows a confusion matrix with random mistakes. +The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test +accuracy where a good permutation was found. Please note that this is not the best classifier. +The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is +displayed as the second image. +Those results suggest that the ordering of classes is a valuable tool to make patterns easier +to see. Humans, however, are good at finding patterns even if they come from random noise. +Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy +and 40 % uniformly random errors of a balanced dataset is created, optimized according to +Equation (4.1) and shown in Figure 5.11b. It clearly looks different from Figure 5.11a. +On the HASYv2 dataset the class-ordering is necessary to see anything as most possible +confusions do not happen. See Figure 5.13 for a comparison of the first 50 classes of the +unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a +maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices +because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be +displayed. +48 +5.2. Confusion Matrix Ordering +Figure 5.12.: The first image shows the confusion matrix for the test set of GTSRB after optimization +according to Equation (4.1).
The diagonal elements are set to 0 in order to make other elements +easier to see. The symbols next to the label on the vertical axis indicate the shape +and the color of the signs. +The second image shows the same, but with the baseline model. +Best viewed in electronic form. 49 +Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal +elements are set to 0 in order to make other elements easier to see. The top image +shows arbitrary class ordering, the bottom image shows the optimized ordering. +5.3. Spectral Clustering vs CMO +5.3. Spectral Clustering vs CMO +This section evaluates the clustering quality of CMO in comparison to the clustering quality +of spectral clustering. +The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on +CIFAR-100. Figure 5.14 shows the sorted confusion matrix. +Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The +diagonal elements are set to 0 in order to make other elements easier to see. Best +viewed in electronic form. +CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters +which are to be found. The number of errors is determined as follows: (i) Join all $n$ clusters which +contain the classes of the coarse class $C$ to a set $M$. The error is $n - 1$. (ii) Within $M$, find the +set of classes $M^-$ which do not belong to $C$. (iii) The final error is $(n - 1) + |M^-|$. As can be +seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has +only half the error of spectral clustering. +The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be +noted that the number of clusters was determined by using the semi-automatic method +based on CMO as described in Section 4.2. + 51 +5.
Experimental Evaluation +Cluster Spectral clustering Errors CMO Errors +fish aquarium fish, orchid + flatfish ++ ray, shark + trout, lion 5 aquarium fish, orchid + flatfish ++ ray + shark, trout 4 +flowers orchid, aquarium fish + sun- +flower + poppy, tulip + rose, +train 5 + orchid, aquarium fish + sun- +flower, poppy, tulip, rose 2 +people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 +reptiles crocodile, plain, road, table, +wardrobe + dinosaur + lizard ++ snake, worm + turtle 9 crocodile, lizard, lobster, cater- +pillar + dinosaur + snake + tur- +tle, crab 6 +trees maple, oak, pine + willow, forest ++ palm 3 palm, willow, pine, maple, oak 0 +Total 24 12 +Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , +whereas clusters are separated by +. +Cluster Spectral clustering Errors CMO Errors +A A , A, A 0 A , A, A , Å 1 +B B , B 0 B , B 0 +C C , c, ⊂ and C , ξ , E and C 4 C , c, ⊂ , C and C 1 +D D , D , D , 1 D , D , D 0 +E E and E , ε 2 E and E , ε , , ∈ 4 +F F and F , F 1 F and F , F 1 +H H and H , κ and H 3 H and H , H 1 +K K , κ 0 K , κ 0 +L L, and L, L 1 L, and L, L 1 +M M and M and M 2 M and µ , M and M 3 +N N and N , N and N 2 N and N , N and N , ℵ 3 +O O , O , 0, ◦, °, and o 1 O , O , 0, ◦, ° and and o 2 +P P , P and p, ρ and P and ℘ 3 P and P , P , ℘ and p , ρ 2 +Q Q, Q , Q, ι , , , , , Æ, 1 7 Q and Q , Q 1 +R R , R and R , R, k and 3 R and , R, R, R 1 +S S , s , S 0 S , s , S 0 +T T , and T , τ 1 T , and T , τ 1 +U U , ∪ and u , U , A 1 U , u, U , A and ∪ 2 +V V , v , ∨ 0 V , v , ∨ 0 +W W , w , ω 0 W , w and ω 1 +X X , x , X , χ , × 0 X , x , X , χ, × 0 +Y Y and y 1 Y , y 0 +Z Z , z , Z and Z, Z 1 Z , z , Z, Z , Z 0 +Total 34 25 +Table 5.5.: Differences in spectral clustering and CMO. +52 +5.4. Hierarchy of Classifiers +5.4. Hierarchy of Classifiers +In a first step, a classifier is trained on the 100 classes of CIFAR-100. 
The fine-grained root +classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on +the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better +results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either +be due to limited training data, overfitting or the small size of 32 px × 32 px of the data. +The experiment also shows that most of the errors are due to not identifying the correct +cluster. Hence, in this case, more work in improving the root classifier is necessary rather +than improving the discrimination of classes within a cluster. +Although the classes within a cluster capture most of the classifications, many misclassifica- +tions happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would +push the accuracy in the ful l column only to 63.50 % due to errors of the root classifier +where the root classifier does not predict the correct cluster. +The leaf classifiers use the same topology as the root classifier. By initializing them with +the root classifiers weights their performance can be pushed at about the inner accuracy. +They are, however, only useful if their accuracy is well above theinner accuracy of the root +classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. 
+Cluster Classes accuracy +root classifier leaf classifier +cluster identified class identified | cluster class identified | cluster +1 3 69.67 % 84.27 % 72.98 % +2 5 46.60 % 58.54 % 43.47 % +3 2 58.50 % 92.13 % 83.46 % +4 2 50.50 % 87.83 % 81.74 % +5 3 44.67 % 79.29 % 71.01 % +6 2 29.50 % 78.67 % 72.00 % +7 2 52.50 % 92.11 % 87.72 % +8 2 59.50 % 86.23 % 81.88 % +9 2 59.00 % 90.08 % 87.79 % +10 2 62.00 % 85.52 % 73.10 % +11 2 67.00 % 87.01 % 75.32 % +12 2 72.50 % 94.77 % 76.77 % +13 2 64.00 % 82.58 % 86.27 % +14 2 79.67 % 89.85 % 89.10 % +Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on +14 clusters of classes. Each class has 100 elements to test. The columncluster identified +gives the percentage that the root classifiers argmax prediction is within the correct +cluster, but not necessarily the correct class. The columnsclass identified | cluster only +consider data points where the root classifier correctly identified the cluster. + 53 +5. Experimental Evaluation +5.5. Increased width for faster learning +More filters in one layer could simplify the optimization problem as each filter needs smaller +updates. Hence a CNN N with n + i filters in layer i is expected to take more epochs than a +CNN N + with 2 · n + i filters in layer i to achieve the same validation accuracy. +This hypothesis can be falsified by training a CNN N and a CNN N + and comparing the +trained number of epochs. As more filters can lead to different results depending on the +layer where they are added, five models are trained. The details about those models are +given in Table 5.7 + Name Layer Filter count Total +Baseline New parameters +m + 9 9 64 638 5 978 566 +m +9 9 64 974 8 925 622 +m + 11 11 512 3786 5 982 698 +m +11 11 512 1024 1 731 980 +m + 13 13 512 8704 5 982 092 +Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer +was increased. +The detailed results are given in Table 5.8. 
As expected, the number of training epochs of +the models with increased numbers of parameters is lower. The wall-clock time, however, is +higher due to the increase in computation per forward- and backward-pass. +For m +9 , m + 11 and m +13 , the filter weight range of the layer with increased capacity decreases +compared to Figure 5.6, the filter weights of the layer with increased capacity are more +concentrated around zero compared to Figure 5.2. For model m +13 , the distribution of +weight of the output layer changed to a more bell-shaped distribution. Except for this, the +distribution of filter weights in other layers did not change for all three models compared to +the baseline. +Model Parameters Accuracy Training +Single Model Ensemble Mean Epochs Mean Time +Mean std +baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s +m + 9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s +m +9 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s +m +11 5 982 698 65. 73 % 0.77 67. 38 % 149.2 5450 s +m +11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s +m +13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s +Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m +9 , m +11 , m + 13 +as well as their accuracies. +54 +5.6. Weight updates +5.6. Weight updates +Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is +that every single weight updates can be smaller to learn the same function. Thus the loss +function is smoother and thus gradient descent based optimization algorithms lead to more +consistent weight updates. +Consequently, it is expected that layers with fewer filters have more erratic updates. If +there are many filters, the weights of a filter which does not contribute much to the end +results or is even harmful filter can gradually be set to zero, essentially removing one path +in the network. +In order to test the hypothesis, the baseline model was adjusted. 
The number of filters in +layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean +weight updates of layers 1, 3, 5, 7 and 9 have a far bigger range than those of layers 11, 13 and +15 after epoch 50. Compared to the baseline model's mean updates (Figure 5.8, Page 46), +the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update +from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher. +For the maximum and the sum, no similar pattern could be observed (see Figures A.3 +and A.4). +Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but +with layer 5 reduced to 3 filters. + 55 +5. Experimental Evaluation +5.7. Multiple narrow layers vs One wide layer +On a given feature map size one can have an arbitrary number of convolutional layers with +SAME padding and each layer can have an arbitrary number of filters. A convolutional layer +with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called +narrower and the number of filters in a convolutional layer is the layer's width. +If the number of parameters which may be used for the feature map scale is fixed and high +enough, there are still many combinations. If $n_i$ with $i = 0, \dots, k$ is the number of output +feature maps of layer $i$ where $i = 0$ is the input layer and all filters are 3 × 3 filters without +a bias, then the number of parameters is +$\text{Parameters} = \sum_{i=1}^{k} (n_{i-1} \cdot 3^2 + 1) \cdot n_i$ +Hence the width of one layer does not only influence the parameters in this layer, but also +in the next layer. +The number of possible subsequent layers of one feature map size is enormous, even if +constraints are placed on the number of parameters. For example, the first convolutional +layer of the baseline model has 896 parameters.
If one assumes that less than 3 filters per +layer are not desirable, one keeps all layers having a bias and all layers only use3 × 3 filters, +then the maximum depth is 10. If one furthermore assumes that at least 800 parameters +should be used, there are still 120 possible layer combinations. As experimentally evaluating +one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible +to evaluate all layer combinations. In the following, a couple of changes to the network +width / depth will be evaluated. +Each layer expands the perceptive field. Hence deeper layer can use more of the input for +every single output value. But deeper networks need more time for inference as the output +of layer i has to be computed before the output of i + 1 can be computed. Hence there is +less potential to parallelize computations. Each filter can be seen as a concept which can +be learned. The deeper the filter is in the network, the higher is the abstraction level of the +concept. In most cases, both is necessary: Many different concepts (width) and high-level +concepts (depth). +Reducing the two first convolutional layers of the baseline model (see Page 39) to one +convolutional layer of 48 filters ( 944 396 parameters in total, whereas the baseline model +has 944 012 parameters) resulted in a mean accuracy of 61.64 % (- 1. 74 %) and a standard +deviation of σ = 1 .12 (+0.57). The ensemble achieved 63.18 % (- 1 .52 %). As expected, +the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of +the baseline model to 15 s of the model with one less convolutional layer, one less Batch +Normalization and one less activation layer. The inference time was also reduced from6 ms +56 +5.8. Batch Normalization +to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. 
Due to the loss in accuracy of +more than one percentage point of the mean model and the increased standard deviation of +the model's performance, at least two convolutional layers on the 32 px × 32 px feature +map scale are recommended for CIFAR-100. +Changing the baseline to have fewer filters but more layers is another option. This was tried +for the first block at the 32 px × 32 px feature map scale. The two convolutional layers +(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one +convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model +has 944 132 parameters. Compared to the baseline model, the time for inference was the +same. This is unexpected, because the inference time changed when a layer was removed at +this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was +σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21). +Having two nonlinearities at each feature map scale could be important to learn nonlinear +transformations at that scale. As the baseline model does only have one nonlinearity at the +8 × 8 feature map scale, another convolutional layer with 64 filters, Batch Normalization +and ELU was added. To keep the number of parameters constant, layer 11 of the baseline +model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy +of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves +an accuracy of 64.39 % (-0.31). This could indicate that having two convolutional layers +is more important for layers close to the input than for intermediate layers. Alternatively, the +parameters could be more important in layer 11 than having a new convolutional layer after +layer 9. +In order to check the hypothesis that having two convolutional layers is less important in +the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is +removed.
The first convolutional layer was increased from 32 filters to 59 filters, the second +convolutional layer was increased from 32 filters to 58 filters in order to keep the number of +parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test +accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 % +test accuracy (-0.82). +Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map +scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51 +(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important +to have at least one convolutional layer at this feature map scale. +5.8. Batch Normalization +In [CUH15], the authors write that Batch Normalization does not improve ELU networks. +Hence the effect of removing Batch Normalization from the baseline is investigated in this +57 +5. Experimental Evaluation +experiment. +As before, 10 models are trained on CIFAR-100. The training setup and the model $m_{\text{no-bn}}$ +are identical to the baseline model $m$, except that in $m_{\text{no-bn}}$ the Batch Normalization layers +are removed. +One notable difference is the training time: While $m$ needs 21 ms per epoch on average on +a GTX 980, $m_{\text{no-bn}}$ only needs 21 ms per epoch. The number of epochs used for training, +however, also increased noticeably from 149 epochs to 178 epochs on average. The standard +deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for $m_{\text{no-bn}}$. +The mean accuracy of $m_{\text{no-bn}}$ is 62.86 % and hence 0.52 percentage points worse. The +standard deviation between models increased from 0.55 to 0.61. This is likely a result of the +early stopping policy and the differences in training epochs. This can potentially be fixed +by retraining the models which stopped earlier than the model which was trained for the +largest number of epochs.
The ensemble test accuracy is63.88 % and hence 0.82 percentage +points worse than the baseline. +The filter weight range and distribution is approximately the same as Figure 5.6 and +Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of +the baseline are spread out in the first layer and much more concentrated in subsequent layers +(see Figure 5.3), the model without Batch Normalization has rather concentrated weights +in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). +Another model m +no-bn which has one more filter in the convolutional layer 1, 3, 5, and 7 to +compensate for the loss of parameters in Batch Normalization. The mean test accuracy of +10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The +ensemble of m +no-bn achieves 64.33 % which is 0.37 percentage points worse than the baseline. +The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of +20.7 epochs. +Hence it is not advisable to remove Batch Normalization for the final model. It could, +however, be possible to remove Batch Normalization for the experiments to iterate quicker +through different ideas if the relative performance changes behave the same with or without +Batch Normalization. +58 +5.9. Batch size +5.9. Batch size +The mini-batch size m ∈ N + ≥1 influences +• Epochs until convergence : The smaller m, the more often the model is updated +in one epoch. Those updates, however, are based on fewer samples of the dataset. +Hence the gradients of different mini-batches can noticeably differ. In the literature, +this is referred to as gradient noise [KMN + + 16]. +• Training time per epoch + : The smaller the batch size, the higher the training time +per epoch as the hardware is not optimally utilized. +• Resulting model quality : The choice of the hyperparameter m influences the +accuracy of the classifier when training is finished. 
[KMN+ + 16] supports the view that +smaller m result in less sharp minima. Hence smaller m lead to better generalization. +Empiric evaluation results can be found in Table 5.9. Those results confirm the claim +of [KMN + + 16] that lower batch sizes generalize better. +m Training + Epochs Mean total Single model Ensemble +time training time Accuracy std Accuracy +8 118 s +epoch 81 – 153 14 131 s 61 .93 % σ = 1.03 65.68 % +16 62 s +epoch 103 – 173 8349 s 64 . 16 % σ = 0.81 66. 98 % +32 35 s +epoch 119 – 179 5171 s 64 .11 % σ = 0.75 65.89 % +64 25 s +epoch 133 – 195 2892 s 63. 38 % σ = 0. 55 64.70 % +128 18 s +epoch 145 – 239 3126 s 62 .23 % σ = 0.73 63.55 % +Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) +of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on +CIFAR-100. +5.10. Bias +Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a +model m +no-bias is created which is identical to the baseline modelm, except that the bias of +layers 11, 13 and 15 is removed. +The mean test accuracy of 10 trained m +no-bias is 63.74 % which is an improvement of +0.36 percentage points over the baseline. The ensemble achieves a test accuracy of65.13 % +which is 0.43 percentage points better than the baseline. Hence the bias can safely be +removed. +Removing the biases did not have a noticeable effect on the filter weight range, the filter +weight distribution or the distribution of the remaining biases. Also, theγ and β parameters +of the Batch Normalization layers did not noticeably change. + 59 +5. Experimental Evaluation +5.11. Learned Color Space Transformation +In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1 +directly after the input and then another convolutional layer with 3 filters of size1 × 1 acts +as a learned transformation in another color space and boosts the accuracy. 
+This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU ac- +tivation and 10 filters followed by another convolutional layer with ELU activation and +3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37. +The standard deviation is noticeable higher than the standard deviation of the baseline +model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of +the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of +the baseline models. +The inference time for 1 image and for 128 images did not change compared to the baseline. +The training time per epoch increased from 26 s to 30 s on the GTX 970. +Hence it is not advisable to use the learned color space transformation. +5.12. Pooling +An alternative to max pooling with stride 2 with a2 × 2 kernel is using a 3 × 3 kernel with +stride 2. +This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the +3 × 3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was63. 32 % +(−0.06) and the standard deviation was 0.57 (+0. 02). The ensemble achieved 65.15 % test +accuracy ( +0 .45). +The training time per epoch decreased from20.5 s-21 .1 s to 18. 6 s (mean of 10 training runs) +on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch +of 128 images. +5.13. Activation Functions +Nonlinear, differentiable activation functions are important for neural networks to allow them +to learn nonlinear decision boundaries. One of the simplest and most widely used activation +functions for CNNs is ReLU [ KSH12], but others such as ELU [ CUH15], parametrized +rectified linear unit (PReLU) [ HZRS15b], softplus [ ZYL+ + 15] and softsign [ BDLB09 ] have +been proposed. The baseline uses ELU. +60 +5.13. Activation Functions +Activation functions differ in the range of values and the derivative. 
The definitions and +other comparisons of eleven activation functions are given in Table B.3. +Theoretical explanations why one activation function is preferable to another in some +scenarios are the following: +• Vanishing Gradient : Activation functions like tanh and the logistic function sat- +urate outside of the interval [ −5 , 5] . This means weight updates are very small for +preceding neurons, which is especially a problem for very deep or recurrent networks as +described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12]. +• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem. +The gradient of the ReLU function is 0 for all non-positive values. This means if all +elements of the training set lead to a negative input for one neuron at any point in the +training process, this neuron does not get any update and hence does not participate +in the training process. This problem is addressed in [MHN13]. +• Mean unit activation: Some publications like [ CUH15, IS15] claim that mean +unit activations close to 0 are desirable. They claim that this speeds up learning +by reducing the bias shift effect. The speedup of learning is supported by many +experiments. Hence the possibility of negative activations is desirable. +Those considerations are listed in Table 5.10 for 11 activation functions. Besides the +theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline +network was adjusted so that every activation function except the one of the output layer +was replaced by one of the 11 activation functions. +As expected, PReLU and ELU performed best. Unexpected was that the logistic function, +tanh and softplus performed worse than the identity and it is unclear why the pure-softmax +network performed so much better than the logistic function. One hypothesis why the +logistic function performs so bad is that it cannot produce negative outputs. 
Hence the +logistic− function was developed: +$$\operatorname{logistic}^{-}(x) = \frac{1}{1 + e^{-x}} - 0.5$$ +The logistic− function has the same derivative as the logistic function and hence still suffers +from the vanishing gradient problem. The network with the logistic− function achieves an +accuracy which is 11.30 percentage points better than the network with the logistic function, but is still +5.54 percentage points worse than the ELU. +Similarly, ReLU was adjusted to have a negative output: +$$\operatorname{ReLU}^{-}(x) = \max(-1, x) = \operatorname{ReLU}(x + 1) - 1$$ +The results of ReLU− are much worse on the training set, but perform similarly on the test +61 +5. Experimental Evaluation +set. The result indicates that the possibility of a hard zero and thus a sparse representation +is either not important or similarly important as the possibility to produce negative outputs. +This contradicts [GBB11, SMGS14]. +A key difference between the logistic− function and ELU is that ELU neither suffers +from the vanishing gradient problem nor is its range of values bounded. For this reason, the +S2ReLU activation function was developed. It is defined as +$$\operatorname{S2ReLU}(x) = \operatorname{ReLU}\left(\frac{x}{2} + 1\right) - \operatorname{ReLU}\left(-\frac{x}{2} + 1\right) = \begin{cases} \frac{x}{2} - 1 & \text{if } x \le -2 \\ x & \text{if } -2 \le x \le 2 \\ \frac{x}{2} + 1 & \text{if } x > 2 \end{cases}$$ +This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU +does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be +the identity close to zero and have a smaller absolute value than the identity farther away. +It is easy to compute and easy to implement. +Those results — not only the absolute values, but also the relative comparison — might +depend on the network architecture, the training algorithm, the initialization and the +dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A.2. For +both datasets, the logistic function has a much shorter training time and a noticeably lower +test accuracy.
+Function Vanishing Gradient Negative Activation possible Bound activation +Identity No Yes No +Logistic + Yes No Yes +Logistic− + Yes Yes Yes +Softmax Yes Yes Yes +tanh Yes Yes Yes +Softsign + Yes Yes Yes +ReLU Yes 1 + No Half-sided +Softplus No No Half-sided +S2ReLU No Yes No +LReLU/PReLU No Yes No +ELU No Yes No +Table 5.10.: Properties of activation functions. +1 + The dying ReLU problem is similar to the vanishing gradient problem. +62 +5.13. Activation Functions +Function Single model Ensemble of 10 +Training set Test set Training set Test set +Identity 66.25 % σ = 0 . 77 56 .74 % σ = 0. 51 68.77 % 58 .78 % +Logistic 51.87 % σ = 3.64 46 .54 % σ = 3. 22 61.19 % 54 .58 % +Logistic− + 66.49 % σ = 1.99 57 .84 % σ = 1. 15 69.04 % 60 .10 % +Softmax 75.22 % σ = 2.41 59 .49 % σ = 1. 25 78.87 % 63 .06 % +Tanh 67. 27 % σ = 2.38 55 .70 % σ = 1. 44 70.21 % 58 .10 % +Softsign 66. 43 % σ = 1.74 55 .75 % σ = 0. 93 69.78 % 58 .40 % +ReLU 78. 62 % σ = 2.15 62 .18 % σ = 0. 99 81.81 % 64 .57 % +ReLU− + 76. 01 % σ = 2.31 62 .87 % σ = 1. 08 78.18 % 64 .81 % +Softplus 66. 75 % σ = 2.45 56 .68 % σ = 1. 32 71.27 % 60 .26 % +S2ReLU 63. 32 % σ = 1.69 56 .99 % σ = 1. 14 65.80 % 59 .20 % +LReLU 74. 92 % σ = 2.49 61 .86 % σ = 1. 23 77.67 % 64 .01 % +PReLU 80 .01 % σ = 2.03 62 .16 % σ = 0. 73 83. 50 % 64. 79 % +ELU 76. 64 % σ = 1.48 63. 38 % σ = 0. 55 78.30 % 64 .70 % +Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation +functions on CIFAR-100. For LReLU, α = 0.3 was chosen. 
+Function Inference per Training + Epochs Mean total +1 Image 128 time training time +Identity 8 ms 42 ms 31 s +epoch 108 – 148 3629 s +Logistic 6 ms 31 ms 24 s +epoch 101 – 167 2234 s +Logistic− + 6 ms 31 ms 22 s +epoch 133 – 255 3421 s +Softmax 7 ms 37 ms 33 s +epoch 127 – 248 5250 s +Tanh 6 ms 31 ms 23 s +epoch 125 – 211 3141 s +Softsign 6 ms 31 ms 23 s +epoch 122 – 205 3505 s +ReLU 6 ms 31 ms 23 s +epoch 118 – 192 3449 s +Softplus 6 ms 31 ms 24 s +epoch 101 – 165 2718 s +S2ReLU 5 ms 32 ms 26 s +epoch 108 – 209 3231 s +LReLU 7 ms 34 ms 25 s +epoch 109 – 198 3388 s +PReLU 7 ms 34 ms 28 s +epoch 131 – 215 3970 s +ELU 6 ms 31 ms 23 s +epoch 146 – 232 3692 s +Table 5.12.: Training time and inference time of adjusted baseline models trained with different +activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the +identity is the fastest function. This result is likely an implementation specific problem +of Keras 2.0.4 or Tensorflow 1.1.0. + 63 +5. Experimental Evaluation +Function Single model Ensemble Epochs +Accuracy std Accuracy Range Mean +Identity 99.45 % σ = 0. 09 99.63 % 55 – 77 62.2 +Logistic 97.27 % σ = 2. 10 99.48 % 37 – 76 54.5 +Softmax 99.60 % σ = 0. 03 99.63 % 44 – 73 55.6 +Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6 +Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0 +ReLU 99. 62 % σ = 0.04 99. 73 % 51 – 94 71.7 +Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9 +PReLU 99.57 % σ = 0.07 99. 73 % 44 – 89 71.2 +ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5 +Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions +on MNIST. +5.14. Label smoothing +Ensembles consisting of n models trained by the same procedure on the same data but +initialized with different weights and trained with a different order of the training data +perform consistently better than single models. One drawback of ensembles in applications +such as self-driving cars is that they increase the computation by a factor of n . 
One idea +why they improve the test accuracy is by reducing the variance. +The idea of label smoothing is to use the ensemble prediction of the training data as labels +for another classifier. For every element x of the training set, the one-hot encoded target +t (x ) is smoothed by the ensemble prediction y + E (x ) +t + (x ) = α · t (x ) + (1 − α )y + E (x ) +where α ∈ [0, 1] is the smoothing factor. +There are three reasons why label smoothing could be beneficial: +• Training speed : The ensemble prediction contains more information about the +image than binary class decisions. Classifiers in computer vision predict how similar +the input looks to other input of the classes they are trained on. By smoothing the +labels, the information that one image could also belong to another class is passed to +the optimizer. In early stages of the optimization this could lead to a lower loss on +the non-smoothed validation set. +• Higher accuracy : Using smoothed labels for the optimization could lead to a higher +accuracy of the base-classifier due to a smoothed error surface. It might be less likely +64 +5.14. Label smoothing +that the classifier gets into bad local minima. +• Label noise : Depending on the way how the labels are obtained, it might not always +be clear which label is the correct one. Also, labeling errors can be present in training +datasets. Those errors severely harm the training. By smoothing the labels errors +could be relaxed. +10 models m + smooth are trained with the α = 0 . 5 smoothed labels from the prediction +of an ensemble of 10 baseline models. The mean accuracy of the models trained on the +smoothed training set labels was63.61 % (+0. 23 %) and the standard deviation wasσ = 0.72 +(+0 .17 %). The ensemble of 10 m + smooth models achieved 64 .79 % accuracy (+0 .09 %). Hence +the effect of this kind of label smoothing on the final accuracy is questionable. 
+The training speed did not noticeably change either: The number of trained epochs ranged +from 144 to 205, the mean number of epochs was 177. The baseline training ranged from +146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs the accuracies of both training +methods differed by less than one percentage point. Hence it is unlikely that label +smoothing has a positive effect on the training speed. +Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and +hard labels for training, whereas this work only used smoothed labels. + 65 +5. Experimental Evaluation +5.15. Optimized Classifier +In comparison to the baseline classifier, the following changes are applied to the optimized +classifier: +• Remove the bias for the last layers: For all layers which output a 1 × 1 feature +map, the bias is removed. +• Increase the max pooling kernel to 3 × 3 +• More filters in the first layers +The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation +is given in Table 5.15 and the timing comparison is given in Table 5.16.
+# Type Filters @ +Patch size / stride Parameters FLOPs Output size +Input 0 0 3 @ 32 × 32 +1 Convolution 69 @ 3 × 3 × 3 / 1 1 932 3 744 768 69 @ 32 × 32 +2 BN + ELU 138 353 418 69 @ 32 × 32 +3 Convolution 69 @ 3 × 3 × 32 / 1 42 918 37 684 096 69 @ 32 × 32 +4 BN + ELU 138 353 418 69 @ 32 × 32 +Max pooling 2 × 2 / 2 0 40 960 32 @ 16 × 16 +5 Convolution 64 @ 3 × 3 × 32 / 1 39 808 20 332 544 64 @ 16 × 16 +6 BN + ELU 128 82 048 64 @ 16 × 16 +7 Convolution 64 @ 3 × 3 × 64 / 1 36 928 18 857 984 64 @ 16 × 16 +8 BN + ELU 128 82 048 64 @ 16 × 16 +Max pooling 2 × 2 / 2 20 480 64 @ 8 × 8 +9 Convolution 64 @ 3 × 3 × 64 / 1 36 928 4 714 496 64 @ 8 × 8 +10 BN + ELU 128 20 608 64 @ 8 × 8 +Max pooling 2 × 2 / 2 5 120 64 @ 4 × 4 +11 Convolution (v) 512 @ 4 × 4 × 64 / 1 524 288 1 048 064 512 @ 1 × 1 +12 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +13 Convolution 512 @ 1 × 1 × 512 / 1 262 144 523 776 512 @ 1 × 1 +14 BN + ELU 1 024 3 584 512 @ 1 × 1 +Dropout 0.5 0 0 512 @ 1 × 1 +15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1 +Global avg Pooling 1 × 1 0 k k @ 1 × 1 +16 BN + Softmax 2 k 7k k @ 1 × 1 + + 514 k ++947 654 520 k ++87 870 996 179 200+2k +Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers +use SAME padding, except for layer 11 which used VALID padding in order to decrease +the feature map size to 1 × 1. If the input feature map is bigger than32 × 32, for each +power of two there are two Convolution + BN + ELU blocks and one Max pooling +block added. This is the framed part in the table. +66 +5.15. Optimized Classifier32 × 32Input +C 69@3 × 3 / 1 +BN + ELU +C 69@3 × 3 / 1 +BN + ELU 16 × 16max pooling 3 × 3 /2 +C 64@3 × 3 /1 +BN + ELU +C 64@3 × 3 /1 +BN + ELU 8 × 8max pooling 3 × 3 /2 +C 64@3 × 3 /1 +BN + ELU 4 × 4max pooling 3 × 3 /2 +C* 512@4 × 4 /1 (V) +BN + ELU +Dropout, p = 0. 5 1 × 1C* 512@1 × 1 /1 +BN + ELU +Dropout, p = 0. 
5 +C* k @1 × 1 / 1 +Global AVG pooling +BN + Softmax +Figure 5.16.: Architecture of the optimized model. C 32@3 × 3 / 1 is a convolutional layer with +32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used. +Dataset Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set +Asirra 95. 83 % σ = 4.70 90.75 % σ = 4. 73 98 . 78 % 93.09 % +CIFAR-10 94. 58 % σ = 0.70 87.92 % σ = 0. 46 96 . 47 % 89.86 % +CIFAR-100 77. 96 % σ = 2.18 64.42 % σ = 0. 73 81 . 44 % 67.03 % +GTSRB 100. 00 % σ = 0.00 99.28 % σ = 0. 10 100 . 00 % 99.51 % +HASYv2 88. 79 % σ = 0.45 85.36 % σ = 0. 15 89 . 36 % 85.92 % +MNIST 99. 88 % σ = 0.10 99.48 % σ = 0. 13 99 . 99 % 99.67 % +STL-10 95. 43 % σ = 3.57 75.09 % σ = 2. 39 98 . 54 % 78.66 % +SVHN 99. 08 % σ = 0.07 96.37 % σ = 0. 12 99 . 50 % 97.47 % +Table 5.15.: Optimized model accuracy on eight datasets. The single model accuracy is the mean over the 10 models +used in the ensemble. The empirical standard deviation σ of the accuracy is also given. +CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the +models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN +and HASY, no test-time transformations are used. +Network GPU Tensorflow Inference per Training +1 Image 128 images time / epoch +Optimized Default Intel i7-4930K 5 ms 432 ms 386 s +Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s +Optimized Default GeForce 940MX 4 ms 205 ms 192 s +Optimized Default GTX 970 6 ms 41 ms 35 s +Optimized Default GTX 980 3 ms 35 ms 27 s +Optimized Default GTX 980 Ti 6 ms 36 ms 26 s +Optimized Default GTX 1070 2 ms 24 ms 21 s +Optimized Default Titan Black 4 ms 46 ms 43 s +Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is +evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken +from [Maj17]. Weights of the baseline model can be found at [Tho17b].
The optimized +Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. + 67 +5. Experimental Evaluation +5.16. Early Stopping vs More Data +A separate validation set is necessary for two reasons:(1)Early stopping and(2)preventing +overfitting due to many experiments. To prevent overfitting, a different dataset can be used. +For example, all decisions about hyperparameters in this thesis are based on CIFAR-100, +but the network is finally trained and evaluated with the same hyperparameters on all +datasets. 2 + The validation set can hence be removed if early stopping is removed. Instead, +the validation data is used in a first run to determine the number of epochs necessary for +training. In a second training run the validation data is added to the training set. The +number of used epochs for the second run is given in Table 5.17. +Dataset Mean epochs Train data classes average data / class +Asirra 60 15 075 2 7538 +MNIST 41 54 000 10 5400 +SVHN 45 543 949 10 54 395 +CIFAR-10 84 45 000 10 4500 +HASYv2 92 136 116 369 369 +GTSRB 97 35 288 43 821 +STL-10 116 4500 10 450 +CIFAR-100 155 45 000 100 450 +Table 5.17.: Mean number of training epochs for the optimized model. For comparison, the total +amount of used training data, the number of classes of the dataset and the average +amount of data per class is given. +Alternatively, the model can be trained with early stopping (ES) purely on the training +loss. All three methods – early stopping on the validation set accuracy, early stopping on +the training loss and training a fixed number of epochs are evaluated. While having more +data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other +datasets are only marginally different. For CIFAR-10, training with more data did not +improve the results when the number of epochs is fixed, but notably improved the results +when the training loss was used as the early stopping criterion. +5.17. 
Regularization +Stronger regularization might even improve the results when using the training loss as an +early stopping criterion. +ℓ2 regularization with a weighting factor of λ = 0.0001 is used in +all other experiments. While the accuracy as shown in Table 5.19 does not show a clear +pattern, the number of epochs increases with lower model regularization (see Table 5.20). +2 + Except data augmentation and test-time transformations. +3 + Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model. +4 + Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. +68 +5.17. Regularization +Dataset Early Stopping Fixed epochs +val. acc train loss +Asirra 93. 09 % 96. 01 %3 + 96.01 % +CIFAR-10 89. 86 % 91. 75 % 88 .88 % +CIFAR-100 67. 03 % 71. 01 % 69 .08 % +HASYv2 85. 92 % 82. 89 %4 + 85.05 % +MNIST 99. 67 % 99. 64 % 99 .57 % +STL-10 78. 66 % 83. 25 % 78 .64 % +Table 5.18.: Comparison of trained optimized models with early stopping on the validation accuracy +compared to training setups without a validation set and thus more training data. The +second column uses the training loss as a stopping criterion, the third column uses a +fixed number of epochs which is equal to the mean number of training epochs of the +models with early stopping on the validation set accuracy. +λ Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set +λ = 0.01 73. 83 % σ = 1.78 58.94 % σ = 1.33 87 .78 % 69. 98 % +λ = 0.001 82.86 % σ = 0. 89 63.03 % σ = 0.67 91 .86 % 71. 02 % +λ = 0.0001 77.96 % σ = 2. 18 64.42 % σ = 0.73 81 .44 % 67. 03 % +Table 5.19.: Different choices of ℓ2 model regularization applied to the optimized model. +λ min max mean std +λ = 0. 01 457 503 404.6 37.2 +λ = 0. 001 516 649 588.4 41.6 +λ = 0.
0001 579 833 696.1 79.1 +Table 5.20.: Training time in epochs of models with early stopping on training loss by different +choices of ℓ2 model regularization applied to the optimized model. + 69 +5. Experimental Evaluation +70 +6. Conclusion and Outlook +This master's thesis gave an extensive overview of the design patterns of CNNs in Chapter 2, +the methods by which CNNs can be analyzed and the principal directions of topology learning +algorithms in Chapter 3. +Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations +of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering +algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are: +• Ordering the classes in the confusion matrix allows displaying the relevant parts even +for several hundred classes. +• A hierarchy of classifiers based on the classes does not improve the results on CIFAR- +100. There are three possible reasons for this: +– 32 px × 32 px is too low dimensional +– 100 classes are not enough for this approach +– More classes are always easier to distinguish if each new class comes with more +data. One reason why this might be the case is that distinguishing the object +from the background has similar properties even for different classes. +• Label smoothing had only a minor effect on the accuracy and no effect on the training +time when a single base classifier was used to train with the smoothed labels by an +ensemble of base classifiers. +A baseline model was defined and evaluated on eight publicly available datasets. The +baseline's topology and training setup are described in detail as well as its behavior during +training and properties of the weights of the trained model. +The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100.
+The insights of those experiments are: +• Averaging ensembles of 10 base classifiers of the same architecture and trained with the +same setup consistently improves the accuracy. The amount of improvement depends +on the base classifiers, but the ensemble tends to improve the test accuracy by about +one percentage point. +• Wider networks learn in fewer epochs. This, however, does not mean that the +71 +6. Conclusion and Outlook +wall-clock time is lower, because the computation in forward and backward +passes increases. +• Batch Normalization increases the training time noticeably. For the described ELU +baseline model it also increases accuracy, which contradicts [CUH15]. +• The lower the batch size, the longer the time for each epoch of training and the fewer +epochs need to be trained. Higher accuracy by lower batch sizes was empirically +confirmed. The batch size, however, can also be too low. +• An analysis of the weights of the baseline indicated that the bias of layers close to +the output layer can be removed. This was experimentally confirmed. +• It could not be confirmed that learned color space transformation, as described +in [MSM16], improves the network, neither with ELU nor with leaky rectified linear +unit (LReLU) and α = 0.3. +• It could be confirmed that ELU networks give better results than any other activation +function on CIFAR-100. For the character datasets MNIST and HASYv2, however, +ReLU, LReLU, PReLU, Softplus and ELU all performed similarly. +• Changing the activation functions to the identity had very little impact on the HASYv2 +and MNIST classifiers. Note that those networks are still able to learn nonlinear +decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however, +the accuracy drops by 6.64 percentage points when ELU is replaced by the identity. +Based on the results of those experiments, an optimized classifier was developed and +evaluated on all eight datasets.
+The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without +using the unlabeled part of the dataset. The state of the art of HASYv2 was improved +from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from +99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %. 1 +This was mainly achieved by the combination of ELU, Dropout, ensembles, training data +augmentation and test-time transformations. The removal of the bias of layers close to the +output and re-usage of those parameters in layers close to the input as well as using 3 × 3 +pooling instead of 2 × 2 pooling improved the baseline. +While writing this master's thesis, several related questions could not be answered: +• Deeper CNNs generally have higher accuracy, if trained long enough and if overfitting +is not a problem. But at which subsampling-level does having more layers have the +biggest effect? Can this question be answered before a deeper network is trained? +• Is label smoothing helpful for noisy labels? +1 + The baseline is better than the optimized model on Asirra and on HASYv2. +72 +• How does the choice of activation functions influence residual architectures? Could the +results be the same for different activation functions in architectures with hundreds +of layers? +• The results for the pooling kernel were inconclusive. Larger pooling kernels might be +advantageous as well as fractional max pooling [Gra15]. +• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that +can and should be fixed? +• Why is softmax so much better than the logistic function? Can the reason be used to +further improve ELU? +Besides those questions, the influence of optimizers on time per epoch, epochs until +convergence, total training time, memory consumption, accuracy of the models and standard +deviation of the models was not evaluated.
This, and the stopping criterion for training, +might be crucial for the models' quality. + 73 +74 +A. Figures, Tables and Algorithms +(a) Original image (b) Smoothing filter (c) Laplace edge detection filter +(d) Sobel edge detection filter (e) Prewitt edge detection filter (f) Canny filter +Figure A.1.: Examples of image filters. Best viewed in electronic form. +Layer 99-percentile interval +filter bias +1 [-0.50, 0.48] [-0.06, 0.07] +3 [-0.21, 0.19] [-0.07, 0.07] +5 [-0.20, 0.17] [-0.07, 0.05] +7 [-0.15, 0.14] [-0.05, 0.06] +9 [-0.14, 0.15] [-0.04, 0.03] +11 [-0.08, 0.08] [-0.00, 0.00] +13 [-0.08, 0.08] [-0.00, 0.00] +15 [-0.10, 0.11] [-0.01, 0.01] +Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model +trained on CIFAR-100. + 75 +Figure A.2.: The distribution of bias weights of a model without batch normalization trained on +CIFAR-100. +Algorithm 1 Simulated Annealing for minimizing Equation (4.1). +Require: C ∈ ℕ^{n×n}, steps ∈ ℕ, T ∈ ℝ⁺, c ∈ (0, 1) +procedure SimulatedAnnealing(C, steps, T, c) +bestScore ← accuracy(C) +bestC ← C +for i = 0; i < steps; i ← i + 1 do +p ← randomFloat(0, 1) +if p < 0.5 then ▷ Swap rows +i ← randomInteger(1, . . . , n) +j ← randomInteger(1, . . . , n) \ {i} +p ← randomUniform(0, 1) +C′ ← swap(C, i, j) +s ← accuracy(C′) +if p < exp((s − bestScore) / T) then +C ← C′ +if s > bestScore then +bestScore ← s +bestC ← C +T ← T · c +else ▷ Move Block +s ← randomInteger(1, . . . , n) ▷ Block start +e ← randomInteger(s, . . . , n) ▷ Block end +i ← randomInteger(1, . . . , n − (e − s)) ▷ Block insert position +Move Block (s, . . . , e) to position i +return bestC +76 +Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model, +but with layer 5 reduced to 3 filters. +Function Single model Ensemble of 10 Epochs +Training set Test set Train Test Range Mean +Identity 87.92 % σ = 0.40 84 . 69 % σ = 0.08 88 .59 % 85 .
43 % 92 – 140 114.5 +Logistic 81.46 % σ = 5.08 79 . 67 % σ = 4.85 86 .38 % 84 . 60 % 58 – 91 77.3 +Softmax 88.19 % σ = 0.31 84 . 70 % σ = 0.15 88 .69 % 85 . 43 % 124 – 171 145.8 +Tanh 88.41 % σ = 0.36 84 . 46 % σ = 0.27 89 .24 % 85 . 45 % 89 – 123 108.7 +Softsign 88.00 % σ = 0.47 84 . 46 % σ = 0.23 88 .77 % 85 . 33 % 77 – 119 104.1 +ReLU 88.93 % σ = 0.46 85 . 35 % σ = 0.21 89 .35 % 85 . 95 % 96 – 132 102.8 +Softplus 88.42 % σ = 0. 29 85. 16 % σ = 0.15 88 .90 % 85 . 73 % 108 – 143 121.0 +LReLU 88.61 % σ = 0.41 85 . 21 % σ = 0 . 05 89.07 % 85 . 83 % 87 – 117 104.5 +PReLU 89. 62 % σ = 0.41 85 .35 % σ = 0.17 90. 10 % 86. 01 % 85 – 111 100.5 +ELU 89.49 % σ = 0.42 85 .35 % σ = 0.10 89 .94 % 86 . 03 % 73 – 113 92.4 +Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on +HASYv2. For LReLU, α = 0.3 was chosen. + 77 +Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but +with layer 5 reduced to 3 filters. +Function Single model Ensemble of 10 Epochs +Training set Test set Train Test Range Mean +Identity 87 .49 % σ = 2. 50 69 .86 % σ = 1.41 89 .78 % 71 .90 % 51 – 65 53.4 +Logistic 45.32 % σ = 14.88 40 .85 % σ = 12.56 51 . 06 % 45 .49 % 38 – 93 74.6 +Softmax 87.90 % σ = 3. 58 67 .91 % σ = 2.32 91 . 51 % 70 .96 % 108 – 150 127.5 +Tanh 85.38 % σ = 4. 04 67 .65 % σ = 2.01 90 . 47 % 71 .29 % 48 – 92 65.2 +Softsign 88.57 % σ = 4. 00 69 .32 % σ = 1.68 93 . 04 % 72 .40 % 55 – 117 83.2 +ReLU 94.35 % σ = 3. 38 71 .01 % σ = 1.63 98 . 20 % 74 .85 % 52 – 98 75.5 +Softplus 83.03 % σ = 2.07 68 .28 % σ = 1.74 93 . 04 % 75 .99 % 56 – 89 68.9 +LReLU 93.83 % σ = 3.89 74 . 66 % σ = 2.11 97 . 56 % 78 .08 % 52 – 120 80.1 +PReLU 95.53 % σ = 1.92 71 . 69 % σ = 1.37 98 .17 % 74 .69 % 59 – 101 78.8 +ELU 95.42 % σ = 3.57 75 . 09 % σ = 2.39 98 .54 % 78 .66 % 66 – 72 67.2 +Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on +STL-10. For LReLU, α = 0.3 was chosen. 
+78 +B. Hyperparameters +Hyperparameters are parameters of models which are not optimized automatically (e.g., by +gradient descent), but by methods like random search [BB12], grid search [LBOM98] or +manual search. +B.1. Preprocessing +Preprocessing used to be of major importance in machine learning. However, with the +availability of data sets with hundreds of examples per class and the ability of CNNs to +learn features themselves, most models today rely on raw pixel values. The only common +preprocessing is size normalization. In order to get a fixed input-size for a CNN, the +following procedure can be used: +• Take one or multiple crops of the image which have the desired aspect ratio. +• Scale the crop(s) to the desired size. +• In training, all crops can be used independently. In testing, all crops can be passed +through the network and the output probability distributions can be fused, for +example by averaging. +Other preprocessing methods are: +• Color space transformations (RGB, HSV, etc.) +• Mean subtraction +• Standardization of pixel-values to [0, 1] by dividing by 255 (used by [HLW16]) +• Dimensionality reduction +– Principal component analysis (PCA): An unsupervised linear transformation +which can be learned in the first hidden layer. It is hence doubtful if PCA +improves the network. +– Linear discriminant analysis (LDA) +• Zero Components Analysis (ZCA) whitening (used by [KH09]) + 79 +B.2. Data augmentation +Data augmentation techniques aim at artificially creating more data from real data items by +applying invariances.
For computer vision, they include: +Name Augmentation Factor Used by +Horizontal flip 2 [KSH12, WYS+15] +Vertical flip 2 [DWD15] ¹ +Rotation ∼ 40 (δ = 20) [DSRB14] +Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14] +Crops $32^2 = 1024$ [KSH12, WYS+15] +Shearing [Gra15] +GANs [BCW+17] +Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13] +Hue 51 (δ = 0.1) [MRM15, DSRB14] +Saturation ∼ 20 (δ = 0.5) [DSRB14] +Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13] +Channel shift [KSH12] +Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for +typical situations. For example, the augmentation factor for random crops is calculated +for 256 px × 256 px images which are cropped to 224 px × 224 px. +Taking several scales if the original is of higher resolution than desired is another technique. +Combinations of the techniques above can also be applied. Please note that the order of +operations does matter in many cases and hence the order is another augmentation factor. +Less common, but also reasonable are: +• Adding noise +• Elastic deformations +• Color casting (used by [WYS+15]) +• Vignetting (used by [WYS+15]) +• Lens distortion (used by [WYS+15]) +¹ Vertical flipping combined with 180° rotation is equivalent to horizontal flipping +80 +B.3. Initialization +Weight initializations are usually chosen to be small and centered around zero. One way to +characterize many initialization schemes is by +$w \sim \alpha \cdot U[-1, 1] + \beta \cdot N(0, 1) + \gamma$ with $\alpha, \beta, \gamma \geq 0$ +Table B.2 shows six commonly used weight initialization schemes. Several schemes use the +same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. 
+Name α β γ Reference +Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] +Xavier/Glorot uniform $\alpha = \sqrt{\frac{6}{n_{in} + n_{out}}}$ β = 0 γ = 0 [GB10] +Xavier/Glorot normal α = 0 $\beta = \left(\frac{2}{n_{in} + n_{out}}\right)^2$ γ = 0 [GB10] +He α = 0 $\beta = \frac{2}{n_{in}}$ γ = 0 [HZRS15b] +Orthogonal — — γ = 0 [SMG13] +LSUV — — γ = 0 [MM15] +Table B.2.: Weight initialization schemes of the form $w \sim \alpha \cdot U[-1, 1] + \beta \cdot N(0, 1) + \gamma$. +$n_{in}$, $n_{out}$ are the number of units in the previous layer and the next layer. Typically, +biases are initialized with constant 0 and weights by one of the other schemes to prevent +unit-coadaptation. However, dropout makes it possible to use constant initialization for +all parameters. +LSUV and Orthogonal initialization cannot be described with this simple pattern. +B.4. Objective function +For classification tasks, the cross-entropy +$E_{CE}(W) = -\sum_{x \in X} \sum_{k=1}^{K} \left[ t_k^x \log(o_k^x) + (1 - t_k^x) \log(1 - o_k^x) \right]$ +is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, +X is the set of training examples, K is the number of classes, $t_k^x \in \{0, 1\}$ indicates if the +training example x is of class k, $o_k^x$ is the output of the classifier for the training example x +and class k. +However, regularization terms weighted with a constant $\lambda \in (0, +\infty)$ are sometimes added: +• LASSO: $\ell_1$ (e.g., used in [HPTD15]) +• Weight decay: $\ell_2$ (e.g., λ = 0.0005 as in [MSM16]) +• Orthogonality regularization ($|W^T \cdot W - I|$, see [VTKP17]) + 81 +B.5. Optimization Techniques +Most relevant optimization techniques for CNNs are based on SGD, which updates the +weights according to the rule +$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_x}{\partial w_{ji}}$ +where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. +A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically +mini-batch sizes are |B| ∈ {32, 64, 128, 256, 512}, e.g. [ZF14]). 
Larger mini-batch sizes +lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes +lead to longer training times due to computational overhead and to more training steps due +to gradient noise. +$w_{ji} \leftarrow w_{ji} + \Delta w_{ji}$ with $\Delta w_{ji} = -\eta \frac{\partial E_B}{\partial w_{ji}}$ +Nine variations which adjust the learning rate during training are: +• Momentum: +$w_{ji}^{(t+1)} \leftarrow w_{ji}^{(t)} + \Delta w_{ji}^{(t+1)}$ with $\Delta w_{ji}^{(t+1)} = -\eta \frac{\partial E_B}{\partial w_{ji}} + \alpha \Delta w_{ji}^{(t)}$ +with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16]) +• Adagrad [DHS11] +• RProp and the mini-batch version RMSProp [TH12] +• Adadelta [Zei12] +• Power Scheduling [Xu11]: $\eta(t) = \eta(0)(1 + a \cdot t)^{-c}$, where $t \in \mathbb{N}_0$ is the training step, +a, c are constants. +• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and +decrease the learning rate when the algorithm's improvement is below a threshold. +• Exponential Decay Learning Rate [SHY+13]: $\eta(t) = \eta(0) \cdot 10^{-\frac{t}{k}}$ where $t \in \mathbb{N}_0$ is the +training step, η(0) is the initial learning rate, $k \in \mathbb{N}_{\geq 1}$ is the number of training steps +until the learning rate is decreased to $\frac{1}{10}$th of its value. +• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential +Decay Scheduling. +• Adam and AdaMax [KB14] +82 +• Nadam [Doz15] +Some of those are explained in [Rud16]. +Other first-order gradient optimization methods are: +• Quickprop [Fah88] +• Nesterov Accelerated Momentum (NAG) [Nes83] +• Conjugate Gradient method [Cha92]: Combines a line search for the step size with +the gradient's direction. +Higher-order gradient methods like Newton's method or quasi-Newton methods like BFGS +and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs. 
+However, there are alternatives which do not use gradient information: +• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02] +• Simulated Annealing [vLA87] +• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described +on [Tho14b] +There are also approaches which learn the optimization algorithm [ADG+16, LM16]. + 83 +B.6. Network Design +CNNs have the following hyperparameters: +• Depth: The number of layers +• Width: The number of filters per layer +• Layer and block connectivity graph +• Layer and block hyperparameters: +– Activation Functions as shown in Table B.3 +– For more, see Sections 2.2 and 2.3. +Name Function $\varphi(x)$ Range of Values $\varphi'(x)$ Used by +Sign function † $\begin{cases} +1 & \text{if } x \geq 0 \\ -1 & \text{if } x < 0 \end{cases}$ $\{-1, 1\}$ 0 [KS02] +Heaviside step function † $\begin{cases} +1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ $\{0, 1\}$ 0 [MP43] +Logistic function $\frac{1}{1 + e^{-x}}$ $[0, 1]$ $\frac{e^x}{(e^x + 1)^2}$ [DJ99] +Tanh $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ $[-1, 1]$ $\operatorname{sech}^2(x)$ [LBBH98, Tho14a] +ReLU † $\max(0, x)$ $[0, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x < 0 \end{cases}$ [KSH12] +LReLU † ² (PReLU) $\varphi(x) = \max(\alpha x, x)$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ \alpha & \text{if } x < 0 \end{cases}$ [MHN13, HZRS15b] +Softplus $\log(e^x + 1)$ $(0, +\infty)$ $\frac{e^x}{e^x + 1}$ [DBB+01, GBB11] +ELU $\begin{cases} x & \text{if } x > 0 \\ \alpha (e^x - 1) & \text{if } x \leq 0 \end{cases}$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x > 0 \\ \alpha e^x & \text{otherwise} \end{cases}$ [CUH15] +Softmax ‡ $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ $[0, 1]^K$ $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^{K} e^{x_k} - e^{x_j}}{\sum_{k=1}^{K} e^{x_k}}$ [KSH12, Tho14a] +Maxout ‡ $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ $(-\infty, +\infty)$ $\begin{cases} 1 & \text{if } x_i = \max \mathbf{x} \\ 0 & \text{otherwise} \end{cases}$ [GWFM+13] +Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 +and functions marked with ‡ operate on all elements of a layer simultaneously. The +hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other +activation functions like randomized leaky ReLUs exist [XWCL15], but are far less +commonly used. 
+Some functions are smoothed versions of others, like the logistic function for the +Heaviside step function, tanh for the sign function, softplus for ReLU. +Softmax is the standard activation function for the last layer of a classification network +as it produces a probability distribution. See Figure B.1 for a plot of some of them. +² α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. +84 +−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0 +−1.0 −0.5 0.5 1.0 1.5 2.0 +x y +$\varphi_1(x) = \frac{1}{1 + e^{-x}}$ +$\varphi_2(x) = \tanh(x)$ +$\varphi_3(x) = \max(0, x)$ +$\varphi_4(x) = \log(e^x + 1)$ +$\varphi_5(x) = \max(x, e^x - 1)$ +Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative +numbers. The image of ELU, ReLU and Softplus is not bounded on the positive side, +whereas tanh and the logistic function are always below 1. +B.7. Regularization +Regularization techniques aim to make the fitted function smoother and reduce overfitting. +Regularization techniques are: +• $\ell_1$, $\ell_2$, and Orthogonality regularization: See Appendix B.4 +• Max-norm regularization (e.g. used in [SHK+14]) +• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth +(see [HSL+16]) +• Feature scale clipping (see [ZF14]) +• Data augmentation (according to [ZBH+16]) +• Global average pooling (according to [ZKL+15]) +• Dense-Sparse-Dense training (see [HPN+16]) +• Soft targets (see [HVD15]) + 85 +86 +C. Calculating Network Characteristics +C.1. Parameter Numbers +• A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is +due to the bias. +• A convolutional layer i with $k_i$ filters of size n × m being applied to $k_{i-1}$ feature maps +has $k_i \cdot (k_{i-1} \cdot n \cdot m + 1)$ parameters. The +1 is due to the bias. +• A fully connected layer with n nodes after k feature maps of size $m_1 \times m_2$ has +$n \cdot (k \cdot m_1 \cdot m_2 + 1)$ parameters. 
+• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has $L + n \cdot 3^2 + 3^2 \cdot n^2 \sum_{i=0}^{L} (L - i) = L + 9n + 9n^2 \frac{L^2 - L}{2}$ parameters. +According to [HPTD15], AlexNet has 60 million parameters which is roughly the number +calculated in Table D.2. +C.2. FLOPs +The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence +the following numbers only give rough estimates. +In the following, $n_\varphi$ denotes the number of FLOPs to compute the non-linearity ϕ. For +simplicity, $n_\varphi = 5$ was chosen. +• A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with +$W \in \mathbb{R}^{n \times k}$, $x \in \mathbb{R}^{k \times 1}$, $b \in \mathbb{R}^{n \times 1}$. It hence needs about n · (k + (k − 1) + 1) = 2nk +additions / multiplications before the non-linearity ϕ is calculated. The total number +of FLOPs is $2 \cdot n \cdot k + n \cdot n_\varphi$. +• In the following, biases are ignored. A convolutional layer with $k_i$ filters of size n × m +being applied to $k_{i-1}$ filter maps of size w × h results in $k_i$ filter maps of size w × h if +padding is applied. For each element of each filter map, $n \cdot m \cdot k_{i-1}$ multiplications and +$(n \cdot m \cdot k_{i-1} - 1)$ additions have to be made. This results in $(2nmk_{i-1} - 1) \cdot (k_i \cdot w \cdot h)$ +operations. The total number of FLOPs is $(2 \cdot n \cdot m \cdot k_{i-1} - 1) \cdot (k_i \cdot w \cdot h) + k_i \cdot w \cdot h \cdot n_\varphi$. +This is, of course, a naive way of calculating a convolution. There are other ways of +calculating convolutions [LG16]. + 87 +• A fully connected layer with n nodes after k feature maps of size w × h needs 2n(k · w · h) +FLOPs. The total number of FLOPs is $2n \cdot (k \cdot w \cdot h) + n \cdot n_\varphi$. +• As Dropout is only calculated during training, the number of FLOPs was set to 0. +• The number of FLOPs for max pooling is dominated by the number of positions to +which the pooling kernel is applied. For a feature map of size w × h a max pooling +filter with stride s gets applied $\frac{w \cdot h}{s^2}$ times. 
The number of FLOPs per application depends +on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs. +• + The number of FLOPs for Batch Normalization is the same as the number of its +parameters. +Here are some references which give information for the FLOPs: +• AlexNet +– 1.5B in total [HPTD15]. +– 725M in total [KPY + + 15]. +– 3300M in total in Table D.2 +• VGG-16: +– 15484M in total [HPTD15]. +– 31000M in total in Table D.3. +• GoogleNet: 1566M in total [HPTD15]. +One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same +network. +C.3. Memory Footprint +The memory footprint of CNNs determines when networks can be used at all and if they +can be trained efficiently. In order to be able to train CNNs efficiently, one weight update +step has to fit in the memory of the GPU. This includes the following: +• Activations: All activations of one mini-batch in order to calculate the gradients +in the backward pass. This is the number of floats in the feature maps of all weight +layers combined. +• Weights +• Optimization algorithm +: The optimization algorithm introduces some overhead. +For example, Adam stores two parameters per weights. +At inference time, every two consecutive layers have to fit into memory. When the forward +pass of layer A to layer B is calculated, the memory can be freed if no skip connections are +used. +88 +D. Common Architectures +In the following, some of the most important CNN architectures are explained. Understand- +ing the development of these architectures helps understanding critical insights the machine +learning community got in the past years for convolutional networks for image recognition. +It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from +2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed +Inception-v4 is also covered. +The summation row gives the sum of all floats for the output size column. 
This allows +conclusions about the maximum mini-batch size which can be in memory for training. + 89 +D.1. LeNet-5 +One of the first CNNs used was LeNet-5 [ LBBH98 ]. LeNet-5 uses two times the common +pattern of a single convolutional layer withtanh as a non-linear activation function followed +by a pooling layer and three fully connected layers. One fully connected layer is used to +get the right output dimension, another one is necessary to allow the network to learn a +non-linear combination of the features of the feature maps. +Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test +error rate of 0.8 % on MNIST. +Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98]. +# Type Filters @ +Patch size / stride Parameters FLOPs Output size +Input 0 0 1 @ 32 × 32 +1 Convolution 6 @ 5 × 5 × 1 / 1 156 307 800 6 @ 28 × 28 +2 Scaled average pooling 2 × 2 / 2 2 336 6 @ 14 × 14 +3 Convolution 16 @ 5 × 5 × 6 / 1 2 416 942 400 16 @ 10 × 10 +4 Scaled average pooling 2 × 2 / 2 2 1 600 16 @ 5 × 5 +5 Fully Connected 120 neurons 48 120 240 000 120 +6 Fully Connected 84 neurons 10 164 20 580 84 +7 Fully Connected (output) 10 neurons 850 1 730 10 + + 61 710 15 144 446 9118 +Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation function is applied. +After layer 7, the softmax function is applied. One can see that convolutional layer +need much fewer parameters, but an order of magnitude more FLOPs per parameter +than fully connected layers. +90 +D.2. AlexNet +The first CNN which achieved ma jor improvements on the ImageNet dataset was AlexNet [KSH12]. +Its architecture is shown in Figure D.2 and described in Table D.2. It has about60· 106 + param- +eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet. +Note that the uncompressed size is at least 60 965 224 floats · 32 bit +float ≈ 244 MB. 
+Figure D.2.: Architecture of AlexNet as shown in [ KSH12]: Convolutional Layers are followed +by pooling layers multiple times. At the end, a fully connected network is applied. +Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). +# Type Filters @ +Patch size / stride Parameters FLOPs Output size +Input 3 @ 224 × 224 +1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55 +LCN 12 M 96 @ 55 × 55 +2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27 +3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448 M 256 @ 13 × 13 +LCN 3 M 256 @ 13 × 13 +4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13 +5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13 +7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13 +9 Convolution 256 @ 3 × 3 × 192 / 1 442 624 150 M 256 @ 13 × 13 +10 Max pooling 3 × 3 / 2 0 50 k 256 @ 6 × 6 +11 FC 4096 neurons 37 752 832 75 M 4096 +12 FC 4096 neurons 16 781 312 34 M 4096 +13 FC 1000 neurons 4 097 000 8 M 1000 + + 60 965 224 3300 M 1 122 568 +Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to +computational restrictions at the time of its development. This also reduces the number +of parameters and allows parallel computation on separate GPUs. However, to make +the architecture easier to compare, this grouping was ignored for the parameter count. +The FLOPs are taken from [ HPTD15 ] and combined with rough estimates for Local +Contrast Normalization and max pooling. +The calculated number of parameters was checked against the downloaded version. It +also has 60 965 224 parameters. + 91 +D.3. VGG-16 D +Another widespread architecture is the VGG-16 (D) [ SZ14]. VGG comes from the V isual +Geometry Group in Oxford which developed this architecture. It has 16 layers which can +learn parameters. A ma jor difference compared to AlexNet is that VGG-16 uses only3 × 3 +filters and is much deeper. 
A visualization of the architecture is shown in Figure D.3 and a +detailed textual description is given in Table D.3. +A trained VGG-16 D for Tensorflow can be downloaded athttps://github.com/machrisaa/ +tensorflow- vgg . Note that the uncompressed size is at least 138 357 544 floats · 32 bit +float ≈ +520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and +514 MB with compression.224 × 224Input +C 64@3 × 3 /1 +C 64@3 × 3 /1 112 × 112max pooling 2 × 2 /1 +C 128@3 × 3 /1 +C 128@3 × 3 /1 56 × 56max pooling 2 × 2/ 1 +C 256@3 × 3 /1 +C 256@3 × 3 /1 +C 256@3 × 3 /1 28 × 28max pooling 2 × 2/ 1 +C 512@3 × 3 /1 +C 512@3 × 3 /1 +C 512@3 × 3/ 1 14 × 14max pooling 2 × 2 /1 +C 512@3 × 3/ 1 +C 512@3 × 3/ 1 +C 512@3 × 3/ 1 7 × 7max pooling 2 × 2 /1 +Fully Connected 4096 +Dropout, p = 0.5 +Fully Connected 4096 +Dropout, p = 0.5 +Fully Connected 1000 +Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of +kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding. 
+92 +# Type Filters @ +Patch size / stride Parameters FLOPs Output size +Input 3 @ 224 × 224 +1 Convolution 64 @ 3 × 3 × 3 / 1 1 792 186 M 64 @ 224 × 224 +2 Convolution 64 @ 3 × 3 × 64 / 1 36 928 3712 M 64 @ 224 × 224 +Max pooling 2 × 2 / 2 0 2 M 64 @ 112 × 112 +3 Convolution 128 @ 3 × 3 × 64 / 1 73 856 1856 M 128 @ 112 × 112 +4 Convolution 128 @ 3 × 3 × 128 / 1 147 584 3705 M 128 @ 112 × 112 +Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56 +5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56 +6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 +7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 +Max pooling 2 × 2 / 2 0 < 1 M 256 @ 28 × 28 +8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28 +9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 +10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 +Max pooling 2 × 2 / 2 0 < 1 M 512 @ 14 × 14 +11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 +12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 +13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 +Max pooling 2 × 2 / 2 0 < 1 M 512 @ 7 × 7 +14 FC 4096 neurons 102 764 544 206 M 4096 +Dropout 0 0 4096 +15 FC 4096 neurons 16 781 312 34 M 4096 +Dropout 0 0 4096 +16 FC 1000 neurons 4 097 000 8 M 1000 + + 138 357 544 31 000 M 15 245 800 +Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have +learnable parameters. All convolutions are zero padded to prevent size changes and +use ReLU activation functions. The channels mean is subtracted from each pixel as +a preprocessing step ( −103 . 939 , − 116 .779 , − 123 .68 ). As Dropout is only calculated +during training time, the number of FLOPs is 0. The dropout probability is 0. 5. +The calculated number of parameters was checked against the downloaded version. It +also has 138 357 544 parameters. + 93 +D.4. 
GoogleNet, Inception v2 and v3 +The large number of parameters and operations is a problem when such models should get +applied in practice to thousands of images. In order to reduce the computational cost while +maintaining the classification quality, GoogleNet [SLJ+ + 15] and the Inception module were +developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and +5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of +parameters. It is shown in Figure D.4. +Figure D.4.: Inception module +Image source: [SLJ+ + 15] +Compared to GoogleNet, Inception v2 [ SVI+ + 15] removed the 5 × 5 filters and replaced +them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module +is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to +approximate symmetric filters with fewer parameters. The authors call this approachfilter +factorization. +Inception v3 introduced Batch Normalization to the network [SVI+ + 15]. +Figure D.5.: Inception v2 module +Image source: [SVI+ + 15] +94 +D.5. Inception-v4 +Inception-v4 as described in [ SIV16] consists of four main building blocks: The stem, +Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper, +wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A +and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use +average pooling. The stem, module B and module C use separable convolutions. +# × Type Parameters Output size +Input 3 @ 299 × 299 +1 Stem 605 728 384 @ 35 × 35 +2 4× Inception A 317 632 384 @ 35 × 35 +3 Reduction A 2 306 112 1024 @ 17 × 17 +4 7× + Inception B 2 936 256 1024 @ 17 × 17 +5 Reduction B 2 747 392 1536 @ 8 × 8 +6 3× Inception C 4 553 088 1536 @ 8 × 8 +Global Average Pooling 0 1536 @ 1 × 1 +Dropout (p=0.8) 0 1536 @ 1 × 1 +7 Softmax 1 537 000 1000 + + 42 679 816 +Table D.4.: Inception-v4 network. 
+ 95 +96 +E. Datasets +Well-known benchmark datasets for classification problems in computer vision are listed +in Table E.1. The best results known to me are given in Table E.2. However, every semantic +segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers +using Algorithm 2. +Database Image Resolution +(width × height) Number +of +Images Number +of +Classes Channels Data source +MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98] +HASYv2 32 px × 32 px 168 233 369 1 [Tho17a] +SVHN 32 px × 32 px 630 420 10 3 [NWC + + 11b], +[NWC + + 11a] +CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09] +CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09] +STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10] +Caltech-101 (80 px − 3481 px) +×(92 px − 3999 px) 9144 102 3 [FFP03, FFFP06] +Caltech-256 (75 px − 7913 px) +×(75 px − 7913 px) 30 607 257 3 [Gri06, GG07] +ILSVRC 20121 (8 px − 9331 px) +×(10 px − 6530 px) 1.2 · 106 + 1000 3 [Ima12, RDS+ + 14] +Places3652 (290px − 3158 px ) +×(225px − 2630 px ) 1. 8 · 106 + 365 3 [Zho16, ZKL+ + 16] +GTSRB (25 px − 266 px) +×(25 px − 232 px) 51 839 43 3 [SSSI, SSSI12] +Asirra3 (4 px − 500 px) +×(4 px − 500 px) 25 000 2 3 [Asi17, EDHS07] +Graz-02 480 px × 640 px +and 640 px × 480 px 1096 3 3 [Mar08, MS07] +Table E.1.: An overview over publicly available image databases for classification. The number +of images row gives the sum of the training and the test images. Some datasets, like +SVHN, have additional unlabeled data which is not given in this table. +1 + ImageNet Large Scale Visual Recognition Competition +2 + The dimensions are only calculated for the validation set. 
+3 Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle +97 +Dataset Model type / name Result Score Achieved / +Claimed by +MNIST — 0.21 % error [WZZ+13] +HASYv2 TF-CNN 81.00 % accuracy [Tho17a] +SVHN DenseNet (k = 24) 1.59 % error [HLW16] +CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16] +CIFAR-100 WRN-28-10 16.21 % error [LH16] +STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15] +Caltech-101 SPP-net (pretrained) 93.42 % ± 0.5 % accuracy [HZRS14] +Caltech-256 ZF-Net (pretrained) 74.2 % ± 0.3 % accuracy [ZF14] +ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a] +GTSRB MCDNN 99.46 % accuracy [SL11] +Asirra SVM 82.7 % accuracy [Gol08] +Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10] +Table E.2.: An overview of state of the art results achieved in computer vision datasets. +Algorithm 2 Create a classification dataset from a semantic segmentation dataset +Require: Semantic segmentation dataset ($D_S$) +procedure CreateDataset(Annotated dataset $D_S$) +$D_C$ ← List +w ← desired image width +h ← desired image height +for Image and associated label (x, y) in $D_S$ do +i ← randint(0, y.width − w) +j ← randint(0, y.height − h) +$c_L$ ← crop(y, (i, j), (i + w, j + h)) +if at least 50% of the pixels of $c_L$ are of one class then +$c_I$ ← crop(x, (i, j), (i + w, j + h)) +$D_C$.append(($c_I$, $c_L$)) +return ($D_C$) +98 +F. List of Tables +2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 +5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 +5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 +5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40 +5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52 +5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52 +5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . 
. . . . . . . . . . 53 +5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54 +5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54 +5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +5.10 Activation function properties . . . . . . . . . . . . . . . . . . . . . . . . . . 62 +5.11 Activation function evaluation results on CIFAR-100 . . . . . . . . . . . . . 63 +5.12 Activation function timing results on CIFAR-100 . . . . . . . . . . . . . . . 63 +5.13 Activation function evaluation results on MNIST . . . . . . . . . . . . . . . 64 +5.14 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 +5.15 Optimized model evaluation results . . . . . . . . . . . . . . . . . . . . . . . 67 +5.16 Optimized model speed comparison . . . . . . . . . . . . . . . . . . . . . . . 67 +5.17 Optimized model mean training epochs . . . . . . . . . . . . . . . . . . . . . 68 +5.18 Optimized model trained with early stopping vs training with more data . . 69 +5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69 +5.20 Model regularization with early stopping on training loss - Training time . . 69 +A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75 +A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77 +A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78 +B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80 +B.2 Weight initialization schemes . . . . . . . . . . . . . . . . . . . . . . . . . . 81 +B.3 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 84 +D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 +D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +D.3 VGG-16 D architecture . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . 93 +D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 +99 +E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 +E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98 +100 +G. List of Figures +2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3 +2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6 +2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 +2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +2.5 Aggregation block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 +2.6 Dense block . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 +2.7 Validation curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 +2.8 Validation curve with plateaus . . . . . . . . . . . . . . . . . . . . . . . . . 18 +2.9 Learning curve . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 +2.10 Occlusion analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 +2.11 Filter visualization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26 +3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 +4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33 +5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 +5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42 +5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42 +5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 
43 +5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44 +5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45 +5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46 +5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47 +5.10 Baseline Weight updates (sum) . . . . . . . . . . . . . . . . . . . . . . . . . 47 +5.11 Confusion matrices for CIFAR-10 . . . . . . . . . . . . . . . . . . . . . . . . 48 +5.12 Confusion matrices for GTSRB . . . . . . . . . . . . . . . . . . . . . . . . . 49 +5.13 Confusion matrices for HASYv2 . . . . . . . . . . . . . . . . . . . . . . . . . 50 +5.14 Confusion matrix of CIFAR-100 . . . . . . . . . . . . . . . . . . . . . . . . . 51 +5.15 Mean weight updates of model with bottleneck . . . . . . . . . . . . . . . . 55 +5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67 +A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75 +A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76 +101 +A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77 +A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 78 +B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 +D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 90 +D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 +D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 +D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 +102 +H. Bibliography +[AAB+ + 16] M. Abadi, A. Agarwal et al. 
, “Tensorflow: Large-scale machine learning on +heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467 , Mar. +2016. [Online]. Available: https://arxiv . org/abs/1603. 04467 +[ABKS99] M. Ankerst, M. M. Breunig et al., “ OPTICS: Ordering points to identify the +clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp. +49–60. +[ADG+ + 16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by +gradient descent,” in Advances in Neural Information Processing Systems 29 +(NIPS) , D. D. Lee, M. Sugiyama et al. , Eds. Curran Associates, Inc., Mar. +2016, pp. 3981–3989. [Online]. Available: http://papers .nips .cc/paper/6461- +learning-to- learn-by-gradient- descent- by- gradient-descent.pdf +[AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism: +Going deeper into neural networks,” Jun. 2015. [Online]. Avail- +able: https://research . googleblog . com/2015/06/inceptionism-going-deeper- +into- neural.html +[Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https: +//www .microsoft.com/en-us/download/details .aspx?id=54765 +[BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,” +Journal of Machine Learning Research , vol. 13, no. Feb, pp. 281–305, +Feb. 2012. [Online]. Available: http://jmlr .csail .mit .edu/papers/volume13/ +bergstra12a/bergstra12a .pdf +[BCW+ + 17] + J. Bao, D. Chen et al., “ CVAE-GAN: Fine-grained image generation through +asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online]. +Available: https://arxiv .org/abs/1703.10155 +[BDLB09] + J. Bergstra, G. Desjardins et al. , “Quadratic polynomials learn better im- +age features,” Département d’Informatique et de Recherche Opérationnelle, +Université de Montréal, Tech. Rep. 1337, 2009. +[BGNR16] B. Baker, O. Gupta et al. , “Designing neural network architectures using +reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online]. 
+Available: https://arxiv.org/abs/1611.02167 + 103 +[BM93] U. Bodenhausen and S. Manke, Automatically Structured Neural +Networks For Handwritten Character And Word Recognition. London: +Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http: +//dx.doi.org/10.1007/978-1-4471-2063-6_283 +[BMDP10] + R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest +neighbor,” in European Conference on Computer Vision (ECCV). Springer, +2010, pp. 171–184. +[BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of +feature pooling in visual recognition,” in International Conference on +Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available: +http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf +[BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies +with gradient descent is difficult,” IEEE transactions on neural networks, +vol. 5, no. 2, pp. 157–166, 1994. +[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training +of artificial neural networks,” IEEE Proceedings G-Circuits, Devices +and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: +http://ieeexplore.ieee.org/document/143326/ +[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015. +[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks +in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109, +p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/ +coatesleeng_aistats_2011.pdf +[CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available: +http://cs.stanford.edu/~acoates/stl10 +[CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural +networks for image classification,” in Conference on Computer Vision and +Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online]. +Available: https://arxiv.org/abs/1202.2745v1 +[CUH15] D.-A. Clevert, T. Unterthiner, and S.
Hochreiter, “Fast and accurate +deep network learning by exponential linear units (ELUs),” arXiv +preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https: +//arxiv.org/abs/1511.07289 +[CWV+ + 14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep +learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.0759 +104 +[DBB+ + 01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional +knowledge for better option pricing,” in Advances in Neural Infor- +mation Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich, +and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online]. +Available: http://papers.nips.cc/paper/1920-incorporating-second-order- +functional-knowledge-for-better-option-pricing.pdf +[DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry +in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb. +2016. [Online]. Available: https://arxiv.org/abs/1602.02660 +[DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for +online learning and stochastic optimization,” Journal of Machine Learning +Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: +http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf +[DHS16] + J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via +multi-task network cascades,” in Conference on Computer Vision and Pattern +Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available: +https://arxiv.org/abs/1512.04412 +[DJ99] + W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural +Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: +ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf +[Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford +University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/ +proj2015/054_report.pdf +[DSRB14] A.
Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised +feature learning with convolutional neural networks,” in Advances in Neural +Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling +et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. +Available: http://papers.nips.cc/paper/5548-discriminative-unsupervised- +feature-learning-with-convolutional-neural-networks.pdf +[DWD15] + S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional +neural networks for galaxy morphology prediction,” Monthly notices of the +royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015. +[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that +exploits interest-aligned manual image categorization,” in ACM Con- +ference on Computer and Communications Security (CCS), no. 14. +Association for Computing Machinery, Inc., Oct. 2007. [Online]. +105 +Available: https://www.microsoft.com/en-us/research/publication/asirra-a- +captcha-that-exploits-interest-aligned-manual-image-categorization/ +[EKS+ + 96] M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering +clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996, +pp. 226–231. +[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing. +Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3- +662-44874-8 +[Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation +networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/ +viewcontent.cgi?article=2799&context=compsci +[FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object +categories,” IEEE transactions on pattern analysis and machine intelligence, +vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http: +//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf +[FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online].
Available: http: +//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html +[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discrimina- +tively trained part-based models,” IEEE transactions on pattern analysis and +machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010. +[FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,” +1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/ +[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep +feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online]. +Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf +[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural +networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available: +http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf +[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object +detection and semantic segmentation,” in Conference on Computer Vision +and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online]. +Available: https://arxiv.org/abs/1311.2524 +[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr. +2007. [Online]. Available: http://authors.library.caltech.edu/7694/ +106 +[GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with +Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158, +Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6 +[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman +New York, 2002, vol. 29. +[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete +graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267, +1976. +[Gol08] P.
Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM +conference on Computer and communications security (CCS), no. 15. ACM, +2008, pp. 535–542. +[Gra15] + B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May +2015. [Online]. Available: https://arxiv.org/abs/1412.6071 +[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: +http://www.vision.caltech.edu/Image_Datasets/Caltech256/ +[GWFM+ + 13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML, +vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http: +//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf +[HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for +transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online]. +Available: https://arxiv.org/abs/1608.08614 +[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online]. +Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf +[Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available: +https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/ +[HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional +networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.06993v1 +[HM16] + M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv +preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https: +//arxiv.org/abs/1611.04231 +[How13] A. G. Howard, “Some improvements on deep convolutional neural network +based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013. +[Online]. Available: https://arxiv.org/abs/1312.5402 + 107 +[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques. +Elsevier, 2011. +[HPN+ + 16] + S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with +dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016.
+[Online]. Available: https://arxiv .org/abs/1607.04381 +[HPTD15] S. Han, J. Pool et al. , “Learning both weights and connections for efficient +neural network,” in Advances in Neural Information Processing Systems 28 +(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun. +2015, pp. 1135–1143. [Online]. Available: http://papers .nips .cc/paper/5784- +learning-both-weights- and- connections-for- efficient- neural- network.pdf +[HSK+ + 12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing +co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580 , Jul. +2012. [Online]. Available: https://arxiv .org/abs/1207.0580 +[HSL+ + 16] G. Huang, Y. Sun et al. , “Deep networks with stochastic depth,” +arXiv preprint arXiv:1603.09382 , Mar. 2016. [Online]. Available: https: +//arxiv .org/abs/1603. 09382 +[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon +and general network pruning,” in International Conference on Neural +Networks . IEEE, 1993, pp. 293–299. [Online]. Available: http: +//ee. caltech .edu/Babak/pubs/conferences/00298572.pdf +[HVD15] + G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural +network,” arXiv preprint arXiv:1503.02531 , Mar. 2015. [Online]. Available: +https://arxiv .org/abs/1503.02531 +[HZRS14] K. He, X. Zhang et al. , “Spatial pyramid pooling in deep convolutional +networks for visual recognition,” in European Conference on Computer +Vision (ECCV) . Springer, 2014, pp. 346–361. [Online]. Available: +https://arxiv .org/abs/1406.4729 +[HZRS15a] K. He, X. Zhang et al. , “Deep residual learning for image recognition,” +arXiv preprint arXiv:1512.03385 , Dec. 2015. [Online]. Available: https: +//arxiv .org/abs/1512. 03385v1 +[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level +performance on imagenet classification,” in International Conference on +Computer Vision (ICCV) , Feb. 2015, pp. 1026–1034. [Online]. 
Available: +https://arxiv.org/abs/1502.01852 +[Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),” +108 +2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/ +2012/nonpub-downloads +[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network +training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167, +Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167 +[JXF+ + 16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation +units,” in Thirtieth AAAI Conference on Artificial Intelligence, Dec. 2016. +[Online]. Available: https://arxiv.org/abs/1512.07030 +[Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr. +2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manually- +classifying-cifar10/ +[KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,” +arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https: +//arxiv.org/abs/1412.6980 +[KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny +images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/ +learning-features-2009-TR.pdf +[KMN+ + 16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning: +Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836, +Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836 +[Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D. +dissertation, Master’s thesis, Czech Technical University in Prague, 2015. +[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf +[KPY+ + 15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks +for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530, +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530 +[KR09] L. Kaufman and P. J.
Rousseeuw, Finding groups in data: an introduction to +cluster analysis. John Wiley & Sons, 2009, vol. 344. +[Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https: +//www .cs.toronto . edu/~kriz/cifar .html +[KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear +and neural network approximation,” IEEE Transactions on Information +Theory , vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: +http://ieeexplore.ieee. org/abstract/document/971754/ + 109 +[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification +with deep convolutional neural networks,” in Advances in Neural +Information Processing Systems 25 (NIPS) , F. Pereira, C. J. C. Burges +et al. , Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. +Available: http://papers .nips .cc/paper/4824-imagenet-classification-with- +deep-convolutional-neural- networks .pdf +[KSlB+ + 10] K. Kavukcuoglu, P. Sermanet et al. , “Learning convolutional feature +hierarchies for visual recognition,” in Advances in Neural Information +Processing Systems 23 (NIPS) , J. D. Lafferty, C. K. I. Williams +et al. , Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. +Available: http://papers .nips . cc/paper/4133-learning- convolutional-feature- +hierarchies- for-visual- recognition .pdf +[LAE + + 16] W. Liu, D. Anguelov et al. , “ SSD: Single shot multibox detector,” in +European Conference on Computer Vision (ECCV) . Springer, 2016, pp. +21–37. [Online]. Available: https://arxiv .org/abs/1512.02325 +[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne. readthedocs.io/ +en/latest/modules/layers/noise .html#lasagne .layers.DropoutLayer +[LBBH98] + Y. LeCun, L. Bottou et al. , “Gradient-based learning applied to document +recognition,” Proceedings of the IEEE , vol. 86, no. 11, pp. 2278–2324, Nov. +1998. [Online]. Available: http://yann .lecun .com/exdb/publis/pdf/lecun- +01a.pdf +[LBH15] Y. LeCun, Y. Bengio, and G. 
Hinton, “Deep learning,” Nature, +vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available: +http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html +[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in +Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol. +1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8 +[LDS+ + 89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989, +pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/ +lecun-90b.pdf +[Le13] + Q. V. Le, “Building high-level features using large scale unsupervised +learning,” in International conference on acoustics, speech and signal +processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http: +//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343 +[LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in +110 +Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. +2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308 +[LGT16] + C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in +convolutional neural networks: Mixed, gated, and tree,” in International +Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available: +https://arxiv.org/abs/1509.08985v2 +[LH16] + I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent +with warm restarts,” Learning, Aug. 2016. [Online]. Available: https: +//arxiv.org/abs/1608.03983 +[LJD+ + 16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to +hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016. +[Online]. Available: https://arxiv.org/abs/1603.06560 +[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885, +Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885 +[LSD15] J. Long, E. Shelhamer, and T.
Darrell, “Fully convolutional networks for +semantic segmentation,” in Conference on Computer Vision and Pattern +Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available: +https://arxiv.org/abs/1411.4038v2 +[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar. +2017. [Online]. Available: https://arxiv.org/abs/1703.01513 +[Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available: +https://github.com/titu1994/DenseNet +[Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online]. +Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/ +[MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter +optimization through reversible learning,” in International Conference on +Machine Learning (ICML), 2015, pp. 2113–2122. +[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of +Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008. +[MHN13] + A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities +improve neural network acoustic models,” in Proc. ICML, vol. 30, +no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/ +relu_hybrid_icml2013_final.pdf +[MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv +111 +preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https: +//arxiv.org/abs/1511.06422 +[MP43] + W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in +nervous activity,” The bulletin of mathematical biophysics, vol. 5, no. 4, pp. +115–133, 1943. +[MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for +reducing dataset bias in person re-identification,” in International Conference +on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015, +pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/ +7301739/ +[MS07] M. Marszalek and C.
Schmid, “Accurate object localization with +shape masks,” in Conference on Computer Vision and Pattern +Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http: +//ieeexplore.ieee.org/document/4270110/ +[MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN +advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016. +[Online]. Available: https://arxiv.org/abs/1606.02228 +[MV16] + A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural +networks using natural pre-images,” International Journal of Computer Vision, +pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017 +[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances +in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges, +L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online]. +Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf +[Nes83] Y. Nesterov, “A method of solving a convex programming problem with +convergence rate $O(1/k^2)$,” in Soviet Mathematics Doklady, vol. 27, no. 2, +1983, pp. 372–376. +[new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available: +http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html +[Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS +Talk, Dec. 2016. +[NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft +weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992. +[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf +[NH02] R. T. Ng and J. Han, “CLARANS: A method for clustering objects for spatial +112 +data mining,” IEEE transactions on knowledge and data engineering, vol. 14, +no. 5, pp. 1003–1016, 2002. +[NWC+ + 11a] + Y. Netzer, T. Wang et al.
, “Reading digits in natural images with +unsupervised feature learning,” in NIPS workshop on deep learning and +unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available: +http://ufldl. stanford.edu/housenumbers/nips2011_housenumbers.pdf +[NWC + + 11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,” +2011. [Online]. Available: http://ufldl. stanford.edu/housenumbers/ +[NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization: +Uncovering the different types of features learned by each neuron in deep +neural networks,” arXiv preprint arXiv:1602.03616 , May 2016. [Online]. +Available: https://arxiv .org/abs/1602.03616 +[OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive +classifiers for unbalanced classification problems: A study on the performance +scores,” arXiv preprint arXiv:1608.08984 , Aug. 2016. [Online]. Available: +https://arxiv . org/abs/1608. 08984 +[PMW+ + 15] N. Papernot, P. McDaniel et al. , “Distillation as a defense to adversarial +perturbations against deep neural networks,”arXiv preprint arXiv:1511.04508, +Nov. 2015. [Online]. Available: https://arxiv .org/abs/1511.04508 +[Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer +Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx . doi . org/ +10. 1007/3-540- 49430-8_3 +[RDS+ + 14] O. Russakovsky, J. Deng et al. , “Imagenet large scale visual recognition +challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep. +2014. [Online]. Available: https://arxiv . org/abs/1409. 0575 +[RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks +for biomedical image segmentation,” in International Conference on Medical +Image Computing and Computer-Assisted Intervention . Springer, 2015, pp. +234–241. [Online]. Available: https://arxiv .org/abs/1505. 04597 +[RLS10] S. Risi, J. Lehman, and K. O. 
Stanley, “Evolving the placement and den- +sity of neurons in the hyperneat substrate,” in Conference on Genetic and +evolutionary computation , no. 12. ACM, 2010, pp. 563–570. +[RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?": +Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938, +Feb. 2016. [Online]. Available: https://arxiv . org/abs/1602.04938 + 113 +[Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,” +arXiv preprint arXiv:1609.04747 , Sep. 2016. [Online]. Available: https: +//arxiv .org/abs/1609. 04747 +[SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks +applied to house numbers digit classification,” in International Conference +on Pattern Recognition (ICPR) , no. 21. IEEE, Apr. 2012, pp. 3288–3291. +[Online]. Available: https://arxiv .org/abs/1204.3968 +[SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding +for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185– +212, 2009. [Online]. Available: http://ieeexplore. ieee.org/document/6792316/ +[SEZ+ + 13] P. Sermanet, D. Eigen et al. , “Overfeat: Integrated recognition, localization +and detection using convolutional networks,” arXiv preprint arXiv:1312.6229, +Feb. 2013. [Online]. Available: https://arxiv .org/abs/1312.6229v4 +[SHK+ + 14] N. Srivastava, G. E. Hinton et al. , “Dropout: a simple way to +prevent neural networks from overfitting.” Journal of Machine Learning +Research , vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: +https://www .cs. toronto .edu/~hinton/absps/JMLRdropout .pdf +[SHY+ + 13] A. Senior, G. Heigold et al. , “An empirical study of learning rates in deep +neural networks for speech recognition,” in International Conference on +Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online]. +Available: http://ieeexplore. ieee.org/document/6638963/?arnumber=6638963 +[SIV16] C. Szegedy, S. Ioffe, and V. 
Vanhoucke, “Inception-v4, inception-resnet and the +impact of residual connections on learning,” arXiv preprint arXiv:1602.07261, +Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602.07261 +[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding +for face recognition and clustering,” in Conference on Computer Vision +and Pattern Recognition (CVPR) . IEEE, Mar. 2015, pp. 815–823. [Online]. +Available: https://arxiv .org/abs/1503. 03832 +[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale +convolutional networks,” in International Joint Conference on Neural +Networks (IJCNN) , Jul. 2011, pp. 2809–2813. [Online]. Available: +http://ieeexplore. ieee.org/document/6033589/ +[SLJ+ + 15] C. Szegedy, W. Liu et al. , “Going deeper with convolutions,” in Conference +on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp. +1–9. [Online]. Available: https://arxiv .org/abs/1409. 4842 +[SM02] + K. O. Stanley and R. Miikkulainen, “Evolving neural networks through +114 +augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127, +2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/ +106365602320169811 +[SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to +the nonlinear dynamics of learning in deep linear neural networks,” +arXiv preprint arXiv:1312.6120 , Dec. 2013. [Online]. Available: https: +//arxiv .org/abs/1312.6120 +[SMGS14] R. K. Srivastava, J. Masci et al. , “Understanding locally competitive +networks,” arXiv preprint arXiv:1410.1165 , Oct. 2014. [Online]. Available: +https://arxiv . org/abs/1410. 1165 +[SSSI] J. Stallkamp, M. Schlipsing et al. , “The german traffic sign recognition +benchmark.” [Online]. Available: http://benchmark .ini . rub .de/?section= +gtsrb&subsection=news +[SSSI12] J. Stallkamp, M. Schlipsing et al. , “Man vs. computer: Benchmarking +machine learning algorithms for traffic sign recognition,” Neural Networks , +no. 
0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/ +article/pii/S0893608012000457 +[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint +arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606. 02492 +[SVI+ + 15] C. Szegedy, V. Vanhoucke et al. , “Rethinking the inception architecture +for computer vision,” arXiv preprint arXiv:1512.00567 , Dec. 2015. [Online]. +Available: https://arxiv .org/abs/1512.00567v3 +[SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional +networks: Visualising image classification models and saliency maps,” +arXiv preprint arXiv:1312.6034 , Dec. 2013. [Online]. Available: https: +//arxiv .org/abs/1312.6034 +[SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for +large-scale image recognition,” arXiv preprint arXiv:1409.1556 , Sep. 2014. +[Online]. Available: https://arxiv . org/abs/1409.1556 +[SZS+ + 13] C. Szegedy, W. Zaremba et al. , “Intriguing properties of neural +networks,” arXiv preprint arXiv:1312.6199 , Dec. 2013. [Online]. Available: +https://arxiv . org/abs/1312. 6199v4 +[TF-16a] “ MNIST for ML beginners,” Dec. 2016. [Online]. Available: https: +//www .tensorflow .org/tutorials/mnist/beginners/ + 115 +[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow .org/ +api_docs/python/nn/activation_functions_#dropout +[TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient +by a running average of its recent magnitude,” COURSERA: Neural +Networks for Machine Learning , vol. 4, no. 2, 2012. [Online]. Available: +http://www .cs.toronto . edu/~tijmen/csc321/slides/lecture_slides_lec6 .pdf +[Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,” +Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martin- +thoma.com/write-math +[Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available: +https://martin- thoma .com/twiddle/ +[Tho16] M. 
Thoma, “A survey of semantic segmentation,” arXiv preprint +arXiv:1602.06541 , Feb. 2016. [Online]. Available: https://arxiv . org/abs/ +1602. 06541 +[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380 , Jan. +2017. [Online]. Available: https://arxiv .org/abs/1701.08380 +[Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available: +https://martin- thoma .com/msthesis +[VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep +learning,” arXiv preprint arXiv:1312.5355 , Dec. 2013. [Online]. Available: +https://arxiv .org/abs/1312.5355 +[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing . +Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available: +http://dx . doi.org/10. 1007/978-94- 015-7744- 1_2 +[VTKP17] E. Vorontsov, C. Trabelsi et al. , “On orthogonality and learning recurrent +networks with long term dependencies,” arXiv preprint arXiv:1702.00071 , +Jan. 2017. [Online]. Available: https://arxiv .org/abs/1702.00071 +[WHH + + 89] A. Waibel, T. Hanazawa et al. , “Phoneme recognition using time-delay +neural networks,” IEEE transactions on acoustics, speech, and signal +processing , vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: +http://ieeexplore. ieee.org/document/21701/ +[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connec- +tionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256, +1992. +116 +[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling +and Normalization Methods for Action Recognition . Berlin, Heidelberg: +Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online]. +Available: http://dx .doi.org/10.1007/978-3- 642-37431- 9_44 +[WYS+ + 15] + R. Wu, S. Yan et al. , “Deep image: Scaling up image recognition,” arXiv +preprint arXiv:1501.02876 , vol. 7, no. 8, Jul. 2015. [Online]. Available: +https://arxiv . org/abs/1501. 02876v4 +[WZZ+ + 13] L. Wan, M. 
Zeiler et al., “Regularization of neural networks using dropconnect,” +in International Conference on Machine Learning (ICML) , no. 30, 2013, +pp. 1058–1066. [Online]. Available: http://www .matthewzeiler .com/pubs/ +icml2013/icml2013 .pdf +[XGD + + 16] S. Xie, R. Girshick et al. , “Aggregated residual transformations for deep +neural networks,” arXiv preprint arXiv:1611.05431 , Nov. 2016. [Online]. +Available: https://arxiv .org/abs/1611.05431v1 +[Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged +stochastic gradient descent,” arXiv preprint arXiv:1107.2490 , Jul. 2011. +[Online]. Available: https://arxiv . org/abs/1107.2490 +[XWCL15] B. Xu, N. Wang et al. , “Empirical evaluation of rectified activations in +convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online]. +Available: https://arxiv .org/abs/1505.00853 +[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on +support vector machines.” in ECAI , 2012, pp. 870–875. [Online]. Available: +https://www .sec.in.tum.de/assets/Uploads/ecai2 . pdf +[XZY+ + 14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolu- +tional neural network for large-scale image classification,” in International +Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186. +[YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten +digits,” 1998. [Online]. Available: http://yann.lecun. com/exdb/mnist/ +[ZBH+ + 16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking +generalization,” arXiv preprint arXiv:1611.03530 , Nov. 2016. [Online]. +Available: https://arxiv .org/abs/1611.03530 +[ZCZL16] S. Zhai, Y. Cheng et al. , “Doubly convolutional neural networks,” in +Advances in Neural Information Processing Systems 29 (NIPS) , D. D. Lee, +M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090. +[Online]. 
Available: http://papers.nips.cc/paper/6340- doubly-convolutional- +neural-networks.pdf + 117 +[ZDGD14] N. Zhang, J. Donahue et al. , “Part-based R-CNNs for fine-grained category +detection,” in European Conference on Computer Vision (ECCV). Springer, +Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407. 3867 +[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint +arXiv:1212.5701 , Dec. 2012. [Online]. Available: https://arxiv . org/abs/ +1212. 5701v1 +[ZF13] M. D. Zeiler and R. Fergus, “Stochastic pooling for regularization of deep +convolutional neural networks,” arXiv preprint arXiv:1301.3557 , Jan. 2013. +[Online]. Available: https://arxiv .org/abs/1301.3557v1 +[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional +networks,” in European Conference on Computer Vision (ECCV). Springer, +Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311. 2901 +[Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http:// +places2.csail. mit. edu/download.html +[ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv +preprint arXiv:1605.07146 , May 2016. [Online]. Available: https: +//arxiv .org/abs/1605. 07146 +[ZKL+ + 15] B. Zhou, A. Khosla et al. , “Learning deep features for discriminative +localization,” arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available: +https://arxiv .org/abs/1512.04150 +[ZKL+ + 16] B. Zhou, A. Khosla et al. , “Places: An image database for deep scene +understanding,” arXiv preprint arXiv:1610.02055 , Oct. 2016. [Online]. +Available: https://arxiv .org/abs/1610. 02055 +[ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement +learning,” arXiv preprint arXiv:1611.01578 , Nov. 2016. [Online]. Available: +https://arxiv .org/abs/1611.01578 +[ZMGL15] J. Zhao, M. Mathieu et al. , “Stacked what-where auto-encoders,” +arXiv preprint arXiv:1506.02351 , Jun. 2015. [Online]. Available: https: +//arxiv .org/abs/1506. 
02351v1 +[ZYL + + 15] + H. Zheng, Z. Yang et al. , “Improving deep neural networks using softplus +units,” in International Joint Conference on Neural Networks (IJCNN) , Jul. +2015, pp. 1–4. +118 +I. Glossary +ANN artificial neural network. 4 +ASO Automatic Structure Optimization. 29 +CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71 +CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60, +71, 72, 79, 82–84, 88–91 +ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84 +ES early stopping. 68 +FC Fully Connected. 91, 93 +FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93 +GA genetic algorithm. 30 +GAN Generative Adversarial Network. 80 +GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91 +HSV hue, saturation, value. 79 +LCN Local Contrast Normalization. 91 +LDA linear discriminant analysis. 79 +LReLU leaky rectified linear unit. 63, 72, 77, 78, 84 +MLP multilayer perceptron. 3–6, 28 +NAG Nesterov Accelerated Momentum. 83 +NEAT NeuroEvolution of Augmenting Topologies. 83 +OBD Optimal Brain Damage. 29 + 119 +PCA principal component analysis. 79 +PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84 +ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 +SGD stochastic gradient descent. 5, 30, 45, 46, 82 +ZCA Zero Components Analysis. 79 +120 diff --git a/read/results/playa/2201.00021.txt b/read/results/playa/2201.00021.txt new file mode 100644 index 0000000..522c23d --- /dev/null +++ b/read/results/playa/2201.00021.txt @@ -0,0 +1,1102 @@ +Astronomy & Astrophysics manuscript no. mainArxiv © ESO 2022 +April 12, 2022 +Discovery of ammonia (9,6) masers in two high-mass star-forming +regions +Y. T. Yan (闫耀庭) 1, + , C. Henkel1, 2, 3 + , K. M. Menten 1 + , Y. Gong (龚龑) 1 + , J. Ott4 + , T. L. Wilson1 + , A. Wootten4 + , A. +Brunthaler1 + , J. S. Zhang (张江水 )5 + , J. L. Chen ( 陈家梁) 5 + , and K.
Yang ( 杨楷) 6, 7 +1 + Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany +e-mail: yyan@mpifr-bonn.mpg.de +2 + Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia +3 + Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China +4 + National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, VA 22903-2475, USA +5 + Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China +6 + School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China +7 + Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s +Republic of China +Received 13 December 2021 / Accepted 30 December 2021 + ABSTRACT +Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact +regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements. +Aims. Only a few NH + 3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH + 3 (9,6) +masers to provide a better observational basis for studying their role in high-mass star-forming regions. +Methods. We carried out NH +3 (9,6) observations toward Cepheus A and G34.26+ 0.15 with the Eff elsberg 100-meter telescope (beam +size 49 + ) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1 +. 2). +Results. We discovered new NH +3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass star- +forming regions hosting NH + 3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Eff elsberg shows that the intensity +of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. 
Compared to the E ff elsberg data and assuming +linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH +3 (9,6) emission +arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the +NH +3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0 +. 28 ± 0 +. 10) of the peak position +of the 1.36 cm continuum object, HW2. In G34.26+0.15, three NH +3 (9,6) maser spots are observed: one is close to the head of the +cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region +A. +Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long +baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios. +Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM +1. Introduction +Since its discovery more than five decades ago (Cheung et al. +1968), ammonia (NH +3 ) has been a most valuable molecule for +investigating the physical properties of molecular clouds (e.g., +Ho & Townes 1983). While thermally excited transitions in +the centimeter-wavelength inversion transitions of ammonia are +regarded as a reliable thermometer of molecular clouds (e.g., +Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia +masers have attracted attention since the first detection of maser +action in the ( J, K ) = (3,3) metastable ( J = K ) line toward the +massive star-forming region W33 (Wilson et al. 1982). Subse- +quent observations have led to the detection of new metastable +ammonia masers, including 15 + NH + 3 (3,3) (Mauersberger et al. +1986), NH +3 (1,1) (Gaume et al. 1996), NH + 3 (2,2) (Mills et al. +2018), NH +3 (5,5) (Cesaroni et al. 
1992), NH +3 (6,6) (Beuther + + Member of the International Max Planck Research School (IM- +PRS) for Astronomy and Astrophysics at the universities of Bonn and +Cologne. et al. 2007), NH +3 (7,7), NH +3 (9,9), and NH + 3 (12,12) (Henkel +et al. 2013). These have led to the discovery of metastable maser +lines in 22 different regions (Mauersberger et al. 1986, 1987; +Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; +Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Woot- +ten 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang +et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid +et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh +et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & +Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al. +2019; Mei et al. 2020; Towner et al. 2021). Compared with the +metastable ammonia masers, detected non-metastable ( J > K ) +ammonia maser transitions are more numerous. The first highly +excited non-metastable ammonia maser was detected by Mad- +den et al. (1986) in the ( J, K ) = (9,6) and (6,3) lines. Thereafter, +many other NH +3 non-metastable inversion transition lines have +been identified as masers, including the (5,3), (5,4), (6,1), (6,2), +(6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3), +(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transi- +Article number, page 1 of 10arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 +A & A proofs: manuscript no. mainArxiv +tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; +Henkel et al. 2013; Mei et al. 2020). Except for the NH +3 (3,3) +masers proposed to be associated with four supernova remnants +(McEwen et al. 2016), almost all the other ammonia masers are +detected in high-mass star-forming regions (HMSFRs). 
How- +ever, while many HMSFRs host water (H + 2 O), hydroxyl (OH), +or methanol (CH + 3 OH) masers, ammonia masers are quite rare +in these sources, and the role that the environment of a young +high-mass star plays in their excitation remains unclear. There- +fore, dedicated searches for ammonia masers in HMSFRs are +indispensable in regard to their overall incidence and associa- +tion with di ff erent environments, which can provide additional +constraints on the pumping mechanism of ammonia masers. +So far, a total of 32 NH + 3 inversion transitions ( ∆ K = 0 +and ∆ J = 0) have been identified as masers. Among these, and +despite arising from energy levels as high as 1090 K above +the ground state, the NH +3 (9,6) maser stands out as being the +strongest and most variable one in W51-IRS2 (e.g., Henkel et al. +2013). Maser emission in this line has only been detected in five +HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. +1986), and Sgr B2(N) (Mei et al. 2020). The NH + 3 (3,3) masers +are thought to be collisionally excited (e.g., Flower et al. 1990; +Mangum & Wootten 1994); in contrast, the pumping mecha- +nism of NH +3 (9,6) masers is less well constrained (Madden et al. +1986). Brown & Cragg (1991) have studied ortho-ammonia and +found that it could possibly pump the (6,3) inversion line, but +they did not extend their model to the (9,6) transition due to the +fact that collision rates are only known for inversion levels up to +J = 6 (e.g., Danby et al. 1988). +NH + 3 (9,6) masers are found to be strongly variable, similar to +H + 2 O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. +2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) +line showed significant variation in line shape within a time in- +terval of only two days. 
Mapping of the (9,6) maser toward W51 +with very long baseline interferometry (VLBI) suggests that the +masers are closer to the H +2 O masers than to the OH masers or +to ultracompact (UC) H ii regions (Pratap et al. 1991). While +Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO +and NH + 3 masers in W51-IRS2 are very close to each other, their +positions, diff ering by 0 +. 065 ( ∼0.015 pc), do not fully coincide. +In this paper we report the discovery of NH +3 (9,6) masers +in two HMSFRs, Cepheus A and G34.26+ 0.15. This increases +the number of (9,6) maser detections in our Galaxy from five +to seven. In Sect. 2 observations with the Eff elsberg 100-meter +telescope and the Karl G. Jansky Very Large Array (JVLA) are +described. Results are presented in Sect. 3. The morphology of +Cep A and G34.26+ 0.15 as well as a comparison of the emission +distributions of di ff erent tracers with the NH + 3 (9,6) masers are +presented in Sect. 4. Our main results are summarized in Sect. 5. +2. Observations and data reduction +2.1. Effelsberg observations and data reduction +The NH +3 (9,6) line was observed toward Cep A and +G34.26 +0.15 with the 100-meter E ff elsberg telescope 1 + in 2020 +January and 2021 February, July, and August. The S14mm dou- +ble beam secondary focus receiver was employed. The full width +at half maximum (FWHM) beam size is 49 + at 18.5 GHz, the +frequency of the target line. The observations were performed in +position switching mode, and the o ff position was 10 + in azimuth +1 + Based on observations with the 100-meter telescope of the MPIfR +(Max-Planck-Institut für Radioastronomie) at E ff elsberg. away from the source. For observations made before 2021 Au- +gust, we used a spectrometer that covered 2 GHz wide backends +with a channel width of 38.1 kHz, corresponding to ∼0.62 km s −1 +at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar +1975). 
A high spectral resolution backend with 65536 channels +and a bandwidth of 300 MHz was employed in 2021 August, +providing a channel width of 0.07 km s−1 + at 18.5 GHz. Point- +ing was checked every 2 hours using 3C 286 or NGC 7027. +Focus calibrations were done at the beginning of the observa- +tions and during sunset and sunrise toward the abovementioned +pointing sources. The system temperatures were 100–130 K on +a main-beam brightness temperature, T + MB , scale. This flux den- +sity was calibrated assuming a T + MB / S ratio of 1.95 K /Jy, derived +from continuum cross scans of NGC 7027 (the flux density was +adopted from Ott et al. 1994). Calibration uncertainties are esti- +mated to be ∼ 10%. +We used the GILDAS / CLASS 2 + package (Pety 2005) to re- +duce the spectral line data. A first-order polynomial was sub- +tracted from each spectrum for baseline removal. +2.2. JVLA observations and data reduction +Observations of the NH + 3 (9,6) line toward Cep A and +G34.26 +0.15 were obtained on 2021 July 13 with the JVLA +of the National Radio Astronomy Observatory 3 + (NRAO) in the +C configuration (project ID: 21A-157, PI: Yaoting Yan). We +employed 27 antennas for the observations. The primary beam +of the JVLA antennas is 150 + (FWHM) at 18.5 GHz. A mix- +ture of mixed three-bit and eight-bit samplers were used to per- +form the observations. For the NH +3 (9,6) line observations, we +used one subband with the eight-bit sampler covering a band- +width of 16 MHz with full polarization, eight recirculations, and +four baseline board pairs (BIBPs) to provide a velocity range +of 260 km s−1 + with a channel spacing of 0.13 km s −1 + . Two +additional subbands of bandwidth 16 MHz were used to cover +the NH +3 (8,5) and (10,7) lines. The three-bit sampler with 32 +subbands, each with a bandwidth of 128 MHz to cover a to- +tal range of 4 GHz between 20–24 GHz, was used to mea- +sure the continuum emission. 
3C 286 with a flux density of +2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a +calibrator for pointing, flux density, bandpass, and polarization. +J2230 +6946 and J1851+0035 served as gain calibrators for Cep +A and G34.26+0.15, respectively. The on-source times were +4m + 30 s + and 4 m + 50 s + toward Cep A and G34.26 +0.15, respectively. +Data from two antennas were lost due to technical is- +sues. The data from the remaining 25 antennas were reduced +through the Common Astronomy Software Applications pack- +age (CASA 4 + ; McMullin et al. 2007). We calibrated the data with +the JVLA CASA calibration pipeline using CASA 6.1.2. The +results were obtained after flagging data that contain artifacts. +We inspected the phase, amplitude, and bandpass variations of +the calibrated visibility data to search for additional artifacts be- +fore imaging. Then, the uvcontsub task in CASA was used to +separate the calibrated visibilities into two parts, one with line- +only data and the other with the continuum data. The tclean task +with a cell size of 0 +. 2 and Briggs weighting with robust=0 was +used to produce the images of spectral line and continuum emis- +sion. The synthesized beams for NH +3 (9,6) are 1 +. 47 × 0 +. 99 at +2 + https: //www.iram.fr /IRAMFR /GILDAS/ +3 + The National Radio Astronomy Observatory is a facility of the Na- +tional Science Foundation operated under cooperative agreement by As- +sociated Universities, Inc. +4 + https: //casa.nrao.edu/ +Article number, page 2 of 10 +Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +P.A. = 58◦ +. 79 and 1 +. 33 × 1 +. 06 at P.A. = 5 ◦ +. 36 toward Cep A +and G34.26 + 0.15, respectively. For the 1.36 cm (20–24 GHz) +continuum emission, the synthesized beams are 1 +. 08 × 0 +. 67 at +P.A. = 60 ◦ +. 64 and 0 +. 95 × 0 +. 71 at P.A. = 5◦ +. 91 toward Cep A and +G34.26 + 0.15. 
The typical absolute astrometric accuracy of the +JVLA is ∼ 10% of the synthesized beam5 + . The flux density scale +calibration accuracy is estimated to be within 15%. +Fig. 1. Spectra from NH + 3 (9,6) transition lines. Left: Top to bottom: +Time sequence of NH +3 (9,6) profiles observed toward Cep A with the +E ff elsberg 100-meter telescope (after subtracting a first-order polyno- +mial baseline). A JVLA spectrum is interspersed. The systemic veloc- +ity from CO and HCO+ + lines is indicated by a dashed blue line. The +two dashed red lines at LSR velocities, V + LSR , of − 0.90 km s −1 + and +− 0.28 km s− 1 + indicate the central velocities of the two major compo- +nents. Right : NH +3 (9,6) spectra from G34.26 +0.15. The systemic ve- +locity from C 17 + O is indicated by a dashed blue line. The three dashed +red lines at V +LSR = 54.1 km s − 1 + , 55.8 km s −1 + , and 62.5 km s − 1 + show the +central velocities of the main ammonia emission components. +3. Results +The spectra from diff erent epochs are shown in Figs. 1 and 2. +Toward Cep A, the NH +3 (9,6) line profile from the JVLA is ex- +tracted from an Effelsberg-beam-sized region (FWHM, 49 + ). In +the case of G34.26+0.15, the NH +3 spectrum is below the noise +level if a similarly large beam size is used. Therefore, we de- +rived the JVLA NH + 3 (9,6) spectrum from a smaller region, with +radius 3 +. 5, that contains all the detected NH +3 (9,6) emission. In +Table A.1, the observed NH +3 (9,6) line parameters obtained by +Gaussian fits are listed. NH +3 (8,5) and (10,7) emission is not de- +tected by our JVLA observations. The 3σ upper limits for the +NH + 3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 +5 + https://science.nrao.edu/ facilities / vla/ docs /manuals/ oss/ performance- +/ positional-accuracy Fig. 2. NH +3 (9,6) line profiles emphasizing, in contrast to the spectra +in Fig. 1, weaker features. Cep A spectra are presented on the left, +G34.26+ 0.15 spectra on the right. 
The two dashed red lines in the left +panels indicate V + LSR = 1.48 km s− 1 + and 2.89 km s−1 + . In the right panels, +the two dashed red lines refer to 54.1 km s− 1 + and 55.8 km s− 1 + . +and 27.2 mJy beam−1 + , respectively. In G34.26 + 0.15, the corre- +sponding 3σ upper limits for the NH +3 (8,5) and (10,7) lines are +22.1 mJy beam −1 + and 30.4 mJy beam−1 + . For both sources, sen- +sitivity levels refer to emission from a single channel of width +0.13 km s−1 + . Taking the larger measured line widths of the (9,6) +maser features (see Table A.1), these limits could be further low- +ered by factors of two to four. +3.1. Centimeter-continuum emission +The 1.36 cm continuum, derived from our JVLA observations, +toward Cep A is presented in Fig. 3. Six published compact +sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are de- +tected in our observations. Figure 4 shows the 1.36 cm contin- +uum in G34.26 + 0.15. Three main continuum objects, A, B, and +C, are detected. By using the imfit task in CASA, we measured +the continuum flux at 1.36 cm toward individual compact source +components in Cep A and G34.26+0.15. Details are given in Ta- +ble A.2. +3.2. NH +3 (9,6) emission in Cep A +In 2020 January, NH +3 (9,6) emission with a peak flux density of +0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter +telescope in Cep A. Emission with similar strength was also de- +tected in 2021 February and August with the same telescope. +Higher velocity resolution data, which were obtained in 2021 +August, again with the E ff elsberg 100-meter telescope, show +that the (9,6) emission contains two main velocity components. +Overall, the flux densities of the NH + 3 (9,6) emission line mea- +sured with the Eff elsberg 100-meter telescope are, within the cal- +ibration uncertainties, unchanged. This is valid for the time inter- +val between 2020 January and August 2021, when we smoothed +the obtained spectra to the same velocity resolution. 
We also +see another two weaker components. Figure 2 emphasizes these +weak components with an expanded flux density scale. +Higher angular resolution data from the JVLA pinpoint the +position of the NH +3 (9,6) emission with an o ffset of (− 0 +. 28, +0 +. 02) relative to the 1.36 cm continuum peak of Cep A HW2 +(Fig. 3). The deconvolved NH +3 (9,6) component size is (0 +. 29 ± +0 +. 15) × (0 +. 19 ± 0 +. 14) at P.A. = 174 ◦ + , derived with the imfit task +in CASA, and can thus be considered, accounting for the uncer- +tainties, as unresolved. + Article number, page 3 of 10 +A & A proofs: manuscript no. mainArxiv +Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, +and 110 × 0.125 mJy beam − 1 + . The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane +Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is α +J2000 = 22h + 56 m + 17 s +. 972, and +δ + J2000 = 62◦ + 01 + 49 +. 587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black +ellipse denoting the position of the NH + 3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H +2 O (Sobolev et al. 2018), +and CH +3 OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates +the LSR velocity range of the maser spots. +Fig. 4. 1.36 cm JVLA continuum map of G34.26+ 0.15 presented as white contours with levels of − 5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, +150, 180, and 200 × 5.0 mJy beam −1 + . The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is +α +J2000 = 18 h + 53m + 18s +. 560, and δ + J2000 = 01◦ + 14 + 58 +. 201, the peak position, is marked by a black cross. 
The black ellipses show the positions of NH + 3 +(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H +2 O (Imai et al. 2011), and CH +3 OH (Bartkiewicz et al. +2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (V + LSR ) of maser spots. +In view of the constancy of the flux densities obtained at Ef- +felsberg and the similar JVLA flux density, measured in 2021 +July, there is no missing interferometric flux density in the JVLA +data. +3.3. NH +3 (9,6) emission in G34.26 +0.15 +The NH + 3 (9,6) emission was first detected toward G34.26+0.15 +in 2020 January with the Effelsberg 100-meter telescope. Higher velocity resolution data from 2021 August show the NH +3 (9,6) +emission to be composed of two di fferent components. The spec- +tra of weak components on a smaller flux density scale are pre- +sented in Fig. 2. +Three di fferent locations showing NH + 3 (9,6) emission are +found toward G34.26 +0.15 (Fig. 4). The deconvolved NH + 3 (9,6) +component sizes are (1 +. 42 ± 0 +. 43) × (0 +. 54 ± 0 +. 62) at P.A. = 97 ◦ +(M1), (0 +. 42 ± 0 +. 27) × (0 +. 15 ± 0 +. 27) at P.A. = 150 ◦ + (M2), and +Article number, page 4 of 10 +Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +(1 +. 17 ± 0 +. 34) × (0 +. 27 ± 0 +. 46) at P.A. = 53◦ + (M3) and are thus +comparable to or smaller than the beam size. +Overall, the NH + 3 (9,6) line from G34.26 +0.15 weakened +during the time interval from 2020 January to 2021 August by +about 70%. A comparison between the JVLA spectrum and the +E ffelsberg data, assuming a linear decrease in the integrated in- +tensity as a function of time between diff erent epochs of the +100-meter observations, suggests there is no missing flux in the +JVLA data. This is similar to the situation in Cep A. +4. Discussion +4.1. 
Morphology of Cep A and G34.26+ 0.15 +Cep A, at a trigonometric parallax distance of 0.70 ± 0.04 kpc +(Moscadelli et al. 2009; Dzib et al. 2011), is the second closest +HMSFR (after Orion) and by far the closest NH + 3 (9,6) maser +known. About 16 compact ( ∼1 + ) radio sources (e.g., Hughes & +Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been +identified in Cep A. Hughes & Wouterloot (1984) discovered +these targets at radio wavelengths, which are UC and hypercom- +pact (HC) H ii regions and /or stellar wind sources, subsequently +named as HW sources. The HW2 object is one of the best known +examples of a protostellar jet or disk system driving a powerful +outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles +et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). +The observed NH + 3 (9,6) emission is slightly offset ( −0 +. 28, 0 +. 02) +from the center of HW2 (see Fig. 3). +G34.26 +0.15 is an HMSFR located at a distance of 3.3 kpc +(Kuchar & Bania 1994). It hosts four radio continuum compo- +nents named A, B, C, and D. Component C is a prototypical +cometary UC H ii region containing a compact head and a diffuse +tail that extends from east to west (e.g., Reid & Ho 1985; Garay +et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components +A and B are HC H ii regions, located to the east of component +C. An extended ring-like H ii region, called component D, is lo- +cated southeast of components A-C. One of the three observed +NH + 3 (9,6) emission line sources, M1, is close to the head of com- +ponent C, whereas M2 and M3 originate from another compact +region in the west of the HC H ii component A (see Fig. 4). +4.2. NH +3 (9,6) emission possibly caused by maser action +As shown in Fig. 1, the NH +3 (9,6) profiles in Cep A and +G34.26 +0.15 are narrow (∆V + 1 /2 ≤2.0 km s−1 + ), much narrower +than the expected line widths ( 4 km s − 1 + ) of thermal lines ob- +served at a similar angular resolution (e.g., Torrelles et al. 
1985, +1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mook- +erjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Ve- +locity shifts with respect to the systemic velocities of the two +sources are both observed, that is, V ∼10 km s −1 + in Cep A and +V ∼4 km s− 1 + in G34.26 +0.15 (see details in Sect. 4.3). Further- +more, time variability is observed in the case of G34.26+0.15, +which is also a characteristic feature of maser emission. +Additional evidence of their maser nature is the high bright- +ness temperatures of the (9,6) emission spots toward Cep A and +G34.26 +0.15. The spectral parameters are listed in Table A.3. +Because at least a significant part of the NH +3 (9,6) emission +is not resolved by our JVLA observations, the derived bright- +ness temperatures are only lower limits. Nevertheless, the lower +limits on the brightness temperature are >800 K in Cep A (see +Table A.3), which is much higher than the expected thermal +gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito +et al. 2007; Beuther et al. 2018). This strongly suggests that the NH +3 (9,6) emission in Cep A is due to maser action. Be- +cause G34.26+ 0.15 is located at about five times the distance to +Cep A, beam dilution effects reduce the lower main beam bright- +ness temperature limit to 400 K in G34.26 +0.15 (M2) (see Ta- +ble A.3). We also note that the luminosity of the NH +3 (9,6) emis- +sion in G34.26 +0.15 is higher than or comparable to that in Cep +A, depending on the epoch of our observations. +Finally, the non-detections of the (8,5) and (10,7) lines also +indicate that the (9,6) line is special. This allows us to derive +lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity +ratios. The (9,6) line arises from ortho-NH + 3 ( K = 3n), whereas +the NH +3 (8,5) and (10,7) lines are para-NH + 3 ( K ≠ 3n) lines. +The minimum ortho-to-para ratios are in the range 12–42 and 1– +8 toward Cep A and G34.26+0.15, respectively.
The statistical +weights for the ortho states are twice as large as those for the +para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel +et al. 2013). In Cep A, the line intensity ratios are far higher than +this factor of two. Thus, at least in Cep A the higher main beam +brightness peak temperature of the (9,6) emission is caused by +maser action, perhaps involving exponential amplification, and +the case of G34.26 +0.15 is likely similar. +4.3. Comparison of NH + 3 (9,6) masers with previously +published (quasi-)thermal NH +3 emission +The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines +show thermal emission toward Cep A over a velocity range of +−13 km s−1 + ≤ V +LSR ≤ −4 km s−1 + (Brown et al. 1981; Güsten +et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average +NH + 3 column density of ∼5 ×10 15 + cm−2 + was estimated for a region +of 3 + around HW2 (Torrelles et al. 1999). This high NH +3 abun- +dance could provide a suitable environment for maser species. +Large line widths (∆ V +1 / 2 7.0 km s−1 + ) with V +LSR ∼ −10 km s−1 +in both (1,1) and (2,2) lines were found toward HW2 (Torrelles +et al. 1993). The velocity is similar to the cloud’s systemic lo- +cal standard of rest (LSR) velocity of −11 .2 km s−1 + , which +is based on CO (Narayanan & Walker 1996) and HCO + + ob- +servations (Gómez et al. 1999). Our (9,6) maser is redshifted +(− 0.9 km s− 1 + ≤ V +LSR ≤2.9 km s−1 + ) and shares positions with +the outflowing gas seen in CO and HCO + + with similarly red- +shifted velocities. Therefore, we argue that the (9,6) masers are +related to outflowing gas. +In G34.26 +0.15, a large NH + 3 column density, +1018 . 5 ±0 .2 + cm −2 + , and a kinetic temperature of 225±75 K +were derived by Henkel et al. (1987) based on measurements +of 15 NH + 3 inversion transitions in the frequency range of +22.0–26.0 GHz. These did not include the (9,6) transition. 
+While these lines were measured with a beam size of about +40 + , a comparison of the peak intensities of the optically thick +lines with the kinetic temperature reveals the size of the hot, +ammonia-emitting core to be only ∼2.5 + . All those measured +NH + 3 lines were quasi-thermal and had LSR velocities of +∼ 58.5 km s −1 + , close to the systemic velocity of ∼ 58.1 km s − 1 +obtained from C 17 + O observations (Wyrowski et al. 2012). +Their line widths (∆ V +1 / 2 ≥3.6 km s−1 + ) are larger than what +we find (0.35 km s −1 + ≤ ∆ V +1 / 2 ≤ 0.94 km s−1 + ) for each (9,6) +maser component (see details in Table A.3). In all, we may +have observed four di ff erent (9,6) velocity features. Three +are blueshifted at V +LSR ∼ 53.8 km s −1 + , 55.8 km s− 1 + , and +56.8 km s−1 + , and a fourth, tentatively detected, at 62.5 km s −1 + . +This tentative redshifted feature was only potentially detected +with E ffelsberg in 2020 January. The velocity is similar to that +of the JVLA measurements on the NH +3 (1,1) absorption line +against continuum source C ( ∼ 7 + resolution; Keto et al. 1987) +Article number, page 5 of 10 +A & A proofs: manuscript no. mainArxiv +and the NH + 3 (3,3) emission surrounding continuum source B as +well as the head of C (1 +. 4×1 +. 2 resolution; Heaton et al. 1989). +However, we did not find this redshifted component in our +JVLA observations. Therefore, its position within G34.26+0.15 +cannot be determined. The blueshifted (9,6) masers with a +velocity range of 53.8–56.8 km s− 1 + (M1, M2, and M3) show +velocities compatible with those of the NH + 3 (3,3) emission at +the proper positions (Heaton et al. 1989), which might be a +suitable environment for maser species. +4.4. Comparison of NH +3 (9,6) masers with other maser lines +To characterize the environment of NH + 3 (9,6) masers, we can +compare their positions with respect to those of other maser +species (i.e., OH, H + 2 O, and CH + 3 OH). 
Toward Cep A HW2, +many CH +3 OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna +et al. 2017) and H +2 O maser spots (e.g., Torrelles et al. 1998, +2011; Sobolev et al. 2018) are detected and are associated with +its disk. Sobolev et al. (2018) also found that most of the H +2 O +maser flux is associated with the compact H ii region HW3d. OH +maser features close to the H ii regions are also seen in HW2 +(e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These +three kinds of masers in Cep A have a large velocity range of +−25 km s−1 + ≤ V +LSR ≤ − 2 km s−1 + and are widespread around +HW2 and HW3, while NH +3 (9,6) emission is only detected at +−0.9 km s −1 + ≤ V +LSR ≤ 2.9 km s −1 + toward a sub-arcsecond- +sized region to the west of the peak continuum position of HW2 +(see Fig. 3). This suggests that the NH + 3 (9,6) maser in Cep A +is unique and not related to maser spots seen in other molecular +species. +In G34.26 +0.15, OH (Zheng et al. 2000), H + 2 O (Imai et al. +2011), and CH +3 OH (Bartkiewicz et al. 2016) masers have been +detected east of source C (Fig. 4), and none of them coincides +with the head of C. The NH + 3 (9,6) maser M1 is also found +slightly o ff the head of source C. This could suggest that M1 +is powered by continuum source C or by an outflow. Near com- +ponent B, there are some OH and CH +3 OH masers but no H +2 O +or NH + 3 masers. A group of H + 2 O masers, well-known tracers +of outflows, with a large velocity distribution of 43 km s−1 + ≤ +V +LSR ≤54 km s −1 + , was found to the west of the centimeter- +continuum source A and close to the peak of the millimeter- +continuum emission (see details in our Fig. A.2 and also in Fig. 5 +of Imai et al. 2011). The closeness of NH +3 (9,6) maser spots M2 +and M3 to this group of water masers and their similar velocities +again suggest an association of NH +3 (9,6) masers with outflow +activity. +4.5. 
Constraints on pumping scenarios +Our observations have resulted in the detection of NH + 3 (9,6) +masers in Cep A and G34.26 +0.15. The new detections could +provide additional constraints on the maser line’s pumping +mechanism. As mentioned in Sect. 1, the pumping mechanism +of the (9,6) maser is unclear (Madden et al. 1986; Brown & +Cragg 1991). Previous studies have suggested that there are three +main pumping scenarios to explain the observed NH +3 maser +lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra- +diation from the dust continuum emission, (2) line overlap, and +(3) collisional pumping. +For the first mechanism, infrared photons near 10 µ m are +needed for vibrational excitation. The high dust temperature +( ∼300 K) of W51-IRS2 can provide substantial infrared pho- +tons near 10 µ m, which is used for radiative pumping (Henkel et al. 2013). Both Cep A and G34.26 + 0.15 have similar kinetic +temperatures of 200 K (Henkel et al. 1987; Patel et al. 2005; +Comito et al. 2007; Beuther et al. 2018). This suggests that +high kinetic temperatures are needed to excite NH +3 (9,6) masers. +However, it should be noted that the silicate dust absorption fea- +ture might dominate at 10 µ m (see the spectral energy distribu- +tion of Cep A in De Buizer et al. 2017). Additionally, there is +no bright infrared emission around the two (9,6) masers, M2 and +M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer +et al. 2003 for a 10.5 µ m map). This indicates that the pumping +mechanism via infrared photons near 10 µ m may not be viable +to explain the (9,6) masers in Cep A and G34.26+0.15. Further- +more, Wilson & Schilke (1993) argued that radiative pumping by +dust emission tends to excite multiple adjacent ammonia maser +transitions, which appears to contradict our failure to detect the +adjacent (8,5) and (10,7) lines (with respect to quantum numbers +and frequency) and to only measure the (9,6) transitions in Cep +A and G34.26 + 0.15. 
Therefore, we suggest that infrared radia- +tion from dust is not the main pumping source. +Madden et al. (1986) suggested that there might be some +line overlaps between the rotational NH +3 transitions in the far- +infrared band. However, this would be unlikely to affect only the +(9,6) line. Nevertheless, far-infrared spectral observations will +be needed to clarify this scenario. +Based on our observations, the (9,6) maser spots are close +to, but not coincident with, the peaks of the radio continuum +emission in Cep A and G34.26+ 0.15. Furthermore, the (9,6) +masers show velocity off sets with respect to their systemic ve- +locities. This indicates that the (9,6) masers are located at the +base of outflows, similar to the H +2 O masers. This is supported +by VLBI observations that show that (9,6) masers tend to be +closely associated with H +2 O masers (Pratap et al. 1991). The ob- +served time variability in G34.26 + 0.15 and W51-IRS2 can also +be attributed to episodic molecular outflows. This indicates that +collisional pumping could be the driver of the (9,6) maser. On +the other hand, collisional pumping has been successfully used +to explain the NH +3 (3,3) maser (Walmsley & Ungerechts 1983; +Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to +pump from the K =0 level to the K = 3 level with parity changes, +that is, the upper level of the (3,3) metastable transition will be +overpopulated. NH +3 (9,6) arises from the ortho species, so a sim- +ilar mechanism might also occur in the case of the (9,6) transi- +tion. Further measurements of collisional rates of ammonia will +allow us to test this scenario. +5. Summary +We report the discovery of NH + 3 (9,6) masers in two HMSFRs, +Cep A and G34.26 +0.15. The narrow line width of the emis- +sion features (∆ V + 1/ 2 ≤ 2.0 km s −1 + ) and their high brightness tem- +peratures ( > 400 K) indicate the maser nature of the lines. 
+The intensity of the (9,6) maser in G34.26+0.15 is decreasing +with time, while toward Cep A the maser is stable based on 20 +months of monitoring at Effelsberg. Linearly interpolating the +integrated intensities obtained at Effelsberg as a function of time, +the JVLA measurements show that there is no missing flux den- +sity on scales on the order of 1.2 arcsec ($4 \times 10^{-3}$ and $2 \times 10^{-2}$ pc) +relative to the total single-dish flux. The JVLA-detected emission in- +dicates that the NH$_3$ (9,6) maser in Cep A originates from a +sub-arcsecond-sized region slightly ($0.28'' \pm 0.10''$) to the west +of the peak position of the 1.36 cm continuum object, HW2. In +G34.26+0.15, three NH$_3$ (9,6) maser spots are observed: one is +close to the head of the cometary UC H ii region C, and the other +two are emitted from a compact region to the west of the HC H ii +Article number, page 6 of 10 +Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +region A. We suggest that the (9,6) masers may be connected to +outflowing gas. Higher angular resolution JVLA and VLBI ob- +servations are planned to provide more accurate positions and +constraints on pumping scenarios. +Acknowledgements. We would like to thank the anonymous referee for the use- +ful comments that improved the manuscript. Y.T.Y. is a member of the Interna- +tional Max Planck Research School (IMPRS) for Astronomy and Astrophysics +at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China +Scholarship Council (CSC) for its support. We would like to thank the staff at +Effelsberg for their help provided during the observations. We thank the staff +of the JVLA, especially Tony Perreault and Edward Starr, for their assistance +with the observations and data reduction.
This research has made use of the +NASA / IPAC Infrared Science Archive, which is funded by the National Aero- +nautics and Space Administration and operated by the California Institute of +Technology. +References +Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MN- +RAS, 361, 623 +Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104 +Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953 +Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100 +Beuther, H., Walsh, A. J., Thorwirth, S., et al. 2007, A&A, 466, 989 +Brogan, C. L., Hunter, T. R., Cyganowski, C. J., et al. 2011, ApJ, 739, L16 +Brown, A. T., Little, L. T., MacDonald, G. H., Riley, P. W., & Matheson, D. N. +1981, MNRAS, 195, 607 +Brown, R. D. & Cragg, D. M. 1991, ApJ, 378, 445 +Carrasco-González, C., Sanna, A., Rodríguez-Kamenetzky, A., et al. 2021, ApJ, +914, L1 +Cesaroni, R., Walmsley, C. M., & Churchwell, E. 1992, A&A, 256, 618 +Cheung, A. C., Rank, D. M., Townes, C. H., Thornton, D. D., & Welch, W. J. +1968, Phys. Rev. Lett., 21, 1701 +Churchwell, E., Babler, B. L., Meade, M. R., et al. 2009, PASP, 121, 213 +Cohen, R. J. & Brebner, G. C. 1985, MNRAS, 216, 51P +Comito, C., Schilke, P., Endesfelder, U., Jiménez-Serra, I., & Martín-Pintado, J. +2007, A&A, 469, 207 +Curiel, S., Ho, P. T. P., Patel, N. A., et al. 2006, ApJ, 638, 878 +Danby, G., Flower, D. R., Valiron, P., Schilke, P., & Walmsley, C. M. 1988, +MNRAS, 235, 229 +De Buizer, J. M., Liu, M., Tan, J. C., et al. 2017, ApJ, 843, 33 +De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598, +1127 +Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M. +2011, ApJ, 733, 71 +Flower, D. R., O ffer, A., & Schilke, P. 1990, MNRAS, 244, 4P +Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036 +Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ, +459, 193 +Garay, G., Rodriguez, L. 
F., & van Gorkom, J. H. 1986, ApJ, 309, 553 +Gaume, R. A., Wilson, T. L., & Johnston, K. J. 1996, ApJ, 457, L47 +Goddi, C., Greenhill, L. J., Humphreys, E. M. L., Chandler, C. J., & Matthews, +L. D. 2011, ApJ, 739, L13 +Goddi, C., Henkel, C., Zhang, Q., Zapata, L., & Wilson, T. L. 2015, A&A, 573, +A109 +Gómez, J. F., Sargent, A. I., Torrelles, J. M., et al. 1999, ApJ, 514, 287 +Güsten, R., Chini, R., & Neckel, T. 1984, A&A, 138, 205 +Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148 +Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90 +Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137 +Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239 +Ho ffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 +Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79 +Hughes, V. A. 1991, ApJ, 383, 280 +Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204 +Hunter, T. R., Brogan, C. L., Indebetouw, R., & Cyganowski, C. J. 2008, ApJ, +680, 1271 +Imai, H., Omi, R., Kurayama, T., et al. 2011, PASJ, 63, 1293 +Keto, E. R., Ho, P. T. P., & Reid, M. J. 1987, ApJ, 323, L117 +Kraemer, K. E. & Jackson, J. M. 1995, ApJ, 439, L9 +Kuchar, T. A. & Bania, T. M. 1994, ApJ, 436, 117 +Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D. +1986, ApJ, 300, L79 +Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33 +Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352 +Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 +McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189 +McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in As- +tronomical Society of the Pacific Conference Series, Vol. 376, Astronomical +Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J. +Bell, 127 +Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157 +Menten, K. M. 
1991, ApJ, 380, L75 +Mills, E. A. C., Ginsburg, A., Clements, A. R., et al. 2018, ApJ, 869, L14 +Mookerjea, B., Casper, E., Mundy, L. G., & Looney, L. W. 2007, ApJ, 659, 447 +Moscadelli, L., Reid, M. J., Menten, K. M., et al. 2009, ApJ, 693, 406 +Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844 +Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331 +Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109 +Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19 +Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Ca- +soli, T. Contini, J. M. Hameury, & L. Pagani, 721 +Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87 +Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991, +ApJ, 373, L13 +Reid, M. J. & Ho, P. T. P. 1985, ApJ, 288, L17 +Rodriguez, L. F., Ho, P. T. P., & Moran, J. M. 1980, ApJ, 240, L149 +Sanna, A., Moscadelli, L., Surcis, G., et al. 2017, A&A, 603, A94 +Sewilo, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2004, ApJ, +605, 285 +Sewiło, M., Churchwell, E., Kurtz, S., Goss, W. M., & Hofner, P. 2011, ApJS, +194, 44 +Sobolev, A. M., Moran, J. M., Gray, M. D., et al. 2018, ApJ, 856, 60 +Sugiyama, K., Fujisawa, K., Doi, A., et al. 2008, PASJ, 60, 1001 +Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1998, ApJ, 509, 262 +Torrelles, J. M., Gómez, J. F., Garay, G., et al. 1999, MNRAS, 307, 58 +Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1985, ApJ, 288, 595 +Torrelles, J. M., Ho, P. T. P., Rodriguez, L. F., & Canto, J. 1986, ApJ, 305, 721 +Torrelles, J. M., Patel, N. A., Curiel, S., et al. 2011, MNRAS, 410, 627 +Torrelles, J. M., Verdes-Montenegro, L., Ho, P. T. P., Rodriguez, L. F., & Canto, +J. 1993, ApJ, 410, 202 +Towner, A. P. M., Brogan, C. L., Hunter, T. R., & Cyganowski, C. J. 2021, ApJ, +923, 263 +Umemoto, T., Mikami, H., Yamamoto, S., & Hirano, N. 1999, ApJ, 525, L105 +Urquhart, J. S., Morgan, L. K., Figura, C. C., et al. 
2011, MNRAS, 418, 1689 +Walmsley, C. M. & Ungerechts, H. 1983, A&A, 122, 164 +Walsh, A. J., Breen, S. L., Britton, T., et al. 2011, MNRAS, 416, 1764 +Walsh, A. J., Longmore, S. N., Thorwirth, S., Urquhart, J. S., & Purcell, C. R. +2007, MNRAS, 382, L35 +Wang, K., Zhang, Q., Wu, Y., Li, H.-b., & Zhang, H. 2012, ApJ, 745, L30 +Wilson, T. L., Batrla, W., & Pauls, T. A. 1982, A&A, 110, L20 +Wilson, T. L. & Henkel, C. 1988, A&A, 206, L26 +Wilson, T. L., Johnston, K. J., & Henkel, C. 1990, A&A, 229, L1 +Wilson, T. L. & Schilke, P. 1993, in Lecture Notes in Physics, Astrophysical +Masers, ed. A. W. Clegg & G. E. Nedoluha, Vol. 412, 123–126 +Wyrowski, F., Güsten, R., Menten, K. M., Wiesemeyer, H., & Klein, B. 2012, +A&A, 542, L15 +Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63 +Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117 +Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192 +Article number, page 7 of 10 +A & A proofs: manuscript no. mainArxiv +Appendix A: +Table A.1. Summary of NH +3 (9, 6) maser observations. +Source Telescope Beam Epoch Channel S + ν rms + S + ν dv V +LSR ∆ V +1 /2 +size spacing +(km s − 1 + ) (Jy) (mJy) (Jy km s −1 + ) (km s − 1 + ) +Cep A E ff elsberg 49 + 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04 +Eff elsberg 49 + 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04 +Eff elsberg 49 + 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05 +JVLAa + 1 +. 47 × 0 +. 99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12 +Eff elsberg 49 + 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01 +0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05 +Eff elsberg 49 + 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07 +0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07 +0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07 +0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07 +0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07 +G34.26 +0.15 E ff elsberg 49 + 2020, Jan. 
03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13 +Eff elsberg 49 + 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12 +Eff elsberg 49 + 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14 +JVLAb + 1 +. 33 × 1 +. 06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09 +0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12 +0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08 +Eff elsberg 49 + 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12 +0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09 +0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06 +Eff elsberg 49 + 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05 +Notes. The spectral parameters are obtained from Gaussian fitting. (a ) + The JVLA spectrum toward Cep A is extracted from the E ff elsberg-beam- +sized region (FWHM 49 + ). (b ) + For G34.26+ 0.15, the JVLA beam samples the NH + 3 (9,6) spectrum over a region of radius 3 +. 5, which contains all +detected NH +3 (9,6) emissions. +Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. +Source R.A. Dec. Size P.A. 
S + ν +( h m s) ( ◦ + ) (arcsec) (deg) (mJy) +Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 +HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 +HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36 +HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7 +HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92 +HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78 +G34.26 +0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33 +B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110 +C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660 +Article number, page 8 of 10 +Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Table A.3. NH +3 (9,6) maser positions derived from the JVLA observations. +Source R.A. Dec. S + ν T + MB V +LSR ∆ V +1 /2 +(h m s ) ( ◦ + ) (mJy beam−1 + ) (K) (km s− 1 + ) +Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02 +343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05 +G34.26 +0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14 +M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08 +57.8 146.2 54.35 ± 0.07 0.83 ± 0.14 +180.8 457.6 55.83 ± 0.01 0.59 ± 0.03 +M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08 +73.7 186.3 55.78 ± 0.04 0.79 ± 0.08 +Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is α + J2000 = 22h + 56 m + 17 s +. 972, +and δ +J2000 = 62◦ + 01 + 49 +. 587, the peak position of the continuum map, is marked by a red cross. 
Slightly to the west of the cross is the white ellipse +denoting the position of the NH +3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array +(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam − 1 + . +OH (Bartkiewicz et al. 2005), H + 2 O (Sobolev et al. 2018), and CH + 3 OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, +respectively. The color bar on the right-hand side indicates the velocity range ( V +LSR ) of maser spots. + Article number, page 9 of 10 +A & A proofs: manuscript no. mainArxiv +Fig. A.2. 1.36 cm JVLA continuum map of G34.26 +0.15 presented as gray shaded areas. The reference position is α +J2000 = 18h + 53 m + 18 s +. 560, and +δ + J2000 = 01◦ + 14 + 58 +. 201, the peak position, is marked by a red cross. The red ellipses show the positions of NH + 3 (9,6) emission with stars at their +center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from +Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam−1 + . OH (Zheng et al. 2000), H +2 O (Imai +et al. 2011), and CH + 3 OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates +the velocity range (V + LSR ) of maser spots. +Article number, page 10 of 10 diff --git a/read/results/playa/2201.00022.txt b/read/results/playa/2201.00022.txt new file mode 100644 index 0000000..963630f --- /dev/null +++ b/read/results/playa/2201.00022.txt @@ -0,0 +1,1383 @@ +Draft version July 7, 2022 +Typeset using LA +T +EX twocolumn style in AASTeX631 +The Formation of Intermediate Mass Black Holes in Galactic Nuclei +Sanaea C. 
Rose, 1, 2 + Smadar Naoz, 1, 2 + Re’em Sari,3 + and Itai Linial 3 +1 + Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA +2 + Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA +3 + Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel +ABSTRACT +Most stellar evolution models predict that black holes (BHs) should not exist above approximately +50 − 70 M + , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +indicate the existence of BHs with masses at and above this threshold. We suggest that massive +BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions +between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 + M + . This upper limit assumes that (1) the BHs accrete a +substantial fraction of the stellar mass captured during each collision and (2) that the rate at which +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our +results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic +centers. This formation channel has implications for observations. Collisions between stars and BHs +can produce electromagnetic signatures, for example, from x-ray binaries and tidal disruption events. +Additionally, formed through this channel, both black holes in the mass gap and IMBHs can merge +with the supermassive black hole at the center of a galactic nucleus through gravitational waves. +These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, +respectively). +1. 
INTRODUCTION +The recently detected gravitational wave source +GW190521 (The LIGO Scientific Collaboration et al. +2020a,b) produced an intermediate mass black hole of +approximately 142 M + . This event may have also had a +85 M + progenitor, which falls within the pair-instability +mass gap that limits stellar black holes (BHs) to no +more than + ∼< + 50 M + (e.g.,Heger et al.2003;Woosley +2017) 1 + . Similarly, the merger products of GW150914, +GW170104, and GW170814 fall within the mass gap +(e.g.,Abbott et al.2016,2017a,b). BH mergers that +form second generation BHs and, in some cases, inter- +mediate mass BHs (IMBHs), these gravitational wave +(GW) events can occur in globular clusters, young stel- +Corresponding author: Sanaea C. Rose +srose@astro.ucla.edu +1 + Note that the exact lower and upper limits may be sensitive to +metallicity of the progenitor (e.g.,Woosley2017;Spera & Mapelli +2017a;Limongi & Chieffi2018a;Sakstein et al.2020;Belczynski +et al.2020a;Renzo et al.2020;Vink et al.2021). lar clusters, or the field (e.g.,Rodriguez et al.2018;Ro- +driguez et al.2019;Fishbach et al.2020;Mapelli et al. +2021b,a;Di Carlo et al.2019,2021;Dall’Amico et al. +2021;Arca Sedda et al.2021). However, IMBHs are +not limited to these locations and may reside in galac- +tic nuclei as well. Several studies propose that our +own galactic center may host an IMBH in the inner pc +(e.g.,Hansen & Milosavljevi´c2003;Maillard et al.2004; +G¨urkan & Rasio2005;Gualandris & Merritt2009;Chen +& Liu2013;Generozov & Madigan2020;Fragione et al. +2020a;Zheng et al.2020;Naoz et al.2020;GRAVITY +Collaboration et al.2020). +Several IMBH formation channels have been suggested +in the literature. 
For example, IMBHs may have a cos- +mological origin, forming in the early universe either +as a result of the very first stars (e.g.,Madau & Rees +2001;Schneider et al.2002;Johnson & Bromm2007; +Valiante et al.2016) or from direct collapse of accumu- +lated gas (e.g.,Begelman et al.2006;Yue et al.2014; +Ferrara et al.2014;Choi et al.2015;Shlosman et al. +2016). These high redshift IMBHs would need to sur- +vive galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 +2 Rose et al. +Rashkov & Madau2014), with significant effects on their +stellar and even dark matter surroundings (e.g.,Bertone +et al.2009;Chen & Liu2013;Bringmann et al.2012;Eda +et al.2013;Naoz & Silk2014;Naoz et al.2019). Another +popular formation channel relies on the coalescence of +many stellar-mass black holes, which may seed ob jects +as massive as SMBHs (e.g.,Kroupa et al.2020). IMBHs +may form in the centers of globular clusters, where few- +body interactions lead to the merger of stellar-mass BHs +(e.g.,O’Leary et al.2006;G¨urkan et al.2006;Blecha +et al.2006;Freitag et al.2006;Umbreit et al.2012;Ro- +driguez et al.2018;Rodriguez et al.2019;Fragione et al. +2020b). Other formation mechanisms invoke successive +collisions and mergers of massive stars (e.g.,Ebisuzaki +et al.2001;Portegies Zwart & McMillan2002;Portegies +Zwart et al.2004;Freitag et al.2006;Sakurai et al.2017; +Kremer et al.2020;Gonz´alez et al.2021;Di Carlo et al. +2021;Das et al.2021a,b;Escala2021). +The main obstacle to sequential BH mergers in clus- +ters is that the merger recoil velocity kick often exceeds +the escape velocity from the cluster (e.g.,Schnittman +& Buonanno2007;Centrella et al.2010;O’Leary et al. +2006;Baibhav et al.2020, Rom & Sari, in prep.). How- +ever, nuclear star clusters at the centers of galaxies do +not encounter this problem. For example,Fragione et al. +(2021) explore repeated BH-BH mergers in nuclear star +clusters without a SMBH. 
They considered BH binary- +single interactions, binary BH GW merger, and GW +merger recoil kicks. The post-kick merger product sinks +back towards the cluster center over a dynamical fric- +tion timescale. Using this approach, they showed that +10 3 + − 10 4 + M + IMBHs can form efficiently over the life- +time of a cluster. +However, as discussed in Section2.2, direct BH-star +collisions are much more frequent than BH-BH collision +in galactic nuclei, making the former a promising chan- +nel for BH growth. In an N-body study of young star +clusters,Rizzuto et al.(2022) find that BH-star colli- +sions are a main contributor to the formation of BHs +in the mass gap and IMBHs. In a similar vein,Stone +et al.(2017) demonstrate that massive BHs can form +from repeated tidal encounters between stars and BHs. +More generally, several studies have explored the role of +collisions in a GN, with implications for the stellar and +red giant populations (e.g.,Dale & Davies2006;Dale +et al.2009;Balberg et al.2013;Mastrobuono-Battisti +et al.2021). We propose that IMBHs can form naturally +within the central pc of a galactic center through re- +peated collisions between BHs and main sequence stars. +During a collision, the BH can accrete some portion of +the star’s mass. Over many collisions, it can grow ap- +preciably in size. We demonstrate that this channel can create IMBHs with masses as large as 10 4 + M + , an upper +limit that depends on the density profile of the surround- +ing stars and the efficiency of the accretion. +The paper is structured as follows: we describe rele- +vant physical processes and our approach in Section2. +In particular, we provide an overview of collisions in +Section2.2and present our statistical approach in Sec- +tion2.3. 
Section 2.4 discusses our treatment of the +mass growth with each collision and presents analytic +solutions to our equations in two different regimes, ef- +ficient collisions and inefficient collisions. We compare +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events be- +tween IMBHs and the SMBH. We then incorporate re- +laxation processes and discuss the subsequent results in +Section 2.9. Finally, we discuss and summarize our find- +ings in Section 3. +2. METHODOLOGY +We consider a population of stellar mass BHs embed- +ded in a cluster of 1 $M_\odot$ stars. When stars and BHs +collide, the BHs can accrete mass. The growth rate de- +pends on the physical processes outlined below. We use +a statistical approach to estimate the stellar encounters +and final IMBH masses. +2.1. Physical Picture +We consider a population of BHs within the inner few +parsecs of the SMBH in a galactic nucleus (GN). We as- +sume that the BH mass distribution follows that of the +stars from which they originate, a Kroupa initial mass +function $dN/dm \propto m^{-2.35}$. While this choice represents +a gross oversimplification, it has very little bearing on +our final results. Future work may address the particu- +lars of the BH mass distribution, but we do not expect +that it will significantly alter the outcome. The lower +and upper limits of the BH mass distribution are 5 and +50 $M_\odot$, respectively. We select the upper limit to en- +compass the range of upper bounds predicted by stellar +evolution models, which vary between 40 and 125 $M_\odot$ +depending on the metallicity (Heger et al. 2003; Woosley +2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; +Belczynski et al. 2020b; Renzo et al. 2020). We assume +that the orbits of the BHs follow a thermal eccentricity +distribution. We draw their semimajor axes, $a_\bullet$, from a +uniform distribution in log distance, $dN/d(\log r)$ being +constant.
While this distribution is not necessarily rep- +resentative of actual conditions in the GN, we use it to +build a comprehensive physical picture of BH growth at +all distances from the SMBH, including within 0.01 pc. +Otherwise, the innermost region of the GN would be +poorly represented in our sample. We consider other +IMBH Formation in Galactic Nuclei 3 +Figure 1. We plot the relevant timescales, including col- +lision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark +blue line represents the time for a $10^5\,M_\odot$ BH to merge with +the SMBH through GW emission. +observationally motivated distributions in Section 2.9, +but reserve a more detailed examination of the distribu- +tion’s impact for future work. +2.2. Direct Collisions +BHs in the GN can undergo direct collisions with other +objects. The timescale for this process, $t_{\rm coll}$, can be es- +timated using a simple rate calculation: $t_{\rm coll}^{-1} = n \sigma A$, +where n is the number density of objects, σ is the ve- +locity dispersion, and A is the cross-section. We use the +collision timescale from Rose et al. (2020): +$t_{\rm coll}^{-1} = \pi n(a_\bullet)\, \sigma(a_\bullet) \times \left( f_1(e_\bullet)\, r_c^2 + f_2(e_\bullet)\, r_c\, \frac{2G(m_{\rm BH} + m_\star)}{\sigma(a_\bullet)^2} \right)$, (1) +where G is the gravitational constant and $r_c$ is the sum +of the radii of the interacting objects, a black hole with +mass $m_{\rm BH}$ and a star with mass $m_\star$. Detailed in Rose +et al. (2020), $f_1(e_\bullet)$ and $f_2(e_\bullet)$ account for the effect of +the eccentricity of the BH’s orbit about the SMBH on +the collision rate, while n and σ are simply evaluated +at the semimajor axis of the orbit (see below).
Note +that this timescale equation includes the effects of grav- +itational focusing, which enhances the cross-section of +interaction. +Assuming a circular orbit for simplicity, we plot the +timescale for a BH orbiting in the GN to collide with +a 1 $M_\odot$ star as a function of distance from the SMBH in Figure 1.$^2$ +As this timescale depends on the density +of surrounding stars, we adopt a density profile of the +form: +$\rho(r_\bullet) = \rho_0 \left( \frac{r_\bullet}{r_0} \right)^{-\alpha}$, (2) +where $r_\bullet$ denotes the distance from the SMBH. We adopt +a SMBH mass of $4 \times 10^6\,M_\odot$ such that our fiducial GN +matches our own galactic center (e.g., Ghez et al. 2005; +Genzel et al. 2003). In this case, the normalization in +Eq. (2) is $\rho_0 = 1.35 \times 10^6\,M_\odot/{\rm pc}^3$ at $r_0 = 0.25$ pc (Gen- +zel et al. 2010). Additionally, in Eq. (2), α gives the +slope of the power law. We assume that a uniform pop- +ulation of solar mass stars accounts for most of the mass +in the GN, making the stellar number density: +$n(r_\bullet) = \frac{\rho(r_\bullet)}{1\,M_\odot}$. (3) +The collision timescale also depends on the velocity dis- +persion, which we express as: +$\sigma(r_\bullet) = \sqrt{\frac{G M_\bullet}{r_\bullet (1 + \alpha)}}$, (4) +where α is the slope of the density profile and $M_\bullet$ de- +notes the mass of the SMBH (Alexander 1999; Alexan- +der & Pfuhl 2014). As mentioned above, Eq. (1) depends +on the sum of the radii of the colliding objects, $r_c$. We +take $r_c = 1\,R_\odot$ because these interactions involve a BH +and a star, and the former has a much smaller physi- +cal cross-section. For example, the Schwarzschild radius +of a 10 $M_\odot$ BH is only 30 km, or $4.31 \times 10^{-5}\,R_\odot$. For +this reason, direct collisions between compact objects +are very rare and not included in our model. +We note that direct collisions between BHs, via GW +emission, were shown to be efficient in nuclear star clus- +ters without SMBHs (e.g., Portegies Zwart & McMil- +lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016).
+However, in the GN, star-BH collisions are much more +frequent than direct BH-BH collisions. As depicted in +Figure1, the star-BH collision timescale for a range +of density profiles is many orders of magnitude shorter +than the BH-BH GW collision timescale (for the rele- +vant equations, seeO’Leary et al.2009;Gond´an et al. +2018, for example). Thus, we expect that star-BH col- +lisions will be the main driver of IMBH growth in the +GN. +2 + We note that the eccentricity has a very minor effect on the +collision timescale (Rose et al.2020). +4 Rose et al. +2.3. Statistical Approach to Col lisions +We simulate the mass growth of a population of BHs +with initial conditions detailed in Section2.1. Over an +increment ∆t of 10 6 + yr, we calculate the probability of +a collision occurring, given by ∆ t/t + coll . This choice of +∆ t is motivated by our galactic center’s star formation +timescale (e.g.,Lu et al.2009), allowing for regular re- +plenishment of the stellar population in the GN. We have +checked that the results are not sensitive to this choice +of ∆ t , omitted here to avoid clutter. We draw a number +between 0 and 1 using a random number generator. If +that number is less than or equal to the probability, we +increase the BH’s mass by ∆ m , the mass that the BH is +expected to accrete in a single collision (see Section2.4 +for details). We recalculate the collision timescale using +the updated BH mass and repeat this process until the +time elapsed equals the simulation time of 10 Gyr3 + . +2.4. Mass Growth +When a BH collides with a star, it may accrete ma- +terial and grow in mass. The details of the accretion +depend on the relative velocity between the BH and +star. For simplicity, this calculation assumes that the +two ob jects experience a head on collision, with the BH +passing through the star’s center. 
We begin by con- +sidering the escape velocity from the BH at the star’s +outermost point, its surface, which corresponds to the +maximum impact parameter $1\,R_\odot$. Qualitatively, one +might expect that the BH could capture the entire star +(i.e., $\Delta m \sim 1\,M_\odot$) if the relative velocity is smaller than +the escape velocity from the BH at this point. However, +in the vicinity of the SMBH, the dispersion velocity of +the stars may be much larger than the escape velocity +from the BH at the star’s surface. In this case, the BH +captures a “tunnel” of material through the star. This +tunnel has radius equal to the Bondi radius and length +approximately $1\,R_\odot$. For the purposes of this study, we +assume that the BH accretes all of the material that +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our re- +sults imply. We discuss accretion in Section 2.5. +To estimate ∆m, we begin with the Bondi-Hoyle ac- +cretion rate, $\dot{m}$, given by: +$\dot{m} = \frac{4 \pi G^2 m_{\rm BH}^2 \rho_{\rm star}}{(c_s^2 + \sigma^2)^{3/2}}$, (5) +3 + Closer to the SMBH, ∆t may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe- +guard in our code which takes the ratio $\Delta t / t_{\rm coll}$ and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. For simplicity, we set +the initial mass equal to $10\,M_\odot$ for all of the BHs. Assuming +the density profile of stars has α = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high.
The shaded regions and dashed lines represent the +analytical predictions detailed in Section2.4. +where c +s is the speed of sound in the star and ρ + star is its +density (e.g.,Bondi1952;Bondi & Hoyle1944;Shima +et al.1985;Edgar2004, see latter for a review). We +approximate the density as 1 M + / (4πR 3 + /3) and take +the conservative value of c + s = 500 km s−1 + , which is +consistent with the sound speed inside a 1 M + star +(Christensen-Dalsgaard et al.1996) and allows us to set +a lower limit on ∆ m. To find ∆m, at each collision, we +have: + ∆ m = min( ˙m × t + , cross , 1 M + ) , (6) +where t + , cross ∼ R + /σ is the crossing time of the BH in +the star. We take the minimum between ˙m × t +, cross and +1 M + because the BH cannot accrete more mass than +one star at each collision. +Figure2juxtaposes the expected growth using Bondi- +Hoyle-Lyttleton accretion (blue small points) with a +much simpler model in which the BH accretes the star’s +entire mass, 1 M + (red large points). Both examples +start with identical populations of 10 M + BHs (grey) +and simulate growth through collisions using a statisti- +cal approach. As the BHs grow, the collision timescale, +which depends on m + BH , decreases. Simultaneously, +∆ m , which also depends on m +BH , increases. The re- +sult is exponential growth (see discussion and details +surrounding Eq. (8)). In Figure2, however, the simula- +tions assume α = 1 for the stellar density profile, ensur- +ing the collision timescale is long compared to the sim- +IMBH Formation in Galactic Nuclei 5 +ulation time, 10 Gyr. Therefore, the BHs grow slowly, +and their final masses can be approximated using the +following equation: +m +final (t + coll → const .) = m + initial + ∆ m T +t + coll , (7) +in which T represents the simulation time and ∆ m and +t + coll remain constant, approximated as their initial val- +ues. 
+This equation is plotted in Figure2for both cases, +∆ m = 1 M + (red) and ∆m from Bondi-Hoyle-Lyttleton +accretion (blue), and the curves coincide with the cor- +responding simulated results. The shaded regions rep- +resent one standard deviation from Eq. (7), calculated +using the square root of the number of collisions, T /t + coll . +As indicated by the results in red, in the absence of +Bondi-Hoyle-Lyttleton accretion, the BHs closest to the +SMBH experience the most growth because they have +shorter collision timescales. However, Bondi-Hoyle- +Lyttleton accretion becomes important closer to the +SMBH, where the velocity dispersion is large compared +with the stars’ escape velocity, and curtails the mass +growth for BHs in this region. Outside of 10− 2 + pc, a BH +consumes the star’s entire mass: the accretion-limited +∆ m governed by Eq. (7) is greater than or equal to the +star’s mass. +Eq.7does not apply for other values of α . When the +collision timescale is shorter, corresponding to a larger +index α in the density profile (see Figure1), the growth +is very efficient and ∆m quickly approaches 1 M + . Con- +sequently, while we can now assume ∆ m = 1 M + , we +can no longer assume the collision timescale is constant. +The final mass grows exponentially as a result. For +∆ m = 1M + , the general solution is reached by solving +the differential equation dm/dt = 1 M + /t + coll (m ), which +gives: +m +final (∆ m → 1 M + ) = − A + ( m +initial + A ) e CT + (8) +where A = σ 2 + R + star /G and C = 2πGn +star R + star /σ . As an +example, we plot this curve in purple for the α = 2 case, +in Figure3, which agrees with the simulated masses. +2.5. Uncertainties in Accretion +We note that the ∆ M calculated in this proof-of- +concept study assumes that the BH accretes all of the +material that it captures. 
Estimating the true fraction +of the material accreted by the BH is very challeng- +ing; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heat- +ing, and radiative transfer, etc. and remains an active +field of research (e.g.,Blandford & Begelman1999;Park +& Ostriker2001;Narayan et al.2003;Igumenshchev et al.2003;Ohsuga et al.2005;Yuan et al.2012;Jiang +et al.2014;McKinney et al.2014;Narayan et al.2022). +Heuristically, if a collision between a BH and a star re- +sults in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity re- +main uncertain (e.g.,Yuan et al.(2012);Jiang et al. +(2014), see also the discussion inStone et al.(2017), +Rizzuto et al.(2022), andKremer et al.(2022)). The +question becomes whether or not a BH can still accu- +mulate significant amounts of mass over many collisions +even if it accretes very little in a single one. We ex- +plore the viability of our channel using a physically mo- +tivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al.2005;Ostriker et al.2010;Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately v + esc /(cη ), where v + esc is +the escape velocity from the BH at 1 R + and η is the +accretion efficiency at the ISCO. We take η to be 0 .1 +(e.g.,Yu & Tremaine2002). This expression for the +fraction accreted is consistent withKremer et al.(2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0 . 2 and 0 . 8. We discuss +the results of the momentum-driven winds estimate in +Section3. 
We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radi- +ation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumen- +shchev 2008; Begelman 2012a,b; McKinney et al. 2014). +2.6. GW Inspiral +When a BH is close to the SMBH, GW emission can +circularize and shrink its orbit. We implement the ef- +fects of GW emission on the BH’s semimajor axis and +eccentricity following Peters & Mathews (1963a). The +characteristic timescale to merge a BH with an SMBH +is given by: +$t_{\rm GW} \approx 2.9 \times 10^{12}\,{\rm yr} \left( \frac{M_\bullet}{10^6\,M_\odot} \right)^{-1} \left( \frac{m_{\rm BH}}{10^6\,M_\odot} \right)^{-1} \left( \frac{M_\bullet + m_{\rm BH}}{2 \times 10^6\,M_\odot} \right)^{-1} \left( \frac{a_\bullet}{10^{-2}\,{\rm pc}} \right)^{4} \times f(e_\bullet) (1 - e_\bullet^2)^{7/2}$, (9) +where $f(e_\bullet)$ is a function of $e_\bullet$. For all values of $e_\bullet$, +$f(e_\bullet)$ is between 0.979 and 1.81 (Blaes et al. 2002). We +plot this timescale for a $1 \times 10^5\,M_\odot$ BH in Figure 1 in +blue. +6 Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow ( α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking $m_{\rm initial}$ to be the average mass +of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and +merger times of these BHs. +In our simulations, we assume a BH has merged with +the SMBH when the condition $t_{\rm GW} < t_{\rm elapsed}$ is met. +When this condition is satisfied, we terminate mass +growth through collisions for that BH.4 +2.7. IMBH growth +As detailed above, BH-stellar collisions can increase +the BH masses as a function of time. Here, we examine +the sensitivity of the BH growth to the density power +law. From Eq. (1), it is clear that the growth rate de- +pends on the stellar density profile, governed by the in- +dex α .
We expect that higher values of α , or steeper +profiles, will result in more efficient mass growth. In +Figure 1, larger values of α lead to collision timescales +in the GN’s inner region, inwards of 0.25 pc, that are +much smaller than the 10 Gyr simulation time. Figure 3 +confirms this expectation. It depicts the mass growth of +a uniform distribution of BHs with initial conditions de- +tailed in Section 2.1 for five α values, spanning 1 (green) +to 2 (purple). The most massive IMBHs form inwards +of 0.25 pc for the α = 2 case. +2.8. Gravitational Wave Mergers and Intermediate +and Extreme Mass Ratio Inspiral Candidates +Towards the SMBH, efficient collisions can create BHs +massive enough to merge with the SMBH through GWs. +Following the method detailed in Section 2.6, when a +given BH meets the criterion $t_{\rm GW} < t_{\rm elapsed}$, we mark +4 + For comparison, we also incrementally changed the semimajor +axis and eccentricity from GW emission following the equations +in Peters & Mathews (1963b). This method leads to a slight +increase in the final IMBH masses because it accounts for the +collisions that take place while the orbit is gradually shrinking. it as merged with the SMBH. We assume that at this +point the dynamics of the BH will be determined by GW +emission, shrinking and circularizing the BH’s orbit un- +til it undergoes an extreme or intermediate mass ratio +inspiral (EMRI and IMRI, respectively). The righthand +plot in Figure 3 shows the BH masses versus time of +merger. It is interesting to note that even in the ab- +sence of relaxation processes, which are often invoked +to explain the formation of EMRIs, EMRIs and notably +IMRIs can form in this region. +2.9. Two Body Relaxation Processes +A BH orbiting the SMBH experiences weak gravita- +tional interactions with other objects in the GN. Over a +relaxation time, these interactions alter its orbit about +the SMBH.
The two-body relaxation timescale for a +single-mass system is: +$t_{\rm relax} = 0.34 \frac{\sigma^3}{G^2 \rho M_* \ln \Lambda_{\rm rlx}}$, (10) +where $\ln \Lambda_{\rm rlx}$ is the Coulomb logarithm and $M_*$ is the +average mass of the surrounding objects, here assumed +to be $1\,M_\odot$ (Spitzer 1987; Binney & Tremaine 2008, +Eq. (7.106)). This equation represents the approximate +timescale for a BH on a semi-circular orbit to change +its orbital energy and angular momentum by order of +themselves. The BH experiences diffusion in its angular +momentum and energy as a function of time (depending +on the eccentricity of the orbit, this process can be more +efficient; Fragione & Sari 2018; Sari & Fragione 2019). +Relaxation can cause the orbit of an object in a GN to +reach high eccentricities. If the object is a BH, it can +spiral into the SMBH and form an EMRI, while a star +IMBH Formation in Galactic Nuclei 7 +can be tidally disrupted by the SMBH (e.g., Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Met- +zger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α . We note that the Bah- +call & Wolf (1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate +objects inward. +Additionally, because BHs are more massive on av- +erage than the surrounding objects, they are expected +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). +They sink toward the SMBH on the mass segregation +timescale, $t_{\rm seg} \approx M_* / m_{\rm BH} \times t_{\rm relax}$ (e.g., Spitzer 1987; +Fregeau et al. 2002; Merritt 2006), which is typically an +order of magnitude smaller than the relaxation timescale +plotted in Figure 1.
+We incorporate relaxation processes by introducing a +small change in the BH’s energy and angular momen- +tum each time it orbits the SMBH. We apply a small +instantaneous velocity kick to the BH, denoted as ∆ v . +We draw ∆v from a Gaussian distribution with average +of zero and a standard deviation of $\Delta v_{\rm rlx} / \sqrt{3}$, where +$\Delta v_{\rm rlx} = v_\bullet \sqrt{P_\bullet / t_{\rm rlx}}$ (see Bradnick et al. 2017, for an +approach to changes in the angular momentum). The +new orbital parameters can be calculated following Lu +& Naoz (2019), and see Naoz et al. (2022) for the full +set of equations. +We account for the effects of relaxation processes, +including mass-segregation, using a multi-faceted ap- +proach. We begin by migrating each BH towards the +center over its mass-segregation timescale, shifting it in- +crementally inward such that its orbital energy changes +by order of itself within the segregation timescale. +As the BHs segregate down the potential well, their +abundance with respect to stars increases, until at some +turnover radius, BHs become the dominant source of +scattering for both black holes and stars. Within this ra- +dius, BH self-interaction dominates over two-body scat- +terings with the now rarer main-sequence stars. The +BHs will then settle onto a Bahcall-Wolf profile, while +the stars may follow a shallower profile, with approx- +imately $n_\star \propto r^{-1.5}$, inwards of the transition radius +(Linial & Sari in prep.). +Therefore, after the initial mass segregation, we allow +the BHs to begin diffusing over a relaxation timescale, +their orbital parameters changing slowly through a ran- +dom process. In this random process, some of the BHs +may migrate closer to the SMBH. We terminate mass growth when the BH enters the inner 200 au of the GN, +within which the density of stars is uncertain. This cut- +off is based on the 120 au pericenter of S0-2, the closest +known star to the SMBH (e.g., Ghez et al. 2005).
+Another physical process that causes inward migra- +tion is dynamical friction. A cursory derivation based +on the dynamical friction equations described in Binney +& Tremaine (2008) reveals the process to have a simi- +lar timescale to mass segregation. If a BH diffuses to +a distance greater than 2 pc from the SMBH, exiting +the sphere of influence, we have it sink inwards, back +towards the center, over a dynamical friction timescale. +After one dynamical friction timescale has passed, we +restart diffusion. +We note that our prescription ignores self-interactions +between the BHs. As mentioned above, as the BHs sink +towards the SMBH, their concentration in the inner re- +gion of the GN increases, allowing them to dominate the +scattering. We reserve the inclusion of these interactions +for future study. +2.10. Effect of Relaxation Processes +As depicted in Figure 4, two-body relaxation processes +result in more EMRI and IMRI events. These pro- +cesses allow BHs that begin further from the SMBH +to migrate inwards and grow more efficiently in mass. +However, they also impede the growth of BHs that are +initially closer to the SMBH by allowing them to dif- +fuse out of the inner region where collisions are efficient. +As can be seen in Figure 4, the net result is that more +BHs grow, but the maximum mass is lower compared +to the scenario that ignores two-body relaxation. The +histogram in Figure 4 presents the final BH mass distri- +butions for different power law indices α . As expected, +the two-body relaxation suppresses the α dependence +highlighted in Figure 3. In fact, using a KS test, we +find that we cannot reject the hypothesis that the two +distributions were drawn from the same sample for the +α = 1.75 and α = 2 results. Interestingly, a BH mass +IMF with an average of $10\,M_\odot$ leads to a final distri- +bution with an average of $\sim 200\,M_\odot$ and a median of +$\sim 45\,M_\odot$, which lies within the mass gap. +3.
DISCUSSION AND PREDICTIONS +We explore the feasibility of forming IMBHs in a +GN through successive collisions between a stellar-mass +BH and main-sequence stars. Taking both a statisti- +cal and analytic approach, we show that this channel +can produce IMBHs efficiently with masses as high as +$10^{3-4}\,M_\odot$ and may result in many IMBH-SMBH merg- +ers (intermediate-mass ratio inspirals, or IMRIs) and +EMRIs. +8 Rose et al. +Figure 4. Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, +more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses +for three different values of α : 1.5 (orange, solid), 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). +Despite the substantially reduced accretion, BHs in the mass gap still form. +As the stellar mass BH collides with a star, the BH +will grow in mass. The increase may equal the star’s en- +tire mass if the relative velocity is smaller than the es- +cape velocity from the BH at $1\,R_\odot$. However, near the +SMBH, the velocity dispersion may be larger than the +escape velocity from the BH at the star’s radius. In this +limit, the BH captures a “tunnel” of material through +the star, estimated using Bondi-Hoyle-Lyttleton accre- +tion.
In our statistical analysis, we account for Bondi- +Hoyle-Lyttleton accretion and find that BHs outside of +10 −2 + pc from the SMBH can capture the entire star (see +Figure2). +The efficiency of collisions, and therefore IMBH, +EMRI, and IMRI formation as well, are sensitive to +the underlying stellar density. As shown in Figure3, a +steeper density profile results in larger IMBHs. This be- +havior can be understood from the collision timescale’s +dependence on the stellar density profile. A steeper pro- +file yields shorter collision timescales near the SMBH. +However, the inclusion of relaxation processes in the +simulations dampens the influence of the stellar density +profile by allowing BHs to diffuse into regions of more +or less efficient growth. As a result, more BHs grow in +mass, but their maximum mass is smaller ( ∼ 104 + M + ). +Additionally, the final masses have no apparent depen- +dence on distance from the SMBH (see Figure4). +Most simulations in our study assume that the BHs +accrete all of the mass that they capture. The final BH +masses can be taken as an upper limit. We note that +the accretion is a highly uncertain process and repre- +sents an active field of study (e.g.,Blandford & Begel- +man1999;Park & Ostriker2001;Narayan et al.2003; +Igumenshchev et al.2003;Ohsuga et al.2005;Yuan +et al.2012;Jiang et al.2014;McKinney et al.2014; +Narayan et al.2022). To assess the limits of our model, we also consider a physically motivated accretion model, +momentum-driven winds (Section2.5). We present the +final mass distribution for momentum-driven winds in +Figure4. Importantly, we find that BHs within the +mass gap still form naturally despite the substantially +reduced accretion. About 5% of the BHs grow by 10 +to 100 M + . Furthermore, if we increase this ∆M esti- +mate by a factor of 2 (i.e., use η = 0. 05), the simula- +tion produces a 3. 5 × 10 3 + M + IMBH for the same initial +conditions. 
Our proof-of-concept demonstrates that col- +lisions between BH and stars are an important process +that should be taken into account in dense places such +as a GN. +Mass growth through BH-main-sequence star colli- +sions may act in concert with other IMBH formation +channels, such as compact ob ject binary mergers (e.g., +Hoang et al.2018;Stephan et al.2019;Fragione et al. +2021;Wang et al.2021). While in some cases colli- +sions can unbind a binary (e.g.,Sigurdsson & Phinney +1993;Fregeau et al.2004), BH binaries can be tightly +bound enough to withstand the collisions. Wide bina- +ries may also become unbound due to interactions with +the neighboring stars and compact ob jects (e.g.,Binney +& Tremaine1987;Rose et al.2020, see latter study for +the timescale for an arbitrary eccentricity). However, +as highlighted in previous studies, a substantial frac- +tion of these binaries may merge due to the Eccentric +Kozai Lidov mechanism, leaving behind a single star or +a single compact ob ject (e.g.,Stephan et al.2016,2019; +Hoang et al.2018). Additionally, to be susceptible to +evaporation, BH binaries must have a wider configura- +tion. Otherwise, they will be more tightly bound than +the average kinetic energy of the surrounding ob jects +and will only harden through weak gravitational inter- +IMBH Formation in Galactic Nuclei 9 +actions with neighboring stars (see for example Figure +6 inRose et al.2020). +We note that we assume a steady-state and treat the +stars as a reservoir in this model. Future work will take a +more nuanced approach to the background stars, whose +density as a function of time can be influenced by several +factors. Firstly, the relaxation of the stellar population +occurs on Gyr timescales. 
Some studies have suggested +that in situ star formation can occur in the Galactic +Center as close as 0.04 pc from the SMBH (e.g.,Levin +& Beloborodov2003;Paumard et al.2006), and star +formation episodes can occur as often as every ∼ 5 Myr +(e.g.Lu et al.2009). Therefore, we expect that after +the first Gyr, stars within 0 .01 pc will be replenished +at intervals consistent with the star formation episodes; +the infalling populations of stars are separated by ∼ +5 − 10 Myr, which is shorter than the collision timescale. +However, star-star collisions may complicate this pic- +ture within ∼ 0. 01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar popula- +tion to interact with outside of ∼ 0. 01 pc.5 + At 0 . 01 pc, +however, the kinetic energy during a collision between +two 1 M + stars is larger than their binding energies. +Collisions can therefore thin out the stellar populations +during the time it takes them to diffuse to these small +radii, 0 .01 pc, and may reduce the BH growth in the +innermost region. We reserve the inclusion of star-star +collisions for future work. We also note that the disrup- +tion of binary stars by the SMBH may help replenish +the stellar population even as collisions work to deplete +it (e.g.,Balberg et al.2013); when a binary is disrupted, +one of the stars is captured on a tightly bound orbit +about the SMBH. +An IMBH may also affect the stellar density profile. +As it spirals into the SMBH, it can perturb stellar orbits, +and these interactions can lead to hypervelocity stars +(e.g.,Baumgardt et al.2006a;L¨ockmann & Baumgardt +2008).L¨ockmann & Baumgardt(2008) show that an +IMBH can modify an initially steep stellar density pro- +file to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replen- +ished on 100 Myr timescales (Baumgardt et al.2006a). 
+Therefore, after the formation of the first few IMBHs, +subsequent BH growth may occur in bursts, coinciding +with replenishment of the stars. +While there are many competing dynamical processes +that shape the stellar density profile, we stress that α +5 + In fact, the star-star collision timescale is greater than 10 Myr +for the entire parameter space, save at 0. 001 pc for larger values +of α ; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observa- +tions must already reflect ongoing processes like star- +star collisions and replenishment.Sch¨odel et al.(2018) +find the observed stellar mass enclosed within 0.01 pc of +the Milky Way’s Galactic Center to be approximately +180 M + . This estimate is consistent to order of magni- +tude with our α = 1.25 case. In a simulation like those +depicted in Figure 4, which include relaxation, α = 1. 25 +leads to a maximum IMBH mass of 140 M + . Further- +more, while the stellar mass within 0.01 pc may be a +few hundred M + ,Do et al.(2019) andGRAVITY Col- +laboration et al.(2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thou- +sand M + , or 0. 1% of the central mass. This upper limit +can include mass that was previously in stars but is now +in BHs. In that case, the 180 M + is what remains of the +stars, while BHs and IMBHs make up the ∼ 1000 M + +in the innermost region. +Also not included in this study, collisions between the +BH and other compact ob jects will increase the BH +growth rate. BH-BH mergers (e.g.,O’Leary et al.2009; +Fragione et al.2021) and even neutron star BH mergers +(e.g.,Hoang et al.2020) become more likely as the BHs +increase in mass through stellar collisions. 
As a result, +the BH-BH collision timescale, discussed in Section 2.2, +will become relevant to our simulations, allowing the +BHs to grow through this channel in addition to stel- +lar collisions. Additionally, these compact object mergers +result in GW recoil, which may have a large impact on +the dynamics (e.g., Baibhav et al. 2020; Fragione et al. +2021). +The BH’s mass growth increases GW emission, which +dissipates energy from the orbit. Along with relaxation, +GW emission causes BHs to sink towards the SMBH +and eventually undergo a merger. As a result, the GN +environment is conducive to the formation of EMRIs +and IMRIs. The GW emission from EMRIs and IM- +RIs is expected to be at mHz frequencies, making them +promising candidates for LISA to observe. While the +exact rate calculation is beyond the scope of this study, +the mechanism outlined here seems very promising. +Our results also suggest that BHs within the mass gap +as well as IMBHs likely exist in many galactic nuclei, as +well as within our own galactic center. This implication +seems to be consistent with recent observational and +theoretical studies (e.g., Hansen & Milosavljević 2003; +Maillard et al. 2004; Gürkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madi- +gan 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). +10 Rose et al. +Lastly, the collisions between stellar mass BHs and +stars may contribute to the X-ray emission from our +galactic center (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kre- +mer et al. (2022) for a discussion of electromagnetic sig- +natures from BH-star collisions) 6 + . These interactions, +in particular grazing collisions, may also result in tidal +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kre- +mer et al. 2021). Thus, the process outlined here may +produce electromagnetic signatures in addition to GW +mergers.
+We thank the anonymous referee for useful comments. +We also thank Jessica Lu, Fred Rasio, Kyle Kremer, +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use- +ful discussion. +SR thanks the Charles E. Young Fellowship, the Nina +Byers Fellowship, and the Michael A. Jura Memorial +Graduate Award for support. SR and SN acknowledge +the partial support from NASA ATP 80NSSC20K0505. +SN thanks Howard and Astrid Preston for their gener- +ous support. IL thanks support from the Adams Fellow- +ship. SN and RS thank the Bhaumik Institute visitor +program. This work was performed in part at the As- +pen Center for Physics, which is supported by National +Science Foundation grant PHY-1607611. + REFERENCES +Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, +PhRvL, 116, 241102, +doi:10.1103/PhysRevLett.116.241102 +—. 2017a, PhRvL, 118, 221101, +doi:10.1103/PhysRevLett.118.221101 +—. 2017b, PhRvL, 119, 141101, +doi:10.1103/PhysRevLett.119.141101 +Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, +doi:10.3847/2041- 8205/830/1/L1 +Alexander, T. 1999, ApJ, 527, 835, doi:10.1086/308129 +Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, +doi:10.1088/0004- 637X/780/2/148 +Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, +doi:10.1007/s41114- 018-0013- 8 +6 + The connection between the observed X-ray sources at the Galac- +tic Center and tidal capture has been suggested byGenerozov +et al.(2018), but seeZhu et al.(2018);Stephan et al.(2019) for +alternative channels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. +2021, arXiv e-prints, arXiv:2109.12119. +https://arxiv.org/abs/2109.12119 +Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, +doi:10.1086/154711 +Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, +043002, doi:10.1103/PhysRevD.102.043002 +Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, +doi:10.1093/mnrasl/slt071 +Baumgardt, H., Gualandris, A., & Portegies Zwart, S. 
+2006a, MNRAS, 372, 174, +doi:10.1111/j.1365- 2966.2006.10818.x +Baumgardt, H., Hopman, C., Portegies Zwart, S., & +Makino, J. 2006b, MNRAS, 372, 467, +doi:10.1111/j.1365- 2966.2006.10885.x +Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, +613, 1143, doi:10.1086/423299 +Begelman, M. C. 1979, MNRAS, 187, 237, +doi:10.1093/mnras/187.2.237 +—. 2012a, ApJL, 749, L3, doi:10.1088/2041- 8205/749/1/L3 +IMBH Formation in Galactic Nuclei 11 +—. 2012b, MNRAS, 420, 2912, +doi:10.1111/j.1365- 2966.2011.20071.x +Begelman, M. C., Volonteri, M., & Rees, M. J. 2006, +MNRAS, 370, 289, doi:10.1111/j.1365-2966.2006.10467.x +Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, +890, 113, doi:10.3847/1538- 4357/ab6d77 +—. 2020b, ApJ, 890, 113, doi:10.3847/1538- 4357/ab6d77 +Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. +2009, New Journal of Physics, 11, 105016, +doi:10.1088/1367- 2630/11/10/105016 +Binney, J., & Tremaine, S. 1987, Galactic dynamics +—. 2008, Galactic Dynamics: Second Edition +Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, +doi:10.1086/342655 +Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, +L1, doi:10.1046/j.1365-8711.1999.02358.x +Blandford, R. D., & Zna jek, R. L. 1977, MNRAS, 179, 433, +doi:10.1093/mnras/179.3.433 +Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, +427, doi:10.1086/500727 +Bondi, H. 1952, MNRAS, 112, 195, +doi:10.1093/mnras/112.2.195 +Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, +doi:10.1093/mnras/104.5.273 +Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, +2042, doi:10.1093/mnras/stx1007 +Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, +860, 14, doi:10.3847/1538- 4357/aac2c4 +Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, +C. 2012, JCAP, 2012, 054, +doi:10.1088/1475- 7516/2012/07/054 +Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. +2010, Reviews of Modern Physics, 82, 3069, +doi:10.1103/RevModPhys.82.3069 +Chen, X., & Liu, F. K. 
2013, ApJ, 762, 95, +doi:10.1088/0004- 637X/762/2/95 +Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, +doi:10.3847/1538- 4357/aaba16 +Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, +MNRAS, 450, 4411, doi:10.1093/mnras/stv694 +Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., +et al. 1996, Science, 272, 1286, +doi:10.1126/science.272.5266.1286 +Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, +doi:10.1086/156685 +Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, +doi:10.1111/j.1365- 2966.2005.09937.x +Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. +2009, MNRAS, 393, 1016, +doi:10.1111/j.1365- 2966.2008.14254.x Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +MNRAS, 508, 3045, doi:10.1093/mnras/stab2783 +Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. +C. N. 2021a, MNRAS, 505, 2186, +doi:10.1093/mnras/stab1428 +Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, +T. C. N. 2021b, MNRAS, 503, 1051, +doi:10.1093/mnras/stab402 +De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. +2005, ApJ, 620, 878, doi:10.1086/427142 +Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, +MNRAS, 487, 2947, doi:10.1093/mnras/stz1453 +Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, +MNRAS, 507, 5132, doi:10.1093/mnras/stab2390 +Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, +doi:10.1126/science.aav8137 +Ebisuzaki, T., Makino, J., Tsuru, T. G., et al. 2001, ApJL, +562, L19, doi:10.1086/338118 +Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, +110, 221101, doi:10.1103/PhysRevLett.110.221101 +Edgar, R. 2004, NewAR, 48, 843, +doi:10.1016/j.newar.2004.06.001 +Escala, A. 2021, ApJ, 908, 57, +doi:10.3847/1538- 4357/abd93c +Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, +Monthly Notices of the Royal Astronomical Society, 443, +2410, doi:10.1093/mnras/stu1280 +Fishbach, M., Farr, W. M., & Holz, D. E. 
2020, ApJL, 891, +L31, doi:10.3847/2041-8213/ab77c9 +Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, +arXiv e-prints, arXiv:2107.04639. +https://arxiv.org/abs/2107.04639 +Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, +ApJ, 897, 46, doi:10.3847/1538-4357/ab94b2 +Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, +L26, doi:10.3847/2041-8213/abbc0a +Fragione, G., & Sari, R. 2018, ApJ, 852, 51, +doi:10.3847/1538- 4357/aaa0d7 +Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & +Rasio, F. A. 2004, MNRAS, 352, 1, +doi:10.1111/j.1365- 2966.2004.07914.x +Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & +Rasio, F. A. 2002, ApJ, 570, 171, doi:10.1086/339576 +Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, +649, 91, doi:10.1086/506193 +Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, +doi:10.3847/1538- 4357/ab94bc +Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, +J. P. 2018, MNRAS, 478, 4030, +doi:10.1093/mnras/sty1262 +12 Rose et al. +Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of +Modern Physics, 82, 3121, +doi:10.1103/RevModPhys.82.3121 +Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, +doi:10.1086/377127 +Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, +620, 744, doi:10.1086/427175 +Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, +860, 5, doi:10.3847/1538- 4357/aabfee +Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, +908, L29, doi:10.3847/2041- 8213/abdf5b +GRAVITY Collaboration, Abuter, R., Amorim, A., et al. +2020, A&A, 636, L5, doi:10.1051/0004- 6361/202037813 +Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, +doi:10.1088/0004- 637X/705/1/361 +G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, +640, L39, doi:10.1086/503295 +G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, +doi:10.1086/430694 +Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, +556, 70, doi:10.1038/nature25029 +Hansen, B. M. S., & Milosavljevi´c, M. 
2003, ApJL, 593, +L77, doi:10.1086/378182 +Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., & +Hartmann, D. H. 2003, ApJ, 591, 288, +doi:10.1086/375341 +Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & +Dosopoulou, F. 2018, ApJ, 856, 140, +doi:10.3847/1538- 4357/aaafce +Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, +doi:10.3847/1538- 4357/abb66a +Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, +doi:10.1086/431475 +Igumenshchev, I. V. 2008, ApJ, 677, 317, +doi:10.1086/529025 +Igumenshchev, I. V., Narayan, R., & Abramowicz, M. A. +2003, ApJ, 592, 1042, doi:10.1086/375769 +Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, +106, doi:10.1088/0004-637X/796/2/106 +Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the +Royal Astronomical Society, 374, 1557, +doi:10.1111/j.1365- 2966.2006.11275.x +Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & +Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. +https://arxiv.org/abs/2201.12368 +Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, +doi:10.3847/1538- 4357/abeb14 +Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, +45, doi:10.3847/1538-4357/abb945 Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi:10.1093/mnras/staa2276 +Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, +doi:10.1086/376675 +Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, +doi:10.3847/1538- 4365/aacb24 +—. 2018b, ApJS, 237, 13, doi:10.3847/1538- 4365/aacb24 +L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, +doi:10.1111/j.1365- 2966.2007.12699.x +Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, +doi:10.1093/mnras/stz036 +Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, +690, 1463, doi:10.1088/0004- 637X/690/2/1463 +Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, +doi:10.1086/319848 +Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, +doi:10.1046/j.1365- 8711.1999.02853.x +Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 
+2004, A&A, 423, 155, doi:10.1051/0004- 6361:20034147 +Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, +M., & Artale, M. C. 2021a, arXiv e-prints, +arXiv:2109.06222.https://arxiv.org/abs/2109.06222 +Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, +MNRAS, 505, 339, doi:10.1093/mnras/stab1334 +Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. +2021, MNRAS, 505, 3314, doi:10.1093/mnras/stab1409 +McKinney, J. C. 2006, MNRAS, 368, 1561, +doi:10.1111/j.1365- 2966.2006.10256.x +McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, +doi:10.1086/422244 +McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & +Narayan, R. 2014, MNRAS, 441, 3177, +doi:10.1093/mnras/stu762 +Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, +doi:10.1088/0034- 4885/69/9/R01 +Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, +doi:10.1086/317837 +Morris, M. 1993, ApJ, 408, 496, doi:10.1086/172607 +Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, +622, L113, doi:10.1086/429721 +Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, +ApJS, 181, 110, doi:10.1088/0067-0049/181/1/110 +Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, +618, 569, doi:10.1086/426067 +Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, +L18, doi:10.3847/2041-8213/ac574b +Naoz, S., & Silk, J. 2014, ApJ, 795, 102, +doi:10.1088/0004- 637X/795/2/102 +Naoz, S., Silk, J., & Schnittman, J. D. 2019, ApJL, 885, +L35, doi:10.3847/2041-8213/ab4fed +IMBH Formation in Galactic Nuclei 13 +Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, +888, L8, doi:10.3847/2041- 8213/ab5e3b +Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & +Curd, B. 2022, MNRAS, 511, 3795, +doi:10.1093/mnras/stac285 +Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. +2003, PASJ, 55, L69, doi:10.1093/pasj/55.6.L69 +Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, +ApJ, 628, 368, doi:10.1086/430728 +O’Leary, R. M., Kocsis, B., & Loeb, A. 
2009, MNRAS, 395, +2127, doi:10.1111/j.1365-2966.2009.14653.x +O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., +& O’Shaughnessy, R. 2006, ApJ, 637, 937, +doi:10.1086/498446 +Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, +D. 2010, ApJ, 722, 642, +doi:10.1088/0004- 637X/722/1/642 +Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, +doi:10.1086/319042 +Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, +1011, doi:10.1086/503273 +Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, +Stephen R., J. 2016, ApJ, 823, 113, +doi:10.3847/0004- 637X/823/2/113 +Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, +435, doi:10.1103/PhysRev.131.435 +—. 1963b, Physical Review, 131, 435, +doi:10.1103/PhysRev.131.435 +Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J., +& McMillan, S. L. W. 2004, Nature, 428, 724, +doi:10.1038/nature02448 +Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, +528, L17, doi:10.1086/312422 +—. 2002, ApJ, 576, 899, doi:10.1086/341798 +Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, +doi:10.1088/0004- 637X/780/2/187 +Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, +A56, doi:10.1051/0004-6361/202037710 +Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, +MNRAS, doi:10.1093/mnras/stac231 +Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & +Rasio, F. A. 2018, PhRvL, 120, 151101, +doi:10.1103/PhysRevLett.120.151101 +Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, +PhRvD, 93, 084029, doi:10.1103/PhysRevD.93.084029 +Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, +Phys. Rev. D, 100, 043027, +doi:10.1103/PhysRevD.100.043027 +Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, +113, doi:10.3847/1538-4357/abc557 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., +& Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. +https://arxiv.org/abs/2009.01213 +Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 
2017, +MNRAS, 472, 1677, doi:10.1093/mnras/stx2044 +Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, +100, 043009, doi:10.1103/PhysRevD.100.043009 +Sari, R., & Fragione, G. 2019, ApJ, 885, 24, +doi:10.3847/1538- 4357/ab43df +Schneider, R., Ferrara, A., Natara jan, P., & Omukai, K. +2002, The Astrophysical Journal, 571, 30, +doi:10.1086/339917 +Schnittman, J. D., & Buonanno, A. 2007, ApJL, 662, L63, +doi:10.1086/519309 +Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, +609, A27, doi:10.1051/0004- 6361/201730452 +Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, +doi:10.1086/156521 +Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, +MNRAS, 217, 367, doi:10.1093/mnras/217.2.367 +Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, +K. 2016, MNRAS, 456, 500, doi:10.1093/mnras/stv2700 +Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, +doi:10.1086/173190 +Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739, +doi:10.1093/mnras/stx1576 +—. 2017b, MNRAS, 470, 4739, doi:10.1093/mnras/stx1576 +Spitzer, L. 1987, Dynamical evolution of globular clusters +Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv +e-prints.https://arxiv.org/abs/1603.02709 +—. 2019, ApJ, 878, 58, doi:10.3847/1538- 4357/ab1e4d +Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, +MNRAS, 467, 4180, doi:10.1093/mnras/stx097 +Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, +doi:10.1093/mnras/stv2281 +The LIGO Scientific Collaboration, the Virgo +Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, +arXiv:2009.01075.https://arxiv.org/abs/2009.01075 +—. 2020b, arXiv e-prints, arXiv:2009.01190. +https://arxiv.org/abs/2009.01190 +Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A. +2012, ApJ, 750, 31, doi:10.1088/0004- 637X/750/1/31 +Valiante, R., Schneider, R., Volonteri, M., & Omukai, K. +2016, Monthly Notices of the Royal Astronomical +Society, 457, 3356, doi:10.1093/mnras/stw225 +Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, +G. N. 
2021, MNRAS, 504, 146, +doi:10.1093/mnras/stab842 +14 Rose et al. +Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & +Breivik, K. 2021, ApJ, 917, 76, +doi:10.3847/1538- 4357/ac088d +Wang, J., & Merritt, D. 2004, ApJ, 600, 149, +doi:10.1086/379767 +Woosley, S. E. 2017, ApJ, 836, 244, +doi:10.3847/1538- 4357/836/2/244 +Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, +doi:10.1046/j.1365- 8711.2002.05532.x +Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, +doi:10.1088/0004- 637X/761/2/129 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +2014, Monthly Notices of the Royal Astronomical +Society, 440, 1263, doi:10.1093/mnras/stu351 +Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, +arXiv:2011.04653.https://arxiv.org/abs/2011.04653 +Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26, +doi:10.3847/1538- 4365/aab14f diff --git a/read/results/playa/2201.00029.txt b/read/results/playa/2201.00029.txt new file mode 100644 index 0000000..1901ba9 --- /dev/null +++ b/read/results/playa/2201.00029.txt @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/read/results/playa/2201.00037.txt b/read/results/playa/2201.00037.txt new file mode 100644 index 0000000..52535ee --- /dev/null +++ b/read/results/playa/2201.00037.txt @@ -0,0 +1,2848 @@ +Confidential manuscript submitted to JGR-Planets +The influence of a fluid core and a solid inner core on the +Cassini sate of Mercury +Mathieu Dumberry 1 +1 + Department of Physics, University of Alberta, Edmonton, Alberta, Canada. +Key Points: +• + The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid +planet by no more than 0.01 arcmin. +• + For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid +cores into a common precession motion. +• + The larger the inner core is, the more the obliquity of the polar moment of inertia ap- +proaches that expected for a rigid planet. 
+Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca +–1–arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021 +Confidential manuscript submitted to JGR-Planets +Abstract +We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core +and a mantle. Our model includes inertial and gravitational torques between interior regions, +and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show +that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of +the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The man- +tle obliquity decreases with increasing inner core size, but the change between a large and no +inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the in- +ner core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and +solid cores into a common precession motion. Because of the strong gravitational coupling be- +tween the mantle and inner core, the larger the inner core is, the more this co-precessing core +is brought into an alignment with the mantle, and the more the obliquity of the polar moment +of inertia approaches that expected for a rigid planet. The misalignment between the polar mo- +ment of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 ar- +cmin. Our results imply that the measured obliquities of the mantle spin axis and polar mo- +ment of inertia should coincide at the present-day level of measurement errors, and cannot be +distinguished from the obliquity of a rigid planet. +Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precess- +ing about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the +same rate, an equilibrium known as a Cassini state. 
The angle between the spin axis and the +normal to the orbital plane is known as the obliquity and remains fixed. Observations have con- +firmed that Mercury’s obliquity matches, within measurement errors, the theoretical predic- +tion based on an entirely rigid planet. However, we know that Mercury has a large metallic core +which is liquid, although the central part may be solid. In this work, we investigate how the +presence of a fluid and solid core affect the Cassini state of Mercury. We show that the inter- +nal coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliq- +uity of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an +offset smaller than the present-day error in measurements. We also show that the larger the +solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body. +1 Introduction +Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spin- +symmetry axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo , +1966; Peale , 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but +its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot , +2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is +reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 +yr with an inclination angle of I = 8.5330 ◦ + between the orbit and Laplace plane normals [ Ba- +land et al., 2017]. Measurements of the obliquity ε +m , defined as the angle of misalignment be- +tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques, +including ground based radar observations [Margot et al. , 2007, 2012], and stereo digital ter- +rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al. 
, 2014; Verma and Mar- +got , 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvi- +ronment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, +all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals +and consistent with a Cassini state. Furthermore, the observed obliquity angle (2. 042 ± 0 .08 +–2– +Confidential manuscript submitted to JGR-Planets +Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded +rectangle) and the Cassini state of Mercury. The normal to the orbital plane ( ˆe I +3 ) is offset from the nor- +mal to the Laplace plane ( ˆe L +3 ) by an angle I = 8. 5330◦ + . The symmetry axis of the mantle ˆe p +3 is offset +from ˆe I +3 by ε + m ≈ 2 arcmin. ˆe I +3 and ˆe p +3 are coplanar with, and precess about, ˆe L +3 in a retrograde direction +at frequency Ω + p = 2 π/325, 513 yr− 1 + . The blue (orange) shaded region indicates the portion of the orbit +when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. +arcmin [ Margot et al. , 2012], 2.029 ± 0. 085 arcmin [Stark et al., 2015a] and 1.968 ± 0 .027 [ Gen- +ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. +The prediction of Mercury’s obliquity is based on the assumption that the whole planet +precesses as a single body. However, we know that Mercury has a fluid core from two main lines +of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by +dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion +in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the ob- +served amplitude of the 88-day longitudinal libration is approximately twice as large as that +expected if Mercury were librating as a rigid body [ Margot et al. , 2007, 2012; Stark et al., 2015a]. 
+This indicates that it is only the mantle that librates, and that the outer part of the core is fluid. +These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that +its outermost part must be. A solid inner core may have nucleated at the centre although its +size is not well constrained. Inner core growth leads to planetary contraction, and the inferred +radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al. , 2014] places an +approximate limit of 800 km on the inner core radius [ Grott et al. , 2011]. However, the inner +core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. +–3– +Confidential manuscript submitted to JGR-Planets +With a fluid core, and possibly a solid inner core, the observed obliquity ε + m reflects the +orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dis- +sipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin- +symmetry axis of the inner core should both also precess about the normal to the Laplace plane +in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the +plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek , 2016], although +their obliquity angles may be different than ε + m . Whether the spin axis of the fluid core is brought +into an alignment with the mantle obliquity depends primarily on the pressure torque (also re- +ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the +misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´e , 1910]. The more flat- +tened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained +into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s +CMB is not known. 
But if one assumes that the topography of the CMB coincides with an equipo- +tential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the up- +per mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core +into a close alignment with the mantle [ Peale et al. , 2014]. The spin axis of the fluid core is not +expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close +that the resulting mantle obliquity does not differ much from that of a single body planet. Fur- +thermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the mis- +alignment between the mantle and core [Peale et al. , 2014]. +If an inner core is present, its obliquity angle is determined by the sum of the torques act- +ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal- +ogous to the torque applied on the tilted mantle that sets the obliquity ε + m . In addition, the +tilt of the inner core also depends on the gravitational torque imposed by the mantle and the +pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav- +itational torque dominates, the inner core tilt is expected to remain closely aligned with the +mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should in- +stead be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM cou- +pling at the ICB should also enforce a closer alignment between the rotation vectors of the in- +ner core and fluid core. +It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mer- +cury is inferred [e.g. Peale , 1976; Margot et al. , 2018]. Inherent in this calculation is the built- +in assumption that the mantle obliquity does not deviate from that of a rigid planet by a sub- +stantial amount. However, the recent study by Peale et al. 
[2016] suggests that the inner core +can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the +orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the as- +sumption that the observed obliquity reflects the orientation of the whole planet. +Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis +does not coincide with the orientation of the polar moment of inertia of the whole planet. This +can introduce a systematic offset between different types of obliquity measurements. Those based +on tracking topographic features [ Margot et al. , 2007, 2012; Stark et al., 2015a] capture the obliq- +uity of the mantle spin axis. While those based on the orientation of the gravity field [ Mazarico +et al., 2014; Verma and Margot , 2016; Genova et al., 2019; Konopliv et al., 2020] are instead +tied to the orientation of the principal moment of inertia of the whole planet. An offset of the +obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the +size of the inner core, even though this is difficult to do at present because the different esti- +mates of the obliquity of the gravity field do not match well with one another. +–4– +Confidential manuscript submitted to JGR-Planets +There is thus a significant interest in properly assessing how the presence of a solid in- +ner core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a +model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model +is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumb- +erry and Wieczorek , 2016; Stys and Dumberry , 2018; Organowski and Dumberry , 2020]. The +specific questions that motivate our study are the following. 
First, we want to determine how +large the misaligned obliquities of the fluid core and solid inner core can be and how they de- +pend on model parameters. Second, we want to assess by how much the mantle obliquity may +differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spin- +symmetry axis of the mantle and gravity field may differ. +2 Theory +2.1 The interior structure of Mercury +Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid +outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted +by r +s , r +f , r +m , and R , and their densities by ρ + s , ρ + f , ρ + m , and ρ + c , respectively. The inner core ra- +dius r +s corresponds to the ICB radius, the fluid core radius r +f to the CMB radius, and R = +2439 .36 km to the planetary radius of Mercury. Compressibility effects from increasing pres- +sure with depth are not negligible in the core of Mercury. However adopting uniform densities +simplifies the analytical expressions of the model while still capturing the first order rotational +dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same +strategy facilitates comparisons between our results. +We build our interior model as detailed in Peale et al. [2016]. We first specify r +s , ρ +s (or +a density contrast at the ICB), the crustal density ρ + c and crustal thickness h = R − r +m . The +three unknowns r +f , ρ +f and ρ + m are then solved such that the interior model is consistent with +the known mass M and chosen values of the moments of inertia of the whole planet C and that +of the mantle and crust C + m . +Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) +by + i , defined as the difference between the mean equatorial and polar radii, divided by the mean +spherical radius. 
Likewise, we denote the equatorial flattening by the variable $\xi_i$, defined as the +difference between the maximum and minimum equatorial radii, divided by the mean spher- +ical radius. As above, we use the subscript $i = s, f, m$ and $r$, to denote the polar or equa- +torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. +The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their +numerical values are given in Table 1. We then assume that the ICB and CMB are both at hy- +drostatic equilibrium with the imposed gravitational potential induced by the flattenings at the +CrMB and surface. The flattenings at all interior boundaries are specified such that they are +consistent with the observed degree 2 spherical harmonic coefficients of gravity $J_2$ and $C_{22}$; their +numerical values are given in Table 1. Specifically, $J_2$ and $C_{22}$ are connected to the principal +moments of inertia of Mercury ($C > B > A$) and to the polar and equatorial flattenings by +$$J_2 = \frac{C - \bar{A}}{M R^2} = \frac{8\pi}{15} \frac{1}{M R^2} \left[ (\rho_s - \rho_f) r_s^5 \epsilon_s + (\rho_f - \rho_m) r_f^5 \epsilon_f + (\rho_m - \rho_c) r_m^5 \epsilon_m + \rho_c R^5 \epsilon_r \right], \tag{1a}$$ +$$C_{22} = \frac{B - A}{4 M R^2} = \frac{8\pi}{15} \frac{1}{4 M R^2} \left[ (\rho_s - \rho_f) r_s^5 \xi_s + (\rho_f - \rho_m) r_f^5 \xi_f + (\rho_m - \rho_c) r_m^5 \xi_m + \rho_c R^5 \xi_r \right], \tag{1b}$$ +where $\bar{A}$ is the mean equatorial moment of inertia defined below. The same procedure was used +in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry +–5– +Confidential manuscript submitted to JGR-Planets +Mercury Parameter Numerical value Reference +mean motion, $n$ $2\pi/87.96935$ day$^{-1}$ Stark et al. [2015b] +rotation rate, $\Omega_o = 1.5n$ $2\pi/58.64623$ day$^{-1}$ Stark et al. [2015b] +orbit precession rate, $\Omega_p$ $2\pi/325{,}513$ yr$^{-1}$ Baland et al. [2017] +Poincaré number, $\delta\omega = \Omega_p/\Omega_o$ $4.9327 \times 10^{-7}$ +orbital eccentricity, $e_c$ 0.20563 Baland et al. [2017] +orbital inclination, I 8. 
5330◦ + Baland et al. [2017] +mean planetary radius, R 2439. 360 km Perry et al. [2015] +mass, M 3. 3012 × 10 23 + kg Genova et al. [2019] +mean density, ¯ρ 5429. 5 kg m− 3 +J + 2 5. 0291 × 10 − 5 + Genova et al. [2019] +C + 22 8. 0415 × 10 − 6 + Genova et al. [2019] +polar surface flattening, + r 6. 7436 × 10 − 4 + Perry et al. [2015] +equatorial surface flattening, ξ + r 5. 1243 × 10 − 4 + Perry et al. [2015] +Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031. 8636 × 109 +m 3 + /s 2 + taken from Genova et al. [2019]. The mean density is calculated from 4 π +3 ¯ρR 3 + = M . The numerical +values of +r and ξ +r are calculated from +r = (¯a − c )/R and ξ + r = (a − b ) /R , where ¯a = 1 +2 ( a + b ) and where +a = 2440 . 53 km, b = 2439 . 28 km and c = 2438 .26 km are the semima jor, intermediate and semiminor +axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J +2 and C + 22 are +computed from Equation (4) in the Supporting Information of Genova et al. [2019]. +and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. +Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog- +raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ +and an equatorial offset of ∼ 15 ◦ + [Perry et al. , 2015]. +Once the densities and flattenings of all interior regions are known, we can specify the mo- +ments of inertia of the fluid core ( C + f > B + f > A + f ) and solid inner core (C +s > B + s > A + s ) +along with the mean equatorial moments of inertia +¯ +A = 1 +2 ( A + B ) , ¯ +A + f = 1 +2 (A + f + B + f ) , ¯ +A + s = 1 +2 (A + s + B + s ) . 
(2)

From these, we define the polar ($e$, $e_f$, $e_s$) and equatorial ($\gamma$, $\gamma_s$) dynamical ellipticities of the whole planet (no subscript), fluid core (subscript $f$) and solid inner core (subscript $s$), which enter our rotational model,

$$
e = \frac{C - \bar{A}}{\bar{A}}\,, \qquad e_f = \frac{C_f - \bar{A}_f}{\bar{A}_f}\,, \qquad e_s = \frac{C_s - \bar{A}_s}{\bar{A}_s}\,, \tag{3a}
$$

$$
\gamma = \frac{B - A}{\bar{A}}\,, \qquad \gamma_s = \frac{B_s - A_s}{\bar{A}_s}\,. \tag{3b}
$$

We further note that $e$ and $\gamma$ are connected to $J_2$ and $C_{22}$ by

$$
e = \frac{M R^2}{\bar{A}}\, J_2\,, \qquad \gamma = 4\,\frac{M R^2}{\bar{A}}\, C_{22}\,. \tag{4}
$$

–6–
Confidential manuscript submitted to JGR-Planets

Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) in a frame attached to the rotating mantle. The orbit normal ($\hat{\mathbf{e}}_3^I$) is tilted by an angle $I = 8.533^\circ$ from the Laplace normal ($\hat{\mathbf{e}}_3^L$) and the symmetry axis of Mercury's mantle ($\hat{\mathbf{e}}_3^p$) is tilted by an obliquity $\varepsilon_m$ with respect to $\hat{\mathbf{e}}_3^I$. Shown in (a) are the orientations of the symmetry axis of the inner core ($\hat{\mathbf{e}}_3^s$), the rotation rate vectors of the mantle ($\boldsymbol{\Omega}$), fluid core ($\boldsymbol{\Omega}_f$) and inner core ($\boldsymbol{\Omega}_s$) and angles $\theta_p$, $\theta_n$, $\theta_m$, $\theta_f$ and $\theta_s$ in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer to as the Cassini plane. The light grey, white, and dark grey ellipsoids represent a polar cross-section of the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial mantle axes $\hat{\mathbf{e}}_1^p$ and $\hat{\mathbf{e}}_2^p$ with respect to the Cassini plane. Viewed in the frame attached to the rotating mantle (b), the Cassini plane is rotating at frequency $\omega\Omega_o = -\Omega_o - \Omega_p \cos I$ in the longitudinal direction. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration.
–7–
Confidential manuscript submitted to JGR-Planets

2.2 The rotational model

Mercury's rotation is characterized by a 3:2 spin-orbit resonance in which it completes 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is 87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These define the mean motion $n = 2\pi/87.96935$ day$^{-1}$ and the sidereal frequency $\Omega_o = 2\pi/58.64623$ day$^{-1}$, with $\Omega_o = 1.5n$. Mercury's rotational state is also characterized by a Cassini state whereby the orientations of the orbit normal ($\hat{\mathbf{e}}_3^I$) and of the mantle symmetry axis ($\hat{\mathbf{e}}_3^p$) are both coplanar with, and precess about, the normal to the Laplace plane ($\hat{\mathbf{e}}_3^L$). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between $\hat{\mathbf{e}}_3^L$ and $\hat{\mathbf{e}}_3^I$ is the orbital inclination $I = 8.5330^\circ$ [Baland et al., 2017], the angle between $\hat{\mathbf{e}}_3^I$ and $\hat{\mathbf{e}}_3^p$ is the obliquity $\varepsilon_m$ and the angle between $\hat{\mathbf{e}}_3^L$ and $\hat{\mathbf{e}}_3^p$ is $\theta_p = I + \varepsilon_m$. The precession of $\hat{\mathbf{e}}_3^I$ and $\hat{\mathbf{e}}_3^p$ about the Laplace pole is retrograde with frequency $\Omega_p = 2\pi/325{,}513$ yr$^{-1}$ [Baland et al., 2017].

The mantle and crust are welded together and form a single rotating region which we refer to as the 'mantle' in the context of our rotational model. The rotation and symmetry axes of the mantle are expected to remain in close alignment, but they do not coincide exactly. We define the rotation rate vector of the mantle by $\boldsymbol{\Omega}$, and its misalignment from $\hat{\mathbf{e}}_3^p$ by an angle $\theta_m$. Note that $\theta_m \ll \varepsilon_m$ and it is often the spin axis of Mercury which is used to define the obliquity $\varepsilon_m$ [e.g. Baland et al., 2017].
If Mercury were an entirely rigid planet, $\hat{\mathbf{e}}_3^p$ and $\boldsymbol{\Omega}$ would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and the angles $I$, $\varepsilon_m$ and $\theta_m$ would completely describe the Cassini state. The presence of a fluid outer core and solid inner core requires three additional orientation vectors and angles. The symmetry axis of the inner core is defined by unit vector $\hat{\mathbf{e}}_3^s$ and its misalignment from $\hat{\mathbf{e}}_3^p$ by an angle $\theta_n$. The rotation vectors of the fluid core and inner core are defined as $\boldsymbol{\Omega}_f$ and $\boldsymbol{\Omega}_s$, respectively, and their misalignments from the rotation vector of the mantle $\boldsymbol{\Omega}$ are defined by angles $\theta_f$ and $\theta_s$ (see Figure 2a). The rotation and symmetry axes of the inner core remain in close alignment, so $\theta_n \approx \theta_s$. To be formal in our definition of the different angles of misalignment, for $I$ defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction.

At equilibrium in the Cassini state, the three orientation vectors ($\hat{\mathbf{e}}_3^I$, $\hat{\mathbf{e}}_3^p$, $\hat{\mathbf{e}}_3^s$) and three rotation vectors ($\boldsymbol{\Omega}$, $\boldsymbol{\Omega}_f$, $\boldsymbol{\Omega}_s$) are forced to precess about $\hat{\mathbf{e}}_3^L$ at the same frequency. If we neglect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed in inertial space, the Cassini plane is rotating in a retrograde direction at frequency $\Omega_p$. Viewed in the frame attached to the mantle rotating at sidereal frequency $\Omega_o$, the Cassini plane is rotating in a retrograde direction at frequency $\omega\Omega_o$ (see Figure 2b), where $\omega$, expressed in cycles per Mercury day, is equal to

$$
\omega = -1 - \delta\omega \cos(\theta_p)\,. \tag{5}
$$

The factor $\delta\omega = \Omega_p/\Omega_o = 4.933 \times 10^{-7}$ is the Poincaré number, expressing the ratio of the forced precession to sidereal rotation frequencies.
The invariance of the Laplace plane normal as seen in the mantle frame is expressed as

$$
\frac{d}{dt}\hat{\mathbf{e}}_3^L + \boldsymbol{\Omega} \times \hat{\mathbf{e}}_3^L = \mathbf{0}\,, \tag{6}
$$

or equivalently, by Equation (19e) of Stys and Dumberry [2018],

$$
\omega \sin(\theta_p) + \sin(\theta_m + \theta_p) = 0\,. \tag{7}
$$

–8–
Confidential manuscript submitted to JGR-Planets

This expresses a formal connection between $\theta_p$ and $\theta_m$ which is independent of the interior structure of Mercury. Using Equation (5) and $\cos(\theta_m) \rightarrow 1$, this connection can be rewritten as

$$
\sin(\theta_m) = \delta\omega \sin(\theta_p)\,, \tag{8}
$$

and thus the relative amplitudes of $\theta_m$ and $\theta_p$ depend on the Poincaré number $\delta\omega$.

To investigate Mercury's response to the gravitational torque from the Sun, we take advantage of the framework developed in Mathews et al. [1991] to model the forced nutations of Earth [see also Mathews et al., 2002; Dehant and Mathews, 2015]. This model takes into account the pressure torque (also referred to as the inertial torque) that results when the spin axis of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been incorporated into the framework [e.g. Buffett, 1992; Buffett et al., 2002; Mathews and Guo, 2005; Deleplace and Cardin, 2006]. The framework was adapted to model the Cassini state of the Moon in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury.

Because the forced precession period is much longer than the rotation and orbital periods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean torque averaged over one orbit.
This mean torque is perpendicular to the Cassini plane, pointing in the same direction as the vector connecting the Sun to the descending node of Mercury's orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque is periodic, rotating at frequency $\omega\Omega_o$. Setting the equatorial directions $\hat{\mathbf{e}}_1^p$ and $\hat{\mathbf{e}}_2^p$ to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial components of this periodic applied torque in a compact form as

$$
\Gamma_1(t) + i\,\Gamma_2(t) = -i\,\tilde{\Gamma}(\omega)\exp[i\omega\Omega_o t]\,, \tag{9}
$$

where $\tilde{\Gamma}(\omega)$ represents the amplitude of the torque at frequency $\omega\Omega_o$. In response to this torque, the axes defining all angles ($\theta_p$, $\varepsilon_m$, $\theta_m$, $\theta_f$, $\theta_s$, $\theta_n$) as viewed in the mantle frame are also rotating at frequency $\omega\Omega_o$ (see Figure 2). The longitudinal direction of each of these angles at a specific time $t$ can then also be written in the equatorial complex plane and is proportional to $\exp[i\omega\Omega_o t]$. For instance, the two equatorial time-dependent components $\theta_{m1}$ and $\theta_{m2}$ of the angle $\theta_m$, as seen in the mantle frame, can be written as

$$
\theta_{m1}(t) + i\,\theta_{m2}(t) = \tilde{m}\exp[i\omega\Omega_o t]\,, \tag{10a}
$$

where

$$
\tilde{m} \equiv \tilde{m}(\omega) = \mathrm{Re}[\tilde{m}] + i\,\mathrm{Im}[\tilde{m}]\,, \tag{10b}
$$

is the amplitude at frequency $\omega\Omega_o$. Equivalent definitions apply for all other angles, with the connection as follows:

$$
\theta_m \Leftrightarrow \tilde{m}\,, \quad \theta_f \Leftrightarrow \tilde{m}_f\,, \quad \theta_s \Leftrightarrow \tilde{m}_s\,, \quad \theta_n \Leftrightarrow \tilde{n}_s\,, \quad \theta_p \Leftrightarrow \tilde{p}\,, \quad \varepsilon_m \Leftrightarrow \tilde{\varepsilon}_m\,. \tag{11}
$$

The notation $\tilde{m}$, $\tilde{m}_f$, $\tilde{m}_s$, $\tilde{n}_s$ follows that introduced in the original model of Mathews et al. [1991]. Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response to the applied torque as a result of dissipation, for instance from viscous or EM coupling

–9–
Confidential manuscript submitted to JGR-Planets

at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely real.
We concentrate our analysis in this work on the real part of the solutions, which corresponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, $\tilde{\varepsilon}_m$ corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to $\varepsilon_m$, though we keep the tilde notation in the presentation of our results to emphasize that it represents the real part of the solution from our system. Furthermore, since $\tilde{m} \ll \tilde{\varepsilon}_m$, we often refer to $\tilde{\varepsilon}_m$ as the orientation of the spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature.

The model of Mathews et al. [1991] is developed under the assumption of small angles as appropriate for the nutations on Earth. The details on how the equations of the model are derived can be found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equations describe, respectively, the time rate of change of the angular momenta of the whole of Mercury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three equations are

$$
(\omega - e)\,\tilde{m} + (1 + \omega)\left[\frac{\bar{A}_f}{\bar{A}}\,\tilde{m}_f + \frac{\bar{A}_s}{\bar{A}}\,\tilde{m}_s + \alpha_3 e_s \frac{\bar{A}_s}{\bar{A}}\,\tilde{n}_s\right] = \frac{1}{i\,\Omega_o^2 \bar{A}}\left(\tilde{\Gamma}_{sun}\right), \tag{12a}
$$

$$
\omega\,\tilde{m} + (1 + \omega + e_f)\,\tilde{m}_f - \omega\,\alpha_1 e_s \frac{\bar{A}_s}{\bar{A}_f}\,\tilde{n}_s = \frac{1}{i\,\Omega_o^2 \bar{A}_f}\left(-\tilde{\Gamma}_{cmb} - \tilde{\Gamma}_{icb}\right), \tag{12b}
$$

$$
(\omega - \alpha_3 e_s)\,\tilde{m} + \alpha_1 e_s\,\tilde{m}_f + (1 + \omega)\,\tilde{m}_s + (1 + \omega - \alpha_2)\,e_s\,\tilde{n}_s = \frac{1}{i\,\Omega_o^2 \bar{A}_s}\left(\tilde{\Gamma}^s_{sun} + \tilde{\Gamma}_{icb}\right), \tag{12c}
$$

and a fourth equation consists of a kinematic relation that expresses the change in the orientation of the inner core figure as a result of its own rotation,

$$
\tilde{m}_s + \omega\,\tilde{n}_s = 0\,.
$$
(12d)

In these equations, the parameters $\alpha_1$, $\alpha_2$ and $\alpha_3$ involve the density contrast at the ICB and are given by

$$
\alpha_1 = \frac{\rho_f}{\rho_s}\,, \qquad \alpha_3 = 1 - \alpha_1\,, \qquad \alpha_2 = \alpha_1 - \alpha_3 \alpha_g\,, \tag{13a}
$$

where the parameter $\alpha_g$ is a measure of the ratio of the gravitational to inertial torque applied on the inner core,

$$
\alpha_g = \frac{8\pi G}{5\,\Omega_o^2}\left[\rho_c(\epsilon_r - \epsilon_m) + \rho_m(\epsilon_m - \epsilon_f) + \rho_f \epsilon_f\right], \tag{13b}
$$

where $G$ is the gravitational constant.

$\tilde{\Gamma}_{sun}$ is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For a small mantle obliquity $\tilde{\varepsilon}_m$ and a small inner core tilt $\tilde{n}_s$, it is given by

$$
\tilde{\Gamma}_{sun} = -i\,\Omega_o^2 \bar{A}\left(\phi_m\,\tilde{\varepsilon}_m + \frac{\bar{A}_s}{\bar{A}}\,\alpha_3 \phi_s\,\tilde{n}_s\right), \tag{14}
$$

where

–10–
Confidential manuscript submitted to JGR-Planets

$$
\phi_m = \frac{3}{2}\frac{n^2}{\Omega_o^2}\left[G_{210}\, e + \frac{1}{2} G_{201}\, \gamma\right], \tag{15a}
$$

$$
\phi_s = \frac{3}{2}\frac{n^2}{\Omega_o^2}\left[G_{210}\, e_s + \frac{1}{2} G_{201}\, \gamma_s\right], \tag{15b}
$$

and where $G_{210}$ and $G_{201}$ are functions of the orbital eccentricity $e_c$,

$$
G_{210} = \frac{1}{(1 - e_c^2)^{3/2}}\,, \tag{16a}
$$

$$
G_{201} = \frac{7}{2} e_c - \frac{123}{16} e_c^3 + \frac{489}{128} e_c^5\,. \tag{16b}
$$

The gravitational torque by the Sun acting on the inner core alone, $\tilde{\Gamma}^s_{sun}$, is

$$
\tilde{\Gamma}^s_{sun} = -i\,\Omega_o^2 \bar{A}_s\,\alpha_3 \phi_s\left(\tilde{\varepsilon}_m + \tilde{n}_s\right). \tag{17}
$$

$\tilde{\Gamma}_{cmb}$ and $\tilde{\Gamma}_{icb}$ are the torques from tangential stresses by the fluid core on the mantle at the CMB and on the inner core at the ICB, respectively. These torques can be parameterized in terms of dimensionless complex coupling constants $K_{icb}$ and $K_{cmb}$ and the differential angular velocities at each boundary [e.g. Buffett, 1992; Buffett et al., 2002],

$$
\tilde{\Gamma}_{icb} = i\,\Omega_o^2 \bar{A}_s K_{icb}\left(\tilde{m}_f - \tilde{m}_s\right), \tag{18a}
$$

$$
\tilde{\Gamma}_{cmb} = i\,\Omega_o^2 \bar{A}_f K_{cmb}\,\tilde{m}_f\,. \tag{18b}
$$

Specific expressions for $K_{icb}$ and $K_{cmb}$ are delayed to sections 4 and 5 when we consider the effects of viscous and EM coupling, respectively.
A fifth equation is required to connect this interior model to the obliquity of the mantle, and this is provided by Equation (7). For small angles $\theta_m$ and $\theta_p$, this gives [e.g. Mathews et al., 1991; Dumberry and Wieczorek, 2016; Baland et al., 2019]

$$
\tilde{m} + (1 + \omega)\,\tilde{p} = 0\,. \tag{19}
$$

For Mercury, it is more convenient to connect the internal model with $\tilde{\varepsilon}_m$ instead of $\tilde{p}$. This is because $\theta_p \approx 8.567^\circ$ whereas $\tilde{\varepsilon}_m \approx 2$ arcmin and thus the latter obeys more strictly the condition of small angles assumed in our framework. Furthermore, the external torques acting on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on $\tilde{\varepsilon}_m$. Written in terms of $\tilde{\varepsilon}_m$, and with the approximation of $\tilde{\varepsilon}_m \ll 1$ and $\tilde{m} \ll 1$, Equation (7) becomes

$$
\tilde{m} + (1 + \omega)\,\tilde{\varepsilon}_m = -(1 + \omega)\tan I\,. \tag{20}
$$

Likewise, the frequency $\omega$ from Equation (5) can be written simply in terms of $I$,

$$
\omega = -1 - \delta\omega \cos I\,. \tag{21}
$$

The set of four Equations (12) with the addition of Equation (20) forms a linear system of equations for the five rotational variables $\tilde{m}$, $\tilde{m}_f$, $\tilde{m}_s$, $\tilde{n}_s$ and $\tilde{\varepsilon}_m$. It captures the response of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequency $\omega$. The system can be written in a matrix form as

–11–
Confidential manuscript submitted to JGR-Planets

$$
\mathsf{M} \cdot \mathbf{x} = \mathbf{y}\,, \tag{22a}
$$

where the solution ($\mathbf{x}$) and forcing ($\mathbf{y}$) vectors are

$$
\mathbf{x}^T = \left[\tilde{m},\ \tilde{m}_f,\ \tilde{m}_s,\ \tilde{n}_s,\ \tilde{\varepsilon}_m\right], \tag{22b}
$$

$$
\mathbf{y}^T = \left[0,\ 0,\ 0,\ 0,\ -(1 + \omega)\tan I\right], \tag{22c}
$$

and the elements of matrix $\mathsf{M}$ are

$$
\mathsf{M} =
\begin{pmatrix}
\omega - e & (1+\omega)\dfrac{\bar{A}_f}{\bar{A}} & (1+\omega)\dfrac{\bar{A}_s}{\bar{A}} & \dfrac{\bar{A}_s}{\bar{A}}\,\alpha_3\left[(1+\omega)e_s + \phi_s\right] & \phi_m \\[2ex]
\omega & 1 + \omega + e_f + K_{cmb} + \dfrac{\bar{A}_s}{\bar{A}_f}K_{icb} & -\dfrac{\bar{A}_s}{\bar{A}_f}K_{icb} & -\omega\, e_s \alpha_1 \dfrac{\bar{A}_s}{\bar{A}_f} & 0 \\[2ex]
\omega - \alpha_3 e_s & \alpha_1 e_s - K_{icb} & 1 + \omega + K_{icb} & (1 + \omega - \alpha_2)\,e_s + \alpha_3\phi_s & \alpha_3\phi_s \\[1ex]
0 & 0 & 1 & \omega & 0 \\[1ex]
1 & 0 & 0 & 0 & (1+\omega)
\end{pmatrix}.
\tag{22d}
$$

Solutions of the homogeneous system (i.e.
$\mathbf{y} = \mathbf{0}$) represent free modes of precession. Three modes have periods which, when seen in inertial space, are typically in the range of a few hundred to a few thousand years. The first is the free axial precession of Mercury maintained by the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nutation (FCN), which is the free precession of the spin axis of the fluid core about the symmetry axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN), a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al., 1991].

A few remarks on our model are important to point out before we proceed further. First, although we have retained the triaxial shape of Mercury in the expression of the solar torque, we treat its angular momentum response as if it were an axially symmetric body. This is convenient as the two equatorial angular momentum equations for each region can be combined into a single equation. To first order, the frequency of the free precession of Mercury is not largely altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the FCN and FICN for a triaxial planetary body may be slightly different than those for an axially symmetric body, but not by a large factor. As the response of Mercury to the solar torque is largely determined by the resonant amplification due to the presence of these three modes, our model should capture correctly the first order Cassini state of Mercury. Considering the triaxial shape of Mercury may alter the numerical results, but not our general conclusions.

Second, our modelling approach is different than in the studies of Peale et al. [2014] and Peale et al. [2016]. In these two studies, dynamical models of Mercury's Cassini state are developed and must then be integrated in time.
The equilibrium Cassini state is the quasi-steady state that remains after transient effects associated with the initial conditions have decayed away. An advantage of these models compared to ours is that the complete triaxial dynamics of Mercury, including its longitudinal librations, are retained. However, the numerical integration can be lengthy if dissipation is weak, which restricts the number of possible interior models of Mercury that can be tested. In contrast, our model is a simple linear system in the frequency domain, focused on one specific frequency: the forced precession associated with the Cassini state. Solutions are straightforward to obtain for a given interior model, and this allows us to cover a larger span of the parameter space. One drawback, however, is that our model does not capture time-dependent variations at any other frequencies, including the precession of the pericenter of Mercury's orbit about the Sun.

–12–
Confidential manuscript submitted to JGR-Planets

2.3 Analytical solutions and limiting cases

2.3.1 The Cassini state of a single-body, rigid Mercury

For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations (12a) and (20),

$$
(\omega - e)\,\tilde{m} + \phi_m\,\tilde{\varepsilon}_m = 0\,, \tag{23a}
$$

$$
\tilde{m} + (1 + \omega)\,\tilde{\varepsilon}_m = -(1 + \omega)\tan I\,. \tag{23b}
$$

Using Equation (21), $\delta\omega \ll 1$, and the approximation $\bar{A}(1 + e + \delta\omega\cos I) = C + \bar{A}\,\delta\omega\cos I \approx C$, these can be written as

$$
C\,\tilde{m} = \bar{A}\,\phi_m\,\tilde{\varepsilon}_m\,, \tag{24a}
$$

$$
\tilde{m} = \delta\omega\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right). \tag{24b}
$$

Equation (24b) gives a direct relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$. For $I = 8.5330^\circ$, $\delta\omega = 4.9327 \times 10^{-7}$ and taking $\tilde{\varepsilon}_m = 2.04$ arcmin, this gives $\tilde{m} = 2.52 \times 10^{-4}$ arcmin, much smaller than $\tilde{\varepsilon}_m$: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small.
Substituting Equation (24b) in Equation (24a) gives

$$
C\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m\,, \tag{25}
$$

and isolating for $\tilde{\varepsilon}_m$,

$$
\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + \bar{A}\,\Omega_o\,\phi_m}\,. \tag{26}
$$

Upon using Equations (4), (15a), and $\Omega_o = \frac{3}{2}n$, we can write

$$
\tilde{\varepsilon}_m = \frac{C\,\Omega_p \sin I}{-C\,\Omega_p \cos I + n M R^2\left(G_{210} J_2 + 2\,G_{201} C_{22}\right)}\,. \tag{27}
$$

This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 [see for instance Equation (1) of Baland et al., 2017, where their definition of $\dot{\Omega}$ is equal to $-\Omega_p$]. Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment of inertia $\hat{C}$,

$$
\hat{C} = \frac{C}{M R^2} = \frac{n}{\Omega_p}\,\frac{G_{210} J_2 + 2\,G_{201} C_{22}}{\cos I + \sin I/\tilde{\varepsilon}_m}\,, \tag{28}
$$

which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation that a measurement of the obliquity gives a constraint on $\hat{C}$.

Two free modes of precession are found by setting $\mathbf{y} = \mathbf{0}$ in Equation (23). One mode corresponds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession of Mercury. As seen in the inertial frame, its frequency is given by

–13–
Confidential manuscript submitted to JGR-Planets

$$
\omega_{fp} = n\,\frac{M R^2}{C}\left(G_{210} J_2 + 2\,G_{201} C_{22}\right), \tag{29}
$$

which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid-body precession and hence $C$ was replaced by $C_m$. Using $C = 0.346 \cdot M R^2$ [Margot et al., 2012] and the numerical values for $n$, $J_2$, $C_{22}$ and $e_c$ given in Table 1, we obtain a free precession period of $T_{fp} = 2\pi/\omega_{fp} = 1298$ yr.
If we use $C_m$ instead of $C$ in Equation (29), and take $C_m = 0.431 \cdot C = 0.431 \cdot 0.346 \cdot M R^2$ [Margot et al., 2012], we obtain $T_{fp} = 2\pi/\omega_{fp} = 560$ yr. These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, the free precession period is much shorter than the forcing period of 325 kyr. Using Equation (29), Equation (27) can be written as [e.g. Baland et al., 2017]

$$
\tilde{\varepsilon}_m = \frac{\Omega_p \sin I}{-\Omega_p \cos I + \omega_{fp}}\,. \tag{30}
$$

The obliquity of Mercury is thus determined by how the forcing frequency $\Omega_p$ compares with the free precession frequency $\omega_{fp}$. Because $\omega_{fp} > \Omega_p$, Mercury occupies Cassini state 1 [Peale, 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant amplification if $\Omega_p \approx \omega_{fp}$. Since $\omega_{fp} \gg \Omega_p$, resonant amplification is minimal and the resulting obliquity, $\tilde{\varepsilon}_m \approx 2$ arcmin, is much smaller than the inclination angle $I \approx 8.5^\circ$.

2.3.2 The misalignment of the fluid and solid cores

With $\omega = -1 - \delta\omega\cos I$ and $\delta\omega \ll 1$, Equation (12d) gives $\tilde{n}_s \approx \tilde{m}_s$; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between $\tilde{m}$ and $\tilde{\varepsilon}_m$ of Equation (24b) is independent of the interior structure, so it remains unchanged when a fluid core and a solid inner core are present. Substituting it in Equation (12a), and setting $\tilde{n}_s = \tilde{m}_s$, the angular momentum equation of the whole planet becomes

$$
C\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \left(\bar{A}_f \cos I\,\Omega_p\right)\tilde{m}_f + \bar{A}_s\left(\cos I\,\Omega_p - \Omega_o\,\alpha_3\phi_s\right)\tilde{n}_s = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m\,.
$$
(31)

This latter equation shows how the misaligned inner core and fluid core can lead to a modification of the mantle obliquity $\tilde{\varepsilon}_m$. Approximate analytical solutions of $\tilde{n}_s$ and $\tilde{m}_f$ are given by

$$
\tilde{n}_s \approx \frac{\Omega_p}{\kappa\,\lambda_s}\left(1 + \frac{\Omega_o\left(K_{icb} - \alpha_1 e_s\right)}{\lambda_f}\right)\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) - \frac{\Omega_o\,\alpha_3\phi_s}{\kappa\,\lambda_s}\,\tilde{\varepsilon}_m\,, \tag{32a}
$$

$$
\tilde{m}_f \approx \frac{\Omega_p}{\lambda_f}\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) + \frac{\Omega_o}{\lambda_f}\,\frac{\bar{A}_s}{\bar{A}_f}\left(K_{icb} - \alpha_1 e_s\right)\tilde{n}_s\,, \tag{32b}
$$

where

$$
\kappa = 1 - \frac{\bar{A}_s}{\bar{A}_f}\,\frac{\Omega_o^2\left(K_{icb} - \alpha_1 e_s\right)^2}{\lambda_s \lambda_f}\,, \tag{33a}
$$

$$
\lambda_f = \bar{\sigma}_f - \Omega_p \cos I\,, \tag{33b}
$$

$$
\lambda_s = \bar{\sigma}_s - \Omega_p \cos I\,, \tag{33c}
$$

–14–
Confidential manuscript submitted to JGR-Planets

and where we have introduced the frequencies

$$
\bar{\sigma}_f = \Omega_o\left(e_f + K_{cmb} + \frac{\bar{A}_s}{\bar{A}_f}K_{icb}\right), \tag{33d}
$$

$$
\bar{\sigma}_s = \Omega_o\left(e_s\alpha_3\alpha_g - e_s\alpha_1 + \alpha_3\phi_s + K_{icb}\right). \tag{33e}
$$

These solutions are good approximations for all the results that we present in section 3. For an observed mantle obliquity $\tilde{\varepsilon}_m$ and for a chosen set of interior model parameters, they provide useful predictions of $\tilde{n}_s$ and $\tilde{m}_f$.

In the limit of a very strong coupling between the fluid core, solid core and mantle, $\bar{\sigma}_s \gg \Omega_p$ and $\bar{\sigma}_f \gg \Omega_p$, so that $\tilde{n}_s \rightarrow 0$, $\tilde{m}_f \rightarrow 0$ and Equation (31) reverts back to Equation (25) for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and mantle (i.e. for spherical internal boundaries, $e_f = e_s = \gamma_s = 0$ and no viscous or EM coupling, $K_{cmb} = K_{icb} = 0$), then

$$
\phi_s = 0\,, \qquad \kappa = 1\,, \qquad \lambda_f = \lambda_s = -\Omega_p \cos I\,, \qquad \tilde{m}_f = \tilde{n}_s = -\left(\tan I + \tilde{\varepsilon}_m\right). \tag{34}
$$

Inserting these in Equation (31), and with the moment of inertia of the mantle equal to $C_m = C - \bar{A}_f - \bar{A}_s$, we obtain

$$
C_m\,\Omega_p\left(\sin I + \cos I\,\tilde{\varepsilon}_m\right) = \bar{A}\,\Omega_o\,\phi_m\,\tilde{\varepsilon}_m\,, \tag{35}
$$

which describes, as expected, a forced precession of the mantle alone. If this were the case for Mercury, taking $C_m/C = 0.431$, the obliquity should be $\tilde{\varepsilon}_m \approx 0.88$ arcmin, substantially smaller than the observed obliquity of $\tilde{\varepsilon}_m \approx 2$ arcmin.

If $\bar{\sigma}_f \approx \Omega_p$ (and thus $\lambda_f \rightarrow 0$) and/or $\bar{\sigma}_s \approx \Omega_p$ (and thus $\lambda_s \rightarrow 0$) resonant amplification leads to large amplitudes for $\tilde{m}_f$, $\tilde{n}_s$ and the mantle obliquity $\tilde{\varepsilon}_m$. The frequencies $\bar{\sigma}_f$ and $\bar{\sigma}_s$ are closely related to the FCN and FICN frequencies $\omega_{fcn}$ and $\omega_{ficn}$, respectively. Hence, just as a large mantle obliquity can result from resonant amplification when the forcing frequency approaches the free precession frequency, a large mantle obliquity can likewise result from resonant amplification when the forcing frequency approaches the FCN or FICN frequencies. These frequencies depend on the interior density structure and are not known. However, we will show that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect an important amplification effect. Furthermore, since $\omega_{fcn}, \omega_{ficn} \gg \Omega_p$, then $\bar{\sigma}_f \gg \Omega_p$ and $\bar{\sigma}_s \gg \Omega_p$, and we are in the strong coupling limit. The mantle obliquity should be close to that expected for a rigid planet, as observations suggest. Therefore, we expect that $\tilde{m}_f$ and $\tilde{n}_s$ should be of the order of $\tilde{\varepsilon}_m$ or smaller. This further justifies the assumption of small angles that we have adopted.

3 Results

3.1 Geodetic constraints and interior density structure

All our interior models are constrained to match the mass $M$ of Mercury and specific choices of $\hat{C} = C/M R^2$ and $C_m/C$. The choice of $\hat{C}$ is determined from Equation (28). For the parameters listed in Table 1, and an observed obliquity of $\varepsilon_m = 2.04$ arcmin [Margot et al., 2012], this gives $\hat{C} = C/M R^2 = 0.3455$ and all our interior models are consistent with this choice.
Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are

–15–
Confidential manuscript submitted to JGR-Planets

perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating $\hat{C}$ from Equation (28), or conversely in predicting $\varepsilon_m$ based on a given choice for $\hat{C}$. Part of the objective of our study is to estimate how large this error is. The ratio $C_m/C$ is obtained from the amplitude of the 88-day longitudinal mantle libration $\phi_o$, which is given by

$$
\phi_o = 6 \cdot f(e_c)\, C_{22}\, \frac{M R^2}{C}\, \frac{C}{C_m}\, \frac{1}{1 + \zeta}\,, \tag{36}
$$

where

$$
f(e_c) = 1 - 11\, e_c^2 + \frac{959}{48}\, e_c^4\,, \tag{37}
$$

and where $\zeta$ is a correction that takes into account the entrainment of the inner core in the libration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this correction is small and, to simplify, we neglect it here. Taking the observed libration amplitude to be 38.5 arcsec [Margot et al., 2012], $\hat{C} = C/M R^2 = 0.3455$ and $C_{22}$ and $e_c$ from Table 1, this corresponds to a ratio $C_m/C = 0.4269$, or equivalently $\hat{C}_m = C_m/M R^2 = 0.1475$.

For all results presented in our study, the crustal density is set at $\rho_c = 2974$ kg m$^{-3}$ [Sori, 2018]. Our standard choice for the crustal thickness is $h = 26$ km [Sori, 2018], although in section 3.2 we also present some results with other choices of $h$. We have considered two possible prescriptions connected to the density of the inner core. First, for all the results presented in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of $\rho_s = 8800$ kg m$^{-3}$, approximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure Fe composition in face-centered cubic phase.
This captures an end-member scenario where the core composition is an Fe-S alloy; at Mercury's core conditions, crystallization of Fe is relatively free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g. Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the ICB is expected to be small, although since density increases with depth, the contrast between the mean densities of the fluid and solid cores is larger. It is these mean densities that enter our Mercury model with uniform density layers. To capture this other end-member core composition scenario, in section 3.5 we present results where we instead prescribe a fixed density contrast between the fluid and solid core; specifically, we set the numerical value of $\alpha_3$.

For a given choice of inner core radius $r_s$, the densities of the mantle ($\rho_m$) and fluid core ($\rho_f$) and the radius of the CMB ($r_f$) are determined such that the interior model matches $M$, $\hat{C} = 0.3455$ and $\hat{C}_m = 0.1475$. Figure 3a shows how $\rho_m$, $\rho_f$ and $r_f$ vary as a function of inner core radius $r_s$ for each of the two inner core density scenarios: a fixed $\rho_s$, or a fixed $\alpha_3$. When the inner core is small, its presence has a limited influence on the resulting density structure, and we find $\rho_m = 3197$ kg m$^{-3}$, $\rho_f = 7263$ kg m$^{-3}$ and $r_f = 2000$ km in each of the two scenarios. When $\rho_s$ is fixed to 8800 kg m$^{-3}$, as the inner core reaches 1500 km in size, $r_f$ increases to above 2100 km, $\rho_m$ approaches 4000 kg m$^{-3}$ and $\rho_f$ is reduced to below 5000 kg m$^{-3}$.
Figure 3a illustrates that when adopting a fixed $\rho_s$, there is a limit in the possible inner core size, as otherwise $\rho_m$ gets unreasonably large and $\rho_f$ gets inappropriately small (as it would require an excessively large concentration of light elements). When adopting instead a fixed density contrast, with $\alpha_3 = 0.1$, the changes in $r_f$, $\rho_m$ and $\rho_f$ with inner core radius are more modest, allowing larger possible inner core sizes. Different assumptions on $\rho_c$ and $h$ would alter the numerical values shown on Figure 3a but not their trends with $r_s$.

Figure 3b shows how the FCN and FICN periods vary with $r_s$ for each of the two inner core density scenarios and in the absence of viscous and EM coupling (i.e. $K_{cmb} = K_{icb} =$

–16–
Confidential manuscript submitted to JGR-Planets

Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN period when the external torque is set to zero (FCN$_{int}$) is shown in orange. Solid lines correspond to a scenario where the density of the inner core is set to 8800 kg m$^{-3}$; thin dashed lines correspond to a scenario where the density contrast between the fluid and solid cores is set to $\alpha_3 = 0.1$.

$0$). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner core, increasing to approximately 600 yr at the largest $r_s$. The FICN period is shorter, close to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the largest $r_s$ under the fixed $\rho_s$ (fixed $\alpha_3$) scenario. This confirms that the FCN and FICN periods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away from it that we do not expect large $\tilde{m}_f$ and $\tilde{n}_s$ from resonant amplification.
+The FCN and FICN periods that we have computed include the influence of the exter- +nal torque. As shown by Baland et al. [2019], the external torque allows solid regions to have +a free motion in inertial space thereby affecting the free rotational modes. To a good approx- +imation, the FCN and FICN frequencies (as seen in an inertial frame) for K +cmb = K +icb = 0 +are given by + ω +f cn ≈ −Ω + o + ¯ +A +¯ +A + m + ¯ +A + s + + e + f + φ + m + + Ω + o e + f φ + m +( e + f + φ + m ) , (38a) +ω + f icn ≈ Ω + o + ¯ +A + ¯ +A + s +¯ +A − ¯ +A + s + + e + s α +1 − e +s α +3 α +g − α +3 φ + s + . (38b) +The expression of the FICN frequency involves the inertial torque (term e + s α +1 ) and the grav- +itational torque from the rest of Mercury ( e + s α +3 α +g ) and the Sun ( α +3 φ + s ) acting on the inner core. +For both of our inner core density scenarios (and our choices of ρ +s = 8800 kg m −3 + and α +3 = +0 .1), the internal gravitational torque dominates that from the Sun. Furthermore, α +3 α +g ≫ α +1 ; +the gravitational torque dominates the inertial torque, in large part because of the slow rota- +tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion +is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek , 2016; Stys and +Dumberry , 2018], but it is different for Earth, where α + 1 > α +3 α +g because of its faster rotation +and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres- +–17– +Confidential manuscript submitted to JGR-Planets +sion for the FICN differs by a factor ( ¯ +A + ¯ +A + s ) /( ¯ +A − ¯ +A + s ) compared to that given in Dumberry +and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. +The expression for FCN frequency differs from the usual expression for Earth. First, it +involves the external torque from the Sun captured by the parameter φ + m . 
If we set φ + m = 0, +we obtain the FCN frequency for a decoupled model in which only interior torques contribute, +ω +f cn,int ≈ −Ω + o + ¯ +A +¯ +A + m + ¯ +A + s + e + f . (38c) +This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ +A/( ¯ +A + m + +¯ +A + s ) rather than ¯ +A/ ¯ +A + m . This is because of the relatively thin mantle of Mercury; for the largest +r +s considered, the moment of inertia of the inner core can get close to 40% of that of the man- +tle and is not negligible. The period of the FCN when only interior torques contribute is shown +in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr +at the largest r +s . Hence, the influence of the solar torque reduces the FCN period by a factor +of approximately 3. We note that the FICN period, in contrast, is not altered substantially when +the external torque is set to zero. +3.2 Gravitational and inertial coupling +Let us now investigate the obliquities of the mantle, fluid core and inner core in their equi- +librium Cassini state. We assume a fixed inner core density scenario in this section, with ρ + s = +8800 kg m− 3 + . Viscous and EM coupling are set to zero in order to isolate the influence of grav- +itational and inertial coupling. Figure 4 shows how ˜ε + m , ˜m +f and ˜n + s vary as functions of inner +core radius. We show calculations for three different choices of crustal thickness, but let us con- +centrate first on the case for h = 26 km. For small r +s , we retrieve an obliquity of ˜ε + m = 2. 0494 +arcmin (Figure 4a). ˜ε + m decreases with r +s , but not substantially; at the largest r +s (1500 km), +˜ε + m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ε + m = 2.04 +arcmin, the obliquity that we used in setting the constraint for ˆ +C – and hence the prediction +we should recover for a rigid planet – is an overestimate of approximately 0 . 
01 arcmin which +occurs for small inner cores. +The deviation of ˜ε + m from that of a rigid planet is due to the misalignments of the fluid +core ( ˜m +f ) and solid inner core ( ˜n + s ) with respect to the mantle (Figure 4b). The misalignment +of the fluid core spin axis from the mantle is significant: ˜m + f is approximately 4.02 arcmin for +a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin +at the largest r +s . Recall that ˜m +f is measured with respect to the mantle rotation axis (which +coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with +respect to the orbit normal is ˜ε +m + ˜m +f ≈ 6 arcmin. The reason why the obliquity of the spin +axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), +which shows that ˜m +f is determined by the resonant amplification of the FCN mode at the forc- +ing frequency. When the FCN frequency is much larger than the forcing frequency, as is the +case for Mercury, the resonant amplification is very weak but remains present and ˜m +f is larger +than zero. +In contrast to ˜m + f , the misalignment of the inner core with respect to the mantle is much +smaller; ˜n + s is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ε + m . +Physically, this is because the gravitational torque acting on the inner core when it is tilted from +the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner +core must remain in close alignment with the mantle. Presented differently, since the FICN pe- +riod is more than 3000 times shorter than the forced precession period, the inner core can eas- +–18– +Confidential manuscript submitted to JGR-Planets +Figure 4. 
a) Obliquity of the mantle ( ˜ε + m , solid lines) and of the principal moment of inertia ( ˜ε + g , +dashed line) b) ˜m +f (solid lines) and ˜n +s (dashed lines, x100) as a function of inner core radius and for +different choices of crustal thickness. +ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜n + s does +not change substantially as the inner core increases in size. +When K +icb = K +cmb = 0, a good approximation of ˜ε + m is given by +˜ε + m = C + Ω + p sin I +− C + Ω + p cos I + ¯ +A Ω + o φ + m , (39) +which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced +by C + . The latter represents an effective moment of inertia that accounts for the coupling of +the core to the mantle, + C + = C + ¯ +A + c χ , (40) +where ¯ +A + c = ¯ +A + f + ¯ +A + s and +χ = Ω + p cos I +¯ +A + c + ¯ +A + f +( ¯σ + f − Ω + p cos I ) + ¯ +A + s +( ¯σ + s − Ω + p cos I ) + − ¯ +A + s +¯ +A + c Ω + o α +3 φ + s +( ¯σ + s − Ω + p cos I ) . (41) +The frequencies ¯σ +f and ¯σ + s are given in Equations (33d-33e) and closely approximate the FCN +and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then +how the core is entrained to precess with the mantle, with the coupling between the two ex- +pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit +of ¯σ + f , ¯σ + s → 0, then χ = − 1, C + = C +m , the core is fully decoupled from the mantle and we +retrieve Equation (35). If instead ¯σ + f , ¯σ + s → ∞ , then χ = 0, C + = C and we retrieve the pre- +diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω + p , +as is the case here, resonant amplification is weak, χ is small and positive, C + > C and this +leads to a slightly larger ˜ε + m compared to a rigid planet. 
Because the inner core is grav- +itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the +misalignment of the fluid core. In Equation (41), ¯σ + s ≫ ¯σ +f , so to a good approximation +–19– +Confidential manuscript submitted to JGR-Planets +χ ≈ ¯ +A + f +¯ +A + c Ω + o cos I +( ¯σ +f − Ω + p cos I ) . (42) +For a small inner core, χ ≈ 7. 55 ×10 − 3 + . As the inner core grows, ¯ +A + f decreases, and the com- +bination ¯ +A + c χ also decreases. This implies that C + decreases with inner core size and, consequently, +˜ε + m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the +prediction for a rigid planet. +The specific predictions of ˜ε + m , ˜m +f and ˜n + s on Figure 4 depend sensitively on the assumed +interior density model and on the dynamical ellipticities of the inner core (e + s ) and fluid core +(e + f ). Hence, it depends on the choices we have made for the inner core density ρ + s , the crustal +density ρ + c and its thickness h. Changing ρ + s , ρ +c and/or h requires a different combination of ρ + f , +ρ +m and r +f in order to match M , ˆ +C and ˆ +C +m . In turn, this leads to different ellipticities at in- +terior boundary in order to match J + 2 and C +22 , and thus different predictions for ˜ε + m , ˜m + f and +˜n + s . To illustrate this, we show on Figure 4 two additional predictions computed with crustal +thicknesses changed to h = 16 and 36 km. The change in ˜ε + m remains modest, ∼ 0. 025%, but +the changes in ˜m + f and ˜n + s are more substantial, ∼ 5% and ∼ 10%, respectively. +We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment +of inertia of the whole planet, which we denote by ˜ε + g . A difference between ˜ε +g and ˜ε + m occurs +if the inner core is misaligned with the mantle. 
As seen in the mantle frame, a tilted inner core +(with ˜n +s assumed small) leads to an off-diagonal component of the moment of inertia tensor +of ( C + s − ¯ +A + s ) α +3 ˜n + s = ¯ +A + s e + s α +3 ˜n + s . The angle by which the mantle frame must be rotated so that +the moment of inertia of the whole planet is purely diagonal is ( ¯ +A + s e + s α +3 ˜n + s ) /( ¯ +Ae ), and hence a +good approximation of ˜ε +g is + ˜ε + g = ˜ε + m + ¯ +A + s e + s +¯ +Ae α +3 ˜n + s . (43) +Since the inner core is gravitationally forced into a close alignment with the mantle, the dif- +ference between ˜ε +g and ˜ε + m remains very small. For the largest inner core radius that we have +considered, ˜ε + g differs from ˜ε +m only by approximately 0.001 arcmin. +3.3 Viscous coupling +We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini +state. Peale et al. [2014] present two different parameterizations of viscous coupling based on +the timescale of attenuation of the differential rotation between the fluid core and mantle. More +complete analytical solutions for the flow resulting from a differentially precessing shell have +been derived [e.g. Stewartson and Roberts , 1963; Busse , 1968; Rochester , 1976] and we exploit +these solutions here. The parametrization of the viscous coupling constants K +cmb and K + icb based +on them are given in Mathews and Guo [2005], +K +cmb = πρ +f r 4 +f +¯ +A + f + ν +2Ω + o + 0.195 − 1. 976 i + , (44a) +K +icb = πρ +f r 4 +s +¯ +A + s + ν +2Ω + o + 0. 195 − 1 .976 i + , (44b) +where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte- +rior is not well known but based on theoretical and experimental studies it is expected to be +of the order of 10 −6 + m 2 + s −1 + [e.g. Gans , 1972; de Wijs et al. , 1998; Alf`e et al., 2000; Rutter et al., +2002a,b]. 
+ –20– +Confidential manuscript submitted to JGR-Planets +The above parameterizations are valid only under the assumption that the flow in the bound- +ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds +number Re = r +f ∆ u +f /ν , associated with the differential velocity ∆ u +f = r +f Ω + o ˜m +f at the CMB. +For r +f = 2000 km, and taking ˜m +f = 4 arcmin ≈ 0. 001 rad from the results in the previous +section, we get ∆ u +f ∼ 2 mm/s and Re ∼ 6 × 109 + . Such a large Reynolds number indicates +that the viscous friction between the fluid core and mantle should induce turbulent flows, as +is the case for the Cassini state of the Moon [ Yoder , 1981; Wil liams et al. , 2001; C´ebron et al. , +2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde- +pendent of the fluid viscosity and proportional to the square of the differential velocity. The +coupling constant K +cmb should be in the form +K + cmb = f +cmb + + ˜m +f + + 0.195 − 1. 976 i + , (45) +where f + cmb is a numerical factor that depends among other things on surface roughness. In- +corporating a viscous coupling of this form in our rotational model is more challenging not only +because f + cmb is not known but also because the viscous torque is no longer linear in ˜m +f . One +strategy is to find solutions through an iterative process. The simpler alternative strategy that +we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν +represents an effective turbulent viscosity. +To give an estimate of an appropriate turbulent value for ν , we turn to the Cassini state +of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained +by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) +[ Wil liams et al. , 2001, 2014; Wil liams and Boggs , 2015]. 
Viscous dissipation is reported in terms +of a coupling parameter K and a recent estimate is K /C + L = (1.41 ± 0.34)× 10 −8 + day − 1 + [ Wil liams +and Boggs , 2015], where C + L is the lunar polar moment of inertia. The connection between K +and K + cmb is + + + I m [K + cmb ] + + = K +C +L C + L +C +f L 1 +Ω + L , (46) +where C + f L is the moment of inertia of the lunar core and Ω + L = 2. 66 × 10 − 6 + s −1 + the lunar +rotation rate. With C + f L /C + L ∼ 7 × 10− 4 + [e.g. Wil liams et al. , 2014], this gives |I m [K +cmb ]| ∼ +9 × 10− 5 + . In order to match this amplitude in Equation (44a), with lunar parameters and as- +suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10− 4 + m 2 +s − 1 + , about 500 times larger than the laminar viscosity. Note that the differential velocity at the +CMB of the Moon is closer to 3 cm/s [ Yoder , 1981; Wil liams et al. , 2001], more than 10 times +larger than our estimate for Mercury above. Since the effective turbulent coupling constant K +cmb +is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- +cury should be smaller. Thus, ν ≈ 5 × 10− 4 + m 2 + s− 1 + gives a conservative upper bound for the +possible effective turbulent viscosity that can be expected for Mercury. +Figure 5 shows how ˜ε + m , ˜m +f and ˜n + s vary as functions of inner core radius for different choices +of effective viscosities. For ν = 10 − 5 + m 2 + s − 1 + , viscous coupling is too weak to affect ˜ε +m and +˜m + f and they are essentially unchanged from the solutions shown in Figure 4. With increasing +ν , the stronger viscous coupling between the core and the mantle reduces their differential ve- +locity, and ˜m +f is reduced. With the reduced differential velocity at the CMB, the prediction +of ˜ε +m gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB +viscous coupling model is different than the one used by Peale et al. 
[2014], our results for ˜ε + m +and ˜m + f are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the +fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis- +cosity that we have identified above (i.e ν ≈ 5 × 10 − 4 + m 2 + s − 1 + ), the influence of viscous cou- +–21– +Confidential manuscript submitted to JGR-Planets +Figure 5. a) Obliquity of the mantle ( ˜ε + m , solid lines) and gravity field ( ˜ε + g , dashed lines) b) ˜m +f +(solid lines) and ˜n +s (dashed lines) as a function of inner core radius and for different choices of kinematic +viscosity (color in legend). +pling on ˜ε + m remains modest, reducing its amplitude by a maximum of approximately 0.0015 +arcmin. +The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core +tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in- +ner core with the fluid core spin axis. The viscous coupling strength is inversely proportional +to r +s , so a larger viscosity results in a larger inner core radius at which viscous coupling is of +a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5× 10− 4 +m 2 + s −1 + , Figure 5 indicates that ˜n + s may be 1 arcmin or larger only if the inner core radius is +smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi- +tational coupling is much larger than viscous coupling, and the inner core tilt is limited to a +fraction of 1 arcmin. +The larger inner core tilt observed with increasing effective viscosity results in a larger +offset between the obliquity of the principal moment of inertia ˜ε + g and that of the mantle ˜ε + m , +though it remains limited. For the upper bound of ν = 5 × 10 − 4 + m 2 + s − 1 + , and for r +s = 1500 +km, the difference between ˜ε +g and ˜ε + m is limited to 0.0013 arcmin. 
+The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller +the misalignments of both the fluid core and inner core are with respect to the mantle. This +implies that the larger the inner core is, the more we approach a planet precessing as a rigid +body, although the misalignment of the spin axis of the fluid core remains important, approx- +imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ε + m , ˜m + f +and ˜n + s change with inner core size would certainly be different for a turbulent model of viscous +coupling. But the general conclusion remains that the addition of viscous coupling at the CMB +and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. +–22– +Confidential manuscript submitted to JGR-Planets +3.4 Electromagnetic coupling +Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium +Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to re- +main attached to electrically conducting materials, a differential tangential motion between two +electrically conducting regions stretches existing magnetic field lines that thread their interface. +This induces a secondary magnetic field (or equivalently, an electrical current) and an associ- +ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB +acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength +of the radial magnetic field B +r and the electrical conductivity σ on either side of the bound- +ary [ Rochester , 1960, 1962, 1968]. +The parametrization of EM coupling in terms of the coupling constants K +cmb and K +icb +has been developed in a few studies [e.g. Buffett , 1992; Buffett et al., 2002; Dumberry and Koot , +2012]. 
Assuming a dominating axial dipole field, with a radial component at the CMB given +by B + r = √ + 3 + B d +r + cos θ , where +B d +r + is the r.m.s. strength of the field, the coupling constant +K + cmb can be written in the form + K +cmb = 3(1 − i)F + cmb + B d +r +2 + , (47) +where + F +cmb = 1 +Ω + o ρ + f r +f + 1 +σ + m δ + m + 1 +σ + f δ + f + −1 + , (48) +and where σ + m , δ + m = + 2/ (σ + m µΩ + o ) and σ +f , δ + f = + 2/( σ +f µΩ + o ) are the electrical conductivi- +ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π × 10 − 7 +N A − 2 + the magnetic permeability of free space. The r.m.s. field strength + B d +r + is connected to +the Gauss coefficient g 0 +1 of the surface magnetic field by + + B d +r + = 2 +√ + 3 + R +r +f + 3 + + + g 0 +1 + + . (49) +We can readily build an estimate of the amplitude of K +cmb . The electrical conductivity +of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding +to the CMB of Mercury is in the range of σ + m ∼ 0 .01 − 1 S m−1 + [Constable , 2015]. In con- +trast, the electrical conductivity of Fe in planetary cores is expected to be close to σ + f ∼ 10 6 + S +m −1 + [Pozzo et al., 2012; de Koker et al. , 2012]. This implies that (σ + m δ + m )− 1 + ≫ (σ + f δ +f )− 1 + . Tak- +ing σ + m = 1 S m −1 + , + + g 0 +1 + + = 190 nT for Mercury’s dipole field [ Anderson et al., 2012], r +f = +2000 km, ρ + f = 7000 kg m − 3 + , this gives K +cmb ≈ (3 .1 × 10 − 11 + ) · (1 − i). To put this amplitude +in perspective, taking a molecular viscosity of ν = 10−6 + m 2 + s − 1 + in Equation (44a) gives a vis- +cous coupling constant of K +cmb ≈ (6 .0 × 10 − 7 + ) · (0 .195 − 1. 976 i). Hence, EM coupling at the +CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- +ponents of the radial magnetic field. 
+EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by +CMB cavities [Buffett , 2010; Glane and Buffett , 2018], in which case the effective σ + m could be +closer to σ + f . Likewise, σ +m can be increased if a more electrically conducting layer has formed +at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction +of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even +in the extreme case of σ + m = σ + f = 10 6 + S m − 1 + , K +cmb ≈ (1. 6 × 10 − 8 + ) · (1 − i ), which remains +–23– +Confidential manuscript submitted to JGR-Planets +smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces +dominate the tangential stress on the CMB of Mercury. +At the ICB, because we can expect the electrical conductivity in both the solid inner core +and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM +coupling can be much larger and dominate viscous coupling. We assume that the magnetic field +morphology at the ICB is dominantly comprised of small spatial scales for example as predicted +by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in +terms of an equivalent uniform radial magnetic field B +r capturing its r.m.s. strength [ Buf- +fett et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the +fluid and solid core, the coupling constant K +icb can be written in the form +K +icb = 5 +4 (1 − i )F + icb B + r 2 + , (50) +where + F + icb = σδ +Ω + o ρ + s r +s , (51) +and where δ = + 2/ (σµΩ + o ) is the magnetic skin depth. As F +icb is inversely proportional to +r +s , K +icb is inversely proportional to inner core size. Note that computing the EM coupling based +on the r.m.s. strength B + r rather than a true field morphology tends to overestimate the strength +of the coupling [Koot and Dumberry , 2013]. 
However, since the strength of the radial magnetic +field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are +absorbed in the range of possible B + r values. +The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., +2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. +When B + r is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which +K + icb increases linearly with B + r instead of quadratically. A good approximation of K + icb cal- +culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], +K E +icb = (0.175 − i0. 138) B + r , (52) +where B + r is in units of Tesla. The superscript E emphasizes that the numerical factors are +appropriate for the parameter values adopted for Earth in the computation of Dumberry and +Koot [2012]. To adapt these numerical factors to Mercury, we write, +K + icb = (0.175 − i0. 138) F + icb +F E +icb B + r , (53) +where F E +icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- +erry and Koot [2012]. These are Ω + o = 7. 292 × 10− 5 + s − 1 + , ρ + s = 12846 kg m− 3 + , r +s = 1221. 5 +km, σ = 5 × 10 5 + S m − 1 + , which gives F E +icb = 90.36 T − 2 + . +To compute F + icb , we assume an electrical conductivity of σ = 106 + S m − 1 + in the core of +Mercury [e.g. de Koker et al. , 2012; Deng et al., 2013]. The transition between the weak and +strong field regime occurs when B + r ≈ 1. 53 mT for the real part of K + icb . B + r at the ICB +of Mercury is unknown. 
The dynamo model of Christensen [2006] showed that the field geom- +etry inside the core could be dominated by small length scales, yet only the weaker lower har- +monics of the field would penetrate through a thermally stratified layer in the upper region of +–24– +Confidential manuscript submitted to JGR-Planets +the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- +face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g. An- +derson et al., 2012], B + r at the ICB could be as large as 0.3 mT, corresponding to approxi- +mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- +cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of +Mercury remains in the weak field regime. +Figure 6 shows how ˜ε + m , ˜m +f and ˜n + s vary as functions of inner core radius for different choices +of B + r . The larger B + r is, the stronger is the EM coupling at the ICB, and the smaller is the +differential rotation between the fluid core and inner core. The inner core and fluid core are vir- +tually locked into a common precession motion when B +r > 0. 3 mT. Further increasing B + r +above 1 mT does not change the solution as EM coupling already dominates all other torques +on the inner core. This is the case even when EM coupling transitions into the strong field regime. +EM coupling at the CMB is included in these calculations, with σ + m = 1 S m−1 + and + + g 0 +1 + + = +190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core +we retrieved the solutions of ˜ε + m and ˜m + f shown in Figure 4. +As the inner core radius is increased, both ˜ε + m and ˜m +f get smaller, as was the case with +viscous coupling alone, although the addition of EM coupling leads to more substantial changes. 
+The inner core needs to be larger than approximately 500 km for changes in the Cassini state +equilibrium to be noticeable. It is important to point out that ˜m +f is reduced not because of +EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which +pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the +inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the +greater is the reduction in ˜ε + m and ˜m + f . +When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are +locked into a common precession motion, a good approximation of ˜ε + m is given by the same pre- +diction as Equations (39-40) involving the effective moment of inertia C + , except χ is now given +by + χ = ¯ +A + c Ω + p cos I − ¯ +A + s Ω + o α +3 φ + s +¯ +A + f Ω + o ( e + f + K +cmb ) + ¯ +A + s Ω + o e + s α +3 α +g − ¯ +A + c Ω + p cos I . (54) +For a small inner core, ¯ +A + c Ω + p cos I > ¯ +A + s Ω + o α +3 φ + s and χ is positive. Because ¯ +A + s Ω + o α +3 φ + s increases +with inner core size, χ gets smaller, and so do C + and ˜ε +m . The mantle obliquity drops from 2.049 +arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 +arcmin. For an inner core larger than ≈ 1000 km, ¯ +A + c Ω + p cos I < ¯ +A + s Ω + o α +3 φ + s , so χ becomes neg- +ative, C + becomes smaller than the moment of inertia of a rigid Mercury C , and ˜ε + m becomes +smaller than the prediction based on a rigid planet. +The larger the inner core is, the smaller are the misalignments of the fluid and solid cores +with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone +is not altered with the addition of EM coupling but further strengthened; the larger the inner +core is, the closer we approach a planet precessing as a rigid body. 
This is best revealed by the +obliquity of the gravity field ˜ε + g which, for a large inner core, asymptotically approaches the obliq- +uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be- +tween ˜ε + m and ˜ε + g can be as large as 0.008 arcmin for a large inner core. +3.5 Fixed inner core density versus fixed ICB density contrast +Coupling models when viscous and EM stresses are both present have been presented in +Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, +–25– +Confidential manuscript submitted to JGR-Planets +Figure 6. a) Obliquity of the mantle ( ˜ε + m , solid lines) and gravity field ( ˜ε + g , dashed lines) b) ˜m +f +(solid lines) and ˜n +s (dashed lines) as a function of inner core radius and for different choices of B +r +(colour in legend). +for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by +viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we con- +sider a model where K + cmb is purely from viscous coupling and K +icb purely from EM coupling. +We choose an effective viscosity at the CMB of ν = 10 −4 + m 2 + s −1 + , which we believe to be a +representative value given the comparison with the Moon (see section 3.3). We take a radial +field strength at the ICB of B + r = 0.3 mT, approximately the field strength expected under +the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa- +tive’ coupling model, although the uncertainty on ν and B +r obviously remains high. +Figure 7 shows how ˜ε + m , ˜m +f and ˜n + s vary with inner core radius for the ’representative’ +coupling model (black lines) under the fixed inner core density scenario that we have used in +sections 3.2, 3.3 and 3.4. 
Figure 7 also shows how the results change when, for the same rep- +resentative coupling model, we adopt instead a fixed density contrast between the fluid and solid +cores and for different choices of α +3 (coloured lines). For a relatively high density contrast (α + 3 = +0 .2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller +α +3 , the point at which the orientation of the co-precessing fluid and inner cores begins to be +pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the +general behaviour of ˜ε +m , ˜m + f and ˜n + s as functions of inner core radius is unchanged. Hence, all +our results in the previous three sections would be qualitatively similar under a fixed density +contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core +is required in order to produce an equivalent change in the Cassini state equilibrium. +4 Discussion +The study of Peale et al. [2016] also presented predictions of the obliquities of the man- +tle, fluid core and inner core associated with the equilibrium Cassini state of Mercury. Their +model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their +Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as +–26– +Confidential manuscript submitted to JGR-Planets +Figure 7. a) Obliquity of the mantle ( ˜ε + m , solid lines) and gravity field ( ˜ε + g , dashed lines) b) ˜m +f +(solid lines) and ˜n +s (dashed lines) as a function of inner core radius, for a fixed inner core density of +8800 kg m −3 + (black lines) and for different choices of α +3 (coloured lines). +i +m , i +f and i +s ; these represent the obliquities with respect to the orbital plane and are connected +to our variables by: i +m = ˜ε + m , i +f = ˜ε + m + ˜m + ˜m + f ≈ ˜ε + m + ˜m +f and i +s = ˜ε +m + ˜n + s . 
To summarize +their results, i +f and i +s vary substantially for different inner core sizes, are always of compara- +ble amplitude, and i +s is always larger than i +f . Furthermore, they find that as the inner core +size is increased, the mantle obliquity i +m gets progressively larger and is displaced further away +from its expected orientation based on a rigid planet (see their Figure 6). The change in i +m they +obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan- +etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), +is approximately an increase of 5 × 10 −5 + rad = 0.17 arcmin. This also corresponds approxi- +mately to the deviation of the obliquity with respect to that of a rigid planet. +When only viscous stress is included in our model (section 3.3), our results are substan- +tially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core +gets smaller with inner core size and that the change is very modest. In contrast with the re- +sults of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that +of the fluid core, except when the inner core is very small or when the effective viscosity is un- +reasonably large. We also find that as the inner core size is increased, the mantle obliquity gets +smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most +of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling +model that we use. But even when we adopt their model parameters and use their viscosity model, +we were not able to reproduce their results. +In the absence of viscous and EM coupling, the strong gravitational torque exerted on the +inner core by the mantle should prevent any large misalignment between the two. 
This is cap- +tured by the period of the FICN, which is of the order of 100 yr, much shorter than the forc- +ing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational +torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous- +EM torque to the gravitational torque decreases with inner core size, so a large inner core should +be more strongly aligned with the mantle. The more strongly the inner core and mantle are +–27– +Confidential manuscript submitted to JGR-Planets +gravitationally locked together, the more they behave as a single rigid body in response to the +external torque from the Sun. We expect then that the obliquity of the mantle should be brought +closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the re- +sults of Peale et al. [2016], which suggest the opposite. +We showed that EM coupling is most likely larger than viscous coupling at the ICB, even +though our knowledge of the radial magnetic field strength inside Mercury (on which EM cou- +pling depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM +coupling is sufficiently strong to bring the fluid and solid cores into a locked precession motion. +The larger the inner core is, the more this co-precessing core is forced into an alignment with +the mantle because of the mantle gravitational torque on the inner core. As a result, the larger +the inner core is, the closer we approach a situation resembling a whole planet precessing as +a rigid body. The addition of EM coupling at the ICB does not change the overall picture that +we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The +amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous cou- +pling alone; this remains a factor of 10 smaller than the changes suggested in Peale et al. 
[2016], +and again, importantly, in the reverse direction. +Our results suggest then that the presence and size of an inner core leads to only mod- +est changes of the mantle obliquity ε + m compared to the obliquity predicted on the basis of an +entirely rigid planet ( ε r +m ). Let us denote this difference as ∆ε + m = ε + m −ε r +m . The largest ∆ ε + m +occurs for a small or no inner core, and is ∆ ε +m ≈ 0.01 arcmin. This difference is decreased +as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM +coupling and large density contrast at the ICB, ∆ ε +m can be negative, but its absolute value +remains smaller than 0.01 arcmin. +To put these results in perspective, the uncertainty in the measurement of the mantle obliq- +uity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much +larger than this difference. This means that, at the current level of precision, it is not possi- +ble to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This +is consistent with the fact that the observed obliquity falls close to that expected from a rigid +planet. But it also implies that the observed obliquity cannot be used to place constraints on +the inner core size. +Nevertheless, our results show that the presence of a fluid core and inner core affect the +resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change +in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec ( ≈ 0 .006 +arcmin) [ Baland et al. , 2017]. This is also of the same order as the amplitude of the nutation +motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which +is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al. , 2017]. 
The precision on the obliq- +uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 +arcmin) [Cicalò et al., 2016]. Thus, in addition to including tidal deformation and the preces- +sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be +necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens +the possibility of further constraining the interior structure of Mercury on the basis of its obliq- +uity. + Obliquity measurements based on tracking topographic features reflect the orientation of +the spin-symmetry axis of the mantle ( ε + m ). Measurements based on tracking the gravity field +of Mercury reflect instead the orientation of the principal moment of the whole planet (ε + g ). These +two orientations do not coincide when an inner core is present and is misaligned from the man- +tle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we +–28– +Confidential manuscript submitted to JGR-Planets +find that the misalignment ∆ ε +g = ε + g − ε + m is limited. The maximum offset that we obtain +is approximately ∆ε + g ≈ 0.007 arcmin. This limited magnitude of offset is important in the +light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε + g = 1.968 ± +0.027 arcmin. This is substantially smaller than the two measurements of the obliquity of the +spin-symmetry axis of the mantle: ε +m = 2.04 ± 0.08 arcmin [Margot et al., 2012] and ε + m = +2.029 ± 0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent +with one another within their error estimates. In their interpretation, Genova et al. [2019] sug- +gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar- +cmin) is perhaps explained by an offset ∆ ε + g due to the presence of a (possibly large) solid in- +ner core. 
However, this is one order of magnitude larger than the maximum magnitude of ∆ε +g +that we predict. Moreover, we predict that the obliquity of the gravity field should be larger +than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision +of the measurements, ε + g and ε + m should coincide, and their difference cannot be interpreted as +reflecting the misalignment between the polar moment of inertia of the whole planet and the +mantle spin axis. +Lastly, we have concentrated our efforts on the mutual orientations of the different spin +and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by vis- +cous and EM coupling also leads to a displacement of these axes in the direction perpendicu- +lar to the Cassini plane [e.g., Peale et al., 2014]. Indeed, the two measurements based on track- +ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that +the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). +Although this offset is smaller than the measurement errors, so that the observed obliquity is +still consistent with no deviation away from the Cassini plane, some amount of dissipation in- +variably takes place. These measurements give then a measure of the possible amplitude of the +dissipation. One source of dissipation is from anelastic tidal deformation [ Baland et al., 2017], +but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-of- +plane component of the observed obliquity may further help to quantify and constrain the in- +terior coupling mechanisms. This will be the subject of a future study. +5 Conclusion +We have investigated how the presence of a fluid core and solid inner core affects the Cassini +state equilibrium of Mercury. 
Our general conclusion is that the coupling strength between Mer- +cury’s interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis +does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset oc- +curs for a small or no inner core. The larger the inner core is, the more it is forced into an align- +ment with the mantle because of the strong gravitational torque between the two, and the closer +we approach a situation resembling a whole planet precessing as a rigid body. The misalign- +ment between the polar moment of inertia and mantle spin axis increases with inner core size, +but is limited to approximately 0.007 arcmin. These conclusions apply irrespective of the core +composition and thus of the partitioning of light elements into the solid core; a smaller den- +sity contrast at the ICB only implies that a larger inner core is required in order to produce +an equivalent change in the Cassini state equilibrium. +Our results imply that the obliquities of the mantle spin axis and polar moment of iner- +tia (or, equivalently, the gravity field) should coincide at the present-day level of measurement +errors. Moreover, neither of these can be distinguished from the obliquity predicted on the ba- +sis of a rigid planet. However, the smaller measurement errors expected from the upcoming Bepi- +Colombo satellite mission may permit this distinction, and thus provide further constraints on +Mercury’s interior structure. + –29– +Confidential manuscript submitted to JGR-Planets +Acknowledgments +Figures were created using the GMT software [ Wessel et al. , 2013]. The source codes, GMT +scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work +was supported by an NSERC/CRSNG Discovery Grant. +References +Alfè, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core +conditions, Phys. Rev., B61, 132–142. +Anderson, B. J., C. L. 
Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin, +S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global +magnetic field of Mercury from MESSENGER orbital observations, Science, 333 , 1859– +1862. +Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Pu- +rucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Low- +degree structure in mercury’s planetary magnetic field, J. Geophys. Res. , 117 , E00L12, +doi:10.1029/2012JE004159. +Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mer- +cury: Influence of the precession of the pericenter and of tides, Icarus, 291 , 136–159. +Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin pre- +cession and polar motion of a synchronously rotating satellite: application to Titan, +Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50. +Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the +forced nutations of the Earth, J. Geophys. Res. , 97 , 19,581–19,597. +Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from +observations of nutations, Earth Planet. Sci. Lett. , 296 , 367–372. +Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: +effects of electromagnetic coupling, J. Geophys. Res. , 107 , doi:10.1029/2001JB000056. +Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. , 33 , +739–751. +Byrne, P. K., C. Klimczak, A. M. C. Seng¨or, S. C. Solomon, T. R. Watters, and S. A. +Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature +Geosci., 7 , 301–307. +C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: +flows, dissipation, dynamo and the lunar core, Geophys. J. Int. , 219 (Supplement + 1), +S34–S57, doi:10.1093/gji/ggz037. +Christensen, U. R. 
(2006), A deep dynamo generating Mercury’s magnetic field, Nature, +444 , 1056–1058. +Cical`o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The +BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software, +Month. N. Roy. Astr. Soc., 457 , 1507–1521. +Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71 , 891–896. +Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second +Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Ox- +ford. +de Koker, N., G. Seinle-Neumann, and V. Vlˇcek (2012), Electrical resistivity and thermal +conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. +Nat. Acad. Sci. , 109 , 4070–4073. + –30– +Confidential manuscript submitted to JGR-Planets +de Wijs, G. A., G. Kresse, L. Voˇcadlo, D. Dobson, D. Alf´e, M. J. Gillan, and G. D. Price +(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature, +392 , 805–807. +Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics , +vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford. +Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary, +Geophys. J. Int. , 167 , 557–566. +Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical +resistivity of iron and implications for planetary cores, Geophys. Res. Lett. , 40 , 33–37, +doi:10.1029/2012GL054347. +Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid in- +ner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL +Dataverse, V2. +Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nuta- +tions, Geophys. J. Int. , 191 , 530–544. +Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization +regime, Icarus, 248 , 254–268. 
+Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner +core, J. Geophys. Res. Planets , 121 , 1264–1292. +Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mer- +cury’s core density structure on its longitudinal librations, Icarus, 225 , 62–74. +Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res. , 77 , 360–366. +Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang, +T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019), +Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett. , 46 , +doi:10.1029/2018GL081135. +Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at +the top of the core, Frontiers in Earth Science , 6 , 171, doi:10.3389/feart.2018.00171. +Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global +contraction of Mercury, Earth Planet. Sci. Lett. , 307 , 135–146. +Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine, +E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T. +Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res. , 118 , +doi:10.1002/jgre.20091. +Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H. +Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon +(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. +Res., 117 , E00L14, doi:10.1029/2012JE004217. +Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orien- +tation, love number, and ephemeris from the MESSENGER radiometric tracking data, +Icarus, 335 , 113,386. +Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the +electromagnetic coupling for nutations, Geophys. J. Int. , 195 , 200–210. +Li, J., Y. Fei, H. Mao, K. Hirose, and S. 
Shieh (2001), Sulfur in Earth’s inner core, Earth +Planet. Sci. Lett. , 193 , 509–514. +Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longi- +tude libration of Mercury reveals a molten core, Science, 316 , 710–714. +Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens, +M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s +–31– +Confidential manuscript submitted to JGR-Planets +moment of inertia from spin and gravity data, J. Geophys. Res. , 117 , E00L09, +doi:10.1029/2012JE004161. +Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s +internal structure, in Mercury: The View after MESSENGER , edited by S. Solomon, +L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi: +10.1017/9781316650684.005. +Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation +theory, J. Geophys. Res. , 110 (B02402), doi:10.1029/2003JB002915. +Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of +the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res. , 96 , 8219–8242. +Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and pre- +cession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J. +Geophys. Res., 107 , doi:10.1029/2004JB000390. +Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber, +D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of +Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res. +Planets, 119 , 2417–2436. +Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon +and the phase lead of its Cassini state, Journal of Geophysical Research Planets , 125 , +e2020JE006386. +Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74 , 483–489. +Peale, S. J. 
(1974), Possible histories of the obliquity of Mercury, Astron. J., 79 , 722–744. +Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262 , 765–766. +Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178 , 4–18. +Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic in- +variance, Icarus, 181 , 338–347. +Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle +and tidal torques on Mercury’s spin axis orientation, Icarus, 231 , 206–220. +Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a +solid inner core on Mercury’s spin configuration, Icarus, 264 , 443–455. +Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of +Mercury, Geophys. Res. Lett. , 42 , 6951–6958. +Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bul l. Astron. Ser. 1 , 27 , +321–356. +Pozzo, M., C. Davies, D. Gubbins, and D. Alf´e (2012), Thermal and electrical conductivity +of iron at Earth’s core conditions, Nature, 485 , 355–358. +Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s +rotation, Phil. Trans. R. Soc. Lond., A, 252 , 531–555. +Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res. , 67 , 4833– +4836. +Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic core- +mantle coupling, J. Geomag. Geoelectr., 20 , 387–402. +Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle +coupling, Geophys. J. R. Astron. Soc., 46 , 109–126. +Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), To- +wards evaluating the viscosity of the Earth’s outer core: an experimental high pressure +study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett. , 29 , 080,000–1. +Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. 
Wang +(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B , 66 , 060,102, +–32– +Confidential manuscript submitted to JGR-Planets +doi:10.1029/2001GL014392. +Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metal- +silicate partitioning and its role in core formation and composition on Super-Earths, +Astrophys. J., 835 , 234. +Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. , 489 , 92–99. +Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann, +S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observa- +tions of Mercury’s librations, Geophys. Res. Lett. , 42 , 7881–7889. +Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular +orbital elements, Celest. Mech. Dyn. Astr., 123 , 263–277. +Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity +of a precessing rigid body, J. Fluid Mech. , 17 , 1–20. +Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys. +Res. Planets, 123 , 1–25, doi:10.1029/2018JE005607. +Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics , +vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford. +Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides +and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. , 333–334 , +83–90. +Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSEN- +GER radio science data, J. Geophys. Res. Planets , 121 , 1627–1640. +Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping +Tools: Improved version released, EOS Trans. AGU , 94 , 409–410. +Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of +dissipation, J. Geophys. Res. Planets , 120 (4), 689–724, doi:10.1002/2014JE004755. 
+Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar +rotational dissipation in solid body and molten core, J. Geophys. Res. , 106 , 27,933– +27,968. +Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine, +S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A. +Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek, +J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J. +Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J. +Geophys. Res. Planets , 119 (7), 1546–1578, doi:10.1002/2013JE004559. +Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond. +A , 303 , 327–338. +Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181 , +327–337. + –33– diff --git a/read/results/playa/2201.00069.txt b/read/results/playa/2201.00069.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b7f0bd1c96b1e832782e596803bde593614f114 GIT binary patch literal 55856 zcmeIbTZ?4rdgr&%g)WxC$lwhP-Le-0+B*wJM8zpj)d){iR#sJZWoD%^vZ}h3s@h)zyI?d){3aC z?mc+VfWb(+D71McIyiFIwS)QlkVvvnT^ZX)xdir>twvt|Y^znFM6dTeFKF_n+V3@uw zX6bc4n~&y;e7Z<94tyck)#uec4F&hNJUk z{ywjzt?&Oy+D`xWpZ=pHJ>vUpoS)Y^DPMMz^e#PYr2XsBVw}%v>1M({)82e|mCb5f zdZe|Tq)&NbF#PbV`P?4bO47%T^bKDwvTH55)=l;r>1!6s7_B5dXr#~cyW66;sHNMC zv7>=@leE{+{QX9HIvNbK*~Pq;c9Qf7pP7>pN8|A*o6Ku`$#XAe*+grym2M_H^EjX1 zjs{oumNueq-Y-WN`E*z;7kZ6{PWX0~FN&!>#^#P2>66(gpU-pm6}$N=n_d;S^Q)1b z*>7lrFY;`f*IG@R`UntB7nxtk5p#mtOBTZvuh~Q~%g5t_4>f`74@S#ci*3JPBiVPi z*+tHC?6RLtM;BSxPJcO@j_kvk1KwbkU0&KE*$4|~q&M04idn#d(9go(%tn{v{EqP= z;p`RTaduwJjQ$>J={;asOh?(Y)@mEd3}P*is~<-TpftT2vV&TSk@p+WMsIrg;a5{( z3uFSy$JyX22OX^)p*&%akF(3+B%9tbYT5#K_SNjXm}luxHd~CQ^I98V2`ky+eeuP# z)&>OM@G&0@^Vw)Rs_lkc+W6#UnFi|oVyVQbO1le@)iG|F$Z9B6U>@;sj{ z#AAH^s!?a&{n<31-sTJ?yz|b}kpv2><|C_o3SZ_6!Fg|-jb?8_LN*exyV}Fk@KG`W 
zP(58vfhlm|?`PThXdslkPs8kOv7C+G!wW;BI$nP^%5FxB1)yvuP{gxwc8f&wfUQIQ z&!FN9)}~#8_2(b{dUh@l05dq`IW)rR1O@g3=wa>W#dt8(c;603e>hsE&zGQ9q@ee| z$|keo{Tv>35{m;3F$dEl;_l`{h$Uq($+Qe*a{)dCJxGPmK>HG@!hirAV$oNHv<~k; z0iaZp;KTo3hdT2)Gj*T^1Onf_vuX9tNQ^!nvnvxV46Y}N3)F^*U)BmDkBZ?8X(L&~ z6QJR!fMfZ{-n91R*fbA>2N;jDE4DhCFr(*62=+Kz&Y+N*Xp?!5v%7pYEG~?XHN^1< z_~rw#wSGUDjSy{FdR*k=8U(_QU;g|tYC`nIBarmV;e-i54(oe49wB$KI-@|qVUbMx zk}=R*Y!QKCPeXYx=ZjfTFKuap6SgKoH247glWYdpp$yobejcG~4DBG|q!`WeY-ud{ zq+v@fu7<^|xT$rUr~#HoNFr(0CGC;qP*$Jk|Qf0$ht*N%4uh5mIuC~ijR292TzuJh@@3t@<`zsS*x z+2pzgvkMJ;F`6QV=TNK25EeUw)P^FqTG|cYO!=u?{+kiV8Vrl^oasyj zf3r;AET`$40vTtN&r{zl=h@r$NGyf~Oy7cXvw3b& zw-@h6(4wT9eU*NT_BH`xOZs*Q9X18KZ;PdAD})DI{Ql4HZkHHEtSjja2Klw@(XYS$ zT4XfHVU>&Yn{U4PG969R0t;p|&Bn5CC+WA}etUF$+UuvyX0z7%X=`(%-E6lWWV5eF zH;>xwW~;#uTe~-{H2rTfaZ&wx*m%DG-=+wCl;8lGM;P5V1ZfSx9_}@*Z(M)=e`)8-Uy|d&^o-$l9Tciax zcY3dhp3bt1QBj)aZ)e2>dpzfryk2~sK4k+KgnH)ja>m~qI0g?AHo4o}ZgqP#jQLb- zrhnbbw}qz7QSGc$ZUks_kGFbe67p%g#W%d{w!?ghcQgEY7Wp`bGp}`Qk0#48%$FBSoSBMelX(9kPoIx2hv`1Fd3h%undODX9}Uv}9lpvW z-3Yen10ftad6y3L3f7EXxy&Y$jK$r}wS~mNl9}c3%tx2d#$4|Sa;#*Akx-0tjKAV~ zF~X$+OxVI-*BKx^=g+y@*J3!DYjZX2sW_LN0-|2MPJVa!E?vqkNynoJ#xD+9EB*e@ zkm~7grLAWA6PZ3NEFDaKl0we&pU4RP@Q+gax8`Sx?|zoz2K^cTaWAJbtNq*YWo;`9 z)481p3UxuDUWI~fLJRzx|1JIg-==@}mw)oh^!vX;B{$P&`RnihUHbk1;ugDY)3GAv zsLaOVle8h~j5~HjxVGWXTAK|17yncG6)?-QiC$avFH5q`76y8`uhFADFBZc@^!I7< zJ73C9T;v1(8GXK-!!WDtVC0kG(q>obIYMB#nBs#apyRX6&PS_T_iH`bI!;Mh8(+X^iCdT4W%JI%(ja;mYq^}1o4+pPY>%?> z`0gIpI!}ifY3ar#F2K9H2O5oat#B};$s&p=?~8z;%GpJFH7#zZ#(9e(9pbU1^CccW zsvvBWwaFm9$sM&^7kKyQ<2y~aeu4jWg*VdBJ9dM*JM zq}ULAXnjJ?O^z#lkxq+rn%{bYVe|9!LLdS>S^B$6sb#I9*=X*FWG9SN%wVaO{=8T^ zU)N32NM7W3>1{#u;1SRPJ&x}D3V!N8MqEEa{(wDg+5Kizo=(tnn6~o?qfCow9pqtv zj+0h;aC~<9^2w{cvxAq%iOKMl2_(el57KM~9G95(5}PqjBN_ypV=YVkHW}5Efm~9_ zTYfC~h8RcqEWuU`2FsZUJb%Y9TI&F@r$-0Hd{Wetad9az2$#(f(*yRgkvA?IhbUIe zw-k3R8#mg`=JuDeMy3Ql;&-jimkDP7AEFEy-#^%HJ`n5VX+B?ILrVvkw0666dzz0H z0+`9QGT>Blaf;YS!e)ylya$&55QCF(SoHO*K(UAsLGK`28! 
z%f^j2%H0XFw}Rp%BP5}LCJ8)%1X)TeZk*(p1)rDOX?#3Z;mce2zs5l11uzO7;lPTrbD-9If^eosn)en~rtj z+J$(isNmQ90y)mhw?k}sR*%{!ADbID2Lr-sO*V>j@&a;vmn>!fnyS3T&xho-4kRBo z$VHKEI6wwMY3+WA3k?dw7owcxOq3&1g#)qc(Df0vGCiSQfxoNEPHmhOu-ypJ*!l9C4DSH9#6v`i zVCO+Cm6ME&HZ(fITIFT^!jgb5Fk=-z!_+4lIjbi)MU1ia}}ll29b{; zPBFGxe5J!8R)F+7gJN=ChaWHhUb^uu67ttXdbJazb-ja?J&6d|FfrTZMc1<_^Xd?6ziBE2JIKW`k4 zmPxs`tp{2Inq0yU@x|00wqn@~=`@?wkMmn_o}MG+uP`pq_zok^hzLtgug8QmWe}u< z1mU$Yf)g(aI*?W|)hb?MiVOqlT2dyhP%cYg6DJC^CM6 zM0D8&goT%4EYgheI+1QP=8dtm(RelkGGgQ$KEyS`ins=9$?`fKNuW>8En_hO)>v8w zL~jeovB@v6l)RGA24*I1@tB0oxt7XiORSb^x{)3@TPYB|!)khdN4N=s8nF!{A4ex< zAej`AI=bL7&-APGTw(eI8X#`~^&%$ROA-UwH3*eX%HK(PJ0T_W%2Uk94GuKxiHGO9 z2~wJyj2(5M5tPE*!wN|ymZbSkVz)8y0p!0wyeD28D)APU2C&DNo zai#~BR|)!IxL91zA3c0{dwbisMRhEu^9J#Whoz=4v;)}t1TtY9C< za)2^N#yWZAFo7=rA|GSjjHbV9)`TA<;>iiHjWskGH+w_mozbDQ0qos=*f8| z10sYMBBdLtW+KZF4$>VSaY}eJ^RWj76Y*J%3nFDD0~mgUG+}$cf^&(nCV$0)5E(Zk^TAAT_OtYN?O>s^QujllE?Cv zEYR1Wkv9Xu-I@4xLSR)37>i*4G=q15RoHFsq_@NuFe1DOk!X*?vRox2N2jwgGnCEv z45%ah`RtU?Q)lzPYPUAKINnRBoOfpvASx5Xd&?EjIOs!DCPFo{oJ25Joz^TuQCr}6 z(n*Wia$+Xo##u(xEC@2$b6=j0E(nB|iixj?dd%`mte8dR$>x^_BusHN>(9SFIuj_c zSYA47tF`N3X2Q8No7QN{d3@$F0Ox#;|4k$!xOemAd7U7*v)@C;*EtlROF&%--^f7l z4iz++RmR5VLRj(i1v-}mGjCTN+}3V;yOaL5^LB}d?Ias#IYMbnTDGDkzkJNX9*C`nt9fUji@PEXA z{^b8k+@F}!O$(kYV&TFnw#&D8ei+xOM8E)t+wVZo**0GUYH<@t4LaxDtY;(d4i>RY z^q;2fEjP?*KA()dv=@%QBnhYN+IM8(t0sh0q=xo}asCd03QV-j-!nV2o{+Udb6~?x zQ*?VpI2$j?)}P_aiH(VqjmAJWN1_&US^WKf+~@C=$Ma2U|l@}tB`WGfeMo-W2DSHyjL&kv6FALG5H##gTodyGLc zbx;w|7y1XHg~T_rDOGw9K^XEr}h><{=vY8z~~{Bh*o?QX*F3c9<`9FNjZ7s8;@}!FYx)}B28zPOTh>VlPZ%V zRpn`}ceV%3RZ|vTKFrNAZ|=6cWCtX6TQD*i6aL(765iZ1y0%YS%^e&)AdJ8uF5z<` zrnSuT$}2%0gwIxEQxGScOr)#i$jFP?Jc&U;1cJ5;ZB?al@Q1V*ZyK0+<{3B|X9jU+ zu-p%#^v#g0mQ8`>#2gR?v?|#b21c|}p`loGEx5qvnF`dZfQ%$q zx>~o!g9-6;lmQ_VvF*HGvOS~m1Vfetc8N1YRC=IbQxZbitPC*TA$C{gtQY`il?JK`l$|9KVU67N`;T8uB-<0S?cSRBlhN-@TyOLiV-Yk+AwqP< zHr;Z)5g>oZXWyKUAre0)DU3N&084Bo9JbUPZVQ@ioRX~)At5TqlG(Oqz=wZ;GnrY& 
zG0M51APnf(CC(!knE)we=Hz9{#c)9g)FyF93<=_)&?ge^oO%oknvaX($_#4Cf~bCT zo|E!;Bb*MrAk;s+b$*u!vli13U957RtRs7+F1w}V}-o8+jU0m$)KVA{7WJleD2oRpMG?mYA zvaRr9ODgHb5@L;n+S(TFkGTJ54^FJ2j#gKFu^nbPN>4#~M;>iK50^7z4cb&DCe#woNSeaXCk0 zN?TTv#3o5iA29|)@q%Ujd0l^ZY6+_jFB{1~+mNW^~r`jT*MSn@)@t*$gg-*1~{&=Ip73I%CqMe3Vxl* zL2to=3XSv^g;I3~>BfR&$ru`d(8wXWlzA07YkL-!pr$!WOftwgAm|&1dy_4YpbJF8 z(9YiWz`Lj#t02*%YZl%fv zPnBbG`94E-0G1F-_784j|5CB|$VTLm3iLxUrYe}_0mRSR&E51$*gk`1Yy*Z$laTCk ziN`Oh6I|xlF7<4=C|n{oz}{K-RK^gxLH&-dJ*n3fCHf2g_rL0of2#ldG5`B3s@OMt z#XagxK`$g2GVH)RF;qI*P$s{4;l3flEJPfy$AyY@04eEy6Xe?0_3|7^r$UJv=*0pw zR74_L3Pbpe0z|Sd3tYohg@8_fyMVDNND@H4P%)T20Rvvt6Uyp>cSS|vxIsqi;ed?a z!<&cgE!YpOf)H_;JAo~LQT(R3%QV%LhS5L{?Hefp^U-v z*^i-t^;2A^I)7I3yBrl@^DJEXXp83A#)H~h-=bDVRLAnl2QF0YhX}vaOrWFh>zBx6 zi(Ox1Hm3Q7c3u$`>CTP|zzN{3t<4>YjRet1ILm%O!~hBn4r zQ)~s#LdAon3n7i+-G!xo2|>wq!p-frb{jUU1ahm}ZZ(`VMXkGhkq@_7WTWIt%SS?# zW*={yK0UAm9b=*(S%3tdt-xjF$yI3^Fy*wem$(mC%78k=Sf5&!eqyT#{!|`ziIz%?gy1l(#zn78#L!T8 z70a$=xkR+svrujl*JkzW5R6<<_f3CqOnfh~M^NrBKi3qMs1~_2#ZY8(0rOxMnDY>+ zl4{nA+-$TkvD4StSjO^VNSzsHEwE|8kfI*)|0KzZTU(l=c`vzry<-eJc*7`wEka4A zmD{Avg&%`1&pA~yBROtQBWsjTuyuKz!yLJ+`5iOQ*v>_6zf<+;;E+<)0xd}^2)fQk zDy821j*K#lh73P?+J8VW{aTEhd{-c4F6y2hBlsN@l>PLeL+g9Amt!I*OA{7#;v}Bc z;5igAlO_@oLeMam@w2m<_}(H3e)+^jwLK5YuzRE5fEmSr=ec|<0*=)BDdmgM#dGkL znpUqy=HyDlPy|IH>W`BdDBOs`HI-4N?8=qM@h!0%nP)_}EM060+b;-ffF}b`Ax8AT zA{STIfq*X_@#{@0ay81@Pw8IIm|+dda^T5~FN_WlHf+y4k!aDdjEuY#Ic!a5HL`A7 zo*&I}@-MO%o)OJBhJq4)L_CT=epoyvdrJ;+A)kZ6M)V|~-k~ajprnS#w!OY(XK%LC zK8wM-w^FDOte7i|6rvacwJTi~T7tNMCN)y*2^m)m!|1iPOg|(j*26Y(?;^t0+}!xq%6OWT)#~T0I0mtL|Uyu z4+E%3w{gwQE2~GZ3=bGd;M_-T3oF+gBGdMgqQVkKX~^2J8Vz?%t*J`=^t7-+3YCpr zk{I!PY=ep5{nXKv3TV=XJI$>Ir&9jrS1ELeiYFcr{U&cfpT!f;pM>z4;EN^1+u69T zamxBz7Nv@GSvMShx*}zc)BuK7S-_70`ffH$)o|Y9gp&S+cIJ`tY6@Rc9HqSQTTZjZ z8XGGl(HzcSo+erg@=5Zp;vY-2BSx0jphT2p;nQ>@oR;?7B%zGvxV8*jL*Vn7C=@KI@L@pObK@cTq0h@wk7S9JJP*viI z)F|x95%D4-J8AQ$o2}LknXT&qCp6NZ;2x?Ja8LD4Q4XoZ1y)QftKjj0(+eJhE2-+I 
z2@yODm}@J4N`O=N@e+-)Ja2%WhgEn2&6gBPWddM(#n~oq??8#zJ6Ue z-R;;qAbnX0lO|_&iR+?(LRFI3?;P%uYxDvtzrZZt@Nx|Kb&)?x_r7US@6vgY0Gd`O zoa9pOjx?(XAq}Br0vv64r$2fJvsqwM-a6K+B$)CqRQihhw6pEa`h?@Mus?=7She#} zrXVW&zQ#f_I~&MZzT61@RLA6gpAcU9xBtT*Cmha`p8aP@x@mv>bB^P*2$5sx=_%4= zo&PjRn=J(mdGz}~Qy6i(f!EsH{{ENk9ZsMJE>d(svq#S-_(KJ$mR6_J`u>-lW~Uh* zlaIv{7@`Vzpai7e$n;rmj`EQ3oGHH;7%65>D90RpevQJyDRe4AWlqTPXac>F_qGHmsBMU8`qtTk!Nc*?C-+~gGU{dnzmg<#=z zYiqYeEcj>dn-&e;-1UehoGO61p)irxNQ9CdKNM&xhw?PCD;fcBjz*Z4MRkeUDcsDU zyNE#k46ZAA)si}~ib6QsnUIDPop->n2njV247ovnlmkF^F`^+Ef+rKs*a+fsC|6W_ z>Ckfi)Nzt%lnPi$+jXli$S%P&Fq1hnh&fj&s{(y*cFA}(7Y^ERH0jJ)McUHfw7eHi zrHLUoo%9m-aMbI!;)m8I<{hHb(-35hrfhA38tf{ZLP-{nX;=~;-%L8dM0lsKUdX8Jm`UUNv@kkhg9TFR)$5mMw&Sl)&sGye$N7j#nH*am58k~O z6hVOjwc)?B#1FNhCRQu?tz$w$$((2oeuuBg##|J?!clBYmU-QZPVuzBrqA;|e8m~; z=MsLmxxygM{tL9@i!{qZiVc=cYV`4Preo?+t2Q?u_|UkU6JGPM)UZHSh^E(I`S5o}y^~0C$JZD;2hv2BPA!jE!1B6T&3t?xKZA(tR zdTq{wAH*+XgSrHcbXq`eI>7{Z;?9ET1+OH1Uic$WZydcj40;gg5{{y}1XCS%5CPIo zi5oeESTEkzi)-b*Y1e>F$cd1b;YA}ar*{$-EcXBkG->9LZ#a;ut~Yn<)Z(^w9(A@K zHQU-t`E}B6J=$zP+C&I|RQtZwthem(X6F%c*6_Idy4`%_zDAC+Rjc~oEx*;3dPEoO zl%5m>+&N$GR6AO%Cs0Gpf=@=)J`Q23)S)mM1g~BnwK*)U(8z{&nPPHwi27@{yIY*3 z1vtU6!X8qt3kV@@@NQjxh*J7uj^q~=bpf=CQ2L4GI%F1;t$T3R$R0_AJif?hA%Z2GL zMM^krv*)aKfi6ASbL^&tn^Gg}?rGrNY!RZ9u>R@-=?~?1WE+>rvtvUhISpLz5V(jf z$)T>*+}VL*cZSX%tF{BhZgqha1?X@fJ6Qn&&|c*U!?B$QcRqP3Ji#Az<`!r9==`#0 zy+{>dvY?7lWIDGvzsB?@#JFc30Ee2YmX{1dqLEHL5d!pb#<>8&F3A#=2`ZH+Lg9e4 zlX)13$PfuU-;qn83rTdx5Hw~%OwALOV&Gj5CGaT^jN_D|__`*tr;60v0_Hv9;v`Ic*qd;i%%GKIh zbsR}HQag6)9&UuYMbTD~?~}tgp7#_Wuz@vZC%!P4jn2g*n%}8b7d%0p$}Wqz6~-)( z5&`GTyaN&z4k4>nbF3o8vYFt#WmYAS?47mh#1w)VP1LP7AFfo6Qr94$YfDs_q2CdF zl>xB#?(xL(I)`!4*bj5(HT(kw>;FMypS`oqa!z!YYY`Q77Y8bIL^&Fr1_2JfT0+qH z$eRp;hywYF+jy|svm3FCRvfb_F>=NxK&Q^Cs<2G(ljW?)t#PLS@Ak@plhV%6cb;=F zJrF@wX~V-I`wNv6mvvRajYmT|rJ?Vql$iU9JX}-apAt9%qDG)P8WE`K{+Ww}8&fFP zrlYac*l9l%oVY`|8UeeK_&FsL(re`-&Wm^PojIs=eHu)l*nWv&DIV!E)XE=|`@}N0 zgEHbH`a`*ba~(fKDEbCG#0*YJ8?%4(_%0e5JMtp`LQ%VPFq#dl6xb;xDOLrgZ~+z; 
zg0oF{4wjWCKS5#6HL2xPiAsn#C(6%FTrl|6b&TK;w~SS0GmF_EoH!>y&&HrVWDrku znUY?a#Nk~rBu)sD$^e_j)J{+#Z(bgQ6{Qi4#T0f@V9@Xzuoh@B`5i-~26IFGXJCOo zP9qKvL&!RIT9O<`I9wEnk<~NFIv|)!|t`>68^(R@A_&B+DF$ z^0&_NQ=m-TS4h{mL7KWcN;$|PMJ)_Hft&U}CaE$Ac=~+$i~mkXTK@D;(*FL_C&}Ob z(LejYfBE0rH-DZ$R%09ze*K58g$Ms)0@J(Wx6;4)pZT7n5ccz5B@oL3Cnnu!J+L*- z?+EbFe1d2(QGSJsOCJrhQgdNCm4e@eBNO5rb~s&XuG=LhLe>zFRaPm4v&9fjhx+sX z!g7Rnr5neB-QpAMG;IR$;*YFgw5Y}iHval+8~U4X2(TUl3&oD3OsU2F{eJRsag1Bi zpZZ-hM@vW9=d|o8jVBA%iaDv@Nledqa&@IR+5`WKcoaSW2j|?i+1Sj)H9;}Kyttzc zjrS`NlX~7d!GKjGXQkqhE-vfr?Ms05S-RRD#YUu5(W2n|-lhglYjt3aVq$G=PDaGM zA~`T8mrh{_qnXb{PPX=S1TW61ZeJ?W`HmWQ(SFSs1cpW??b5nO>o(fOajI z#?W`DN%N5pb;nk@gZJiQ!dED(?oSRF zNOb0m9dfSFbM-+-tmcy|en}OPJ5b;hpVLYucPQ+`K$?vhvX1En!RLS>t&IAmv((FU z2dBHGdDRj6Qw5$=t0`&cnh66Z|I7!as0YZ0(GS%x`btj z{rX5c<|GkH9kV{C*8^fjFHXNOcQ6V}xkb|0D;uLXg{mpNgp6`Z{G zBlC`AT+lk;x=JwK99ca$ zgice%sOM8Em+pJht&mCp%jr951hmU7z zK<)CguB#a}oWV+=Akd{7DH*`HLE=X(TMm{j!kM9;tCvcAQxt#^SXr0+F>rk!umjUu zzaXB8%nx>ER!9=k1g-;y(WK;o$BqlxNMlB$homi7h*>ahZTDH;ApgwzOt$>Q#wQ&RaU{i@c#U1 z5yTN zs8n1g6nHNnH^R z(P)44)DK^fFL73-KbTtr!y}#Eo_O*TsLbn)vtTjVW=^x~(M57f?K#9n_R86S8@$y& z>z$tVYcX_zx%V8iknWPS)u`$!Z&qx``@<`+Lazn9YvEZ(!1FfJqT}7w5+trV*U^B? 
z#|RR2#IxY~P6?8G!iEBJ6^}@3nn;=-N8B9%tj=x>UP*yQlSJbf4^pQdP=u9WvOB~x z?5Gb+?augB1Venml1!H1cVr}{3K`8xw$X*tfaa%CJeAa4H7`TD5iio`?r5HA0P5~2 zS+ujYVu51sCkkwbHt}A=EbP3cl?bn>LnsIZuY<;z*z`E-*5tAh<8~**BF$HI=SD>p zsTPW!NoLl>q@9IsVpTJuRO-S7tX-L?Pl`ruQ8s^4UA|vRtE06vH>rqzJiPXXV=m1%kPjB3?-ARE>p8V&qddA~RIjmG#r31n2Mqn#=v0`#jS z!Gb>9{5=)(Y1EM9q3;bp06+B$a*a2Ur4{N?3FAd27v>k`=m*n4cF7tM(~Dj2`8fHfBK$S+mvoG#qJ#|= zF1;=zrF#hr^dh~34bi_+K57bU;j$1jG2&UXLkXf;Nd~?pxhiu77$UQBrx>cL2+yij z9|cW$5|bc&nXwlwS^`5^d5CwPs%P#Eh-b6aw`K<&qe?Y9ul1I&Qn0?Xi!ot;S&lxg z;MmbDE9E3b4p@q@{hd>&{2bhFmu{< z2O_GPjUA+!leZ2}3sjtr*dX{6Q5=0iKm^1yxmFg%C-E86#}2drgep3Kg^_B0M`CG| z@@Zaod;M__!!{=FNY;A3Mg}ElpdutEJiO6mW)DYFe7;5<4&&_egjNCfOuS;t*x^nX z=#%G|No^3WuGCHYSQc9Yyzo{q^Zn6IVnT8V*W+AH!wGqw+7x}agL>;7lOd33sNGk6 zwzkp$9=cSAYRD>)QhJ-wY#IG(z;kB@Ne%3wZL9}ZXrAa&Xe1rLzKT1Gx{Kj%^dBBu zXp~ZeAWhNbAJ<8mcPB)stP__GIY#XeX&~qgF7X}QivJR=)DULWl-2-`0p-sAueY)- zoB8@IvO_H;W?`|ue{E!H(QBoXZRBtrizQYU+S#HrX~5M;I}{CCt$^7^mV#f;3&)d} zrOu%l*BN-!;})&cq!z2MN+COg!Ond=u4+thOM)XFb3euu)T{_?{pj_QFDlqL?GX^( z#@@k`GV5-CPhaRG($*ZxL@9X(ACvlY4^b915T^r}$j=!~KJlQzynD)T!5#3^R@dzG zNpM~>h}heM6Z-p3r~~ka2IRMvT4iqP0w`E2(jVZ`A`H|g{ioz$h^mpXy(p_rP+ z!dZ%OxRXJoGibW!GyJaAGvN)6xm_-U8O7Oj(q7uWkF!KuumX-iNb)4K7K$8kHkihH zg*@Y~))F#jg<5s(~y|vli+}+*WZtm0{x$P3rJt*YZJillB+ z9AKbjk5c`y9f1$#SBx(w%cR>@x9*}dS-NSp2Qh0}<{Vv^)J|IpKKEFYo_)YL&5PYQ z>C2~2S+pfBjVxoHLi$NXU6T4>88TMXjdXQw$m$lbs9f^SpI}0fZS2!O zaUO*kryKoOQ5uk#7q9qqa0L~SOPY;NJ@JKo7P!~RUIE9_AsAGIc|n^od=s}|XVd*M zSHjbyENF!{`T5Dg(NUjkbvAA&acyO;KBS@a@uUs|plgdRCc1?vPDK~VN9(P$7G~kQX%P1VDkgYqP7hIinffVi#zzF3&GI*k0 zy{M$`^LIKc?wtVRfY9r<gwx69Syk-k%2+UksqwAc21_%u}KiNL3`uSmRV(YsRG5^cfQZO>w4g(_~z91I@8rtepC@b;@Uz1zM^TEKqX@R8s4ib zye`vF>9=QjuRdLg7Yf{O9m}AqSe@Tmecxe=>UJFv$H4_dTnfm^wqQRJj(6UC$*OU| z=BW$7SY@0w^QZkCh^Zm@lV4(ws4CXq?&5Y`focG%lF%!6>}LJw7Pp_OwQ zdN~*8I{No8l>BB1woiDNOmbsH1-V*8s|={YN$vE@6bnfKG)}^b!hAFXy*s8H znN{0!6T zA(dztXpPJH+d+diNm=8vxOq6Z9M#pI`{9}7@Wb2A^=LAgKTP}|jcYFEKv@6yAk{IZ 
zLk5}-8wqd6jN8fyuC0$wa5=vo25|@Ne9cuQeHdy}gXJoiSjso%(LO;*D5UDDHPl*%<+;&wZ2l%Mtx&k75Ki4 z0NRB_b}JI3@&k7iam^??xoPGadI@zbtsHMtLh$^+%Vz4i3y$2oY7e_|8IvTgiCH@+ zM4`fO<4|t~X1DD?!>(@!qSPtel2t43dYiwhf+0=Bv?M${{o8c6rc9zo80uQ%SJZKZ zQ{^XNmmt+m$XRY7s4ME&+}%AU$^Cpm<65R#83g6VUi0}RAwRdeR>{Xwg@sFmqg~QraeTsA* zP?4=(BZXv>HI>>~E)H;*I9R(Uj!m26VYW>v=Xu!JF3?xYSVcl}L8p3$9BosA;LT$)NLt2NtXc`G?^ z3nloAzFZ%4Zc4-I{dWY?PvTyD_&V&!X>i8^XhrpHj)l2)pu#IWYj&LcP_N@ut&H#Q zWGtIQ<;yfxF-s4;uXi%86x+G(>kxR^)EGjGul~lG8spd6%^nW%OyMTB`a|TFOQ(BG zx%zahsuE@8HGe?QT0g@3Xnb`&7ah4?A2D-fZSyMF6Nx&oyYA*a>n=E5X^$JC@A3Ks z-&quuXQ%(*DJK9s3QplDVCXAdWQt7H9+7};q>!q#Ys~yJa@hkP`ZI9~2go1r+*t{` zsGk=znB*kMHON#?@l6<|4Z# zR~j9l_%=)Ruyj(KGedv(mw)~%RUZGz|4EFUf?96W<}2&C@4up+_oI`il?n&E(E3Vc zLp+t-{PMm{%3RTcoGp!ZtwzYa39d!PMI$mou9G&0cF;_X@<C+JvcDztpCdV?uS{C=ps_`yn;10KPXZpB_f*h1txJ&~0xIUw@jUZB6!3Tw+8Q};M&mnYGy0KLXPQeEVHU6J=9ZQq!hiR-VvxcnxDDfvp zHmtjmShj2 z8S#r;1^L(h7&x*S`R}3`4eL2zhGIO(BQIB~FO)`A?@MNua3wFx%zQR8mRu=Q<(FSE zU_J9h+mi&)bn2(&=g@lth?_NLR<-f4F|j8dM(2h&5%be4qyl#+9Mnh*n zg|;^c`~F05Pd96)>%+TN@oF5VTO2D&GQG7sSdn4G=c_OYv1!Dj7Sz$b1ssxCNg5#7 zO0&_`abvTPMA9r@9#UKpv){U1>Wtva%GA1D*$3ejVP7Ra=)T%nn_|?awT$!Se>|+1 zp2dmjJx+-I&=X03F@8FA*BhTe1@{Wm|CQc>P0S7rGA?8o8`^!_mZ1H zvlg(=sEf#Eb-;*iq#G8@Kk?ZWL57Q+xkI56+}0Y2w@}S5??EjZ~UNY5Wb~s1RDGyq` zaP*-fMQjfNaO-$=ltctp3eRuM?jWURb?TN#)zpu*Q1PmWXst2%BD;q$M5-+{AEs|)brh@!mT)KGx5^f^26 zU)c~sUcX}k6?0@{Onn)UDAMJIcp#%3Msvv=B?zLc=SofwI1B2Dl2L*os<<$o4(GWm zD0liZ-0|$MNVC&d*KnaWVLfA%^A))X{UTK1{}%g#?|6C=m4US_jPL_+C&rF6|RHE2<-?RvOgh9)tW5&cpvP0MyFZGVsp9FC*y6vf+I zhJcDsE;y3K{$oh8%Ok*AaX%vz z@OduBfU1$MT%d^80c0J#9MTndazIw2cu2`vB>}WL8>r*`?zRGIVbrtR?p1+0wQ^X2 zZbQIAAmzCJr|!GQCU&tlgNJ3R^Xbiq8X=uHbFWuSeYjv!aO3PXC%Fj2$*8BwO%ePx zd5S9~rk4( zsVcidh<5YMB-e%B$W)RTncMDuDXWIrp<*;E}vhd#lpe zD%rs~iRQGsrp08;4M#|^XpT5Ovkp8V?JT%dtZ1iJU?XOARt|Sq-s{bJ6Y@8bWo!{O zwB|3g+N3?c6;7=z%JAzDc&XbgT~oQT(mCuV25Rnwy*GYniYJK!)uDpv66DI 
zGC4n79gBBpZ9B~tSJ|RW2I0LdaKrm^HExuwh04-_Z<=_YYb9nNS!C_5W#74ZZ8(hfi&tp-M<>-vj{aNw-g0T#A&b56h#-R_PS^6T%}H80_YPfq|bpJ(zgN&xRPlC7>Xh}catu8Cc4hJGZO-d=*N>ydC^i>l`g2z8FlMLAbC<+pP^Ea zc~yJ0@qAge+f^lFW$vIEMJT1K11dojI>vU>8Vi>~&so5BHt`+}B-0f}h~b3L{s{k+3W^5Fpv091oy-DOb1>!2NDK3}f z%=wiK!xpNKbqlongUghIza;$QQbqNCmCfX$0v|P#ju}iy%KTMxNLggK$k<5_@@{o^vWaS+dg@hoy|1@$i5mua}PgbiM?u-Dyn=EsL)vMPvZ$D`} z5`$Gzb3sIH?qmBC0gHyRi)oxVh&!mPJl2RB!E*1Zdauh$^BOdo4=h;4oOWr}gqR0{C_p74y^2p*xmf7{3NCNEdZNp|^bER|9@;$x4rqZx55|Kh z2i)R*qx0!%=Iq-e;~~YvAZQOhHb!K28k~B0&vn>(Ng%``Wc8@u`EX=bs$^JY5uj|3 ztI)ZQ@Rk$f;?vb1PwN774Kpy;RJ*o&YzxP&Uf+ zn5{M<=V>-@hq~5}+m!3m8p^Aw6j1)ExZnioIDMQ=uVP7tE}ekG&U5RW!aM!5r+ZEu zaNK98xCo}=w74ZhL|912gT(*QGT&bk!r}JCNH28gq9T+_E;ja0_fR$@7M!0IpJa{2 zRL3j<-FxO%q+KCmJ3?n#l#9g!EHRedC{2zeWr~f!D>wgfk~l-Tn1$1Y8&ExI5w zTm`i@ou#pY6CJ_LS3g$4>15Ap;UnnM2JR_*^e}ZpV;EP9H0{ckkgN48Yb5(vg)T~_ zeC+CE-$?{ZWo%Z=uy|0EPW{7(|C)&qraq+cs<=RFm^qf%G!_&ti z51dQJ93O{Q@OfxMfZ~_Hqi(o;EPN6Iy_J8x8|DZr`96DAZz z0dbJ<)KlauKE#{AF!lg?@Lqw;nRbuAud-{N@GK>HV_c0f!o6)?sQDReYA>f&q1Bs6 zxm{!vv$fT_DIY+XE~~8*f%rKN)XxUo5M4{pKKzO(HWrByBjb1>9-TLQYPu{?5>O(uBhy@uxBHkvOoH0wcGcsmUcR< z{}EnLtI=xiY;GpyEgw%A!DVB#+2&TQ_p@HDyU8#+yUecUIhw9~guBPifT$U#F{||z zXjY@fJ}Teej@ajWx2F)?{58clXd!@P=Zizb*;_X5fKnICpbo&N^W?HJ}vGbh2XtN z5GK*A=|W)~blK6!baoK+olrrY{sDhfZch<10R0r!<`h03WIfj$&B6Z-fq-whBDJP| zDBU)I_t&^RX{5*eR06fRS##(3p%Yj>ziHG6n`;%Dv|iJFs+#Qu);(g$P1UM-C6L!( zgzjdi@?d{~5&)Q~iVb=EH2dyWZk`0k)A8LDKgb4w?9IBZb$7auC1nGZLCV#!693pP zkm-ti_c-zgy1vsbpA1z#3vr^a$vo9IyI^Q@het`ZKa@?p+O0+DDD+0)Q+%#mQD7Ad z%g?B_y4_uN*20b9Mx8mtY(RJe<2Wn*!0llxEKUNU-DxLJvQhf2W2`q66Y52+@tbA( zjRQx?LLGvrhucY2Xt`^7gJ6XfKE?YjZoh3bw>#a=>ig(nuJRSA2|i?(6bO8mp780E zY!kgJ`feL9_+HY2`kkeyHd&o_N37VdwL1tVdX4>uKQGeigiO&+bYOgtIyf+^2hzlx z|4|$n+(7wAsLNl&k%@g!CBscH;vn$nn-O&bxm{kkeuqC5pRo$cLEJ{Fj(V`SXg=E$GA^uB4--!gs4GO&T7HyYP0eMXN5Kl4t({No4^}@b5NJI<3t%&)wbX4hzR% zl0(J2t8DRz0}+!^9m|B^rYy2hEsabKG~#2b&dS5b$>xLcBedi${a3jv`1xpXHJT4i zF_rUUz69mWP*tH^Q8CVtgCAR4U8*`;j5Q{Yj5|36on-A-cH}{-mam5>XL3Gb$j2x> 
zFmM!v1A!==!XF|ouyY{={fZXRvj~ZRK#ZKNTGHvXw}0B{bax4nkzE=t>d(JEI&)G# zr9>e8sw@()1RoiH9nDrNlt&LA4&O}(nl(nl{JO#Y3ys0_;rZLUhuj|A_-?qEjFYdB z8~AwZ#zRH;viWuwJbr}%DS23nU_aAMtj-R^^bW}IM#rP{rHd7uWQ(DWGgB@ugR}pF ztzyEpk{20ax4UeT+0GA&EBD9IhhNO!fA}j}D4PId{EkuT_BLwZu(O-I$ftLJ-wW9O zWTe8m^re$<&nVR3R_^pnO^Q&V`HV~;H?1~o_#!EuD6H&+78}5-F7Io~=*AS1A z+A#DBOt@W|p#VSKMj{>FEze;huG0C&&E9o@e3TyMQt;Sg-7en$;VxAu18(U<5QA>% zv&xQ}QRnZV(mT5?!bP)zN~RvKJQyt34iX9L_Wp+;_+UY7OaJdSKlR{`m=9Yf&-Ah@ z=+as$Dk0Q1Vk60R2J(T>gj$5gQKky0kDLL+&JOU~Y9&X5Bf^Vdz{BJXa?#8#tlGhF zlvB@dgMULTMv@<&h+xaL7cd|M_c-YfA=OidLWJ7bEKV$cy=r)*dnM%RDbbwv5HIl7 z>YRUl&|%NgL3$*DZ+OoBG~teXb0qn2yjDJy6`O7xVP%*9T5G za1Z(sxnn|Yi^V5A2HY^oSq$7Yn67}nucEY($pV9Quso4!-R5p9dHK17b0+yKN-xQ) z!WePq}3VuyL{95XQyV#qiG1ORFwJ=m+{V~NpK6F z)Y@Y0oo<)dMMAU-Lu`)L0S#ZXtqSg|uCuV$X(gxG%_x1%V@5_F$yy+=i^)txdg7?< zHHCM^M+bA%-z=-+yiDo&D$y;o0Tkco;s+ErTh#n?QT&NJZXx14ftH#{d34{UOdPNML`G)! zM;w~aP~ks{M`M=PzlTTjk<2IoyrNCF2}aWQLe-YiW|gNprPeo(ypx7oy+b}ftX=FI<)BARt zvOU@@rGLUbxFSA(6RuZJO-uplTP3AeG)RZ7NCx#zPWF$V9DHrjtXF5xU!JDDr%#Cr z_s*<(ivGtw-=vVfi|vFQ85eev8XeY^lTOP>zGSP~mA}=*gu2_C9hj_IOOQ|sxteqd z@>HZ741_ykJQ+8px`K1}gug!tP=N{dnC_m(V)BzZ`CCqkI=?~|7Vd*7uB6g-<{L*gXV2Tn0?yYSHp z1rmy?eukH=ge$_iTFqb~7K&R>Nq6GZ;hexbM48|R60!LLjp>gSQu-k3^YtC+_HG+t zRGmkCNo|hnAEcuS{H!kKc!bP-lpibW~ z7v=nT&MV`);rtKDb5L{kqJ#0_UrDIFmW#c*$xl^&h_VOjm7+!-HDac@Y%BTbw6WNI z;;3BRJD)7i*?rnzE}>&Zn>@2_WjF+2YaJ;0D?mcK&(<+^x+o}W~>F^?~{`w z;mSW4*^iOq#)P*+f|Z=`$?K;G(xqtq!#}1fk1kmS*RxylmE@lr8*J#lGZR~@zboAU z-zd*&>`%{+N(R<*ehZ#lCEDX`GCG$)zgHrF%Ne&?%Q??3lUAoAIsS%YA;W^Wwd6X? 
z=Z8qh78$MG(JL5=dVhxk{wn)VxH5sp>cq8s$F+2ckBJy~Kf2<)f{9Z>RJBty1ZW9r9xP5MtH9BuMmRp zd5g7?OrIg#i5~w8FFDhhGpylH!j7 zS2SAg%{>eajuz58eZ-C1?){+HC5gz<`+UjG*KFEt@?N9dMqq;ihX+v0$Mv;@ zm#fFiEvBKO@7T#w1yyrs3X@3cXLgPPUE|T}LXBh6CsZwP+*EqRzhK`dd)nhFQwNC@ zfFd2EDj?-KKm3AY2!?3D^_UXk-?tKx$V`Xk3(O%{NH541Vpw*RQ_I-qU>AF?Nc5&}`NT*Amun&jg-E20ewu zE~pF|)?-OmSw0O44VuB}xm$g+Iftced*&Kb&LY zHBng+^})yac$_e?@sm)S&+Hu~w|Y88PDL~&iarCwN~I)8KP~PC%%Qpr8P$C zAmIW49A@K8>L%f_8;r6Q^u`LTTGE%vYTU8iZi2}2r)Z*eCmb1{Rr7OPea7_;VzdHIv9MRvM`yx}bc?ziTLEBp8-k`bDzxgA|gh z$b_%<(rpY3vue}*<>iNeg+6#pWje;2sieA^vY;g^BjAs#u?h!n*p>T*^e)#@aKNQpD6B=T>HK`Q9AGH->cl&mE8n*4d`t|ebLsBfqq}5bJFT-!1bnh|i zTx84)YQ~l^-UIuhj^QD?PC2oroR6OZwyE08PT`x z?fTBo?hD6H?+Tj|Ry0)2}DiGclisp+QKg{I;FK40Dy z82VeAJ4_=F!cT+zIz4!?e+=*$;yH10Yzba^lU*@VKckkPG&4qQoqQW&(7jeUrZXh9 uZQa@ych{UmShxcYS$dZ2fBo>Vm+0U03v6$K-PdHT!l@fyG5LvZyZ;ZF?^+E2 literal 0 HcmV?d00001 diff --git a/read/results/playa/2201.00151.txt b/read/results/playa/2201.00151.txt new file mode 100644 index 0000000..6ac4169 --- /dev/null +++ b/read/results/playa/2201.00151.txt @@ -0,0 +1,1146 @@ +arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022Astronomy & Astrophysics manuscript no. Populations4 © ESO 2022 +January 4, 2022 +Multiple stellar populations in Schwarzschild modeling +and the application to the Fornax dwarf +Klaudia Kowalczyk and Ewa L. Łokas +Nicolaus Copernicus Astronomical Center, Polish Academy of Sciences, Bartycka 18, 00-716 Warsaw, Poland +e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl +January 4, 2022 + ABSTRACT +Dwarf spheroidal (dSph) galaxies are believed to be strongl y dark matter dominated and thus are considered perfect objects to study +dark matter distribution and test theories of structure for mation. They possess resolved, multiple stellar populations that off er new +possibilities for modeling. A promising tool for the dynami cal modeling of these objects is the Schwarzschild orbit superposition +method. 
In this work we extend our previous implementation of the scheme to include more than one population of stars and a more +general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in +the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations +by metallicity and demonstrate that in spite of larger sampl ing errors the increased number of constraints leads to significantly tighter +confidence regions on the recovered density and velocity ani sotropy profiles. We then applied the method to the Fornax dSph galaxy +with stars similarly divided into two populations. In comparison with our earlier work, we find the anisotropy parameter to be slightly +increasing, rather than decreasing, with radius and more st rongly constrained. We are also able to infer anisotropy for each stellar +population separately and find them to be significantly di ff erent. +Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxi es: +star clusters: individual: Fornax +1. Introduction +Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo +1998; Tolstoy et al. 2009) are considered to be a perfect tool to +test our current theories of structure formation involving dark +matter in the context of near-field cosmology. The objects ar e +believed to be strongly dark matter dominated with mass-to- light +ratios even on the order of a few hundred solar units. Due to th eir +proximity they are also the only extragalactic systems wher e in- +dividual stars can be resolved and their velocities measure d of- +fering the possibility to create interesting dynamical mod eling +techniques. +The first estimates of dark matter content in dSph galaxies +were based on a single measurement of the line-of-sight velo city +dispersion of the stars and the application of the virial the orem. 
+As the samples of the stars with kinematic measurements grew, +it became possible to estimate the profile of the velocity disper- +sion and model it using the Jeans equation (Binney & Tremaine +2008). Since the stars in the galaxy can move on a variety +of orbits, from circular to radial, the degeneracy between the +anisotropy of the orbits and the mass distribution is inhere nt in +this type of modeling. The reason for this lies in the fact tha t +diff erent combinations of these quantities can reproduce the ve - +locity dispersion profile equally well. +A way to overcome this issue, at least partially, is to resort to +higher order line-of-sight velocity moments, such as the ku rto- +sis, and use the corresponding Jeans equations. Since the ku rto- +sis is more sensitive to the velocity anisotropy than to the m ass +distribution, useful constraints can be obtained on both. Still, the +method requires large kinematic samples to estimate the velocity moments reliably and some assumption on the functional form +of the anisotropy (Łokas 2002; Łokas et al. 2005). +The Schwarzschild modeling technique (Schwarzschild +1979) o ff ers a diff erent approach to estimate the properties of +dSph galaxies without prior assumptions on the type of orbits. +It relies on building a galaxy model out of a set of best-fittin g +orbits probed in the range of energy and angular momenta. In +this method, the anisotropy of the stellar orbits comes out a s a +result of the modeling in the same way as the density profile. A l- +though it has been originally developed for large elliptica l galax- +ies (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt e t al. +2015), it has recently been adopted for use on discrete data +characteristic of dSph galaxies and applied to a number of +dwarfs, including Carina, Draco, Fornax, Sculptor, and Sex tans +(Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi +2013; Breddels et al. 2013; Kowalczyk et al. 2019). 
+Many dSph galaxies show signs of the presence of multiple +stellar populations resulting from a few star formation episodes +(Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016; +Pace et al. 2020). This observation o ff ers a way to improve the +modeling methods since, assuming dynamical equilibrium, a ll +populations are supposed to be influenced by the same under- +lying gravitational potential of the galaxy, but they have d if- +ferent distributions so more constraints can be imposed dur ing +the modeling. This approach was first used by Battaglia et al. +(2008) to model the mass distribution in the Sculptor dSph +galaxy. A few attempts have also been made to constrain the +inner slope of the dark matter profile in dSph galaxies using +this technique (Walker & Peñarrubia 2011; Amorisco & Evans +2012; Hayashi et al. 2018) in order to resolve the so-called c usp- +core problem. It has been shown to be diffi cult, however, due +Article number, page 1 of 12 +A&A proofs: manuscript no. Populations4 +Table 1. Properties of the Illustris galaxy used to create mock data. +Property Value +Subhalo ID 16960 +Number of stellar particles ( N +⋆ ) 70446 +Number of dark matter particles ( N +DM ) 78448 +Stellar mass ( M +⋆ ) 5 . 74 × 10 10 + M + ⊙ +Dark matter mass ( M +DM ) 4 . 91 × 10 11 + M + ⊙ +Mean mass of stellar particles 815808 M + ⊙ +Stellar half-mass radius 9 . 99 kpc +Stellar half-number radius ( r +1/ 2 ) 9.6 kpc +Axis ratio c / a within r +1/ 2 0.907 +Axis ratio b / a within r +1/ 2 0.949 +Triaxiality 0.56 +to the nonsphericity of the dwarfs that introduces biases in such +measurements (Kowalczyk et al. 2013; Genina et al. 2018). +In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we +developed the Schwarzschild technique in the form applicab le to +binned velocity moments of a single tracer and verified its ab il- +ity to reproduce the mass distribution and velocity anisotr opy of +simulated galaxies. 
We have also studied biases resulting f rom +the nonsphericity of the modeled objects. Later, we applied the +method to model the kinematics of the Fornax dSph galaxy esti- +mating its mass and anisotropy profiles with unprecedented p re- +cision. +In this paper we extend our Schwarzschild modeling tech- +nique to include multiple stellar populations with the aim to +constrain the properties of dSph galaxies even more strongly. +We test our approach on a realistic simulated galaxy formed in +the cosmological context, originating from the Illustris p roject +(Vogelsberger et al. 2014a). Although no precise analogues of +dSph galaxies are available in this simulation because of th e res- +olution, we use a more massive galaxy but with properties oth - +erwise similar to dSphs. The reliability of the modeling doe s not +depend on the particular value of the mass so we believe these +tests to be viable. We do not attempt to constrain the inner da rk +matter density profile (which is poorly resolved anyway) but try +to put tighter limits on the estimates of the mass and anisotr opy +profiles. Finally, we apply the improved method to the availa ble +kinematic data for the distinct stellar populations of the Fornax +dSph. +This paper is organized as follows. In Section 2 we present +the data for the simulated galaxy as well as their splitting into +stellar populations and mock observations along the main axes. +Section 3 contains an overview of our modeling method, the ap - +plication of the method to all stars and to two populations, a nd +a comparison of the results obtained with these two approach es. +The results of the application of the method to the Fornax dSp h +galaxy are presented in Section 4. We discuss our findings and +summarize the paper in Section 5. +2. Mock data +2.1. Selection of the simulated galaxy +In order to test our modeling method on realistic simulated +data, we decided to use a galaxy from the Illustris project +(Vogelsberger et al. 
2014a,b; Genel et al. 2014; Nelson et al. +2015), namely the Illustris-1 cosmological simulation. Th is sim- +ulation follows the formation and evolution of galaxies fro m the +early Universe to the present by solving gravity and hydrody - +namics, as well as modeling of star formation, galactic wind s, SFR [M⊙ yr-1] + t [Gyr] 0 4 8 12 16 + 0 2 4 6 8 10 12 +Fig. 1. Star formation rate as a function of the age of the Universe in +the simulated galaxy from the Illustris project used to create mock data. +The black and gray vertical arrows indicate the last mergers which the +galaxy underwent, wet and dry, respectively. +t [Gyr] + Z [Z +⊙] 0 2 4 6 8 10 + 0 1 2 3 4 5 0 2 4 6 + N [102 ] +Fig. 2. Number of stars as a function of their metallicity and time of +formation (the age of the Universe) in the simulated galaxy. The vertical +line indicates the applied split into stellar populations. +magnetic fields, and the feedback from black holes. Although +dwarf galaxies that are of our interest here are not resolved in the +suite, this can be easily overcome with the appropriate choice of +the object and the treatment of data. +As the key properties of dSph galaxy equivalents we iden- +tified: the lack of gas, the lack of a black hole, a low spin, +the stellar mass much smaller than the dark matter mass and a +nearly spherical shape. The last condition was adopted in an at- +tempt to avoid any strong bias introduced by the spherical mo d- +eling of a nonspherical object. Moreover, we required the ga laxy +to possess a significant number of both stellar and dark mat- +ter particles (over 10 5 + ), and a well resolved center. Due to the +large softening scale for dark matter particles in the simulation +( ǫ +DM = 1 . 
42 kpc), we looked for an object in which even the +more concentrated stellar population (see Section 2.2) extended +over 43 kpc so that the region a ff ected by the numerical artifacts +was enclosed within 2-3 innermost data bins (we used 20 linea rly +spaced spatial bins, see Section 3.1). +Out of 27345 galaxies listed in the catalog of stellar circu- +larities, angular momenta, and axis ratios published by the Illus- +tris team (Genel et al. 2015) containing subhalos with the stellar +mass larger than 10 9 + M + ⊙ , only a few met our restrictive require- +Article number, page 2 of 12 +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling +-80-4004080 POPULATION I +[kpc] major POPULATION I +intermediatePOPULATION I + minor + 5.3 5.9 6.5 7.1 7.7 + log(Σ) [M⊙/kpc2 ] +-80-40040 POPULATION II +[kpc] POPULATION IIPOPULATION II +-160-80 0 80 160 + V [km/s] +-80-40040 + -80 -40 0 40 POPULATION II +[kpc] + [kpc] -80 -40 0 40 POPULATION II +[kpc] -80 -40 0 40 80 POPULATION II +[kpc] 0 30 60 90 + σ [km/s] -80-4004080 POPULATION II +[kpc] major POPULATION II +intermediatePOPULATION II + minor + 5.3 5.9 6.5 7.1 7.7 + log(Σ) [M⊙/kpc2 ] +-80-40040 POPULATION II +[kpc] POPULATION IIPOPULATION II + -160-80 0 80 160 + V [km/s] +-80-40040 + -80 -40 0 40 POPULATION II +[kpc] + [kpc] -80 -40 0 40POPULATION II +[kpc] -80 -40 0 40 80POPULATION II + [kpc] 0 30 60 90 + σ [km/s] +Fig. 3. Maps of the projected stellar density, mean stellar velocit y, and stellar velocity dispersion (in rows) for two stellar populations: the metal- +rich population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along t he principal axes determined +for all stars (in columns, along the major, the intermediate, and the minor axis, respectively). 
+-1-0.5 0 0.5 1 + 1 10 100β(r) + r [kpc] +-1-0.5 0 0.5 1 + 0 10 20 30 40 50β(r) + r [kpc]all stars +pop I +pop II 40 60 80 100 120 + 1 10 100σr(r) + r [kpc] + 40 60 80 100 120 + 0 10 20 30 40 50σr(r) + r [kpc] 40 60 80 100 120 + 1 10 100σt(r) + r [kpc] + 40 60 80 100 120 + 0 10 20 30 40 50σt(r) + r [kpc] +Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calcu- +lated from all stars (in red), including only population I (i n orange), and only population II (in blue). The upper row shows the profiles using the +logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used +in the modeling. +ments. We decided to use a galaxy labeled as subhalo 16960. +All the relevant properties of the galaxy are given in Table 1, +including numbers of particles and total masses for both com po- +nents, and details on the shape of the stellar component: the axis +ratios minor to major (shortest to longest) c / a , intermediate to +major b / a , and the triaxiality parameter T = ( a 2 + − b 2 + ) / ( a 2 + − c 2 + ). +We distinguish between the half-mass radius provided in the Il- +lustris database and the half-number radius r +1/ 2 , which we use for further calculations in this paper. The diff erence between the +two comes from a small gradient in the stellar mass-to-light ratio +with the distance from the galactic center. Since in our appr oach +we treat stars as equal-mass particles and refer to number de n- +sities (multiplied by the mean mass of a stellar particle whe n +needed), the application of the half-number radius is more self- +consistent. + Article number, page 3 of 12 +A&A proofs: manuscript no. Populations4 +10 -310 -110 110 3 + 10 100n⋆(R) [kpc-2] + R [kpc]major + 10 100 +R [kpc]intermediate + 10 100 +R [kpc]minor +all stars +pop I +pop II +Fig. 5. 
Surface number density profiles of the stellar data samples f or the simulated galaxy observed along di ff erent lines of sight (from the left to +the right). Di ff erent lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in +blue). Thin vertical lines indicate r +0 (see text) and the outer boundary of the spectroscopic data. +2.2. Splitting the stars into populations +Our chosen galaxy shows a complex formation history under- +going multiple mergers which result in extended star formation +with a few star formation bursts. The last wet merger, that is a +merger with an object containing gas, happens at 6.9 Gyr from +the beginning of the simulation, whereas the last dry merger (no +gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain +dynamical equilibrium. We present the star formation rate ( SFR) +as a function of time (the age of the Universe) in Fig. 1, where +these last mergers are indicated with black and gray vertica l ar- +rows. In Fig. 2 we show the distribution of stars as a function of +their metallicity (in solar units) and the time of formation . In or- +der to divide the stellar sample into two populations we cut it in +half based on the metallicity index of each stellar particle . This +split is indicated in Fig. 2 with the vertical line. With satisfying +accuracy it separates the stars born before and after 4 Gyr since +the start of the simulation, which corresponds to the formation +time before and after the end of the second major star burst, a s +shown in Fig. 1. We refer to the metal-rich stars as populatio n I +and to the metal-poor as population II, following the common ly +used nomenclature in astronomy. +In Fig. 3 we present maps of the projected stellar mass den- +sity, line-of-sight velocity, and line-of-sight velocity dispersion +for both populations obtained by projecting the galaxy alon g its +principal axes. 
The orientation was determined from the ine r- +tia tensor calculated from all stars within the half-number radius +r +1/ 2 and therefore is the same in both panels. The two popula- +tions diff er significantly in the spatial distribution and kinemat- +ics with the metal-rich (considered to be younger) population I +being more concentrated but having lower central velocity d is- +persion. Both populations show a weak rotation signal at large +distances from the center. +The velocity anisotropy parameter β( r ) = 1 − ( σ2 +θ + +σ2 +φ ) / (2 σ2 +r ), where σ +i are velocity dispersions in spherical coordi- +nates (Binney & Tremaine 2008), describes the orbital struc ture +of galaxies. It is one of the most important dynamical proper ties +of bound systems which cannot be inferred directly from ob- +servations and has to be recovered by dynamical modeling. Th e +profiles of the anisotropy parameter β as well as the radial σ +r +and tangential σ +t = [( σ2 +θ + σ2 +φ ) / 2] 1/ 2 + velocity dispersions for our +simulated galaxy are presented in the consecutive columns o f Fig. 4. Throughout the paper we use red, orange, and blue colo rs +to indicate values calculated or recovered for all stars, po pula- +tion I, and population II, respectively. The two rows of the figure +show the behavior of the parameters at diff erent scales. The top +row plots the profiles with the distance from the center of the +galaxy in the logarithmic scale and shows the drop of anisotr opy +at the outer edges of the object. The bottom row uses the linea r +distance scale and focuses on the main body of the galaxy. +Figure 5 shows the surface number density profiles of the +stars as measured in diff erent directions. We can see that while +the diff erent subsamples have quite distinguishable profiles, the +diff erence between the lines of sight is small because the galaxy +is close to spherical. +2.3. 
Observables +We generated nine sets of mock data by observing all stars and +each population separately along the principal axes determ ined +from all stars. For the observables to be used in the modeling we +divided the stars into 20 bins spaced linearly in distance fr om +the center of the galaxy up to 50 kpc, measuring the fraction +of the total number of stars and the 2nd, 3rd, and 4th proper +moments of the line-of-sight velocity defined in Eq. 8 and 9 +of Kowalczyk et al. (2018). The profiles of these quantities a re +shown in consecutive rows in Fig. 6. Columns correspond to dif- +ferent lines of sight, from the left to the right: along the ma jor, +intermediate, and minor axis of the galaxy. For clarity of th e fig- +ure, in each panel we indicate only the error bars for one of th e +data sets. However, as the number of stars in a sample remains +roughly constant between the lines of sight, the error bars a re +very similar among the panels in a given row. +Although in our previous studies of the reliability of +the Schwarzschild modeling and its applications to real data +(Kowalczyk et al. 2017, 2018, 2019) we approximated the den- +sity profile of the tracer with the Sérsic formula, we found th at it +does not provide a good approximation of the data for the simu - +lated galaxy considered here. We therefore fit the projected den- +sity profile with the King formula (King 1962) +I ( R ) = I + 0  + + + + + + 1 +p + 1 + ( R / R + c ) 2 − 1 +p + 1 + ( R + t / R + c ) 2  + + + + + +2 + , (1) +Article number, page 4 of 12 +K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in S chwarzschild modeling +10 -310 -210 -110 0 + 0 10 20 30 40M(R) + R [kpc]major + 0 10 20 30 40 +R [kpc]intermediate + 0 10 20 30 40 50 +R [kpc]minor +36912 + 0 10 20 30 40m2(R)[103 (km s-1)2 ] + R [kpc] 0 10 20 30 40 +R [kpc] 0 10 20 30 40 50 +R [kpc] +-10-50510 + 0 10 20 30 40m3(R)[104 (km s-1)3 ] + R [kpc] 0 10 20 30 40 +R [kpc] 0 10 20 30 40 50 +R [kpc] +01234 + 0 10 20 30 40m4(R)[108 (km s-1)4 ] + R [kpc] 0 10 20 30 40 +R [kpc] 0 10 20 30 40 50 +R [kpc]all stars +pop I +pop II +Fig. 6. Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd, +3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present +the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively. For clarity of t he figure, in each panel we +indicate only the error bars for one of the data sets. +where I + 0 , R + c , and R + t are the model parameters. The profile can +be analytically deprojected to obtain the 3D density +ρ( r ) = ρ +0 +z2 " + 1 +z arccos( z) − p + 1 − z2 # + , (2) +where +ρ +0 = I + 0 +π R + c [1 + ( R + t / R + c ) 2 + ] 3/ 2 (3) +and +z = s + r 2 + + R 2 +c +R 2 +c + R 2 +t . (4) 3. Schwarzschild modeling +In this section we briefly present our modeling method and its +application to the data sets derived for all stars and the two pop- +ulations of the simulated galaxy separately. In both cases o ur +aim was to recover the profiles of the total mass and the velocity +anisotropy. +3.1. Overview of the method +We follow the approach introduced in Kowalczyk et al. (2018) , +namely we model the total mass profile with the mass-to-light +ratio Υ varying with radius: +log Υ ( r ) = ( + log( Υ + 0 ) r ≤ r +0 +a (log r − log r +0 ) c + + log( Υ + 0 ) r > r +0 (5) +Article number, page 5 of 12 +A&A proofs: manuscript no. 
Populations4 + 1 2 3 + 0 0.5 1 1 2 3 ALL + Υ +0ac + 1 2 3 + 0 0.5 1 1 2 3 POPULATIONS + Υ +0ac + 10 100 + χ2 + 1 2 3 + 0 0.5 1 1 2 3 POP I + Υ +0ac + 1 2 3 + 0 0.5 1 1 2 3 POP II + Υ +0ac + 10 100 + χ2 +Fig. 7. Absolute values of χ2 + obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom +right) for the observations along the major axis of the simul ated galaxy. The results for the modeling of two populations (top right) were obtained +as an algebraic sum of values for populations I and II. To avoi d large numbers in the figure, Υ +0 was divided by the mean mass of a stellar particle. +where r is the distance from the center of the galaxy, r +0 is a +constant, while Υ + 0 , a , and c are the parameters of a model. We +have assumed log r +0 = 0 . 33 which corresponds to three softening +scales for stellar particles in the Illustris simulation. +We probed the parameter a ∈ [0 : 1 . 3] with a step ∆a = 0 . 04 +and c ∈ [1 . 1 : 2 . 9] with a step ∆c = 0 . 2, imposing the require- +ment on the total density profile to be monotonically decreasing +with radius. For each set of parameters and for each line of sight +we generated 1200 orbits using 100 values of energy (expressed +with the radius of a circular orbit) spaced logarithmically and +12 values of the relative angular momentum spaced linearly. The +outer radius of the orbit library, that is the apocenter of th e most +extended orbit, was set to r +out = 165 kpc in order to cover over +0.999 of the total stellar mass based on the fitted King profile +parameters. +We fit the kinematics weighted with the fraction of mass with +the constrained least squares algorithm where diff erent values +of Υ + 0 were obtained with a simple transformation of velocities +given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). 
In or- +der to smooth out the numerical artifacts, the three-dimensional +$\chi^2$ spaces were then interpolated with 12th-order polynomials ($\sim a^4 c^4 \Upsilon_0^4$) that were further used to determine the global min- +imums (identified as the best-fitting models) and 1, 2, 3$\sigma$ con- +fidence levels which for three parameters correspond to $\Delta\chi^2 = 3.53, 8.02, 14.2$ (Press et al. 1992). +3.2. Application to mock data +In the following we present the direct and inferred results of +the Schwarzschild modeling of the data sets described in Sec- +tion 2.3. +First, Fig. 7 shows the distribution of the absolute values of +the $\chi^2$ as a function of three parameters of the mass-to-light ra- +tio. In order to avoid unnecessary repetitions, we include only +the plot for the mock data obtained by observing the Illustris +galaxy along its major axis as the others are qualitatively similar. +The four panels refer to fits for all stars (top left), the metal-rich +population I (bottom left), the metal-poor population II (bottom +right), and the one named "populations" (top right) which is the +algebraic sum of values for both populations. +As our parametrization of the mass-to-light ratio is not intu- +itive we present its profiles explicitly in the first rows of the left- +Article number, page 6 of 12 +K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in S chwarzschild modeling +10 610 710 810 910 10 + 10 100 ALL +Υ(r) [M⊙/L⊙] + r [kpc]major + 10 100ALL +r [kpc]intermediate + 10 100ALL + r [kpc]minor +3 σ +2 σ +1 σ +best model +data +10 410 610 8 + 10 100 ALL +νtot(r) [M⊙ kpc-3] + r [kpc] 10 100ALL +r [kpc] 10 100ALL + r [kpc] +10 1010 1110 12 + 10 100 ALL +Mtot(r) [M⊙] + r [kpc] 10 100ALL +r [kpc] 10 100ALL + r [kpc] +-2-101 + 0 10 20 30 40 ALL +β(r) + r [kpc] 0 10 20 30 40ALL +r [kpc] 0 10 20 30 40 50ALL + r [kpc] 10 610 710 810 910 10 + 10 100 POPULATIONS +Υ(r) [M⊙/L⊙] + r [kpc]major + 10 100POPULATIONS +r [kpc]intermediate + 10 100POPULATIONS + r [kpc]minor +3 σ +2 σ +1 σ +best model +data +10 410 610 8 + 10 100 POPULATIONS +νtot(r) [M⊙ kpc-3] + r [kpc] 10 100POPULATIONS +r [kpc] 10 100POPULATIONS + r [kpc] +10 1010 1110 12 + 10 100 POPULATIONS +Mtot(r) [M⊙] + r [kpc] 10 100POPULATIONS +r [kpc] 10 100POPULATIONS + r [kpc] +-2-101 + 0 10 20 30 40 POPULATIONS +β(r) + r [kpc] 0 10 20 30 40POPULATIONS +r [kpc] 0 10 20 30 40 50POPULATIONS + r [kpc] +Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy al ong the principal +axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, interme- +diate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the +1, 2, and 3 σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r +0 and the outer range of the +data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations. +and right-hand side panels of Fig. 8 for the results obtained for +all stars and the populations, respectively. We further calculate +the total density (second rows) and the total mass content (third +rows). 
We include the obtained orbit anisotropy within the m od- +eled range in the bottom rows. The consecutive columns prese nt +the results for the observations along the major, intermediate, +and minor axis. Green lines indicate values for the best-fit m od- +els whereas the colored areas of decreasing intensity corre spond +to 1, 2, and 3 σ confidence regions obtained as extreme values al- +lowed by the models with χ2 + within a given region. In each panel +the true values from the simulation are presented with black lines +while thin vertical lines mark the values of r +0 and the outer range +of the data sets beyond which the reliability of results drop s sig- +nificantly. The true mass-to-light ratio profile was obtaine d by +dividing the total mass by the fitted King profiles, therefore the +drop at 100 kpc is the numerical artifact occurring at the ver y +outskirts of the galaxy. +Whereas in the right-hand side panels of Fig. 8 the resulting +anisotropy is obtained from the fit of all stars and uses only the +location of global minimum and confidence levels from two pop - +ulations (as in the top right panel of Fig. 7), in Fig. 9 we present +another method of calculating the anisotropy. In the second and +third row we show the derived profiles for population I and II +separately and combine them as stellar mass weighted averag e +in the top row. As in previous figures, three columns refer to the +diff erent lines of sight whereas the narrow fourth one shows the +behavior of the true profiles outside the modeled range which , as +we noticed in our previous studies, in a limited way influence s +the results. Such an impact is understandable since the star s at +larger distances from the center are still included in the line-of- +sight measurements. 3.3. Comparison of fitting results +The main strength of the two populations method comes from +tracing the underlying gravitational potential at diff erent scales. +As can be seen in the bottom panels of Fig. 
7, population I, which +is more concentrated, is also more sensitive to Υ + 0 , but gives +weaker constraints on a or c . On the other hand, population II +attempts to reproduce the total mass content at larger dista nces +as well, therefore showing stronger coupling between the pa ram- +eters. +The global minimums of the χ2 + distributions for both ap- +proaches, that is modeling one and two populations, which we +identify as the best-fitting models, closely coincide showing that +there is no internal bias in the improved method. However, sig- +nificant diff erences can be observed when comparing the confi- +dence levels, mainly at 1 and 3 σ. Namely, we find that using +two populations, the constraints we obtain on the density an d +anisotropy profile are much stronger. +Additionally, the more accurate method allows us to study +other e ff ects and biases, for example the consequences of the +nonsphericity of the modeled object. Whereas for the fit of all +stars the true values of the density, mass, and anisotropy pr ofiles +are contained within 1 σ confidence regions, the results for the +populations are more or less biased depending on the axis. Th ey +are well reproduced for the observation along the intermediate +axis, for which the e ff ects of nonsphericity seem to cancel out, +and more biased for the remaining lines of sight. We notice a +trend from under- to overestimation of the anisotropy when g o- +ing from the major to the minor axis. + Article number, page 7 of 12 +A&A proofs: manuscript no. Populations4 +-101 + 0 10 20 30 40POP I + POP IIβ(r) + r [kpc]major + 0 10 20 30 40 +r [kpc]intermediate + 0 10 20 30 40 +r [kpc]minor + 50 60 70 80 +-101 + 0 10 20 30 40POP Iβ(r) + r [kpc] 0 10 20 30 40 +r [kpc] 0 10 20 30 40 +r [kpc] 50 60 70 80 +-101 + 0 10 20 30 40POP IIβ(r) + r [kpc] 0 10 20 30 40 +r [kpc] 0 10 20 30 40 +r [kpc] 50 60 70 80 +data +best model + 1σ +2σ +3σ +Fig. 9. 
Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulat ed galaxy. In rows: +results for all stars (calculated as the superposition of two populations), population I, and population II. Colors fol low the convention used in +previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines) +outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, +2, and 3 σ confidence regions. +4. Modeling Fornax dSph +In this section we present the application of our Schwarzsch ild +modeling scheme to the observational data for the Fornax dSp h +galaxy obtained by del Pino et al. (2015) and del Pino et al. +(2017). This study is a follow-up of the work of Kowalczyk et a l. +(2019) and can be directly compared to the results presented +there. Moreover, we refer the reader to these previous publica- +tions for details on the origin of data and our procedures use d +for cleaning the spectroscopic sample. +Similarly to the approach introduced in Section 2.2, we di- +vided all available stars into two equal-size populations b ased on +their metallicity and then cross-correlated the samples with the +data used in Kowalczyk et al. (2019). The metallicity histog ram +of the final spectroscopic sample is shown in Fig. 10. Additio n- +ally, we color-coded each bin with the population it has been +assigned to, namely orange or blue for population I or II. Inter- +estingly, the case of Fornax is similar to our simulated gala xy +as the split at [Fe / H] = − 1 also captures an important feature +of the object’s star formation history, separating stars in to sub- +samples older and younger than 6 Gyr, as shown in Fig. 12 of +del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). 
The +numbers of stars contained in the samples of all stars, popula- +tion I, and population II are given in Table 2, where the indic es +"phot" and "spec" refer to the photometric and kinematic sam - +ples. The sum of stars in the populations is lower than in the sample of all stars since only stars with reliable measureme nts +of metallicity could be included. +N + [Fe/H]pop I +pop II + 0 20 40 60 80 100 +-2.5 -2 -1.5 -1 -0.5 0 +Fig. 10. Metallicity histogram of the final spectroscopic sample used in +the modeling of two stellar populations in the Fornax dSph. E ach bin is +color-coded according to the population it has been assigned to, orange +or blue for population I and II, respectively. +As we have shown in our earlier work, the light profile of the +Fornax dSph can be well reproduced with the three-parameter +Article number, page 8 of 12 +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling +Table 2. Properties of the data samples for the Fornax dSph. +Property ALL POP I POP II +Number of stars ( N +phot ) 65 797 14 882 49 205 +Number of stars ( N +spec ) 3286 1136 1151 +Stars within 1.8 kpc 3268 1134 1130 +Fitted normalization ( N +0 ) [ × 10 4 + ] 6.95 1.81 5.45 +Sérsic radius ( R + S ) [kpc] 0.454 0.429 0.420 +Sérsic parameter ( m ) 0.808 0.807 0.898 +102103104105 + 0.2 0.5 2 0.1 1n⋆(R) [kpc-2] + R [kpc]all stars +popI +popII +Fig. 11. Surface number density profiles of the photometric data sam- +ples for the Fornax dSph: all available stars (in red), the metal-rich pop- +ulation I (in orange), and the metal-poor population II (in blue). Thin +vertical lines indicate r +0 (see text) and the outer boundary of the spec- +troscopic data. +Sérsic formula (Sérsic 1968). The profiles of number density for +all stars and both populations together with the best-fittin g Sérsic +profiles are presented in Fig. 11. The colors follow the conve n- +tion introduced in previous sections. 
Thin vertical lines indicate +the innermost data point for the light profile for all stars an d +the outer boundary of the kinematic sample. The former, set a t +log r = − 0 . 16, is also used as the minimum of the mass-to-light +ratio profile ( r +0 in Eq. 5). The fitted parameters of the profiles, +that is the normalization N +0 , the Sérsic radius R + S , and the Sérsic +parameter m , are included in the second part of Table 2. +Figure 12 presents the profiles of the observables used in the +Schwarzschild modeling: the fraction of stars and the 2nd, 3 rd, +and 4th velocity moments (top to bottom) for the three data sa m- +ples: all stars, population I, and population II (in red, ora nge, and +blue, respectively). The error bars indicate 1 σ sampling errors. +The parameter space for Υ ( r ) has been probed as follows: +a ∈ [0 : 1 . 85] with a step ∆a = 0 . 05 and c ∈ [1 . 2 : 6] with a +step ∆c = 0 . 2. We point out that in Kowalczyk et al. (2019) the +parameter c was fixed at c = 3 and now we fit it as a free pa- +rameter. As for the mock data in Section 3.2, diff erent values of +Υ + 0 were obtained with the transformation of velocity moments +within the χ2 + fitting routine. The values of ∆χ2 + for all stars and +the populations are shown in the two panels of Fig. 13 (left an d +right-hand side, respectively). Due to the dense coverage o f the +grid, we decided to include only the values within 3 σ from the +fitted minimums (see Section 3.1). +The profiles of the mass-to-light ratio, total density, tota l +mass, and velocity anisotropy resulting from the χ2 + distributions +are presented in the consecutive rows of Fig. 14. 
The anisotr opy +profile for the populations is based on the fit of all stars but u sing 0 0.05 0.1 0.15 0.2 0.25 + 0 0.4 0.8 1.2 1.6M(R) + R [kpc]all stars +pop I +pop II +04080120160200 + 0 0.4 0.8 1.2 1.6m2(R)[(km s-1)2 ] + R [kpc] +-16-80816 + 0 0.4 0.8 1.2 1.6m3(R)[102 (km s-1)3 ] + R [kpc] +0481216 + 0 0.4 0.8 1.2 1.6m4(R)[104 (km s-1)4 ] + R [kpc] +Fig. 12. Observables of the Fornax dSph used in our Schwarzschild +modeling scheme. In rows: the fraction of the total number of stars, the +2nd, 3rd, and 4th velocity moment. In red we present the values obtained +for all stars whereas in orange and blue those for populations I and II, +respectively. +the confidence levels on Υ from the fit of two populations. Green +lines indicate the values for the best-fitting models wherea s the +colored areas of decreasing intensity show the 1, 2, and 3 σ con- +fidence regions. Additionally, with black dashed lines we in clude +the results from Kowalczyk et al. (2019) for comparison. +As a result of freeing the steepness of the mass-to-light +ratio profile (parameter c ) with respect to the previous study +Article number, page 9 of 12 +A&A proofs: manuscript no. Populations4 + 0 0.5 1 1.5 + 0 + 0.5 + 1 + 1.5 2 3 4 5 6 ALL +Υ +0 + ac + 0 0.5 1 1.5 + 0 + 0.5 + 1 + 1.5 2 3 4 5 6 POPULATIONS +Υ +0 + ac + 0 3 6 9 12 + χ2 -χ2 min +Fig. 13. Values of χ2 + relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the populations (right +panel) for the Fornax dSph. +(Kowalczyk et al. 2019), we obtained higher estimates of the en- +closed total mass at larger radii. In particular, for the mass en- +closed within 1.8 kpc we get M +all ( < 1 . 8 kpc) = 3 . 87 + 1. 48 +− 1. 56 × 10 8 +M + ⊙ from the fit for all stars and M +pops ( < 1 . 8 kpc) = 4 . 71 + 0. 87 +− 1. 13 × +10 8 + M + ⊙ from the fit of populations, while previously we had +M +old ( < 1 . 8 kpc) = 3 . 7 + 1. 4 +− 1. 3 × 10 8 + M + ⊙ . 
+Interestingly, despite the significant shift of the position of +$\chi^2_{\min}$ (to $c = 4.2$ for all stars and 3.6 for populations), the ob- +tained profile of the anisotropy parameter remains decreasing or +flat for all stars but changes to increasing from 0 to 0.5 for the +populations. Nevertheless, even in the latter case the previous +result agrees with the new finding within 1$\sigma$. +The detailed analysis of the anisotropy is shown in Fig. 15 +where the middle and bottom panels present the profiles ob- +tained for each population separately. We notice that the profile +for population I is decreasing or has a local minimum whereas +for population II it is increasing (from $-0.25$ to 0.5 for the best- +fitting model). Since population I is more concentrated, the last +bins contain very few stars, which limits their credibility. The +top panel of Fig. 15 presents the anisotropy of all stars calcu- +lated as a weighted superposition of two populations. With such +an approach we still obtain the increasing profile (from 0 to 0.5) but +the previous result agrees with it only within 2$\sigma$. +Since Fornax dSph is significantly elongated with the pro- +jected ellipticity of $\epsilon = 0.30 \pm 0.01$ (Irwin & Hatzidimitriou +1995), we anticipate some bias in the obtained results caused +by the spherically symmetric modeling. Kowalczyk et al. (2018) +studied such bias in an axisymmetric simulated object qualita- +tively similar to Fornax and identified differences in the system- +atic errors depending on whether the galaxy was observed along +its major or minor axis. Assuming that Fornax is observed along +the line of sight in between these extremes, we expect the total +mass profile to be slightly overestimated and the anisotropy to be +underestimated, further strengthening the likelihood of the real +anisotropy to be radial and its profile to be growing with radius +with respect to the results of Kowalczyk et al. (2019). 
+Both constant (like for our population I) and growing (pop- +ulation II) anisotropy profiles can arise from biased modeling of the real growing profile by observing an object along the +minor and major axis, respectively. However, for the bias to +occur in the two populations presented here, their inner orienta- +tions would need to be opposite. Since such morphological fea- +tures are not supported by the photometric studies of Fornax +(del Pino et al. 2015; Wang et al. 2019) which rather find a good +spatial alignment between the stellar populations, we conclude +that the anisotropy profiles of the two populations modeled in +this work are indeed significantly distinct. +Finally, it is worth noticing that the so-called mass-follows- +light model, that is the one following from the assumption that +the total density traces the stellar distribution, is no longer sup- +ported by the fit of the populations. With our parametrization, +the mass-follows-light model corresponds to $a = 0$ and whereas +it is enclosed within 3$\sigma$ for the fit of all stars, as was the case +in Kowalczyk et al. (2019), the allowed values for the improved +method are much larger, as demonstrated by the right panel of +Fig. 13. +5. Summary and discussion +Building on the previously created implementation of the +Schwarzschild orbit superposition method focused on modeling +dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018, +2019), we improved our tool by introducing multiple stellar pop- +ulations. Such an improvement is desirable and justified since +many of the dwarfs show signs of multiple star formation bursts +or extended star formation episodes. 
As the diff erent populations +trace the common underlying gravitational potential, one m ay +expect a significant improvement in the estimates of not only the +total mass content but also the orbit anisotropy since this r obust +modeling technique reproduces the anisotropy as a by-produ ct +of the modeling rather than taking it as an assumption. +We have tested our hypothesis by modeling mock data gener- +ated from a galaxy formed in the Illustris simulation. Due to the +limitations of the resolution, we chose a galaxy of mass a few or- +ders of magnitude larger than the estimated masses of classical +dwarfs. Still, the galaxy possessed appropriate qualitative char- +acteristics, such as the lack of gas and an almost spherical shape, +Article number, page 10 of 12 +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling +101103105 + 0.1 1Υ(r) [M⊙/L⊙] + r [kpc]ALL + 0.1 1 +r [kpc]POPULATIONS +3σ +2σ +1σ +best model +K19 +104106108 + 0.1 1νtot(r) [M⊙ kpc-3] + r [kpc] 0.1 1 +r [kpc] +105107109 + 0.1 1Mtot(r) [M⊙] + r [kpc] 0.1 1 +r [kpc] +-3-2-101 + 0 0.4 0.8 1.2 1.6β(r) + r [kpc] 0 0.4 0.8 1.2 1.6 +r [kpc] +Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. +In rows: derived mass-to-light ratio, total density, total mass, and +anisotropy parameter. In columns: results for all stars and the popula- +tions, respectively. Green lines indicate the values for the best-fit models +whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ +confidence regions. The best-fitting values obtained by Kowalczyk et al. +(2019) are shown with black dashed lines. +that made it a good test bed for modeling techniques applica- +ble to dSph galaxies. We applied our approach to all data and +to two stellar populations separately, comparing the accur acy of +the obtained results. 
Although the addition of the second tr acer +seemingly increases the number of constraints twice, the in cre- +ment is somewhat compromised by the sampling errors since th e +number of stars in each sample is then reduced. Still, we foun d +strong improvements in the accuracy of the method when us- +ing two populations. The results of the modeling show that th e +density and velocity anisotropy profiles are more strongly c on- +strained, most importantly at the 3 σ level, that is the range of +allowed values is much narrower. +Similarly to the conclusions of Kowalczyk et al. (2018) who +explored the e ff ects of nonsphericity using large and small +data samples, the comparison of results presented in the lef t- +and right-hand side panels of Fig. 8 suggests that the improved +method using two stellar populations gives more precise but less +accurate outcome. However, in both studies the apparent dete- +rioration of the reliability is a consequence of modeling of a +nonspherical object. In both cases, a simpler approach (muc h +smaller data samples or using one stellar population) resulted -2-101 + 0 0.4 0.8 1.2 1.6POP I + POP IIβ(r) + r [kpc] +-2-101 + 0 0.4 0.8 1.2 1.6POP Iβ(r) + r [kpc] +-2-101 + 0 0.4 0.8 1.2 1.6POP IIβ(r) + r [kpc] +best model +1 σ +2 σ 3 σ +K19 +Fig. 15. Profiles of the anisotropy parameter obtained with the +Schwarzschild modeling of two stellar populations for the Fornax dSph. +In rows: results for all stars (calculated as the superposition of two pop- +ulations), population I, and population II. Color lines indicate values +for the best-fit models whereas the colored areas of decreasi ng intensity +show the 1, 2, and 3 σ confidence regions. The dashed black line shows +the result from Kowalczyk et al. (2019) for comparison. +in larger final uncertainties, usually containing the true values +within 1 σ confidence region. On the other hand, the improved +methods exhibit substantially reduced uncertainties, hig hlighting +the underlying bias. 
+Our method parametrizes the total mass content with the +mass-to-light ratio varying with radius as a power-law in th e log- +log scale. We made two main changes with respect to our previ- +ous work: we added a third parameter c controlling the steepness +of the mass-to-light ratio profile (previously fixed at the va lue of +3) and allowed for diff erent stellar density profiles (previously +only Sérsic, now also King). These changes are of course cou- +pled since diff erent density profiles require diff erent exponents to +reproduce the same mass profile. It is visible also in our resu lts +since the King profile applied in the simulated galaxy gave us +values of c lower than 3. Nevertheless, we decided to use diff er- +ent density profiles to make our method more general and appli- +cable to objects, such as our Illustris galaxy, for which the Sérsic +formula does not provide a good approximation of the density +distribution. +Finally, we applied the improved method to the data for the +Fornax dSph galaxy. Due to the addition of another free param - +eter in our functional form for the mass-to-light ratio, our re- +sults for modeling all stars are slightly diff erent from the ones +Article number, page 11 of 12 +A&A proofs: manuscript no. Populations4 +obtained in Kowalczyk et al. (2019). However, in terms of the +total density and mass distribution the estimates obtained here +agree very well with those earlier results in the range cover ed +by the data. Therefore, the detailed comparison with other e sti- +mates from the literature presented in Kowalczyk et al. (201 9) is +still valid and we do not repeat it here. +A more significant diff erence with respect to these previous +estimates is seen in the results of modeling two populations in +Fornax. 
In this case we find the anisotropy to be slightly incr eas- +ing rather than decreasing with radius and, most importantly, the +confidence regions for this parameter, as well as for the den- +sity, are much narrower. We were thus able to obtain tighter c on- +straints on the properties of Fornax, which means that the im - +proved method is successful. For the first time, we were also a ble +to deduce the velocity anisotropy profiles for each of the pop ula- +tions separately. We found that the more concentrated, meta l-rich +population I has a decreasing anisotropy profile while the mo re +extended, metal-poor population II has the anisotropy incr easing +with radius. This finding may partially explain the large spr ead +of the anisotropy values obtained in the literature and summ a- +rized in Table 2 and 3 of Kowalczyk et al. (2019), which were +often based on modeling subsamples of our spectroscopic data +set. + For both studied objects we split the stars into two popula- +tions by dividing them in half based on their metallicity, Z (in +solar units), for the Illustris galaxy and [Fe / H] for Fornax. Such +a method is approximate but justified. Both galaxies have com - +plex star formation history with multiple star formation bu rsts, as +demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. +(2013), producing multiple stellar populations which cann ot be +easily tracked as the metallicity is a good but not perfect pr oxy +for the stellar age. Moreover, the metallicity histograms f or both +objects are approximately unimodal not allowing for a conve - +nient separation. More refined methods of division have been +suggested in the literature, for example in the form of the likeli- +hood function based on the position, velocity, and metallic ity in- +dex (Walker & Peñarrubia 2011). However, the likelihood fun c- +tion requires many assumptions which introduce additional un- +certainties into the treatment of the data. 
On the other hand, our +approach ensures the maximization of each sample (and there- +fore minimization of sampling errors) while capturing the im- +portant features of the star formation history. +Further improvements to the Schwarzschild modeling +method are certainly possible. One way to proceed would be to +include the modeling of the proper motions of the stars. For now, +measurements of transverse velocities are available only for the +brightest stars in dSph galaxies, but even small samples of this +type could provide further constraints on the models, as demon- +strated by Strigari et al. (2007) and Massari et al. (2020). +Acknowledgements. We are grateful to Andrés del Pino for providing the data for +the Fornax dSph and to the Illustris team for making their simulations publicly +available. Useful comments from the anonymous referee are kindly appreciated. +This research was supported by the Polish National Science Center under grant +2018/28/C/ST9/00529. +References +Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184 +Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13 +Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 327, L15 +Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton Uni- +versity Press, Princeton) +Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 +Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia, +G. 2013, MNRAS, 433, 3173 +del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS, 433, 1505 +del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996 +del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465, +3708 +Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126 +Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92 +Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40 +Genel, S., Vogelsberger, M., Springel, V., et al. 
2014, MNRA S, 445, 175 +Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398 +Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250 +Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354 +Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89 +Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Williams, M. J. 2013, +ApJ, 763, 91 +King, I. 1962, AJ, 67, 471 +Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431, +2796 +Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3959 +Kowalczyk, K., Łokas, E. L., & Valluri, M. 2018, MNRAS, 476, 2918 +Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482, +5241 +Łokas, E. L., 2002, MNRAS, 333, 697 +Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918 +Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36 +Mateo, M. 1998, ARA&A, 36, 435 +Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, +12 +Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022 +Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numer- +ical Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge) +Schwarzschild, M. 1979, ApJ, 232, 236 +Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cor- +doba, Argentina) +Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1 +Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371 +Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 +van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493, +613 +Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177 +Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518 +Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20 +Wang, M. Y., de Boer, T., Pieres, A., et al. 
2019, ApJ, 881, 118 +Article number, page 12 of 12 diff --git a/read/results/playa/2201.00178.txt b/read/results/playa/2201.00178.txt new file mode 100644 index 0000000..83c41dd --- /dev/null +++ b/read/results/playa/2201.00178.txt @@ -0,0 +1,1272 @@ +Draft version January 4, 2022 +Typeset using LA +T +EX default style in AASTeX631 +Imaging the Sun’s near-surface flows using mode-coupling analysis +Prasad Mani , 1 + Chris S. Hanson , 2 + andShravan Hanasoge 1, 2 +1 + Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India +2 + Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE +ABSTRACT +The technique of normal-mode coupling is a powerful tool with which to seismically image non- +axisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to +probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and +Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling +measurements to show that the resulting divergence and radial vorticity maps at supergranular length +scales (∼ 30 Mm) near the surface compare extremely well with those obtained using the Local Corre- +lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, +while ≥ 0.8 is obtained for the radial vorticity. +Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) +1. INTRODUCTION +Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect +on solar oscillations (seeChristensen-Dalsgaard2002, for a review). These are resonant normal modes of the Sun, +behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber +of the oscillation. 
As these waves penetrate the interior, they register information about the properties and dynamics of +the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be +retrieved through meticulous inversions of these seismic measurements. +Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. +Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through +global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolution of the solar neutrino problem +(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. +1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active +regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography +(Lindsey & Braun 2000). The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial +Rossby wave (Löptien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and +the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation. +In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has +received attention, with many studies seeking to validate and demonstrate the importance of such a technique for +investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically +challenging, the data analysis is simple and utilizes all the information registered by the mode. 
Thus far, global +mode-coupling has been validated through observations of the meridional flow (Vorontsov2011;Woodard et al.2013), +differential rotation (Schad & Roth2020;Kashyap et al.2021), global-scale convection (Woodard2014,2016;Hanasoge +et al.2020;Mani & Hanasoge2021) and Rossby modes (Hanasoge & Mandal2019;Mandal & Hanasoge2020;Mandal +et al.2021). Local mode-coupling analysis in the Cartesian approximation, formulated byWoodard(2006), was +validated byHanson et al.(2021) (hereafter H21) by examining the power-spectrum of supergranular waves and +comparing with previous time-distance studies (Langfellner et al.2018). +prasad.subramanian@tifr.res.inarXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 +2 Mani et al. +Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combi- +nation of model-eigenfunctions (e.g., Model SChristensen-Dalsgaard2021). The model eigenfunctions form a complete +and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and +flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation +eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then +reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield +cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights +(Woodard2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial +and temporal frequencies, leaving us with measurements sensitive to different quantities of interest. +In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps +at supergranulation length scales. 
A part of the formalism that was used to derive the forward model in H21 is +reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow +and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order +coupling (p +2 -p +2 ), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. +We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. +1.1. Forward problem +In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to AppendixAfor a +complete derivation of the forward problem. Working in the plane-parallel atmosphere (see alsoWoodard2006), we +denote the horizontal unit vectors e +x and e +y in our local Cartesian domain as pointing towards west and north on the +solar surface, respectively, and e + z points outwards. This approximation is valid when observing patches of the surface +that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood +of the supergranular scale ( ∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the +horizontal wavenumber qR + ≈ 120 (Rincon & Rieutord2018), where q = | q | = |( q +x , q +y )| is the vector horizontal +wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow +perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, seeRincon +& Rieutord2018), permitting us to model the flow vector uu +u = (u +x , u +y , u +z ) in the Cartesian domain like so (Unno et al. +1989;Woodard2006) + u σ + = ∇× [ ∇× ( P e +z )] + ∇× (T e +z ), (1) +where P = P σ + (x ) and T = T σ + (x ) are poloidal and toroidal scalar functions, varying with position x and temporal +frequency σ . ∇ is the 3D gradient operator. 
While mode-coupling can easily be extended to study time-varying +perturbations (seeWoodard2016;Mani & Hanasoge2020;Hanasoge et al.2020;Mandal & Hanasoge2020, for +example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period +of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of +perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq1using +vector calculus results in + u = − ∇ 2 + P e +z + ∇ (∂ +z P ) + ∇ + h T × e +z , (2) +where ∇ +h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the +Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a +function of horizontal wavenumber q and depth z e +z . Hence the poloidal and toroidal flows are described by P + q (z ) and +T +q (z ), respectively. Furthermore, we parametrize the flow along e +z using basis functions f (z ) (Chebyshev, B -spline, +etc). This is expressed as + P ≡ P + q (z ) = +j f +j (z ) P + q j , T ≡ T +q ( z ) = +j f + j (z ) T +q j . (3) +The flow coefficients P + q j and T +q j , represented by the discrete indices q and j , become ideal candidates for inversions, +where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be +exploited to expedite inversions. Note that P + q j = P ∗ +− q j and T +q j = T ∗ +− q j for the flow field to be real in the spatio- +temporal domain. +To infer flows from wavefields φ scattered by a perturbation of length scale q , cross-correlate them in the manner +Imaging near-surface flows using mode-coupling analysis 3 +φ ω ∗ +k φ ω +k +q , where k is the oscillation mode wavenumber (k + x , k +y ) and ω is the temporal frequency. 
Relate φ ω ∗ +k φ ω +k +q thus +to the flow coefficients P + q j and T +q j (see eqA7) +φ ω ∗ +k φ ω +k +q = H ω +kk + nn +j C + q j, k P + q j + D +q j, k T +q j . (4) +The weight factor H ω + (see eqA8) is a function of frequency, capturing information about the extent of coupling between +the two modes [ n, k ] and [n + , k + ], where n and n + are the radial orders of the modes, and k = |k | and k + = |k + | = |k + q |. +The spectral profile of the mode (see eqA9) is approximated using a Lorentzian (Anderson et al.1990). The more the +Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms C + q j, k and D +q j, k are poloidal +and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements +and are derived from the solar model see AppendixA. They possess the symmetry relation: C +q j, k = C +− q j, −k and +D +q j, k = D +− q j, − k (see eqA6). The kernels, as flows, are expressed on the basis f +j ( z ). +1.2. Least-squares of cross-correlation +Even though φ ω ∗ +k φ ω +k +q isolates the effect of flow perturbations at individual wavenumbers q , a more compact mea- +surement, known in mode-coupling literature as ’ B -coefficients’, is much better designed for inversion as it reduces the +dimension of the problem. A least-squares fit to the cross-correlation φ ω ∗ +k φ ω +k +q (seeWoodard2006,2014,2016) results +in the B -coefficients B +k ,q , according to + B + k ,q = +ω H ω ∗ +kk + nn φ ω ∗ +k φ ω +k +q + +ω |H ω +kk + nn | 2 . (5) +Multiplying eq4on both sides by H ω ∗ +kk + nn and substituting by eq5on the left-hand-side results in a concisely defined +forward problem (compare with eq4) + B + k ,q = +j C +q j, k P + q j + D + q j, k T +q j . (6) +In eq5,Woodard(2007) and H21 thus far only considered positive-frequency components in the summation over ω . +Here, we sum over both ±ω within a few mode linewidths Γ. 
Denoting the resonant frequency of a mode using ω +nk , +|ω | ∈ + ω +nk − Γ +nk /2 , ω +nk + Γ + nk / 2 + or +|ω | ∈ + ω +n + k + − Γ + n + k + / 2, ω +n + k + + Γ +n + k + / 2 + . (7) +Summing over ±ω guarantees that the parity $B_{\mathbf{k},\mathbf{q}} = B^{*}_{-\mathbf{k},-\mathbf{q}}$ (see Appendix A for derivation) is obeyed, thereby +ensuring that the flow field on the right-hand-side of eq. 6 is a real physical quantity in the spatio-temporal domain. +Taking the complex conjugate on both sides of eq. 6 and considering the negative wavenumber components −q and +−k, +$B^{*}_{-\mathbf{k},-\mathbf{q}} = \sum_j \left( C_{-\mathbf{q}j,-\mathbf{k}}\, P^{*}_{-\mathbf{q}j} + D_{-\mathbf{q}j,-\mathbf{k}}\, T^{*}_{-\mathbf{q}j} \right)$. (8) +Substituting parity and symmetry relations for all terms in the above results in eq. 6. As $B_{\mathbf{k},\mathbf{q}}$ is constructed by a +least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a +by-product. + 1.3. Noise model +In addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from +the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the +following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian +random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. +Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a +random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes +and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters +4 Mani et al. +Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p$_1$ (orange) and p$_2$ (green). The shaded +regions of the same colours indicate 1-linewidth Γ about the mode frequency.
The yellow shaded region indicates the range of +$kR_\odot$ and $\omega/2\pi$ to which we have restricted ourselves in this analysis. Beyond $kR_\odot$ of 2000, it is seen that the theoretical fitting +of mode frequencies starts deviating from the observed dispersion relation for the f-mode. +such as its amplitude, frequency and linewidth, and consequently in $B_{\mathbf{k},\mathbf{q}}$ in our case. We use the same noise model +as in H21, which was motivated by the above discussion, +$G_{\mathbf{k},\mathbf{q}} \equiv |B_{\mathbf{k},\mathbf{q}}|^2$, (9) +where, unlike H21, we again sum over ±ω. $G_{\mathbf{k},\mathbf{q}}$ is real, with the symmetry relation $G_{\mathbf{k},\mathbf{q}} = G_{-\mathbf{k},-\mathbf{q}}$ (see Appendix A +for explanation). + 2. DATA ANALYSIS +In order to examine near-surface flows we build a time-series cube of Doppler images that are obtained from the +Helioseismic and Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image +is Postel projected, with a spatial resolution of approximately 0.48 Mm, separated in time by 45 seconds, and is tracked +at the Snodgrass (1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm$^2$ in size, tracked for 24 hours +and crosses the disk-center in the middle of observation time on the 14 November 2017 (Carrington rotation number +2197, Carrington longitude 90°). This Dopplercube is considered as the physical wavefield $\phi(x, y; t)$. The Fourier-space +wavefield $\phi^{\omega}_{\mathbf{k}}$ (and subsequently, the cross-correlation $\phi^{\omega *}_{\mathbf{k}} \phi^{\omega}_{\mathbf{k}+\mathbf{q}}$) is obtained by computing the 3D spatial and temporal +Fourier transform of the Dopplercube. +The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in +Eq. 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon +& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination +from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015).
+Maximum signal can be extracted from the weighted summation of the cross correlations (eq5) when the spectral +profiles of the two modes [ n, k ] and [n + , k + ] closely align in ω space. This implies that their mode frequencies should be +sufficiently close ( |ω +nk − ω +n + k + | ≤ δ , the separation parameter). Since Lorentzians decay rapidly, the summation over +± ω is significant only over a few linewidths ( , the summation parameter; see eq7). We have empirically found and +tabulated δ in Table1for the radial order couplings n - n + ∈ f-f, p +1 -p + 1 , and p + 2 -p + 2 (the signal strength depends only +weakly on ; we set it to 3 line widths). +Figure1shows that for any two adjacent ridges (adjacent n and n + ), mode frequencies ω +nk and ω + n + k become spaced +farther apart with increasing wavenumber kR + . It is also known that mode linewidth Γ grows with radial orders for +a given kR + . Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +observation set the total number of modes within a range of kR + (and ω/2π ) that can be clearly observed, thereby +affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually +inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR + at fixed +radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR + ≤ 2000 and qR + ≤ 300. Our +frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). +Imaging near-surface flows using mode-coupling analysis 5 +Coupling k R + range # of δ +modes +f-f [400,1000] 5240 4 +[1000,1500] 7784 1.1 +[1500,2000] 10940 0.4 +p +1 -p + 1 [400,1000] 5240 4.5 +[1000,1750] 12852 2 +p +2 -p + 2 [200,1000] 5886 3 +[1000,1300] 4280 3 +Table 1. 
Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different +ranges of kR + . + 3. INVERSION +The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements +B + k , q from the linear relation in eq6. We describe inversion using regularized-least-squares (RLS) method here and +leave Subtractive Optimally Localized Averages (SOLA,Pijpers & Thompson1994) for AppendixB. The methods +complement each other (seeSekii1997), where RLS tries to minimize the misfit between data and model, whereas +SOLA gives better localization. For total number of modes M , RLS scales as M xJ where J is the number of basis +functions f + j (z ) (J M ; see eq3and section3.1), whereas SOLA scales as M 2 + (see AppendixB). For M > 5000, +computation starts to quickly become expensive for SOLA. +Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While +f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is +present even in p +1 -p + 1 , and p + 2 -p +2 (see Figure3), and possibly other higher order self- and cross-couplings. Since we are +interested in only surface flows, we leave higher order coupling to future work. +It bears mentioning that the slopes of the ridges in the kR + - ν spectrum (Figure1) increase with radial order. This +limits us to low-to-intermediate kR + (< 1000) for these higher radial orders if we are to remain under the acoustic cut- +off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals +from low kR + - too large an observation region could possibly render invalid the Cartesian geometry approximation. 
+Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions +separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich +helioseismic technique. + 3.1. RLS +For a given $\mathbf{q}$, the forward problem may be stated as +$KU = B$, (10) +with the aim to minimize the misfit $\sum_{\mathbf{k}} \|KU - B\|_2$, with $\|\cdot\|_2$ denoting the $L_2$ norm. Here, K is the matrix formed +by the sensitivity kernels: $\{C_{\mathbf{q}j,\mathbf{k}}, D_{\mathbf{q}j,\mathbf{k}}\}$. U is a vector composed of the flow coefficients: $\{P_{\mathbf{q}j}, T_{\mathbf{q}j}\}$ and B is a vector +composed of computed B-coefficients: $\{B_{\mathbf{k},\mathbf{q}}\}$. The least-squares problem is solved simultaneously for poloidal and +toroidal flow. We use B-spline basis functions as our $f_j(z)$, comprising 11 knots spaced uniformly in acoustic radius, +for both poloidal and toroidal coefficients. Hence, for M modes (total number of $\mathbf{k}$ for a given $\mathbf{q}$ is M) and 11 basis +functions for each poloidal and toroidal, the dimensions of K, U and B are thus M × 22, 22 × 1, and M × 1 respectively. +Normalizing both sides of eq. 10 by the noise covariance Λ (a diagonal matrix with the entries $G_{\mathbf{k},\mathbf{q}}$; see eq. 9; dimension +M × M) and pre-multiplying by $K^{\dagger}$, +$(K^{\dagger} \Lambda^{-1} K)\, U = (K^{\dagger} \Lambda^{-1})\, B$, (11) +$U = (K^{\dagger} \Lambda^{-1} K)^{-1} K^{\dagger} \Lambda^{-1} B$. (12) +6 Mani et al. +Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq. B17, and left panel of Figure 8) for $\mathbf{q}R_\odot = [-112, -45]$, +at the depth $z_o = -0.41$ Mm. Right: L-curve for the mode $\mathbf{q}R_\odot = [-112, -45]$; the knee (λ = 2.48) is marked by a blue +diamond. +Since the least-squares problem is typically ill-posed, we restate the minimization as $\sum_{\mathbf{k}} \|KU - B\|_2 + \lambda \|U\|_2$ with +the regularization parameter λ, which results in a trade-off between misfit reduction (first term) and solution +norm minimization (second term).
Under-regularizing can lead to a solution U that is dominated by errors in the +data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this +regularization makes the problem better conditioned and is now defined as +U = (K + Λ− 1 + K + λ I )− 1 + K + Λ −1 + B, (13) +where I is the identity matrix for L +1 regularization. The knee-point of the L-curve (Hansen1992), a curve formed +by plotting ||U || +2 vs ||KU − B|| +2 for different values of λ (see right panel of Figure2), is usually chosen as the +regularization parameter. After successfully inverting for U , we reconstruct the flow using eq3. Results for poloidal +flow P + q are shown in Figure3. + 4. LCT +To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained +from Local Correlation Tracking method (LCT;November & Simon1988). LCT provides surface-flow maps by +examining the advection of convective granules (1.2 Mm, qR + ≈ 3500;Hathaway et al.2015) by underlying larger- +scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules ( ≈ 35 Mm), +LCT is an effective method (seeRieutord et al.2001) to produce surface horizontal flow maps of supergranulation. +Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section2 +(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob- +tained and Postel pro jected. The horizontal flows are deduced by tracking the proper motions of granules between +consecutive intensity images, which we denote as I + 1 , I + 2 . The LCT method selects a patch in two images each +(I + 1 = I + 1 e ( x− x +ij )2 + / 2 sigma2 + , I +2 = I + 2 e (x − x +ij )2 + / 2 sigma2 + ) that observe the same granule at the grid point x + ij = ( x + i , y +j ). 
+A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance +moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in +section1.1. The two patches I + 1 , I +2 are then cross correlated for different values of position shifts ∆x , +C + ij (∆x, ∆ y ) = + dx I ∗ +1 (− x )I +2 (∆x − x) . (14) +The shift ∆x = (∆x, ∆ y ) that maximizes the cross-correlation C + ij is taken to be the proper motion of the granule. +Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules ( < 10 +min), the velocities are given by v + x = ∆x/ ∆ t and v + y = ∆y/∆ t . This exercise is repeated for all grid points in the +images I +1 , I +2 and for each consecutive pair of images in the cube. +In practice, we use the Fourier LCT algorithm (FLCT,Fisher & Welsch2008) for computing v + x and v + y . FLCT +requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the +Imaging near-surface flows using mode-coupling analysis 7 +Figure 3. Top : Inverted poloidal flow power-spectrum for the three couplings f-f, p + 1 -p + 1 , and p +2 -p +2 as a function of q + x R + and +q + y R + . Bottom : Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ± 1 σ error around the +mean. Total power appears to increase through the radial orders. Power is in units of m2 + /s 4 + . +dominant length scale of the velocity field in the images. The Postel-pro jected intensity images are fed as input to the +FLCT code. v + x and v + y are then computed for consecutive pairs of images and are averaged over the entire day. +5. 
MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY +For mode-coupling, horizontal divergence (hereafter div ) and radial vorticity (hereafter curl ) are computed by +substituting P and T from eq3into eq2as below - +uu +u( q , z ) = − ∇2 + P e + z + ∇(∂ + z P ) + ∇ +h T × e + z , += −(0 , 0 , ∂ 2 +x P + ∂ 2 +y P + ∂ 2 +z P ) + ( ∂ +x ∂ +z P, ∂ + y ∂ +z P, ∂ 2 +z P ) + ( ∂ +y T , − ∂ +x T , 0) . (15) +Setting ∂ 2 +x + ∂ 2 +y = q 2 + , div is given by, + ∇ +h · uu +u(q , z ) = q 2 + ∂ +z P, (16) +and curl is given by, + + ∇ × uu +u( q , z ) + z = q 2 + T . (17) +We follow similar steps to those taken inLangfellner et al.(2015) for comparison of flow maps with LCT. The +essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR + of +interest (see Figure4), and subsequently convert it to real space. +We seek to show comparisons (see Figures5,6, and7) for qR + = 100, 150, 200 and 250. To sufficiently delineate +flows at these length scales, we apply a Gaussian filter (see Figure4) to flows obtained from eqns16and17. The +Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to +obtain a real-space steady-flow map. +8 Mani et al. +Figure 4. Left: Divergence-flow power spectrum | div | 2 + , from eqn16, obtained from inversion using all the couplings. The +power-spectrum is then filtered with a bandpass centered around qR + = 150 (middle panel). The resulting spectra is shown in +the right panel. The units of | div | 2 + are in s − 2 + . For illustration, we show the action of the filter on the power-spectrum | div | 2 +since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. +For LCT, we first apply a Gaussian smoothing to v + x and v + y to average over small-scale features; the extent of +smoothing depends on the length scale qR + to be compared with mode-coupling. 
div and curl are then simply +computed by + div = ∂ + x v + x + ∂ +y v + y , (18) +curl = ∂ + x v + y − ∂ +y v + x . (19) +We then perform a 2D Fourier transform on eqns18and19, apply the same Gaussian filters as for mode-coupling, +and transform back to real space. +Condensing all of the above, the following sequence of operations to compare flows at desired length scales are +performed for mode-coupling (M-C) and for LCT - +M-C : φ (x, y ; t) 3D FFT +=====⇒ φ ω +k , B +k ,q inversion +======⇒ P, T ∇ +h · +===⇒ +∇× eqns16 , 17 Filter, +=====⇒ +2D FFT div, curl +LCT : I + 1 , I + 2 FLCT +====⇒ v + x , v +y smooth, +======⇒ +∇ +h · ∇× eqns18 , 19 2D FFT, +======⇒ +Filter Filtered, +Fourier-space +flows 2D FFT +=====⇒ div, curl +6. RESULTS +Table2summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure5, +where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from +the two methods near supergranular scale ( qR + ≈ 100). Near-surface flows are imaged most faithfully when all the +couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of +vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between +the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence +flows (this is consistent with the results ofHathaway et al.2015;Langfellner et al.2015;Rincon et al.2017). Due to +insufficient modes for the p + 2 -p + 2 case (see Table1), we are unable to infer vortical flows with conviction other than near +the supergranular scale, as can be seen from Table2. 
Figure6also aligns with what we believe can be accomplished +through mode-coupling helioseismology - using f-f or p + 1 -p + 1 alone to seismically infer near-surface divergence and vortical +flows at different scales (qR + = 100, 150) can yield extremely good agreement with LCT. As the length scale of the +inferred flow moves further away from that of supergranules (Figure7), the demand on signal-to-noise also increases. +An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to +comment substantively on the flows at these scales. +6.1. Amplitudes of mode-coupling flows +Imaging near-surface flows using mode-coupling analysis 9 +(a) qR + = 100 , f-f + p + 1 -p + 1 + p + 2 -p +2 +Figure 5. Real-space divergence flows (left column, in units of 10−5 + s − 1 + ) and radial vorticity (right column, in units of 10−6 + s −1 + ) +for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around +qR + = 100 (see Figure4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges +out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter +plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum +values. +For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated +numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward +a precise statement on them. H21 reported a 60% greater amplitude for p +1 -p + 1 over f-f coupling (Figure3reflects a +similar conclusion), another element to consider when combining different radial orders. The choice of regularization +(see right panel of Figure2) has the potential to affect the amplitudes of the inverted flows to some degree. 
Flow +amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. +This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a +radial grid along which kernels and flows tend to be described. +Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors: +• Coupling(s) used, +• Regularization parameter in the inversion, +• Smoothing applied to LCT flows (indirectly; see below paragraph), +• The depth at which flows are inferred. +Here, we report in Table2only the maximum correlation found from among the points in the radial grid close +to the surface (within ± 0.5 Mm from z =0). For a desired comparison length scale qR + , we first fix the coupling(s) +and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and +10 Mani et al. +(a) qR + = 100 , f-f (b) qR + = 150, p + 1 -p +1 +Figure 6. Real-space divergence flows (left column, in units of 10−5 + s − 1 + ) and radial vorticity (right column, in units of 10− 6 + s −1 + ) +for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around +qR + = 100, and using (b) p +1 -p + 1 coupling (bottom row), bandpass filtered around qR + = 150. We cut edges out from the flow +maps and compare a circular region of diameter ≈175 Mm. +(a) qR + = 200 , f-f + p + 1 -p + 1 + p + 2 -p +2 (b) qR + = 250 , f-f + p + 1 -p +1 + p + 2 -p +2 +Figure 7. Real-space divergence flows (left column, in units of 10−5 + s − 1 + ) and radial vorticity (right column, in units of 10− 6 + s −1 + ) +for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around +(a) qR + = 200, and (b) qR + = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. 
+vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained +from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation +(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired +qR + . +It has been shown (seeDe Rosa & Toomre2004;Langfellner et al.2015) that line-of-sight velocity from Dopplergrams +and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes +for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history +(see, e.g.,De Rosa et al.2000;Sekii et al.2007;Zhao et al.2007;Langfellner et al.2018;B¨oning et al.2020;Korda +& ˇ +Svanda2021) of using travel-time difference as only a proxy for horizontal divergence. However,Langfellner et al. +Imaging near-surface flows using mode-coupling analysis 11 +Coupling qR + div curl +f-f 100 0.97 0.87 ++ p +1 -p +1 150 0.95 0.76 ++ p +2 -p +2 200 0.92 0.76 +250 0.85 0.65 +f-f 100 0.96 0.85 +150 0.93 0.76 +200 0.89 0.69 +250 0.77 0.58 +p + 1 -p +1 100 0.95 0.83 +150 0.95 0.75 +200 0.92 0.75 +250 0.85 0.61 +p + 2 -p +2 100 0.94 0.7 +150 0.91 0.39 +200 0.79 0.3 +250 0.55 0.3 +Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, +respectively. +(2015),Birch et al.(2016) andBirch et al.(2019) use empirically determined conversion factors to align flow amplitudes +from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see +Verma et al.2013;L¨optien et al.2016). Even for the case of supergranulation divergence maps obtained through +ring-diagram helioseismology,Greer et al.(2016) only report normalized amplitudes. 
+In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used +with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built +using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing +of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve +signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic +applications to investigate other depth- and time-varying features such as giant cell flows (seeHathaway et al.2013; +Hanson et al.2020), emerging active regions, meridional flows and Rossby waves. +APPENDIX +A. DERIVATION OF THE FORWARD MODEL +As described in section1.1, we seek to describe the flow u as a function of q along e +z . To that end, substituting +eq3into eq2, + u σ +q (z ) = +j + q 2 + f + j e +z + iq f +j + P σ +j q + iq × e +z f + j T σ +j q . (A1) +For flows in the anelastic limit (u speed of sound), we can denote the flow perturbation operator as δ L σ + = +− 2iωρu σ + · ∇ (seeHanasoge et al.2017). Substituting Eq.A1into the operator, we get, +δ Lσ +q = − 2i ω ρ ( i u σ +q · k + uσ +q · e + z ∂ +z ), (A2) += − 2 i ωρ +j +− k · q f +j P σ +j q − k · ( q × e +z ) f + j T σ +j q + q 2 + f + j P σ +j q ∂ +z + . (A3) +12 Mani et al. +Express the mode eigenfunction describing oscillations in the Cartesian domain by (seeWoodard2006) +ξ + k ≡ ξ + nk (z ) = i ˆ +k H +nk (z )e +z + ˆzV +nk (z ), (A4) +where H and V are real-valued functions; n and n + are dropped for compactness of notation. 
Then the coupling of +two modes ξ + k and ξ + k (k + = k + q ), by the flow perturbation operator δ Lσ +q , denoted by coupling integral Λk +k (σ ), is +given by + Λk +k (σ ) ≡ + dx (δ L σ +q ξ + k ) · ξ ∗ +k = + dx + − 2i ωρ +j +q 2 + f + j P σ +j q ( ˆ +k · ˆ +k + H +k H ∗ +k + V +k V ∗ +k ) +− +k · q f +j P σ +j q + k · ( q × e +z ) f + j T σ +j q + ( ˆ +k · ˆ +k + H +k H ∗ +k + V + k V ∗ +k ) + (A5) +We desire to linearly relate the coupling integral in the above equation to the flows P and T , through poloidal and +toroidal sensitivity kernels, C +q j, k and D +q j, k respectively. Hence, they are given by +C + q j, k = + dz ρ + q 2 + f +j ( ˆ +k · ˆ +k + H +k H ∗ +k + V +k V ∗ +k ) +− k · q f +j ( ˆ +k · ˆ +k + H +k H ∗ +k + V +k V ∗ +k ) + , +D + q j, k = k · ( q × e +z ) + dz ρ f + j ( ˆ +k · ˆ +k + H + k H ∗ +k + V + k V ∗ +k ) . (A6) +Note the symmetry C +q j, k = C + −q j, −k and D + q j, k = D +− q j, − k . This coupling integral contributes to the cross-spectral +measurement between modes k and k + q From eq 8 ofWoodard(2014), we write the first-order effect of flow on +wavefield cross-correlation as + φ ω ∗ +k φ ω +σ +k +q = H ω +kk + σ Λk +k ( σ ), (A7) +where the function H is given by + H ω +kk + σ = −2 i ω ( N +k |R ω +k | 2 + R ω +σ +k + N +k + |R ω +σ +k |2 + R ω ∗ +k ). (A8) +We absorb the factor − 2i ω into the definition of H . Substitute eqA6in right-hand-side of eqA7to obtain eq4. +The mode spectral profile R is a Lorentzian, given by +R ω +k = 1 +ω 2 +nk − ω 2 + − iωγ +nk /2 , (A9) +where ω + nk is the resonant frequency of the mode, and γ + nk is the mode linewidth. EqA9can be derived by introducing +mode damping − i ωγ ρ as an operator in the differential equation that governs undamped, driven oscillations (see eq +5 ofHanasoge et al.2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. +Also, the parity H ω +kk + σ = H − ω ∗ +kk + − σ and R ω +k = R − ω ∗ +k are established. 
Mode normalization N is given by +N + k = 1 +Q Q + +k +ω |φ ω +k | 2 + +ω R ω +k , (A10) +where the 1 +Q Q + +k on the right-hand-side implies average over all [k + x , k +y ] (Q terms in all) such that k = |k | is constant. +This forces N to be isotropic, i.e., to only depend on k , and not k . The sum over ω is within five linewidths of ω +nk . +Note that Eq.A8throughA10are modified from H21 to ensure parity and that flow maps are real. +The three equationsA8throughA10, along with the symmetry relation for kernels, and summation over ± ω , serve +to establish the parity B σ +k ,q = B ∗− σ +− k ,−q . This allows for obtaining P σ +q = P ∗− σ +−q , and subsequently, purely real flow in +the real domain. Setting σ = 0 gives us the linear, invertible equation eq6. Substituting eqnsA8throughA10into +the noise model obtained in H21 and summing over ± ω establishes the symmetry Gσ +k , q = G− σ +− k ,− q . +Imaging near-surface flows using mode-coupling analysis 13 +B. SOLA INVERSIONS +Subtractive Optimally Localized Averages (SOLA,Pijpers & Thompson1994) aims to obtain a set of weight factors +for the mode q and depth z +o , which we will call α +k ,zo . A linear weighted sum of the measurements B +k ,q in the fashion + +k α +k ,zo B + k ,q allows for an average value of the flow P + q (z ) to be estimated at the depth z +o . To obtain the coefficients +α +k ,zo , it is assumed that a set of sensitivity kernels K +k ,q (z ) for the mode q can be summed up coherently to give an +’averaging kernel’ that is localized at the depth z +o . Conventionally, a Gaussian centered at z +o and a width ∆ is chosen +which the averaging kernel should resemble after performing inversion. +B.1. Kernels in the integral form +Since the kernels in eqA6are manifest as coefficients on a basis f + j (z ), we first derive kernels that can be expressed +as a function of depth z (see Figure8). It is convenient to derive in matrix form. 
Thus, with the following definitions: +P ≡ P + q ( z ), p ≡ P + q j , F ≡ f + j (z ), B ≡ B + k ,q C ≡ C + q j, k and K ≡ K +k , q ( z ), we write (assume only poloidal flow for +simplicity, the same derivations hold true for toroidal flow as well) +P = F p (B11) +The size of P is thus the same as the length of the radial grid z . +Now, pre-multiply by F T + and integrate over z on both sides (drop the integral notation for compactness), +F T + P = (F T + F ) p +p = (F T + F ) −1 + F T + P (B12) +Now, substituting eqB12into the forward problem eq6, +B = C p += (F T + F )− 1 + F T + C P += K P (B13) +where + K = (F T + F )− 1 + F T + C, +i.e., K + k ,q ( z ) = +j,j + dz f +j (z ) f +j + (z ) +− 1 + f + j + (z ) C +q j + ,k (B14) +B.2. Obtaining the coefficients α +Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z + o +T (z, z +o ) = 1 +√ + 2π ∆ 2 exp + z − z +o +2∆ 2 + . (B15) +This can be achieved by solving the optimization problem +minimize X = + dz + T ( z, z +o ) − Θ + q (z, z +o ) +2 + , (B16) +where we introduce the averaging kernel for mode q thus +Θ + q (z, z +o ) = +k α +k ,zo K + k ,q ( z ). (B17) +As an aside, we note that averaging kernels can similarly be constructed for RLS (see section3.1) using eqns13 +andB14. +14 Mani et al. +Figure 8. Left : Kernel K +k ,q ( z ) (eqB14) shown vs depth z for the three radial order couplings f-f, p +1 -p +1 , and p +2 -p + 2 . q R + = +[ −112 , − 45] and k R + = [− 853, − 157] is chosen for all the radial order couplings for comparison. Right : Averaging kernel +(eqB17) using SOLA, for q R + = [− 112, −45] at depth z + 0 = − 0. 48 Mm, and the corresponding target Gaussian (eqB15). +Integral of the averaging kernel over z is 0.89. +Setting ∂ X +∂α → 0 gives us the matrix problem to be solved +A { α } = v, +{ α } = +A + µI +− 1 + v, (B18) +where the square matrix A = + dz K +k ,q (z ) K +k + ,q (z ) and v = + dz K +k ,q (z ) T (z, z +o ). 
Here, k + is just a dummy index for +denoting elements in the matrix A , ( k + = k + q ). In the last line of eqB18, we introduce regularization using an Identity +matrix I , with the regularization parameter µ - purpose being the same as that described in section3.1. Obtaining +α thus becomes a highly expensive computationally for very large number of modes (see section3). Substitute α +obtained from eqB18into last line of eqB13, and +k on both sides + +k α +k ,z +o B σ +k ,q = +k α +k ,z +o + dz K +k ,q ( z )P σ +q (z ), += + dz Θ +q ( z, z +o ) P σ +q (z ) , +≈ P σ +q ( z +o ) (B19) +Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di- +vergence flow can then be obtained from eq16. Results are shown in Figures9and10. +REFERENCES +Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. +1990, ApJ, 364, 699, doi:10.1086/169452 +Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of +Modern Physics, 64, 885, +doi:10.1103/RevModPhys.64.885 +Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, +Science Advances, 2, e1600557, +doi:10.1126/sciadv.1600557 +Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, +A&A, 628, A37, doi:10.1051/0004-6361/201935591 B¨oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & +Schou, J. 2020, A&A, 635, A181, +doi:10.1051/0004- 6361/201937331 +Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, +doi:10.1086/324323 +Christensen-Dalsgaard, J. 2002, Reviews of Modern +Physics, 74, 1073, doi:10.1103/RevModPhys.74.1073 +—. 2021, Living Reviews in Solar Physics, 18, 2, +doi:10.1007/s41116- 020-00028- 3 +Imaging near-surface flows using mode-coupling analysis 15 +Figure 9. Left : Poloidal flow power-spectrum for f-f as a function of q + x R + and q + y R + . Right : Corresponding power-spectrum +averaged over the azimuthal angle. Shaded region shows ± 1 − σ error around the mean. Power is in units of m 2 + /s4 + . +Figure 10. 
Real-space divergence flows (in units of 10 − 5 + s −1 + ) for mode-coupling inversion through SOLA using f-f coupling, +and LCT, bandpass filtered around qR + = 100. We cut edges out from the flow maps and compare a circular region of diameter +≈ 175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is +1.05. For demonstration, we show inversions only for poloidal flow using SOLA. +De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, +192, 351, doi:10.1023/A:1005269001739 +De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, +doi:10.1086/424920 +Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced +Study Institute (ASI) Series C, Vol. 169, Seismology of +the Sun and the Distant Stars, ed. D. O. Gough, 105–116 +Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & +Pomerantz, M. A. 1993, Nature, 362, 430, +doi:10.1038/362430a0 +Fisher, G. H., & Welsch, B. T. 2008, in Astronomical +Society of the Pacific Conference Series, Vol. 383, +Subsurface and Atmospheric Influences on Solar Activity, +ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, & +G. J. D. Petrie, 373.https://arxiv.org/abs/0712.4289 +Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. +1997, Nature, 390, 52, doi:10.1038/36294 +Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, +doi:10.1086/423367 +Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, +Science, 368, 1469, doi:10.1126/science.aaz7119 Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, +652, L6, doi:10.1051/0004- 6361/202141462 +Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, +824, 128, doi:10.3847/0004- 637X/824/2/128 +Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32, +doi:10.3847/2041- 8213/aaff60 +Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020, +Science Advances, 6, eaba9639, +doi:10.1126/sciadv.aba9639 +Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., & +Sreenivasan, K. R. 2017, MNRAS, 470, 1404, +doi:10.1093/mnras/stx1298 +Hansen, P. C. 
1992, SIAM review, 34, 561 +Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., & +Sreenivasan, K. R. 2020, A&A, 644, A103, +doi:10.1051/0004- 6361/202039108 +Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, +ApJ, 910, 156, doi:10.3847/1538-4357/abe770 +Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. +2015, ApJ, 811, 105, doi:10.1088/0004- 637X/811/2/105 +16 Mani et al. +Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science, +342, 1217, doi:10.1126/science.1244682 +Hill, F. 1988, ApJ, 333, 996, doi:10.1086/166807 +Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard, +M. F., & Tromp, J. 2021, ApJS, 253, 47, +doi:10.3847/1538- 4365/abdf5e +Korda, D., & ˇ +Svanda, M. 2021, A&A, 646, A184, +doi:10.1051/0004- 6361/202039928 +Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, +A97, doi:10.1051/0004-6361/201732471 +Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, +A67, doi:10.1051/0004-6361/201526024 +Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical +Transactions of the Royal Society of London Series A, +339, 431, doi:10.1098/rsta.1992.0048 +Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261, +doi:10.1023/A:1005227200911 +L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., & +Schou, J. 2016, A&A, 587, A9, +doi:10.1051/0004- 6361/201526805 +L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature +Astronomy, 2, 568, doi:10.1038/s41550-018- 0460-x +Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, +doi:10.3847/1538- 4357/ab7227 +Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652, +A96, doi:10.1051/0004-6361/202141044 +Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139, +doi:10.3847/1538- 4357/abb133 +—. 2021, ApJ, 920, 36, doi:10.3847/1538- 4357/ac1ad6 +November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, +doi:10.1086/166758 +Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 +Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚ +A., & +Stein, R. 
2001, A&A, 377, L14, +doi:10.1051/0004- 6361:20011160 Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar +Physics, 15, 6, doi:10.1007/s41116- 018-0013- 5 +Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, +M. 2017, A&A, 599, A69, +doi:10.1051/0004- 6361/201629747 +Schad, A., & Roth, M. 2020, ApJ, 890, 32, +doi:10.3847/1538- 4357/ab65ec +Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh, +275, 207, doi:10.1007/s11207- 011-9834- 2 +Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505, +390, doi:10.1086/306146 +Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed. +J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389 +Sekii, T., Kosovichev, A. G., Zhao, J., et al. 2007, PASJ, +59, S637, doi:10.1093/pasj/59.sp3.S637 +Snodgrass, H. B. 1984, SoPh, 94, 13, +doi:10.1007/BF00154804 +Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996, +Science, 272, 1300, doi:10.1126/science.272.5266.1300 +Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H. +1989, Nonradial oscillations of stars +Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, +A136, doi:10.1051/0004-6361/201321628 +Vorontsov, S. V. 2011, MNRAS, 418, 1146, +doi:10.1111/j.1365- 2966.2011.19564.x +Woodard, M. 2014, SoPh, 289, 1085, +doi:10.1007/s11207- 013-0386- 5 +Woodard, M., Schou, J., Birch, A. C., & Larson, T. P. +2013, SoPh, 287, 129, doi:10.1007/s11207- 012-0075- 9 +Woodard, M. F. 1989, ApJ, 347, 1176, doi:10.1086/168206 +—. 2006, ApJ, 649, 1140, doi:10.1086/506927 +—. 2007, ApJ, 668, 1189, doi:10.1086/521391 +—. 2016, MNRAS, 460, 3292, doi:10.1093/mnras/stw1223 +Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007, +ApJ, 659, 848, doi:10.1086/512009 +Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., +& Duvall, T. L., J. 
2012, ApJL, 749, L5, +doi:10.1088/2041- 8205/749/1/L5 diff --git a/read/results/playa/2201.00200.txt b/read/results/playa/2201.00200.txt new file mode 100644 index 0000000..51d3bc7 --- /dev/null +++ b/read/results/playa/2201.00200.txt @@ -0,0 +1,736 @@ +Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022 +January 4, 2022 +Local heating due to convective overshooting and the solar +modelling problem +I. Baraff e1, 2 + , T. Constantino 1 + , J. Clarke1 + , A. Le Saux1, 2 + , T. Goffrey 4 + , T. Guillet1 + , J. Pratt3 + , D. G. Vlaykov1 +1 + University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk) +2 + ´ +Ecole Normale Sup ´ +erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´ +e de Lyon, France +3 + Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA +4 + Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK +ABSTRACT +Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary +of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in one- +dimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a +solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a +local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in +the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun +inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the +speed of sound. 
These effects could help reduce the discrepancies between solar models and observed constraints based on seismic +inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective +penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach. +Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior +1. Introduction +Modelling the internal structure of the Sun is still a challenge. +A recent review by Christensen-Dalsgaard (2021) describes in +detail the long-standing efforts to improve solar models. The so- +lar modelling problem refers to the discrepancy between helio- +seismology and solar interior models that adopt low metallici- +ties predicted by the three-dimensional (3D) atmosphere models +of, for example, Asplund et al. (2009) and Caffau et al. (2011), +in contrast to the high metallicities based on previous litera- +ture compilations by, for example, Anders & Grevesse (1989) +and Grevesse & Noels (1993). Asplund et al. (2021) have re- +cently confirmed with state-of-the-art 3D simulations the rela- +tively low metal abundances for the Sun. Asplund et al. (2021) +consider that their study yields the most reliable solar abun- +dances available today, suggesting that the solar modelling prob- +lem is no longer a problem of abundances but rather a problem +of stellar physics. The treatment of mixing below the convective +zone is one of the key processes that could improve solar mod- +els. Several studies indeed reveal that the process of convective +penetration, also called overshooting, at the bottom of the con- +vective envelope could play an important role in improving the +agreement between solar models and helioseismic constraints +(see for example Christensen-Dalsgaard et al. 2011; Zhang et al. +2012; Buldgen et al. 2019b).
Overshooting in solar models has +most often been treated using diffusive or instantaneous chemi- +cal mixing. A temperature gradient that sharply transitions from +a nearly adiabatic form to a radiative form is usually assumed, +as suggested by the theoretical work of Zahn (1991). Models +with a smoother transition have also been investigated. Based +on the analysis of models with different stratifications near the +[Send offprint requests to: I. Baraffe] base of the convective zone, Christensen-Dalsgaard et al. (2011) +found that models that better fit the helioseismic data have a +weakly sub-adiabatic temperature gradient in the lower part of +the convective zone and a smooth transition to the radiative gra- +dient in the overshooting layer. But Christensen-Dalsgaard et al. +(2011) noted that the required temperature stratification is diffi- +cult to reconcile with existing overshooting models and numer- +ical simulations. They concluded that only non-local turbulent +convection models could produce the desired degree of smooth- +ness in the transition (see for example Zhang & Li 2012; Zhang +et al. 2012). But these non-local models remain uncertain, and +their description of overshooting under the conditions found at +the base of the solar convective zone is yet to be validated. +Zhang et al. (2019) explored the impact of overshooting by +introducing a parametrised turbulent kinetic energy flux based +on a model with parameters that are adjusted to improve the +helioseismic properties. They suggest that amelioration can be +obtained specifically below the convective envelope. However, +Zhang et al. (2019) find that this model cannot solve the whole +solar problem because such a flux worsens the sound-speed pro- +file in the deep radiative interior of their solar model.
Given the +uncertainties regarding the temperature stratification of the over- +shooting region, solar modellers have considered these effects as +secondary and have focused their efforts on exploring the impact +of solar abundances, microphysics (opacities, equations of state, +nuclear reaction rates), and chemical mixing and diffusion (see +details and references in the review of Buldgen et al. 2019a). +Additional, more exotic effects such as early disk accretion or +solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot +2021) are also attracting increasing attention. +To reinvigorate the debate, Buldgen et al. (2019b) recently +highlighted once again how the transition of the temperature gra- +1 arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022 +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +dient just below the convective envelope can significantly impact +the disagreement between solar models and helioseismic con- +straints. Their results, based on a method that combines multi- +ple structural inversions, suggest that the transition in temper- +ature gradient is improperly reproduced by adopting either an +adiabatic or a radiative temperature gradient in the overshoot- +ing layer. The solution should be somewhere in between these +two extremes. Christensen-Dalsgaard et al. (2018) also note that +an increase in the temperature at the transition would remove +a remaining small sharp dip in the speed of sound immediately +beneath the convective zone of the model. A major difficulty is +to disentangle the effects of overshoot from the effects of opaci- +ties, which can also alter the temperature gradient in these layers.
+Given the large number of parameters to deal with in order to im- +prove solar models and the current lack of strong arguments in +favour of modifying the thermal stratification in the overshoot- +ing layer, there has been no real motivation to deviate from the +traditional picture of a sharp transition as formalised by Zahn +(1991). +The present work is motivated by arguments inspired by hy- +drodynamical simulations of convection and convective penetra- +tion in solar-like models. Recent hydrodynamical simulations by +Baraffe et al. (2021, hereafter B21) highlight the process of local +heating in the overshooting region due to penetrating convective +motions across the convective boundary. In the following, we +analyse the potential impact of this feature on one-dimensional +(1D) stellar evolution structures in the context of solar models. +The hydrodynamical results of B21 are briefly summarised in +Sect. 2, and their impact on 1D models is analysed in Sect. 3 +and discussed in Sect. 4. +2. Modification of the thermal background in the +overshooting layer: Results from +two-dimensional hydrodynamical simulations +B21 performed two-dimensional (2D) fully compressible time- +implicit simulations of convection and convective penetration in +a solar-like model with the MUlti-dimensional Stellar Implicit +Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017). +The main motivation was to explore the impact of an artificial +increase in the stellar luminosity on the properties of convection +and convective penetration. This procedure is a common tactic +adopted in hydrodynamical simulations of convection (Rogers +et al. 2006; Meakin & Arnett 2007; Brun et al. 2011; Hotta 2017; +Edelmann et al. 2019). The experiments of B21 highlight the im- +pact of penetrative downflows on the local thermal background +in the overshooting layer.
They illustrate how convective down- +flows, when penetrating the region below the convective bound- +ary of the envelope, can induce a local heating and a modification +of the temperature gradient as a result of compression and shear +in the overshooting layer. This modification of the local back- +ground is connected to a local increase in the radiative flux to +counterbalance the negative enthalpy flux (or heat flux) produced +by penetrating flows. The negative peak of the enthalpy flux +and the positive bump of the radiative flux below the convective +boundary are well-known features described in many numeri- +cal works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell +et al. 2002; Brun et al. 2011; Hotta 2017; Käpylä 2019; Cai +2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre +et al. 2019; Higl et al. 2021) have also reported a modification +of the local thermal background in the overshooting region, but +without providing a detailed description. The simulations of B21 +provide a physical explanation that links the convective penetration process to the local heating and to the radiative bump in the +overshooting layer. The solar-like star simulated in B21 is based +on a model that is not thermally relaxed. It is reasonable to as- +sume that the local heating seen in B21 is present in stars because +the negative heat flux in the overshooting layer and the bump in +the radiative flux that compensates for this feature are persistent. +These two features are also commonly observed in other hydro- +dynamical simulations, as mentioned above. An exploration of +the impact of this heating on stellar evolution models may reveal +that heating is a necessary aspect of models for overshooting. +Fig. 1. Radial profile of the temperature departure $\Delta T/T_0$ from +the initial profile $T_0$ and of the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$ close to +the convective boundary predicted by 2D hydrodynamical simu- +lations (B21) of solar-like models.
The lower panel corresponds +to the model with a realistic stellar luminosity and the upper +panel to a model with luminosity enhanced by a factor of ten. +The dash-dotted red lines show $\Delta T/T_0$ (in %), the relative dif- +ference between the time and space averages of the temperature, +$T$, and the initial temperature, $T_0$. The solid blue lines show the +time and space averages of the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$. The +dashed black lines show the initial profile of the sub-adiabaticity, +$(\nabla - \nabla_{\rm ad})_{\rm init}$. The convective boundary is indicated by the vertical +solid line (see details in B21). +The behaviour of the thermal profile below the convective +boundary found in the simulations of B21 is illustrated in Fig. +1. It is displayed for the model with a realistic stellar luminosity +(lower panel). We also show the results for a model with an artifi- +cial enhancement in the luminosity by a factor of ten because the +features are intensified in these ‘boosted’ models (upper panel). +The figure shows the local heating in the overshooting layer and +its impact on the sub-adiabaticity $(\nabla - \nabla_{\rm ad})$, with $\nabla = \frac{d\log T}{d\log P}$ the +2 +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +temperature gradient and $\nabla_{\rm ad} = \left.\frac{d\log T}{d\log P}\right|_S$ the adiabatic gradient. +The initial stratification below the convective boundary (located +at $r = 0.6734 \times R_{\rm star}$ for this specific stellar model) is set by +the stable radiative gradient, $\nabla_{\rm rad}$ (see the dashed black line be- +low the convective boundary in Fig. 1). B21 show that, as a re- +sult of the local heating below the convective boundary charac- +terised by the bump in temperature difference $\Delta T/T_0$ displayed +in Fig. 1, the temperature gradient becomes less sub-adiabatic +immediately below the convective boundary$^1$.
The net result is +a smoother transition just below the convective boundary with +a temperature gradient that has an intermediate value between +the radiative temperature gradient and the adiabatic one. In the +next section we analyse the impact of this local heating on 1D +solar structures by adopting a simple prescription that mimics +the behaviour of the temperature gradient suggested by hydro- +dynamical simulations. +3. Impact on one-dimensional solar structure +models +3.1. Helioseismic constraints +Our primary goal in this short paper is to illustrate the potential, +qualitative impact of the local heating produced by overshoot- +ing. We adopted a strategy inspired by the analysis of Buldgen +et al. (2020), who constructed a static structure of the Sun in +agreement with seismic inversions of the Ledoux discriminant +defined by +$A = \frac{1}{\Gamma_1}\frac{d\ln P}{d\ln r} - \frac{d\ln\rho}{d\ln r}$, (1) +with $\Gamma_1 = (\partial\ln P/\partial\ln\rho)_{\rm ad}$. Starting from a reference evolu- +tionary model, Buldgen et al. (2020) used an inversion pro- +cedure to iteratively reconstruct a solar model. Successive in- +versions of the Ledoux discriminant allowed them to obtain a +model-independent profile for this quantity. Their reconstruction +method also gives solar structures that are in excellent agree- +ment with other structural inversions, namely the entropy, $S$, the +square of the speed of sound, $c_s^2$, and the density, $\rho$. To illustrate +the convergence of their reconstruction procedure, they show +(right panels of their Figs. 3-6) the successive iterations that con- +verge to an excellent level of agreement for the four structural +inversions ($A$, $S$, $c_s^2$, $\rho$) starting from the initial reference model +adopted in their work. The differences found between the recon- +structed model and the reference model are useful as they indi- +cate the modifications of the reference model that are required to +converge towards a solar model in agreement with helioseismic +data.
We recall here the major trends found by Buldgen et al. +(2020) for the four structural quantities, which are used for our +analysis in Sect. 3.2. +The first concerns the Ledoux discriminant. The major dis- +crepancy between the Sun and the reference model occurs just +below the convective boundary, with a large positive bump for +the quantity $(A_{\rm Sun} - A_{\rm ref})$. +The second concerns the speed of sound. The same positive +bump at the same location as for the Ledoux discriminant, $A$, is +observed for the quantity $(c^2_{s,{\rm Sun}} - c^2_{s,{\rm ref}})/c^2_{s,{\rm ref}}$. The corrections +applied to $A$ during the reconstruction procedure also reduce the +discrepancy in the speed of sound in the radiative region. +The third concerns the entropy. Large discrepancies are ob- +served in both the radiative region and the convective zone. The +[Footnote 1: Less sub-adiabatic means that $|\nabla - \nabla_{\rm ad}|$ decreases compared to the +initial profile.] entropy discrepancy $(S_{\rm Sun} - S_{\rm ref})/S_{\rm ref}$ has two positive peaks in +the radiative zone, one just below the overshooting region and a +larger peak deeper at ∼40% of the stellar radius. This discrep- +ancy is negative in the convective zone. The corrections applied +to $A$ help reduce these entropy discrepancies in both regions. +The fourth concerns the density. The quantity $(\rho_{\rm Sun} - \rho_{\rm ref})/\rho_{\rm ref}$ has a negative peak in the radiative region, at ∼35% +of the stellar radius, and is positive in the convective zone. +Importantly, Buldgen et al. (2020) mention that their recon- +struction procedure gives similar Ledoux discriminant profiles +for a wide range of initial reference models. We used these re- +sults to gauge whether the modifications of the thermal profile +predicted by B21 can help in qualitatively improving all the +structural quantities used by Buldgen et al. (2020). +3.2. Testing one-dimensional solar models +Our main motivation is to show the potential impact of the local +heating described in Sect. 2 on stellar models.
We are not aim- +ing in this short work at constructing the best solar model to fit +helioseismic constraints. Using stellar evolution codes, we have +adopted two different methods that can be found in the litera- +ture to construct solar models (e.g. Zhang et al. 2012; Vinyoles +et al. 2017). Our first method relies on the thermal relaxation +of a reference model with solar radius and luminosity that is +modified to reproduce the temperature gradient in the overshoot- +ing layer suggested by hydrodynamical simulations. In this case, +the chemical abundances are not modified by nuclear reactions, +mixing, or microscopic diffusion during the relaxation process. +For these tests, we used the 1D Lyon stellar evolution code +(Baraffe et al. 1998). We repeated this experiment based on ther- +mal relaxation with the stellar evolution code MONSTAR (e.g. +Constantino et al. 2014) and obtained the same qualitative re- +sults. +The second method considers models that account for the +modification of the temperature gradient in the overshooting +layer from the zero age main sequence (ZAMS). The models +are then evolved until they reach the solar radius and luminosity. +With this approach, changes in the chemical abundances from +nuclear reactions, microscopic diffusion, and overshooting mix- +ing are also consistent with any modification of the structure +induced by the forced local heating in the overshooting layer. +These tests were performed with MONSTAR as it includes the +treatment of microscopic diffusion. +The first method allows the impact of local heating in +the overshooting layer after thermal relaxation to be isolated. +The second method provides evolutionary models that are self- +consistent since the effect of the modification of the temperature +gradient is accounted for during their evolution on the main se- +quence.
+In the following, we adopt a modification of the local temper- +ature gradient in the overshooting layer that qualitatively repro- +duces the behaviour displayed in Fig. 1. We define an overshoot- +ing length $d_{\rm ov} = \alpha_{\rm ov} H_{P,{\rm CB}}$, with $H_{P,{\rm CB}}$ the pressure scale height +at the convective boundary and $\alpha_{\rm ov}$ a free parameter. We also de- +fine two radial locations, $r_{\rm ov} = r_{\rm CB} - d_{\rm ov}$ and $r_{\rm mid} = r_{\rm CB} - d_{\rm ov}/2$, +with $r_{\rm CB}$ the radial location of the convective boundary. The tem- +perature gradient is modified as follows. For $r_{\rm mid} \le r < r_{\rm CB}$, we +use +$\nabla = g(r)\,\nabla_{\rm ad} + (1 - g(r))\,\nabla_{\rm rad}$, (2) +with +$g(r) = \sin\{[(r - r_{\rm mid})/(r_{\rm CB} - r_{\rm mid})]^{a} \times \pi/2\}$. (3) +3 +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +For $r_{\rm ov} \le r < r_{\rm mid}$, we use +$\nabla = \nabla_{\rm rad} - h(r)\,\nabla_{\rm ad}$, (4) +with +$h(r) = b \times \sin\{[(r_{\rm mid} - r)/(r_{\rm mid} - r_{\rm ov})] \times \pi\}$. (5) +Sine functions are used in Eqs. (3) and (5) to reproduce the +smooth variations in the temperature gradient below the convec- +tive boundary produced by the hydrodynamical simulations. We +have verified that the results are insensitive to the smoothness of +these variations and to the exact shape of the temperature gra- +dient radial profile. We adopted $a = 0.3$ in Eq. (3) as it provides a +behaviour for the temperature gradient very close to the one dis- +played in Fig. 1. Results are rather insensitive to variations in the +values of $a$ between 0.2 and 0.4. We adopted $b = 0.03$ in Eq. (5), +which also provides a close visual match to the hydrodynamical +results, but we note that the results are insensitive to the value of +$b$. +3.2.1. Thermal equilibrium models +The details of the procedure for the first method are the follow- +ing. We calculate the evolution of a $1\,M_\odot$ model with an initial +helium mass fraction of 0.28, metallicity $Z = 0.02$, and a mix- +ing length $l_{\rm mix} = 1.9\,H_P$. 
We use a reference model that is in +thermal equilibrium$^{2}$ and has the luminosity and radius of the +current Sun. Starting from this reference model, the tempera- +ture gradient is modified over a prescribed depth to mimic the +impact of overshooting according to the hydrodynamical sim- +ulations described in Sect. 2. We adopt the prescription given +by Eqs. (2)-(5) over a distance $d_{\rm ov}$ below the convective bound- +ary. We show the results in Fig. 2 for $\alpha_{\rm ov} = 0.15$ and $\alpha_{\rm ov} = 0.20$. +These overshooting widths are in good agreement with the maxi- +mal depth reached by downflows below the convective boundary +predicted by the hydrodynamical simulations for the solar-like +model investigated in B21. We note that the stellar model used +in B21 is slightly under-luminous compared to the Sun (see B21 +for details). B21 also mention that one should be cautious when +directly applying the overshooting depths predicted by their sim- +ulations to real stars since the final relaxed state for these simula- +tions may have different properties from non-thermally relaxed +states. We varied $\alpha_{\rm ov}$ between 0.15 and 0.35 and find that the +results do not change qualitatively. However, the amplitude of +the variations in the model properties depends on $d_{\rm ov}$ (see be- +low). As shown below, this simple prescription implemented in +a stellar evolution code yields a local increase in the tempera- +ture below the convective boundary, similar to that observed in +the hydrodynamical simulations. We stress that Eqs. (2)-(5) have +been chosen for simplicity. They are only a rough approximation +that can mimic the thermal profile behaviour suggested in the 2D +simulations. +The model with a modified temperature gradient is then ther- +mally relaxed, that is to say, it is evolved over many thermal +timescales without any modification of the abundances from nu- +clear reactions until thermal equilibrium is reached. 
The temper- +ature gradient is modified in the overshooting layer during the +whole relaxation process, and this is referred to as a ‘forced local +heating’. This procedure ensures that the model with a modified +temperature gradient can be consistently compared to the refer- +ence model. As shown in Fig. 2, the simple prescription given +2 + Thermal equilibrium means that the total nuclear energy produced +in the central regions balances the radiative losses at the surface, i.e. the +total nuclear luminosity, L +nuc , equals the total stellar luminosity, L . by Eqs. (2)-(5) yields similar qualitative changes in the temper- +ature and the sub-adiabaticity close to the convective boundary +that was found in the hydrodynamical simulations of B21. +Fig. 2. Radial profile of the temperature difference and of the +sub-adiabaticity of a 1D solar-like structure with a modified tem- +perature gradient in the overshooting layer according to Eqs. +(2)-(5). The temperature gradient is modified over a distance +d +ov = α + ov H + P,CB , with α + ov =0.15 in the lower panel and α + ov =0.20 +in the upper panel. The dash-dotted red lines show the percent- +age relative temperature di ff erence, ∆ T / T + ref , with ∆ T = T − T + ref . +The solid blue lines correspond to the sub-adiabaticity ( ∇ − ∇ +ad ). +The dashed black lines show the sub-adiabaticity of the refer- +ence model. The convective boundary is indicated by the vertical +solid line. The vertical dashed line in each panel is located at a +distance d + ov below the convective boundary. +The impact on the whole stellar structure was quantified by +comparing the four structural quantities ( A , S , c 2 +s , ρ) between the +modified and the reference model. The results are displayed in +Fig. 3, with ∆ X defined as ( X − X + ref ) for any structural quantity X . +The forced local heating in the overshooting layer produces sim- +ilar positive peaks for ∆ A, ∆ S , and ∆ c 2 +s , as found for the temper- +ature. 
The modification thus provides the correction required to +improve the discrepancy for the Ledoux discriminant described +in the first of the trends outlined in Sect. 3.1. Unsurprisingly, +such a modification of the temperature gradient is expected to +improve the agreement with helioseismic constraints and help +4 +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +remove the sound speed anomaly below the convective bound- +ary (second trend in Sect. 3.1), as suggested by the results of +Christensen-Dalsgaard et al. (2011). But it is also interesting to +note that such a modification yields a slight cooling of the con- +vective zone (see Fig. 2) and thus a negative di ff erence for the +entropy (see Fig. 3). A negative di fference in the convective en- +velope is in agreement with the correction required for the ref- +erence model of Buldgen et al. (2020) to better match the Sun +(see third trend in Sect. 3.1). Regarding the density, the modifi- +cation of the temperature gradient has an interesting impact in +the radiative zone, with a large decrease in the density compared +to the reference model over a broad region below the convective +boundary. The impact on the density in the convective region for +this specific model is partly in agreement with the correction re- +quired for this quantity in the Buldgen et al. (2020) study, with a +positive diff erence found only in the upper part of the convective +envelope (see the fourth trend in Sect. 3.1). +These trends are insensitive to the depth over which the tem- +perature gradient is modified. Increasing the depth increases the +magnitude of the di ff erences but has no impact on their sign. We +find that the maximum variation in the model properties, such as +the speed of sound, ∆ c2 +s / c2 +s , ref , roughly scales with d 2 +ov . 
This scal- +ing is linked to the integrated area between the modified temper- +ature gradient curve and the one for the reference (non-modified) +temperature gradient, which roughly decreases linearly with r . +This area is proportional to the square of the overshooting depth, +and consequently, the maximum variation in the model proper- +ties is also proportional to d 2 +ov . The qualitative trends also remain +the same whether overshooting mixing in the reference model +is ignored or included using a step function (with instantaneous +mixing) or an exponential decay for the diff usion coefficient (e.g. +Freytag et al. 1996). +3.2.2. Self-consistent evolutionary models +For the tests based on the second method, we ran di ff erent sets +of models with diff erent combinations of assumptions, including +or not microscopic diff usion and with or without overshooting +mixing. When overshooting mixing was included in the over- +shooting layer, it was based either on a step function or on an +exponential decay for the di ffusion coe ffi cient. Microscopic dif- +fusion for H and He was implemented according to Thoul et al. +(1994). For these tests, the temperature gradient was modified +according to Eqs. (2)-(5). All models start from the ZAMS and +are evolved until they reach the solar radius and luminosity at the +same age. This was achieved by making small adjustments to the +mixing length, l +mix . The models with temperature gradient mod- +ifications were compared to the relevant reference model, which +has no modification of the temperature gradient but everything +else is the same (i.e. the same treatment of microscopic diff u- +sion and of overshooting mixing). The evolutionary models with +temperature gradient modifications are thus self-consistent. The +main di fference between this approach and the one in the previ- +ous section is that these models accumulate small di ff erences in, +for example, central H abundance when compared to their ref- +erence model. 
These tests produce the same trends in the over- +shooting layer as found for the tests based on the first method +(Sect. 3.2.1), independently of the treatment of overshooting +mixing and whether microscopic di ffusion is included or not. +In the convective zone, all models give a positive di ff erence for +the density between the model with a modified temperature gra- +dient and the relevant reference model. For the other quantities +( S , c2 +s ), the diff erences in the convective zone are very sensitive Fig. 3. Di fference of various structural quantities between a +model with a modified temperature gradient in the overshoot- +ing layer and a reference model calculated with the Lyon stellar +evolution code. The temperature gradient in the modified model +is changed over a distance d +ov = α +ov H + P, CB below the convec- +tive boundary (indicated by the vertical solid line). The lower +panel shows the results for α + ov = 0. 15 and the upper panel for +α + ov = 0. 20. +to the assumptions regarding whether overshooting mixing is in- +cluded or not. But at least we find solutions that are compatible +with the four trends found by Buldgen et al. (2020) for the four +structural quantities. This is illustrated in Fig. 4 with a model +that accounts for step function overshooting mixing over a dis- +tance d +ov = 0 .15 H + P, CB (lower panel) and d +ov = 0. 20 H + P, CB (upper +panel). +4. Conclusion +The tests performed in Sect. 3 are based on di ff erent methods +(relaxed models versus consistent evolution) that can be used to +construct solar models. Independently of the method used, the +tests show that a local increase in the temperature in the over- +shooting region due to convective penetration provides the quali- +tative e ffects required to improve the speed of sound discrepancy +below the convective boundary. This discrepancy is persistent in +5 +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Fig. 4. 
Di fference of various structural quantities between a +modified model and a reference model calculated with the +MONSTAR stellar evolution code. The reference model is +evolved from the ZAMS with microscopic di ff usion and step +function overshooting mixing over a distance d + ov = α + ov H + P,CB be- +low the convective boundary. The lower panel shows the results +for α + ov = 0. 15 and the upper panel for α + ov = 0. 20. The models +with a modified temperature gradient in the overshooting layer +(same microscopic diffusion and overshooting mixing treatment +as the reference model) are evolved similarly from the ZAMS. +The convective boundary is indicated by the vertical solid line. +solar models that use low solar metal abundances. This is not +surprising because an increase in the temperature in this spe- +cific region has previously been invoked in the literature to solve +this problem, as mentioned in Sect. 1. However, the details of +the physical process responsible for this local heating have been +lacking, whereas we can now suggest an explanation based on +the B21 results. The trends that we find for the four structural +quantities ( A, S , c2 +s , ρ) are robust below the convective bound- +ary and in a large fraction of the radiative core, independently of +the treatment of mixing and di ffusion and of the method for con- +structing the models in Sects. 3.2.1 and 3.2.2. Our experiments +additionally show that such a local change in the temperature, +despite being made over a very limited region below the convec- +tive boundary, can also aff ect the density, the entropy, and the speed of sound in the convective envelope after thermal relax- +ation or evolution on the main sequence. How these quantities +are affected in the convective envelope compared to a reference +model with no local heating depends on the strategy for building +solar models and on the treatment of overshooting mixing. 
This +mixing is obviously linked to the local heating given that both +result from the same dynamical process. A combined testing of +both eff ects in stellar models could provide more constraints on +the general process of overshooting. +Increasingly, eff orts are now devoted to characterising the +process of convective boundary mixing in stellar models based +on multi-dimensional hydrodynamical simulations. More work +is required to obtain reliable determinations of an overshooting +depth and to describe quantitatively the mixing and impact on +the temperature gradient. Understanding the e ff ects of rotation +and magnetic fields on overshooting is a significantly more dif- +ficult theoretical and numerical problem to address; however, +eff orts to study these combined non-linear e ff ects are ongoing +(Hotta 2017; Korre et al. 2021). Despite the limitations of ex- +isting hydrodynamical simulations, they are already providing +constraints on physical processes usually treated with several +free parameters in 1D stellar evolution models. They can thus +limit the degrees of freedom in a problem as complex as so- +lar modelling. Our primary goal in this work is to highlight the +potential impact of convective penetration on the thermal back- +ground in the overshooting region. The processes studied in B21 +that produce a local change in the temperature gradient are also +responsible for the mixing in this region. Because much observa- +tional evidence points towards the need for extra mixing at con- +vective boundaries, for example lithium depletion in solar-like +stars (Baraff e et al. 2017), the size of convective cores (Claret +& Torres 2016), and colour-magnitude diagrams (Castro et al. +2014), solar modellers often include this extra mixing in their +models. But a consistent approach should also require account- +ing for a local change in the temperature gradient. 
The impact of +this local heating goes in the right direction to improve not only +the discrepancies of solar models below the convective bound- +ary, but also in the convective envelope. This e ffect o ff ers an in- +teresting step forward for solving the solar modelling problem. +In this exploratory work, we adopt a simple prescription for the +local heating in the overshooting layer since the main goal is +to highlight its qualitative impact on stellar models. However, +this eff ect should not be considered as another free parameter in +the solar modelling problem. Future multi-dimensional hydro- +dynamical simulations will enable this process, and its treatment +in 1D stellar evolution codes, to be better constrained. +5. Acknowledgements +We thank our anonymous referee for valuable comments which +helped improving the manuscript. This work is supported by the +ERC grant No. 787361-COBOM and the consolidated STFC +grant ST /R000395 / 1. IB thanks the Max Planck Institut f ¨ +ur +Astrophysics (Garching) for warm hospitality during completion +of part of this work. The authors would like to acknowledge the +use of the University of Exeter High-Performance Computing +(HPC) facility ISCA and of the DiRAC Data Intensive service +at Leicester, operated by the University of Leicester IT Services, +which forms part of the STFC DiRAC HPC Facility. The equip- +ment was funded by BEIS capital funding via STFC capital +grants ST/ K000373 /1 and ST / R002363 /1 and STFC DiRAC +Operations grant ST/ R001014 / 1. DiRAC is part of the National +e-Infrastructure. +6 +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +References +Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197 +Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141 +Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481 +Bara ffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 
1998, A&A, 337, 403 +Bara ffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 +Bara ffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 +Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825 +Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79 +Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36 +Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space +Sciences, 6, 42 +Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33 +Ca ffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, +Sol. Phys., 268, 255 +Cai, T. 2020, ApJ, 888, 46 +Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13 +Christensen-Dalsgaard, J. 2021, Living Reviews in Solar Physics, 18, 2 +Christensen-Dalsgaard, J., Gough, D. O., & Knudstrup, E. 2018, MNRAS, 477, +3845 +Christensen-Dalsgaard, J., Monteiro, M. J. P. F. G., Rempel, M., & Thompson, +M. J. 2011, MNRAS, 414, 1158 +Claret, A. & Torres, G. 2016, A&A, 592, A15 +Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56 +Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4 +Freytag, B., Ludwig, H. G., & Ste ffen, M. 1996, A&A, 313, 497 +Go ffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 +Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed. +N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25 +Higl, J., M ¨ +uller, E., & Weiss, A. 2021, A&A, 646, A133 +Hotta, H. 2017, ApJ, 843, 52 +Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563 +K ¨ +apyl ¨ +a, P. J. 2019, A&A, 631, A122 +Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362 +Korre, L., Garaud, P., & Brummell, N. H. 2019, MNRAS, 484, 1220 +Kunitomo, M. & Guillot, T. 2021, arXiv e-prints, arXiv:2109.06492 +Meakin, C. A. & Arnett, D. 2007, ApJ, 667, 448 +Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 
1995, A&A, +293, 127 +Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765 +Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828 +Viallet, M., Bara ffe, I., & Walder, R. 2011, A&A, 531, A86 +Viallet, M., Go ffrey, T., Bara ffe, I., et al. 2016, A&A, 586, A153 +Viallet, M., Meakin, C., Arnett, D., & Moc ´ +ak, M. 2013, ApJ, 769, 1 +Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202 +Zahn, J. P. 1991, A&A, 252, 179 +Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759, +L14 +Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50 +Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103 + 7 diff --git a/read/results/playa/2201.00201.txt b/read/results/playa/2201.00201.txt new file mode 100644 index 0000000..8d7c0a1 --- /dev/null +++ b/read/results/playa/2201.00201.txt @@ -0,0 +1,932 @@ +Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs © ESO 2022 +January 19, 2022 + Letter to the E ditor +The period-age relation of long-period variables +M. Trabucchi1, + , N. Mowlavi1 +Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland +December 2021 + ABSTRACT +Context. Pieces of empirical evidence suggest the existence of a period-age relation for long-period variables (LPVs). Yet, this +property has hardly been studied on theoretical grounds thus far. +Aims. We aim to examine the period-age relation using the results from recent nonlinear pulsation calculations. +Methods. We combined isochrone models with theoretical periods to simulate the distribution of fundamental mode LPV pulsators, +which include Miras, in the period-age plane, and we compared it with observations of LPVs in Galactic and Magellanic Clouds’ +clusters. +Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of +the dominant role of mass in shaping stellar structure and evolution. 
At a given age, the period distribution shows a non-negligible +width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and C- +rich models are predicted to have diff erent slopes. We derived best-fit relations describing age and initial mass as a function of the +fundamental mode period for both O- and C-rich models. +Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich +or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection +biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to +study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age. +Key words. stars: AGB and post-AGB – stars: evolution – stars: variables: general – Galaxy: stellar content – Galaxy: globular +clusters: general – Magellanic Clouds +1. Introduction +Low- to intermediate-mass stars approach the end of their lives +through the asymptotic giant branch (AGB) evolutionary phase, +during which they exhibit pulsations with timescales up to sev- +eral hundreds of days, and they are hence known as long-period +variables (LPVs). If their V -band amplitude exceeds 2.5 mag, +they are classified as Miras, which have a rather regular periodic- +ity and they are believed to pulsate only in the radial fundamen- +tal mode (FM). If their photometric amplitude is smaller, they +are known as semi-regular variables (SRVs), which are thought +to be the progenitors of Miras. The name stems from the lesser +degree of regularity of their light curves, likely due to the fact +that they can pulsate in multiple modes simultaneously. 
+The notion that younger LPVs tend to display longer periods +compared to older ones, often referred to as the period-age (PA) +relation, is rooted in the empirical evidence from stellar kinemat- +ics in the solar neighborhood. The first such piece of evidence +is probably due to Merrill (1923), who pointed out that M-type +LPVs increasingly lag behind the local standard of rest (i.e., pos- +sess a higher asymmetric drift) as their period decreases. Later +studies (as summarized by Wyatt & Cahn 1983) confirmed this +behavior (also using proper motion data, e.g., Wilson & Mer- +rill 1942), and showed that the shorter periods are also accom- +panied by a higher velocity dispersion. Furthermore, groups of +LPVs with relatively short periods are characterized by a greater +scale height above the Galactic plane. This was shown, using for + + Corresponding author: M. Trabucchi +(michele.trabucchi@unige.ch) the first time the radial velocity of LPVs in the southern hemi- +sphere, by Feast (1963). In this seminal paper, Feast realized +that LPVs with shorter periods must be members of older stellar +populations and emphasized their highly promising applications +for both Galactic and extra-galactic studies over a wide range +of stellar ages. It should be noted that the PA relation is con- +nected with the existence of a period-metallicity relation (Lloyd +Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast +& Whitelock 2000a, and references therein). +A number of subsequent works have corroborated the PA +relation on empirical grounds, or have exploited it to interpret +observational results. Relevant examples are studies of LPVs in +globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; White- +lock 1986), toward the galactic center and bulge (Lloyd Evans +1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic +latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). 
Of par- +ticular interest is the recent eff ort to extend the analysis of LPVs +to dwarf galaxies in the Local Group (Menzies et al.2002,2008; +Whitelock et al.2009;Menzies et al.2010,2011;Sakamoto et al. +2012;Battinelli & Demers2012,2013;Whitelock et al.2013; +Menzies et al.2015). +The Hipparcos mission provided the means to refine the re- +sults on the period-kinematics connection. This was done by +Feast & Whitelock(2000b), who found evidence supporting the +existence of a bar-like structure in the Bulge from the orbits of +local LPVs. A similar study dedicated to C-rich LPVs was per- +formed byFeast et al.(2006), who provided quantitative age +estimates for these stars. A summary of the main results and +prospects emerging from these Hipparcos-era studies is given by +Article number, page 1 of 9arXiv:2201.00201v2 [astro-ph.SR] 17 Jan 2022 +A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Feast(2007). More recently, the study of the Galaxy with LPVs +has been stimulated by the wealth of data acquired by large-scale +surveys (e.g.,Catchpole et al.2016;Urago et al.2020), espe- +cially the Gaia mission (Grady et al.2019,2020). +It seems relevant that just a few years after the study ofFeast +(1963),Kippenhahn & Smith(1969) predicted the PA relation +of classical Cepheids from stellar evolution and pulsation mod- +els. The theoretical modeling of Cepheids and of their period- +luminosity (PL) and PA relations is now an active field of re- +search (e.g.,Bono et al.2005;Anderson et al.2016;De Somma +et al.2020). In contrast, when it comes to theoretical assessments +of the LPV PA relation, the literature is surprisingly scarce (espe- +cially in comparison with the significant e ff ort put into empirical +studies). In fact, we were able to identify only two relevant stud- +ies addressing this subject (Wyatt & Cahn1983;Eggen1998). 
+The discrepancy in period predictions between linear and nonlin- +ear pulsation models (e.g.,Ya’Ari & Tuchman1996;Lebzelter +& Wood2005;Trabucchi et al.2021b), and more generally the +di fficulty in modeling the structure of evolved red giants, likely +played a role in hampering the theoretical investigation of the PA +relation of LPVs. +Motivated by the release of updated AGB evolutionary mod- +els (Pastorelli et al.2019,2020) and the availability of new, ac- +curate model predictions for the FM period of AGB stars (Tra- +bucchi et al.2019,2021b), we decided to investigate the nature +of the PA relation of LPVs on theoretical grounds. The adopted +models and observed data are described in Sect.2, while in +Sect.3we present the results, which are discussed in Sect.4. +We summarize our conclusions in Sect.5. +2. Methods +2.1. Models +We employed PARSEC-COLIBRI isochrones (Marigo et al. +2017) with stellar evolutionary models fromPastorelli et al. +(2019,2020) for the thermally pulsing asymptotic giant branch +(TP-AGB) phase, and from PARSEC (Bressan et al.2012, ver- +sion 1.2S) for the preceding evolution. The adopted set of +isochrones covers the range 0.001 to 0.016 in initial metal- +licity (Z +i ), with a 0.001 step, while it spans the age interval +8 .00 ≤ log( τ/ yr) ≤ 10 . 45 with a step of 0.05. Since the AGB +phase is short-lived, it only spans a small range of initial masses +for each given isochrone, of order of 10 − 2 + M + at most. +The adopted isochrones include linear pulsation periods from +Trabucchi et al.(2019) for overtone modes and nonlinear periods +computed with the period-mass-radius relation fromTrabucchi +et al.(2021b) for the FM 1 + . Pulsation properties were computed +along both the early-AGB and the TP-AGB. We did not extend +our analysis to red supergiant stars as the pulsation prescription +we employed are strictly valid only below 7 M + . 
+We recall that, with the adopted nonlinear relation, the period +increases with radius (R) as a broken power law, whose exponent +decreases as soon as the “bending radius” R + b is exceeded, it and +becomes zero when the “saturation radius” R +s > R +b is reached +(i.e., the period becomes independent of radius). The exact val- +ues of R + b and R +s , as well as of the exponents, depend on the +current mass ( M ). We assume that the FM is dominant if the +stellar radius is larger than the critical value R +dom, 0 , which we +computed from the current stellar mass using Eq. 4 ofTrabucchi +et al.(2021b). +1 + Hereinafter, whenever we discuss periods, it should be understood +that we refer to FM periods on which this work is focused. 2.2. Data +As a first set of data, we considered the cluster-LPV pairs used +byGrady et al.(2019, see their tables 1 and 2). These consist of +19 clusters in the Large Magellanic Cloud, hosting a total of 20 +potential LPV members, and eight Galactic clusters each hosting +a potential LPV member. +We expanded this list with data for LPVs in a few populous +clusters, namely the Galactic clusters NGC 362, NGC 2808, 47 +Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC +1978 and NGC 1846; and the cluster NGC 419 in the Small Mag- +ellanic Cloud (SMC). The source lists were taken fromLebzel- +ter & Wood(2005,2007,2011,2016) andKamath et al.(2010), +whose notation for the sources names is adopted here. After ex- +cluding the star LW3 in NGC 1846 and the star V129 in ω Cen, +which are unlikely cluster members (cf.Lebzelter & Wood2007, +2016), we reached a total of 203 sources. +The aforementioned studies also provide a lot of informa- +tion, possibly including J H K photometry, one or more periods, +and a spectral type. 
In order to expand on the available data, +we crossmatched the selected sample with the Two Micron All- +Sky Survey (2MASS,Skrutskie et al.2006), the all-sky data +release of the Wide-field Infrared Survey Explorer (AllWISE, +Cutri et al.2013), the catalog of variable stars from the All- +Sky Automated Survey for SuperNovae (ASAS-SNJayasinghe +et al.2020), the catalogs of LPVs in the Magellanic Clouds from +the third phase of the Optical Gravitational Lensing Experiment +(OGLE-III,Soszy ´ +nski et al.2009,2011), the early third data re- +lease from the Gaia mission ( Gaia EDR3,Gaia Collaboration +et al.2021), and the catalog of LPV candidates from Gaia DR2 +(Mowlavi et al.2018). +FollowingGrady et al.(2019), we took ages from +Kharchenko et al.(2016) andBaumgardt et al.(2013) for clusters +in the Galaxy and LMC, respectively, thereby ensuring that ages +would be homogeneously derived for clusters in both galaxies. +Age uncertainties fromBaumgardt et al.(2013), provided for +each cluster, are generally around σ + log( τ ) 0. 05.Kharchenko +et al.(2016) do not provide age uncertainties, but a reasonable +upper limit for their method should be σ + log(τ ) = 0. 2 based on +the analysis ofKharchenko et al.(2005) (the same value was +adopted byGrady et al.2019, in their Fig. 7). +As discussed byKamath et al.(2010), the age of the SMC +cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is +consistent with the value τ = 1. 45 ± 0 .05 Gyr fromGoudfrooij +et al.(2014), while it is as young as τ 0. 89 ± 0. 015 Gyr ac- +cording toPerren et al.(2017). Since an accurate estimate is not +necessary for our exploratory analysis, we took a rough average +and assumed log( τ/ yr) = 9 .1 ± 0. 1. NGC 419 and NGC 1846 +likely exhibit TP-AGB boosting (Girardi et al.2013). 
We note +that some clusters show multiple stellar populations, whose age +spread has been estimated in some cases (e.g.,Mackey & Broby +Nielsen2007;Joo & Lee2013;Villanova et al.2014) and is con- +sistent with the age uncertainties we adopted. +Distances of Galactic clusters were also taken from +Kharchenko et al.(2016), while for the Magellanic Clouds and +their clusters we adopted the distance moduli µ + LMC = 18 .49 ± +0. 09 mag and µ + SMC = 18. 96 ± 0. 02 mag fromde Grijs et al. +(2017). We searched for data on interstellar extinction from sev- +eral literature works (e.g.,Nayak et al.2016;Kharchenko et al. +2016;Perren et al.2017), all of which suggest that extinction +in the K +s filter is smaller than ∼ 0 .1 mag for most of the clus- +ters we considered, and at most as large as ∼ 0 .3 mag, which is +negligible for our purposes. +Article number, page 2 of 9 +Trabucchi et al.: The period-age relation of LPVs +A detailed membership verification is beyond the scope of +this work, and we relied on the checks performed by authors +whose source lists we adopted. It should be kept in mind that +some sources may not be real cluster members. +For sources without a spectral type, we used the Gaia- +2MASS diagram (Lebzelter et al.2018,2019) to determine +whether they are O- or C-rich. We used the near-infrared period- +luminosity diagram to identify the most likely pulsation mode +associated with each period of each observed source. We se- +lected only FM periods and rejected long secondary periods and +periods attributed to overtone mode pulsation. The details of +these classification steps are provided in AppendixA. Out of +203 sources from the initial list, we identified 95 LPVs pulsat- +ing in the FM, consisting of 40 C-rich and 55 O-rich sources. +They consist of 29 Miras, 33 semi-regular variables, and 33 other +sources (most likely LPVs) whose variability type has not been +determined. 
We note that, with the exception of Gaia DR2, the +sources of variability data considered here do not report the un- +certainty associated with observed periods. However, since peri- +ods were derived in most cases from well-sampled, high-quality +variability observations, relative period uncertainties are most +likely negligible compared with those associated with age. +3. Results +Panel (a) of Fig. 1 shows a comparison between model predic- +tions and observations in the $P_{\mathrm{FM}}$–$\log(\tau/\mathrm{yr})$ plane. The former +are displayed by a density map showing the expected number +$N_{\mathrm{FM}}$ of LPVs pulsating in the FM in each period-age bin, nor- +malized to maximum. Model predictions are in good agreement +with data derived from observations (i.e., individual LPVs in +clusters, represented by symbols), and they show that the pe- +riod of LPVs pulsating in the FM decreases with increasing age. +Crosses mark the average properties of the three groups of C- +rich LPVs fromFeast et al.(2006, their table 4), which fit the +general pattern with the exception of their group 3, estimated to +be older than what our models predict at P 650. +We also show a linear best-fit to the model distribution +(weighted by $N_{\mathrm{FM}}$), which shows a fairly good agreement with +the best-fit to observations byGrady et al.(2019, also shown). +However, the best-fit line does not fully capture the properties +of the predictions, nor of the observed trend. Indeed, models are +indicative of a substantial dispersion around the relation. For in- +stance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550 +days. Conversely, LPVs pulsating in the FM with a period of 350 +days are predicted to be at least ∼200 Myr old, but they can be as +old as ∼3 Gyr. Observed data are consistent with the predicted +spread, although the agreement cannot be considered conclusive, as the ob- +served sample adopted is not complete. 
+Nonetheless, it is relevant that some clusters host multiple +LPVs, which are thus almost coeval, and they do span a wide +period range. Some of these clusters host multiple stellar popu- +lations that are believed to have formed over a time comparable +with the age uncertainties we adopted. This means that longer- +period (more massive) LPVs in these clusters probably lean to- +ward the lower age limit assumed for their host cluster, and the +opposite is true at shorter periods. This tends to strengthen the +agreement between models and observations. +Our data set samples the intermediate-age range (NGC 419 +and NGC 1846) relatively well as well as old ages ( ω Cen, 47 +Tuc, NGC 362, and NGC 2808). This provides us with the op- +portunity to study the period distribution at these ages, and for +a more detailed comparison between models and observations. On the basis of the average age of these two groups of clus- +ters and the associated uncertainty, and taking the discrete age +sampling of the isochrones into account, we considered the age +ranges log( τ/yr) = 9. 15 ± 0. 10 and log(τ/yr) = 10. 10 ± 0. 20. Pe- +riod distributions at those ages are displayed in panels (b) and (c) +of Fig.1, respectively, showing good agreement between model +predictions and observations. We note that in both cases, the dis- +tribution is skewed toward short periods, which seems to be true +at all ages for O-rich stars. This can be seen in panel (a) of Fig.2, +which is a version of the PA plane limited to an O-rich compo- +sition 2 + . Indeed, although at τ 5 Gyr the observed sample is +very scarce, it appears to be consistent with models predicting a +more densely populated region in the shorter-period half of the +PA distribution. +The case of C-stars, shown in panel (b) of Fig.2, is diff er- +ent. They only form over a restricted range of initial masses +and ages, so their occurrence in a given stellar population is an +age indicator on its own. 
Toward the low-mass (old age) side +of the C-star regime, the behavior is similar to the O-rich case +with a concentration around relatively short periods. C-rich mod- +els tend to have a lower surface temperature and larger radii, +at a given mass, compared to O-rich models, and thus they at- +tain longer periods more easily. This occurs in particular toward +higher masses, so that younger C-rich models are more concen- +trated at longer periods, leading to a steeper PA relation com- +pared with the O-rich case. These predictions agree with ob- +servations on the old side of the period distribution, while the +scarcity of C stars at τ 0 .6 Gyr prevents us from performing a +comparison at younger ages. +In appendixB, we provide analytic PA relations by fitting the +high-density parts of the O- and C-rich models’ distribution. We +emphasize that, because of the large scatter of the relation, ages +estimated in this way for individual LPVs are bound to be highly +uncertain. As a way to assess the error in age determination, we +also provide analytic best-fit relations to the boundaries of the +PA distribution of the models in the appendix. These relations +are displayed in Fig.2. +4. Discussion +In general agreement with observations, models confirm that +LPVs pulsating predominantly in the FM follow a PA relation, +which exhibits a non-negligible dispersion. Thanks to the newly +available nonlinear period predictions, we were able to better ex- +amine the nature of this relation and the origin of its scatter. +The PA relation is intimately connected with the PL relation, +both patterns emerging because of the prominent role of mass in +shaping stellar structure and evolution. Indeed, stellar mass de- +termines the lifetimes of the main evolutionary stages, and thus +the age of stars in the AGB phase. 
Pulsation models (Trabuc- +chi et al.2021b) show that the radius R +dom ,0 (and correspond- +ing luminosity) at the onset of dominant FM pulsation (DFMP) +increases with mass, so that the most massive FM-dominated +LPVs are brighter. They also have longer periods, as this in- +creases with radius. In other words, the period, luminosity, and +age near the tip of the AGB are all functions of initial stellar +mass (at least to a good approximation). +We note that this would not be the case if the FM were dom- +inant along the entire AGB, as the large change in radius during +this phase would result in a wide range of periods at a given age. +It is the very fact that DFMP occurs only during the final portion +2 + A further version of the PA plane highlighting both chemical types +can be found in Fig.A.2of appendixA.1. + Article number, page 3 of 9 +A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on +a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape +indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of +galactic C-stars ofFeast et al.(2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit +to models and the best-fit byGrady et al.(2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked +in panel (a) by the blue and red shaded areas (at log( τ/yr) ∼ 9. 15 and ∼ 10. 10, respectively). For clarity, the eff ect of the TP-AGB boosting is +suppressed in panel (a). +Fig. 2. Similar to Fig.1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. 
The solid line marks the best fit to the models, while +dashed lines are best fits to the edges of the model distribution (see the text for more details). +of the AGB that limits the range of periods a FM-pulsating LPV +can have at a given age. Yet, the DFMP part of the AGB is long +enough for significant variations in radius to occur, which result +in the dispersion of the PA relation seen in Fig.1. +At a given initial metallicity Z +i , the shape of the period dis- +tribution primarily results from the fact that, throughout the TP- +AGB (the stage during which the FM is normally excited), the +envelope expansion accelerates, while the period becomes pro- +gressively less sensitive to changes in radius (see AppendixC). +In particular, the slope of the period-radius relation decreases +sharply at P +b = P(R +b ). The FM period distribution is roughly +symmetric around that value, but at its short-period side, the FM +is not dominant. Therefore, when only FM-dominated LPVs are +considered, as is done here, the observed period distribution ap- +pears skewed toward short periods. This feature is strengthened when a set of isochrones is con- +sidered which spans a range of initial metallicities because the +adopted criterion for the onset of DFMP does not depend on +metallicity, but the FM period does as metal-poor LPVs are +warmer and have smaller radii compared with metal-rich ones. +As a consequence, the bulk of the period distribution of metal- +poor LPVs is at periods shorter than P +b , so they only contribute +to the global distribution (i.e., at all Z +i at a given age) over a +small period range at P P +b . In contrast, metal-rich LPVs have +periods well beyond P + b , so they contribute both at that value and +at longer periods. The result is an excess of FM-dominated LPVs +near P +b , that is to say on the short side of the overall period dis- +tribution. 
+We note that, in contrast with the prescription we adopted, +the onset of DFMP in reality is probably sensitive to metallic- +Article number, page 4 of 9 +Trabucchi et al.: The period-age relation of LPVs +ity. While the good degree of agreement with observations sug- +gests that the dependence is weak at most, it is possible for +any discrepancy to be smeared out by the fact that our set of +isochrone implicitly assumes a flat star-formation rate with no +age-metallicity relation, so it is not an accurate representation of +any realistic stellar environment. In this sense, the PA relation is +environment-dependent, and it is not necessarily universal. +A further point of uncertainty stems from the fact that the +prescription we adopted assumes that the FM period only de- +pends upon the mass and radius, and that it is a ffected by a +change in composition only through the eff ect that such a varia- +tion has on the radius. While this is true to a good approximation, +linear models show a small dependence of periods on metallic- +ity at a fixed mass and radius, but the quantitative impact in the +nonlinear case is unknown. We can only estimate, based on the +results ofTrabucchi et al.(2019), an uncertainty of ±10% at most +with respect to the prescriptions adopted here. +Qualitatively, a realistic age-metallicity relation and the +metallicity dependence of the period and of the onset of DFMP +are all expected to result in a steeper PA relation than the one +we predict, but it is di fficult to assess the relative importance of +these e ff ects. In this sense, the composition probably a ffects the +shape of the PA relation more than its dispersion. The latter is +likely aff ected by the composition indirectly through mass loss, +the analysis of which is beyond the scope of this study. 
How- +ever, we point out that mass loss represents a source of scatter in +combination with the occurrence of thermal pulses, because it re- +duces the minimum radius for the onset of DFMP. Thus, during +the luminosity dips associated with thermal pulses, a LPV can +have a period shorter than the one it had when it first entered the +DFMP regime (see AppendixC). An additional source of uncer- +tainty, which we disregarded, is rotation (or other processes that +induce extra mixing in the core) which causes a spread in ages +at a given initial mass (cf.Anderson et al.2016, for the case of +classical Cepheids). +The fairly good agreement between models and observations +encourages the use of LPVs as age indicators, but the scatter of +the PA relation hampers this application. We attempted to reduce +the scatter through corrections involving photometric properties, +as is customarily done for classical Cepheids with a color term +(e.g.,Bono et al.2005), but with unsatisfactory results. A correc- +tion dependent on the photometric amplitude of variability rep- +resents a promising alternative, but it cannot be pursued at the +moment. Indeed, for computational efficiency, current pulsation +models include only a crude treatment of the atmospheric layers +as they do not aff ect pulsation periods. On the other hand, the +atmosphere is crucial in determining the spectral energy distri- +bution and its variation throughout the pulsation cycle, and hence +the amplitude of variability. At the same time, the observational +sample adopted here is too heterogeneous for a self-consistent +investigation of amplitude, but this kind of study could be made +possible by the upcoming data release 3 of the Gaia mission +(Gaia Collaboration et al.2021) and the future Legacy Survey +of Space and Time (LSST,Ivezi ´ +c et al.2019) of the Vera Rubin +Observatory. +It is worth noting that our analysis applies to Miras as well +as SRVs, provided that they predominantly pulsate in the FM. 
+The limitation of PA relation studies to Miras, as has mainly +been done in literature so far, undoubtedly has some advan- +tages: to begin with, the fact that Miras are typically easier to +detect than SRVs, and their light curves are easier to process +as they tend to be more regular. Moreover, Miras represent the +end-point of AGB evolution, so in principle they correspond to a +smaller range of stellar parameters compared to the full extent of the DFMP regime, and they display a smaller range of periods +at a given age (cf.Feast & Whitelock2000b). In other words, +they should exhibit a relatively narrow PA relation (even though, +based on the observational data set we adopted, there is no con- +clusive evidence that considering only Miras reduces the scatter +of the PA relation). +Nonetheless, we caution against this approach as it is prone +to introducing uncontrolled biases, as the traditional distinction +between SRVs and Miras is arbitrary (seeTrabucchi et al.2021a, +and references therein). As such, it disregards the physical pro- +cesses at the origin of the range of amplitudes characterizing +LPVs. In particular, photometric amplitudes are largely deter- +mined by the formation and dissociation of molecules in the stel- +lar atmosphere, and they are likely to be metallicity-dependent. +It is therefore reasonable to assume that metal-poor (old) Mira +analogs might be classified as SRVs, thereby undermining the +potential application of the PA relation if restricted to Miras. +This seems to be supported by the fact that the bulk of old LPVs +in our sample are classified as SRVs. Therefore, studies involv- +ing PA relations of LPVs would advantageously include both +Miras and FM-pulsating SRVs. +The challenge associated with SRVs stems from the fact that +they are often multiperiodic (even when predominantly pulsat- +ing in the FM), a property that complicates the light curve anal- +ysis and period extraction. 
At the same time, this feature could +potentially improve age determinations as overtone modes are +expected to display a PA relation as well. +5. Conclusions +We used the results from recent nonlinear pulsation calculations +and combined them with state-of-the-art isochrone models to in- +vestigate the PA relation of FM-dominated LPVs, finding good +agreement with the distribution of observed LPVs in star clus- +ters. The theoretical PA relation displays a non-negligible scat- +ter, whose origin we traced to the fact that, despite being +very brief, the portion of AGB evolution during which the FM +becomes dominant shows a relatively large range in mass and +radius at a given age. +The theoretical distribution of FM periods is roughly sym- +metric, but the FM is not dominant at the shortest periods. As a +result, models predict that the distribution of dominant FM peri- +ods at a given age is skewed toward short periods, in agreement +with observations. Depending on stellar populations, metallicity +may enhance this feature as metal-poor LPVs, which tend to be +warmer and more compact, only contribute near short periods. +We provide the best-fit PA relation separately for O-rich and +C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA +relation because of their lower surface temperatures, which allow +them to reach longer periods more easily. +Our analysis concerns all LPVs predominantly pulsating in +the FM, regardless of whether they are classified as Miras or +SRVs. We discourage such a distinction in that it is arbitrary and +prone to selection biases that risk compromising the use of LPVs +as age indicators. +The main limitation in the use of the PA relation for age de- +terminations of individual LPVs stems from its relatively large +scatter. 
We suggest that corrective terms, involving the ampli- +tude of variability, might help to reduce this scatter and antici- +pate that upcoming data from ongoing and future surveys dedi- +cated to time-domain astronomy will be highly valuable to probe +this possibility. A study of the impact of metallicity on nonlinear +pulsation is highly desirable to pursue this line of investigation, +Article number, page 5 of 9 +A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +as would be a theoretical investigation of the dependence of pho- +tometric amplitudes upon global stellar parameters. +Acknowledgements. M.T. and N.M. acknowledge the support provided by the +Swiss National Science Foundation through grant Nr. 188697. We are grateful +to the anonymous referee for the constructive comments that helped improve +this paper, and to Léo Girardi for helping with the computation and interpre- +tation of isochrones. This research has made use of: data from the OGLE-III +Catalog of Variable Stars; data products from the Two Micron All Sky Sur- +vey, which is a joint project of the University of Massachusetts and the In- +frared Processing and Analysis Center/ California Institute of Technology, funded +by the National Aeronautics and Space Administration and the National Sci- +ence Foundation; data from the European Space Agency (ESA) mission Gaia +(https://www.cosmos.esa.int/gaia ), processed by the Gaia Data Process- +ing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ +gaia/dpac/consortium ). Funding for the DPAC has been provided by na- +tional institutions, in particular the institutions participating in the Gaia Multi- +lateral Agreement. 
This research has made use of the following free / open source +software and/ or libraries: the Starlink Tables Infrastructure Library (STILTS and +Topcat,Taylor2006); IPython (Pérez & Granger2007) and Jupyter (Kluyver +et al.2016) notebooks; the P ython libraries N umPy (Harris et al.2020), SciP y +(Virtanen et al.2020), matplotlib (a Python library for publication quality graph- +ics,Hunter2007), and A stropy (a community-developed core Python package +for Astronomy,Astropy Collaboration et al.2018). This research has made use of +NASA’s Astrophysics Data System Bibliographic Services, and of the following +services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue +access tool (DOI: 10.26093/ cds/ vizier,Ochsenbein et al.2000), the “Aladin sky +atlas” (Bonnarel et al.2000), and the cross-match service (Boch et al.2012; +Pineau et al.2020). +References +Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A, +591, A8 +Astropy Collaboration, Price-Whelan, A. M., Sip ˝ +ocz, B. M., et al. 2018, AJ, 156, +123 +Battinelli, P. & Demers, S. 2012, A&A, 544, A10 +Battinelli, P. & Demers, S. 2013, A&A, 553, A93 +Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 2013, MNRAS, 430, +676 +Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pa- +cific Conference Series, Vol. 461, Astronomical Data Analysis Software and +Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291 +Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33 +Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966 +Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127 +Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455, +2216 +Cutri, R. M., Wright, E. L., Conrow, T., et al. 2013, Explanatory Supplement +to the AllWISE Data Release Products, Explanatory Supplement to the All- +WISE Data Release Products +de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 
2017, Space Sci. Rev., +212, 1743 +De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039 +Eggen, O. J. 1998, AJ, 115, 2435 +Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol. +378, Why Galaxies Care About AGB Stars: Their Importance as Actors and +Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479 +Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library, +Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Gio- +vannelli, 229 +Feast, M. W. 1963, MNRAS, 125, 367 +Feast, M. W. 1966, The Observatory, 86, 120 +Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical +Processes in Red Giants, ed. J. Iben, I. & A. Renzini, 193–204 +Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227 +Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460 +Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791 +Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1 +Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142 +Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35 +Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022 +Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128 +Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357 +Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90 +Ivezi ´ +c, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 +Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13 Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36 +Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105 +Kamath, D., Wood, P. R., Soszy ´ +nski, I., & Lebzelter, T. 2010, MNRAS, 408, 522 +Kharchenko, N. V., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D. +2005, A&A, 438, 1163 +Kharchenko, N. V., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D. +2016, A&A, 585, A101 +Kippenhahn, R. 
& Smith, L. 1969, A&A, 1, 142 +Kluyver, T., Ragan-Kelley, B., Pérez, F., et al. 2016, in Positioning and Power +in Academic Publishing: Players, Agents and Agendas, ed. F. Loizides & +B. Schmidt (Netherlands: IOS Press), 87–90 +Lebzelter, T., Mowlavi, N., Marigo, P., et al. 2018, A&A, 616, L13 +Lebzelter, T., Trabucchi, M., Mowlavi, N., et al. 2019, A&A, 631, A24 +Lebzelter, T. & Wood, P. R. 2005, A&A, 441, 1117 +Lebzelter, T. & Wood, P. R. 2007, A&A, 475, 643 +Lebzelter, T. & Wood, P. R. 2011, A&A, 529, A137 +Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111 +Lloyd Evans, T. 1976, MNRAS, 174, 169 +Lloyd Evans, T. 1983a, MNRAS, 204, 985 +Lloyd Evans, T. 1983b, MNRAS, 204, 961 +Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Li- +brary, Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in +Related Systems, ed. J. D. Fernie, 151 +Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151 +Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77 +Menzies, J., Feast, M., Tanabé, T., Whitelock, P., & Nakada, Y. 2002, MNRAS, +335, 923 +Menzies, J., Feast, M., Whitelock, P., et al. 2008, MNRAS, 385, 1045 +Menzies, J. W., Feast, M. W., Whitelock, P. A., & Matsunaga, N. 2011, MNRAS, +414, 3492 +Menzies, J. W., Whitelock, P. A., & Feast, M. W. 2015, MNRAS, 452, 910 +Menzies, J. W., Whitelock, P. A., Feast, M. W., & Matsunaga, N. 2010, MNRAS, +406, 86 +Merrill, P. W. 1923, ApJ, 58, 215 +Mowlavi, N., Lecoeur-Taïbi, I., Lebzelter, T., et al. 2018, A&A, 618, A58 +Nayak, P. K., Subramaniam, A., Choudhury, S., Indu, G., & Sagar, R. 2016, +MNRAS, 463, 1446 +Ochsenbein, F., Bauer, P., & Marcout, J. 2000, A&AS, 143, 23 +Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283 +Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666 +Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21 +Perren, G. I., Piatti, A. E., & Vázquez, R. A. 
2017, A&A, 602, A89 +Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical So- +ciety of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis +Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Short- +ridge, 125 +Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10 +Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163 +Soszy ´ +nski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22 +Soszy ´ +nski, I., Udalski, A., Szyma ´ +nski, M. K., et al. 2009, Acta Astron., 59, 239 +Soszy ´ +nski, I., Udalski, A., Szyma ´ +nski, M. K., et al. 2011, Acta Astron., 61, 217 +Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se- +ries, Vol. 351, Astronomical Data Analysis Software and Systems XV, ed. +C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666 +Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66 +Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2017, ApJ, 847, 139 +Trabucchi, M., Wood, P. R., Montalbán, J., et al. 2019, MNRAS, 482, 929 +Trabucchi, M., Wood, P. R., Mowlavi, N., et al. 2021b, MNRAS, 500, 1575 +Urago, R., Omodaka, T., Nagayama, T., et al. 2020, ApJ, 891, 50 +Villanova, S., Geisler, D., Gratton, R. G., & Cassisi, S. 2014, ApJ, 791, 107 +Virtanen, P., Gommers, R., Oliphant, T. E., et al. 2020, Nature Methods, 17, 261 +Wenger, M., Ochsenbein, F., Egret, D., et al. 2000, A&AS, 143, 9 +Whitelock, P., Feast, M., & Catchpole, R. 1991, MNRAS, 248, 276 +Whitelock, P., Menzies, J., Feast, M., et al. 1994, MNRAS, 267, 711 +Whitelock, P. A. 1986, MNRAS, 219, 525 +Whitelock, P. A., Menzies, J. W., Feast, M. W., et al. 2009, MNRAS, 394, 795 +Whitelock, P. A., Menzies, J. W., Feast, M. W., Nsengiyumva, F., & Matsunaga, +N. 2013, MNRAS, 428, 2216 +Wilson, R. E. & Merrill, P. W. 1942, ApJ, 95, 248 +Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225 +Ya’Ari, A. & Tuchman, Y. 
1996, ApJ, 456, 350 +Article number, page 6 of 9 +Trabucchi et al.: The period-age relation of LPVs +Fig. A.1. Absolute- K + s Gaia -2MASS diagram for the stars with or with- +out a spectral type (left and right panels, respectively) in the selected +sample. Symbol colors and shapes indicate the spectral type and host +cluster described in the legend, respectively, which also reports the num- +ber of sources displayed (i.e., having both optical and NIR photometry). +The dashed line marks the separation between O- and C-rich sources +according toLebzelter et al.(2018). An arrow marks the source MSX +LMC 124 in NGC 1830 that, having W +BP, RP − W +J, K + s = 9. 73 mag, lies out- +side the plot area. Background dots are LPVs in the LMC from OGLE- +III (light gray) andMowlavi et al.(2018) (darker gray). +Appendix A: Classification of observed LPVs +Appendix A.1: Spectral type +We adopted the spectral types provided byLebzelter & Wood +(2007) andKamath et al.(2010) for 52 of the LPVs they studied +in NGC 1846, NGC 1978, and NGC 419. The only exception +is the star 5-3 in NGC 419, for which we adopted the S-type as +reported byLloyd Evans(1983a). +We also searched the SIMBAD astronomical database +(Wenger et al.2000) for spectral type information, which we +found for 26 more stars. We used the Gaia-2MASS diagram of +Lebzelter et al.(2018) to confirm the chemical type classification +taken from literature and to characterize the surface chemistry of +sources of an unknown spectral type (see Fig.A.1). Among the +latter, we identified 13 C-rich stars and 106 O-rich sources. +Three of the sources without a spectral type lack Gaia pho- +tometry, so they cannot be classified with the Gaia-2MASS. Two +of them (LW5 and LW22 in 47 Tuc) have no match in Gaia +EDR3, but they have NIR data and are probably O-rich based on +their position in the J − K +s versus K +s color-magnitude diagram. 
+The third source is one of the two stars in NGC 1903 from the +list ofGrady et al.(2019), which we identified with the 2MASS +source J05171633-6920298. It is likely C-rich according to the +NIR color-magnitude diagram. +Finally, the sources V138 in ω Cen, LW15 in NGC 2808, +and LW4 in NGC 362 lack NIR data. They cannot be placed in +the NIR PL diagram, upon which we relied to assign pulsation +modes to periods, so we excluded them from the sample. The distribution of O- and C-rich sources in the period-age diagram +is shown in Fig.A.2. +Appendix A.2: Variability +For variability information, we complemented the data from +Lebzelter & Wood andKamath et al.(2010) with the catalogs +from OGLE-III, ASAS-SN, and Gaia DR2. Combining these +data sets, we found at least one period for each of the 176 sources +in our sample. +In order to identify the pulsation mode most likely respon- +sible for periods in a given source, we assumed that the second +overtone mode is associated with sequence A, the first overtone +mode with sequences B and C + , and the fundamental mode with +sequence C (e.g.,Trabucchi et al.2017). We excluded long sec- +ondary periods on sequence D as they are not due to stellar pul- +sation (Soszy ´ +nski et al.2021, and references therein), and we +used the pattern of PL sequences in the LMC as a reference to +guide the mode identification (cf.Trabucchi et al.2021a). +We performed this classification separately for periods com- +ing from each distinct data set. If two or more periods from dif- +ferent data sets were assigned to the same pulsation mode, we +retained only one of those periods, with priority to the values +from Lebzelter & Wood andKamath et al.(2010). If the latter +authors do not provide this information, we adopted the period +from OGLE-III if available, and otherwise from ASAS-SN or +from Gaia DR2. +For some sources, the periods reported in di fferent catalogs +were assigned to the same mode through this procedure. 
In most +cases, these periods are reasonably similar to each other. Only +in a few cases were they significantly different, but this did not +alter our conclusions. +When available, the variability type was taken from OGLE- +III or ASAS-SN. We note that we are only interested in whether +a star is classified as a Mira or semi-regular variable. In many +cases, this type is not given or the star is simply considered, for +instance, as an LPV or AGB in SIMBAD, in which case we con- +sidered the variability type as undetermined. +Appendix B: Fitting relations +We obtained analytic expressions for the PA relations separately +for O- and C-rich stars, proceeding as follows. For each bin of +$\log(\tau/\mathrm{yr})$, we modeled the period distribution with a Gaussian +kernel density estimator (KDE) and identified the peak of the +distribution. To describe the boundaries of the PA relation, we +adopted, at each age, the values of the period at which the dis- +tribution equals 25% of its maximum. We selected this arbitrary +value upon visual inspection of the PA plane. We modeled the +central trend of the PA relation, as well as its short- and long- +period edges, with linear or quadratic functions in the form +$$\log(\tau/\mathrm{yr}) = a_0 + a_1\,(P/\tilde{P}) + a_2\,(P/\tilde{P})^2 , \qquad \text{(B.1)}$$ +(where $\tilde{P} = 350$ days) and employed a Levenberg-Marquardt +nonlinear regression algorithm$^{3}$ to derive the best-fit coefficients, +which are listed in Table B.1. We remark that these best-fit ex- +pressions are only valid in the intervals $8.0 \le \log(\tau/\mathrm{yr}) \le 10.3$ +and $20 < P/\mathrm{days} < 700$ for O-rich composition, and within +$^{3}$ We made use of the Python library SciPy to perform Gaussian KDE +modeling and best-fit, respectively, by means of the gaussian_kde +tool from the stats module and the curve_fit function from the +optimize module. + Article number, page 7 of 9 +A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Fig. A.2.
Similar to Fig.1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). +Table B.1. Best-fit coefficients for the PA relation and its boundaries in +the form given in Eq.B.1. +Sp. type relation $a_0$ $a_1$ $a_2$ +O-rich center 10.78 -2.660 0.5953 +lower edge 10.46 -2.818 0.6578 +upper edge 10.54 -0.8187 -0.2335 +C-rich center 9.755 -0.7532 +lower edge 9.982 -1.698 +upper edge 8.498 -1.827 -0.9959 +$8.6 \le \log(\tau/\mathrm{yr}) \le 9.3$ and $140 < P/\mathrm{days} < 620$ in the C-rich +case. +Because of the connection between age and initial mass, the +PA relation can be translated into a period-initial mass relation, +which we derived using the same approach described above, and +assuming the form +$$\log(M_i/M_\odot) = b_0 + b_1\,(P/\tilde{P}) + b_2\,(P/\tilde{P})^2 . \qquad \text{(B.2)}$$ +The resulting best-fit lines are displayed in Fig.B.1, and the co- +efficients are given in Table B.2. +We remark that both the PA and the period-initial mass rela- +tions depend on model assumptions, in particular mass loss and +mixing, as well as on the properties of the population of LPVs, +namely the star-formation history and age-metallicity relation. +Appendix C: The shape of the period distribution +As an example case, we consider an isochrone of age $\log(\tau/\mathrm{yr}) = 8.3$ +and initial metallicity $Z_i = 0.006$. Stars on the TP-AGB have +initial masses $M_i \simeq 3.85\,M_\odot$ over a small range of $\sim 10^{-3}\,M_\odot$. +The relation between period and initial mass is displayed in +panel (a) of Fig.C.1, where isochrone portions undergoing Table B.2. Best-fit coefficients for the period-initial mass relation and +its boundaries in the form given in Eq.B.2. +Sp. type relation $b_0$ $b_1$ $b_2$ +O-rich center -0.2790 0.8958 -0.1828 +lower edge -0.1772 0.9975 -0.2203 +upper edge -0.1740 0.2783 0.8247 +C-rich center -0.0304 0.2885 +lower edge -0.0131 0.5752 +upper edge -0.2245 -0.2720 0.2343 +DFMP are indicated by solid lines.
Panel (b) shows the period +distributions for a few di ff erent cases. +It is instructive, to begin with, to ignore the effect of thermal +pulses and consider only the quiescent evolution (green lines in +Fig.C.1). The smallest initial mass corresponds to a star that just +entered the TP-AGB, when the FM has a period of ∼ 240 days +but is not dominant. It only becomes dominant above a threshold +radius R +dom, 0 , that is for periods longer than a (mass-dependent) +critical period P +dom ,0 (the solid gray line in Fig.C.1). The least +evolved (quiescent) model with dominant FM has P +FM 360 +days (green circle and horizontal line), corresponding to a sharp +cut in the period distribution shown in panel (b) of Fig.C.1. +As a star evolves along the AGB it expands, and its period be- +comes longer in response to the increase in radius. Models with +a higher initial mass are more evolved, hence they have a larger +radius and a longer period. The rate at which a period increases +with radius is not fixed, but rather decreases with evolution. Ac- +cording to the prescription ofTrabucchi et al.(2021b), a period +grows with radius as a broken power-law with exponent α 1. 8 +if R < R +b , and with α 1. 25 at larger radii. +This is equivalent to saying that the period grows more +slowly after it exceeds a critical value P +b = P( R +b ), marked by +the gray dotted line in Fig.C.1. The isochrone reaches it at +Article number, page 8 of 9 +Trabucchi et al.: The period-age relation of LPVs +Fig. B.1. Similar to Fig.2, but showing initial mass M +i in place of age. The best-fit lines to the most populated band and edges of the theoretical +P +FM – M +i relation are shown. +Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows +period as a function of initial mass (current mass on the top axis) on the +TP-AGB for a ∼ 200 Myr old isochrone with Z +i = 0. 006. 
Red lines +show full thermal pulses, while blue lines ignore luminosity spikes and +green lines show only the quiescent evolution. The same color code +is used for the period distributions (normalized to their maximum) on +panel (b). Solid lines indicate that the FM is dominant. Circles indi- +cate the earliest onset of DFMP accounting for (red) or ignoring (green) +luminosity spikes, and the shortest period of the dominant FM (blue). +Gray lines mark the critical values of periods at which the FM becomes +dominant (solid line), less sensitive to radius (dotted line, which occurs +at the vertical line for this specific isochrone), and independent of radius +(dashed line). +M +i 3. 8524 M + (vertical gray line), when P +FM 420 days. In +models with a smaller initial mass, the period is still increasing +at a relatively large rate as the envelope expands, while in more +massive models the period has already become less sensitive to +changes in radius. This is reflected by a slight inflection of the +green curve, which corresponds to the maximum in the period +distribution shown in panel (b) of Fig.C.1. The period distri- +bution of the full TP-AGB range is roughly symmetric around +this maximum, while limiting the selection to DFMP, produces +a distribution skewed toward short periods, as found in Sect.3. +If the luminosity dips following thermal pulses are taken +into account (blue lines), the corresponding envelope contrac- tion causes the period to decrease, and the cut at ∼ 360 days +becomes less sharp. Because of mass loss, the threshold period +P +dom ,0 is lowered, so that the shortest period associated with +DFMP does not correspond to the least evolved model (green +circle), but rather to the luminosity dip of a thermal pulse (blue +circle). +To be precise, the earliest occurrence of DFMP is on the left- +most luminosity spike (red circle), whose duration is so short that +it is unlikely to be observed. 
Indeed, the inclusion of luminosity +spikes alters the period distribution at long periods very little. +Luminosity spikes are relevant only for relatively massive and +young TP-AGB stars, and they give rise to the poorly populated +portion of the PA relation at the longest periods, as seen in panel +(a) of Fig.2. + Article number, page 9 of 9 diff --git a/read/results/playa/2201.00214.txt b/read/results/playa/2201.00214.txt new file mode 100644 index 0000000..0661679 --- /dev/null +++ b/read/results/playa/2201.00214.txt @@ -0,0 +1,766 @@ +arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022Temperature Analysis of Flaring +(AR11283) and non-Flaring (AR12194) +Coronal Loops +N. Fathalian1 + , S. S. H osseini R ad 2 + , N. A lipour2 + , H. Safari2 +1 + Department of Physics, Payame Noor University (PNU), 19395 -3697, Tehran, Iran. +2 + Department of Physics, Faculty of Science, University of Za njan, 45195-313, Zanjan, Iran. +e-mail: narges_fathalian@alum.sharif.edu +January 4, 2022 +Abstract +Here, we study the temperature structure of flaring and non-fl aring coronal loops, using extracted +loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging +Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use dat a for loops of X2.1-class-flaring active +region (AR11283) during 22:10UT till 23:00UT, on 2011, Sept ember 6; and non-flaring active region +(AR12194) during 08:00:00UT till 09:00:00UT on 2014, Octob er 26. By using spatially-synthesized +Gaussian DEM forward-fitting method, we calculate the peak t emperatures for each strip of the loops. +We apply the Lomb-Scargle method to compute the oscillation s periods for the temperature series of each +strip. The periods of the temperature oscillations for the fl aring loops are ranged from 7 min to 28.4 +min. These temperature oscillations show very close behavi or to the slow-mode oscillation. 
We observe +that the temperature oscillations in the flaring loops are started at least around 10 minutes before the +transverse oscillations and continue for a long time duration even after the transverse oscillations are +ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops. +The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min, but +their significances are less (below 0.5) in comparison with the flaring ones (near to one). Hence the +detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the +flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the +flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the +non-flaring ones. More accurate commentary in this respect requires more extensive statistical research +and broader observations. +Coronal Loops, Temperature Analysis, Temperature Oscillations, Flaring and non-Flaring Active Regions +I. Introduction +Analyzing the thermal structure of coronal loops is of considerable interest, especially as these +magnetic loops have an essential role in heating the solar chromosphere and corona. Such anal- +ysis can help to describe how the process of solar flaring is correlated with the loop’s thermal +structure. +Detections of coronal waves have a historical preview and have been reported several times +(e.g., + Aschwanden et al. ( 1999 ); Nakariakov et al. ( 1999 ); Wang et al. ( 2003 ); Wang & Solanki ( 2004 ); +Berghmans & Clette ( 1999 ); De Moortel et al. ( 2000 ), Verwichte et al. ( 2004 ), De Moortel & Brady +( 2007 ), Ballai et al. ( 2011 )). Coronal seismology and MHD waves have been reviewed widely by +1 +De Moortel ( 2005 ), Nakariakov & Verwichte ( 2005 ), Aschwanden ( 2006 ), Banerjee et al. ( 2007 ) and +De Moortel & Nakariakov ( 2012 ).
Along with the development of the observations, transver se +and longitudinal oscillations have also been studied theor etically (e.g., Gruszecki et al. ( 2006 ), +Pascoe et al. ( 2007 ), Fathalian et al. ( 2010 ); Luna et al. ( 2010 ); Fathalian & Safari ( 2010 ). Coronal +seismology techniques help to elicit the information from observations of oscillatory phenomena +and the results to be interpreted by using theoretical models (see for e.g., + Roberts et al. ( 1984 ); +Goossens et al. ( 1992 )). Oscillatory patterns and processes which happen during solar flares, were +interesting and subject of investigations from different a pproaches (e.g., Nakariakov et al. ( 2010 ), +Nisticò et al. ( 2013 ), Anfinogentov et al. ( 2013 ), Hindman & Jain ( 2014 ), Russell et al. ( 2015 )). As +we know the transverse loops oscillations usually occur in r esponse to a close filament or flare +( Wills-Davey & Thompson ( 1999 )). +Rapidly decaying long-period oscillations are mostly inte rpreted as global (or fundamen- +tal mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman ( 2014 ), and Wang +( 2011 ), also see Ofman & Wang ( 2002 ), and for slow-mode observed in fan-loops see Pant et al. +( 2017 )). They often occur in hot coronal loops of active regions, a ssociated with tiny (or micro-) +flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected +in intensity plots of solar and stellar flares are possibly ca used by standing slow-mode waves (see +reviews by Van Doorsselaere et al. ( 2016 ), and McLaughlin et al. ( 2018 )).Excitation, propagation, +and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al. +( 2007 ); Wang et al. ( 2015 ); Jess et al. ( 2016 ); Nakariakov et al. ( 2017 ); Nisticò et al. ( 2017 ); Kolotkov +et al. ( 2019 ); Krishna Prasad et al. ( 2019 ); Reale et al. ( 2019 ); Wang & Ofman ( 2019 )). 
To have +a complete overview of slow-mode magnetoacoustic waves in c oronal loops see the review by +Wang et al. ( 2021 ). +Investigating and comparing the thermal structures and osc illations of coronal loops in loops +of flaring and non-flaring active regions could help us in better understanding the loops’ material +oscillations and the flare impact on them. Several different methods have been developed to in- +vestigate the thermal structure of the coronal loops and loop strands. The thermal stability of the +coronal loops was the subject of research, done by Habbal & Rosner ( 1979 ) (and references cited +therein). McClymont & Craig ( 1985 ) stated that a pressure fluctuation must assist asymmetric +coronal temperature perturbation. They concluded that coronal loops are impartially stable in +the case of uniform heating. + Van Doorsselaere et al. ( 2011 ) used spectroscopic line ratios to obtain +the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona. +The dependence of coronal loop temperature on loop length and magnetic field strength is also +a favorite topic. For instance, Dahlburg et al. ( 2018 ) probed the temperature properties of solar +coronal loops over a wide range of lengths and magnetic field strengths via numerical simula- +tions and observed a very high correlation between magnetic field strength and a maximum of +the temperature. The effect of temperature inhomogeneity on the periods and the damping times +of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al. +( 2012 )). Fathalian ( 2019 ) estimated the loop temperature using the intensity ratios and the AIA re- +sponse functions in different wavelengths. Different emission measure (DEM) computations and +methods have been developed to estimate the temperature in the corona, which led to various +discussions. + Schmelz et al. ( 2010 ) analyzed a coronal loop, which was observed on 2010 August +3, by AIA. 
They took some differential emission measure (DEM ) curves, claiming a multithermal +rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). Af- +ter that, Aschwanden & Boerner ( 2011 ) criticized the method of background subtraction which +Schmelz et al. had applied. They claimed that the background subtraction method caused their +inferred result of a multithermal loop. Aschwanden & Boerner ( 2011 ) analyzed a set of hundred +loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian +DEM model. In this regard, some attention was paid to the instrumental limitations and abil- +ity of AIA and Guennou et al. ( 2012a , b ) discussed on the accuracy of the differential emission +measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovemen- +tioned controversy of whether the cross-field temperatures of coronal loops are multithermal or +isothermal, continued by + Schmelz et al. ( 2013 ) (similar to Schmelz et al. ( 2011 )). They analyzed +twelve loops to understand the cross-field temperature distributions of them and reveal the loops’ +substructure. Based on their achievements, the warmer loop s entail broader DEMs. Thereafter, +Schmelz et al. ( 2014 ) found indications of a relationship between the DEM weighted-temperature +and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have +narrower DEM widths. This could imply that fewer strands are seen emitting in the later cool- +ing phase, which they claim could potentially resolve the ab ovementioned controversy. In this +subject, Aschwanden et al. ( 2015 ) (as well as 2013 ( Aschwanden, 2013 )) developed a method to +extract the loop temperature which is based on Gaussian fit for Differential Emission Measure, +named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter). 
+This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and +non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows: +In section + II , data, we introduce the considered flaring and non-flaring ac tive regions and describe +the data employed and the time and properties of the flare, occ urred in the active region. In +section III , we explain the method we use to analyze the time-series of te mperatures in different +strips of the loops. Section IV is specified to our results, obtained related to flaring and non- +flaring regions. In section V we briefly state a summary of this work. +II. Data +We investigate the thermal structure and treatment of loops in a flaring region to see if it follows +the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time. +For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops +of it have been analyzed by Jain et al. ( 2015 ). They analyzed intensity variations in the wavelength +171 in two coronal loops of this region and detected obvious transverse oscillation with periods +of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see +the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active +region, extract its loops and analyze their thermal treatme nt. Then we compare the temperature +treatment of the loops at the flaring region with the loops of the non-flaring region to see the +differences. +The temperature analysis done here uses EUV images from the A IA onboard the SDO. AIA +has ten different wavelength channels, three in white light and UV, and the other seven in EUV +channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper- +atures (in order of T = 10 4.7 + K), not the corona, is not taken into account (Aschwanden et a l. 2015). 
+Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 +). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK . +The two below data sets are finally selected to study thermal variations and coronal loops +oscillations in flaring or non-flaring active regions. A few d istinct loops are visible in the regions. +Finally, these loops are chosen: +– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely +happening around the loops with the specification we are looking for. So this selected LOS +X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA +AR 11283, in the time period of 22:10UT till 23:00UT of 2011 Se ptember 6 with the cadence +of 12 sec. This period of time is selected since no other flare is happening during it. A +few distinct loops are visible and follow-able here during this period. Loop shapes in our +active region change permanently; therefore, it is difficult or impossible to follow a loop +over a very long time. Hence, it is not useful to extend the time interval of this region +to the time before the flare. The transverse oscillations of two loops in this region were +analyzed before by + Jain et al. ( 2015 ). We mark these loops by A and B in Figure 1 b. They +detected fundamental mode oscillation with periods of roughly 2 minutes and decay time +of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any) +or thermal fluctuations in this condition. Figure 1 a (left) displays AR 11283 and the area, +indicated by the white box is featured in a zoom-in view in Figure 1 .b (right) and the five +selected parts of the center of the three chosen loops are shown by red lines (the movie of +the region is available in this link). As it is clear in the mov ie, these three loops oscillate +together and their oscillations decay simultaneously. 
The center of figure 1 .a is coordinated +at (230, 165) arcsec and its width and height are 450 ′′ + × 456 ′′ + /750 × 775 pixels. The flare +occurring in this active region is an X2.1 class flare located close to the disk center at latitude +14 ◦ + north and longitude 18 ◦ + west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22 :12UT, +ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection +(CME) which occurs from 2011 September 6, 21:36:05T to 2011 S eptember 7, 02:24:05T, with +the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for +more details look at LASCO CME catalogue.) 1 +– Three loops of non-flaring active region 12194: As a blind te st, we select three loops of the +non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till +09:00:00UT of 2014 October 26. The center of figure 2 .a is coordinated at (0, -264) arcsec +and its width and height are 615 ′′ + × 615 ′′ + /1025 × 1025 pixels. We consider the images of +the selected area with the cadence of 12 sec in the same six wavelengths mentioned above. +These loops are relatively motionless and do not show any tra nsversal oscillation (see the +region’s movie in the link). We select the loops in such a way that they do not have any +crossing over the neighbor loops (in our perspective) during this time. In figure + 2 the +selected loops are distinguished in red in the mentioned active region. The size of the final +cut of non-flaring region (represented in the right) is 351 × 401 pixels. +The data set are primarily downloaded at level 1 with a pixel r esolution of 0.6 arcsec. We use +the standard aia _ pre p . pro subroutine available in SDO package SolarSoftWare library to adjust +the screen scale between the four arms of the AIA. This pre-processing step increases the data +level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series. 
+We also used drot _ ma p . pro subroutine to correct the differential rotation effect. Ac cording to the +movie made by pre-processed images, the most obvious loops ( marked in the abovementioned +figures) are selected in each region (with obvious transversal oscillations in the case of the flaring +active region). + III. Temperature A nalysis Method +We extract the selected loop segment pixels, for each loop, a nd calculate the normal vectors +to each point of the loop’s direction. Then by using these data, we straighten each loop in a +considered box with the thickness of 15 to 40 pixels (macro-p ixels, depending on the available +empty area around each loop and the distance to the neighbor loop). The area around the +loop is needed for calculations of background subtraction. The selected loop segment is cut in +1 + Based on data on these WebSites: https://solarflare.njit.e du/webapp.html, and https://www.swpc.noaa.gov/ +all wavelengths and at the same considered box from the image s set. These loop images are +necessary entrances for our thermal analysis process. Then the loop is divided into different +strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we +use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden +et al. + ( 2015 ). +The images in the above six wavelength filters are considered to calculate the temperature in +each strip of the loop. The DEM function is considered a single-Gaussian function relative to the +temperature determined by the forward fitting method. To obtain the temperature for each loop, +we divided the loop into narrow strips, and then the intensity flux was averaged over each strip. +The number of each strip is displayed with the index i. One of the usual methods to subtract +the background from observed data is fitting a single-Gaussian cospatial function with a linear +function on the flux profile. 
The DEM for each strip is considered to be single-Gaussian DEM +in terms of the logarithm of the temperature, which has three free parameters ( Aschwanden & +Boerner , 2011 ): +$$\mathrm{DEM}_i = \frac{d\,\mathrm{EM}_i}{dT} = \mathrm{EM}_{p,i}\,\exp\left(-\frac{[\log(T) - \log(T_{p,i})]^2}{2\sigma_{T,i}^2}\right) . \qquad (1)$$ +In which, $T_{p,i}$ is the DEM peak temperature, $\mathrm{EM}_{p,i}$ is the peak EM function, and $\sigma_{T,i}$ is the +logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes +(for each strip) we use Eq.6 of Aschwanden & Boerner ( 2011 ) (in below): +$$F_{0\lambda} = \int \frac{d\,\mathrm{EM}(T)}{dT}\,R_\lambda(T)\,dT = \sum_k \mathrm{EM}(T_k)\,R_\lambda(T_k) . \qquad (2)$$ +Here, $R_\lambda(T)$ is the instrumental temperature response function of each wavelength filter $\lambda$, which +is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA +response functions calibration has partly changed. Here, we use the updated calibration of the +temperature response functions, for each of the AIA temperature filters, according to the CHI- +ANTI Version 2019 code available in the Solar SoftWare (SSW). After forward-fitting the Gaussian +DEM to the background-subtracted observed fluxes in multiple wavelengths, the three-fitting pa- +rameters, temperature width ($\sigma_{T,i}$), peak of temperature ($T_{p,i}$), and peak emission measure ($\mathrm{EM}_{p,i}$) +are found by minimizing $\chi^2_i$. +Our data sample is uneven because of omitting some damaged images in between. There- +fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is +developed to use the technique periodogram, in the case where the observation times are un- +evenly spaced ( + Scargle , 1982 ). The Lomb-Scargle periodogram method is useful in cases where +the periodicity of data treatment is not immediately apparent.
This method allows efficient com- +putation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in +an intuitive means of determining the period of oscillation ( VanderPlas, 2018 ). Therefore we use +Lomb-Scargle Periodogram to evaluate and estimate the effic ient periods of temperature oscilla- +tions in our loops. We select the first period related to the highest power frequency, which is +obtained by this method.We considered the achieved periods with the highest significances and +amplitudes. The most significant (highest) periods observe d in temperature (minute) for flaring +and non-flaring loops are listed in Tables 1 and 2, respective ly. To estimate the significance of +the periods, we computed the probability values (p-values) . In the Lomb-Scargle method, the +significance returned here is the false alarm probability of the null hypothesis, i.e., as the data +is composed of independent Gaussian random variables. Accordingly, low probability values +(p-value less than 0.05) indicate a high degree of significance in the associated periodic signal. +IV. R esults +i. Temperature Analysis of Flaring Active Region Loops +Thenceforth the temperature time-series of different strips of the selected loops are calculated +using the method described in section 3. In the following figures, the vertical axis shows the +logarithm of the temperature and the horizontal axis shows the time duration. To be comparable +by eyes, all the forthcoming figures (which show the loops temperature oscillations) have been co- +scaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature +map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 1 2, and 6 strips, respectively. Each +strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’ +temperature oscillations are presented here. 
Figure + 3 displays the time-series of temperature +oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the +errors for each point (temperature) but removed them from the presentation to avoid overcrowding of the +figures. As we observe in Figures 3 and 4, the temperature oscillations are started and increase +around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended +(22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain +et al. ( 2015 ). As Jain et al. reported, Loop A and B have a transverse oscillation with periods +of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time +(22:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in +these flaring loops happen before the start of their transverse oscillations and are continuing even +in the time interval after the transverse oscillations decay. Although the temperature oscillations +do not decay as rapidly as the transverse oscillations do, and conversely, the loop temperature +increases at the end of the oscillating mode (see Fig. 4 , the temperature map of the loop A, for +instance). +We calculate the temperature oscillations periods, using Lomb-Scargle method. We consider +the thermal oscillations periods with the highest significances. As this method shows, the most +powerful period in the range of data time-series (listed in Table + 1 ) are from 7 to 28.4 minutes +observed in the strips of the marked loops of this flaring region. These loops of flaring region +also show some short periods in temperature oscillations which some are less than 10 minutes +(listed in Table 1 ). These short periods are more frequently observed in the loops of the flaring +active region. Such short periods are very scarce for the loops of the non-flaring active region +(compare Tables 1 and 2 ).
+The first column in Table 1 is the number of every strip along the loop. The second column is +the period of the most powerful frequency observed for the loop strips, calculated by the Lomb- +Scargle method. The third column shows the maximum of log ( T ) minus its minimum in each +strip. The columns of Table + 2 are exactly the same as Table 1 ; the only difference is that Table 2 is +for the non-flaring loops. +The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop +marked in Figure 1 .b. The mean of the parameter (Max(log T )-Min(log T )) for the strips of loop A +is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided +into 11 strips, has the length of 20.24 (Mm). The mean of (Max( log T )-Min(log T )) and the mean +of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8 +strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean +of (Max(log T )-Min(log T )) is 0.81 through this loop segment. The loops C1 and C2, divided into +12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22, +and 6.14 ± 0.25 (log), and the mean (Max(log T )-Min(log T )) of 1.48, 0.88, respectively. +We observe that despite the temperature oscillations, the flaring loops show a temperature +rise at the end of the considered time interval (figure 3 ). As their temperature maps also show, +the oscillations follow with a relatively sensible rise in the final temperature of the loop segments +(Figures 4 ). Although in the case of the transverse oscillations, the loops oscillate as the flare +occurs and then the oscillations decay and stop, in the case of temperature oscillations, the tem- +peratures of the various strips of the loops oscillate and at the end of the flare occurrence, they +get to a relatively higher value of temperature in average. 
+Figure + 4 shows the temperature maps of the flaring loops A, B1, B2, C1, a nd C2, respectively +as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and +the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each +separated grid part on the map is standing for one strip. Figure 4 shows that the temperature +for most of the strips increased, bypassing a few oscillations. Before the end of the time duration, +some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at +the early times of the duration and becomes hotter at the midd le and end times with a swing +to lower temperatures again (see Fig. 4 ). There are some temperature fluctuations at the middle +times (the red and green stripes) while at the end the strips temperatures are smoother with less +fluctuations. The temperature map of the loop segment B2 (Fig. + 4 ) shows that at the beginning of +the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end +times this pattern is reversed in this loop segment. In loop segment C1 (Fig. 4 ), the temperature +fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time +(23:00) the temperature is much higher than the beginning. T he temperature is increasing after +the flare time (22:24) for the loop C2 either (see Fig. 4 ). This happens with some oscillations in +the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation +in most of the flaring loops’ strips after the flare time. According to these temperature maps, +the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20 +minutes after that. +We expect the flaring loops to cool down as a result of heat cond uction and radiative cooling. +Hence this relative temperature increase should be scrutinized. 
As we probed, this temperature +rise is also followed in intensity time-series. As the intensity time-series show, the related intensity +in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the +authors also checked the wavelength of Fe XV I I I which has a peak formation temperature of +7 × 10 6 ◦ + K ( Ugarte-Urra & Warren ( 2014 )). By using the method developed by Warren et al. ( 2012 ) +the contribution of the Fe XV I I I emission line can be isolated from the AIA 94 , to analyze the +evolution of hot plasma in the loops. We do it to omit the conta mination from the cooler plasma +(mostly around 1MK) which also contributes to this AIA channel Boerner et al. ( 2012 ). This is +done by subtracting the contaminating warm (i.e., around 1M K) component to the bandpass. +This warm contribution is calculated from a weighted combination of the emission from the AIA +171 and 193 channels dominated by Fe X and Fe X I I emission, respectively. This intensity +analysis is done directly and it has not gone through any othe r process like the thermal analysis. +For this purpose, we applied the formulation (1) used by Li et al. ( 2015 ). Plots in Figure 5 show +the intensity map, and the mean intensity variation of the wavelength Fe XV I I I , for Loop A of +the flaring region, respectively. As these plots show, this intensity is also higher at the end of +the time duration in respect of the flare time. It seems to us that the expected cooling has not +occurred in these flaring loops yet, even after the flare occur rence in the probed duration due to +some plausible reasons. We consider that the mentioned simultaneous CME (see section +II ) which +this flare is associated with could cause this increase in temperature. We can be sure that the +source of this CME is AR 11283 ( Romano et al. ( 2015 )). 
This CME is in our flare region, hence +the loops receive energy even after the flare occurrence and it is probably the reason why the +expected cooling does not occur. +The thermal oscillations periods obtained the Lomb-Scargle method, do not have the same +significance in all strips of the loops, but for most strips of the flaring loops, the significances are +very near to one. To be assured about these oscillations, we p robed the intensity time-series for +each strip of the loops and we observed that this loop’s intensities shows intensity oscillations +too (i.e., alongside the loop). The most probable dominant p eriods observed in intensity, for +wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A , 16.7, and 18.22 min for strips of +F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods +are in the same order of the observed thermal oscillation per iods. The intensity in this time series +has not passed any thermal process but still shows oscillation periods close to thermal ones. So +we think these results confirm the observation of thermal osc illations. +ii. Temperature Analysis of non-Flaring Active Region Loop s +The temperature time-series for different strips of the selected loops of the non-flaring active +region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. + 6 ), +the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time +duration. Figure 6 displays the time-series of temperature variations for the first two strips of +the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the +logarithm of temperature (like the flaring loops range). The most powerful periods, observed in +most of these non-flaring loops’ strips (listed in Table 2 ) are from 8.5 min. to 30 min. 
Comparing +the periods of the loops in the flaring region (Table 1 ) with the non-flaring one (Table 2 ), we see +that the temperature periods of the flaring loops have lower values on average and have more +diversity than the non-flaring ones. As Tables + 1 and 2 show, the mean temperatures of nonf- +loops are lower in comparison with the f-loops, a fact we also expected from common sense. +The parameter (Max(log T )-Min(log T )) in nonf-loops’ strips is less than that for the flaring loop s’ +strips. +Nonf-loop A, divided into 11 strips, has the length of 19.91 ( Mm) which is the length of the +selected part of the loop marked in Figure 2 b. The mean of (Max(log T )-Min(log T )) for the strips +of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10. +Nonf-Loop B, divided into 6 strips, has the length of 11.11 (M m), and the mean temperature (log), +and the mean of (Max(log T )-Min(log T )) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonf- +loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of +5.82 ± 0.12, and the mean (Max(log T )-Min(log T )) of 0.56. +The first highest period observed for the temperature oscillations of these non-flaring loops’ +strips is reported in Table 2 . As we observe the temperature periods in these non-flaring loops +are mostly longer than those of the flaring loops (compare the values listed in Table + 1 and Table 2 ). +Therefore the temperature oscillations of these loops are a little slower than the flaring ones. +Figure + 7 shows the temperature maps of the non-flaring loops A, B, and C , respectively as a +time series. In each plot, the vertical axis is the distance a long the loop in Mm, and the horizontal +axis is the time. The color bar in the left shows the colors considered for the temperature range. +Each separated colored part in the map is one strip. 
These color maps are plotted over exactly the +same color range as those of the loops of the flaring region. +As figure 7 shows, the strips’ temperatures of these non-flaring loops have fewer temperature +fluctuations and are smoother in comparison with the flaring ones (Fig. 4 ). Furthermore, that +much increase in the temperatures of the strips, which was obvious in the loops of the flaring +region toward the end times, is not observed here. The temperatures are also altogether lower in the +nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the +non-flaring loops have relatively more similar temperature fluctuations. +As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the +flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes, +and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of +the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the +transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison +with the non-flaring ones. And figure + 9 shows that the range of the temperature variation, +i.e. the difference between maximum and minimum of the temperature value (max(log(T))- +min(log(T))), is much higher on average for the loops’ strips of the flaring AR in comparison with +the loops’ strips of the non-flaring one. + V. Summary +We reported the temperature oscillations of coronal loops of a flaring active region. We selected +the flaring active region 11283 to investigate the thermal structure and behavior of its loops. This +region includes a high energy flare X2.1 and the transverse oscillations of two loops of it have been +analyzed before by Jain et al. ( 2015 ).
They analyzed intensity variations in the wavelength 171 +in two coronal loops of this region and detected obvious transverse oscillation with periods of +roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure. 1 b) +at the flare time. We were curious to know if the temperature va riations follow the transverse +oscillations of the loops, or there is any relation or correlation between them. We also wanted to +investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal +properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three +segments of its loops and analyzed their thermal treatment. Then we compared the temperature +treatment of the loops at the flaring region with the loops of the non-flaring region to see the +differences. We were eager to observe the probable discrepa ncies between flaring and non-flaring +loops in this respect. +Here we used data of three loops of the flaring active region (A R11283) around the time of the +Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring +active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures +1 and 2 ). To calculate the time series of the loop temperature value s, we first extracted the loop +pixels in each image and then displayed the loop straightly f or all the images in the time series +of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian +DEM forward-fitting method founded by Aschwanden et al. ( 2015 ). We calculated the peak +temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze +temperature oscillations of the time-series for each strip of the loops. +We observed temperature oscillations which are following the transverse loop oscillations +observed by Jain et al. ( 2015 ) for the flaring loops. 
Furthermore, the temperature oscillations in +these flaring loops happen before the transverse oscillations start and continue even in the time +duration after the transverse oscillations decay. As obser ved, the temperature oscillations do not +decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase +at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures +of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the +flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in +the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly +2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of +roughly 10 to 28.5 minutes in different segments of this loop . And as the transverse oscillation +decays in this interval, no special definite decay is observe d in its temperature oscillations. +The temperature periods of the flaring loops are rather shorter than the temperature periods +of the non-flaring loops. The loops of the flaring region show some short temperature oscillations +periods in which some are less than 10 minutes (Table 1 ). These kind of short periods are more +frequently observed for the loops of the flaring active region and in the case of the non-flaring +ones, are very scarce. We observed that the periods of the flar ing loops have more diversity +than those of the non-flaring ones. Based on our confined obser vations, the non-flaring loops’ +periods are longer and their temperatures’ values are totally lower. So our research showed that +thermal structures of the flaring loops differ from the non-flaring ones in the ways described +above. As temperature maps show, the temperature fluctuations are increasing at the flaring time +and around 20 min. after, in the flaring loops. 
This happens with some oscillations in strips’ +temperature. Conversely, it seems that different strips of the non-flaring loops have relatively +more similar temperature fluctuations. The temperatures ar e either higher in average in the flar- +ing loops’ segments as expected. The significances of the per iods, obtained by the Lomb-Scargle +method, are calculated for each strip of each loop and the results show that these significances +for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of +the non-flaring region are less than 0.5. Hence the detected p eriods in the flaring loops’ strips +have high significances (near to one) and are oscillations. Whereas the detected periods in the +non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe +they are just fluctuations. +Using this method for the coronal loops showed that the oscillation modes obtained for the +temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of +the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These +kind of oscillations often occur in hot coronal loops (log ( T ) > 6) of active regions especially the +ones associated with small (or micro-) flares ( Wang et al. ( 2021 )). The loops of our flaring active +region are also hot loops with the mean temperature above this range. They also show intensity +oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring +loops. The temperature of the non-flaring loops are lower (log ( T ) < 6) and as discussed above, +we believe that the observed oscillation-like periods in non-flaring loops should be more probably +related to the high amplitude fluctuations. +Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes +of the fluctuations show a discrepancy. 
Mean of the parameter (Max(log T )-Min(log T )) in the +FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respec- +tively. And for non-flaring region, mean of (Max(log T )-Min(log T )), are 0.81, 0.62, and 0.56, for +nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T )- +Min(log T )) for these non-flaring loops show a difference from the flaring ones and are lower. +Loops of the non-flaring active region 12194 have a relatively uniform temperature at the +beginning of the time interval, which rises slightly at its e nd. As the Solar Monitor reports in the +neighborhood of this region, the flaring active region 12192 exists of which between its multiple +flares, there is a c 4.6 class flare occurring at 9:44UT. Therefore, it could be a p ossible suggestion +that the abovementioned slight temperature rise in the loop s of AR 12194 (in the time interval +8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions +exist in the AR 12192. +Hence as our study shows, the temperature of coronal loops of flaring AR changes in an +oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher tem- +peratures on average and higher oscillation periods with higher peaks and deeper valleys. More +accurate commentary in this respect requires more extensive statistical research and broader ob- +servations. +arcsecarcsec + 79 154 229 304 379 454−6825118211304397 + a + arcsecarcsec + +114.6 171.2 227.8 284.4 341171.4206.3241.2276.1311 + Loop B1 + Loop ALoop C2 +Loop C1b + Loop B2 +Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view +of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are +the same loops studied by + Jain et al. ( 2015) (see Fig.3a in Jain et al. ( 2015)). 
+arcsecarcsec + −154 0 154 308−572−418−264−11044 + a + arcsecarcsec +−202 −134 −66 2 70−396−338−280−221−162 + nonf−LoopAnonf−LoopB + nonf−LoopCb +Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO. (b) Zoom-in +view of the area, marked by a box in the left, the loops are dist inguished in red. +5.866.26.46.66.8 +LogT F−LoopA +5.866.26.46.66.8 +LogT +22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 + timeLogT + 5.866.26.46.66.8 +LogT F−LoopB1 +22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 + timeLogT +Figure 3: From up to down: The time-series of the temperature oscillat ions for the first 3 strips of Loop A (strip 1 to +3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the +logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1. +22:10 22:20 22:30 22:40 22:50 23:000 11213242 F−loopA +Time Loop Length(Mm) + 5.866.26.46.66.8 +22:10 22:20 22:30 22:40 22:50 23:000 5 101520 F−loopB1 +Time Loop Length(Mm) + 66.056.16.156.26.256.36.356.46.456.5 +22:10 22:20 22:30 22:40 22:50 23:000 4 8 1216 F−loopB2 +Time Loop Length(Mm) + 5.866.26.46.66.8 +22:10 22:20 22:30 22:40 22:50 23:000 6 111722 F−loopC1 +Time Loop Length(Mm) + 5.65.866.26.46.66.8 +22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11 F−loopC2 +Time Loop Length(Mm) + 5.866.26.46.66.8 +Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical +axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows +the colors considered for the temperature range. +Table 1: The properties observed for the loop segments of the flaring A R. 
+FLoopA +(Strip Number) The highest +Temp.’s period +observed Max(log(T))- +Min(log(T)) FLoopB2 +(Strip Number) The highest +Temp.’s period +observed Max(log(T))- +Min(log(T)) +1 9.94 1.09 1 18.07 0.68 +2 16.57 0.79 2 24.85 0.83 +3 8.46 0.65 3 24.85 0.85 +4 28.4 1.11 4 7.36 0.84 +5 28.4 0.75 5 8.64 0.85 +6 24.85 0.76 6 8.28 0.93 +7 22.09 0.58 7 18.07 0.84 +8 18.07 1.55 8 28.4 0.73 +9 18.07 1.6 FLoopC1 - - +10 12.42 1.57 1 28.4 1.46 +11 12.42 1.42 2 22.09 1.34 +12 24.85 1.56 3 16.57 1.36 +13 19.88 1.6 4 28.04 1.49 +14 19.88 1.24 5 24.85 1.6 +15 18.07 1.58 6 24.85 1.42 +16 19.88 1.45 7 15.29 1.6 +17 16.57 0.7 8 13.25 1.56 +18 7.36 1.6 9 13.25 1.6 +19 8.64 0.95 10 16.57 1.6 +20 16.57 1.54 11 16.57 1.6 +21 7.36 1.18 12 9.46 1.13 +22 7.36 1.51 FLoopC2 - - +23 18.07 1.58 1 18.07 0.88 +24 22.09 1.33 2 28.4 0.8 +25 24.85 0.72 3 15.29 0.87 +FLoopB1 - - 4 16.57 0.93 +1 18.07 1.43 5 18.07 1.22 +2 15.29 0.76 6 28.4 0.58 +3 18.07 0.76 +4 18.07 0.75 +5 18.07 0.59 +6 19.88 0.8 +7 19.88 0.91 +8 19.88 1.36 +9 11.04 1.6 +10 18.07 1.6 +11 18.07 1.6 +Table 2: The properties observed for the loop segments of the non-flaring AR.
+Nonf-LoopA +(Strip Number) The highest +Temp.’s period +observed Max(log(T))- +Min(log(T)) +1 24 0.61 +2 30 0.95 +3 30 0.81 +4 20 1.51 +5 20 0.77 +6 20 0.81 +7 11.42 0.71 +8 12 0.73 +9 30 0.72 +10 30 0.77 +11 30 0.61 +Nonf-LoopB +(Strip Number) The highest +Temp.’s period +observed Max(log(T))- +Min(log(T)) +1 26.66 0.36 +2 26.66 0.64 +3 10.43 0.45 +4 12 0.62 +5 30 0.98 +6 8.57 0.67 +Nonf-LoopC +(Strip Number) The highest +Temp.’s period +observed Max(log(T))- +Min(log(T)) +1 26.66 0.76 +2 26.66 0.75 +3 26.66 0.26 +4 30 0.27 +5 30 0.8 +22:10 22:20 22:30 22:40 22:50 23:000 11223243 Int−Fe−LoopA +Time Loop Length(Mm) + 00.020.040.060.080.10.120.140.160.180.2 +22:10 22:20 22:30 22:40 22:50 23:0000.10.20.30.40.50.60.70.80.91 Int−Fe−LoopA +TimeNormalized Intensity Fe XVIII +Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XV I I I, and mean intensity of Fe +XV I I I (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and +normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors +considered for the Intensity range. +VI. acknowledgements +The author Narges Fathalian wishes to also express her thanks for the technical support and +comments which has received from Dr.Farhad Daii and Dr.Mohsen Javaherian regarding to this +work. + 5.866.26.46.66.8 +LogT NonF−LoopA +8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 + timeLogT + 5.866.26.46.66.8 +LogT NonF−LoopB +8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 + timeLogT +Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non- +flaring Loops A and B. Horizontal axis is the time and the verti cal axis is the logarithm of the temperature. 
+8:10 8:20 8:30 8:40 8:50 9:000 5 101520 NonF−loopA +Time Loop Length(Mm) + 5.866.26.46.66.8 +8:10 8:20 8:30 8:40 8:50 9:000 5 9 1418 NonF−loopB +Time Loop Length(Mm) + 5.866.26.46.66.8 +8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10 NonF−loopC +Time Loop Length(Mm) + 5.866.26.46.66.8 +Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis +is the distance along the loop in Mm, and the horizontal axis i s the time. The color-bar in the left shows the +colors considered for the temperature range. +6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 3000.050.10.150.20.250.30.350.4 + Temp. Period (min)Percentage of Temp. Periods +Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non- +flaring (red bars) ARs. The horizontal axis shows the temperature periods in minute. +0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7024681012 + max(log(T))−min(log(T))Number +Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) +and non-flaring (red bars) ARs. +R eferences +Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 28 0 +Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F. +2013, ApJ, 773, 156 +Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5 +Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364, +417 +Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81 +Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53 +Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexand er, D. 1999, ApJ, 520, 880 +Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13 +Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3 +Berghmans, D., & Clette, F. 
1999, Solar Physics, 186, 207 +Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41 +Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116 +De Moortel, I. 2005, Philosophical Transactions of the Roya l Society of London Series A, 363, 2743 +De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210 +De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23 +De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of +London Series A, 370, 3193 +Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369 +Fathalian, N., & Safari, H. 2010, ApJ, 724, 411 +Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403 +Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233 +Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A , 460, 887 +Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203 , 25 +Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203 , 26 +Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113 +Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103 +Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19 +Jess, D. B., Reznikova, V. E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179 +Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 20 19, A&A, 628, A133 +Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space +Sciences, 6, 57 +Li, L. P., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109 +Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 +Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, A pJ, 716, 1371 +McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834 +McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelíne k, P., & Takasao, S. 2018, Space +Science Reviews volume, 214, 45 +Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2 017, ApJ, 849, 62 +Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 
2010 , Plasma Physics and Controlled Fusion, +52, 124009 +Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862 +Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3 +Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 55 2, A57 +Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 20 17, A&A, 600, A37 +Ofman, L., & Wang, T. 2002, ApJ, 580, L85 +Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5 +Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165 +Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, Ap J, 884, 131 +Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857 +Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55 +Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 5 81, A8 +Scargle, J. D. 1982, ApJ, 263, 835 +Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ , 731, 49 +Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34 +Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 +Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 201 3, ApJ, 764, 53 +Ugarte-Urra, I., & Warren, H. P. 2014, ApJ, 783, 12 +Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143 +Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32 +VanderPlas, J. T. 2018, ApJ, 236, 16 +Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2 004, Solar Physics, 223, 77 +Wang, T. 2011, Space Science Reviews, 158, 397–419 +Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 +Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33 +Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17 +Wang, T., & Ofman, L. 2019, ApJ, 886, 2 +Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13 +Wang, T., Ofman, L., Yuan, D., et al. 
2021, Space Science Reviews, 217 +Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141 +Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467 diff --git a/read/results/playa/GeoTopo-book.txt b/read/results/playa/GeoTopo-book.txt new file mode 100644 index 0000000..eb0a8c8 --- /dev/null +++ b/read/results/playa/GeoTopo-book.txt @@ -0,0 +1,7991 @@ +Einführung in die +Geometrie und Topologie +0. Auflage, 31. Dezember 2016 Martin Thoma +Vorwort +Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet +die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen +und Tutorien. +Das Skript ist kostenlos übermartin-thoma.com/geotopoverfügbar. Wer es gerne in A5 (Schwarz- +Weiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). +Danksagungen +An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen +gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich +basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze +Abschnitte konnten direkt mit LA +T +EX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre +Inhalte in diesem Skript einbauen zu dürfen! +Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsauf- +gaben und Lösungen zu benutzen. +Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen +Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per +E-Mail und nach dem Tutorium beantwortet. Danke! +Was ist Topologie? +Die Kugeloberfläche S 2 + lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche +oder der Oberfläche einer Pyramide verformen, aber nicht zumR 2 + oder zu einem Torus T 2 + . Für +den R 2 + müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein +Loch machen. 
+Erforderliche Vorkenntnisse +Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \ , ∅ , R , P (M )) +und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider- +spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C , deren Betrag, +Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem +in „Analysis I“ vermittelt. +Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, +der Spektralsatz und der pro jektive RaumP (R) aus „Lineare Algebra I“ bekannt sind. In „Lineare +Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. +iii + (a) S 2 + (b)Würfel (c)Pyramide +y + x +(d) R 2 + (e) T 2 +Abbildung 0.1:Beispiele für verschiedene Formen +Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und +Zahlentheorie“ gehört zu haben. +Inhaltsverzeichnis +1 Topologische Grundbegriffe2 +1.1 Topologische Räume. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .2 +1.2 Metrische Räume. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6 +1.3 Stetigkeit. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .9 +1.4 Zusammenhang. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .11 +1.5 Kompaktheit. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .14 +1.6 Wege und Knoten. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .17 +Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .22 +2 Mannigfaltigkeiten und Simplizialkomplexe24 +2.1 Topologische Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . . . .24 +2.2 Differenzierbare Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . .29 +2.3 Simplizialkomplex. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .34 +Übungsaufgaben. . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .43 +3 Fundamentalgruppe und Überlagerungen44 +3.1 Homotopie von Wegen. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .44 +3.2 Fundamentalgruppe. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .47 +3.3 Überlagerungen. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .51 +3.4 Gruppenoperationen. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .61 +4 Euklidische und nichteuklidische Geometrie64 +4.1 Axiome für die euklidische Ebene. . . . . . . . . . . . . . . . . . . . . . . . . . .64 +4.2 Weitere Eigenschaften einer euklidischen Ebene. . . . . . . . . . . . . . . . . . .74 +4.2.1 Flächeninhalt. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .74 +4.3 Hyperbolische Geometrie. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .77 +Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .86 +5 Krümmung 87 +5.1 Krümmung von Kurven. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .87 +5.2 Tangentialebene. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .89 +5.3 Gauß-Krümmung. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .91 +5.4 Erste und zweite Fundamentalform. . . . . . . . . . . . . . . . . . . . . . . . . .94 +Lösungen der Übungsaufgaben99 +Bildquellen 105 +Abkürzungsverzeichnis106 +Ergänzende Definitionen und Sätze107 +Symbolverzeichnis108 +2 Inhaltsverzeichnis +Stichwortverzeichnis111 +1 Top ologische Grundb egriffe +1.1 Topologische Räume +Definition 1 +Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P (X ) +mit folgenden Eigenschaften +(i) ∅, X ∈ T +(ii)Sind U +1 , U +2 ∈ T, so ist U + 1 ∩ U + 2 ∈ T +(iii)Ist I eine Menge und U + i ∈ T für jedes i ∈ I , so ist +i ∈ I U +i ∈ T +Die Elemente von T heißen offene Teilmengen von X . +A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. 
+Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0 , 1) . Auch gibt es +Mengen, die sowohl abgeschlossen als auch offen sind. +Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) +Betrachte ∅ und X mit der trivialen Topologie T +triv = { ∅ , X } . +Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem X C + = X \ X = ∅ ∈ T und +X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. +Beispiel 1 (Topologien) +1) X = R n + mit der von der euklidischen Metrik erzeugten Topologie T +Euklid : +U ⊆ R n + offen ⇔ für jedes x ∈ U gibt es r > 0 , +sodass B +r (x ) = { y ∈ Rn + | d(x, y ) < r } ⊆ U +Diese Topologie wird auch „Standardtopologie des R n + “ genannt. Sie beinhaltet unter +anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli- +chem Mittelpunkt (vgl.Definition 1.ii). +2)Jeder metrische Raum ( X, d) ist auch ein topologischer Raum. +3)Für eine Menge X heißt T +Diskret = P (X ) diskrete Topologie . +4) X := R, T +Z := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie +Beobachtungen: +• U ∈ T +Z ⇔ ∃f ∈ R [X ] , sodass R \ U = V (f ) = { x ∈ R | f (x ) = 0 } +• Es gibt keine disjunkten offenen Mengen in T +Z . +4 1.1. TOPOLOGISCHE RÄUME +5) X := Rn + , T +Z = { U ⊆ R n + |Es gibt Polynome f +1 , . . . , f + r ∈ R[ X +1 , . . . , X + n ] sodass +R n + \ U = V ( f +1 , . . . , f + r )} +6) X := { 0 , 1 } , T = { ∅ , { 0 , 1 } , { 0 } } heißt Sierpińskiraum. +∅ , { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. +Definition 2 +Sei ( X, T) ein topologischer Raum und x ∈ X . +Eine Teilmenge U ⊆ X heißt Umgebung von x , wenn es ein U +0 ∈ T gibt mit x ∈ U + 0 und +U + 0 ⊆ U . +Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. +Definition 3 +Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge. +a) M ◦ + := { x ∈ M | M ist Umgebung von x } = +U ⊆M +U ∈ T U heißt Inneres oder offener +Kern von M . 
+b) M̄ := ⋂_{M ⊆ A, A abgeschlossen} A heißt abgeschlossene Hülle oder Abschluss von M . +c) ∂ M := M̄ \ M ◦ + heißt Rand von M . +d) M heißt dicht in X , wenn M̄ = X ist. +Beispiel 2 +1)Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M̄ = R und M ◦ + = ∅ +2)Sei X = R und M = (a, b). Dann gilt: M̄ = [a, b] +3)Sei X = R , T = T +Z und M = (a, b) . Dann gilt: M̄ = R +Definition 4 +Sei ( X, T) ein topologischer Raum. +a) B ⊆ T heißt Basis der Topologie T , wenn jedes U ∈ T Vereinigung von Elementen +aus B ist. +b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen +Durchschnitten von Elementen aus S ist. +Beispiel 3 (Basis und Subbasis) +1)Jede Basis ist auch eine Subbasis, z.B. +S = { ( a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als +auch Subbasis. +2)Gegeben sei X = R n + mit euklidischer Topologie T. Dann ist +B = { B + r ( x ) | r ∈ Q +>0 , x ∈ Qn + } +eine abzählbare Basis von T. +3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅ , { 0 } , { 0 , 1 } , { 0, 2 } , X }. +Dann ist S = { ∅ , { 0 , 1 } , { 0, 2 } } eine Subbasis von T , da gilt: +5 1.1. TOPOLOGISCHE RÄUME +•S ⊆ T +•∅ , { 0, 1 } und { 0 , 2 } ∈ S +•{ 0 } = { 0 , 1 } ∩ { 0 , 2 } +• X = { 0 , 1 } ∪ { 0 , 2 } +Allerdings ist S keine Basis von ( X, T) , da { 0 } nicht als Vereinigung von Elementen +aus S erzeugt werden kann. +Bemerkung 2 +Sei X eine Menge und S ⊆ P ( X ) . Dann gibt es genau eine Topologie T auf X , für die S +Subbasis ist. +Definition 5 +Sei ( X, T) ein topologischer Raum und Y ⊆ X . +T +Y := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . +T +Y heißt Teilraumtopologie und (Y , T +Y ) heißt ein Teilraum von ( X, T) . +Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. +Definition 6 +Seien X +1 , X +2 topologische Räume. +U ⊆ X +1 × X +2 sei offen, wenn es zu jedem x = ( x + 1 , x +2 ) ∈ U Umgebungen U +i um x + i mit +i = 1, 2 gibt, sodass U +1 × U +2 ⊆ U gilt. 
+T = { U ⊆ X +1 × X +2 | U offen } ist eine Topologie auf X +1 × X +2 . Sie heißt Produkttopologie. +B = { U +1 × U +2 | U + i offen in X +i , i = 1, 2 } ist eine Basis von T. +U +x +x + 2 + x +1U + 2 + U +1 X +1X +2 +Abbildung 1.1:Zu x = (x + 1 , x +2 ) gibt es Umgebungen U +1 , U +2 mit U +1 × U +2 ⊆ U +Beispiel 4 (Produkttopologien) +1) X +1 = X +2 = R mit euklidischer Topologie. +⇒ Die Produkttopologie auf R × R = R2 + stimmt mit der euklidischen Topologie auf +R 2 + überein. +2) X +1 = X +2 = R mit Zariski-Topologie. T Produkttopologie auf R2 + : U + 1 × U + 2 +(SieheAbbildung 1.2) +6 1.1. TOPOLOGISCHE RÄUME +U +1 = R \ NU + 2 = R \ N +Abbildung 1.2:Zariski-Topologie auf R 2 +Definition 7 +Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/ + ∼ sei die Menge +der Äquivalenzklassen, π : X → X , x → [ x] + ∼ . +T +X := + U ⊆ + X + + π − 1 + (U ) ∈ T +X +(X , T + X ) heißt Quotiententopologie . +Beispiel 5 +X = R , a ∼ b : ⇔ a − b ∈ Z + R +-1 0 1 2 3 4 5 +0a + U aπ − 1 + (u) +0 ∼ 1 , d. h. [0] = [1] +Beispiel 6 +Sei X = R2 + und (x + 1 , y +1 ) ∼ (x + 2 , y +2 ) ⇔ x + 1 − x + 2 ∈ Z und y + 1 − y + 2 ∈ Z. Dann ist X/ + ∼ ein Torus. +Beispiel 7 (Pro jektiver Raum) +X = R n+1 + \ { 0 } , x ∼ y ⇔ ∃λ ∈ R × + mit y = λx +⇔ x und y liegen auf der gleichen +Ursprungsgerade +X = P n + ( R ) +7 1.2. METRISCHE RÄUME +Also für n = 1: +−4 −2 2 4 6 8 +−4−224 +1.2 Metrische Räume +Definition 8 +Sei X eine Menge. Eine Abbildung d : X × X → R + +0 heißt Metrik , wenn gilt: +(i)Definitheit: d(x, y ) = 0 ⇔ x = y ∀x, y ∈ X +(ii)Symmetrie: d(x, y ) = d( y, x ) ∀x, y ∈ X +(iii)Dreiecksungleichung: d(x, z ) ≤ d( x, y ) + d(y, z ) ∀x, y, z ∈ X +Das Paar (X, d) heißt ein metrischer Raum . +Bemerkung 3 +Sei (X, d) ein metrischer Raum und +B +r (x ) := { y ∈ X | d(x, y ) < r } für x ∈ X, r ∈ R + +B = { B + r ( x ) ⊆ P ( X ) | x ∈ X, r ∈ R + + } ist Basis einer Topologie auf X . 
+Definition 9 +Seien ( X, d + X ) und (Y , d + Y ) metrische Räume und ϕ : X → Y eine Abbildung mit +∀x + 1 , x +2 ∈ X : d +X ( x + 1 , x +2 ) = d +Y ( ϕ( x +1 ), ϕ(x + 2 )) +Dann heißt ϕ eine Isometrie von X nach Y . +Beispiel 8 (Skalarprodukt erzeugt Metrik) +Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt·, ·. Dann wird V +durch d(x, y ) := +x − y, x − y zum metrischen Raum. +Beispiel 9 (diskrete Metrik) +Sei X eine Menge. Dann heißt + d(x, y ) = +0 falls x = y +1 falls x = y +die diskrete Metrik. Die Metrik d induziert die diskrete Topologie . +8 1.2. METRISCHE RÄUME +Beispiel 10 +X = R 2 + und d ((x + 1 , y +1 ) , ( x + 2 , y +2 )) := max( x + 1 − x + 2 , y +1 − y + 2 ) ist Metrik. +Beobachtung: d erzeugt die euklidische Topologie. +B +r (0) = r r + r +r +(a) B +r (0) (b)Euklidische Topologie +Abbildung 1.3:Veranschaulichungen zur Metrik d ausBeispiel 10 +9 1.2. METRISCHE RÄUME +Beispiel 11 (SNCF-Metrik1 + ) +X = R 2 +−4 −2 2 4 6 8 +−4−224 +Definition 10 +Ein topologischer Raum X heißt hausdorffsch , wenn es für je zwei Punkte x = y in X +Umgebungen U +x um x und U +y um y gibt, sodass U +x ∩ U +y = ∅ . +Bemerkung 4 (Trennungseigenschaft) +Metrische Räume sind hausdorffsch, wegen +d( x, y ) > 0 ⇒ ∃ε > 0 : B + ε ( x) ∩ B +ε (y ) = ∅ +Beispiel 12 (Topologische Räume und Hausdorff-Räume) +1) (R , T +Z ) ist ein topologischer Raum, der nicht hausdorffsch ist. +2) (R , T +Euklid ) ist ein topologischer Hausdorff-Raum. +Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) +Seien X, X +1 , X +2 Hausdorff-Räume. +a)Jeder Teilraum von X ist hausdorffsch. +b) X +1 × X +2 ist hausdorffsch (vgl.Abbildung 1.4). +Definition 11 +Sei X ein topologischer Raum und ( x ) + n∈N eine Folge in X . x ∈ X heißt Grenzwert oder +Limes von ( x +n ) , wenn es für jede Umgebung U von x ein n + 0 gibt, sodass x + n ∈ U für alle +n ≥ n + 0 . +Bemerkung 6 +Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. 
+Beweis: Sei ( x + n ) eine konvergierende Folge und x und y Grenzwerte der Folge. +Da X hausdorffsch ist, gibt es Umgebungen U +x von x und U + y von y mit U + x ∩ U + y = ∅ falls +x = y . Da (x + n ) gegen x und y konvergiert, existiert ein n + 0 mit x + n ∈ U +x ∩ U +y für alle n ≥ n + 0 +⇒ x = y +1 + Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. +10 1.3. STETIGKEIT +(x + 1 , y +1 ) (x + 2 , y +2 ) +x +1 x +2 +U + 1 × X +2 U + 2 × X +2 X +1X +2 +Abbildung 1.4:Wenn X +1 , X +2 hausdorffsch sind, dann auch X +1 × X +2 +1.3 Stetigkeit +Definition 12 +Seien ( X, T +X ) , (Y , T +Y ) topologische Räume und f : X → Y eine Abbildung. +a) f heißt stetig :⇔ ∀U ∈ T +Y : f − 1 + (U ) ∈ T +X . +b) f + heißt Homöomorphismus , wenn f stetig ist und es eine stetige Abbildung g : +Y → X gibt, sodass g ◦ f = id +X und f ◦ g = id +Y . +Bemerkung 72 +Seien X, Y metrische Räume und f : X → Y eine Abbildung. +Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ ( x, ε) > 0 , sodass für +alle y ∈ X mit d(x, y ) < δ gilt d +Y (f ( x ), f (y )) < ε. +Beweis: „ ⇒“: Sei x ∈ X, ε > 0 gegeben und U := B +ε (f ( x )). +Dann ist U offen in Y . +Def. 12.a +=====⇒ f −1 + ( U ) ist offen in X . Dann ist x ∈ f − 1 + ( U ). +⇒ ∃δ > 0, sodass B + δ (x ) ⊆ f − 1 + (U ) +⇒ f (B +δ ( x )) ⊆ U +⇒ { y ∈ X | d +X ( x, y ) < δ } ⇒ Beh. +„ ⇐“: Sei U ⊆ Y offen, X ∈ f −1 + (U ). +Dann gibt es ε > 0 , sodass B +ε (f ( x )) ⊆ U +Vor. +==⇒ Es gibt δ > 0 , sodass f ( B +δ (x )) ⊆ B + ε ( f (x ))) +⇒ B + δ (x ) ⊆ f − 1 + (B + ε ( f (x ))) ⊆ f −1 + (U ) +Bemerkung 8 +Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: +f ist stetig +⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f −1 + ( A ) ⊆ X ist abgeschlossen. +Beispiel 13 (Stetige Abbildungen und Homöomorphismen) +1)Für jeden topologischen Raum X gilt: id + X : X → X ist Homöomorphismus. +2 + Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. +11 1.3. 
STETIGKEIT +2) Ist ( Y , T +Y ) trivialer topologischer Raum, d. h. T +Y = T +triv , so ist jede Abbildung +f : X → Y stetig. +3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen +Raum Y und jede Abbildung f . +4)Sei X = [0, 1), Y = S 1 + = { z ∈ C | z = 1 } und f (t ) = e 2πit + . +R +0 1 0f +g +Abbildung 1.5:Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist. +Die Umkehrabbildung g ist nicht stetig, da g − 1 + (U ) nicht offen ist (vgl.Abbildung 1.5). +Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) +Seien X, Y , Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. +Dann ist g ◦ f : X → Z stetig. + X f + +g ◦f + Y +g + +Z +Beweis: Sei U ⊆ Z offen ⇒ ( g ◦ f ) − 1 + ( U ) = f −1 + ( g −1 + ( U )) . g − 1 + ( U ) ist offen in Y weil g stetig +ist, f − 1 + ( g − 1 + ( U )) ist offen in X , weil f stetig ist. +Bemerkung 10 +a)Für jeden topologischen Raum X ist +Homöo(X ) := { f : X → X | f ist Homöomorphismus } +eine Gruppe. +b)Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. +c) Iso + ( X ) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo( X ) für +jeden metrischen Raum X . +Bemerkung 11 (Pro jektionen sind stetig) +Seien X, Y topologische Räume. π +X : X × Y → X und π +Y : X × Y → Y die Pro jektionen +π + X : (x, y ) → x und π + Y : (x, y ) → y +Wird X × Y mit der Produkttopologie versehen, so sind π +X und π + Y stetig. +Beweis: Sei U ⊆ X offen +⇒ π − 1 +X ( U ) = U × Y ist offen in X × Y . +Bemerkung 12 +Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/ + ∼ der Bahnenraum +versehen mit der Quotiententopologie, π : X → X , x → [x ] + ∼ . +Dann ist π stetig. +12 1.4. ZUSAMMENHANG +Beweis: Nach Definition ist U ⊆ X offen ⇔ π − 1 + ( U ) ⊆ X offen. +Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. 
+Beispiel 14 (Stereographische Pro jektion) +R n + und S n + \ { N } sind homöomorph für beliebiges N ∈ S n + . Es gilt: +S n + = + x ∈ R n+1 + + x = 1 += + x ∈ R n+1 + + + + n+1 + +i =1 x 2 +i = 1 +O. B. d. A. sei N =  + + + + 0 +. +. +. +0 +1  + + + +. Die Gerade durch N und P schneidet die Ebene H in genau +einem Punkt ˆ +P . P wird auf ˆ +P abgebildet. +f : S n + \ { N } → R n +P → genau ein Punkt + + +L +P ∩ H +wobei R n + = H =  + + + +  + + x + 1 +. +. +. +x + n+1  + + ∈ R n+1 + + + + + + x +n +1 = 0  + + + + und L +P die Gerade in R n+1 + durch N +und P ist. +Sei P =  + + x +1 +. +. +. +x + n+1  + + , so ist x +n +1 < 1 , also ist L + P nicht parallel zu H . Also schneiden sich L + P +und H in genau einem Punkt ˆ +P . +Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. +1.4 Zusammenhang +Definition 13 +a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen +U +1 , U +2 von X gibt mit U +1 ∩ U +2 = ∅ und U +1 ∪ U +2 = X . +b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit +der Teilraumtopologie zusammenhängend ist. +13 1.4. ZUSAMMENHANG +x yz +N + ˆ +P0 P +Abbildung 1.6:Visualisierung der stereographischen Pro jektion +Bemerkung 13 +X + ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A + 1 , A +2 +mit A + 1 ∩ A + 2 = ∅ und A +1 ∪ A + 2 = X . +Beispiel 15 (Zusammenhang von Räumen) +1) (R n + , T +Euklid ) ist zusammenhängend, denn: +Annahme: R n + = U +1 ˙ +∪ U +2 mit ∅ = U + 1 , U +2 ∈ T +Euklid existieren. +Sei x ∈ U + 1 , y ∈ U + 2 und [ x, y ] die Strecke zwischen x und y . Sei V = [ x, y ] . Nun +betrachten wir V Rn + als (metrischen) Teilraum mit der Teilraumtopologie T +V . +Somit gilt U + 1 ∩ [ x, y ] ∈ T + V wegen der Definition der Teilraumtopologie. +Dann gibt es z ∈ [ x, y ] mit z ∈ ∂ ( U +1 ∩ [ x, y ]) , aber z /∈ U +1 ⇒ z ∈ U + 2 . In jeder +Umgebung von z liegt ein Punkt von U +1 ⇒ Widerspruch zu U +2 offen. 
+2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R + < 0 ∪ R + >0 +3) R 2 + \ { 0 } ist zusammenhängend. +4) Q R ist nicht zusammenhängend, da (Q ∩ R + < √ + 2 ) ∪ (Q ∩ R + >√ + 2 ) = Q +5) { x } ist zusammenhängend für jedes x ∈ X , wobei X ein topologischer Raum ist. +6) R mit Zariski-Topologie ist zusammenhängend. +Bemerkung 14 +Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen- +hängend. +14 1.4. ZUSAMMENHANG +Beweis: durch Widerspruch +Annahme: A = A + 1 ∪ A + 2 , A + i abgeschlossen, A + i = ∅, A + 1 ∩ A + 2 = ∅ +⇒ A = (A ∩ A + 1 ) + +abgeschlossen ˙ +∪ ( A ∩ A +2 ) + +abgeschlossen + + +disjunkt +Wäre A ∩ A +1 = ∅ +⇒ A ⊆ A = A + 1 ˙ +∪ A + 2 +⇒ A ⊆ A + 2 ⇒ A ⊆ A + 2 +⇒ A + 1 = ∅ +⇒ Widerspruch zu A + 1 = ∅ +⇒ A ∩ A + 1 = ∅ und analog A ∩ A + 2 = ∅ +⇒ Widerspruch zu A ist zusammenhängend. +Bemerkung 15 +Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend. +Ist A ∩ B = ∅ , dann ist A ∪ B zusammenhängend. +Beweis: Sei A ∪ B = U +1 ˙ +∪ U +2 , U +i = ∅ offen +o. B. d. A. +======⇒ A = (A ∩ U +1 ) ˙ +∪ (A ∩ U + 2 ) offen +A zhgd. +====⇒ A ∩ U +1 = ∅ +A ∩B =∅ +====⇒ U +1 ⊆ B +B = (B ∩ U + 1 ) + + +=U + 1 ∪ ( B ∩ U +2 ) + += ∅ ist unerlaubte Zerlegung. + +Definition 14 +Sei X ein topologischer Raum. +Für x ∈ X sei Z (x ) ⊆ X definiert durch +Z ( x) := +A ⊆X zhgd. +x ∈ AA +Z ( x ) heißt Zusammenhangskomponente . +Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) +Sei X ein topologischer Raum. Dann gilt: +a) Z ( x) ist die größte zusammenhängende Teilmenge von X , die x enthält. +b) Z ( x) ist abgeschlossen. +c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. +Beweis: +15 1.5. KOMPAKTHEIT +a)Sei Z ( x) = A + 1 ˙ +∪ A +2 mit A + i = ∅ abgeschlossen. +O. B. d. A. sei x ∈ A + 1 und y ∈ A + 2 . y liegt in einer zusammehängenden Teilmenge A , +die auch x enthält. ⇒ A = (A ∩ A +1 ) + + +x ∪ ( A ∩ A +2 ) + + y ist unerlaubte Zerlegung. 
+b)NachBemerkung 14ist Z (x ) zusammenhängend ⇒ Z (x ) ⊆ Z (x ) ⇒ Z ( x ) = Z (x ) +c)Ist Z (y ) ∩ Z ( x) = ∅ Bem. 15 +=====⇒ Z (y ) ∪ Z (x ) ist zusammenhängend. +⇒ Z (x ) ∪ Z ( y ) ⊆ Z (x ) ⇒ Z ( y ) ⊆ Z (x ) +⊆ Z (y ) ⇒ Z ( x ) ⊆ Z (y ) + +Bemerkung 17 +Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f ( A ) ⊆ Y zusammenhängend. +Beweis: Sei f (A ) = U +1 ∪ U +2 , U +i = ∅ , offen, disjunkt. +⇒ f −1 + (f ( A)) = f −1 + (U + 1 ) ∪ f −1 + ( U +2 ) +⇒ A = (A ∩ f − 1 + ( U +1 )) + += ∅ ∪ ( A ∩ f −1 + (U + 2 )) + +=∅ +1.5 Kompaktheit +Definition 15 +Sei X eine Menge und U ⊆ P (X ) . +U heißt eine Überdeckung von X , wenn gilt: +∀x ∈ X : ∃M ∈ U : x ∈ M +Definition 16 +Ein topologischer Raum X heißt kompakt , wenn jede offene Überdeckung von X +U = { U +i } +i ∈I mit U +i offen in X +eine endliche Teilüberdeckung + +i ∈J ⊆IU + i = X mit |J | ∈ N +besitzt. +Bemerkung 18 +Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. +Beweis: Sei ( U +i ) +i ∈ J eine offene Überdeckung von I . +Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in +einem der U + i enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle +16 1.5. KOMPAKTHEIT +der Länge δ unterteilen und alle U +i in die endliche Überdeckung aufnehmen, die Teilintervalle +enthalten. +Angenommen, es gibt kein solches δ . Dann gibt es für jedes n ∈ N ein Intervall I +n ⊆ [0 , 1] +der Länge 1 +/n sodass I +n U + i für alle i ∈ J . +Sei x + n der Mittelpunkt von I + n . Die Folge ( x + n ) hat einen Häufungspunkt x ∈ [0 , 1] . Dann +gibt es i ∈ J mit x ∈ U + i . Da U + i offen ist, gibt es ein ε > 0 , sodass ( x − ε, x + ε ) ⊆ U + i . +Dann gibt es n + 0 , sodass gilt: 1 +/n +0 < ε +/2 und für unendlich viele3 + n ≥ n + 0 : |x − x +n | < ε +/2, also +I + n ⊆ ( x − ε, x + ε ) ⊆ U + i für mindestens ein n ∈ N .4 +⇒ Widerspruch +Dann überdecke [0 , 1] mit endlich vielen Intervallen I + 1 , . . . , I + d der Länge δ . 
Jedes I + j ist in +U + ij enthalten. +⇒ U + j + 1 , . . . , U + j +d ist endliche Teilüberdeckung von U . +Beispiel 16 (Kompakte Räume) +1) R ist nicht kompakt. +2) (0, 1) ist nicht kompakt. +U +n = (1 +/n, 1 − 1 +/n ) ⇒ +n ∈N U +n = (0, 1) +3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. +Bemerkung 19 +Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. +Beweis: Sei (V + i ) +i ∈I offene Überdeckung von A. +Dann gibt es für jedes i ∈ I eine offene Teilmenge U + i ⊆ X mit V + i = U + i ∩ A . +⇒ A ⊆ +i ∈ I U + i +⇒ U = { U +i | i ∈ I } ∪ { X \ A } ist offene Überdeckung von X +X kompakt +=======⇒ es gibt i +1 , . . . , i + n ∈ I , sodass n + +j =1 U + i + j ∪ (X \ A ) = X +⇒  + n + +j =1 U + i + j ∪ (X \ A ) + + ∩ A = A +⇒ n + +j =1 (U + i + j ∩ A ) + + += V + i +j ∪ ((X \ A ) ∩ A ) + += ∅ = A +⇒ V +i + 1 , . . . , V + i + n überdecken A . + +Bemerkung 20 +Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie +kompakt. +Beweis: Sei ( W + i ) + i ∈I eine offene Überdeckung von X × Y . Für jedes ( x, y ) ∈ X × Y gibt es +offene Teilmengen U +x,y von X und V +x,y von Y sowie ein i ∈ I , sodass U + x,y × V + x,y ⊆ W + i . +3 + Dies gilt nicht für alle n ≥ n +0 , da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. +4 + Sogar für unendlich viele. +17 1.5. KOMPAKTHEIT +W + i +x +y + xV + x,y + U +x,y YX +Abbildung 1.7:Die blaue Umgebung ist Schnitt vieler Umgebungen +Die offenen Mengen U + x +0 ,y × V + x + 0 ,y für festes x + 0 und alle y ∈ Y überdecken { x + 0 } × y . Da Y +kompakt ist, ist auch { x + 0 } × Y kompakt. Also gibt es y +1 , . . . , y + m (x + 0 ) mit + m (x + 0 ) +i =1 U + x + 0 ,y + i × +V + x + 0 ,y + i ⊇ { x + 0 } × Y . +Sei U + x + 0 := + m (x ) +i =1 U +x + 0 ,y + i . Da X kompakt ist, gibt es x + 1 , . . . 
, x + n ∈ X mit + n +j =1 U +x + j = X +⇒ +k +j =1 + m (x + j ) +i =1 + U + x + j ,y + i × V +x + j ,y + i + + +Ein grün-oranges Kästchen⊇ X × Y +⇒ +j + i W + i ( x + j , y +i ) = X × Y +Bemerkung 21 +Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. +Beweis: z. Z.: Komplement ist offen +Ist X = K , so ist K abgeschlossen in X . Andernfalls sei y ∈ X \ K . Für jedes x ∈ K seien +U + x bzw. V + y Umgebungen von x bzw. von y , sodass U +x ∩ V +y = ∅ . +X +i +Kx +y +Da K kompakt ist, gibt es endlich viele x + 1 , . . . , x + n ∈ K , sodass + m +i =1 U + x + i ⊇ K . +Sei V := n + +i =1 V + x + i +18 1.6. WEGE UND KNOTEN +⇒ V ∩ + n + +i =1 U +x + i + = ∅ +⇒ V ∩ K = ∅ +⇒ V ist Überdeckung von y , die ganz in X \ K enthalten ist. +⇒ X \ K ist offen +Damit ist K abgeschlossen. +Bemerkung 22 +Seien X, Y topologische Räume, f : X → Y stetig. +Ist K ⊆ X kompakt, so ist f ( K ) ⊆ Y kompakt. +Beweis: Sei (V + i ) +i ∈ I offene Überdeckung von f ( K ) +f stetig +====⇒ (f − 1 + ( V +i )) +i ∈ I ist offene Überdeckung von K +Kompakt +=====⇒ es gibt i +1 , . . . , i + n , sodass f − 1 + ( V +i + 1 ), . . . , f − 1 + ( V +i + n ) Überdeckung von K ist. +⇒ f (f − 1 + (V + i + 1 )), . . . , f (f − 1 + ( V +i + n )) überdecken f (K ). +Es gilt: f (f −1 + ( V )) = V ∩ f ( X ) +Satz 1.1 (Heine-Borel) +Eine Teilmenge von R n + oder Cn + ist genau dann kompakt, wenn sie beschränkt und +abgeschlossen ist. +Beweis: „ ⇒“: Sei K ⊆ R n + (oder C n + ) kompakt. +Da R n + und C n + hausdorffsch sind, ist K nachBemerkung 21abgeschlossen. Nach Vorausset- +zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist +beschränkt. +„ ⇐“ Sei A ⊆ R n + (oder Cn + ) beschränkt und abgeschlossen. +Dann gibt es einen Würfel W = [−N , N ] × · · · × [−N , N ] + +n mal mit A ⊆ W bzw. „Polyzylinder“ +Z = { (z + 1 , . . . , z + n ) ∈ C n + | z + i ≤ N für i = 1, . . . , n } +NachBemerkung 20undBemerkung 18istW kompakt, also ist A nachBemerkung 19auch +kompakt. 
Genauso ist Z kompakt, weil +{ z ∈ C | |z| ≤ 1 } +homöomorph zu +{ (x, y ) ∈ R 2 + | ‖( x, y )‖ ≤ 1 } +ist. +1.6 Wege und Knoten +Definition 17 +Sei X ein topologischer Raum. +19 1.6. WEGE UND KNOTEN +a)Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X . +b) γ heißt geschlossen , wenn γ (1) = γ (0) gilt. +c) γ heißt einfach , wenn γ | +[0, 1) injektiv ist. +Beispiel 17 +Ist X diskret, so ist jeder Weg konstant, d. h. von der Form +∀x ∈ [0, 1] : γ ( x) = c, c ∈ X +Denn γ ([0, 1]) ist zusammenhängend für jeden Weg γ . +Definition 18 +Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten +x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ (0) = x und γ (1) = y . +Bemerkung 23 +Sei X ein topologischer Raum. +a) X ist wegzusammenhängend ⇒ X ist zusammenhängend +b) X ist wegzusammenhängend ⇍ X ist zusammenhängend +Beweis: +a) + Sei X ein wegzusammenhängender topologischer Raum, A +1 , A +2 nichtleere, disjunkte, +abgeschlossene Teilmengen von X mit A + 1 ∪ A + 2 = X . Sei x ∈ A + 1 , y ∈ A +2 , γ : [0, 1] → X +ein Weg von x nach y . +Dann ist C := γ ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. +C = (C ∩ A + 1 ) + + x ∪ ( C ∩ A + 2 ) + + y +ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch +b)Sei X = + ( x, y ) ∈ R 2 + + x 2 + + y 2 + = 1 ∨ y = 1 + 2 · e − 1 +10 x +. +Abbildung 1.8a veranschaulicht diesen Raum. +Sei U + 1 ∪ U +2 = X, U +1 = U +2 = ∅, U +i offen. X = C ∪ S . Dann ist C ⊆ U +1 oder C ⊆ U +2 , +weil C und S zusammenhängend sind. +Also ist C = U +1 und S = U +2 (oder umgekehrt). +Sei y ∈ C = U +1 , ε > 0 und B + ε ( y ) ⊆ U + 1 eine Umgebung von y , die in U +1 enthalten ist. +Aber: B +ε ( y ) ∩ S ≠ ∅ ⇒ Widerspruch ⇒ X = C ∪ S ist zusammenhängend, aber nicht +wegzusammenhängend. +Beispiel 18 (Hilbert-Kurve) +Es gibt stetige, surjektive Abbildungen [0 , 1] → [0 , 1] × [0 , 1] . Ein Beispiel ist die in Abbil- +dung 1.9 dargestellte Hilbert-Kurve. +Definition 19 +Sei X ein topologischer Raum. 
Eine Jordankurve in X ist ein Homöomorphismus γ : +[0, 1] → C ⊆ X bzw. γ : S 1 + → C ⊆ X , wobei C := Bild γ . +20 1.6. WEGE UND KNOTEN +(a)Spirale S mit Kreis C 0. 1 1 +−101 + X +Y {( x, sin( 1 +x )) ∈ X × Y } +(−1 , 1) ⊆ Y +(b)Sinus +Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend +sind. +(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5 +Abbildung 1.9:Hilbert-Kurve +Jede Jordankurve ist also ein einfacher Weg. +Satz 1.2 (Jordanscher Kurvensatz) +Ist C = γ ([0 , 1]) eine geschlossene Jordankurve in R 2 + , so hat R 2 + \ C genau zwei +Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. +außen +innen +Jordankurve +Abbildung 1.10: Die beschränkte Zusammenhangskomponente wird häufig Inneres, die unbe- +schränkte Äußeres genannt. +Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie: +Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265) +nachgelesen werden. +Idee: Ersetze Weg C durch Polygonzug. +21 1.6. WEGE UND KNOTEN +Definition 20 +Eine geschlossene Jordankurve in R 3 + heißt Knoten. +Beispiel 19 (Knoten) +(a)Trivialer Knoten (b)Kleeblattknoten (c)Achterknoten (d) 6 + 2 -Knoten +Abbildung 1.11:Beispiele für verschiedene Knoten +Definition 21 +Zwei Knoten γ + 1 , γ +2 : S 1 + → R 3 + heißen äquivalent, wenn es eine stetige Abbildung +H : S 1 + × [0, 1] → R 3 +gibt mit + H (z, 0) = γ + 1 ( z ) ∀z ∈ S 1 +H (z, 1) = γ + 2 ( z ) ∀z ∈ S 1 +und für jedes feste t ∈ [0, 1] ist + H +t : S 1 + → R 3 + , z → H (z, t) +ein Knoten. Die Abbildung H heißt Isotopie zwischen γ + 1 und γ + 2 . +Definition 22 +Sei γ : [0, 1] → R3 + ein Knoten, E eine Ebene und π : R 3 + → E eine Pro jektion auf E . 
+π heißt Knotendiagramm von γ , wenn gilt: +|π −1 (x )| ≤ 2 ∀x ∈ π (γ ) +Ist (π | +γ ([0, 1]) ) −1 + (x ) = { y +1 , y +2 }, so liegt y + 1 über y + 2 , wenn gilt: +∃λ > 1 : ( y + 1 − x) = λ (y + 2 − x ) +Satz 1.3 (Satz von Reidemeister) +Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie +durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. +22 1.6. WEGE UND KNOTEN +(a) Ω + 1 (b) Ω +2 +(c) Ω + 3 +Abbildung 1.12:Reidemeister-Züge +Beweis: Durch sorgfältige Fallunterscheidung. 5 +Definition 23 +Ein Knotendiagramm D heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt +werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben +auftreten. + Abbildung 1.13:Ein 3-gefärbter Kleeblattknoten +5 + Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177. +23 1.6. WEGE UND KNOTEN +Übungsaufgaben +Aufgabe 1 (Sierpińskiraum) +Es sei X := { 0, 1 } und T +X := { ∅ , { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. +(a)Beweisen Sie, dass (X, T +X ) ein topologischer Raum ist. +(b)Ist (X, T +X ) hausdorffsch? +(c)Ist T +X von einer Metrik erzeugt? +Aufgabe 2 +Es sei Z mit der von den Mengen U + a,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie +versehen. +Zeigen Sie: +(a)Jedes U +a,b und jede einelementige Teilmenge von Z ist abgeschlossen. +(b) { − 1, 1 } ist nicht offen. +(c)Es gibt unendlich viele Primzahlen. +Aufgabe 3 (Cantorsches Diskontinuum) +Für jedes i ∈ N sei P + i := { 0, 1 } mit der diskreten Topologie. Weiter sei P := ∏ +i ∈N P + i . +(a)Wie sehen die offenen Mengen von P aus? +(b)Was können Sie über den Zusammenhang von P sagen? +Aufgabe 4 (Kompaktheit) +(a)Ist GL + n ( R) = { A ∈ R n×n + | det(A) ≠ 0 } kompakt? +(b)Ist SL +n ( R) = { A ∈ R n× n + | det(A) = 1 } kompakt? +(c)Ist P (R ) kompakt? +Aufgabe 5 (Begriffe) +Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. 
+Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, +begründen Sie warum. +1)Ein Homomorphismus, der zugleich ein Homöomorphismus ist, +2)ein Homomorphismus, der kein Homöomorphismus ist, +24 1.6. WEGE UND KNOTEN +3)ein Homöomorphismus, der kein Homomorphismus ist +Aufgabe 6 (Begriffe) +Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“. +2 Mannigfaltigkeiten und +Simplizialkomplexe +2.1 Topologische Mannigfaltigkeiten +Definition 24 +Sei ( X, T) ein topologischer Raum und n ∈ N. +a) Eine n -dimensionale Karte auf X ist ein Paar ( U, ϕ) , wobei U ∈ T und ϕ : U → V +Homöomorphismus von U auf eine offene Teilmenge V ⊆ R n + . +b) Ein n -dimensionaler Atlas A auf X ist eine Familie ( U + i , ϕ +i ) + i ∈ I von Karten auf X , +sodass + i ∈ I U +i = X . +c) X + heißt (topologische) n -dimensionale Mannigfaltigkeit , wenn X hausdorffsch ist, +eine abzählbare Basis der Topologie hat und einen n -dimensionalen Atlas besitzt. +Anschaulich ist also ein n -dimensionale Mannigfaltigkeit lokal dem Rn + ähnlich. +Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) +Jede n -dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R . +Beweis: Sei (X, T ) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ R n + , wobei +V offen und ϕ ein Homöomorphismus ist, eine Karte auf X . +Da jede offene Teilmenge des R n + genauso mächtig ist wie der R n + , ϕ als Homöomorphismus +insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig +sind, ist U genauso mächtig wie der R n + . Da jede Mannigfaltigkeit mindestens eine Karte +hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn + . +Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können +beliebig viele Elemente haben. +Bemerkung 25 +a)Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1] +b) Für n = m sind Rn + und R m + nicht homöomorph. 
Zum Beweis benutzt man den „Satz +von der Gebietstreue“ (Brouwer): +Ist U ⊆ R n + offen und f : U → R n + stetig und injektiv, so ist f (U ) offen. +Ist n < m und R m + homöomorph zu R n + , so wäre +f : R n + → R m + → R n + , ( x + 1 , . . . , x + n ) → (x + 1 , x +2 , . . . , x + n , 0, . . . , 0) +eine stetige injektive Abbildung. Also müsste f (R n + ) offen sein ⇒ Widerspruch +26 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +Beispiel 20 (Mannigfaltigkeiten) +1) Jede offene Teilmenge U ⊆ R n + ist eine n -dimensionale Mannigfaltigkeit mit einem +Atlas aus einer Karte. +2) C n + ist eine 2n -dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: +(z + 1 , . . . , z + n ) → ( (z + 1 ), (z + 1 ), . . . , ( z + n ) , (z + n )) +3) P n + (R ) = (R n+1 + \ { 0 } )/ +∼ = S n + / + ∼ und P n + (C ) sind Mannigfaltigkeiten der Dimension +n bzw. 2 n , da gilt: +Sei U +i := { ( x +0 : · · · : x +n ) ∈ P n + (R ) | x + i = 0 } ∀i ∈ 0, . . . , n . Dann ist P n + (R ) = + n +i =0 U +i +und die Abbildung + U +i → R n +( x + 0 : · · · : x + n ) → + x + 0 +x + i , . . . , + x + i +x + i , . . . , x + n +x + i +(y + 1 : · · · : y +i −1 : 1 : y + i : · · · : y + n ) →(y + 1 , . . . , y + n ) +ist bijektiv. +Die U +i mit i = 0, . . . , n bilden einen n -dimensionalen Atlas: +x = (1 : 0 : 0) ∈ U + 0 → R 2 + x → (0, 0) +y = (0 : 1 : 1) ∈ U + 2 → R 2 + y → (0, 1) +Umgebung: B +1 (0, 1) → { (1 : u : v ) | ( u, v ) < 1 } = V + 1 +Umgebung: B +1 (0, 1) → + (w : z : 1) + + w 2 + + z 2 + < 1 + = V +2 +V + 1 ∩ V + 2 = ∅ ? +(a : b : c) ∈ V + 1 ∩ V +2 +⇒ a = 0 und ( b +a ) 2 + + ( c +a )2 + < 1 ⇒ c +a < 1 +⇒ c = 0 und ( a +c )2 + + ( b +c ) 2 + < 1 ⇒ a +c < 1 +⇒ Widerspruch +4) S n + = + x ∈ R n+1 + + x = 1 + ist n -dimensionale Mannigfaltigkeit. +Karten: +D + i := {( x + 1 , . . . , x + n+1 ) ∈ S n + | x +i > 0 } → B +1 (0, . . . , 0 + + +∈ Rn ) +C + i := {( x + 1 , . . . , x + n+1 ) ∈ S n + | x +i < 0 } → B +1 (0, . . . , 0) +(x + 1 , . . . , x + n+1 ) → ( x + 1 , . . . , + x + i , . . . 
, x + n +1 )1 +(x + 1 , . . . , x + n ) → (x +1 , . . . , x + i − 1 , +1 − +n +k =1 x 2 +k , x +i , . . . , x + n ), oder − +1 − +n +k =1 x2 +k für C + i +S n + = + n+1 +i =1 (C + i ∪ D +i ) +Als kompakte Mannigfaltigkeit wird S n + auch „ geschlossene Mannigfaltigkeit“ genannt. +5) [0, 1] ist keine Mannigfaltigkeit, denn: +Es gibt keine Umgebung von 0 in [0 , 1] , die homöomorph zu einem offenem Intervall +ist. +1 + x +i wird rausgenommen +27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +6) V + 1 = + ( x, y ) ∈ R 2 + + x · y = 0 + ist keine Mannigfaltigkeit. +Das Problem ist (0 , 0) . Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 +Zusammenhangskomponenten. Jeder R n + zerfällt jedoch in höchstens zwei Zusammen- +hangskomponenten, wenn man einen Punkt entfernt. +7) V + 2 = + ( x, y ) ∈ R 2 + + x 3 + = y 2 + ist eine Mannigfaltigkeit. +8) X = (R \ { 0 }) ∪ (0 +1 , 0 +2 ) +U ⊆ X offen ⇔ +U offen in R \ { 0 } , falls 0 +1 /∈ U, 0 + 2 ∈ U +∃ε > 0 : (−ε, ε) ⊆ U falls 0 +1 ∈ U, 0 + 2 ∈ U +Insbesondere sind ( R \ { 0 }) ∪ { 0 + 1 } und ( R \ { 0 } ) ∪ { 0 +2 } offen und homöomorph +zu R . +Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0 + 1 +und 0 + 2 . +9) GL + n ( R ) ist eine Mannigfaltigkeit der Dimension n 2 + , weil offene Teilmengen von R n2 +eine Mannigfaltigkeit bilden. +Definition 25 +Seien X, Y n -dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho- +möomorphismus Z = (X ˙ +∪ Y )/ + ∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation +und der von ∼ induzierten Quotiententopologie. +Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n -dimensionalen +Karten. Falls Z hausdorffsch ist, ist Z eine n -dimensionale Mannigfaltigkeit. +Bemerkung 26 +Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit +der Dimension n + m. +Beweis: Produkte von Karten sind Karten. 
+Beispiel 21 +Mannigfaltigkeiten mit Dimension 1: +1)Offene Intervalle, R , (0, 1) sind alle homöomorph +2) S 1 +Mannigfaltigkeiten mit Dimension 2: +1) R 2 +2) S 2 + (0 Henkel) +3) T 2 + (1 Henkel) +4)oder mehr Henkel, wie z.B. der Zweifachtorus inAbbildung 2.1 +Bemerkung 27 +Sei n ∈ N , F : R n + → R stetig differenzierbar und X = V ( F ) := { x ∈ R n + | F ( x) = 0 } das +„vanishing set“ . +Dann gilt: +28 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +Abbildung 2.1:Durch Verklebung zweier Tori entsteht ein Zweifachtorus. +a) X ist abgeschlossen in R n +b)Ist grad(F )(X ) = 0 ∀x ∈ X , so ist X eine Mannigfaltigkeit der Dimension n − 1. +Beweis: +a) + Sei y ∈ Rn + \ V (F ). Weil F stetig ist, gibt es δ > 0, sodass F (B +δ (y )) ⊆ B +ε (F (y )) mit +ε = 1 +2 F ( y ) . Folgt B +δ ( y ) ∩ V (F ) = ∅ ⇒ Rn + \ V (F ) ist offen. +b) Sei x ∈ X mit grad( F )( x ) = 0 , also o. B. d. A. ∂F +∂X + 1 ( x ) = 0 , x = ( x + 1 , . . . , x + n ) , +x + := ( x + 2 , . . . , x + n ) ∈ R n− 1 + . Der Satz von der impliziten Funktion liefert nun: Es +gibt Umgebungen U von x + und differenzierbare Funktionen g : U → R , sodass +G : U → R n + , u → (g (u), u) eine stetige Abbildung auf eine offene UmgebungV von x +in X ist. + +Beispiel 22 +1) F + : R 3 + → R , (x, y, z ) → x 2 + + y 2 + + z 2 + − 1, V (F ) = S 2 + , grad(F ) = (2x, 2y, 2z ) Bem. 27.b +======⇒ +S n + ist n -dimensionale Mannigfaltigkeit in R n+1 +2) F : R 2 + → R , (x, y ) → y 2 + − x 3 + Es gilt: grad(F ) = (−3x 2 + , 2y ). Also: grad(0, 0) = (0, 0). +−5 +−4 +−3 +−2 +−1 +0 +1 +2 +3 +4 +5−4 + −2 + 0 + 2 + 4−1000100 + xyz + −1000100f (x, y ) + (a) F ( x, y ) = y 2 + − x 3 2 4 6 8 10 12 +−10−5510 + xy + a = 1 +3 +a = 1 +a = 2 +(b) y 2 + − ax3 + = 0 +Abbildung 2.2:Rechts ist die Neilsche Parabel für verschiedene Parameter a. +Daher istBemerkung 27.bnicht anwendbar, aberV (F ) ist trotzdem eine 1-dimensionale +topologische Mannigfaltigkeit. +29 2.1. 
TOPOLOGISCHE MANNIGFALTIGKEITEN +Definition 26 +Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n -dimensionale +Mannigfaltigkeit mit Rand , wenn es einen Atlas (U + i , ϕ +i ) gibt, wobei U + i ⊆ X +i offen und +ϕ +i ein Homöomorphismus auf eine offene Teilmenge von +R n ++ , 0 := { (x + 1 , . . . , x + n ) ∈ Rn + | x +n ≥ 0 } +ist. +R n ++, 0 ist ein „Halbraum“ . +Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. +∼ += +(a)Halbraum +∼ += +(b)Pair of pants ∼ += +(c)Sphäre mit einem Loch +Abbildung 2.3:Beispiele für Mannigfaltigkeiten mit Rand +Definition 27 +Sei X eine n -dimensionale Mannigfaltigkeit mit Rand und Atlas A . Dann heißt +∂ X := +( U,ϕ )∈A { x ∈ U | ϕ( x ) = 0 } +Rand von X . +∂ X ist eine Mannigfaltigkeit der Dimension n − 1 . +Definition 28 +Sei X eine n -dimensionale Mannigfaltigkeit mit Atlas (U + i , ϕ +i ) + i ∈I +Für i, j ∈ I mit U + i ∩ U + j = ∅ heißt + ϕ +ij := ϕ + j ◦ ϕ− 1 +i +ϕ + i ( U +i ∩ U + j ) → ϕ +j (U + i ∩ U +j ) +Kartenwechsel oder Übergangsfunktion. +30 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +R n + R nU +i U + j +V +i V + jX +ϕ + i ϕ + j +Abbildung 2.4:Kartenwechsel +2.2 Differenzierbare Mannigfaltigkeiten +Definition 29 +Sei X eine n -dimensionale Mannigfaltigkeit mit Atlas (U + i , ϕ +i ) + i ∈I . +a) X heißt differenzierbare Mannigfaltigkeit der Klasse C k + , wenn jede Karten- +wechselabbildung ϕ + ij , i, j ∈ I k -mal stetig differenzierbar ist. +b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig- +faltigkeit der Klasse C ∞ + ist. +Differenzierbare Mannigfaltigkeiten der Klasse C ∞ + werden auch glatt genannt. +Definition 30 +Sei X eine differenzierbare Mannigfaltigkeit der Klasse C k + ( k ∈ N ∪ { ∞ }) mit Atlas +A = (U + i , ϕ +i ) + i ∈I . +a) Eine Karte ( U, ϕ) auf X heißt verträglich mit A , wenn alle Kartenwechsel ϕ ◦ ϕ −1 +i +und ϕ + i ◦ ϕ− 1 + (i ∈ I mit U +i ∩ U = ∅ ) differenzierbar von Klasse C k + sind. 
+b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der +Klasse C k + . Er heißt C k + -Struktur auf X . +Eine C ∞ + -Struktur heißt auch differenzierbare Struktur auf X . +Bemerkung 28 +Für n ≥ 4 gibt es auf S n + mehrere verschiedene differenzierbare Strukturen, die sogenannten +„exotische Sphären“ . +Definition 31 +Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m , x ∈ X . +a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C k + ), wenn +es Karten ( U, ϕ) von X mit x ∈ U und ( V , ψ ) von Y mit f ( U ) ⊆ V gibt, sodass +ψ ◦ f ◦ ϕ −1 + stetig differenzierbar von Klasse C k + in ϕ (x ) ist. +b) f heißt differenzierbar (von Klasse C k + ), wenn f in jedem x ∈ X differenzierbar ist. +c) f heißt Diffeomorphismus , wenn f differenzierbar von Klasse C ∞ + ist und es eine +differenzierbare Abbildung g : Y → X von Klasse C ∞ + gibt mit g ◦ f = id +X und +f ◦ g = id +Y . +31 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +Bemerkung 29 +Die Bedingung inDefinition 31.ahängt nicht von den gewählten Karten ab. +Beweis: Seien (U + , ϕ + ) und (V + , ψ + ) Karten von X bzw. Y um x bzw. f (x ) mit f (U + ) ⊆ V + . +⇒ ψ + ◦ f ◦ (ϕ + )− 1 += ψ + ◦ (ψ −1 + ◦ ψ ) ◦ f ◦ ( ϕ− 1 + ◦ ϕ) ◦ (ϕ + )−1 +ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 + differenzierbar ist. +Beispiel 23 +f + : R → R , x → x 3 + ist kein Diffeomorphismus, aber Homöomorphismus, da mitg (x ) := 3√ + x +gilt: f ◦ g = id +R , g ◦ f = id +R +Bemerkung 30 +Sei X eine glatte Mannigfaltigkeit. Dann ist +Diffeo(X ) := { f : X → X | f ist Diffeomorphismus } +eine Untergruppe von Homöo( X ). +Definition 32 +S ⊆ R 3 + heißt reguläre Fläche : ⇔ ∀ s ∈ S ∃ Umgebung V ( s ) ⊆ R 3 + ∃U ⊆ R 2 + offen: +∃ differenzierbare Abbildung F : U → V ∩ S : Rg(J +F (u)) = 2 ∀u ∈ U . +F heißt (lokale) reguläre Parametrisierung von S . 
+F (u, v ) = ( x (u, v ), y (u, v ), z (u, v )) +J +F (u, v ) =  + ∂x +∂u (p ) ∂x +∂v ( p) +∂y +∂u (p ) ∂y +∂v (p ) +∂z +∂u (p ) ∂z +∂v ( p )  + +Beispiel 24 +1)Rotationsflächen: Sei r : R → R + >0 eine differenzierbare Funktion. +F : R 2 + → R 3 + (u, v ) → (r ( u) cos(u), r ( v ) sin(u), v ) +J +F ( u, v ) =  + −r ( v ) sin u r + ( v ) cos u +r (v ) cos u r + ( v ) sin u +0 1  + +hat Rang 2 für alle (u, v ) ∈ R 2 + . +2)Kugelkoordinaten: F : R2 + → R 3 + , +(u, v ) → (R cos v cos u, R cos v sin u, R sin v ) +Es gilt: F ( u, v ) ∈ S 2 +R , denn +R 2 + cos 2 + (v ) cos2 + (u) + R 2 + cos 2 + (v ) sin2 + (u) + R 2 + sin2 + (v ) +=R 2 + (cos 2 + ( v ) cos2 + (u) + cos 2 + ( v ) sin2 + (u) + sin 2 + ( v )) +=R 2 + cos 2 + (v )(cos 2 + (u) + sin 2 + ( u)) + sin 2 + (v ) +=R 2 + cos 2 + (v ) + sin 2 + ( v ) +=R 2 +32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +(a)Kugelkoordinaten −1 + 0 + 1 + 2 + −2 −1 0 1 20. 60. 81 + (b)Rotationskörper +(c)Sinus und Kosinus haben keine gemeinsame Nullstelle +33 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +Die Jacobi-Matrix + J + F ( u, v ) =  + −R cos v sin u −R sin v cos u +R cos v cos u −R sin v sin u +0 R cos v  + +hat Rang 2 für cos v = 0. In N und S ist cos v = 0. +Bemerkung 31 +Jede reguläre Fläche S ⊆ R 3 + ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. +Beweis: +S ⊆ R3 + ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von +regulären Flächen folgt direkt, dass Karten(U +i , F +i ) und (U + j ⊆ R 2 + , F +j : R 2 + → R 3 + ) von S mit +U + i ∩ U +j = ∅ existieren, wobei F +i und F +j nach Definition differenzierbare Abbildungen sind. +z.Z.: F − 1 +j ◦ F +i ist ein Diffeomorphismus. +U +i U + jS +s +F +i F +j +F −1 +j ◦ F +i +Abbildung 2.5:Reguläre Fläche S zum Beweis vonBemerkung 31 +Idee: + Finde differenzierbare Funktion +F −1 +j in Umgebung W von s, sodass +F − 1 +j | +S ∩ W = F − 1 +j . +Ausführung: Sei u +0 ∈ U + i , v +0 ∈ U +j mit F + i (u +0 ) = s = F +j (v + 0 ) . 
+Da Rg(J +F +j ( v + 0 )) = 2 ist, ist o. B. d. A. +det + ∂x +∂u ∂x +∂v +∂y +∂u ∂y +∂v + (v + 0 ) = 0 +und F +j (u, v ) = (x ( u, v ) , y ( u, v ) , z ( u, v )). +Definiere +F +j : U + j × R → R3 + durch + +F +j ( u, v, t ) := (x (u, v ), y (u, v ), z (u, v ) + t) +Offensichtlich: +F +j | + U + j ×{ 0 } = F +j +J + +F + j =  + ∂x +∂u ∂x +∂v 0 +∂y +∂u ∂y +∂v 0 +∂z +∂u ∂z +∂v 1 + + ⇒ det J + +F + j ( v + 0 , 0) = 0 +Analysis II +======⇒ Es gibt Umgebungen W von F +j von +F +j (v + 0 , 0) = F +j (v + 0 ) = s , sodass +F +j auf W eine +differenzierbar Inverse F −1 +j hat. +34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN +Weiter gilt: + +F +j −1 + | + W ∩ S = F −1 +j | + W ∩ S +⇒ F − 1 +j ◦ F +i | + F −1 +i (W ∩ S ) = F − 1 +j ◦ F +i | +F −1 +i ( W ∩ S ) +ist differenzierbar. +Definition 33 +Sei G eine Mannigfaltigkeit und ( G, ◦) eine Gruppe. +a) G heißt topologische Gruppe , wenn die Abbildungen ◦ : G × G → G und ι : G → G +definiert durch + g ◦ h := g · h und ι (g ) := g −1 +stetig sind. +b) Ist G eine differenzierbare Mannigfaltigkeit, so heißtG Lie-Gruppe, wenn (G, ◦ ) und +(G, ι) differenzierbar sind. +Beispiel 25 (Lie-Gruppen) +1)Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. +2) GL + n ( R) +3) (R × + , · ) +4) (R + > 0 , · ) +5) (R n + , +), denn A · B ( i, j ) = +n +k =1 a +ik b +kj ist nach allen Variablen differenzierbar +(A −1 + )(i, j ) = det(A + ij ) +det A + A +ij =  + + a + i 1 . . . a +in +. +. +. . + . + . . +. +. +a +n1 . . . a +nn  + + ∈ R (n− 1)×( n−1) +ist differenzierbar. +det A +ij kann 0 werden, da: + + 1 1 +−1 0 +6) SL +n ( R) = { A ∈ GL + n ( R) | det(A) = 1 } +Bemerkung 32 +Ist G eine Lie-Gruppe und g ∈ G , so ist die Abbildung +l +g : G → G +h → g · h +ein Diffeomorphismus. +35 2.3. SIMPLIZIALKOMPLEX +2.3 Simplizialkomplex +Definition 34 +Seien v + 0 , . . . , v + k ∈ Rn + Punkte. +a) v + 0 , . . . , v + k sind in allgemeiner Lage +⇔ + es gibt keinen (k − 1)-dimensionalen affinen Untervektorraum, derv + 0 , . . . 
, v + k enthält +⇔ v + 1 − v + 0 , . . . , v + k − v + 0 sind linear unabhängig. +b) conv (v + 0 , . . . , v + k ) := + +k +i =0 λ +i v + i + + λ + i ≥ 0, +k +i =0 λ + i = 1 + heißt die konvexe Hülle von +v + 0 , . . . , v + k . +Definition 35 +a) + Sei ∆ n + = conv ( e + 0 , . . . , e + n ) ⊆ R n+1 + die konvexe Hülle der Standard-Basisvektoren +e +0 , . . . , e + n . +Dann heißt ∆ n + Standard-Simplex und n die Dimension des Simplex. +b) Für Punkte v + 0 , . . . , v + k im R n + in allgemeiner Lage heißt ∆(v +0 , . . . , v + k ) = conv (v + 0 , . . . , v + k ) +ein k -Simplex in R n + . +c) Ist ∆( v + 0 , . . . , v + k ) ein k -Simplex und I = { i +0 , . . . , i + r } ⊆ { 0, . . . , k } , so ist s +i + 0 ,...,i + r := +conv( v + i + 0 , . . . , v + i + r ) ein r -Simplex und heißt Teilsimplex oder Seite von ∆ . +(a)0-Simplex ∆ 0 +1 2 3123 + e +0e + 1 +(b)1-Simplex ∆ 1 1 2 3123 + e +0e +1 + e + 2 +(c)2-Simplex ∆ 2 e + 0 e + 1e +2 +e +3 +(d)3-Simplex ∆ 3 +Abbildung 2.6:Beispiele für k -Simplexe +Definition 36 +a) + Eine endliche Menge K von Simplizes im Rn + heißt (endlicher) Simplizialkomplex , +wenn gilt: +(i)Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K . +(ii)Für ∆ + 1 , ∆ + 2 ∈ K ist ∆ + 1 ∩ ∆ + 2 leer oder ein Teilsimplex von ∆ + 1 und von ∆ + 2 . +b) |K | := + ∆ ∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K . +c)Ist d = max { k ∈ N + 0 | K enthält k -Simplex }, so heißt d die Dimension von K . +36 2.3. SIMPLIZIALKOMPLEX +(a)1D Simplizialkomplex (b) 2D Simplizialkomplex +(ohne untere Fläche!) (c)2D Simplizialkomplex +(d)1D Simplizialkomplex (e)2D Simplizialkomplex +P +(f ) P ist kein Teilsimplex, da Eigen- +schaftPunkt b.iiverletzt ist P +(g)Simplizialkomplex +Abbildung 2.7:Beispiele für Simplizialkomplexe +Definition 37 +Seien K, L Simplizialkomplexe. Eine stetige Abbildung +f : |K | → |L| +heißt simplizial, wenn für jedes ∆ ∈ K gilt: +a) f (∆) ∈ L +b) f | +∆ : ∆ → f (∆) ist eine affine Abbildung. 
+Beispiel 26 (Simpliziale Abbildungen) +1) ϕ( e +1 ) := b +1 , ϕ(e + 2 ) := b +2 +ϕ ist eine eindeutig bestimmte lineare Abbildung +37 2.3. SIMPLIZIALKOMPLEX +0 e + 2e + 1 + 0 b +1b +2 +ϕ +2)Folgende Abbildung ϕ : ∆n + → ∆ n−1 + ist simplizial: +ϕ +3)Tori können simplizial auf Sphären abgebildet werden (vgl.Abbildung 2.8) +Abbildung 2.8:Abbildung eines Torus auf eine Sphäre +Definition 38 +Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei a + n ( K ) die Anzahl der n -Simplizes in +K . +Dann heißt + χ (K ) := dim K + +n =0 (−1)n + a + n ( K ) +Eulerzahl (oder Euler-Charakteristik) von K . +Beispiel 27 +1) χ (∆1 + ) = 2 − 1 = 1 +χ (∆2 + ) = 3 − 3 + 1 = 1 +χ (∆3 + ) = 4 − 6 + 4 − 1 = 1 +2) χ (Oktaeder-Oberfläche ) = 6 − 12 + 8 = 2 +χ (Rand des Tetraeders) = 2 +χ (Ikosaeder ) = 12 − 30 + 20 = 2 +3) χ (Würfel) = 8 − 12 + 6 = 2 +χ (Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 +Bemerkung 33 +χ (∆n + ) = 1 für jedes n ∈ N + 0 +38 2.3. SIMPLIZIALKOMPLEX +Beweis: ∆n + ist die konvexe Hülle von (e + 0 , . . . , e + n ) in Rn +1 + . Jede (k + 1)-elementige Teilmenge +von { e +0 , . . . , e + n } definiert ein k -Simplex. +⇒ a + k (∆n + ) = + n+1 +k +1 + , k = 0, . . . , n +⇒ χ (∆n + ) = +n +k =0 ( −1)k + n +1 +k +1 +f ( x) = (x + 1) n+1 Binomischer +Lehrsatz += +n +1 +k =0 + n+1 +k + xk +⇒ 0 = +n +1 +k =0 + n+1 +k + (−1)k + = χ (∆n + ) − 1 +⇒ χ (∆n + ) = 1 +Definition 39 +a)Ein 1D-Simplizialkomplex heißt Graph. +b)Ein Graph, der homöomorph zu S 1 + ist, heißt Kreis. +c)Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. +(a) Dies wird häufig auch als +Multigraph bezeichnet. (b) Planare Einbettung des Te- +traeders +(c) K +5 (d) K +3 ,3 +Abbildung 2.9:Beispiele für Graphen +Bemerkung 34 +Für jeden Baum T gilt χ( T ) = 1. +Beweis: Induktion über die Anzahl der Ecken. +Bemerkung 35 +a) + Jeder zusammenhängende Graph Γ enthält einen Teilbaum T , der alle Ecken von Γ +enthält. 2 +b)Ist n = a +1 (Γ) − a +1 (T ) , so ist χ (Γ) = 1 − n . 
+Beweis: +a)Siehe „Algorithmus von Kruskal“. +2 + T wird „Spannbaum“ genannt. +39 2.3. SIMPLIZIALKOMPLEX +b) χ (Γ) = a + 0 (Γ) − a + 1 (Γ) += a + 0 (Γ) − ( n + a +1 (T )) += a + 0 (T ) − a + 1 (T ) − n += χ (T ) − n += 1 − n +Bemerkung 36 +Sei ∆ ein n -Simplex und x ∈ ∆ ◦ + ⊆ R n + . Sei K der Simplizialkomplex, der aus ∆ durch +„Unterteilung“ in x entsteht. Dann ist χ ( K ) = χ (∆) = 1. +(a) K (b) ∆, das aus K durch Unter- +teilung entsteht +Abbildung 2.10:Beispiel fürBemerkung 36. +Beweis: χ( K ) = χ (∆) − (−1)n + +n-Simplex + n + +k =0 ( −1)k + n + 1 +k + + +(1+(−1))n +1 = χ (∆) +Definition 40 +Sei X ein topologischer Raum, K ein Simplizialkomplex und +h : | K | → X +ein Homöomorphismus von der geometrischen Realisierung |K | auf X . Dann heißt h eine +Triangulierung von X . +Beispiel 28 (Triangulierung des Torus) +Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für +fehlerhafte „Triangulierungen“ sind inBeispiel 28zu sehen. Korrekte Triangulierungen sind +inBeispiel 28. +Satz 2.1 (Eulersche Polyederformel) +Sei P ein konvexes Polyeder in R 3 + , d. h. ∂ P ist ein 2-dimensionaler Simplizialkomplex, +sodass gilt: + ∀x, y ∈ ∂ P : [x, y ] ⊆ P +Dann ist χ (∂ P ) = 2. +Beweis: +1)Die Aussage ist richtig für den Tetraeder. +2) O. B. d. A. sei 0 ∈ P und P ⊆ B +1 (0) . Pro jeziere ∂ P von 0 aus auf ∂ B +1 (0) = S 2 + . +Erhalte Triangulierung von S 2 + . +40 2.3. SIMPLIZIALKOMPLEX +(a) Die beiden markierten Dreiecke schneiden sich im +Mittelpunkt und in einer Seite. (b) Die beiden markierten Dreiecke schneiden sich im +Mittelpunkt und außen. +Abbildung 2.11:Fehlerhafte Triangulierungen +(a)Einfache Triangulierung (b)Minimale Triangulierung +Abbildung 2.12:Triangulierungen des Torus +41 2.3. SIMPLIZIALKOMPLEX +3) Sind P +1 und P + 2 konvexe Polygone und T +1 , T +2 die zugehörigen Triangulierungen von +S 2 + , so gibt es eine Triangulierung T , die sowohl um T +1 als auch um T +2 Verfeinerung +ist (vgl.Abbildung 2.13). 
+ T +1 +T +2 +T +Abbildung 2.13: T ist eine Triangulierung, die für T +1 und T +2 eine Verfeinerung ist. +NachBemerkung 36ist χ (∂ P +1 ) = χ (T +1 ) = χ(T ) = χ (T +2 ) = χ (∂ P +2 ) = 2, weil o. B. d. A. +P + 2 ein Tetraeder ist. +Bemerkung 37 (Der Rand vom Rand ist 0) +Sei K ein endlicher Simplizialkomplex mit KnotenmengeV und < eine Totalordnung auf V . +Sei A + n die Menge der n -Simplizes in K , d. h. +A + n ( K ) := { σ ∈ K | dim( σ ) = n } für n = 0, . . . , d = dim(K ) +und C +n (K ) der R -Vektorraum mit Basis A + n ( K ) , d. h. +C + n (K ) =  + + +σ ∈ A + n (K ) c +σ · σ + + + + + c +σ ∈ R  + + +Sei σ = ∆(x + 0 , . . . , x + n ) ∈ A + n ( K ) , sodass x + 0 < x +1 < · · · < x +n . +Für i = 0 , . . . , n sei ∂ + i σ := ∆( x + 0 , . . . , ˆx + i , . . . , x + n ) die i-te Seite von σ und d +σ = d +n σ := + +i =0 ( −1) i + ∂ +i σ ∈ C + n− 1 ( K ) und d +n : C + n ( K ) → C + n− 1 ( K ) die dadurch definierte lineare +Abbildung. +Dann gilt: d +n− 1 ◦ d +n = 0 + a + bc +σ +e +3 e + 1e + 2 +Abbildung 2.14:Simplizialkomplex mit Totalordnung +Beispiel 29 +Sei a < b < c. Dann gilt: + d +2 σ = e +1 − e +2 + e +3 +d +1 ( e +1 − e +2 + e + 3 ) = (c − b) − (c − a) + ( b − a ) +42 2.3. SIMPLIZIALKOMPLEX += 0 +Sei a < b < c < d . Dann gilt für Tetraeder: +d +3 (∆( a, b, c, d)) = ∆(b, c, d ) − ∆(a, c, d ) + ∆( a, b, d ) − ∆( a, b, c ), wobei: +d +2 ( ∆(b, c, d )) =∆( c, d)−∆(b, d)+∆( b, c) +d +2 (−∆(a, c, d )) = −∆(c, d)+∆( a, d)−∆(a, c) +d +2 ( ∆(a, b, d )) =∆( b, d) −∆(a, d)+∆( a, b) +d +2 (−∆(a, b, c )) = −∆(b, c)+∆( a, c) −∆( a, b) +⇒ d +2 (d +3 (∆(a, b, c, d))) = 0 +Beweis: Sei σ ∈ A +n . 
Dann gilt: +d +n−1 (d +n σ ) = d +n −1 ( n + +i =0 (−1)i + ∂ + i σ ) += n + +i =0 (−1)i + d +n−1 (∂ + i σ ) += n + +i =0 (−1)i n−1 + +j =0 ∂ +i (∂ + j σ )(−1)j += +0 ≤i ≤ j ≤n− 1(−1)i + j + ∂ + j (∂ + i ( σ )) + +0≤ j d ( P, C ) = d( P, B ) + d( B , C ) = d( P, A ) + d( B , C ) ⇒ +d(A, C ) > d(B , C ) ⇒ Widerspruch zuPunkt (i) +b) C liegt zwischen P und B +d(P, C ) + d(C, A) > d( P, A ) = d(P, B ) = d( P, C ) + d(C, B ) +⇒ d( C, A) > d(C, B ) +⇒ Widerspruch zuPunkt (i) +2. Fall +: Q und B liegen auf verschieden Halbebenen bzgl. P A. +Dann liegen A und Q in derselben Halbebene bzgl. P B . +Tausche A und B ⇒ Fall 1 +Bemerkung 63 +Sei (X, d, G ) eine Geometrie, die§1-§3erfüllt, P, Q ∈ X mit P = Q und ϕ eine Isometrie +mit ϕ( P ) = P und ϕ(Q ) = Q . +Dann gilt ϕ( S ) = S ∀S ∈ P Q. +Beweis: + O. B. d. A. sei S ∈ P Q 2 +⇔ d(P, Q ) = d(P, S ) + d( S, Q) +ϕ∈ Iso(X ) +⇒ d( ϕ(P ) , ϕ(Q)) = d(ϕ( P ), ϕ(S )) + d(ϕ (S ) , ϕ( Q)) +P,Q∈ Fix( ϕ) +⇒ d( P, Q ) = d(P, ϕ (S )) + d(ϕ (S ) , Q) +⇒ ϕ(S ) liegt zwischen P und Q +⇒ d(P, S ) = d( ϕ( P ), ϕ(S )) = d(P, ϕ (S )) +3(i ) +⇒ ϕ(S ) = S + +Proposition 4.2 +In einer Geometrie, die§1-§3erfüllt, gibt es zu P, P + , Q, Q + mit d( P, Q ) = d( P + , Q + ) +höchstens zwei Isometrien mit ϕ( P ) = P + und ϕ( Q) = Q +70 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +Aus den Axiomen folgt, dass es in der Situation von§4höchstens zwei Isometrien mit +ϕ +i (P ) = P + und ϕ + i (Q) = Q + gibt. +Beweis: Seien ϕ +1 , ϕ +2 , ϕ +3 Isometrien mit ϕ +i (P ) = P + , ϕ +i ( Q) = Q + mit i = 1, 2, 3. +Der Beweis vonProposition 4.2erfolgt über zwei Teilaussagen: +(Teil i) ∃R ∈ X \ P Q mit ϕ +1 (R ) = ϕ + 2 (R ). +(Teil ii)Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id +X . +Aus(Teil i)und(Teil ii)folgt, dass ϕ− 1 +2 ◦ ϕ +1 = id +X , also ϕ +2 = ϕ +1 , da P , Q und R in diesem +Fall Fixpunkte sind. +Nun zu den Beweisen der Teilaussagen: +(Teil i) + Sei R ∈ X \ P Q. 
Von den drei Punkten ϕ +1 (R ), ϕ +2 (R ), ϕ +3 (R ) liegen zwei in der selben +Halbebene bzgl. P + Q + = ϕ +i ( P Q). +O. B. d. A. seien ϕ +1 (R ) und ϕ + 2 (R ) in der selben Halbebene. +Es gilt: d(P + , ϕ +1 ( R )) = d(ϕ +1 (P ) , ϕ +1 (R )) += d(P, R ) += d(ϕ +2 (P ) , ϕ +2 (R )) += d(P + , ϕ +2 ( R )) +und analog d( Q + , ϕ +1 ( R )) = d( Q + , ϕ +2 ( R )) +(Teil ii) + Seien P , Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR . Sei B ∈ +P Q \ { P, Q } . Dann ist ϕ( B ) = B wegenBemerkung 63. +Ist R ∈ AB , so enthält AB 2 Fixpunkte von ϕ Bem. 63 +=====⇒ ϕ( A ) = A . +P + B QC RA +Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR +Ist R /∈ AB , so ist AB ∩ P R = ∅ oder AB ∈ RQ = ∅ nachSatz 4.1. Der Schnittpunkt +C ist dann Fixpunkt von ϕ + nachBemerkung 63 ⇒ ϕ( A ) = A. +Bemerkung 64 (SWS-Kongruenzsatz) +Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A + B + C +Dreiecke, für die gilt: +(i) d(A, B ) = d( A + , B + ) +(ii) ∠C AB ∼ += ∠ C + A + B +71 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +(iii) d(A, C ) = d(A + , C + ) +Dann ist AB C kongruent zu A + B + C + . +Beweis: Sei ϕ die Isometrie mit ϕ( A + ) = A , ϕ( A + C + + ) = AC + + und ϕ( A + B + + ) = AB + + . Diese +Isometrie existiert wegenPunkt §4. +⇒ C ∈ ϕ( A + C + + ) und B ∈ ϕ (A + B + + ). +d( A + , C + ) = d(ϕ (A + ) , ϕ( C + )) = d( A, ϕ( C + )) 3(i ) +==⇒ ϕ( C + ) = C +d(A + , B + ) = d(ϕ( A + ), ϕ(B + )) = d(A, ϕ(B + )) 3(i ) +==⇒ ϕ( B + ) = B +Also gilt insbesondere ϕ( A + B + C + ) = AB C . +Bemerkung 65 (WSW-Kongruenzsatz) +Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A + B + C +Dreiecke, für die gilt: +(i) d(A, B ) = d( A + , B + ) +(ii) ∠ C AB ∼ += ∠ C + A + B +(iii) ∠ AB C ∼ += ∠ A + B + C +Dann ist AB C kongruent zu A + B + C + . +Beweis: Sei ϕ die Isometrie mit ϕ(A + ) = A , ϕ(B + ) = B und ϕ(C + ) liegt in der selben Halbebene +bzgl. AB wie C . Diese Isometrie existiert wegen§4. 
+Aus ∠ C AB = ∠C + A + B + = ∠ ϕ( C + )ϕ (A + ) ϕ(B + ) = ∠ ϕ( C + )AB folgt, dass ϕ (C + ) ∈ AC + + . +Analog folgt aus ∠ AB C = ∠ A + B + C + = ∠ ϕ ( A + ) ϕ( B + ) ϕ( C + ) = ∠ AB ϕ ( C + ) , dass ϕ ( C + ) ∈ +B C + + . +Dann gilt ϕ (C + ) ∈ AC ∩ B C = { C } ⇒ ϕ( C + ) = C . +Es gilt also ϕ (A + B + C + ) = AB C . +Definition 61 +a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P . +Man schreibt: ∠ R +1 P R +2 bzw. ∠ R +2 P R +1 2 +b) + Zwei Winkel sind gleich , wenn es eine Isometrie gibt, die den einen Winkel auf den +anderen abbildet. +c) ∠ R +1 P + R +2 heißt kleiner als ∠ R +1 P R +2 , wenn es eine Isometrie ϕ gibt, mit ϕ ( P + ) = P , +ϕ(P + R + +1 ) = P R + +1 und ϕ (R +2 ) liegt in der gleichen Halbebene bzgl. P R +1 wie R +2 und in +der gleichen Halbebene bzgl. P R +2 wie R +1 +d)Im Dreieck P QR gibt es Innenwinkel und Außenwinkel. +Bemerkung 66 +In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. +Beweis: Zeige ∠ P RQ < ∠ RQP + . +Sei M der Mittelpunkt der Strecke QR und P + ∈ P Q+ + \ P Q. Sei A ∈ M P − + mit d(P, M ) = +d( M , A ). +2 + Für dieses Skript gilt: ∠ R + 1 P R +2 = ∠ R +2 P R +1 . Also sind insbesondere alle Winkel ≤ 180◦ + . +72 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +P + R +1 R +1R +2R + 2 +(a) ∠R +1 P + R +2 ist kleiner als ∠ R +1 P R +2 , +vgl.Definition 61.c P +Q R +(b) InnenwinkelundAußenwin- +kelin P QR , vgl.Definiti- +on 61.d +Abbildung 4.6:Situation ausDefinition 61 +Q M +A P + R +(a) Parallelogramm AQPR α βR +Q P +(b) Innen- und Außenwin- +kel von P QR +Abbildung 4.7:Situation ausBemerkung 66 +Es gilt: d( Q, M ) = d( M , R ) und d( P, M ) = d( M , A ) sowie ∠P M R = ∠ AM Q ⇒ M RQ +ist kongruent zu AM Q , denn eine der beiden Isometrien, die∠ P M R auf ∠ AM Q abbildet, +bildet R auf Q und P auf A ab. +⇒ ∠M QA = ∠ M RP = ∠ QRP = ∠ P RQ. +Noch zu zeigen: ∠ M QA < ∠ RQP + , denn A liegt in der selben Halbebene bzgl. P Q wie M . 
+Proposition 4.3 (Existenz der Parallelen) +Sei (X, d, G ) eine Geometrie mit den Axiomen§1-§4. +Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine +Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. +Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P + ∈ f mit +d( P, P + ) = d(P, Q ) abbildet und die Halbebenen bzgl. f erhält. +73 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +Q hf + gP +Abbildung 4.8:Situation ausProposition 4.3 +Annahme: ϕ(g ) ∩ g = ∅ +⇒ Es gibt einen Schnittpunkt { R } = ϕ( g ) ∩ g . +Dann ist ∠RQP = ∠ RQP + < ∠ RP P + nachBemerkung 66und ∠RQP = ∠ RP P + , weil +ϕ( ∠ RQP ) = ∠ RP P + . +⇒ Widerspruch +⇒ ϕ (g ) ∩ g = ∅ +Folgerung 4.4 +Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π . +D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ (QP + + ) = P R + + , sodass ϕ(R ) in der gleichen +Halbebene bzgl. P Q liegt wie R . +Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π , d. h. die +beiden Halbgeraden bilden eine Gerade. +Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, +Dreiecke mit drei 90◦ + -Winkeln. +Proposition 4.5 +In einer Geometrie mit den Axiomen§1-§4ist in jedem Dreieck die Summe der +Innenwinkel ≤ π . +74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +Sei im Folgenden „ IWS“ die „Innenwinkelsumme“. +Beweis: Sei ein Dreieck mit IWS() = π + ε +αβ +γ + P +(a)Summe der Winkel α , β und γ α + 1 +α + 2 βγ + M +A BC A +α +(b)Situation ausProposition 4.5 +Abbildung 4.10:Situation ausProposition 4.5 +Sei α ein Innenwinkel von . +Beh.: Es gibt ein Dreieck + mit IWS( + ) = IWS( ) und einem Innenwinkel α + ≤ α +2 . +Dann gibt es für jedes n ein +n mit IWS ( +n ) = IWS() und Innenwinkel α + ≤ α +2 n . Für +α +2n < ε ist dann die Summe der beiden Innenwinkel um +n größer als π ⇒ Widerspruch +zuFolgerung 4.4. 
+Beweis: Es seien A, B , C ∈ X und das Dreieck mit den Eckpunkten A, B , C und α sei +der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C . +Sei M der Mittelpunkt der Strecke B C . Sei außerdem α + 1 = ∠ C AM und α + 2 = ∠ B AM . +Sei weiter A + ∈ M A − + mit d(A + , M ) = d( A, M ). +Die Situation ist inAbbildung 4.10bskizziert. +⇒ ( M A + C ) und ( M AB ) sind kongruent. ⇒ ∠ AB M = ∠ A + C M und ∠ M A + C = +∠M AB . ⇒ α + β + γ = IWS(AB C ) = IWS(AA + C ) und α + 1 + α +2 = α , also o. B. d. A. +α +1 ≤ α +2 +Bemerkung 67 +In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π . +α +α +α ββ + γ +A BC + g +Abbildung 4.11:Situation ausBemerkung 67 +Beweis: Sei g eine Parallele von AB durch C . +• Es gilt α + = α wegenProposition 4.3. +• Es gilt β + = β wegenProposition 4.3. +• Es gilt α + = α + wegenAufgabe 8. +75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +⇒ IWS(AB C ) = γ + α + + β + = π +Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich +π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. +4.2 Weitere Eigenschaften einer euklidischen Ebene +Satz 4.6 (Strahlensatz) +In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. +xy +−1 0 1 2 3 40123 + z + x λ 2 + z +λ 2 + x +Abbildung 4.12:Strahlensatz +Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. +A + B C +B C +cb a + c b + a +Abbildung 4.13:Die Dreiecke AB C und AB + C + sind ähnlich. +4.2.1 Flächeninhalt +Definition 62 +„Simplizialkomplexe“ in euklidischer Ebene ( X, d) heißen flächengleich , wenn sie sich in +kongruente Dreiecke zerlegen lassen. +76 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +(a)Zwei kongruente Dreiecke (b) Zwei weitere kongruente Drei- +ecke +Abbildung 4.14:Flächengleichheit +Der Flächeninhalt eines Dreiecks ist 1 +/2 · Grundseite · Höhe. 
+A BC +L +C h +c + c +(a) 1 +/2 · | AB | · |h + c | · +A BC + L +A +h +a c +(b) 1 +/2 · | BC | · |h + a | +Abbildung 4.15:Flächenberechnung im Dreieck +Zu zeigen: Unabhängigkeit von der gewählten Grundseite. +α α +γ γ +A BC + L + A +L +C +Abbildung 4.16: AB L + a und C L +C B sind ähnlich, weil IWS = π +Strahlensatz +=======⇒ a +h +c = c +h +a → a · h +a = c · h +c +Satz 4.7 (Satz des Pythagoras) +Im rechtwinkligen Dreieck gilt a2 + + b2 + = c2 + , wobei c die Hypotenuse und a, b die beiden +Katheten sind. +Beweis: (a + b) · (a + b) = a 2 + + 2 ab + b2 + = c2 + + 4 · ( 1 +2 · a · b) +77 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +cb a +A BC +· +(a) a, b sind Katheten und c ist die Hypo- +tenuse b a baba +b +a · +··· +γ + (b)Beweisskizze +Abbildung 4.17:Satz des Pythagoras +Satz 4.8 +Bis auf Isometrie gibt es genau eine euklidische Ebene ( X, d, G ) , nämlich X = R 2 + , +d = euklidischer Abstand, G = Menge der üblichen Geraden. +Beweis: +(i) (R 2 + , d +Euklid ) ist offensichtlich eine euklidische Ebene. +(ii) Sei (X, d) eine euklidische Ebene und g + 1 , g +2 Geraden in X , die sich in einem Punkt 0 +im rechten Winkel schneiden. +Sei P ∈ X \ ( g +1 ∪ g +2 ) ein Punkt und P + X der Fußpunkt des Lots von P auf g +1 (vgl. +Aufgabe 9 (c)) und P + Y der Fußpunkt des Lots von P auf g +2 . +Sei x + P := d(P + X , 0) und y +P := d(P + Y , 0). +InAbbildung 4.19wurde die Situation skizziert. +Sei h : X → R2 + eine Abbildung mit h ( P ) := ( x + P , y +P ) Dadurch wird h auf dem +Quadranten definiert, in dem P liegt, d. h. +∀Q ∈ X mit P Q ∩ g +1 = ∅ = P Q ∩ g +2 +Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. +Im Folgenden werden zwei Aussagen gezeigt: +(i) h ist surjektiv +(ii) h ist eine Isometrie +Da jede Isometrie injektiv ist, folgt aus(i)und(ii), dass h bijektiv ist. +Nun zu den Beweisen der Teilaussagen: +78 4.3. 
HYPERBOLISCHE GEOMETRIE +· + g +1g + 2 + PX + (a)Schritt 1 · + g + 1g + 2 + x +Py +P P +0 + P + XP + YX + (b)Schritt 2 +Abbildung 4.18:Beweis zuSatz 4.8 +(i) Sei ( x, y ) ∈ R 2 + , z. B. x ≥ 0 , y ≥ 0 . Sei P + ∈ g +1 mit d(0 , P + ) = x und P + auf der +gleichen Seite von g + 2 wie P . + g +1g + 2 + x +Py + P P Q +0 R +X +Abbildung 4.19:Beweis zuSatz 4.8 +(ii)Zu Zeigen: d(P, Q ) = d( h( P ), h(Q )) +d( P, Q )2 Pythagoras += d(P, R ) 2 + + d( R, Q )2 + = (y + Q − y +P )2 + + ( x + Q − x + P )2 + . +h( Q) = (x + Q , y +Q ) +4.3 Hyperbolische Geometrie +Definition 63 +Sei + H := { z ∈ C | (z ) > 0 } = + ( x, y ) ∈ R 2 + + y > 0 +79 4.3. HYPERBOLISCHE GEOMETRIE +die obere Halbebene bzw. Poincaré-Halbebene und G = G + 1 ∪ G + 2 mit +G + 1 = { g +1 ⊆ H | ∃m ∈ R , r ∈ R + >0 : g +1 = { z ∈ H : | z − m| = r } } +G + 2 = { g +2 ⊆ H | ∃x ∈ R : g +2 = { z ∈ H : ( z ) = x } } +Die Elemente aus G heißen hyperbolische Geraden. +Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) +Die hyperbolischen Geraden erfüllen. . . +a). . . die Inzidenzaxiome§1 +b). . . das Anordnungsaxiom§3 (ii) +c). . . nicht das Parallelenaxiom§5 +Beweis: +a)Offensichtlich sind§1 (iii)und§1 (ii)erfüllt. Für§1 (i)gilt: +Gegeben z + 1 , z +2 ∈ H +Existenz: +Fall 1 (z + 1 ) = ( z + 2 ) +⇒ z + 1 und z + 2 liegen auf + g = { z ∈ C | ( z ) = (z + 1 ) ∧ H } +SieheAbbildung 4.20a. +Fall 2 (z + 1 ) = ( z + 2 ) +Betrachte nun z + 1 und z + 2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech- +te zu diesen Punkten schneidet diex -Achse. Alle Punkte auf der Mittelsenkrechten +zu z + 1 und z + 2 sind gleich weit von z + 1 und z + 2 entfernt. 
Daher ist der Schnittpunkt mit +der x-Achse der Mittelpunkt eines Kreises durchz + 1 und z + 2 (vgl.Abbildung 4.20b) +xy +−1 0 1 2 3 4 501234 + Z + 1Z + 2 + (Z + 1 ) +(a)Fall 1 xy +−1 0 1 2 3 4 501234 + Z + 1 Z + 2 +(b)Fall 2 +Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer +Geraden +b)Sei g ∈ G + 1 ˙ +∪ G + 2 eine hyperbolische Gerade. +80 4.3. HYPERBOLISCHE GEOMETRIE +Es existieren disjunkte Zerlegungen von H \ g : +Fall 1: g = { z ∈ H z − m| = r } ∈ G + 1 +Dann gilt: + H = { z ∈ H z − m | < r } + + +=:H + 1 (Kreisinneres) ˙ +∪ { z ∈ H z − m | > r } + +=: H +2 (Kreisäußeres) +Da r > 0 ist H +1 nicht leer, da r ∈ R ist H +2 nicht leer. +Fall 2: g = { z ∈ H | z = x } ∈ G + 2 +Die disjunkte Zerlegung ist: +H = { z ∈ H | (z ) < x } + + +=: H +1 (Links) ˙ +∪ { z ∈ H | ( z ) > x } + +=: H + 2 (Rechts) +Zu zeigen: ∀A ∈ H +i , B ∈ H +j mit i, j ∈ { 1, 2 } gilt: AB ∩ g = ∅ ⇔ i = j +„ ⇐ “: A ∈ H + 1 , B ∈ H +2 : AB ∩ g = ∅ +Da d +H stetig ist, folgt diese Richtung direkt. Alle Punkte in H +1 haben einen Abstand +von m der kleiner ist als r und alle Punkte in H +2 haben einen Abstand von m der +größer ist als r . Da man jede Strecke von A nach B insbesondere auch als stetige +Abbildung f : R → R +> 0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g = ∅ +„ ⇒ “: A ∈ H + i , B ∈ H + j mit i, j ∈ { 1 , 2 } : AB ∩ g = ∅ ⇒ i = j +Sei h die Gerade, die durch A und B geht. +Da A, B /∈ g , aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen +unterschiedlichen Punkt. Aus§1 (i)folgt, dass sich g und h in höchstens einen Punkt +schneiden. Sei C dieser Punkt. +Aus A, B /∈ g folgt: C = A und C = B . Also liegt C zwischen A und B . Daraus folgt, +dass A und B bzgl. g in verschiedenen Halbebenen liegen. +c)SieheAbbildung 4.21. + xy +−5 −4 −3 −2 −1 0 1 2 3 4 5 6012345 +Abbildung 4.21:Hyperbolische Geraden erfüllen§5nicht. +81 4.3. 
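HYPERBOLISCHE GEOMETRIE
+Definition 64
+Es seien $a, b, c, d \in \mathbb{R}$ mit $ad - bc \neq 0$ und $\sigma: \mathbb{C} \to \mathbb{C}$ eine Abbildung definiert durch
+$$\sigma(z) := \frac{az + b}{cz + d}$$
+$\sigma$ heißt Möbiustransformation.
+Proposition 4.9
+a) Die Gruppe $\operatorname{SL}_2(\mathbb{R})$ operiert auf $\mathbb{H}$ durch die Möbiustransformation
+$$\sigma(z) := \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ z := \frac{az + b}{cz + d}$$
+b) Die Gruppe $\operatorname{PSL}_2(\mathbb{R}) = \operatorname{SL}_2(\mathbb{R}) / (\pm I)$ operiert durch $\sigma$ auf $\mathbb{H}$.
+c) $\operatorname{PSL}_2(\mathbb{R})$ operiert auf $\mathbb{R} \cup \{\infty\}$. Diese Gruppenoperation ist 3-fach transitiv, d. h. zu $x_0 < x_1 < x_\infty \in \mathbb{R}$ gibt es genau ein $\sigma \in \operatorname{PSL}_2(\mathbb{R})$ mit $\sigma(x_0) = 0$, $\sigma(x_1) = 1$, $\sigma(x_\infty) = \infty$.
+d) $\operatorname{SL}_2(\mathbb{R})$ wird von den Matrizen
+$$A_\lambda := \begin{pmatrix} \lambda & 0 \\ 0 & \lambda^{-1} \end{pmatrix}, \quad B_t := \begin{pmatrix} 1 & t \\ 0 & 1 \end{pmatrix} \quad \text{und} \quad C := \begin{pmatrix} 0 & 1 \\ -1 & 0 \end{pmatrix} \quad \text{mit } t, \lambda \in \mathbb{R}^\times$$
+erzeugt.
+e) $\operatorname{PSL}_2(\mathbb{R})$ operiert auf $G$.
+Beweis:
+a) Sei $z = x + iy \in \mathbb{H}$, d. h. $y > 0$, und $\sigma = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \in \operatorname{SL}_2(\mathbb{R})$
+$$\Rightarrow \sigma(z) = \frac{a(x + iy) + b}{c(x + iy) + d}$$
+$$= \frac{(ax + b) + iay}{(cx + d) + icy} \cdot \frac{(cx + d) - icy}{(cx + d) - icy}$$
+$$= \frac{(ax + b)(cx + d) + aycy}{(cx + d)^2 + (cy)^2} + i \, \frac{ay(cx + d) - (ax + b)cy}{(cx + d)^2 + (cy)^2}$$
+$$= \frac{axcx + axd + bcx + bd + aycy}{(cx + d)^2 + (cy)^2} + i \, \frac{(ad - bc)y}{(cx + d)^2 + (cy)^2}$$
+$$\overset{\operatorname{SL}_2(\mathbb{R})}{=} \frac{ac(x^2 + y^2) + adx + bcx + bd}{(cx + d)^2 + (cy)^2} + i \, \frac{y}{(cx + d)^2 + (cy)^2}$$
+$$\Rightarrow \Im(\sigma(z)) = \frac{y}{(cx + d)^2 + (cy)^2} > 0$$
+Die Abbildung bildet also nach $\mathbb{H}$ ab. Außerdem gilt:
+$$\begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \circ z = \frac{x + iy}{1} = x + iy = z$$
+82 4.3. HYPERBOLISCHE GEOMETRIE
+und
+$$\begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \left( \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \circ z \right) = \begin{pmatrix} a & b \\ c & d \end{pmatrix} \circ \frac{a'z + b'}{c'z + d'}$$
+$$= \frac{a \frac{a'z + b'}{c'z + d'} + b}{c \frac{a'z + b'}{c'z + d'} + d}$$
+$$= \frac{\frac{a(a'z + b') + b(c'z + d')}{c'z + d'}}{\frac{c(a'z + b') + d(c'z + d')}{c'z + d'}}$$
+$$= \frac{a(a'z + b') + b(c'z + d')}{c(a'z + b') + d(c'z + d')}$$
+$$= \frac{(aa' + bc')z + ab' + bd'}{(ca' + dc')z + cb' + dd'}$$
+$$= \begin{pmatrix} aa' + bc' & ab' + bd' \\ ca' + dc' & cb' + dd' \end{pmatrix} \circ z$$
+$$= \left( \begin{pmatrix} a & b \\ c & d \end{pmatrix} \cdot \begin{pmatrix} a' & b' \\ c' & d' \end{pmatrix} \right) \circ z$$
+b) Es gilt $\sigma(z) = (-\sigma)(z)$ für alle $\sigma \in \operatorname{SL}_2(\mathbb{R})$ und $z \in \mathbb{H}$.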
+c)Ansatz: σ = +a b +c d + σ (x + 0 ) = ax +0 + b +cx + 0 + d ! += 0 ⇒ ax + 0 + b = 0 ⇒ b = −ax +0 +σ (x + ∞ ) = ∞ ⇒ cx +∞ + d = 0 ⇒ d = −cx +∞ +σ (x + 1 ) = 1 ⇒ ax + 1 + b = cx +1 + d +a( x + 1 − x + 0 ) = c(x + 1 − x + ∞ ) ⇒ c = a x + 1 − x +0 +x + 1 −x + ∞ +⇒ −a 2 + · x +∞ x + 1 −x + 0 +x + 1 − x + ∞ + a2 + x + 0 x +1 − x + 0 +x + 1 −x + ∞ = 1 +⇒ a 2 x + 1 − x +0 +x +0 −x + ∞ ( x + 0 − x + ∞ ) = 1 ⇒ a 2 + = x + 1 −x + ∞ +(x + 1 − x + ∞ )(x + 1 − x +0 ) +d)Es gilt: + A −1 +λ = A + 1 +λ +B −1 +t = B +− t +C −1 + = C 3 +Daher genügt es zu zeigen, dass man mit A + λ , B +t und C alle Matrizen aus SL +2 ( R ) +erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit +Matrizen der Form A + λ , B +t und C die Einheitsmatrix zu generieren. +Sei also + M = + a b +c d + ∈ SL +2 ( R ) +beliebig. +Fall 1: a = 0 +Da M ∈ SL +2 (R ) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c = 0. Es +folgt: + + 0 1 +−1 0 + · +a b +c d + = + c d +−a −b +83 4.3. HYPERBOLISCHE GEOMETRIE +Gehe zu Fall 2. +Fall 2: a = 0 +Nun wird in M durch M · A + 1 +a an der Stelle von a eine 1 erzeugt: + +a b +c d + · + 1 +a 0 +0 a + = + 1 ab +c +a ad +Gehe zu Fall 3. +Fall 3: a = 1 + +1 b +c d + · +1 −b +0 1 + = +1 0 +c d − bc +Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M + 2, 2 = 1. +Gehe zu Fall 4. +Fall 4: + a = 1, b = 0, d = 1 + A + − 1 C B +c C +1 0 +c 1 + = +1 0 +0 1 +Daher erzeugen Matrizen der Form A + λ , B + t und C die Gruppe SL +2 R . +e)Es genügt die Aussage für Matrizen ausProposition 4.9 (d)zu zeigen. +• σ = + λ 0 +0 λ −1 +, also σ ( z ) = λ 2 + z . Daraus ergeben sich die Situationen, die in +Abbildung 4.22aundAbbildung 4.22bdargestellt sind. 
+xy +−1 0 1 2 3 4 5 6 70123 + m λ2 + mm + i rλ2 + m + i λ2 + r +m + 1 +(a)Fall 1 xy +−1 0 1 2 3 40123 + z + x λ 2 + z +λ 2 + x +(b)Fall 2 (Strahlensatz) +Abbildung 4.22:Beweis vonProposition 4.9 (e)für eine Diagonalmatrix +• Offensichtlich gilt die Aussage für σ = +1 a +0 1 +• Sei nun σ = + 0 1 +−1 0 +, also σ (z ) = − 1 +z +Bemerkung 69 +Zu hyperbolischen Geraden g +1 , g +2 gibt es σ ∈ PSL +2 ( R) mit σ (g +1 ) = g +2 . +84 4.3. HYPERBOLISCHE GEOMETRIE +· + xy +−1 0 101 z = r · eiϕ +1 +z = 1 +r · e iϕ +Abbildung 4.23:Inversion am Kreis +Beweis: NachProposition 4.9 (c)gibt es σ mit σ ( a +1 ) = b +1 und σ ( a +2 ) = b +2 . Dann existiert +σ (g + 1 ) := g +2 wegen dem Inzidenzaxiom§1und ist eindeutig bestimmt. +Definition 65 +Seien z + 1 , z +2 , z +3 , z +4 ∈ C paarweise verschieden. +Dann heißt + DV( z + 1 , z +2 , z +3 , z +4 ) := z +1 −z + 4 +z + 1 −z + 2 +z + 3 −z + 4 +z + 3 −z + 2 = (z + 1 − z + 4 ) · (z + 3 − z + 2 ) +(z + 1 − z + 2 ) · (z + 3 − z + 4 ) +Doppelverhältnis von z + 1 , . . . , z + 4 . +Bemerkung 70 (Eigenschaften des Doppelverhältnisses) +a) DV( z + 1 , . . . , z + 4 ) ∈ C \ { 0 , 1 } +b) DV( z + 1 , z +4 , z +3 , z +2 ) = 1 +DV( z + 1 ,z + 2 ,z + 3 ,z + 4 ) +c) DV( z + 3 , z +2 , z +1 , z +4 ) = 1 +DV( z + 1 ,z + 2 ,z + 3 ,z + 4 ) +d) DV ist auch wohldefiniert, wenn eines der z + i = ∞ oder wenn zwei der z + i gleich sind. +e) DV(0 , 1, ∞, z +4 ) = z + 4 (Der Fall z + 4 ∈ { 0 , 1 , ∞ } ist zugelassen). +f )Für σ ∈ PSL +2 (C ) und z + 1 , . . . , z + 4 ∈ C ∪ { ∞ } ist +DV( σ (z + 1 ), σ (z + 2 ), σ ( z + 3 ) , σ ( z + 4 )) = DV(z + 1 , z +2 , z +3 , z +4 ) +und für σ (z ) = 1 +z gilt +DV( σ (z + 1 ), σ (z + 2 ), σ ( z + 3 ) , σ ( z + 4 )) = + DV( z + 1 , z +2 , z +3 , z +4 ) +g) DV( z + 1 , z +2 , z +3 , z +4 ) ∈ R ∪ { ∞ } ⇔ z + 1 , . . . , z + 4 liegen auf einer hyperbolischen Geraden. +Beweis: +a) DV( z + 1 , . . . , z + 4 ) = 0, da z + i paarweise verschieden +DV( z + 1 , . . . , z + 4 ) = 1, da: +Annahme: DV( z + 1 , . . . 
, z + 4 ) = 1 +⇔ (z + 1 − z + 2 )(z + 3 − z + 4 ) = ( z + 1 − z + 4 )(z + 3 − z + 2 ) +85 4.3. HYPERBOLISCHE GEOMETRIE +⇔ z + 1 z + 3 − z + 2 z + 3 − z + 1 z + 4 + z + 2 z + 4 = z + 1 z + 3 − z + 3 z + 4 − z + 1 z + 2 + z + 2 z + 4 +⇔ z + 2 z + 3 + z + 1 z + 4 = z + 3 z + 4 + z + 1 z + 2 +⇔ z + 2 z + 3 − z + 3 z + 4 = z + 1 z + 2 − z + 1 z + 4 +⇔ z + 3 (z + 2 − z + 4 ) = z + 1 ( z + 2 − z + 4 ) +⇔ z + 3 = z + 1 oder z + 2 = z + 4 +Alle z + i sind paarweise verschieden ⇒ Widerspruch +b) DV( z + 1 , z +4 , z +3 , z +2 ) = (z + 1 − z + 2 )· ( z + 3 −z + 4 ) +(z + 1 − z + 4 )· ( z + 3 −z + 2 ) = 1 +DV( z + 1 ,z + 2 ,z + 3 ,z + 4 ) +c) DV( z + 3 , z +2 , z +1 , z +4 ) = (z + 3 − z + 4 )· ( z + 1 −z + 2 ) +(z + 3 − z + 2 )· ( z + 1 −z + 4 ) = 1 +DV( z + 1 ,z + 2 ,z + 3 ,z + 4 ) +d)Zwei der z + i dürfen gleich sein, da: +Fall 1 z + 1 = z + 4 oder z + 3 = z + 2 +In diesem Fall ist DV( z + 1 , . . . , z + 4 ) = 0 +Fall 2 z + 1 = z + 2 oder z + 3 = z + 4 +Mit der Regel von L’Hospital folgt, dass in diesem Fall DV (z + 1 , . . . , z + 4 ) = ∞ gilt. +Fall 3 z + 1 = z + 3 oder z + 2 = z + 4 +Durch Einsetzen ergibt sich DV( z + 1 , . . . , z + 4 ) = 1 . +Im Fall, dass ein z + i = ∞ ist, ist entweder DV (0, 1, ∞, z +4 ) = 0 oder DV (0, 1, ∞ , z +4 ) ± ∞ +e) DV(0 , 1 , ∞, z +4 ) = (0 −z + 4 ) · (∞− 1) +(0 −1) · (∞− z + 4 ) = z + 4 · ( ∞−1) +∞− z + 4 = z + 4 +f )Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +g) Sei σ ∈ PSL +2 (C ) mit σ (z + 1 ) = 0, σ (z + 2 ) = 1, σ (z + 3 ) = ∞. Ein solches σ existiert, da man +drei Parameter von σ wählen darf. +Bem. 70.f +⇒ DV( z + 1 , . . . , z + 4 ) = DV(0, 1 , ∞, σ (z + 4 )) +⇒ DV( z + 1 , . . . , z + 4 ) ∈ R ∪ { ∞ } +⇔ σ ( z + 4 ) ∈ R ∪ { ∞ } +Behauptung folgt, weil σ − 1 + (R ∪ ∞ ) ein Kreis oder eine Gerade in C ist. +Definition 66 +Für z + 1 , z +2 ∈ H sei g +z + 1 ,z + 2 die eindeutige hyperbolische Gerade durch z + 1 und z + 2 und a + 1 , a +2 die +„Schnittpunkte“ von g +z + 1 ,z + 2 mit R ∪ { ∞ }. 
+Dann sei d +H (z + 1 , z +2 ) := 1 +2 | ln DV(a +1 , z +1 , a +2 , z +2 )| und heiße hyperbolische Metrik. +Beh.: + Für z + 1 , z +2 ∈ H sei g +z + 1 ,z + 2 die eindeutige hyperbolische Gerade durch z + 1 und z + 2 und a + 1 , a +2 +die „Schnittpunkte“ von g +z + 1 ,z + 2 mit R ∪ { ∞ }. +Dann gilt: + 1 +2 | ln DV(a + 1 , z +1 , a +2 , z +2 ) | = 1 +2 | ln DV(a +2 , z +1 , a +1 , z +2 )| +Beweis: WegenBemerkung 70.cgilt: +DV( a + 1 , z +1 , a +2 , z +2 ) = 1 +DV( a +2 , z +1 , a +1 , z +2 ) +Außerdem gilt: + ln 1 +x = ln x − 1 + = (−1) · ln x = − ln x +86 4.3. HYPERBOLISCHE GEOMETRIE +Da der ln im Betrag steht, folgt direkt: +1 +2 | ln DV(a + 1 , z +1 , a +2 , z +2 ) | = 1 +2 | ln DV(a +2 , z +1 , a +1 , z +2 )| +Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x -Achse im Doppelver- +hältnis genutzt werden. +Beh.: Die hyperbolische Metrik ist eine Metrik auf H. +Beweis: WegenBemerkung 70.fist +d( z + 1 , z +2 ) := d( σ (z + 1 ), σ (z + 2 )) mit σ ( a +1 ) = 0, σ (a +2 ) = ∞ +d. h. σ ( g +z + 1 ,z + 2 ) = i R (imaginäre Achse). +also gilt o. B. d. A. z + 1 = i a und z + 2 = i b mit a, b ∈ R und a < b. +2d( i a, i b) =| ln DV(0, i a, ∞, i b ) | +=| ln (0 − i b)(∞ − i a) +(0 − i a)(∞ − i b) | +=| ln b +a | += ln b − ln a +Also: d(z + 1 , z +2 ) ≥ 0, d( z + 1 , z +2 ) = 0 ⇔ z + 1 = z + 2 +2d( z + 2 , z +1 ) =| ln DV(a +2 , z +2 , a +1 , z +1 ) | +=| ln DV(∞, i b, 0, i a ) | +Bem. 70.b += | ln DV(0, i b, ∞, i a ) | += 2d(z + 1 , z +2 ) +Liegen drei Punkte z + 1 , z +2 , z +3 ∈ C auf einer hyperbolischen Geraden, so gilt d( z + 1 , z +3 ) = +d( z + 1 , z +2 ) + d(z + 2 , z +3 ) (wenn z + 2 zwischen z + 1 und z + 3 liegt). +Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die +Vorlesung „Hyperbolische Geometrie“ verwiesen. +Satz 4.10 +Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen +Geraden bildet eine „nichteuklidische Geometrie“, d. h. 
die Axiome §1–§4 sind erfüllt, aber Axiom §5 ist verletzt.
+87 4.3. HYPERBOLISCHE GEOMETRIE
+Übungsaufgaben
+Aufgabe 8
+Seien $(X, d)$ eine absolute Ebene und $P, Q, R \in X$ Punkte. Der Scheitelwinkel des Winkels $\angle PQR$ ist der Winkel, der aus den Halbgeraden $QP^-$ und $QR^-$ gebildet wird. Die Nebenwinkel von $\angle PQR$ sind die von $QP^+$ und $QR^-$ bzw. $QP^-$ und $QR^+$ gebildeten Winkel.
+Zeigen Sie:
+(a) Die beiden Nebenwinkel von $\angle PQR$ sind gleich.
+(b) Der Winkel $\angle PQR$ ist gleich seinem Scheitelwinkel.
+Aufgabe 9
+Sei $(X, d)$ eine absolute Ebene. Der Abstand eines Punktes $P$ zu einer Menge $Y \subseteq X$ von Punkten ist definiert durch $d(P, Y) := \inf \{ d(P, y) \mid y \in Y \}$.
+Zeigen Sie:
+(a) Ist $\triangle ABC$ ein Dreieck, in dem die Seiten $\overline{AB}$ und $\overline{AC}$ kongruent sind, so sind die Winkel $\angle ABC$ und $\angle BCA$ gleich.
+(b) Ist $\triangle ABC$ ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel gegenüber und umgekehrt.
+(c) Sind $g$ eine Gerade und $P \notin g$ ein Punkt, so gibt es eine eindeutige Gerade $h$ mit $P \in h$, die $g$ im rechten Winkel schneidet. Diese Gerade heißt Lot von $P$ auf $g$ und der Schnittpunkt des Lots mit $g$ heißt Lotfußpunkt.
+Aufgabe 10
+Seien $f, g, h \in G$ und paarweise verschieden.
+Zeigen Sie: $f \parallel g \land g \parallel h \Rightarrow f \parallel h$
+Aufgabe 11
+Beweise den Kongruenzsatz SSS.
+5 Krümmung
+Definition 67
+Sei $f: [a, b] \to \mathbb{R}^n$ eine Funktion aus $C^\infty$. Dann heißt $f$ Kurve.
+5.1 Krümmung von Kurven
+Definition 68
+Sei $\gamma: I = [a, b] \to \mathbb{R}^n$ eine Kurve.
+a) Die Kurve $\gamma$ heißt durch Bogenlänge parametrisiert, wenn gilt:
+$$\| \gamma'(t) \|_2 = 1 \quad \forall t \in I$$
+Dabei ist $\gamma'(t) = (\gamma_1'(t), \gamma_2'(t), \dots, \gamma_n'(t))$.
+b) $l(\gamma) = \int_a^b \| \gamma'(t) \| \, \mathrm{d}t$ heißt Länge von $\gamma$.
+Bemerkung 71 (Eigenschaften von Kurven I)
+Sei $\gamma: I = [a, b] \to \mathbb{R}^n$ eine $C^\infty$-Funktion.
+a) Ist $\gamma$ durch Bogenlänge parametrisiert, so ist $l(\gamma) = b - a$.
+b) Ist $\gamma$ durch Bogenlänge parametrisiert, so ist $\gamma'(t)$ orthogonal zu $\gamma''(t)$ für alle $t \in I$.
+Beweis:
+a) $l(\gamma) = \int_a^b \| \gamma'(t) \| \, \mathrm{d}t = \int_a^b 1 \, \mathrm{d}t = b - a$.
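+b) Im Folgenden wird die Aussage nur für $\gamma: [a, b] \to \mathbb{R}^2$ bewiesen. Allerdings funktioniert der Beweis im $\mathbb{R}^n$ analog. Es muss nur die Ableitung angepasst werden.
+$$1 = \| \gamma'(t) \| = \| \gamma'(t) \|^2 = \langle \gamma'(t), \gamma'(t) \rangle$$
+$$\Rightarrow 0 = \frac{\mathrm{d}}{\mathrm{d}t} \langle \gamma'(t), \gamma'(t) \rangle$$
+$$= \frac{\mathrm{d}}{\mathrm{d}t} \left( \gamma_1'(t) \gamma_1'(t) + \gamma_2'(t) \gamma_2'(t) \right)$$
+$$= 2 \cdot \left( \gamma_1''(t) \cdot \gamma_1'(t) + \gamma_2''(t) \cdot \gamma_2'(t) \right)$$
+$$= 2 \cdot \langle \gamma''(t), \gamma'(t) \rangle$$
+Definition 69
+Sei $\gamma: I \to \mathbb{R}^2$ eine durch Bogenlänge parametrisierte Kurve.
+a) Für $t \in I$ sei $n(t)$ Normalenvektor an $\gamma$ in $t$, wenn gilt:
+$$\langle n(t), \gamma'(t) \rangle = 0, \quad \| n(t) \| = 1 \quad \text{und} \quad \det((\gamma'(t), n(t))) = +1$$
+89 5.1. KRÜMMUNG VON KURVEN
+b) Sei $\kappa: I \to \mathbb{R}$ so, dass gilt:
+$$\gamma''(t) = \kappa(t) \cdot n(t)$$
+Dann heißt $\kappa(t)$ Krümmung von $\gamma$ in $t$.
+Da $n(t)$ und $\gamma''(t)$ nach Bemerkung 71.b linear abhängig sind, existiert $\kappa(t)$.
+Beispiel 45
+Gegeben sei ein Kreis mit Radius $r$, d. h. mit Umfang $2 \pi r$. Es gilt:
+$$\gamma(t) = \left( r \cdot \cos \frac{t}{r}, \; r \cdot \sin \frac{t}{r} \right) \quad \text{für } t \in [0, 2 \pi r]$$
+ist parametrisiert durch Bogenlänge, da gilt:
+$$\gamma'(t) = \left( \left( r \cdot \frac{1}{r} \right) \left( -\sin \frac{t}{r} \right), \; r \cdot \frac{1}{r} \cos \frac{t}{r} \right) = \left( -\sin \frac{t}{r}, \; \cos \frac{t}{r} \right)$$
+Der Normalenvektor von $\gamma$ in $t$ ist
+$$n(t) = \left( -\cos \frac{t}{r}, \; -\sin \frac{t}{r} \right)$$
+da gilt:
+$$\langle n(t), \gamma'(t) \rangle = \left\langle \begin{pmatrix} -\cos \frac{t}{r} \\ -\sin \frac{t}{r} \end{pmatrix}, \begin{pmatrix} -\sin \frac{t}{r} \\ \cos \frac{t}{r} \end{pmatrix} \right\rangle = \left( -\cos \frac{t}{r} \right) \cdot \left( -\sin \frac{t}{r} \right) + \left( -\sin \frac{t}{r} \right) \cdot \left( \cos \frac{t}{r} \right) = 0$$
+$$\| n(t) \| = \left\| \left( -\cos \frac{t}{r}, -\sin \frac{t}{r} \right) \right\| = \sqrt{ \left( -\cos \frac{t}{r} \right)^2 + \left( -\sin \frac{t}{r} \right)^2 } = 1$$
+$$\det(\gamma'(t), n(t)) = \begin{vmatrix} -\sin \frac{t}{r} & -\cos \frac{t}{r} \\ \cos \frac{t}{r} & -\sin \frac{t}{r} \end{vmatrix} = \left( -\sin \frac{t}{r} \right)^2 - \left( -\cos \frac{t}{r} \right) \cdot \cos \frac{t}{r} = 1$$
+Die Krümmung ist für jedes $t$ konstant $\frac{1}{r}$, da gilt:
+$$\gamma''(t) = \left( -\frac{1}{r} \cos \frac{t}{r}, \; -\frac{1}{r} \sin \frac{t}{r} \right) = \frac{1}{r} \cdot \left( -\cos \frac{t}{r}, \; -\sin \frac{t}{r} \right) \Rightarrow \kappa(t) = \frac{1}{r}$$
+90 5.2. TANGENTIALEBENE
+Definition 70
+Sei $\gamma: I \to \mathbb{R}^3$ eine durch Bogenlänge parametrisierte Kurve.
+a) Für $t \in I$ heißt $\kappa(t) := \| \gamma''(t) \|$ die Krümmung von $\gamma$ in $t$.
+b) Ist für $t \in I$ die Ableitung $\gamma''(t) \neq 0$, so heißt $\frac{\gamma''(t)}{\| \gamma''(t) \|}$ Normalenvektor an $\gamma$ in $t$.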
+c) b(t ) sei ein Vektor, der γ + (t), n(t) zu einer orientierten Orthonormalbasis vonR 3 + ergänzt. +Also gilt: + det(γ + (t ), n(t) , b( t)) = 1 +b(t ) heißt Binormalenvektor, die Orthonormalbasis + + γ + (t ), n(t ), b( t) +heißt begleitendes Dreibein. +Bemerkung 72 (Eigenschaften von Kurven II) +Sei γ : I → R 3 + durch Bogenlänge parametrisierte Kurve. +a) n (t ) ist orthogonal zu γ + ( t) . +b) b(t ) ausDefinition 70.cist eindeutig. +5.2 Tangentialebene +Erinnerung Sie sich anDefinition 32„reguläre Fläche“. +Äquivalent dazu ist: S ist lokal von der Form +V (f ) = + x ∈ R 3 + + f ( x) = 0 +für eine C ∞ + -Funktion f : R 3 + → R . +Definition 71 +Sei S ⊆ R3 + eine reguläre Fläche, s ∈ S , F : U → V ∩ S eine lokale Parametrisierung um +s ∈ V : + ( u, v ) → ( x (u, v ), y (u, v ), z (u, v )) +Für p = F −1 + ( s) ∈ U sei + J +F (p ) =  + ∂x +∂u (p ) ∂x +∂v (p ) +∂y +∂u (p ) ∂y +∂v (p ) +∂z +∂u (p ) ∂z +∂v (p )  + +und D + p F : R 2 + → R 3 + die durch J +F (p ) definierte lineare Abbildung. +Dann heißt T +s S := Bild(D + p F ) die Tangentialebene an s ∈ S . +Bemerkung 73 (Eigenschaften der Tangentialebene) +a) T +s S ist 2 -dimensionaler Untervektorraum von R 3 + . +b) T +s S = ˜u, ˜v , wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix J + F (p ) sind. +c) T +s S hängt nicht von der gewählten Parametrisierung ab. +91 5.2. TANGENTIALEBENE +d) Sei S = V ( f ) eine reguläre Fläche in R 3 + , also f : V → R eine C ∞ + -Funktion, V ⊆ R 3 +offen, grad(f )(x ) = 0 für alle x ∈ S . +Dann ist T +s S = (grad(f )(s ))⊥ + für jedes s ∈ S . +Beweis: +a) J +F ist eine 3 × 2 -Matrix, die mit einem 2 × 1 -Vektor multipliziert wird. Das ist +eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein +Vektorraum ist. Da Rg(J + F ) = 2, ist auch dim(T +s S ) = 2. 
+b)Hier kann man wie inPunkt a)argumentieren +c) T +s S + = { x ∈ R 3 + |∃parametrisierte Kurve γ : [ −ε, + ε ] → S für ein ε > 0 mit γ (0) = +s und γ + (0) = x } +Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +d) + Sei x ∈ T +s S, γ : [ −ε, + ε ] → S eine parametrisierte Kurve mit ε > 0 und γ + (0) = s, +sodass γ + (0) = x gilt. Da γ ( t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 +⇒ 0 = (f ◦ γ ) + (0) = grad(f )(γ (0)), γ + (0) +⇒ T +s S ⊆ grad(f )(s) ⊥ +dim=2 +====⇒ T +s S = (grad(f )(s ))⊥ +Definition 72 +a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 + ist eine Abbildung n : S → S 2 + ⊆ +R 3 + mit n (s ) ∈ T +s S ⊥ + für jedes s ∈ S . +b) S heißt orientierbar , wenn es ein stetiges Normalenfeld auf S gibt. +Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. +Im Folgenden werden diese Begriffe jedoch synonym benutzt. +Bemerkung 74 (Eigenschaften von Normalenfeldern) +a)Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C ∞ + ). +b) + Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 + von s und eine lokale Parametrisierung +F : U → V von S um s, sodass auf F (U ) = V ∩ S ein stetiges Normalenfeld existiert. +c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas vonS aus lokalen +Parametrisierungen F +i : U + i → V + i , i ∈ I gibt, sodass für alle i, j ∈ F und alle +s ∈ V +i ∩ V +j ∩ S gilt: + det(D + s V + i → V +j + + +F + j ◦ F − 1 +i + +∈ R3 ×3 ) > 0 +Beweis: Wird hier nicht geführt. +Beispiel 46 (Normalenfelder) +1) S = S 2 + , n + 1 = id +S 2 ist ein stetiges Normalenfeld. +Auch n + 2 = −id +S 2 ist ein stetiges Normalenfeld. +2) S = Möbiusband (vgl.Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma- +lenfeld, aber kein stetiges Normalenfeld. +92 5.3. GAUSS-KRÜMMUNG +Abbildung 5.1:Möbiusband +5.3 Gauß-Krümmung +Bemerkung 75 +Sei S eine reguläre Fläche, s ∈ S , n ( s) ist ein Normalenvektor in s, x ∈ T +s S , x = 1. 
+Sei E der von x und n ( s) aufgespannte 2-dimensionale Untervektorraum von R 3 + . +Dann gibt es eine Umgebung V ⊆ R 3 + von s , sodass +C := (s + E ) ∩ S ∩ V +das Bild einer durch Bogenlänge parametrisierten Kurveγ : [−ε, ε] → S enthält mit γ (0) = s +und γ + (0) = x . +Beweis: „Satz über implizite Funktionen“ 1 +Definition 73 +In der Situation ausBemerkung 75heißt die Krümmung κ + γ (0) der Kurve γ in der Ebene +(s + E ) im Punkt s die Normalkrümmung von S in s in Richtung x = γ + (0). +Man schreibt: κ + Nor ( s, x) := κ +γ (0) +Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. +Beispiel 47 (Gauß-Krümmung) +1) S = S 2 + = V (X 2 + + Y 2 + + Z 2 + − 1) ist die Kugel um den Ursprung mit Radius 1,n = id, +s = (0, 0 , 1), x = (1, 0, 0) +⇒ E = R · x + R · n (s ) (x, z -Ebene) +C = E ∩ S ist Kreislinie +κ +Nor ( s, x) = 1 +r = 1 +2) S = V ( X 2 + + Z 2 + − 1) ⊆ R 3 + ist ein Zylinder (sieheAbbildung 5.2a). s = (1, 0, 0) +x + 1 = (0, 1, 0) ⇒ E +1 = R · e +1 + R · e + 2 (x, y -Ebene) +S ∩ E +1 = V (X 2 + + Y 2 + − 1) ∩ E , Kreislinie in E +⇒ κ + Nor ( s, x +1 ) = ±1 +x + 2 = (0, 0, 1), E +2 = R · e +1 + R · e + 3 (x, z -Ebene) +1 + Siehe z. B. https://github.com/MartinThoma/LaTeX- examples/tree/master/documents/Analysis%20II +93 5.3. GAUSS-KRÜMMUNG +V ∩ E +2 ∩ S = + (1, 0 , z ) ∈ R 3 + + z ∈ R + ist eine Gerade +⇒ κ + Nor (s, x +2 ) = 0 +3) S = V (X 2 + − Y 2 + − Z ), s = (0, 0, 0) (Hyperbolisches Paraboloid, sieheAbbildung 5.2b) +x + 1 = (1, 0, 0), n (s ) = (0 , 0 , 1) +x + 2 = (0, 1 , 0) +κ +Nor ( s, x +1 ) = 2 +κ +Nor ( s, x +2 ) = −2 + −1 .5 −1 −0. 5 0 0 .5 1 1.5 +−101012345 + xyz + (a) S = V ( X 2 + + Z 2 + − 1) −2 −1. 5 −1 −0 .5 0 0 .5 1 1 .5 2 +−2−1012−202 + xyz + −4−2024f (x, y ) +(b) S = V ( X 2 + − Y 2 + − Z ) +Abbildung 5.2:Beispiele für reguläre Flächen +Definition 74 +Sei S ⊆ R3 + eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S . 
+γ : [ −ε, ε] → S eine nach Bogenlänge parametrisierte Kurve ( ε > 0 ) mit γ (0) = s und +γ + (0) = 0. +Sei n (0) := γ + (0) +γ + (0) . Zerlege +n (0) = n (0)t + + n (0)⊥ + mit n (0)t + ∈ T +s S und n (0)⊥ + ∈ ( T +s S )⊥ +Dann ist n (0) ⊥ + = n (0) , n( s) · n (s ) +κ + Nor (s, γ ) := γ + (0), n(s ) die Normalkrümmung . +Bemerkung 76 +Sei γ ( t) = γ (−t) , t ∈ [ −ε, ε]. Dann ist κ + Nor (s, γ ) = κ +Nor ( s, γ ). +Beweis: γ + (0) = γ + (0) , da γ + (0) = −γ + (0). +Es gilt: κ +Nor ( s, γ ) hängt nur von |γ + (0)| ab und ist gleich κ + Nor (s, γ + (0)). +Bemerkung 77 +Sei S eine reguläre Fläche und n = n ( s) ein Normalenvektor an S in s . +Sei T 1 +s S = { x ∈ T +s S | x = 1 } ∼ += S 1 + . Dann ist +κn +Nor (s ) : T 1 +s S → R , x → κ +Nor ( s, x) +eine glatte Funktion und Bild κn +Nor (s ) ist ein abgeschlossenes Intervall. +Definition 75 +Sei S eine reguläre Fläche und n = n ( s) ein Normalenvektor an S in s . +94 5.3. GAUSS-KRÜMMUNG +a) κn +1 (s ) : = min + κn +Nor ( s, x) + + x ∈ T 1 +s S + und +κn +2 (s ) : = max + κ n +Nor ( s, x) + + x ∈ T 1 +s S heißen Hauptkrümmungen von S in s. +b) K ( s) := κn +1 ( s) · κ n +2 ( s) heißt Gauß-Krümmung von S in s . +Bemerkung 78 +Ersetzt man n durch −n , so gilt: +κ −n +Nor (s, x) = −κn +Nor (x ) ∀x ∈ T 1 +s S +⇒ κ− n +1 (s ) = −κn +2 ( s) +κ− n +2 (s ) = −κn +1 ( s) +und K − n + (s ) = K n + (s ) =: K ( s) +Beispiel 48 +1) S = S 2 + . Dann ist κ +1 ( s) = κ + 2 (s ) = ±1 ∀s ∈ S 2 +⇒ K (s ) = 1 +2)Zylinder: +κ +1 (s ) = 0 , κ +2 ( s) = 1 ⇒ K (s ) = 0 +3)Sattelpunkt auf hyperbolischem Paraboloid: +κ +1 (s ) < 0 , κ +2 ( s) = 0 → K (s ) < 0 +4) S = Torus. SieheAbbildung 5.3 +Abbildung 5.3: K ( s +1 ) > 0 , K (s + 2 ) = 0, K (s + 3 ) < 0 +Bemerkung 79 +Sei S eine reguläre Fläche, s ∈ S ein Punkt. +95 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +a)Ist K ( s) > 0 , so liegt S in einer Umgebung von s ganz auf einer Seite von T +s S + s . +b)Ist K ( s) < 0 , so schneidet jede Umgebung von s in S beide Seiten von T +s S + s . 
+5.4 Erste und zweite Fundamentalform +Sei S ⊆ R 3 + eine reguläre Fläche, s ∈ S , T +s S die Tangentialebene an S in s und F : U → V eine +lokale Parametrisierung von S um s . Weiter sei p := F −1 + (s ). +Definition 76 +Sei I + S ∈ R2 ×2 + definiert als +I + S : = + g +1, 1 (s ) g +1, 2 (s ) +g +1, 2 (s ) g +2, 2 (s ) + = +E ( s) F ( s) +F ( s ) G (s ) +mit g +i,j = g + s (D + p F ( e +i ) , D +p F ( e +j )) += ∂ F +∂ u +i (p ), ∂ F +∂ u +j (p ) i, j ∈ { 1, 2 } +Die Matrix I + S heißt erste Fundamentalform von S bzgl. der Parametrisierung F . +Bemerkung 80 +a) + Die Einschränkung des Standardskalarproduktes des R 3 + auf T +s S macht T +s S zu einem +euklidischen Vektorraum. +b) { D + p F (e + 1 ), D +p F (e + 2 ) } ist eine Basis von T +s S . +c) Bzgl. der Basis { D + p F ( e +1 ) , D +p F ( e +2 ) } hat das Standardskalarprodukt ausBemer- +kung 80.adie Darstellungsmatrix I +S . +d) g +i,j (s ) ist eine differenzierbare Funktion von s . +Bemerkung 81 + det(I + S ) = + + + ∂ F +∂ u +1 ( p) × ∂ F +∂ u +2 (p ) + + +2 +Beweis: Sei ∂F +∂u + 1 ( p) =  +x + 1 +x + 2 +x + 3  + + , ∂F +∂u + 2 (p ) =  + y + 1 +y + 2 +y 3  + +Dann ist ∂F +∂u +1 (p ) × ∂F +∂u + 2 ( p) =  + z + 1 +z + 2 +z + 3  + + mit + z + 1 = x + 2 y +3 − x + 3 y + 2 +z + 2 = x + 3 y +1 − x + 1 y + 3 +z + 3 = x + 1 y + 2 − x + 2 y + 1 +⇒ ∂ F +∂ u +1 ( p ) × ∂ F +∂ u +2 ( p) = z 2 +1 + z 2 +2 + z 2 +3 +96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +det(I +S ) = g +1 , 1 g +2 , 2 − g 2 +1, 2 +=  +x + 1 +x + 2 +x + 3  + + ,  +x + 1 +x + 2 +x + 3  +  +y + 1 +y + 2 +y + 3  + + ,  + y +1 +y +2 +y +3  + + −  + x +1 +x +2 +x +3  + + ,  +y + 1 +y + 2 +y + 3  + 2 += (x 2 +1 + x 2 +2 + x 2 +3 )(y 2 +1 + y 2 +2 + y 2 +3 ) − ( x +1 y +1 + x + 2 y + 2 + x + 3 y + 3 ) 2 +Definition 77 +a) + Das Differential d A = + det(I )d u +1 d u +2 heißt Flächenelement von S bzgl. der Para- +metrisierung F . 
+b)Für eine Funktion f : V → R heißt + +V f dA := +U f ( F (u +1 , u +2 ) + + +=: s ) +det I (s )du +1 du +2 +der Wert des Integrals von f über V , falls das Integral rechts existiert. +Bemerkung 82 +a) +V f dA ist unabhängig von der gewählten Parametrisierung. +b)Sei f : S → R eine Funktion, die im Sinne vonDefinition 77.blokal integrierbar ist. +Dann ist + S f dA wohldefiniert, falls (z. B.) S kompakt ist. +Etwa: + +S f dA = n + +i =1 +V + if dA +− +i = j +V + i ∩V + jf dA ++ +i,j,k +V + i ∩V + j ∩V + kf dA +− . . . +Beweis: +a)Mit Transformationsformel. +b)Ist dem Leser überlassen. +Proposition 5.1 +Sei S ⊆ R3 + eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S 2 + . +Dann gilt: +a) n induziert für jedes s ∈ S eine lineare Abbildung d +s n : T +s S → T +n (s ) S 2 + durch +d +s n ( x ) = d +dt n (s „+“ tx + +Soll auf Fläche S bleiben) + + +t =0 +Die Abbildung d +s n heißt Weingarten-Abbildung +97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +b) T +n(s ) S 2 + = T +s S . +c) d +s n ist ein Endomorphismus von T +s S . +d) d +s n ist selbstadjungiert bzgl. des Skalarproduktes I +S . +Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. +98 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +Beweis: +a)Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +b) T +n(S ) S 2 + = n (s ) ⊥ + = T +s S +c)WegenProposition 5.1 (a)ist d +s n ein Homomorphismus. +d)Zu zeigen: ∀x, y ∈ I +s S : x, d +s n ( y ) = d +s n ( x ), y +Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die +Basisvektoren zu zeigen. 
+Sei x + i = D + p F ( e +i ) = ∂F +∂u + i (p ) i = 1, 2 +Beh.: x + i , d +s n ( x +j ) = ∂ 2 + F +∂u +i ∂u + j (p ), d +s n (x + i ) +⇒ ∂ 2 + F +∂u + i ∂u +j ( p) , d +s n ( x + i ) = x +j , d +s n ( x +i ) +Bew.: 0 = ∂ F +∂ u (p + te + j ) , n( p + te + j ) +⇒ 0 = d +dt + ∂ F +∂ u ( p + te + j ), n( p + te + j ) + + + +t=0 += d +dt ∂ F +∂ u +i (p + te + j ) + +∂ 2 + F +∂u +j ∂u +i ( p) + + + t=0 , n( s) + x + i , d +s n D +p F (e + j ) + +x + j +Definition 78 +Die durch −d +s n definierte symmetrische Bilinearform aufT +s S heißt zweite Fundamental- +form von S in s bzgl. F . +Man schreibt: I I + s ( x, y ) = −d +s n ( x) , y = I + s (−d +s n (x ) , y ) +Bemerkung 83 +Bezüglich der Basis { x + 1 , x +2 } von T +s S hat I I + s die Darstellungsmatrix +(h( s ) +i,j ) +i,j =1, 2 mit h +i,j (s ) = ∂ 2 + F +∂ u +i ∂ u +j ( p) , n( s) +Proposition 5.2 +Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mitγ (0) = s. Dann gilt: +κ + Nor (s, γ ) = I I + s (γ + (0), γ + (0)) +Beweis: NachDefinition 74ist κ +Nor (s, γ ) = γ + (0), n(s ) . Nach Voraussetzung gilt +n (γ (t )) ⊥ γ + ( t) ⇔ γ + (0), n( s) = 0 +Die Ableitung nach t ergibt +0 = d +dt (n ( γ (t )), γ + ( t)) += + d +dt n ( γ (t )) + + +t=0 , γ + (0) + + n ( s) , γ + (0) +99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM += d +s n ( γ + (0)), γ + (0) + κ +Nor ( s, γ ) += −I I + s ( γ + (0), γ + (0)) + κ +Nor ( s, γ ) +Folgerung 5.3 +Die beiden Definitionen von Normalkrümmung inAbschnitt 5.1stimmen überein: +κ +Nor (s, γ ) = κ + Nor ( s, γ + (0)) +Satz 5.4 +Sei S ⊆ R 3 + eine reguläre, orientierbare Fläche und s ∈ S . +a)Die Hauptkrümmungen κ +1 (s ), κ +2 ( s) sind die Eigenwerte von I I + s . +b)Für die Gauß-Krümmung gilt: K (s ) = det( I I + s ) +Beweis: +a) I I + s ist symmetrisch, I +s S hat also eine Orthonormalbasis aus Eigenvektoren y +1 , y +2 von +I I + s . Ist x ∈ T +s S , x = 1, so gibt es ϕ ∈ [0, 2 π ) mit x = cos ϕ · y + 1 + sin ϕ · y +2 . 
+Seien λ + 1 , λ +2 die Eigenwerte von I I + s , also I I + s (y + i , y +i ) = λ + i . Dann gilt: +I I + s (x, x) = cos2 + ϕλ +1 + sin 2 + ϕλ +2 += (1 − sin2 + ϕ)λ + 1 + sin 2 + ϕλ +2 += λ + 1 + sin 2 + ϕ(λ + 2 − λ +1 ) ≥ λ + 1 += cos2 + ϕ + (1 − cos 2 + ϕ) λ +2 += λ + 2 − cos 2 + ϕ(λ + 2 − λ +1 ) ≤ λ + 2 +Prop. 5. 2 +=====⇒ λ + 1 = min + κ +Nor (s, x) + + x ∈ T 1 +s S +λ +2 = max + κ +Nor ( s, x) + + x ∈ T 1 +s S +Satz 5.5 (Satz von Gauß-Bonnet) +Sei S ⊆ R 3 + eine kompakte orientierbare reguläre Fläche. Dann gilt: + +S K (s )dA = 2πχ( S ) +Dabei ist χ ( S ) die Euler-Charakteristik von S . +Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von +Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. +Lösungen der Übungsaufgab en +Lösung zu Aufgabe1 +Teilaufgabe a) Es gilt: +(i) ∅ , X ∈ T +X . +(ii) T +X ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alleU +1 , U +2 ∈ +T +X : U + 1 ∩ U +2 ∈ T +X . +(iii) + Auch unter beliebigen Vereinigungen ist T +X abgeschlossen, d. h. es gilt für eine +beliebige Indexmenge I und alle U + i ∈ T +X für alle i ∈ I : +i ∈I U + i ∈ T +X +Also ist ( X, T +X ) ein topologischer Raum. +Teilaufgabe b) Wähle x = 1 , y = 0 . Dann gilt x = y und die einzige Umgebung von x +ist X . Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. +(X, T +X ) ist also nicht hausdorffsch. +Teilaufgabe c) Nach Bemerkung4sind metrische Räume hausdorffsch. Da(X, T + X ) nach +(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass(X, T +X ) +kein metrischer Raum sein kann. +Lösung zu Aufgabe2 +Teilaufgabe a) +Beh.: ∀a ∈ Z : { a } ist abgeschlossen. +Sei a ∈ Z beliebig. Dann gilt: +Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de +schicken. +Teilaufgabe b) +Beh.: { − 1, 1 } ist nicht offen +Bew.: durch Widerspruch +Annahme: { − 1, 1 } ist offen. 
+Dann gibt es $T \subseteq \mathcal{B}$, sodass $\bigcup_{M \in T} M = \{-1, 1\}$. Aber alle $U \in \mathcal{B}$ haben unendlich viele Elemente. Auch endlich viele Schnitte von Elementen in $\mathcal{B}$ haben unendlich viele Elemente
+$\Rightarrow$ keine endliche nicht-leere Menge kann in dieser Topologie offen sein $\Rightarrow \{-1, 1\}$ ist nicht offen.
+Teilaufgabe c)
+Beh.: Es gibt unendlich viele Primzahlen.
+101 Lösungen der Übungsaufgaben
+Bew.: durch Widerspruch
+Annahme: Es gibt nur endlich viele Primzahlen $p \in \mathbb{P}$
+Dann ist
+$$\mathbb{Z} \setminus \{-1, +1\} \overset{\text{FS d. Arithmetik}}{=} \bigcup_{p \in \mathbb{P}} U_{0,p}$$
+endlich. Das ist ein Widerspruch zu $|\mathbb{Z}|$ ist unendlich und $|\{-1, 1\}|$ ist endlich.
+Lösung zu Aufgabe 3
+(a) Beh.: Die offenen Mengen von $P$ sind Vereinigungen von Mengen der Form
+$$\prod_{j \in J} U_j \times \prod_{i \in \mathbb{N}, \, i \neq j} P_i$$
+wobei $J \subseteq \mathbb{N}$ endlich und $U_j \subseteq P_j$ offen ist.
+Beweis: Nach Definition der Produkttopologie bilden Mengen der Form
+$$\prod_{j \in J} U_j \times \prod_{i \in \mathbb{N} \setminus J} P_i$$
+wobei $J \subseteq \mathbb{N}$ endlich und $U_j \subseteq P_j$ offen $\forall j \in J$, eine Basis der Topologie.
+Damit sind die offenen Mengen von $P$ Vereinigungen von Mengen der obigen Form.
+(b) Beh.: Die Zusammenhangskomponenten von $P$ sind alle einpunktig.
+Beweis: Es seien $x, y \in P$ und $x$ sowie $y$ liegen in der gleichen Zusammenhangskomponente $Z \subseteq P$. Da $Z$ zusammenhängend ist und $\forall i \in \mathbb{N}: p_i: P \to P_i$ stetig ist, ist $p_i(Z) \subseteq P_i$ zusammenhängend für alle $i \in \mathbb{N}$. Die zusammenhängenden Mengen von $P_i$ sind genau $\{0\}$ und $\{1\}$, d. h. für alle $i \in \mathbb{N}$ gilt entweder $p_i(Z) \subseteq \{0\}$ oder $p_i(Z) \subseteq \{1\}$. Es sei $z_i \in \{0, 1\}$ so, dass $p_i(Z) \subseteq \{z_i\}$ für alle $i \in \mathbb{N}$. Dann gilt also:
+$$\underbrace{p_i(x)}_{= x_i} = z_i = \underbrace{p_i(y)}_{= y_i} \quad \forall i \in \mathbb{N}$$
+Somit folgt: $x = y$
+Lösung zu Aufgabe 4
+(a) Beh.: $\operatorname{GL}_n(\mathbb{R})$ ist nicht kompakt.
+Bew.: $\det: \operatorname{GL}_n(\mathbb{R}) \to \mathbb{R} \setminus \{0\}$ ist stetig. Außerdem ist $\det(\operatorname{GL}_n(\mathbb{R})) = \mathbb{R} \setminus \{0\}$ nicht kompakt. $\overset{22}{\Rightarrow} \operatorname{GL}_n(\mathbb{R})$ ist nicht kompakt.
+(b) Beh.: $\operatorname{SL}_1(\mathbb{R})$ ist kompakt, für $n > 1$ ist $\operatorname{SL}_n(\mathbb{R})$ nicht kompakt.
+Bew.: Für $\operatorname{SL}_1(\mathbb{R})$ gilt: $\operatorname{SL}_1(\mathbb{R}) = \{ A \in \mathbb{R}^{1 \times 1} \mid \det A = 1 \} = \{ (1) \} \cong \{ 1 \}$. $\overset{22}{\Rightarrow} \operatorname{SL}_1(\mathbb{R})$ ist kompakt.
+102 Lösungen der Übungsaufgaben
+$\operatorname{SL}_n(\mathbb{R}) \subseteq \operatorname{GL}_n(\mathbb{R})$ lässt sich mit einer Teilmenge des $\mathbb{R}^{n^2}$ identifizieren. Nach Satz 1.1 sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere nun für $n \in \mathbb{N}_{\geq 2}$, $m \in \mathbb{N}$:
+$$A_m = \operatorname{diag}_n\left(m, \tfrac{1}{m}, \dots, 1\right)$$
+Dann gilt: $\det A_m = 1$, d. h. $A_m \in \operatorname{SL}_n(\mathbb{R})$, und $A_m$ ist unbeschränkt, da $\| A_m \|_\infty = m \xrightarrow[m \to \infty]{} \infty$.
+(c) Beh.: $P(\mathbb{R})$ ist kompakt.
+Bew.: $P(\mathbb{R}) \cong S^n /_{x \sim -x}$. Per Definition der Quotiententopologie ist die Klassenabbildung stetig. Da $S^n$ als abgeschlossene und beschränkte Teilmenge des $\mathbb{R}^{n+1}$ kompakt ist $\overset{22}{\Rightarrow} P(\mathbb{R})$ ist kompakt.
+Lösung zu Aufgabe 5
+Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden.
+Definition 79
+Seien $(G, *)$ und $(H, \circ)$ Gruppen und $\varphi: G \to H$ eine Abbildung.
+$\varphi$ heißt Homomorphismus, wenn
+$$\forall g_1, g_2 \in G: \varphi(g_1 * g_2) = \varphi(g_1) \circ \varphi(g_2)$$
+gilt.
+Es folgt direkt:
+1) Sei $X = \mathbb{R}$ mit der Standardtopologie und $\varphi_1 := \operatorname{id}_{\mathbb{R}}$ und $R = (\mathbb{R}, +)$. Dann ist $\varphi_1$ ein Gruppenhomomorphismus und ein Homöomorphismus.
+2) Sei $G = (\mathbb{Z}, +)$ und $H = (\mathbb{Z}/3\mathbb{Z}, +)$. Dann ist $\varphi_2: G \to H, x \mapsto x \bmod 3$ ein Gruppenhomomorphismus. Jedoch ist $\varphi_2$ nicht injektiv, also sicher kein Homöomorphismus.
+3) Sei $X$ ein topologischer Raum. Dann ist $\operatorname{id}_X$ ein Homöomorphismus. Da keine Verknüpfung auf $X$ definiert wurde, ist $X$ keine Gruppe und $\operatorname{id}_X$ daher auch kein Gruppenhomomorphismus.
+Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten verwendet.
+Lösung zu Aufgabe 6
+Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf Seite 6.
+Definition 80
+Seien $(G, *)$ und $(H, \circ)$ Gruppen und $\varphi: G \to H$ eine Abbildung.
+$\varphi$ heißt Isomorphismus, wenn $\varphi$ ein bijektiver Homomorphismus ist.
+Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen +Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. +103 Lösungen der Übungsaufgaben +Lösung zu Aufgabe7 +(a) Vor.: Sei M eine topologische Mannigfaltigkeit. +Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend +Beweis: „ ⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung +direkt ausBemerkung 23. +„ ⇐“: Seien x, y ∈ M und +Z := { z ∈ M | ∃Weg von x nach z } +Es gilt: +(i) Z = ∅ , da M lokal wegzusammenhängend ist +(ii) Z ist offen, da M lokal wegzusammenhängend ist +(iii) Z C + := { ˜z ∈ M | Weg von x nach ˜z } ist offen +Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ Z C + eine offene und +wegzusammenhängende Umgebung U + ˜z ⊆ M . +Es gilt sogar U + ˜z ⊆ Z C + , denn gäbe es ein U + ˜z z ∈ Z , so gäbe es Wege γ + 2 : +[0 , 1] → M , γ + 2 (0) = z, γ +2 (1) = x und γ + 1 : [0 , 1] → M , γ + 1 (0) = ˜z, γ +1 (1) = z . +Dann wäre aber + γ : [0, 1] → M , +γ ( x ) = +γ + 1 (2x) falls 0 ≤ x ≤ 1 +2 +γ + 2 (2x − 1) falls 1 +2 < x ≤ 1 +ein stetiger Weg von ˜z nach x ⇒ Widerspruch. +Da M zusammenhängend ist und M = Z + +offen ∪ Z C + +offen , sowie Z = ∅ folgt Z C + = ∅. +Also ist M = Z wegzusammenhängend. +(b) Beh.: X ist wegzusammenhängend. +Beweis: X := (R \ { 0 }) ∪ { 0 +1 , 0 + 2 } und (R \ { 0 }) ∪ { 0 + 2 } sind homöomorph zu R. +Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte +0 +1 und 0 +2 . +Da (R \ { 0 } ) ∪ { 0 + 1 } homöomorph zu R ist, exisitert ein Weg γ + 1 von 0 +1 zu einem +beliebigen Punkt a ∈ R \ { 0 }. +Da ( R \ { 0 }) ∪ { 0 +2 } ebenfalls homöomorph zu R ist, existiert außerdem ein +Weg γ + 2 von a nach 0 + 2 . Damit existiert ein (nicht einfacher) Weg γ von 0 + 1 nach +0 +2 . +Lösung zu Aufgabe9 +Vor.: Sei ( X, d) eine absolute Ebene, A, B , C ∈ X und AB C ein Dreieck. +104 Lösungen der Übungsaufgaben +(a) Beh.: AB ∼ += AC ⇒ ∠ AB C ∼ += ∠ AC B +Bew.: Sei AB ∼ += AC . 
+⇒ ∃ Isometrie ϕ mit ϕ( B ) = C und ϕ (C ) = B und ϕ (A ) = A . +⇒ ϕ (∠ AB C ) = ∠ AC B +⇒ ∠ AB C ∼ += ∠AC B +(b) Beh.: + Der längeren Seite von AB C liegt der größere Winkel gegenüber und umge- +kehrt. +Bew.: Sei d(A, C ) > d(A, B ). Nach§3 (i)gibt es C + ∈ AC + + mit d(A, C + ) = d(A, B ) +⇒ C + liegt zwischen A und C . +Es gilt AB C + < AB C und ausAufgabe 9 (a)folgt: AB C + = AC + B . +∠ B C + A ist ein nicht anliegender Außenwinkel zu ∠ B C A Bem. 66 +=====⇒ B C + A > B C A +⇒ B C A < B C + A = AB C + < AB C Sei umgekehrt AB C > B C A, kann +wegen 1. Teil vonAufgabe 9 (b)nicht d(A, B ) > d( A, C ) gelten. +WegenAufgabe 9 (a)kann nicht d(A, B ) = d(A, C ) gelten. +⇒ d(A, B ) < d( A, C ) +(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g +Beh.: ∃! Lot +Bew.: + ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g . ϕ vertauscht die beiden +Halbebenen bzgl. g . +⇒ ϕ(P ) P schneidet g in F . +Es gibt eine Geradenspiegelung ϕ an g . ϕ vertauscht die beiden Halbebenen bzgl. g +⇒ ϕ(P ) P schneidet g in F . +Sei A ∈ g \ { F } . Dann gilt ϕ (∠ AF P ) = ∠ AF ϕ(P ) = π ⇒ ∠ AF P ist rechter Winkel. +Gäbe es nun G ∈ g \ { F }, so dass P G weiteres Lot von P auf g ist, wäre P F G +ein Dreieck mit zwei rechten Innenwinkeln (vgl.Abbildung 5.4). +· + ·A + GP +F + g +Abbildung 5.4:Zwei Lote zu einer Geraden g durch einen Punkt P +NachFolgerung 4.4ist die Summe von zwei Innenwinkeln immer < π +⇒ G gibt es nicht. +Lösung zu Aufgabe10 +Sei f h und o. B. d. A. f g . +f ∦ h ⇒ f ∩ h = ∅ , sei also x ∈ f ∩ h. Mit Axiom§5folgt: Es gibt höchstens eine Parallele +zu g durch x , da x /∈ g . Diese ist f , da x ∈ f und f g . Da aber x ∈ h, kann h nicht +105 Lösungen der Übungsaufgaben +parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zug durch x (f = h). ⇒ g ∦ h +Lösung zu Aufgabe11 +Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. 
Seien außerdem AB C und A + B + C +Dreiecke, für die gilt: + d(A, B ) = d (A + , B + ) +d(A, C ) = d (A + , C + ) +d(B , C ) = d(B + , C + ) +Sei ϕ die Isometrie mit ϕ( A) = A + , ϕ( B ) = B + und ϕ ( C + ) liegt in der selben Halbebene +bzgl. AB wie C . Diese Isometrie existiert wegen§4. +Es gilt d( A, C ) = d ( A + , C + ) = d( ϕ( A + ) , ϕ( C + )) = d( A, ϕ( C + )) und d( B , C ) = d( B + , C + ) = +d( ϕ(B + ) , ϕ(C + )) = d(B , ϕ(C + )). +Bem. 62 +=====⇒ C = ϕ (C ) . +Es gilt also ϕ (A + B + C + ) = AB C . +Bildquellen +Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. +Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. +Abb.0.1a S 2 + : Tom Bombadil,tex.stackexchange.com/a/42865 +Abb.0.1bWürfel: Jan Hlavacek,tex.stackexchange.com/a/12069 +Abb.0.1e T 2 + : Jake,tex.stackexchange.com/a/70979/5645 +Abb.1.6Stereographische Pro jektion:texample.net/tikz/examples/map-pro jections +Abb.1.11Knoten von Jim.belk aus der „Blue knots“-Serie: +– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png +– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png +– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure- Eight_Knot.png +– 6 +2 -Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png +Abb.1.12Reidemeister-Züge: YAMASHITA Makoto (1,2,3) +Abb.1.13 + Kleeblattknoten, 3-Färbung: Jim.belk,commons.wikimedia.org/wiki/File:Tricoloring. +png +Abb.2.1 + Doppeltorus: Oleg Alexandrov,commons.wikimedia.org/wiki/File:Double\_torus\_illustration. +png +Abb.2.8Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014. 
+Abb.3.3b3 Pfade auf Torus:Charles Staats, tex.stackexchange.com/a/149991/5645 +Abb.3.10Überlagerung von S 1 + mit R :Alex, tex.stackexchange.com/a/149706/5645 +Abb.4.7aSphärisches Dreieck:Dominique Toussaint, +commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png +Abb.5.1Möbiusband:Jake, tex.stackexchange.com/a/118573/5645 +Abb.5.3Krümmung des Torus:Charles Staats, tex.stackexchange.com/a/149991/5645 +Abkürzungsverzeichnis +Beh. Behauptung +Bew. Beweis +bzgl. bezüglich +bzw. beziehungsweise +ca. circa +d. h. das heißt +Def. Definition +etc. et cetera +ex. existieren +Hom. Homomorphismus +o. B. d. A. ohne Beschränkung der Allgemeinheit +Prop. Proposition +sog. sogenannte +Vor. Voraussetzung +vgl. vergleiche +z. B. zum Beispiel +zhgd. zusammenhängend +z. z. zu zeigen +Ergänzende Definitionen und Sätze +Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle +benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet, +aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra +und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. +Definition 81 +Sei D ⊆ R und x + 0 ∈ R. x +0 heißt ein Häufungspunkt von D : ⇔ ∃ Folge x + n in D \ { x + 0 } +mit x + n → x + 0 . +Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra +entnommen: +Definition 82 +Es seien V und W K-Vektorräume und A( V ) und A ( W ) die zugehörigen affinen Räume. +Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ + µ = 1 +gilt: + f (λa + µb ) = λf ( a) + µf ( b) +Definition 83 +Sei V ein Vektorraum und S ⊆ V eine Teilmenge. 
+S heißt eine Orthonormalbasis von V , wenn gilt: +(i) S ist eine Basis von V +(ii) ∀v ∈ S : v = 1 +(iii) ∀v +1 , v +2 ∈ S : v + 1 = v + 2 ⇒ v + 1 , v +2 = 0 +Satz (Zwischenwertsatz) +Sei a < b und f ∈ C [ a, b] := C ([ a, b]) , weiter sei y +0 ∈ R und f ( a ) < y +0 < f ( b) oder +f ( b) < y +0 < f (a ). Dann existiert ein x +0 ∈ [a, b] mit f ( x +0 ) = y +0 . +Definition 84 +Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. +v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f (v ) = λv . +Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f . +Satz (Binomischer Lehrsatz) +Sei x, y ∈ R. Dann gilt: + ( x + y )n + = n + +k =0 + n +k +x n− k + y k + ∀n ∈ N + 0 +Definition 85 +Seien a, b ∈ R 3 + Vektoren. +a × b :=  +a +1 +b +3 +a +3  + + ×  + a +1 +b +3 +a +3  + + =  + a +2 b +3 − a + 3 b +2 +a +3 b +1 − a + 1 b +3 +a +1 b +2 − a + 2 b +1  + +Symb olverzeichnis +Mengenoperationen +Seien A, B und M Mengen. +A C + Komplement von A +P (M ) Potenzmenge von M +M Abschluss von M +∂ M Rand der Menge M +M ◦ + Inneres der Menge M +A × B Kreuzprodukt +A ⊆ B Teilmengenbeziehung +A B echte Teilmengenbeziehung +A \ B Differenzmenge +A ∪ B Vereinigung +A ˙ +∪ B Disjunkte Vereinigung +A ∩ B Schnitt +Geometrie +AB + Gerade durch die Punkte A und +B +AB Strecke mit Endpunkten A und B +AB C Dreieck mit Eckpunkten A, B , C +AB ∼ += C D Die Strecken AB und C D sind +isometrisch +|K | Geometrische Realisierung des +Simplizialkomplexes K +Gruppen +Sei X ein topologischer Raum und K ein Kör- +per. +Homöo (X ) Homöomorphismengruppe +Iso( X ) Isometriengruppe +GL + n (K ) Allgemeine lineare Gruppe (von +G eneral L inear Group ) +SL +n (K ) Spezielle lineare Gruppe +PSL +n ( K ) Pro jektive lineare Gruppe Perm( X ) Permutationsgruppe +Sym(X ) Symmetrische Gruppe +Wege +Sei γ : I → X ein Weg. 
+[γ ] Homotopieklasse von γ +γ + 1 ∗ γ + 2 Zusammenhängen von Wegen +γ + 1 ∼ γ + 2 Homotopie von Wegen +γ (x ) Inverser Weg, also γ (x ) := γ (1 − x) +C Bild eines Weges γ , also C := +γ ([0, 1]) +Weiteres +B Basis einer Topologie +B +δ ( x ) δ -Kugel um x +S Subbasis einer Topologie +T Topologie +A Atlas +P Pro jektiver Raum +·, · Skalarprodukt +X/ + ∼ X modulo ∼ +[x ] +∼ Äquivalenzklassen von x bzgl. ∼ + x Norm von x +|x | Betrag von x +a Erzeugnis von a +S n + Sphäre +T n + Torus +f ◦ g Verkettung von f und g +π +X Pro jektion auf X +f | +U f eingeschränkt auf U +f − 1 + (M ) Urbild von M +Rg(M ) Rang von M +χ (K ) Euler-Charakteristik von K +110 Symbolverzeichnis +∆ k + Standard-Simplex +X # Y Verklebung von X und Y +d +n Lineare Abbildung ausBemer- +kung 37 +A ∼ += B A ist isometrisch zu B +f +∗ Abbildung zwischen Fundamental- +gruppen (vgl.Seite 49) +111 Symbolverzeichnis +Zahlenmengen +N = { 1, 2, 3, . . . } Natürliche Zahlen +Z = N ∪ { 0, −1 , −2 , . . . } Ganze Zahlen +Q = Z ∪ + 1 +2 , 1 +3 , 2 +3 + = + z +n mit z ∈ Z und n ∈ Z \ { 0 } + Rationale Zahlen +R = Q ∪ √ + 2, − 3√ + 3 , . . . + Reele Zahlen +R + + Echt positive reele Zahlen +R n ++, 0 := { ( x +1 , . . . , x + n ) ∈ R n + | x + n ≥ 0 } Halbraum +R × + = R \ { 0 } Einheitengruppe von R +C = { a + ib | a, b ∈ R } Komplexe Zahlen +P = { 2, 3, 5, 7, . . . 
} Primzahlen +H = { z ∈ C | z > 0 } obere Halbebene +I = [0, 1] R Einheitsintervall +f : S 1 + → R 2 + Einbettung der Kreislinie in die Ebene +π +1 (X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X +Fix(f ) Menge der Fixpunkte der Abbildung f + · + 2 2-Norm; Euklidische Norm +κ Krümmung +κ +Nor Normalenkrümmung +V (f ) Nullstellenmenge von f 2 +Krümmung +D + p F : R 2 + → R3 + Lineare Abbildung mit Jacobi-Matrix in p (sieheSeite 89) +T +s S Tangentialebene an S ⊆ R 3 + durch s ∈ S +d +s n ( x) Weingarten-Abbildung +2 + von Vanishing Set +Stichwortverzeichnis +Abbildung +affine,107 +differenzierbare,29 +homotope,50 +offene,53 +simpliziale,35 +stetige,9 +Abschluss,3 +Abstand,86 +Abstandsaxiom,65 +Achterknoten,20 +Aktion, siehe Gruppenoperation +Anordnungsaxiome,66 +Atlas,24 +Außenwinkel,70 +Axiom,64 +Axiomensystem,64 +Basis,3 +Baum,37 +Betti-Zahl,41 +Bewegungsaxiom,66 +Binormalenvektor,89 +Cantorsches Diskontinuum,22 +C k + -Struktur,29 +Decktransformation,59 +Decktransformationsgruppe,59 +Deformationsretrakt,47 +dicht,3 +Diffeomorphismus,29 +Dimension,34 +diskret,53 +Doppelverhältnis,83 +Dreibein +begleitendes,89 +Ebene +euklidische,64 +Eigenvektor,107 +Eigenwert,107 einfach zusammenhängend,49 +Einheitsnormalenfeld,90 +Euler-Charakteristik, siehe Eulerzahl +Eulersche Polyederformel,38 +Eulerzahl,36 +Färbbarkeit,21 +Faser, siehe Urbild +Fläche +orientierbare,90 +reguläre,30 +Flächenelement,95 +Formoperator, siehe Weingarten-Abbildung +Fundamentalform +erste,94 +zweite,97 +Fundamentalgruppe,47 +Gauß-Krümmung,92, 91–94 +Geometrie,64 +Gerade,64 +hyperbolische,77 +Graph,37 +Grenzwert,8 +Gruppe +allgemeine lineare,22,26 +spezielle lineare,22 +topologische,33 +Gruppe operiert durch Homöomorphismen, +61 +Gruppenaktion, siehe Gruppenoperation +Gruppenoperation,60, 60–63 +stetige,61 +Häufungspunkt,107 +Hülle +konvexe,34 +Halbebene,66 +Halbgerade,65 +Halbraum,28 +Hauptkrümmung,92 +Hilbert-Kurve,19,19 +113 Stichwortverzeichnis +Homöomorphismengruppe,10 
+Homöomorphismus,9 +Homologiegruppe,41 +Homomorphismus,101 +Homotopie,44 +Homotopieklasse,47 +Inklusionsabbildung,47 +Innenwinkel,70 +Inneres,3 +Inzidenzaxiome,64 +Isometrie,6,10 +Isometriegruppe,10 +Isomorphismus,101 +Isotopie,20 +Jordankurve,19 +geschlossene,19 +Karte,24 +Kartenwechsel,28 +Kern +offener,3 +Kleeblattknoten,20 +Klumpentopologie, siehe triviale Topologie +Knoten,20, 17–21 +äquivalente,20 +trivialer,20 +Knotendiagramm,20 +kollinear,65 +kongruent, siehe isometrisch +Kongruenz, siehe Isometrie +Kongruenzsatz +SSS,104 +SWS,69 +SWW,74 +WSW,70 +Krümmung,88,89 +Kreis,37 +Kreuzprodukt,107 +Kurve,87 +Länge einer,87 +Lage +allgemeine,34 +Lehrsatz +Binomischer,107 +Lie-Gruppe,33 +liegt zwischen,65 +Liftung,54 +Limes,8 lokal,3 +Lot,86 +Lotfußpunkt,86 +Möbiusband,91 +Möbiustransformation,80 +Mannigfaltigkeit,24 +differenzierbare,29 +geschlossene,25 +glatte,29 +mit Rand,28 +Menge +abgeschlossene,2 +offene,2 +zusammenhängende,11 +Metrik,6 +diskrete,6 +hyperbolische,84 +SNCF,8 +Nebenwinkel,86 +Neilsche Parabel,27 +Normalenfeld,90 +Normalenvektor,87,89 +Normalkrümmung,91,92,98 +Oktaeder,34 +Orthonormalbasis,107 +Paraboloid +hyperbolisches,92 +Parallele,66 +Parallelenaxiom,64 +parametrisiert +durch Bogenlänge,87 +Parametrisierung +reguläre,30 +Polyzylinder,17 +Produkttopologie,4 +Pro jektion +stereographische,11 +Punkt,34 +Quotiententopologie,5,10,11 +Rand,3,28 +Raum +hausdorffscher,8 +kompakter,14 +metrischer,6 +pro jektiver,5,22,25,52 +114 Stichwortverzeichnis +topologischer,2 +zusammenhängender,11 +Realisierung +geometrische,34 +Retraktion,47 +Satz von +Gauß-Bonnet,98 +Scheitelwinkel,86 +Seite,34 +Sierpińskiraum,3,22 +Simplex,34 +Simplizialkomplex,34 +Simplizialkomplexe +flächengleiche,74 +Sphäre +exotische,29 +Standard-Simplex,34 +Standardtopologie,2 +sternförmig,48 +Stetigkeit, 9–11 +Strecke,65 +Struktur +differenzierbare,29 +Subbasis,3 +Tangentialebene,89, 89–90 +Teilraum,4 +Teilraumtopologie,4 +Teilsimplex,34 +Topologie +diskrete,2,6 +euklidische,2 +feinste,11 
+triviale,2 +Zariski,2,12,15 +Torus,iii,5,38,51,93 +Total Unzusammenhängend,100 +Triangulierung,38 +Überdeckung,14 +Übergangsfunktion, siehe Kartenwechsel +Überlagerung,51, 51–60 +reguläre,59 +universelle,57 +Umgebung,3 +Umgebungsbasis,58 +vanishing set,26 +Vektorprodukt, siehe Kreuzprodukt +Verklebung,26 verträglich,29 +Würfel,34 +Weg,17 +einfacher,17 +geschlossener,17 +homotope,44 +inverser,48 +zusammengesetzter,46 +Wegzusammenhang,18 +Weingarten-Abbildung,95 +Winkel,70 +Zusammenhang, 11–14 +Zusammenhangskomponente,13 +Zwischenwertsatz,107 diff --git a/read/results/pymupdf/1601.03642.txt b/read/results/pymupdf/1601.03642.txt index ab22ff4..6aeb67d 100644 --- a/read/results/pymupdf/1601.03642.txt +++ b/read/results/pymupdf/1601.03642.txt @@ -100,9 +100,9 @@ basic building blocks is a time-intensive and difficult task. An important group of machine learning algorithms was inspired by biological neurons and are thus called artificial neural networks. Those networks are based on mathematical -functions called artificial neurons which take n ∈ N num- -bers x1, . . . , xn ∈ R as input, multiply them with weights -w1, . . . , wn ∈ R, add them and apply a so called activation +functions called artificial neurons which take n ∈N num- +bers x1, . . . , xn ∈R as input, multiply them with weights +w1, . . . , wn ∈R, add them and apply a so called activation function ϕ as visualized in Figure 1(a). One example of such an activation function is the sigmoid function ϕ(x) = 1 diff --git a/read/results/pymupdf/1602.06541.txt b/read/results/pymupdf/1602.06541.txt index f95bfd4..6d69bee 100644 --- a/read/results/pymupdf/1602.06541.txt +++ b/read/results/pymupdf/1602.06541.txt @@ -175,10 +175,10 @@ However, this can only support the explanation of particular problems or showcase special situation. For meaningful information about the overall accuracy, there are a couple of metrics how accuracy can be defined. 
-For this section, let k ∈ N be the number of classes, -nij ∈ N0 with i, j ∈ 1, . . . , k be the number of pixels +For this section, let k ∈N be the number of classes, +nij ∈N0 with i, j ∈1, . . . , k be the number of pixels which belong to class i and were labeled as class j. -(nij) is called a confusion matrix. Let ti = �k +(nij) is called a confusion matrix. Let ti = Pk j=1 nij be the total number of pixels of class i. One way to compare segmentation algorithms is by @@ -186,9 +186,9 @@ the pixel-wise accuracy of the predicted segmentation as done in many publications [SWRC06], [CP08], [LSD14]. This is also called per-pixel rate and de- fined as -�k +Pk i=1 nii -�k +Pk i=1 ti . Taking the pixel-wise classification accuracy has two major drawbacks: P1 Tasks like segmenting images for autonomous cars @@ -209,29 +209,29 @@ car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: • mean accuracy: 1 -k · �k +k · Pk i=1 nii -ti ∈ [0, 1] +ti ∈[0, 1] • mean intersection over union: 1 -k · �k +k · Pk i=1 nii -ti−nii+�k -j=1 nji ∈ [0, 1] +ti−nii+Pk +j=1 nji ∈[0, 1] • frequency weighted intersection over union: -(�k +(Pk i=1 ti) -−1 �k +−1 Pk i=1 ti · nii -ti−nii+�k -j=1 nji ∈ [0, 1] +ti−nii+Pk +j=1 nji ∈[0, 1] Another problem might be pixels which cannot be assigned to one of the known classes. For this reason, [SWRC06] makes use of a void class. This class gets @@ -473,7 +473,7 @@ an image is histogram equalization, which can be applied to improve contrast [PAA+87], [RM07]. 2) Histogram of oriented Gradients: Histogram of oriented gradients (HOG) features interpret the image -as a discrete function I : N2 → { 0, . . . , 255 } which +as a discrete function I : N2 →{ 0, . . . , 255 } which maps the position (x, y) to a color. For each pixel, there are two gradients: The partial derivative of x and y. Now the original image is transformed to two feature @@ -687,8 +687,8 @@ D. 
SVMs SVMs are well-studied binary classifiers which can be described by five central ideas. For those ideas, the training data is represented as (xi, yi) where xi is the -feature vector and yi ∈ { −1, 1 } the binary label for -training example i ∈ { 1, . . . , m }. +feature vector and yi ∈{ −1, 1 } the binary label for +training example i ∈{ 1, . . . , m }. 1) If data is linearly separable, it can be separated by a hyperplane. There is one hyperplane which maximizes the distance to the next datapoints @@ -698,12 +698,12 @@ w,b 1 2∥w∥2 s.t. ∀m -i=1yi · (⟨w, xi⟩ + b) -� -�� -� +i=1yi · (⟨w, xi⟩+ b) +| +{z +} sgn applied to this gives the classification -≥ 1 +≥1 2) Even if the underlying process which generates the features for the two classes is linearly separable, noise can make the data not separable. The intro- @@ -711,7 +711,7 @@ duction of slack variables to relax the requirement of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter -C ∈ R+ +C ∈R+ 0 . The bigger C, the more errors are accepted. The new optimization problem is: minimize @@ -719,13 +719,13 @@ w 1 2∥w∥2 + C · m -� +X i=1 ξi s.t. ∀m -i=1yi · (⟨w, xi⟩ + b) ≥ 1 − ξi -Note that 0 ≤ ξi ≤ 1 means that the data point -is within the margin, whereas ξi ≥ 1 means it is +i=1yi · (⟨w, xi⟩+ b) ≥1 −ξi +Note that 0 ≤ξi ≤1 means that the data point +is within the margin, whereas ξi ≥1 means it is misclassified. An SVM with C > 0 is also called a soft-margin SVM. 3) The primal problem is to find the normal vector @@ -733,10 +733,10 @@ w and the bias b. The dual problem is to express w as a linear combination of the training data xi: w = m -� +X i=1 αiyixi -where yi ∈ { −1, 1 } represents the class of the +where yi ∈{ −1, 1 } represents the class of the training example and αi are Lagrange multipliers. The usage of Lagrange multipliers is explained with some examples in [Smi04]. 
The usage of the @@ -750,22 +750,22 @@ to [Bur98]: maximize αi m -� +X i=1 -αi − 1 +αi −1 2 m -� +X i=1 m -� +X j=1 αiαjyiyj⟨xi, xj⟩ s.t. ∀m -i=10 ≤ αi ≤ C +i=10 ≤αi ≤C s.t. m -� +X i=1 αiyi = 0 @@ -783,14 +783,14 @@ This function K is called a kernel. The idea of never explicitly transforming the vectors xi to the higher dimensional space is called the kernel trick. Common kernels include the polynomial kernel -KP (xi, xj) = (⟨xi, xj⟩ + r)p +KP (xi, xj) = (⟨xi, xj⟩+ r)p of degree p and coefficient r, the Gaussian radial basis function (RBF) kernel KGauss(xi, xj) = e −γ∥xi−xj ∥2 2σ2 and the sigmoid kernel -Ktanh(xi, xj) = tanh(γ⟨xi, xj⟩ − r) +Ktanh(xi, xj) = tanh(γ⟨xi, xj⟩−r) where the parameter γ determines how much influence single training examples have. 5) The described SVMs can only distinguish between @@ -862,14 +862,14 @@ gets labeled as shown in Figure 3. For example, a MRF which is trained on images of the size 224 px×224 pixel and gets the raw RGB values as features has 224 · 224 · 3 -� -�� -� +| +{z +} input + 224 · 224 -� -�� -� +| +{z +} output = 200 704 random variables. Those random variables are condi- @@ -891,18 +891,18 @@ typically live on 0, . . . , 255 or [0, 1]. The probability of x, y can be expressed as P(x, y) = 1 Z e−E(x,y) -where Z = � +where Z = P x,y e−E(x,y) is a normalization term called the partition function and E is called the energy function. A common choice for the energy function is E(x, y) = -� +X c∈C ψc(x, y) where ψ is called a clique potential. One choice for cliques of size two x, y = (x1, x2) is [KP06] ψc(x1, x2) = wδ(x1, x2) = -� +( +w if x1 ̸= x2 −w @@ -927,14 +927,14 @@ compared to MRFs is that no distribution assumption about x has to be made. A CRF has the partition function Z: Z(x) = -� +X y P(x, y) and joint probability distribution P(y|x) = 1 Z(x) -� +Y c∈C ψc(yc|x) The simplest way to define the clique potentials ψ is @@ -981,7 +981,7 @@ function to the weighted sum and gives an output. 
Those neurons can take either a feature vector as input or the output of other neurons. In this way, they build up feature hierarchies. -The parameters they learn are the weights w ∈ R. +The parameters they learn are the weights w ∈R. They are learned by gradient descent. To do so, an error function — usually cross-entropy or mean squared error — is necessary. For the gradient descent algorithm, one @@ -2274,7 +2274,7 @@ Classes Channels Data source Colon Crypt DB -(302 px − 1116 px) × (349 px − 875 px) +(302 px −1116 px) × (349 px −875 px) 389 2 3 @@ -2286,19 +2286,19 @@ DIARETDB1 3 [KKV+14] KITTI Road -(1226 px − 1242 px) × (370 px − 376 px) +(1226 px −1242 px) × (370 px −376 px) 289 2 3 [FKG13] MSRCv1 -(213 px − 320 px) × (213 px − 320 px) +(213 px −320 px) × (213 px −320 px) 240 9 3 [MSR] MSRCv2 -(213 px − 320 px) × (162 px − 320 px) +(213 px −320 px) × (162 px −320 px) 591 23 3 @@ -2310,13 +2310,13 @@ Open-CAS Endoscopic Datasets 3 [MHMK+14] PASCAL VOC 2012 -(142 px − 500 px) × ( 71 px − 500 px) +(142 px −500 px) × ( 71 px −500 px) 2913 20 3 [EVGW+12] Warwick-QU -(567 px − 775 px) × (430 px − 522 px) +(567 px −775 px) × (430 px −522 px) 165 5 3 diff --git a/read/results/pymupdf/1707.09725.txt b/read/results/pymupdf/1707.09725.txt index f8bdbce..61fb43c 100644 --- a/read/results/pymupdf/1707.09725.txt +++ b/read/results/pymupdf/1707.09725.txt @@ -429,34 +429,34 @@ This chapter introduces linear image filters in Section 2.1, then standard laye CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. 2.1. Linear Image Filters -A linear image filter (also called a filter bank or a kernel) is an element F ∈ Rkw×kh×d, +A linear image filter (also called a filter bank or a kernel) is an element F ∈Rkw×kh×d, where kw represents the filter’s width, kh the filter’s height and d the number of input -channels. 
The filter F is convolved with the image I ∈ Rw×h×d to produce a new image I′. +channels. The filter F is convolved with the image I ∈Rw×h×d to produce a new image I′. The output image I′ has only one channel. Each pixel I′(x, y) of the output image gets calculated by point-wise multiplication of one filter element with one element of the original image I: I′(x, y) = -⌊ kw +⌊kw 2 ⌋ -� -ix=1−⌈ kw +X +ix=1−⌈kw 2 ⌉ -⌊ kh +⌊kh 2 ⌋ -� -iy=1−⌈ kh +X +iy=1−⌈kh 2 ⌉ d -� +X ic=1 I(x + ix, y + iy, ic) · F(ix, iy, ic) This procedure is explained by Figure 2.1. It is essentially a discrete convolution. -I ∈ R7×7 +I ∈R7×7 Filter kernel -F ∈ R3×3 +F ∈R3×3 Result of point-wise multiplication -I′ ∈ R7×7 +I′ ∈R7×7 104 116 116 @@ -591,7 +591,7 @@ high-level features which are combinations of the low-level features. Also, models should utilize the fact that the pixels of images are ordered. One way to use this is by learning image filters in so called convolutional layers. While MLPs vectorize the input, the input of a layer in a CNN are feature maps. A feature -map is a matrix m ∈ Rw×h, but typically the width equals the height (w = h). For an RGB +map is a matrix m ∈Rw×h, but typically the width equals the height (w = h). For an RGB 4 2.2. CNN Layer Types @@ -609,16 +609,16 @@ the linear convolutions are the parameters which are adapted to the training dat number n of filters as well as the filter’s size kw × kh are hyperparameters of convolutional layers. Sometimes, it is denoted as n@kw × kh. Although the filter depth is usually omitted in the notation, the filters are of dimension kw × kh × d(i−1), where d(i−1) is the number of -feature maps of the input layer (i − 1). -Another hyperparameter of convolution layers is the stride s ∈ N≥1 and the padding. +feature maps of the input layer (i −1). +Another hyperparameter of convolution layers is the stride s ∈N≥1 and the padding. 
Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the size of the feature maps doesn’t change. The hyperparameters of convolutional layers are -• the number of filters n ∈ N≥1, -• kw, kh ∈ N≥1 of the filter size kw × kh × d(i−1), +• the number of filters n ∈N≥1, +• kw, kh ∈N≥1 of the filter size kw × kh × d(i−1), • the activation function of the layer (see Table B.3) and -• the stride s ∈ N≥1 -Typical choices are n ∈ { 32, 64, 128 }, kw = kh = k ∈ { 1, 3, 5, 11 } such as in [KSH12, +• the stride s ∈N≥1 +Typical choices are n ∈{ 32, 64, 128 }, kw = kh = k ∈{ 1, 3, 5, 11 } such as in [KSH12, SZ14, SLJ+15], rectified linear unit (ReLU) activation and s = 1. The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+89]. With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just @@ -631,28 +631,28 @@ if only the flattened output is compared. This is easier to see when the filtering operation is denoted formally: o(i)(x) = b + k -� +X j=1 wij · xj -with i ∈ { 1, . . . , w } × { 1, . . . , h } × { 1, . . . , d } +with i ∈{ 1, . . . , w } × { 1, . . . , h } × { 1, . . . , d } [2.1] o(x,y,z)(I) = b + -⌊ kw +⌊kw 2 ⌋ -� -ix=1−⌈ kw +X +ix=1−⌈kw 2 ⌉ -⌊ kh +⌊kh 2 ⌋ -� -iy=1−⌈ kh +X +iy=1−⌈kh 2 ⌉ d -� +X ic=1 Fz(ix, iy, ic) · I(x + ix, y + iy, ic) [2.2] -with a bias b ∈ R, x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } +with a bias b ∈R, x ∈{ 1, . . . , w } , y ∈{ 1, . . . , h } and z ∈{ 1, . . . , d } One can see that most weights of the equivalent MLP are zero and many weights are equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. 
The effect of fewer parameters is that less training data is necessary to get suitable @@ -698,7 +698,7 @@ Another insight recently got important: Every fully connected layer has an equiv convolutional layer which has the same weights.2 This way, one can use the complete classification network as a very complex non-linear image filter which can be used for semantic segmentation. -A fully connected layer with d ∈ N≥1 inputs and n ∈ N≥1 nodes can be interpreted as a +A fully connected layer with d ∈N≥1 inputs and n ∈N≥1 nodes can be interpreted as a convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs. When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize @@ -708,8 +708,8 @@ omitted if a convolution layer without padding and a filter size equal to the f size is applied. This was used by [LSD15]. 2.2.2. Pooling Layers Pooling summarizes a p × p area of the input feature map. Just like convolutional layers, -pooling can be used with a stride of s ∈ N>1. As s ≥ 2 is the usual choice, pooling layers -are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as +pooling can be used with a stride of s ∈N>1. As s ≥2 is the usual choice, pooling layers +are sometimes also called subsampling layers. Typically, p ∈{ 2, 3, 4, 5 } and s = 2 such as for AlexNet [KSH12] and VGG-16 [SZ14]. The type of summary for the set of activations A varies between the functions listed in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling @@ -722,26 +722,26 @@ Name Definition Used by Max pooling -max { a ∈ A } +max { a ∈A } [BPL10, KSH12] Average / mean pooling 1 |A| -� +P a∈A a LeNet-5 [LBBH98] and [KSlB+10] ℓ2 pooling -�� +pP a∈A a2 [Le13] Stochastic pooling * [ZF13] -Table 2.1.: Pooling types for a set A of activations a ∈ R. 
+Table 2.1.: Pooling types for a set A of activations a ∈R. (*) For stochastic pooling, each of the p×p activation values ai in the pooling region gets picked with probability pi = ai -� +P aj ∈A aj . This assumes the activations ai are non-negative. Pooling is applied for three reasons: To get local translational invariance, to get invariance against minor local changes and, most important, for data reduction to @@ -834,13 +834,13 @@ Dropout is a technique used to prevent overfitting and co-adaptations of neuron the output of any neuron to zero with probability p. It was introduced in [HSK+12] and is well-described in [SHK+14]. A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of -the same shape D ∈ { 0, 1 }s is sampled, where each element di is sampled independently +the same shape D ∈{ 0, 1 }s is sampled, where each element di is sampled independently from a Bernoulli distribution. The results are element-wise multiplied to calculate the output out of the Dropout layer: -out = D ⊙ in -with di ∼ B(1, p) -where ⊙ is the Hadamard product -(A ⊙ B)i,j := (A)i,j(B)i,j +out = D ⊙in +with di ∼B(1, p) +where ⊙is the Hadamard product +(A ⊙B)i,j := (A)i,j(B)i,j Hence every value of the input gets set to zero with a dropout probability of p. Typically, Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob- ability than later layers. In order to keep the expected output at the same value, the @@ -871,23 +871,23 @@ One way to approach this problem is by normalizing mini-batches as described in Batch Normalization layer with d-dimensional input x = (x(1), . . . 
, x(d)) is first normalized point-wise to ˆx(k) = -x(k) − ¯x(k) -� +x(k) −¯x(k) +p s′[x(k)]2 + ε with ¯x(k) = 1 m -�m +Pm i=1 x(k) i being the sample mean and s′[x(k)]2 = 1 m -�m +Pm i=1(x(k) i -− ¯x(k)) the -sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0 +−¯x(k)) the +sample variance where m ∈N≥1 is the number of training samples per mini-batch, ε > 0 being a small constant to prevent division by zero and x(k) i is the activation of neuron k for @@ -899,7 +899,7 @@ In the case of fully connected layers, this is applied to the activation, before is applied. If it is applied after the activation, it harms the training in early stages. For convolution, only one γ and one β is learned per feature map. One important special case is γ(k) = -� +p s′[x(k)]2 + ε and β(k) = ¯x(k), which would make the Batch Normalization layer an identity layer. During evaluation time,3 the expected value and the variance are calculated once for the @@ -907,10 +907,10 @@ complete dataset. An unbiased estimate of the empirical variance is used. The question where Batch Normalization layers (BN) should be applied and for which reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the activation function. Considering this, the possible options for the order are: -1. CONV / FC → BN → activation function → Dropout → . . . -2. CONV / FC → activation function → BN → Dropout → . . . -3. CONV / FC → activation function → Dropout → BN → . . . -4. CONV / FC → Dropout → BN → activation function → . . . +1. CONV / FC →BN →activation function →Dropout →. . . +2. CONV / FC →activation function →BN →Dropout →. . . +3. CONV / FC →activation function →Dropout →BN →. . . +4. CONV / FC →Dropout →BN →activation function →. . . The authors of [IS15] suggest to use Batch Normalization before the activation function as in Items 1 and 4. 
Batch Normalization after the activation lead to better results in https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md @@ -953,7 +953,7 @@ Image source: [HZRS15a] Two common ways to add more parameters to neural networks are increasing their depth by adding more layers or increasing their width by adding more neurons / filters. Inception blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as -“ResNeXt block”: Increasing the cardinality C ∈ N≥1. By cardinality, the authors describe +“ResNeXt block”: Increasing the cardinality C ∈N≥1. By cardinality, the authors describe the concept of having C small convolutional networks with the same topology but different weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not combine aggregation blocks with residual blocks as the authors did. @@ -976,7 +976,7 @@ the same topology, the learned weights are different. The outputs of the groups concatenated. The hyperparameters of an aggregation block are: • The topology of the group members. -• The cardinality C ∈ N≥1. Note that a cardinality of C = 1 is equivalent in every +• The cardinality C ∈N≥1. Note that a cardinality of C = 1 is equivalent in every aspect to using the group network without an aggregation block. 12 @@ -1063,31 +1063,31 @@ are not covered by the training set and thus indicate the need to collect more d 2. Convolutional Neural Networks 2.5.2. Confusion Matrices -A confusion matrix is a matrix (c)ij ∈ NK×K +A confusion matrix is a matrix (c)ij ∈NK×K ≥0 -, where K ∈ N≥2 is the number of classes, +, where K ∈N≥2 is the number of classes, which contains all correct and wrong classifications. The item cij is the number of times items of class i were classified as class j. This means the correct classification is on the -diagonal cii and all wrong classifications are of the diagonal. The sum �K +diagonal cii and all wrong classifications are of the diagonal. 
The sum PK i=1 -�K +PK j=1 cij is the total number of samples which were evaluated and -� +P i=1 cii -�K +PK i=1 -�K +PK j=1 cij is the accuracy. -The sums r(i) = �K +The sums r(i) = PK j=1 cij of each class i are worth being investigated as they show if the classes are skewed. If the number of samples of one class dominates the data set, then the classifier can get a high accuracy by simply always prediction the most common class. If the accuracy of the classifier is close to the a priory probability of the most common class, techniques to deal with skewed classes might help. An automatic criterion to check for this problem is -accuracy ≤ max({ r(i) | i = 1, . . . , k }) -�k +accuracy ≤max({ r(i) | i = 1, . . . , k }) +Pk i=1 r(i) + ε where ε is a small value to compensate the fact that some examples might be correct just @@ -1096,20 +1096,20 @@ Other values which should be checked are the class-wise sensitivities: s(k) = # correctly identified instances of class k # instances of class k = ckk -r(k) ∈ [0, 1] +r(k) ∈[0, 1] If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is necessary for s(i). The class-wise confusion fconfusability(k1, k2) = ck1k2 -�K +PK j=1 ck1j indicates if class k1 gets often classified as class k2. The highest values here can indicate if two classes should be merged or a specialized model for separating those classes could improve the overall system. 2.5.3. Validation Curves: Accuracy, loss and other metrics Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal -axis and a quality metric on the vertical axis. Accuracy, error = (1 − accuracy) or loss are +axis and a quality metric on the vertical axis. Accuracy, error = (1 −accuracy) or loss are typical quality metrics. Other quality metrics can be found in [OHIL16]. 
In case that the number of training epochs are used as the examined hyperparameter, validation curves give an indicator if training longer improves the model’s performance. By @@ -1138,7 +1138,7 @@ Error Training set Validation set Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs -and the quality metric is the error (1 − accuracy). The longer the network is trained, +and the quality metric is the error (1 −accuracy). The longer the network is trained, the better it gets on the training set. At some point the network is fit too well to the training data and loses its capability to generalize. At this point the quality curve of the training set and the validation set diverge. While the classifier is still improving on @@ -1153,40 +1153,40 @@ real value to a complex event like the predicted class of a feature vector. It i the objective function. For classification problems the loss function is typically cross-entropy with ℓ1 or ℓ2 regularization, as it was described in [NH92]: ECE(W) = − -� +X x∈X K -� +X k=1 [tx k log(ox -k) + (1 − tx -k) log(1 − ox +k) + (1 −tx +k) log(1 −ox k)] -� -�� -� +| +{z +} cross-entropy data loss + λ1 · ℓ1 -� �� � -� +z }| { +X w∈W |w| +λ2 · ℓ2 -� �� � -� +z }| { +X w∈W w2 -� -�� -� +| +{z +} model complexity loss -where W are the weights, X is the training data set, K ∈ N≥0 is the number of classes and +where W are the weights, X is the training data set, K ∈N≥0 is the number of classes and tx k indicates if the training example x is of class k. ox k is the output of the classification -algorithm which depends on the weights. λ1, λ2 ∈ [0, ∞) weights the regularization and is +algorithm which depends on the weights. λ1, λ2 ∈[0, ∞) weights the regularization and is typically smaller than 0.1. 17 @@ -1218,15 +1218,15 @@ Quality criteria There are several quality criteria for classification models. 
Most quality criteria are based the confusion matrix c which denotes at cij the number of times the real class was i and j was predicted. This means the diagonal contains the number of correct predictions. For -the following, let ti = �k +the following, let ti = Pk j=1 cij be the number of training samples for class i. The most common quality criterion is accuracy: accuracy(c) = -�k +Pk i=1 cii -�k +Pk i=1 ti -∈ [0, 1] +∈[0, 1] One problem of accuracy as a quality criterion are skewed classes. If one class is by far more common than all other classes, then the simplest way to achieve a high score is to always classify everything as the most common class. @@ -1234,11 +1234,11 @@ In order to fix this problem, one can use the mean accuracy: mean-accuracy(c) = 1 k · k -� +X i=1 cii ti -∈ [0, 1] +∈[0, 1] For two-class problems there are many other metrics like precision, recall and Fβ-score. Quality criteria for semantic segmentation are explained in [Tho16]. Besides the quality of the classification result, several other quality criteria are important @@ -1335,7 +1335,7 @@ Gradient-based approaches In [SVZ13], a gradient-based approach was used to generate image-specific class saliency maps. The authors describe the problem as a ranking problem, where each pixel of the image I0 is assigned a score Sc(I0) for a class c of interest. CNNs are non-linear functions, -but they can be approximated by the first order Taylor expansion Sc(I) ≈ wT I + b where +but they can be approximated by the first order Taylor expansion Sc(I) ≈wT I + b where w is the derivative of Sc at I0. 21 @@ -1385,7 +1385,7 @@ max (x,y)∈{−k,...,k}2\(0,0) ⟨Wi, T(Wj, x, y)⟩f ∥Wi∥2 ∥Wj∥2 -∈ [−1, 1], +∈[−1, 1], where T(·, x, y) denotes the translation of the first operand by (x, y), with zero padding at the borders to keep the shape. ⟨·, ·⟩f denotes the flattened inner product, where the two operands are flattened into column vectors before applying the standard inner product. 
The @@ -1398,7 +1398,7 @@ is defined as ¯ρk(W) = 1 N N -� +X i=1 N max @@ -1481,17 +1481,16 @@ connected to the output nodes. 4. Correlation Maximization: Train the weights of the candidates by maximizing S, the correlation between candidates output value V with the networks residual error: S = -� +X o∈O -������ -� + +X p∈T -� -Vp − ¯V -� -(Ep,o − ¯ +Vp −¯V + +(Ep,o −¯ Eo) -������ + where O is the set of output nodes, T is the training set, Vp is the candidate neurons activation for a training pattern p. Ep,o is the residual output error at node o for pattern p. ¯V and ¯ @@ -1510,7 +1509,7 @@ maximization whereas the white squares are trainable weights. Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where weights are deterministic and fixed at prediction time, each weight wij in Meiosis networks follows a normal distribution: -wij ∼ N(µij, σ2 +wij ∼N(µij, σ2 ij) 28 @@ -1520,12 +1519,14 @@ ij. The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled weights: -� -�i σij +P +i σij +P i µij > 1 and -� -�k σjk +P +k σjk +P k µjk > 1 The mean of the new nodes is sampled around the old mean, half the variance is assigned @@ -1534,7 +1535,7 @@ Hence Meiosis networks only change the number of neurons per layer. They do not layers or add skip connections. 3.1.3. Automatic Structure Optimization Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on- -line handwriting recognition. It makes use of the confusion matrix C = (cij) ∈ Nk×k +line handwriting recognition. It makes use of the confusion matrix C = (cij) ∈Nk×k ≥0 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix S with sij = sji = cij · cji. The maximum of S defines where the ASO algorithm adds @@ -1560,7 +1561,7 @@ of parameter k the saliency sk. 
The parameters with the lowest saliency are dele means they are set to 0 and are not updated anymore. A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights in a much better way. This requires, however, to calculate the inverse Hessian matrix -H−1 ∈ Rn×n where n ∈ N is typically n > 106. +H−1 ∈Rn×n where n ∈N is typically n > 106. A much simpler and computationally cheaper pruning criterion is the weight magnitude. [HPTD15] prunes all weights w which are below a threshold θ: w ← @@ -1568,7 +1569,7 @@ w ←   w -if w ≥ θ +if w ≥θ 0 otherwise 3.3. Genetic approaches @@ -1678,7 +1679,7 @@ can be directly with standard clustering algorithms such as k-means, DBSCAN [EKS OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of them do not allow a human to improve the found clustering manually. -The confusion matrix (c)ij ∈ Nk×k states how often class i was present and class j was +The confusion matrix (c)ij ∈Nk×k states how often class i was present and class j was 34 4.2. Clustering classes @@ -1698,12 +1699,12 @@ Hence the order of the classes is permutated in such a way that the highest erro to the diagonal. One possible objective function to be minimized is f(C) = n -� +X i=1 n -� +X j=1 -Cij · |i − j| +Cij · |i −j| [4.1] which punishes errors linearly with the distance to the diagonal. This method is called CMO in the following. @@ -1736,7 +1737,7 @@ confusions are not made and thus many elements of the confusion matrix are close Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1). Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. 
With -such a permutation, only n − 1 binary decisions have to be made and hence only the list of +such a permutation, only n −1 binary decisions have to be made and hence only the list of classes has to be read. Alternatively, one can calculate the confusions C′ i,i+1 + C′ i+1,i for @@ -1772,7 +1773,7 @@ are grouped to 20 superclasses. It includes animals, people, plants, outdoor sce and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16]. GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. -The 51 839 images are in color and of a minimum size of 25 px× 25 px up to 266 px× 232 px. +The 51 839 images are in color and of a minimum size of 25 px×25 px up to 266 px×232 px. The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11]. According to [SSSI], human performance is at 98.84 %. HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images @@ -1813,7 +1814,7 @@ Early stopping [Pre98] with the validation accuracy as a stopping criterion and patience of 10 epochs. Kernel weights are initialized according to the uniform initialization scheme of He [HZRS15b] (see Appendix B.3). The architecture of the baseline model uses a pattern of -Conv-Block(n) = (Convolution − Batch Normalization − Activation)n − Pooling +Conv-Block(n) = (Convolution −Batch Normalization −Activation)n −Pooling The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for the last layer where softmax is used. Before the last two convolutional layer, a dropout layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1. @@ -1969,7 +1970,7 @@ BN + Softmax k @ 1 × 1 -� +P 515k +892 512 1032k @@ -2008,7 +2009,7 @@ Dropout, p = 0.5 C k@1 × 1/1 Global AVG pooling BN + Softmax -Figure 5.1.: Architecture of the baseline model. 
C 32@3× 3/1 is a convolutional layer with 32 filters +Figure 5.1.: Architecture of the baseline model. C 32@3×3/1 is a convolutional layer with 32 filters of kernel size 3 × 3 with stride 1. 39 @@ -2164,7 +2165,7 @@ The distribution of filter weights by layer is visualized in Figure 5.2 and the of bias weights by layer is shown in Figure 5.3. Although both figures only show the distribution for one specific model trained on CIFAR-100, the following observed patterns are consistent for 70 models (7 datasets and 10 models per dataset): -• The empiric [0.5 − percentile, 99.5 − percentile] interval which contains 99 % of the +• The empiric [0.5 −percentile, 99.5 −percentile] interval which contains 99 % of the filter weights is almost symmetric around zero. The same is true for the bias weights. • The farther a layer is from the input away, the smaller the 99-percentile interval is, except for the last layer (see Table A.1). @@ -2215,7 +2216,7 @@ trained on CIFAR-100. 5. Experimental Evaluation Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For each filter, the range of values is recorded by channel. The smaller this range is, the -less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter. +less information is lost if a n × n filter is replaced by a (n −1) × (n −1) filter. 44 5.1. Baseline Model and Training setup @@ -2354,7 +2355,7 @@ viewed in electronic form. CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters which are to be found. The number of errors is determined by (i) Join all n clusters which contain the classes of the coarse class C to a set M. The error is n. (ii) Within M, find the -set of classes M− which do not belong to C. (iii) The final error is n + |M−|. As can be +set of classes M−which do not belong to C. (iii) The final error is n + |M−|. As can be seen in Table 5.4, both clustering methods find reasonable clusters. 
CMO, however, has only half the error of spectral clustering. The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be @@ -2424,7 +2425,7 @@ B, B B, B 0 C -C, c, ⊂ and C , ξ, E and C +C, c, ⊂and C , ξ, E and C 4 C, c, ⊂, C and C 1 @@ -2454,9 +2455,9 @@ K, κ K, κ 0 L -L, ⌊ and L, L +L, ⌊and L, L 1 -L, ⌊ and L, L +L, ⌊and L, L 1 M M and M and M @@ -2469,14 +2470,14 @@ N and N, N and N N and N, N and N, ℵ 3 O -O, O, 0, ◦, °, � and o +O, O, 0, ◦, °, # and o 1 -O, O, 0, ◦, ° and � and o +O, O, 0, ◦, ° and # and o 2 P P, P and p, ρ and P and ℘ 3 -P and P, P, ℘ and p, ρ +P and P, P, ℘and p, ρ 2 Q Q, Q, Q, ι, ⊔, ≳, ℓ, ℑ, Æ, 1 @@ -2494,12 +2495,12 @@ S, s, S S, s, S 0 T -T, ⊤ and T , τ +T, ⊤and T , τ 1 -T, ⊤ and T , τ +T, ⊤and T , τ 1 U -U, ∪ and u, U, A +U, ∪and u, U, A 1 U, u, U, A and ∪ 2 @@ -2783,11 +2784,10 @@ feature maps of layer i where i = 0 is the input layer and all filters are 3 × a bias, then the number of parameters is Parameters = k -� +X i=1 -� -(ni−1 · 32 + 1) · ni -� +(ni−1 · 32 + 1) · ni + Hence the width of one layer does not only influence the parameters in this layer, but also in the next layer. The number of possible subsequent layers of one feature map size is enormous, even if @@ -2890,7 +2890,7 @@ Batch Normalization. 5.9. Batch size 5.9. Batch size -The mini-batch size m ∈ N≥1 influences +The mini-batch size m ∈N≥1 influences • Epochs until convergence: The smaller m, the more often the model is updated in one epoch. Those updates, however, are based on fewer samples of the dataset. Hence the gradients of different mini-batches can noticeably differ. In the literature, @@ -3033,28 +3033,28 @@ As expected, PReLU and ELU performed best. Unexpected was that the logistic func tanh and softplus performed worse than the identity and it is unclear why the pure-softmax network performed so much better than the logistic function. 
One hypothesis why the logistic function performs so bad is that it cannot produce negative outputs. Hence the -logistic− function was developed: +logistic−function was developed: logistic−(x) = 1 -1 + e−x − 0.5 -The logistic− function has the same derivative as the logistic function and hence still suffers -from the vanishing gradient problem. The network with the logistic− function achieves an +1 + e−x −0.5 +The logistic−function has the same derivative as the logistic function and hence still suffers +from the vanishing gradient problem. The network with the logistic−function achieves an accuracy which is 11.30 % better than the network with the logistic function, but is still 5.54 % worse than the ELU. Similarly, ReLU was adjusted to have a negative output: -ReLU−(x) = max(−1, x) = ReLU(x + 1) − 1 -The results of ReLU− are much worse on the training set, but perform similar on the test +ReLU−(x) = max(−1, x) = ReLU(x + 1) −1 +The results of ReLU−are much worse on the training set, but perform similar on the test 61 5. Experimental Evaluation set. The result indicates that the possibility of hard zero and thus a sparse representation is either not important or similar important as the possibility to produce negative outputs. This contradicts [GBB11, SMGS14]. -A key difference between the logistic− function and ELU is that ELU does neither suffers +A key difference between the logistic−function and ELU is that ELU does neither suffers from the vanishing gradient problem nor is its range of values bound. For this reason, the S2ReLU activation function, defined as S2ReLU(x) = ReLU(x -2 + 1) − ReLU(−x +2 + 1) −ReLU(−x 2 + 1) =   @@ -3065,11 +3065,11 @@ S2ReLU(x) = ReLU(x    -− x +−x 2 + 1 -if x ≤ −2 +if x ≤−2 x -if − 2 ≤ x ≤ 2 +if −2 ≤x ≤2 x 2 + 1 if x > −2 @@ -3428,8 +3428,8 @@ why they improve the test accuracy is by reducing the variance. The idea of label smoothing is to use the ensemble prediction of the training data as labels for another classifier. 
For every element x of the training set, the one-hot encoded target t(x) is smoothed by the ensemble prediction yE(x) -t′(x) = α · t(x) + (1 − α)yE(x) -where α ∈ [0, 1] is the smoothing factor. +t′(x) = α · t(x) + (1 −α)yE(x) +where α ∈[0, 1] is the smoothing factor. There are three reasons why label smoothing could be beneficial: • Training speed: The ensemble prediction contains more information about the image than binary class decisions. Classifiers in computer vision predict how similar @@ -3616,7 +3616,7 @@ BN + Softmax k @ 1 × 1 -� +P 514k +947 654 520k @@ -4087,35 +4087,35 @@ trained on CIFAR-100. Figure A.2.: The distribution of bias weights of a model without batch normalization trained on CIFAR-100. Algorithm 1 Simulated Annealing for minimizing Equation (4.1). -Require: C ∈ Nn×n, steps ∈ N, T ∈ R+, c ∈ (0, 1) +Require: C ∈Nn×n, steps ∈N, T ∈R+, c ∈(0, 1) procedure SimulatedAnnealing(C, steps, T, c) -bestScore ← accuracy(C) -bestC ← C -for i = 0; i < steps; i ← i + 1 do -p ← randomFloat(0, 1) +bestScore ←accuracy(C) +bestC ←C +for i = 0; i < steps; i ←i + 1 do +p ←randomFloat(0, 1) if p < 0.5 then -▷ Swap rows -i ← randomInteger(1, . . . , n) -j ← randomInteger(1, . . . , n) \ { i } -p ← randomUniform(0, 1) -C′ ← swap(C, i, j) -s ← accuracy(C′) -if p < exp( s−bestScore +▷Swap rows +i ←randomInteger(1, . . . , n) +j ←randomInteger(1, . . . , n) \ { i } +p ←randomUniform(0, 1) +C′ ←swap(C, i, j) +s ←accuracy(C′) +if p < exp(s−bestScore T ) then -C ← C′ +C ←C′ if s > bestScore then -bestScore ← s -bestC ← C -T ← T · c +bestScore ←s +bestC ←C +T ←T · c else -▷ Move Block -s ← randomInteger(1, . . . , n) -▷ Block start -e ← randomInteger(s, . . . , n) -▷ Block end -i ← randomInteger(1, . . . , n − (e − s)) -▷ Block insert position +▷Move Block +s ←randomInteger(1, . . . , n) +▷Block start +e ←randomInteger(s, . . . , n) +▷Block end +i ←randomInteger(1, . . . , n −(e −s)) +▷Block insert position Move Block (s, . . . 
, e) to position i return bestM 76 @@ -4364,10 +4364,10 @@ Vertical flip 2 [DWD15]1 Rotation -∼ 40 (δ = 20) +∼40 (δ = 20) [DSRB14] Scaling -∼ 14 (δ ∈ [0.7, 1.4]) +∼14 (δ ∈[0.7, 1.4]) [DSRB14] Crops 322 = 1024 @@ -4377,16 +4377,16 @@ Shearing GANs [BCW+17] Brightness -∼ 20 (δ ∈ [0.5, 1.5]) +∼20 (δ ∈[0.5, 1.5]) [How13] Hue 51 (δ = 0.1) [MRM15, DSRB14] Saturation -∼ 20 (δ = 0.5) +∼20 (δ = 0.5) [DSRB14] Contrast -∼ 20 (δ ∈ [0.5, 1.5]) +∼20 (δ ∈[0.5, 1.5]) [How13] Channel shift [KSH12] @@ -4402,13 +4402,13 @@ Less common, but also reasonable are: • Color casting (used by [WYS+15]) • Vignetting (used by [WYS+15]) • Lens distortion (used by [WYS+15]) -1Vertical flipping combined with 180◦ rotation is equivalent to horizontal flipping +1Vertical flipping combined with 180◦rotation is equivalent to horizontal flipping 80 B.3. Initialization Weight initializations are usually chosen to be small and centered around zero. One way to characterize many initialization schemes is by -w ∼ α · U[−1, 1] + β · N(0, 1) + γ with α, β, γ ≥ 0 +w ∼α · U[−1, 1] + β · N(0, 1) + γ with α, β, γ ≥0 Table B.2 shows six commonly used weight initialization schemes. Several schemes use the same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. Name @@ -4419,11 +4419,11 @@ Reference Constant α = 0 β = 0 -γ ≥ 0 +γ ≥0 used by [ZF14] Xavier/Glorot uniform α = -� +q 6 nin+nout β = 0 @@ -4432,10 +4432,10 @@ nin+nout Xavier/Glorot normal α = 0 β = -� + 2 (nin+nout) -�2 +2 γ = 0 [GB10] He @@ -4455,7 +4455,7 @@ LSUV — γ = 0 [MM15] -Table B.2.: Weight initialization schemes of the form w ∼ α · U[−1, 1] + β · N(0, 1) + γ. +Table B.2.: Weight initialization schemes of the form w ∼α · U[−1, 1] + β · N(0, 1) + γ. nin, nout are the number of units in the previous layer and the next layer. Typically, biases are initialized with constant 0 and weights by one of the other schemes to prevent unit-coadaptation. 
However, dropout makes it possible to use constant initialization for @@ -4464,46 +4464,46 @@ LSUV and Orthogonal initialization cannot be described with this simple pattern. B.4. Objective function For classification tasks, the cross-entropy ECE(W) = − -� +X x∈X K -� +X k=1 [tx k log(ox -k) + (1 − tx -k) log(1 − ox +k) + (1 −tx +k) log(1 −ox k)] is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, X is the set of training examples, K is the number of classes, tx -k ∈ { 0, 1 } indicates if the +k ∈{ 0, 1 } indicates if the training example x is of class k, ox k is the output of the classifier for the training example x and class k. -However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added: +However, regularization terms weighted with a constant λ ∈(0, +∞) are sometimes added: • LASSO: ℓ1 (e.g., used in [HPTD15]) • Weight decay: ℓ2 (e.g., λ = 0.0005 as in [MSM16]) -• Orthogonality regularization (|(W T · W − I)|, see [VTKP17]) +• Orthogonality regularization (|(W T · W −I)|, see [VTKP17]) 81 B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule -wji ← wji + ∆wji with ∆wji = −η ∂Ex +wji ←wji + ∆wji with ∆wji = −η ∂Ex ∂wji -where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. +where η ∈(0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically -mini-batch sizes are |B| ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes +mini-batch sizes are |B| ∈{ 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes lead to longer training times due to computational overhead and to more training steps due to gradient noise. 
-wji ← wji + ∆wji with ∆wji = −η∂EB +wji ←wji + ∆wji with ∆wji = −η∂EB ∂wji Nine variations which adjust the learning rate during training are: • Momentum: w(t+1) ji -← w(t) +←w(t) ji + ∆w(t+1) ji with ∆w(t+1) @@ -4512,17 +4512,17 @@ ji ∂wji + α∆w(t) ji -with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16]) +with α ∈[0, 1], typically 0.9 (e.g., [ZF14, MSM16]) • Adagrad [DHS11] • RProp and the mini-batch version RMSProp [TH12] • Adadelta [Zei12] -• Power Scheduling [Xu11]: η(t) = η(0)(1 + a · t)−c, where t ∈ N0 is the training step, +• Power Scheduling [Xu11]: η(t) = η(0)(1 + a · t)−c, where t ∈N0 is the training step, a, c are constants. • Performance Scheduling [SHY+13]: Measure the error on the cross validation set and decrease the learning rate when the algorithms improvement is below a threshold. -• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0) · 10− t -k where t ∈ N0 is the -training step, η(0) is the initial learning rate, k ∈ N≥1 is the number of training steps +• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0) · 10−t +k where t ∈N0 is the +training step, η(0) is the initial learning rate, k ∈N≥1 is the number of training steps until the learning rate is decreased by 1 10th. @@ -4566,7 +4566,7 @@ Sign function†   +1 -if x ≥ 0 +if x ≥0 −1 if x < 0 { −1, 1 } @@ -4632,8 +4632,8 @@ ELU  x if x > 0 -α(ex − 1) -if x ≤ 0 +α(ex −1) +if x ≤0 (−∞, +∞)   @@ -4646,13 +4646,13 @@ otherwise Softmax‡ o(x)j = exj -�K +PK k=1 exk [0, 1]K o(x)j · -�K +PK k=1 exk−exj -�K +PK k=1 exk [KSH12, Tho14a] Maxout‡ @@ -4668,7 +4668,7 @@ otherwise [GWFM+13] Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 and functions marked with ‡ operate on all elements of a layer simultaneously. The -hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other +hyperparameters α ∈(0, 1) of Leaky ReLU and ELU are typically α = 0.01. 
Other activation function like randomized leaky ReLUs exist [XWCL15], but are far less commonly used. Some functions are smoothed versions of others, like the logistic function for the @@ -4700,7 +4700,7 @@ y ϕ2(x) = tanh(x) ϕ3(x) = max(0, x) ϕ4(x) = log(ex + 1) -ϕ5(x) = max(x, ex − 1) +ϕ5(x) = max(x, ex −1) Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, whereas tanh and the logistic function are always below 1. @@ -4729,8 +4729,8 @@ has ki · ki−1(n · m + 1) parameters. The +1 is due to the bias. • A fully connected layer with n nodes after k feature maps of size m1 × m2 has n · (k · m1 · m2 + 1) parameters. • A dense block with a depth of L, a growth rate of n and 3 × 3 filters has L + n · 32 + -32 · n2 �L -i=0(L − i) = L + 9n + 9n2 L2−L +32 · n2 PL +i=0(L −i) = L + 9n + 9n2 L2−L 2 parameters. According to [HPTD15], AlexNet has 60 million parameters which is roughly the number @@ -4741,13 +4741,13 @@ the following number are only giving rough estimates. In the following, nϕ denotes the number of FLOPs to compute the non-linearity ϕ. For simplicity, nϕ = 5 was chosen. • A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with -W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n · (k + (k − 1) + 1) = 2nk +W ∈Rn×k, x ∈Rk×1, b ∈Rn×1. It hence needs about n · (k + (k −1) + 1) = 2nk additions / multiplications before the non-linearity ϕ is calculated. The total number of FLOPs is 2 · n · k + n · nϕ. • In the following, biases are ignored. A convolutional layer with ki filters of size n × m being applied to ki−1 filter maps of size w × h results in ki filter maps of size w × h if padding is applied. For each element of each filter map, n·m·ki−1 multiplications and -(n · m · ki−1 − 1) additions have to be made. This results in (2nmki−1 − 1) · (ki · w · h) +(n · m · ki−1 −1) additions have to be made. 
This results in (2nmki−1 −1) · (ki · w · h) operations. The total number of FLOPs is (2·n·m·ki−1 −1)·(ki ·w ·h)+ki ·w ·h·nϕ. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. @@ -4864,7 +4864,7 @@ Fully Connected (output) 850 1 730 10 -� +P 61 710 15 144 446 9118 @@ -4879,7 +4879,7 @@ The first CNN which achieved major improvements on the ImageNet dataset was Ale Its architecture is shown in Figure D.2 and described in Table D.2. It has about 60·106 param- eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet. Note that the uncompressed size is at least 60 965 224 floats · 32 bit -float ≈ 244 MB. +float ≈244 MB. Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). @@ -4976,7 +4976,7 @@ FC 4 097 000 8 M 1000 -� +P 60 965 224 3300 M 1 122 568 @@ -5193,7 +5193,7 @@ FC 4 097 000 8 M 1000 -� +P 138 357 544 31 000 M 15 245 800 @@ -5287,7 +5287,7 @@ Dropout (p=0.8) Softmax 1 537 000 1000 -� +P 42 679 816 Table D.4.: Inception-v4 network. 
95 @@ -5348,43 +5348,43 @@ STL-10 3 [CLN11, CLN10] Caltech-101 -(80 px − 3481 px) -×(92 px − 3999 px) +(80 px −3481 px) +×(92 px −3999 px) 9144 102 3 [FFP03, FFFP06] Caltech-256 -(75 px − 7913 px) -×(75 px − 7913 px) +(75 px −7913 px) +×(75 px −7913 px) 30 607 257 3 [Gri06, GG07] ILSVRC 20121 -(8 px − 9331 px) -×(10 px − 6530 px) +(8 px −9331 px) +×(10 px −6530 px) 1.2 · 106 1000 3 [Ima12, RDS+14] Places3652 -(290px − 3158px) -×(225px − 2630px) +(290px −3158px) +×(225px −2630px) 1.8 · 106 365 3 [Zho16, ZKL+16] GTSRB -(25 px − 266 px) -×(25 px − 232 px) +(25 px −266 px) +×(25 px −232 px) 51 839 43 3 [SSSI, SSSI12] Asirra3 -(4 px − 500 px) -×(4 px − 500 px) +(4 px −500 px) +×(4 px −500 px) 25 000 2 3 @@ -5475,15 +5475,15 @@ Table E.2.: An overview over state of the art results achieved in computer visio Algorithm 2 Create a classification dataset from a semantic segmentation dataset Require: Semantic segmentation dataset (DS) procedure CreateDataset(Annotated dataset DS) -DC ← List -w ← desired image width -h ← desired image height +DC ←List +w ←desired image width +h ←desired image height for Image and associated label (x, y) in DS do -i ← randint(0, L.width − w) -j ← randint(0, L.height − h) -cL ← crop(y, (i, j), (i + w, j + h)) +i ←randint(0, L.width −w) +j ←randint(0, L.height −h) +cL ←crop(y, (i, j), (i + w, j + h)) if at least 50% of s are of one class then -cI ← crop(x, (i, j), (i + w, j + h)) +cI ←crop(x, (i, j), (i + w, j + h)) D.append((cI, cL)) return (DC) 98 @@ -5666,7 +5666,8 @@ Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 55 5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67 -A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +A.1 Image Filters +. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75 A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 
76 diff --git a/read/results/pymupdf/2201.00021.txt b/read/results/pymupdf/2201.00021.txt index 3cb4a10..50e59a7 100644 --- a/read/results/pymupdf/2201.00021.txt +++ b/read/results/pymupdf/2201.00021.txt @@ -49,7 +49,7 @@ quent observations have led to the detection of new metastable ammonia masers, including 15NH3 (3,3) (Mauersberger et al. 1986), NH3 (1,1) (Gaume et al. 1996), NH3 (2,2) (Mills et al. 2018), NH3 (5,5) (Cesaroni et al. 1992), NH3 (6,6) (Beuther -⋆ Member of the International Max Planck Research School (IM- +⋆Member of the International Max Planck Research School (IM- PRS) for Astronomy and Astrophysics at the universities of Bonn and Cologne. et al. 2007), NH3 (7,7), NH3 (9,9), and NH3 (12,12) (Henkel @@ -133,7 +133,7 @@ January and 2021 February, July, and August. The S14mm dou- ble beam secondary focus receiver was employed. The full width at half maximum (FWHM) beam size is 49′′ at 18.5 GHz, the frequency of the target line. The observations were performed in -position switching mode, and the off position was 10′ in azimuth +position switching mode, and the offposition was 10′ in azimuth 1 Based on observations with the 100-meter telescope of the MPIfR (Max-Planck-Institut für Radioastronomie) at Effelsberg. away from the source. For observations made before 2021 Au- @@ -151,7 +151,7 @@ a main-beam brightness temperature, TMB, scale. This flux den- sity was calibrated assuming a TMB/S ratio of 1.95 K/Jy, derived from continuum cross scans of NGC 7027 (the flux density was adopted from Ott et al. 1994). Calibration uncertainties are esti- -mated to be ∼ 10%. +mated to be ∼10%. We used the GILDAS/CLASS2 package (Pety 2005) to re- duce the spectral line data. A first-order polynomial was sub- tracted from each spectrum for baseline removal. @@ -306,11 +306,11 @@ sented in Fig. 2. Three different locations showing NH3 (9,6) emission are found toward G34.26+0.15 (Fig. 4). 
The deconvolved NH3 (9,6) component sizes are (1′′.42±0′′.43)×(0′′.54±0′′.62) at P.A. = 97◦ -(M1), (0′′.42 ± 0′′.27) × (0′′.15 ± 0′′.27) at P.A. = 150◦ (M2), and +(M1), (0′′.42 ± 0′′.27) × (0′′.15 ± 0′′.27) at P.A. = 150◦(M2), and Article number, page 4 of 10 Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -(1′′.17 ± 0′′.34) × (0′′.27 ± 0′′.46) at P.A. = 53◦ (M3) and are thus +(1′′.17 ± 0′′.34) × (0′′.27 ± 0′′.46) at P.A. = 53◦(M3) and are thus comparable to or smaller than the beam size. Overall, the NH3 (9,6) line from G34.26+0.15 weakened during the time interval from 2020 January to 2021 August by @@ -380,7 +380,7 @@ Finally, the non-detections of the (8,5) and (10,7) lines also indicate that the (9,6) line is special. This allows us to derive lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas -the NH3 (8,5) and (10,7) lines are para-NH3 (K � 3n) lines. +the NH3 (8,5) and (10,7) lines are para-NH3 (K , 3n) lines. The minimum ortho-to-para ratios are in the range 12–42 and 1– 8 toward Cep A and G34.26+0.15, respectively. The statistical weights for the ortho states are twice as large as those for the @@ -394,18 +394,18 @@ the case of G34.26+0.15 is likely similar. published (quasi-)thermal NH3 emission The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines show thermal emission toward Cep A over a velocity range of -−13 km s−1 ≤ VLSR ≤ −4 km s−1 (Brown et al. 1981; Güsten +−13 km s−1 ≤VLSR ≤−4 km s−1 (Brown et al. 1981; Güsten et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average NH3 column density of ∼5×1015 cm−2 was estimated for a region of 3′′ around HW2 (Torrelles et al. 1999). This high NH3 abun- dance could provide a suitable environment for maser species. 
-Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼ −10 km s−1 +Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼−10 km s−1 in both (1,1) and (2,2) lines were found toward HW2 (Torrelles et al. 1993). The velocity is similar to the cloud’s systemic lo- cal standard of rest (LSR) velocity of −11.2 km s−1, which is based on CO (Narayanan & Walker 1996) and HCO+ ob- servations (Gómez et al. 1999). Our (9,6) maser is redshifted -(−0.9 km s−1 ≤ VLSR +(−0.9 km s−1 ≤VLSR ≤2.9 km s−1) and shares positions with the outflowing gas seen in CO and HCO+ with similarly red- shifted velocities. Therefore, we argue that the (9,6) masers are @@ -426,19 +426,19 @@ While these lines were measured with a beam size of about lines with the kinetic temperature reveals the size of the hot, ammonia-emitting core to be only ∼2.5′′. All those measured NH3 lines were quasi-thermal and had LSR velocities of -∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1 +∼58.5 km s−1, close to the systemic velocity of ∼58.1 km s−1 obtained from C17O observations (Wyrowski et al. 2012). Their line widths (∆V1/2 ≥3.6 km s−1) are larger than what -we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1) for each (9,6) +we find (0.35 km s−1 ≤∆V1/2 ≤0.94 km s−1) for each (9,6) maser component (see details in Table A.3). In all, we may have observed four different (9,6) velocity features. Three are blueshifted at VLSR -∼ 53.8 km s−1, 55.8 km s−1, and +∼53.8 km s−1, 55.8 km s−1, and 56.8 km s−1, and a fourth, tentatively detected, at 62.5 km s−1. This tentative redshifted feature was only potentially detected with Effelsberg in 2020 January. The velocity is similar to that of the JVLA measurements on the NH3 (1,1) absorption line -against continuum source C (∼ 7′′ resolution; Keto et al. 1987) +against continuum source C (∼7′′ resolution; Keto et al. 1987) Article number, page 5 of 10 A&A proofs: manuscript no. mainArxiv @@ -463,9 +463,9 @@ maser flux is associated with the compact H ii region HW3d. 
OH maser features close to the H ii regions are also seen in HW2 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These three kinds of masers in Cep A have a large velocity range of -−25 km s−1 ≤ VLSR ≤ −2 km s−1 and are widespread around +−25 km s−1 ≤VLSR ≤−2 km s−1 and are widespread around HW2 and HW3, while NH3 (9,6) emission is only detected at -−0.9 km s−1 ≤ VLSR +−0.9 km s−1 ≤VLSR ≤2.9 km s−1 toward a sub-arcsecond- sized region to the west of the peak continuum position of HW2 (see Fig. 3). This suggests that the NH3 (9,6) maser in Cep A @@ -475,7 +475,7 @@ In G34.26+0.15, OH (Zheng et al. 2000), H2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al. 2016) masers have been detected east of source C (Fig. 4), and none of them coincides with the head of C. The NH3 (9,6) maser M1 is also found -slightly off the head of source C. This could suggest that M1 +slightly offthe head of source C. This could suggest that M1 is powered by continuum source C or by an outflow. Near com- ponent B, there are some OH and CH3OH masers but no H2O or NH3 masers. A group of H2O masers, well-known tracers @@ -577,7 +577,7 @@ Acknowledgements. We would like to thank the anonymous referee for the use- ful comments that improve the manuscript. Y.T.Y. is a member of the Interna- tional Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China -Scholarship Council (CSC) for its support. We would like to thank the staff at +Scholarship Council (CSC) for its support. We would like to thank the staffat the Effelsberg for their help provided during the observations. We thank the staff of the JVLA, especially Tony Perreault and Edward Starr, for their assistance with the observations and data reduction. 
This research has made use of the @@ -710,7 +710,7 @@ Epoch Channel S ν rms -� +R S νdv VLSR ∆V1/2 diff --git a/read/results/pymupdf/2201.00022.txt b/read/results/pymupdf/2201.00022.txt index ce51fa4..1a33d74 100644 --- a/read/results/pymupdf/2201.00022.txt +++ b/read/results/pymupdf/2201.00022.txt @@ -7,7 +7,7 @@ Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50 − 70 M⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +50 −70 M⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions @@ -34,10 +34,10 @@ source GW190521 (The LIGO Scientific Collaboration et al. 2020a,b) produced an intermediate mass black hole of approximately 142 M⊙. This event may have also had a -85 M⊙ progenitor, which falls within the pair-instability +85 M⊙progenitor, which falls within the pair-instability mass gap that limits stellar black holes (BHs) to no more than ∼ -< 50 M⊙ (e.g., Heger et al. 2003; Woosley +< 50 M⊙(e.g., Heger et al. 2003; Woosley 2017)1. Similarly, the merger products of GW150914, GW170104, and GW170814 fall within the mass gap (e.g., Abbott et al. 2016, 2017a,b). @@ -49,7 +49,7 @@ Corresponding author: Sanaea C. Rose srose@astro.ucla.edu 1 Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli -2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski +2017a; Limongi & Chieffi2018a; Sakstein et al. 2020; Belczynski et al. 2020a; Renzo et al. 2020; Vink et al. 2021). lar clusters, or the field (e.g., Rodriguez et al. 
2018; Ro- driguez et al. 2019; Fishbach et al. 2020; Mapelli et al. @@ -110,7 +110,7 @@ single interactions, binary BH GW merger, and GW merger recoil kicks. The post-kick merger product sinks back towards the cluster center over a dynamical fric- tion timescale. Using this approach, they showed that -103 − 104 M⊙ IMBHs can form efficiently over the life- +103 −104 M⊙IMBHs can form efficiently over the life- time of a cluster. However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision @@ -152,7 +152,7 @@ Section 2.9. Finally, we discuss and summarize our find- ings in Section 3. 2. METHODOLOGY We consider a population of stellar mass BHs embed- -ded in a cluster of 1 M⊙ stars. When stars and BHs +ded in a cluster of 1 M⊙stars. When stars and BHs collide, the BHs can accrete mass. The growth rate de- pends on the physical processes outlined below. We use a statistical approach to estimate the stellar encounters @@ -162,7 +162,7 @@ We consider a population of BHs within the inner few parsecs of the SMBH in a galactic nucleus (GN). We as- sume that the BH mass distribution follows that of the stars from which they originate, a Kroupa initial mass -function dN/dm ∝ m−2.35. While this choice represents +function dN/dm ∝m−2.35. While this choice represents a gross oversimplification, it has very little bearing on our final results. Future work may address the particu- lars of the BH mass distribution, but we do not expect @@ -172,7 +172,7 @@ and lower limits of the BH mass distribution are 5 and compass the range of upper bounds predicted by stellar evolution models, which vary between 40 and 125 M⊙ depending on the metallicity (Heger et al. 2003; Woosley -2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; +2017; Spera & Mapelli 2017b; Limongi & Chieffi2018b; Belczynski et al. 2020b; Renzo et al. 2020). We assume that the orbits of the BHs follow a thermal eccentricity distribution. 
We draw their semimajor axes, a•, from a @@ -195,7 +195,7 @@ BH is on a circular orbit. The timescales depend on the density, so we adopt a range of density profiles, bounded by α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark -blue line represents the time for a 105 M⊙ BH to merge with +blue line represents the time for a 105 M⊙BH to merge with the SMBH through GW emission. observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribu- @@ -209,14 +209,14 @@ where n is the number density of objects, σ is the ve- locity dispersion, and A is the cross-section. We use the collision timescale from Rose et al. (2020): t−1 -coll = πn(a•)σ(a•) +coll =πn(a•)σ(a•) × -� + f1(e•)r2 c + f2(e•)rc 2G(mBH + m⋆) σ(a•)2 -� + . (1) where G is the gravitational constant and rc is the sum of the radii of the interacting objects, a black hole with @@ -231,18 +231,18 @@ itational focusing, which enhances the cross-section of interaction. Assuming a circular orbit for simplicity, we plot the timescale for a BH orbiting in the GN to collide with -a 1 M⊙ star as a function of distance from the SMBH +a 1 M⊙star as a function of distance from the SMBH in Figure 1.2 As this timescale depends on the density of surrounding stars, we adopt a density profile of the form: ρ(r•) = ρ0 -�r• +r• r0 -�−α +−α , (2) where r• denotes the distance from the SMBH. We adopt -a SMBH mass of 4 × 106 M⊙ such that our fiducial GN +a SMBH mass of 4 × 106 M⊙such that our fiducial GN matches our own galactic center (e.g., Ghez et al. 2005; Genzel et al. 2003). In this case, the normalization in Eq. (2) is ρ0 = 1.35 × 106 M⊙/pc3 at r0 = 0.25 pc (Gen- @@ -257,7 +257,7 @@ n(r•) = ρ(r•) The collision timescale also depends on the velocity dis- persion, which we express as: σ(r•) = -� +s GM• r•(1 + α), (4) @@ -265,10 +265,10 @@ where α is the slope of the density profile and M• de- notes the mass of the SMBH (Alexander 1999; Alexan- der & Pfuhl 2014). 
As mentioned above, Eq. (1) depends on the sum of the radii of the colliding objects, rc. We -take rc = 1 R⊙ because these interactions involve a BH +take rc = 1 R⊙because these interactions involve a BH and a star, and the former has a much smaller physi- cal cross-section. For example, the Schwarzschild radius -of a 10 M⊙ BH is only 30 km, or 4.31 × 10−5 R⊙. For +of a 10 M⊙BH is only 30 km, or 4.31 × 10−5 R⊙. For this reason, direct collisions between compact objects are very rare and not included in our model. We note that direct collisions between BHs, via GW @@ -318,7 +318,7 @@ sidering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R⊙. Qualitatively, one might expect that the BH could capture the entire star -(i.e., ∆m ∼ 1 M⊙) if the relative velocity is smaller than +(i.e., ∆m ∼1 M⊙) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity @@ -346,7 +346,7 @@ Figure 2. We consider an example that highlights the mass growth as a function of distance from the SMBH. Grey dots represent the initial masses and distances from the SMBH of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M⊙ for all of the BHs. Assuming +the inital mass equal to 10 M⊙for all of the BHs. Assuming the density profile of stars has α = 1, we consider two cases: BHs accrete all of the star’s mass during a collision (red) and only a portion of the star’s mass is accreted during a collision @@ -362,22 +362,22 @@ We approximate the density as 1 M⊙/(4πR3 ⊙/3) and take the conservative value of cs = 500 km s−1, which is -consistent with the sound speed inside a 1 M⊙ star +consistent with the sound speed inside a 1 M⊙star (Christensen-Dalsgaard et al. 1996) and allows us to set a lower limit on ∆m. 
To find ∆m, at each collision, we have: ∆m = min( ˙m × t⋆,cross, 1 M⊙) , (6) -where t⋆,cross ∼ R⊙/σ is the crossing time of the BH in +where t⋆,cross ∼R⊙/σ is the crossing time of the BH in the star. We take the minimum between ˙m×t⋆,cross and -1 M⊙ because the BH cannot accrete more mass than +1 M⊙because the BH cannot accrete more mass than one star at each collision. Figure 2 juxtaposes the expected growth using Bondi- Hoyle-Lyttleton accretion (blue small points) with a much simpler model in which the BH accretes the star’s -entire mass, 1 M⊙ (red large points). +entire mass, 1 M⊙(red large points). Both examples -start with identical populations of 10 M⊙ BHs (grey) +start with identical populations of 10 M⊙BHs (grey) and simulate growth through collisions using a statisti- cal approach. As the BHs grow, the collision timescale, which depends on mBH, decreases. @@ -393,7 +393,7 @@ IMBH Formation in Galactic Nuclei ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: -mfinal(tcoll → const.) = minitial + ∆m T +mfinal(tcoll →const.)=minitial + ∆m T tcoll , (7) @@ -401,7 +401,7 @@ in which T represents the simulation time and ∆m and tcoll remain constant, approximated as their initial val- ues. This equation is plotted in Figure 2 for both cases, -∆m = 1 M⊙ (red) and ∆m from Bondi-Hoyle-Lyttleton +∆m = 1 M⊙(red) and ∆m from Bondi-Hoyle-Lyttleton accretion (blue), and the curves coincide with the cor- responding simulated results. The shaded regions rep- resent one standard deviation from Eq. (7), calculated @@ -429,7 +429,7 @@ For ∆m = 1M⊙, the general solution is reached by solving the differential equation dm/dt = 1 M⊙/tcoll(m), which gives: -mfinal(∆m → 1 M⊙) = −A + (minitial + A) eCT +mfinal(∆m →1 M⊙)=−A + (minitial + A) eCT (8) where A = σ2Rstar/G and C = 2πGnstarRstar/σ. 
As an example, we plot this curve in purple for the α = 2 case, @@ -464,7 +464,7 @@ invoked momentum-driven winds in BH accretion (e.g., Murray et al. 2005; Ostriker et al. 2010; Brennan et al. 2018). We thus estimate the fraction of captured mass accreted to be approximately vesc/(cη), where vesc is -the escape velocity from the BH at 1 R⊙ and η is the +the escape velocity from the BH at 1 R⊙and η is the accretion efficiency at the ISCO. We take η to be 0.1 (e.g., Yu & Tremaine 2002). This expression for the @@ -487,27 +487,27 @@ fects of GW emission on the BH’s semimajor axis and eccentricity following Peters & Mathews (1963a). The characteristic timescale to merge a BH with an SMBH is given by: -tGW ≈ 2.9 × 1012 yr -� +tGW ≈2.9 × 1012 yr + M• 106 M⊙ -�−1 � mBH +−1  mBH 106 M⊙ -�−1 +−1 × -� M• + mBH + M• + mBH 2 × 106 M⊙ -�−1 � +−1  a• 10−2 pc -�4 -× f(e•)(1 − e2 +4 +×f(e•)(1 −e2 •)7/2 , (9) where f(e•) is a function of e•. For all values of e•, f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We -plot this timescale for a 1 × 105 M⊙ BH in Figure 1 in +plot this timescale for a 1 × 105 M⊙BH in Figure 1 in blue. 6 @@ -567,12 +567,12 @@ the SMBH. The two-body relaxation timescale for a single-mass system is: trelax = 0.34 σ3 -G2ρ⟨M∗⟩ ln Λrlx +G2ρ⟨M∗⟩ln Λrlx , (10) -where ln Λrlx is the Coulomb logarithm and ⟨M∗⟩ is the +where ln Λrlx is the Coulomb logarithm and ⟨M∗⟩is the average mass of the surrounding objects, here assumed -to be 1 M⊙ (Spitzer 1987; Binney & Tremaine 2008, +to be 1 M⊙(Spitzer 1987; Binney & Tremaine 2008, Eq. (7.106)). This equation represents the approximate timescale for a BH on a semi-circular orbit to change its orbital energy and angular momentum by order of @@ -602,7 +602,7 @@ to segregate inwards in the GN (e.g., Shapiro & Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). 
They sink toward the SMBH on the mass segregation -timescale, tseg ≈ ⟨M∗⟩/mBH × trelax (e.g., Spitzer 1987; +timescale, tseg ≈⟨M∗⟩/mBH × trelax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an order of magnitude smaller than the relaxation timescale plotted in Figure 1. @@ -615,7 +615,7 @@ of zero and a standard deviation of ∆vrlx/ √ 3, where ∆vrlx = v• -� +p P•/trlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu @@ -636,7 +636,7 @@ terings with the now rarer main-sequence stars. The BHs will then settle onto a Bahcall-Wolf profile, while the stars may follow a shallower profile, with approx- -imately n⋆ ∝ r−1.5, inwards of the transition radius +imately n⋆∝r−1.5, inwards of the transition radius (Linial & Sari in prep.). Therefore, after the initial mass segregation, we allow the BHs to begin diffusing over a relaxation timescale, @@ -645,7 +645,7 @@ dom process. In this random process, some of the BHs may migrate closer to the SMBH. We terminate mass growth when the BH enters the inner 200 au of the GN, within which the density of stars is uncertain. This cut- -off is based on the 120 au pericenter of S0-2, the closest +offis based on the 120 au pericenter of S0-2, the closest known star to the SMBH (e.g., Ghez et al. 2005). Another physical process that causes inward migra- tion is dynamical friction. A cursory derivation based @@ -683,9 +683,9 @@ In fact, using a KS test, we find that we cannot reject the hypothesis that the two distributions were drawn from the same sample for the α = 1.75 and α = 2 results. Interestingly, a BH mass -IMF with an average of 10 M⊙ leads to a final distri- -bution with an average of ∼ 200 M⊙ and a median of -∼ 45 M⊙, which lies within the mass gap. +IMF with an average of 10 M⊙leads to a final distri- +bution with an average of ∼200 M⊙and a median of +∼45 M⊙, which lies within the mass gap. 3. 
DISCUSSION AND PREDICTIONS We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass @@ -693,7 +693,7 @@ BH and main-sequence stars. Taking both a statisti- cal and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as -103−4 M⊙ and may result in many IMBH-SMBH merg- +103−4 M⊙and may result in many IMBH-SMBH merg- ers (intermediate-mass ratio inspirals, or IMRIs) and EMRIs. @@ -730,7 +730,7 @@ However, the inclusion of relaxation processes in the simulations dampens the influence of the stellar density profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in -mass, but their maximum mass is smaller (∼ 104 M⊙). +mass, but their maximum mass is smaller (∼104 M⊙). Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). Most simulations in our study assume that the BHs @@ -751,7 +751,7 @@ mass gap still form naturally despite the substantially reduced accretion. About 5% of the BHs grow by 10 to 100 M⊙. Furthermore, if we increase this ∆M esti- mate by a factor of 2 (i.e., use η = 0.05), the simula- -tion produces a 3.5 × 103 M⊙ IMBH for the same initial +tion produces a 3.5 × 103 M⊙IMBH for the same initial conditions. Our proof-of-concept demonstrates that col- lisions between BH and stars are an important process that should be taken into account in dense places such @@ -792,21 +792,21 @@ occurs on Gyr timescales. Some studies have suggested that in situ star formation can occur in the Galactic Center as close as 0.04 pc from the SMBH (e.g., Levin & Beloborodov 2003; Paumard et al. 2006), and star -formation episodes can occur as often as every ∼ 5 Myr +formation episodes can occur as often as every ∼5 Myr (e.g. Lu et al. 2009). 
Therefore, we expect that after -the first Gyr, stars within ≲ 0.01 pc will be replenished +the first Gyr, stars within ≲0.01 pc will be replenished at intervals consistent with the star formation episodes; the infalling populations of stars are separated by ∼ 5−10 Myr, which is shorter than the collision timescale. However, star-star collisions may complicate this pic- -ture within ∼ 0.01 pc. As discussed above, regular star +ture within ∼0.01 pc. As discussed above, regular star formation ensures the BHs always have a stellar popula- -tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, +tion to interact with outside of ∼0.01 pc.5 At 0.01 pc, however, the kinetic energy during a collision between -two 1 M⊙ stars is larger than their binding energies. +two 1 M⊙stars is larger than their binding energies. Collisions can therefore thin out the stellar populations during the time it takes them to diffuse to these small -radii, ≲ 0.01 pc, and may reduce the BH growth in the +radii, ≲0.01 pc, and may reduce the BH growth in the innermost region. We reserve the inclusion of star-star collisions for future work. We also note that the disrup- tion of binary stars by the SMBH may help replenish @@ -849,8 +849,8 @@ laboration et al. (2020) set an upper limit on the mass enclosed within the orbit of S0-2 to be about a few thou- sand M⊙, or 0.1% of the central mass. This upper limit can include mass that was previously in stars but is now -in BHs. In that case, the 180 M⊙ is what remains of the -stars, while BHs and IMBHs make up the ∼ 1000 M⊙ +in BHs. In that case, the 180 M⊙is what remains of the +stars, while BHs and IMBHs make up the ∼1000 M⊙ in the innermost region. 
Also not included in this study, collisions between the BH and other compact objects will increase the BH diff --git a/read/results/pymupdf/2201.00037.txt b/read/results/pymupdf/2201.00037.txt index 98cfad6..fba5233 100644 --- a/read/results/pymupdf/2201.00037.txt +++ b/read/results/pymupdf/2201.00037.txt @@ -51,7 +51,7 @@ symmetry axis are both coplanar with, and precess about, the normal to the Lapla its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot, 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 -yr with an inclination angle of I = 8.5330◦ between the orbit and Laplace plane normals [Ba- +yr with an inclination angle of I = 8.5330◦between the orbit and Laplace plane normals [Ba- land et al., 2017]. Measurements of the obliquity εm, defined as the angle of misalignment be- tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques, including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter- @@ -102,7 +102,7 @@ mal to the Laplace plane (ˆeL 8.5330◦. The symmetry axis of the mantle ˆep 3 is offset from ˆeI -3 by εm ≈ 2 arcmin. ˆeI +3 by εm ≈2 arcmin. ˆeI 3 and ˆep 3 are coplanar with, and precess about, ˆeL 3 in a retrograde direction @@ -121,7 +121,7 @@ This indicates that it is only the mantle that librates, and that the outer part These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that its outermost part must be. A solid inner core may have nucleated at the centre although its size is not well constrained. 
Inner core growth leads to planetary contraction, and the inferred -radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an +radial contraction of ∼7 km since the late heavy bombardment [Byrne et al., 2014] places an approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. –3– @@ -216,30 +216,30 @@ CrMB and surface. The flattenings at all interior boundaries are specified suc consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their numerical values are given in Table 1. Specifically, J2 and C22 are connected to the principal moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by -J2 = C − ¯A +J2 = C −¯A MR2 = 8π 15 1 MR2 -� -(ρs − ρf)r5 -sϵs + (ρf − ρm)r5 -fϵf + (ρm − ρc)r5 + +(ρs −ρf)r5 +sϵs + (ρf −ρm)r5 +fϵf + (ρm −ρc)r5 mϵm + ρcR5ϵr -� + , (1a) -C22 = B − A +C22 = B −A 4MR2 = 8π 15 1 4MR2 -� -(ρs − ρf)r5 -sξs + (ρf − ρm)r5 -fξf + (ρm − ρc)r5 + +(ρs −ρf)r5 +sξs + (ρf −ρm)r5 +fξf + (ρm −ρc)r5 mξm + ρcR5ξr -� + . (1b) where ¯A is the mean equatorial moment of inertia defined below. The same procedure was used @@ -291,15 +291,15 @@ Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4π 3 ¯ρR3 = M. The numerical -values of ϵr and ξr are calculated from ϵr = (¯a − c)/R and ξr = (a − b)/R, where ¯a = 1 +values of ϵr and ξr are calculated from ϵr = (¯a −c)/R and ξr = (a −b)/R, where ¯a = 1 2(a + b) and where a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J2 and C22 are computed from Equation (4) in the Supporting Information of Genova et al. [2019]. 
and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog- -raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ -and an equatorial offset of ∼ 15◦ [Perry et al., 2015]. +raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼2◦ +and an equatorial offset of ∼15◦[Perry et al., 2015]. Once the densities and flattenings of all interior regions are known, we can specify the mo- ments of inertia of the fluid core (Cf > Bf > Af) and solid inner core (Cs > Bs > As) along with the mean equatorial moments of inertia @@ -313,17 +313,17 @@ along with the mean equatorial moments of inertia From these, we define the polar (e, ef, es) and equatorial (γ, γs) dynamical ellipticities of the whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which enter our rotational model, -e = C − ¯A +e = C −¯A ¯A -ef = Cf − ¯Af +ef = Cf −¯Af ¯Af -es = Cs − ¯As +es = Cs −¯As ¯As , (3a) -γ = B − A +γ = B −A ¯A -γs = Bs − As +γs = Bs −As ¯As . (3b) @@ -377,7 +377,7 @@ b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) in a frame attached to the rotating mantle. The orbit normal (ˆeI -3) is tilted by an angle I = 8.533◦ from +3) is tilted by an angle I = 8.533◦from the Laplace normal (ˆeL 3 ) and the symmetry axis of Mercury’s mantle (ˆep 3) is tilted by an obliquity εm @@ -394,7 +394,7 @@ mantle axes ˆep 2 with respect to the Cassini plane. Viewed in the frame attached to the rotating mantle (b), the Cassini plane is rotating at frequency ωΩo = -−Ωo − Ωp cos I in the longitudinal direc- +−Ωo −Ωp cos I in the longitudinal direc- tion. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration. 
–7– @@ -415,7 +415,7 @@ plane varies on long timescales, but it can be taken as invariable in inertial s purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆeL 3 and ˆeI 3 -is the orbital inclination I = 8.5330◦ [Baland et al., 2017], the angle between ˆeI +is the orbital inclination I = 8.5330◦[Baland et al., 2017], the angle between ˆeI 3 and ˆep 3 is the obliquity εm and the angle between ˆeL @@ -429,9 +429,9 @@ fer to as the ‘mantle’ in the context of our rotational model. The rotation of the mantle are expected to remain in close alignment, but they do not coincide exactly. We define the rotation rate vector of the mantle by Ω, and its misalignment from ˆep 3 by an angle -θm. Note that θm ≪ εm and it is often the spin axis of Mercury which is used to define the +θm. Note that θm ≪εm and it is often the spin axis of Mercury which is used to define the obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆep -3 and Ω would +3 and Ωwould characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and the angles I, εm and θm would completely describe the Cassini state. The presence of a fluid outer core and solid inner core require three additional orientation vectors and angles. The sym- @@ -439,9 +439,9 @@ metry axis of the inner core is defined by unit vector ˆes 3 and its misalignment from ˆep 3 by an angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, re- -spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an- +spectively, and their misalignment from the rotation vector of the mantle Ωare defined by an- gles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close -alignment, so θn ≈ θs. To be formal in our definition of the different angles of misalignment, +alignment, so θn ≈θs. 
To be formal in our definition of the different angles of misalignment, for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. At equilibrium in the Cassini state, the three orientation vectors (ˆeI @@ -455,14 +455,14 @@ in inertial space, the Cassini plane is rotating in a retrograde direction at fr in the frame attached to the mantle rotating at sidereal frequency Ωo, the Cassini plane is ro- tating in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles per Mercury day, is equal to -ω = −1 − δω cos(θp) . +ω = −1 −δω cos(θp) . (5) The factor δω = Ωp/Ωo = 4.933 × 10−7 is the Poincar´e number, expressing the ratio of the forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal as seen in the mantle frame is expressed as d dt ˆeL -3 + Ω × ˆeL +3 + Ω× ˆeL 3 = 0 , (6) or equivalently, by Equation (19e) of Stys and Dumberry [2018], @@ -472,7 +472,7 @@ or equivalently, by Equation (19e) of Stys and Dumberry [2018], Confidential manuscript submitted to JGR-Planets This expresses a formal connection between θp and θm which is independent of the interior struc- -ture of Mercury. Using Equation (5) and cos(θm) → 1, this connection can be rewritten as +ture of Mercury. Using Equation (5) and cos(θm) →1, this connection can be rewritten as sin(θm) = δω sin(θp) . (8) and thus the relative amplitudes of θm and θp depend of the Poincar´e number δω. @@ -508,16 +508,16 @@ angle θm, as seen in the mantle frame, can be written as θm1(t) + iθm2(t) = ˜m exp[iωΩot] , (10a) where -˜m ≡ ˜m(ω) = Re[ ˜m] + iIm[ ˜m] , +˜m ≡˜m(ω) = Re[ ˜m] + iIm[ ˜m] , (10b) is the amplitude at frequency ωΩo. Equivalent definitions apply for all other angles, with the connection as follows: -θm ⇔ ˜m , -θf ⇔ ˜mf , -θs ⇔ ˜ms , -θn ⇔ ˜ns , -θp ⇔ ˜p , -εm ⇔ ˜εm . +θm ⇔˜m , +θf ⇔˜mf , +θs ⇔˜ms , +θn ⇔˜ns , +θp ⇔˜p , +εm ⇔˜εm . 
(11) The notation ˜m, ˜mf, ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991]. Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re- @@ -530,7 +530,7 @@ real. We concentrate our analysis in this work on the real part of the solutions sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜εm corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm, though we keep the tilde notation in the presentation of our results to emphasize that it rep- -resents the real part of the solution from our system. Furthermore, since ˜m ≪ ˜εm, we often +resents the real part of the solution from our system. Furthermore, since ˜m ≪˜εm, we often refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. The model of Mathews et al. [1991] is developed under the assumption of small angles as @@ -539,40 +539,40 @@ rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. T tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer- cury, the fluid core, and the inner core in the reference frame of the rotating mantle. 
These three equations are -(ω − e) ˜m + (1 + ω) -� ¯Af +(ω −e) ˜m + (1 + ω) +" ¯Af ¯A ˜mf + ¯As ¯A ˜ms + α3es ¯As ¯A ˜ns -� +# = 1 iΩ2o ¯A -� + ˜Γsun -� + , (12a) -ω ˜m + (1 + ω + ef) ˜mf − ωα1es +ω ˜m + (1 + ω + ef) ˜mf −ωα1es ¯As ¯Af ˜ns = 1 iΩ2o ¯Af -� -− ˜Γcmb − ˜Γicb -� + +−˜Γcmb −˜Γicb + , (12b) -(ω − α3es) ˜m + α1es ˜mf + (1 + ω) ˜ms + (1 + ω − α2) es˜ns = +(ω −α3es) ˜m + α1es ˜mf + (1 + ω) ˜ms + (1 + ω −α2) es˜ns = 1 iΩ2o ¯As -� + ˜Γs sun + ˜Γicb -� + , (12c) and a fourth equation consists of a kinematic relation that expresses the change in the orien- @@ -584,25 +584,25 @@ and are given by α1 = ρf ρs , -α3 = 1 − α1 , -α2 = α1 − α3αg , +α3 = 1 −α1 , +α2 = α1 −α3αg , (13a) where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied on the inner core, αg = 8πG 5Ω2o -[ρc(ϵr − ϵm) + ρm(ϵm − ϵf) + ρfϵf] , +[ρc(ϵr −ϵm) + ρm(ϵm −ϵf) + ρfϵf] , (13b) where G is the gravitational constant. ˜Γsun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by ˜Γsun = −iΩ2 o ¯A -� + φm˜εm + ¯As ¯A α3φs˜ns -� + , (14) where @@ -613,29 +613,29 @@ Confidential manuscript submitted to JGR-Planets 2 n2 Ω2o -� + G210 e + 1 2G201 γ -� + , (15a) φs = 3 2 n2 Ω2o -� + G210 es + 1 2G201 γs -� + , (15b) and where G210 and G201 are functions of the orbital eccentricity ec, G210 = 1 -(1 − e2c)3/2 , +(1 −e2c)3/2 , (16a) G201 = 7 -2ec − 123 +2ec −123 16 e3 c + 489 128e5 @@ -652,7 +652,7 @@ CMB and on the inner core at the ICB, respectively. These torques can be paramet terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angu- lar velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002], ˜Γicb = iΩ2 -o ¯AsKicb( ˜mf − ˜ms) , +o ¯AsKicb( ˜mf −˜ms) , (18a) ˜Γcmb = iΩ2 o ¯AfKcmb ˜mf . @@ -665,14 +665,14 @@ and this is provided by Equation (7). For small angles θm and θp, this gives [ ˜m + (1 + ω)˜p = 0 . 
(19) For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This -is because θp ≈ 8.567◦ whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the +is because θp ≈8.567◦whereas ˜εm ≈2 arcmin and thus the latter obeys more strictly the condition of small angles assumed in our framework. Furthermore, the external torques act- ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜εm. Writ- -ten in terms of ˜εm, and with the approximation of ˜εm ≪ 1 and ˜m ≪ 1, Equation (7) becomes +ten in terms of ˜εm, and with the approximation of ˜εm ≪1 and ˜m ≪1, Equation (7) becomes ˜m + (1 + ω)˜εm = −(1 + ω) tan I . (20) Likewise, the frequency ω from Equation (5) can be written simply in terms of I, -ω = −1 − δω cos I . +ω = −1 −δω cos I . (21) The set of four Equations (12) with the addition of Equation (20) form a linear system of equations for the five rotational variables ˜m, ˜mf, ˜ms, ˜ns and ˜εm. It captures the response @@ -692,7 +692,7 @@ and the elements of matrix M are M =   -ω − e +ω −e (1 + ω) ¯ Af @@ -705,9 +705,8 @@ A ¯ As¯ A α3 -� -(1 + ω)es + φs -� +(1 + ω)es + φs + φm ω 1 + ω + ef + Kcmb + @@ -726,10 +725,10 @@ As ¯ Af 0 -ω − α3es -α1es − Kicb +ω −α3es +α1es −Kicb 1 + ω + Kicb -(1 + ω − α2)es + α3φs +(1 + ω −α2)es + α3φs α3φs 0 0 @@ -785,18 +784,17 @@ Confidential manuscript submitted to JGR-Planets The Cassini state of a single-body, rigid Mercury For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa- tions (12a) and (20), -(ω − e) ˜m + φm ˜εm = 0 , +(ω −e) ˜m + φm ˜εm = 0 , (23a) ˜m + (1 + ω)˜εm = −(1 + ω) tan I . (23b) -Using Equation (21), δω ≪ 1, and the approximation ¯A(1+e+δω cos I) = C + ¯Aδω cos I ≈ +Using Equation (21), δω ≪1, and the approximation ¯A(1+e+δω cos I) = C + ¯Aδω cos I ≈ C, these can be written as C ˜m = ¯Aφm ˜εm , (24a) ˜m = δω -� -sin I + cos I ˜εm -� +sin I + cos I ˜εm + . 
(24b) Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330◦, δω = @@ -804,9 +802,8 @@ Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8.5330 than ˜εm: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. Substituting Equation (24b) in Equation (24a) gives CΩp -� -sin I + cos I ˜εm -� +sin I + cos I ˜εm + = ¯AΩoφm˜εm , (25) and isolating for ˜εm, @@ -822,7 +819,7 @@ CΩp sin I −CΩp cos I + nMR2 (G210J2 + 2G201C22) . (27) This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 -[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙Ω is equal to −Ωp]. +[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙Ωis equal to −Ωp]. Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo- ment of inertia ˆC, @@ -845,9 +842,9 @@ cession of Mercury. As seen in the inertial frame, its frequency is given by Confidential manuscript submitted to JGR-Planets ωfp = nMR2 C -� + G210J2 + 2G201C22 -� + , (29) which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com- @@ -870,47 +867,43 @@ the free precession period is much shorter than the forcing period of 325 kyr. U The obliquity of Mercury is thus determined by how the forcing frequency Ωp compares with the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassini state 1 [Peale, 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant -amplification if Ωp ≈ ωfp. Since ωfp ≫ Ωp, resonant amplification is minimal and the re- -sulting obliquity, ˜εm ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5◦. +amplification if Ωp ≈ωfp. 
Since ωfp ≫Ωp, resonant amplification is minimal and the re- +sulting obliquity, ˜εm ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦. 2.3.2 The misalignment of the fluid and solid cores -With ω = −1 − δω cos I and δω ≪ 1, Equation (12d) gives ˜ns ≈ ˜ms; as for the mantle, +With ω = −1 −δω cos I and δω ≪1, Equation (12d) gives ˜ns ≈˜ms; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure, so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa- tion (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes CΩp -� -sin I + cos I ˜εm -� -+ ( ¯Af cos I Ωp) ˜mf + ¯As(cos I Ωp − Ωoα3φs)˜ns = ¯AΩoφm˜εm . +sin I + cos I ˜εm + ++ ( ¯Af cos I Ωp) ˜mf + ¯As(cos I Ωp −Ωoα3φs)˜ns = ¯AΩoφm˜εm . (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- cation of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by -˜ns ≈ Ωp +˜ns ≈Ωp κλs -� -1 + Ωo(Kicb − α1es) + +1 + Ωo(Kicb −α1es) λf -� � -sin I + cos I ˜εm -� -− Ωoα3φs + sin I + cos I ˜εm + +−Ωoα3φs κλs ˜εm , (32a) -˜mf ≈ Ωp +˜mf ≈Ωp λf -� -sin I + cos I ˜εm -� +sin I + cos I ˜εm + + Ωo λf ¯As ¯Af -� -Kicb − α1es -� +Kicb −α1es + ˜ns , (32b) where @@ -919,40 +912,39 @@ where ¯Af Ω2 o -� -Kicb − α1es -�2 +Kicb −α1es +2 λs λf , (33a) -λf = ¯σf − Ωp cos I , +λf = ¯σf −Ωp cos I , (33b) -λs = ¯σs − Ωp cos I , +λs = ¯σs −Ωp cos I , (33c) –14– Confidential manuscript submitted to JGR-Planets and where we have introduced the frequencies ¯σf = Ωo -� + ef + Kcmb + ¯As ¯Af Kicb -� + , (33d) ¯σs = Ωo -� -esα3αg − esα1 + α3φs + Kicb -� + +esα3αg −esα1 + α3φs + Kicb + . (33e) These solutions are good approximations for all the results that we present in section 3. 
For an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they pro- vide useful predictions of ˜ns and ˜mf. In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σs ≫ -Ωp and ¯σf ≫ Ωp, so that ˜ns → 0, ˜mf → 0 and Equation (31) reverts back to Equation (25) +Ωp and ¯σf ≫Ωp, so that ˜ns →0, ˜mf →0 and Equation (31) reverts back to Equation (25) for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM cou- pling, Kcmb = Kicb = 0), then @@ -962,17 +954,16 @@ pling, Kcmb = Kicb = 0), then ˜mf = ˜ns = −(tan I + ˜εm) . (34) Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm = -C − ¯Af − ¯As, we obtain +C −¯Af −¯As, we obtain Cm Ωp -� -sin I + cos I ˜εm -� +sin I + cos I ˜εm + = ¯AΩoφm˜εm . (35) which describes, as expected, a forced precession of the mantle alone. If this was the case for -Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈ 0.88 arcmin, substantially smaller -than the observed obliquity of ˜εm ≈ 2 arcmin. -If ¯σf ≈ Ωp (and thus λf → 0) and/or ¯σs ≈ Ωp (and thus λs → 0) resonant amplifica- +Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈0.88 arcmin, substantially smaller +than the observed obliquity of ˜εm ≈2 arcmin. +If ¯σf ≈Ωp (and thus λf →0) and/or ¯σs ≈Ωp (and thus λs →0) resonant amplifica- tion leads to large amplitudes for ˜mf, ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and ¯σs are closely related to the FCN and FICN frequencies ωfcn and ωficn, respectively. Hence, just as a large mantle obliquity can result from resonant amplification when the forcing frequency @@ -981,8 +972,8 @@ onant amplification when the forcing frequency approaches the FCN or FICN frequ frequencies depend on the interior density structure and are not known. 
However, we will show that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- -pect an important amplification effect. Furthermore, since ωfcn, ωficn ≫ Ωp, then ¯σf ≫ Ωp -and ¯σs ≫ Ωp, and we are in the strong coupling limit. The mantle obliquity should be close +pect an important amplification effect. Furthermore, since ωfcn, ωficn ≫Ωp, then ¯σf ≫Ωp +and ¯σs ≫Ωp, and we are in the strong coupling limit. The mantle obliquity should be close to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜mf and ˜ns should be of the order of ˜εm or smaller. This further justifies the assumption of small an- gles that we have adopted. @@ -1009,7 +1000,7 @@ Cm 1 + ζ , (36) where -f(ec) = 1 − 11e2 +f(ec) = 1 −11e2 c + 959 48 e4 c , @@ -1120,29 +1111,29 @@ nal torque. As shown by Baland et al. [2019], the external torque allow solid re a free motion in inertial space thereby affecting the free rotational modes. To a good approx- imation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0 are given by -ωfcn ≈ −Ωo -� +ωfcn ≈−Ωo + ¯A ¯Am + ¯As -� � +  ef + φm -� + + Ωo efφm (ef + φm) , (38a) -ωficn ≈ Ωo -� ¯A + ¯As -¯A − ¯As -� � -esα1 − esα3αg − α3φs -� +ωficn ≈Ωo + ¯A + ¯As +¯A −¯As +  +esα1 −esα3αg −α3φs + . (38b) The expression of the FICN frequency involves the inertial torque (term esα1) and the grav- itational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core. For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 = -0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫ α1; +0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫α1; the gravitational torque dominates the inertial torque, in large part because of the slow rota- tion rate of Mercury. 
As a result the FICN frequency is negative (i.e. the precession motion is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and @@ -1151,16 +1142,16 @@ and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approxi –17– Confidential manuscript submitted to JGR-Planets -sion for the FICN differs by a factor ( ¯A+ ¯As)/( ¯A− ¯As) compared to that given in Dumberry +sion for the FICN differs by a factor ( ¯A+ ¯As)/( ¯A−¯As) compared to that given in Dumberry and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. First, it involves the external torque from the Sun captured by the parameter φm. If we set φm = 0, we obtain the FCN frequency for a decoupled model in which only interior torques contribute, -ωfcn,int ≈ −Ωo -� +ωfcn,int ≈−Ωo + ¯A ¯Am + ¯As -� + ef . (38c) This frequency is slightly different from the usual expression for Earth, involving the ratio ¯A/( ¯Am+ @@ -1189,7 +1180,7 @@ of the fluid core spin axis from the mantle is significant: ˜mf is approximat a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with -respect to the orbit normal is ˜εm+ ˜mf ≈ 6 arcmin. The reason why the obliquity of the spin +respect to the orbit normal is ˜εm+ ˜mf ≈6 arcmin. The reason why the obliquity of the spin axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forc- ing frequency. 
When the FCN frequency is much larger than the forcing frequency, as is the @@ -1274,29 +1265,29 @@ C′ = C + ¯Acχ , where ¯Ac = ¯Af + ¯As and χ = Ωp cos I ¯Ac -� + ¯Af -(¯σf − Ωp cos I) + +(¯σf −Ωp cos I) + ¯As -(¯σs − Ωp cos I) -� +(¯σs −Ωp cos I) + − ¯As ¯Ac Ωoα3φs -(¯σs − Ωp cos I) . +(¯σs −Ωp cos I) . (41) The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then how the core is entrained to precess with the mantle, with the coupling between the two ex- pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit -of ¯σf, ¯σs → 0, then χ = −1, C′ = Cm, the core is fully decoupled from the mantle and we -retrieve Equation (35). If instead ¯σf, ¯σs → ∞, then χ = 0, C′ = C and we retrieve the pre- +of ¯σf, ¯σs →0, then χ = −1, C′ = Cm, the core is fully decoupled from the mantle and we +retrieve Equation (35). If instead ¯σf, ¯σs →∞, then χ = 0, C′ = C and we retrieve the pre- diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ωp, as is the case here, resonant amplification is weak, χ is small and positive, C′ > C and this leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core core is grav- itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the -misalignment of the fluid core. In Equation (41), ¯σs ≫ ¯σf, so to a good approximation +misalignment of the fluid core. In Equation (41), ¯σs ≫¯σf, so to a good approximation –19– Confidential manuscript submitted to JGR-Planets @@ -1304,9 +1295,9 @@ Confidential manuscript submitted to JGR-Planets ¯Af ¯Ac Ωo cos I -(¯σf − Ωp cos I) . +(¯σf −Ωp cos I) . (42) -For a small inner core, χ ≈ 7.55×10−3. As the inner core grows, ¯Af decreases, and the com- +For a small inner core, χ ≈7.55×10−3. 
As the inner core grows, ¯Af decreases, and the com- bination ¯Acχ also decreases. This implies that C′ decreases with inner core size and, consequently, ˜εm also decreases with inner core size, as seen in Figure 4a, though it remains larger than the prediction for a rigid planet. @@ -1317,13 +1308,13 @@ density ρc and its thickness h. Changing ρs, ρc and/or h requires a differen ρm and rf in order to match M, ˆC and ˆCm. In turn, this leads to different ellipticities at in- terior boundary in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and ˜ns. To illustrate this, we show on Figure 4 two additional predictions computed with crustal -thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼ 0.025%, but -the changes in ˜mf and ˜ns are more substantial, ∼ 5% and ∼ 10%, respectively. +thicknesses changed to h = 16 and 36 km. The change in ˜εm remains modest, ∼0.025%, but +the changes in ˜mf and ˜ns are more substantial, ∼5% and ∼10%, respectively. We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment of inertia of the whole planet, which we denote by ˜εg. A difference between ˜εg and ˜εm occurs if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core (with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor -of (Cs− ¯As)α3˜ns = ¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that +of (Cs−¯As)α3˜ns = ¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that the moment of inertia of the whole planet is purely diagonal is ( ¯Asesα3˜ns)/( ¯Ae), and hence a good approximation of ˜εg is ˜εg = ˜εm + @@ -1345,21 +1336,21 @@ Kcmb = πρfr4 f ¯Af -� ν +r ν 2Ωo -� -0.195 − 1.976i -� + +0.195 −1.976i + , (44a) Kicb = πρfr4 s ¯As -� ν +r ν 2Ωo -� -0.195 − 1.976i -� + +0.195 −1.976i + , (44b) where ν is the kinematic viscosity. 
The appropriate numerical value for ν in planetary inte- @@ -1372,19 +1363,19 @@ Confidential manuscript submitted to JGR-Planets The above parameterizations are valid only under the assumption that the flow in the bound- ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds number Re = rf∆uf/ν, associated with the differential velocity ∆uf = rfΩo ˜mf at the CMB. -For rf = 2000 km, and taking ˜mf = 4 arcmin ≈ 0.001 rad from the results in the previous -section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109. Such a large Reynolds number indicates +For rf = 2000 km, and taking ˜mf = 4 arcmin ≈0.001 rad from the results in the previous +section, we get ∆uf ∼2 mm/s and Re ∼6 × 109. Such a large Reynolds number indicates that the viscous friction between the fluid core and mantle should induce turbulent flows, as is the case for the Cassini state of the Moon [Yoder, 1981; Williams et al., 2001; C´ebron et al., 2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde- pendent of the fluid viscosity and proportional to the square of the differential velocity. The coupling constant Kcmb should be in the form Kcmb = fcmb -�� ˜mf -�� -� -0.195 − 1.976i -� + ˜mf + + +0.195 −1.976i + , (45) where fcmb is a numerical factor that depends among other things on surface roughness. In- @@ -1400,8 +1391,8 @@ by fitting a rotation model to the librations of the Moon observed by Lunar Las of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 [Williams and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K and Kcmb is -���Im[Kcmb] -��� = K + Im[Kcmb] + = K CL CL CfL @@ -1410,14 +1401,14 @@ CfL , (46) where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6 s−1 the lunar -rotation rate. With CfL/CL ∼ 7 × 10−4 [e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼ +rotation rate. With CfL/CL ∼7 × 10−4 [e.g. 
Williams et al., 2014], this gives |Im[Kcmb]| ∼ 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and as- -suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 +suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈5 × 10−4 m2 s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the CMB of the Moon is closer to 3 cm/s [Yoder, 1981; Williams et al., 2001], more than 10 times larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- -cury should be smaller. Thus, ν ≈ 5×10−4 m2 s−1 gives a conservative upper bound for the +cury should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the possible effective turbulent viscosity that can be expected for Mercury. Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices of effective viscosities. For ν = 10−5 m2 s−1, viscous coupling is too weak to affect ˜εm and @@ -1428,7 +1419,7 @@ of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis- -cosity that we have identified above (i.e ν ≈ 5 × 10−4 m2 s−1), the influence of viscous cou- +cosity that we have identified above (i.e ν ≈5 × 10−4 m2 s−1), the influence of viscous cou- –21– Confidential manuscript submitted to JGR-Planets @@ -1529,76 +1520,76 @@ has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; by Br = √ 3 -� + Bd r -� + cos θ, where -� + Bd r -� + is the r.m.s. 
strength of the field, the coupling constant Kcmb can be written is the form -Kcmb = 3(1 − i)Fcmb -� +Kcmb = 3(1 −i)Fcmb + Bd r -�2 , + 2 , (47) where Fcmb = 1 Ωoρfrf -� + 1 σmδm + 1 σfδf -�−1 +−1 , (48) and where σm, δm = -� +p 2/(σmµΩo) and σf, δf = -� +p 2/(σfµΩo) are the electrical conductivi- ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π ×10−7 N A−2 the magnetic permeability of free space. The r.m.s. field strength -� + Bd r -� + is connected to the Gauss coefficient g0 1 of the surface magnetic field by -� + Bd r -� + = 2 √ 3 -� R + R rf -�3 ��g0 +3 g0 1 -�� . + . (49) We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding -to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1 [Constable, 2015]. In con- -trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S -m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)−1 ≫ (σfδf)−1. Tak- +to the CMB of Mercury is in the range of σm ∼0.01 −1 S m−1 [Constable, 2015]. In con- +trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼106 S +m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)−1 ≫(σfδf)−1. Tak- ing σm = 1 S m−1, -��g0 + g0 1 -�� = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = -2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈ (3.1 × 10−11) · (1 − i). To put this amplitude + = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = +2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈(3.1 × 10−11) · (1 −i). To put this amplitude in perspective, taking a molecular viscosity of ν = 10−6 m2 s−1 in Equation (44a) gives a vis- -cous coupling constant of Kcmb ≈ (6.0 × 10−7) · (0.195 − 1.976i). Hence, EM coupling at the +cous coupling constant of Kcmb ≈(6.0 × 10−7) · (0.195 −1.976i). 
Hence, EM coupling at the CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- ponents of the radial magnetic field. EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by @@ -1606,22 +1597,22 @@ CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effe closer to σf. Likewise, σm can be increased if a more electrically conducting layer has formed at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even -in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains +in the extreme case of σm = σf = 106 S m−1, Kcmb ≈(1.6 × 10−8) · (1 −i), which remains –23– Confidential manuscript submitted to JGR-Planets -smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces +smaller by a factor ∼60 than the smallest possible viscous coupling constant. Viscous forces dominate the tangential stress on the CMB of Mercury. At the ICB, because we can expect the electrical conductivity in both the solid inner core and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM coupling can be much larger and dominate viscous coupling. We assume that the magnetic field morphology at the ICB is dominantly comprised of small spatial scales for example as predicted by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in -terms of an equivalent uniform radial magnetic field ⟨Br⟩ capturing its r.m.s. strength [Buf- +terms of an equivalent uniform radial magnetic field ⟨Br⟩capturing its r.m.s. strength [Buf- fett et al., 2002; Dumberry and Koot, 2012]. 
Assuming an electrical conductivity σ equal in the fluid and solid core, the coupling constant Kicb can be written in the form Kicb = 5 -4(1 − i)Ficb ⟨Br⟩2 , +4(1 −i)Ficb ⟨Br⟩2 , (50) where Ficb = @@ -1630,29 +1621,29 @@ Ficb = , (51) and where δ = -� +p 2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based -on the r.m.s. strength ⟨Br⟩ rather than a true field morphology tends to overestimate the strength +on the r.m.s. strength ⟨Br⟩rather than a true field morphology tends to overestimate the strength of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are -absorbed in the range of possible ⟨Br⟩ values. +absorbed in the range of possible ⟨Br⟩values. The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. -When ⟨Br⟩ is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +When ⟨Br⟩is sufficiently large, this is no longer the case. EM coupling then enters a ’strong field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which -Kicb increases linearly with ⟨Br⟩ instead of quadratically. A good approximation of Kicb cal- +Kicb increases linearly with ⟨Br⟩instead of quadratically. A good approximation of Kicb cal- culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], KE -icb = (0.175 − i0.138) ⟨Br⟩ , +icb = (0.175 −i0.138) ⟨Br⟩, (52) -where ⟨Br⟩ is in units of Tesla. The superscript E emphasizes that the numerical factors are +where ⟨Br⟩is in units of Tesla. 
The superscript E emphasizes that the numerical factors are appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. To adapt these numerical factors to Mercury, we write, -Kicb = (0.175 − i0.138)Ficb +Kicb = (0.175 −i0.138)Ficb FE icb -⟨Br⟩ , +⟨Br⟩, (53) where FE icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- @@ -1661,7 +1652,7 @@ km, σ = 5 × 105 S m−1, which gives FE icb = 90.36 T−2. To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1 in the core of Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and -strong field regime occurs when ⟨Br⟩ ≈ 1.53 mT for the real part of Kicb. ⟨Br⟩ at the ICB +strong field regime occurs when ⟨Br⟩≈1.53 mT for the real part of Kicb. ⟨Br⟩at the ICB of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom- etry inside the core could be dominated by small length scales, yet only the weaker lower har- monics of the field would penetrate through a thermally stratified layer in the upper region of @@ -1669,21 +1660,21 @@ monics of the field would penetrate through a thermally stratified layer in th Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- -face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g An- -derson et al., 2012], ⟨Br⟩ at the ICB could be as large as 0.3 mT, corresponding to approxi- +face field strength by a factor 1000. Taking a surface field strength equal to ∼300 nT [e.g An- +derson et al., 2012], ⟨Br⟩at the ICB could be as large as 0.3 mT, corresponding to approxi- mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of Mercury remains in the weak field regime. 
Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices -of ⟨Br⟩. The larger ⟨Br⟩ is, the stronger is the EM coupling at the ICB, and the smaller is the +of ⟨Br⟩. The larger ⟨Br⟩is, the stronger is the EM coupling at the ICB, and the smaller is the differential rotation between the fluid core and inner core. The inner core and fluid core are vir- -tually locked into a common precession motion when ⟨Br⟩ > 0.3 mT. Further increasing ⟨Br⟩ +tually locked into a common precession motion when ⟨Br⟩> 0.3 mT. Further increasing ⟨Br⟩ above 1 mT does not change the solution as EM coupling already dominates all other torques on the inner core. This is the case even when EM coupling transitions into the strong field regime. EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and -��g0 + g0 1 -�� = + = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core we retrieved the solutions of ˜εm and ˜mf shown in Figure 4. As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with @@ -1699,13 +1690,13 @@ locked into a common precession motion, a good approximation of ˜εm is given b diction as Equations (39-40) involving the effective moment of inertia C′, except χ is now given by χ = -¯AcΩp cos I − ¯AsΩoα3φs -¯AfΩo(ef + Kcmb) + ¯AsΩoesα3αg − ¯AcΩp cos I . +¯AcΩp cos I −¯AsΩoα3φs +¯AfΩo(ef + Kcmb) + ¯AsΩoesα3αg −¯AcΩp cos I . (54) For a small inner core, ¯AcΩp cos I > ¯AsΩoα3φs and χ is positive. Because ¯AsΩoα3φs increases with inner core size, χ gets smaller, and so do C′ and ˜εm. The mantle obliquity drops from 2.049 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 -arcmin. For an inner core larger than ≈ 1000 km, ¯AcΩp cos I < ¯AsΩoα3φs, so χ becomes neg- +arcmin. 
For an inner core larger than ≈1000 km, ¯AcΩp cos I < ¯AsΩoα3φs, so χ becomes neg- ative, C′ becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes smaller than the prediction based on a rigid planet. The larger the inner core is, the smaller are the misalignments of the fluid and solid cores @@ -1784,9 +1775,9 @@ viscous forces, and that at the ICB should be dominated by EM forces. To simplif sider a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling. We choose an effective viscosity at the CMB of ν = 10−4 m2 s−1, which we believe to be a representative value given the comparison with the Moon (see section 3.3). We take a radial -field strength at the ICB of ⟨Br⟩ = 0.3 mT, approximately the field strength expected under +field strength at the ICB of ⟨Br⟩= 0.3 mT, approximately the field strength expected under the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa- -tive’ coupling model, although the uncertainty on ν and ⟨Br⟩ obviously remains high. +tive’ coupling model, although the uncertainty on ν and ⟨Br⟩obviously remains high. Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’ coupling model (black lines) under the fixed inner core density scenario that we have used in sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same rep- @@ -1872,7 +1863,7 @@ f and i′ s; these represent the obliquities with respect to the orbital plane and are connected to our variables by: i′ m = ˜εm, i′ -f = ˜εm + ˜m+ ˜mf ≈ ˜εm + ˜mf and i′ +f = ˜εm + ˜m+ ˜mf ≈˜εm + ˜mf and i′ s = ˜εm + ˜ns. To summarize their results, i′ f and i′ @@ -1885,7 +1876,7 @@ m gets progressively larger and is displaced further away from its expected orientation based of a rigid planet (see their Figure 6). 
The change in i′ m they obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan- -etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), +etary radius (≈1463 km, close to the maximum inner core size of 1500 km we have considered), is approximately an increase of 5 × 10−5 rad = 0.17 arcmin. This also corresponds approxi- mately to the deviation of the obliquity with respect to that of a rigid planet. When only viscous stress is included in our model (section 3.3), our results are substan- @@ -1929,7 +1920,7 @@ est changes of the mantle obliquity εm compared to the obliquity predicted on t entirely rigid planet (εr m). Let us denote this difference as ∆εm = εm−εr m. The largest ∆εm -occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This difference is decreased +occurs for a small or no inner core, and is ∆εm ≈0.01 arcmin. This difference is decreased as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value remains smaller than 0.01 arcmin. @@ -1942,11 +1933,11 @@ planet. But it also implies that the observed obliquity cannot be used to place the inner core size. Nevertheless, our results show that the presence of a fluid core and inner core affect the resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change -in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006 +in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈0.006 arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which -is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. 
The precision on the obliq- -uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 +is approximately 0.85 arcsec (≈0.014 arcmin) [Baland et al., 2017]. The precision on the obliq- +uity from the upcoming BepiColombo satellite mission is expected to be ≤0.5 arcsec (≤0.008 arcmin) [Cical`o et al., 2016]. Thus, in addition to including tidal deformation and the preces- sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens @@ -1960,14 +1951,14 @@ tle. Since gravitational coupling prevents a large inner core tilt with respect –28– Confidential manuscript submitted to JGR-Planets -find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain -is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the +find that the misalignment ∆εg = εg −εm is limited. The maximum offset that we obtain +is approximately ∆εg ≈0.007 arcmin. This limited magnitude of offset is important in the light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968± 0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm = 2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent with one another within their error estimates. In their interpretation, Genova et al. [2019] sug- -gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar- +gest that the different central value of the obliquity that they obtain (smaller by ∼0.07 ar- cmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid in- ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg that we predict. 
Moreover, we predict that the obliquity of the gravity field should be larger @@ -1980,7 +1971,7 @@ and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduce cous and EM coupling also lead to a displacement of these axes in the direction perpendicu- lar to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements based on track- ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that -the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). +the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼0.03 arcmin). Although this offset is smaller than the measurement errors, so that the observed obliquity is still consistent with no deviation away from the Cassini plane, some amount of dissipation in- variably takes place. These measurements give then a measure of the possible amplitude of the diff --git a/read/results/pymupdf/2201.00069.txt b/read/results/pymupdf/2201.00069.txt index f535534..54e4e3e 100644 --- a/read/results/pymupdf/2201.00069.txt +++ b/read/results/pymupdf/2201.00069.txt @@ -2,7 +2,7 @@ MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 1 A MeerKAT, e-MERLIN, H.E.S.S. and Swift search for persistent and transient emission associated with three localised FRBs -J. O. Chibueze,1,2★ M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4 +J. O. Chibueze,1,2★M. Caleb,3,4† L. Spitler,5 H. Ashkar,6,17 F. Schüssler,6 B. W. Stappers,4 C. Venter,1 I. Heywood,7,8,9 A. M. S. Richards,3 D. R. A. Williams,3 M. Kramer,3,5 R. Beswick,3 M. C. Bezuidenhout,3 R. P. Breton,3 L. N. Driessen,3 F. Jankowski,3 E. F. Keane,10 M. Malenta,3 M. Mickaliger,3 V. Morello3, H. Qiu,11 K. 
Rajwade,3 @@ -42,16 +42,16 @@ MNRAS 000, 1–15 (2021) Preprint 4 January 2022 Compiled using MNRAS LATEX style file v3.0 ABSTRACT -We report on a search for persistent radio emission from the one-off Fast Radio Burst (FRB) +We report on a search for persistent radio emission from the one-offFast Radio Burst (FRB) 20190714A, as well as from two repeating FRBs, 20190711A and 20171019A, using the MeerKAT radio telescope. For FRB 20171019A we also conducted simultaneous observations with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma rays and searched for signals in the ultraviolet, optical, and X-ray bands. For this FRB, we obtain a UV -flux upper limit of 1.39×10−16 erg cm−2 s−1Å−1, X-ray limit of ∼ 6.6×10−14 erg cm−2 s−1 and -a limit on the very-high-energy gamma-ray flux Φ(𝐴��� > 120 GeV) < 1.7 × 10−12 erg cm−2 s−1. -We obtain a radio upper limit of ∼15𝛹���Jy beam−1 for persistent emission at the locations of both +flux upper limit of 1.39×10−16 erg cm−2 s−1Å−1, X-ray limit of ∼6.6×10−14 erg cm−2 s−1 and +a limit on the very-high-energy gamma-ray flux Φ(𝐸> 120 GeV) < 1.7 × 10−12 erg cm−2 s−1. +We obtain a radio upper limit of ∼15𝜇Jy beam−1 for persistent emission at the locations of both FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness -of ∼53𝛹���Jy beam−1 associated with FRB 20190714A at 𝐴��� = 0.2365. This represents the first +of ∼53𝜇Jy beam−1 associated with FRB 20190714A at 𝑧= 0.2365. This represents the first detection of the radio continuum emission potentially associated with the host (galaxy) of FRB 20190714A, and is only the third known FRB to have such an association. Given the possible association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB @@ -67,11 +67,11 @@ logical distances (e.g. Lorimer et al. 2007; Thornton et al. 2013; Macquart et al. 2020). 
The estimated high radio luminosities and associated brightness temperatures required to produce these short- timescale energetic events at such distances are what makes them -intriguing (Petroff et al. 2021; Caleb & Keane 2021). They have been -observed to emit from ∼ 110 MHz − 8 GHz, though not yet across +intriguing (Petroffet al. 2021; Caleb & Keane 2021). They have been +observed to emit from ∼110 MHz −8 GHz, though not yet across a wide and continuous frequency band due to the variable band- limited spectra of the single pulses. Over 600 FRBs have been dis- -covered1 of which ∼ 20 have been seen to repeat, and it is presently +covered1 of which ∼20 have been seen to repeat, and it is presently uncertain whether they all do (Caleb et al. 2019; James et al. 2020). The extraordinary observed characteristics of the repeating and non- repeating FRBs have led to various progenitor models with the bulk @@ -94,7 +94,7 @@ of several potential mechanisms. In the magnetar model by Margalit et al. (2019), FRBs produced by binary neutron star mergers and accretion induced collapse are expected to be accompanied by per- sistent radio continuum emission on timescales of months to years. -★ james.chibueze@nwu.ac.za +★james.chibueze@nwu.ac.za † manisha.caleb@manchester.ac.uk 1 https://www.wis-tns.org/ The persistent emission is powered by the nebula of relativistic elec- @@ -111,29 +111,29 @@ time by several instruments (Tavani et al. 2021; Ridnaia et al. 2021; Mereghetti et al. 2020; Insight-HXMT 2020). Of the 19 FRBs that have been associated with host galax- ies2, only the sub-arcsecond localisation of the repeating FRB -20121102A to a host galaxy at a redshift of 𝐴��� = 0.19273 ± 0.0008 +20121102A to a host galaxy at a redshift of 𝑧= 0.19273 ± 0.0008 (Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physi- -cally associated with a compact (≤ 0.7 pc), persistent radio source -of luminosity 𝛹���𝐴���𝛹��� ∼ 1039 erg s−1 at a few GHz (Marcote et al. 
+cally associated with a compact (≤0.7 pc), persistent radio source +of luminosity 𝜈𝐿𝜈∼1039 erg s−1 at a few GHz (Marcote et al. 2017). This source is detectable from 300 MHz – 26 GHz (Resmi -et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% vari- +et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼10% vari- ability on day timescales. In contrast, a similar sub-milliarcsecond localisation of another repeating FRB 20180916B to a nearby mas- -sive spiral galaxy at 𝐴��� = 0.0337 ± 0.0002 (Marcote et al. 2020) +sive spiral galaxy at 𝑧= 0.0337 ± 0.0002 (Marcote et al. 2020) showed no associated persistent radio emission. This places a strong -upper limit on the persistent source luminosity of 𝛹���𝐴���𝛹��� ≲ 7.6×1035 +upper limit on the persistent source luminosity of 𝜈𝐿𝜈≲7.6×1035 erg s−1 at 1.6 GHz, which is three orders of magnitude lower than that of FRB 20121102A. Recently, the CHIME/FRB collaboration announced heightened activity in the repeating FRB 20201124A (Chime/FRB Collaboration 2021), which was localised to a host -galaxy at a redshift of 𝐴��� = 0.0979 ± 0.0001 (Fong et al. 2021). +galaxy at a redshift of 𝑧= 0.0979 ± 0.0001 (Fong et al. 2021). Persistent radio emission was detected by the upgraded Giant Me- trewave Radio Telescope (uGMRT) (Wharton et al. 2021) and the Karl G. Jansky Very Large Array (JVLA) (Ricci et al. 2021) on angular scales of a few arcseconds, but resolved out to scales of -∼ 0.1 arcseconds with the European VLBI Network (Marcote et al. +∼0.1 arcseconds with the European VLBI Network (Marcote et al. 2021). -Localisations of four one-off FRBs through imaging of +Localisations of four one-offFRBs through imaging of 2 https://frbhosts.org/ © 2021 The Authors @@ -149,7 +149,7 @@ ducted at a centre frequency of 6.5 GHz. No persistent emission as luminous as the one associated with FRB 20121102A was detected for the ASKAP FRBs (Bhandari et al. 2020). 
While the true age of FRB 121102A is unknown, models based on polarization studies -predict the age to be ∼ 6 − 17 years (Hilmarsson et al. 2021). It is +predict the age to be ∼6 −17 years (Hilmarsson et al. 2021). It is possible that younger, more active FRBs like FRB 20121102A are associated with persistent radio emission while the emission might have faded over time for the older ones. The possibility of repeating @@ -158,7 +158,7 @@ increasing arcsecond localisations suggests that we are entering an era where we can begin to look for evidence of multiple classes by studying FRB host galaxies and multi-wavelength counterparts. In this paper, we report on the search for persistent radio emis- -sion in the host galaxies of one apparent one-off source (FRB +sion in the host galaxies of one apparent one-offsource (FRB 20190714A) and two repeating sources (FRBs 20171019A and 20190711A) (Kumar et al. 2019, 2021) using MeerKAT. In case of the latter, we also conducted simultaneous observations with the @@ -190,17 +190,17 @@ servations are considered in this paper. The data correlation was done with the SKARAB correlator (Hickish et al. 2016) in 4k mode which gives 4096 channels across the 856 MHz bandwidth resulting in a frequency resolution of ∼209 kHz. The data were reduced us- -ing the semi-automated MeerKAT data analysis pipelines - 𝐴���𝐴���𝐴���𝐴���𝐴���3 +ing the semi-automated MeerKAT data analysis pipelines - 𝑜𝑥𝑘𝑎𝑡3 (Heywood 2020). 2.1.1 Imaging analysis -The 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline employs a collection of publicly available ra- +The 𝑜𝑥𝑘𝑎𝑡pipeline employs a collection of publicly available ra- dio interferometry data reduction software. The final data prod- 3 https://ascl.net/code/v/2627 ucts, including reduced and calibrated visibility data (including self-calibration), continuum (including sub-band) images as well as diagnostic plots, are provided by the pipeline. 
The customary -configuration of the 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline incorporates flagging, cross- +configuration of the 𝑜𝑥𝑘𝑎𝑡pipeline incorporates flagging, cross- calibration and self-calibration processes. In the flagging process, the low-gain bandpass edges (856 MHz to 880 MHz and 1658 MHz to 1800 MHz) are flagged on all baselines, along with the location of @@ -210,9 +210,9 @@ the spectrum are then flagged on baselines shorter than 600 m. Then, other possible RFI affected data are flagged out using the CASA routines rflag and tfcrop for the calibrators, and using the tricolour package for the target fields. -The cross-calibration steps using 𝐴���𝐴���𝐴���𝐴���𝐴��� were standard, includ- +The cross-calibration steps using 𝑜𝑥𝑘𝑎𝑡were standard, includ- ing setting the flux scale and deriving corrections for residual delay -calibration, bandpass and time-varying gain. The 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline +calibration, bandpass and time-varying gain. The 𝑜𝑥𝑘𝑎𝑡pipeline uses the customary tasks from the CASA (McMullin et al. 2007) suite for cross-calibration. After applying all the corrections to the target field, we channel-averaged the dataset by a factor of five chan- @@ -228,11 +228,11 @@ WSClean generates the multi-frequency synthesis (MFS) map, in joined-channel deconvolution mode, with a central frequency of 1283 MHz. In other words, the MFS map is a full bandwidth map. In WSClean, each of the sub-bands is deconvolved separately with -an initially high mask of 20𝜋���rms (using the auto masking function +an initially high mask of 20𝜎rms (using the auto masking function provided by WSClean), to generate an artefact-free model of the target field for the self-calibration process. This masking threshold -was iteratively reduced to a value of 3𝜋���rms in the final iteration -of imaging. The 𝐴���𝐴���𝐴���𝐴���𝐴��� pipeline uses the customary tasks from the +was iteratively reduced to a value of 3𝜎rms in the final iteration +of imaging. 
The 𝑜𝑥𝑘𝑎𝑡pipeline uses the customary tasks from the Cubical software (Kenyon et al. 2018) for self-calibration. 2.1.2 Single pulse searches @@ -243,7 +243,7 @@ forming User Supplied Equipment (FBFUSE) that was designed and developed at the Max Planck Institute for Radio Astronomy in Bonn. For this project, FBFUSE combined the data into 764 total-intensity tied-array beams which were used to populate the primary beam of -∼ 1 deg2 of the array. The data are then captured at 306.24 μs time +∼1 deg2 of the array. The data are then captured at 306.24 μs time resolution by the Transient User Supplied Equipment (TUSE), a real-time transient detection backend instrument developed by the MeerTRAP4 team at the University of Manchester. More details on @@ -269,29 +269,29 @@ hanced Multi-Element Remote-Linked Interferometer Network, e- MERLIN array in the United Kingdom (project code: CY10003) on 13 January, 2021 (see Section 3.1.2). Six antennas were used including the 75-m Lovell telescope and the target pointing cen- -tre was R.A. = 12ℎ15𝐴���55𝐴���.12, Dec. = −13◦01′15.′′7. 1407+2827 +tre was R.A. = 12ℎ15𝑚55𝑠.12, Dec. = −13◦01′15.′′7. 1407+2827 was used as the bandpass calibrator, 1331+3030 as the flux cal- ibrator and 1216−1033 as the phase calibrator. The angular sep- aration between the target and the phase calibrator is 2.47◦. The data reduction was done following standard e-MERLIN calibra- tion procedures6 with additional flagging of bad visibilities fol- lowed by imaging. We found two confusing sources in the field, -at R.A. = 12ℎ15𝐴���44𝐴���.669, Dec. = −12◦57′59.′′56 and R.A. = -12ℎ15𝐴���37𝐴���.216, Dec. = −13◦09′33.′′44 at 4.1′ and 9.4′ from the +at R.A. = 12ℎ15𝑚44𝑠.669, Dec. = −12◦57′59.′′56 and R.A. = +12ℎ15𝑚37𝑠.216, Dec. = −13◦09′33.′′44 at 4.1′ and 9.4′ from the pointing centre, respectively. They had apparent flux densities of 4 and 1.3 mJy without primary beam correction. 
We used these for self-calibration of the field and then subtracted them before final imaging. The final image synthesized beam is 0.′′65 × 0.′′15, posi- -tion angle 15◦ elongated in the Declination direction due to the low +tion angle 15◦elongated in the Declination direction due to the low target elevation from the UK. 2.3 The Swift satellite: UVOT and XRT observations Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA space mission operating in soft-X-rays and optical/UV. Here we use data from the X-ray Telescope (XRT) (Burrows et al. 2005) -which operates in the soft X-ray domain of 0.3 − 10 keV as well as +which operates in the soft X-ray domain of 0.3 −10 keV as well as data taken by the UV/Optical Telescope (UVOT) (Roming et al. -2005) operating in the UV to optical domain (170 − 600 nm). +2005) operating in the UV to optical domain (170 −600 nm). During the FRB 20171019A multi-wavelength (MWL) observing campaign, two 2 ks target-of-opportunity (ToO) observations were performed with Swift from 2019-09-28 18:37:02 to 2019-09-28 @@ -338,19 +338,19 @@ RESULTS 3.1 MeerKAT The theoretical thermal noise of the MeerKAT can be calculated as -𝐴���rms = 1 -𝛹���𝐴��� +𝑆rms = 1 +𝜂𝑐 SEFD √︃ -𝐴���pol × 𝐴���(𝐴��� − 1) × Δ𝛹��� × 𝐴���int +𝑛pol × 𝑁(𝑁−1) × Δ𝜈× 𝑡int . (1) The system equivalent flux density (SEFD) of MeerKAT at the -1.28 GHz is 443 Jy and 𝛹���𝐴��� is the correlator efficiency. We used 𝐴���pol -= 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝛹��� = -856 MHz bandwidth and 𝐴���int = 21600 sec observing time for one -epoch. This gives the theoretical rms of ∼ 2 𝛹���Jy beam−1. The typical -image rms obtained from our residual images is ∼ 5 𝛹���Jy beam−1, +1.28 GHz is 443 Jy and 𝜂𝑐is the correlator efficiency. We used 𝑛pol += 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈= +856 MHz bandwidth and 𝑡int = 21600 sec observing time for one +epoch. This gives the theoretical rms of ∼2 𝜇Jy beam−1. 
The typical +image rms obtained from our residual images is ∼5 𝜇Jy beam−1, which is 2.5 times the expected theoretical rms. The wideband MFS image does not allow primary beam correction procedure as this can only be done on the sub-band images with limited rms for detection @@ -362,7 +362,7 @@ their NRAO (National Radio Astronomy Observatory) VLA (Very Large Array) Sky Survey (NVSS) counterparts. However, Chibueze et al. (2021, submitted) confirmed that the overall flux densities obtained with MeerKAT and NVSS are in good agreement with -each other within errors of ∼ 5%. We compared the astrometry of +each other within errors of ∼5%. We compared the astrometry of the discrete radio sources obtained with MeerKAT and NVSS in Figure 1. The position uncertainty of the MeerKAT ranges from 0.′′2 (close to the centre of the primary beam) to a few arcseconds @@ -387,7 +387,7 @@ tion to sources within 5′′. Using this spatial coincidence criterion, we identified a persistent 1283 MHz continuum source near FRB 20190714A, detected in both the 14 September 2019 and the 28 September 2019 epoch. The peak of the MeerKAT radio emission -is offset by ∼ 2′′.1 from the peak of the 𝐴���-band magnitude of the op- +is offset by ∼2′′.1 from the peak of the 𝑖-band magnitude of the op- tical galaxy identified in the Panoramic Survey Telescope and Rapid Response System (PanSTARRS, located at Haleakala Observatory) image (shown as contours in Figures 2 and 3). The MeerKAT ra- @@ -397,22 +397,22 @@ dio source is offset by 1.′′68 from the localisation region of FRB e-MERLIN detection of compact emission towards FRB 20190714 Compact persistent emission was detected in the 1.51 GHz e- -MERLIN image at R.A. = 12ℎ15𝐴���55𝐴���.116, Dec. = −13◦01′14.′′48 -at 86 𝛹���Jy beam−1 by e-MERLIN. The stochastic position uncer- +MERLIN image at R.A. = 12ℎ15𝑚55𝑠.116, Dec. = −13◦01′14.′′48 +at 86 𝜇Jy beam−1 by e-MERLIN. 
The stochastic position uncer- tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa- ration between phase-calibrator and target, and antenna position uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively. The offset from the FRB position is negligible in R.A. and 1.2 arcsec in Dec. The rms in this region (of full primary beam sen- -sitivity) is 20 𝛹���Jy beam−1, making this a 4.3𝜋���rms detection. It is -∼1.5𝜋���rms higher than that of the MeerKAT detection. Although the +sitivity) is 20 𝜇Jy beam−1, making this a 4.3𝜎rms detection. It is +∼1.5𝜎rms higher than that of the MeerKAT detection. Although the e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it is possibly higher due to the low declination of the phase-reference source and to the strong RFI which were removed from the data but may have affected the linearity of the receiver response. The -peak of the e-MERLIN radio emission is offset by ∼ 1.′′4 from the -peak of the PanSTARRS 𝐴���-band emission in Figures 2 and 3. The +peak of the e-MERLIN radio emission is offset by ∼1.′′4 from the +peak of the PanSTARRS 𝑖-band emission in Figures 2 and 3. The e-MERLIN radio source (shown by the cyan cross in Figures 2 and 3) is offset by 0.′′53 from the localised position of FRB 20190714. We estimate the probability of a chance alignment of a back- @@ -420,21 +420,21 @@ ground persistent radio source and the host galaxy, following the procedure of Eftekhari et al. (2018). Instead of using the FRB lo- calisation region, we use the area of the galaxy, which is taken as 2′′ × 2′′, twice the half light radius from Heintz et al. (2020). Given -the source has a flux density of ∼ 90𝛹���Jy we estimate the chance -alignment probability of 0.0008, which corresponds to 3.4𝜋���. The -flux density threshold, assuming 3𝜋���, for an unresolved radio source -is ∼ 15 𝛹���Jy. 
If instead we consider the probability of detecting any -radio source above our flux density threshold of 15𝛹���Jy, the probabil- +the source has a flux density of ∼90𝜇Jy we estimate the chance +alignment probability of 0.0008, which corresponds to 3.4𝜎. The +flux density threshold, assuming 3𝜎, for an unresolved radio source +is ∼15 𝜇Jy. If instead we consider the probability of detecting any +radio source above our flux density threshold of 15𝜇Jy, the probabil- ity of a chance alignment is, therefore, approximately 0.8%, making -the statistical significance of our detection 2.6𝜋���. This represents the +the statistical significance of our detection 2.6𝜎. This represents the first detection of radio continuum emission associated with the host (galaxy) of FRB 20190714A (see Figure 2 and 3). 3.1.3 MeerKAT non-detections No continuum emission was detected near FRBs 20171019A and 20190711A. As each of the images of these sources has an rms -of ∼ 5 𝛹���Jy beam−1, the 3𝜋��� intensity upper limit of any emission -associated with FRBs 20171019A and 20190711A will be ∼ 15 𝛹���Jy +of ∼5 𝜇Jy beam−1, the 3𝜎intensity upper limit of any emission +associated with FRBs 20171019A and 20190711A will be ∼15 𝜇Jy beam−1 (see Table 1). Candidate pulses above a signal-to-noise (S/N) of 10 from the single pulse search with MeerTRAP were visually inspected offline. @@ -446,14 +446,14 @@ Swift The UVOT summed image is presented in Figure 4. The UVOT field of view corresponds roughly to the uncertainty7 of the locali- sation region of FRB 20171019A (RA = 7.5′and DEC = 7′). Using -uvotdetect, we find 30 sources above the 5𝜋��� level and within the +uvotdetect, we find 30 sources above the 5𝜎level and within the FRB 20171019A uncertainty region. Using a 3 arcsec maximum separation, which is slightly larger than the UVOT PSF (Breeveld et al. 2010), these sources are cross-matched with known catalogue sources. 
We find that out of the 30 sources detected by UVOT, 28 are spatially coincident with stars catalogued in the SDSS catalogue (DR12; Alam et al. 2015), and one source is coincident with a galaxy -(AGN broadline SDSS ID: 1237652599570890948 at 𝐴��� ∼ 0.156). +(AGN broadline SDSS ID: 1237652599570890948 at 𝑧∼0.156). This galaxy is also detected by the MeerKAT radio observations. We use the NASA/IPAC Extragalactic Database (NED)8 to search for known galaxies in the FRB 20171019A uncertainty regions. We find @@ -461,7 +461,7 @@ multiple galaxies with unknown redshifts, therefore we cannot draw conclusions on the host galaxy from our observations. Using a 50′′ circular ON region centred on the position of FRB 20171019A and a 50′′ OFF region that does not contain any of the detected sources, -we run the uvotsource tool with a 5𝜋��� background threshold and +we run the uvotsource tool with a 5𝜎background threshold and obtain a flux upper limit of 1.4 × 10−16 erg cm−2 s−1Å−1 without applying a Calactic extinction correction. The XRT summed image is shown in Figure 5. At the edge @@ -470,7 +470,7 @@ the Wolf 1561 star. As we consider this source unrelated to the FRB, we use the online Swift-XRT data products generator (Evans et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3- 10 keV range on the count rate of 0.001885 counts.s−1. Using -WebPIMMS9 (v4.11a) and assuming a weighted average 𝐴���H = 5.12× +WebPIMMS9 (v4.11a) and assuming a weighted average 𝑁H = 5.12× 1020 cm−2 from the direction of the source estimated from the NASA’s HEASARC 10 online tools (HI4PI Collaboration et al. 2016) and a power law model with a photon index = 2, this upper @@ -485,13 +485,13 @@ A second analysis using an independent event calibration and recon- struction (Parsons & Hinton 2014) confirms this result. A search for variable emission on timescales ranging from milliseconds to sev- eral minutes with tools provided in (Brun et al. 
2020) does not reveal -any variability above 2.2 𝜋���. For the total data set of 1.8 h, 95% confi- +any variability above 2.2 𝜎. For the total data set of 1.8 h, 95% confi- dence level (C. L.) upper limits on the photon flux are derived using the method described by Rolke et al. (2005). The energy threshold of the data is highly dependent on the zenith angle of the observa- tions. For these observations, the zenith angles range from 15 to 25 deg, which leads to an energy threshold for the stacked data set of -𝐴���th = 120 GeV. The upper limit on the Very High Energy (VHE) +𝐸th = 120 GeV. The upper limit on the Very High Energy (VHE) 7 https://www.wis-tns.org/object/20171019a 8 https://ned.ipac.caltech.edu; NED is funded by the National Aeronautics and Space Administration and operated by the California Insti- @@ -507,8 +507,8 @@ Chibueze et al. Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the MeerKAT and NVSS sources. gamma-ray flux above that threshold and assuming an energy depen- -dence following 𝐴���−2 is Φ(𝐴��� > 120 GeV) < 2.10 × 10−12 cm−2 s−1 -or Φ(𝐴��� > 120 GeV) < 1.7 × 10−12 erg cm−2 s−1. A variation of +dence following 𝐸−2 is Φ(𝐸> 120 GeV) < 2.10 × 10−12 cm−2 s−1 +or Φ(𝐸> 120 GeV) < 1.7 × 10−12 erg cm−2 s−1. A variation of ± 0.5 of the assumed spectral index leads to a variation in the upper limit of less than ± 19%. A map of energy flux upper limits covering the full region accessible within the H.E.S.S. field of view above @@ -517,20 +517,20 @@ the full region accessible within the H.E.S.S. field of view above DISCUSSION Of the targeted FRB fields reported here, only FRB 20190714A is observed to be spatially coincident with a persistent radio con- -tinuum source. We obtain an upper limit of ∼ 15 𝛹���Jy beam−1 for +tinuum source. 
We obtain an upper limit of ∼15 𝜇Jy beam−1 for FRBs 20190711A and 20171019A, respectively, and a peak inten- -sity of ∼ 53 𝛹���Jy beam−1 for the emission coincident with FRB +sity of ∼53 𝜇Jy beam−1 for the emission coincident with FRB 20190714A. This source is detected at both epochs with similar intensities within the measured rms of the images (see Tables 1 and 2 for details). The values in the Table 2 are derived by carrying out 2D Gaussian fit using similar ellipses enclosing the detected -persistent emission. The average flux density is ∼ 3 times less than +persistent emission. The average flux density is ∼3 times less than that of the persistent source associated with FRBs 20121102A, one -of the most prolific repeaters, located at 𝐴��� = 0.19273(8). Persistent +of the most prolific repeaters, located at 𝑧= 0.19273(8). Persistent radio emission from FRB 20201124A was detected by the uGMRT (Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular scales of a few arcseconds. However, it is resolved out at scales of -∼ 0.1 arcseconds with the European VLBI Network (Marcote et al. +∼0.1 arcseconds with the European VLBI Network (Marcote et al. 2021) suggesting that it is not a compact source directly associated with the FRB. In contrast, the other localised, prolific repeating FRB 20180916A has no persistent radio counterpart. @@ -538,8 +538,8 @@ In the image in Figure 3 one can see that the persistent radio source lies at the edge of the optical extent of the host galaxy as seen in PanSTARRS observations (Heintz et al. 2020). Our derived 1283 MHz peak position with MeerKAT places it just -1.′′68 away from the position of FRB 20190714A (𝛹���𝐴���2000, 𝛹���𝐴���2000 -= 12ℎ15𝐴���55𝐴���.12, -13◦01′15.′′70; Heintz et al. 2020). The posi- +1.′′68 away from the position of FRB 20190714A (𝛼𝐽2000, 𝛿𝐽2000 += 12ℎ15𝑚55𝑠.12, -13◦01′15.′′70; Heintz et al. 2020). The posi- tional uncertainty on the FRB position is 0.′′283. 
Similarly, the peak 1.51 GHz e-MERLIN position of the persistent radio source is sepa- rated from the position of FRB 20190714A by 0.′′53. The persistent @@ -550,7 +550,7 @@ MNRAS 000, 1–15 (2021) MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 7 Figure 2. FRB 20190714A MeerKAT epoch I image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours -(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝐴���-band optical counterpart coincident in position with the persistent radio emission. The +(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our e-MERLIN observations. MNRAS 000, 1–15 (2021) @@ -558,7 +558,7 @@ MNRAS 000, 1–15 (2021) 8 Chibueze et al. Figure 3. FRB 20190714A MeerKAT epoch II image (top) and a zoom-in (bottom) around the position of the FRB indicated by the cyan circle. White contours -(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝐴���-band optical counterpart coincident in position with the persistent radio emission. The +(levels: 300, 500, 900, 1200, 1600 counts) represent the PanSTARRS 𝑖-band optical counterpart coincident in position with the persistent radio emission. The white ellipse in the bottom left corner represents the beam size of MeerKAT. The cyan cross indicates the position of the detected compact emission in our e-MERLIN observations. MNRAS 000, 1–15 (2021) @@ -566,14 +566,14 @@ MNRAS 000, 1–15 (2021) MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 9 Figure 4. UVOT summed image of FRB 20171019A region taken during the MWL observation campaign in September-October 2019. The white circles -indicate sources detected above 5𝜋���. 
The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper +indicate sources detected above 5𝜎. The cyan dot denotes the location of FRB 20171019A, the circle around it indicates the region used to derive the upper limits while the magenta region indicates the background region used. The green box indicates FRB 20171019A 90% localisation region as reported in Kumar et al. (2019). Table 1. Details of the FRB fields observed with MeerKAT. Field name Observation date Synthesized beam -rms (𝛹���Jy beam−1) +rms (𝜇Jy beam−1) Detected? FRB 20171019A 28 September 2019 @@ -583,34 +583,34 @@ FRB 20171019A 18 October 2019 6.′′8 × 5.′′0 5.2 -< 15𝛹���Jy beam−1 +< 15𝜇Jy beam−1 FRB 20190711A 23 August 2019 11.′′7 × 4.′′9 4.9 -< 15𝛹���Jy beam−1 +< 15𝜇Jy beam−1 FRB 20190711A 09 September 2019 12.′′5 × 4.′′9 4.6 -< 15𝛹���Jy beam−1 +< 15𝜇Jy beam−1 FRB 20190714A 14 September 2019 7.′′1 × 6.′′2 4.2 -54.4 𝛹���Jy beam−1 +54.4 𝜇Jy beam−1 FRB 20190714A 28 September 2019 6.′′5 × 5.′′1 5.8 -52.0 𝛹���Jy beam−1 +52.0 𝜇Jy beam−1 Table 2. Details of the radio continuum source associated with FRB 20190714A. Field name Observation date Telescope -𝛹���centre (GHz) -𝛹���J2000 -𝛹���J2000 +𝜈centre (GHz) +𝛼J2000 +𝛿J2000 Maj. × min. axis Pos. angle Int. flux density @@ -618,29 +618,29 @@ FRB 20190714A 28 September 2019 MeerKAT 1.283 -12ℎ15𝐴���55𝐴���.154 +12ℎ15𝑚55𝑠.154 -13◦01′17.′′30 9.′′6 × 7.′′4 88.7◦ -87.4 𝛹���Jy +87.4 𝜇Jy FRB 20190714A 18 October 2019 MeerKAT 1.283 -12ℎ15𝐴���55𝐴���.193 +12ℎ15𝑚55𝑠.193 −13◦01′17.′′18 8.′′2 × 6.′′4 12.2◦ -80.7 𝛹���Jy +80.7 𝜇Jy FRB 20190714A 13 January 2021 e-MERLIN 1.510 -12ℎ15𝐴���55𝐴���.116 +12ℎ15𝑚55𝑠.116 −13◦01′14.′′51 0.′′15 × 0.′′65 17.6◦ -107.5 𝛹���Jy +107.5 𝜇Jy large offset from the centre of the galaxy makes the persistent source unlikely to be an AGN. So far this FRB has not been seen to repeat. 
Higher resolution imaging will be required to be certain of a direct @@ -659,12 +659,12 @@ Wolf 1561 star is shown in cyan and is labelled. The green box indicates FRB 201 FRB 20190714A (780 Mpc), an unresolved source with an an- gular size of 0.′′6 corresponds to a physical extent of ≲2.3 kpc. The uGMRT reported the detection of an unresolved radio emission at -650 MHz with a flux density of 700±100 𝛹���Jy (Wharton et al. 2021), +650 MHz with a flux density of 700±100 𝜇Jy (Wharton et al. 2021), while the JVLA detected persistent emission with a flux density of -340 ± 30 𝛹���Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated -spectral index between these frequencies (∼ −0.5, Ricci et al. 2021), -the 1.3 GHz flux density would be ∼ 500 𝛹���Jy (similar to the 3-𝜋��� -upper limit on observations from 1 − 2 GHz; Law et al. 2021). The +340 ± 30 𝜇Jy at 3 GHz (Ricci et al. 2021). Assuming the estimated +spectral index between these frequencies (∼−0.5, Ricci et al. 2021), +the 1.3 GHz flux density would be ∼500 𝜇Jy (similar to the 3-𝜎 +upper limit on observations from 1 −2 GHz; Law et al. 2021). The flux density we measured for FRB 20190714A is a factor of ∼10 lower than FRB20201124A, but FRB 20190714A is also a factor 2.6 more distant. Therefore, the flux densities would be comparable @@ -679,7 +679,7 @@ to FRB 20121102A, is a young nebula powered flaring magnetar embedded in a 20–50 year-old supernova remnant (Beloborodov 2017; Metzger et al. 2019). The lack of a bright persistent radio source associated with the repeater FRB 20180916A suggests that -it is comparatively older at ≳ 200 − 500 years and the persistent +it is comparatively older at ≳200 −500 years and the persistent radio source may have faded. In the model by Metzger et al. 
(2019), the nebula is suggested to contribute significantly to the rotation measure and dispersion measure (DM), as well as to the persis- @@ -715,7 +715,7 @@ MNRAS 000, 1–15 (2021) MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 11 Figure 6. Map of upper limits on the VHE gamma-ray energy flux derived from the H.E.S.S. observations. The limits are valid above 120 GeV and assume -a photon flux distribution following an 𝐴���−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al. +a photon flux distribution following an 𝐸−2 dependence. The green box indicates the FRB 20171019A 90% localisation region as reported in Kumar et al. (2019).The oversampling radius is 0.1◦. emissions. In the case of existence of X-ray non-thermal outbursts, the lack of VHE detection could indicate that inverse Compton is @@ -743,11 +743,11 @@ of three FRBs (FRB 20190714A, 20190711A and 20171019A), and also a multi-wavelength campaign on one of these (FRB 20171019A). We detected persistent compact radio emission associated with -FRB 20190714A (at 𝐴��� = 0.2365) using the MeerKAT and e- +FRB 20190714A (at 𝑧= 0.2365) using the MeerKAT and e- MERLIN radio telescope. This represents the first detection of the radio continuum emission associated with the host (galaxy) of FRB 20190714A and is only the third known FRB to have such an as- -sociation. We furthermore obtained a radio upper limit of∼ 15𝛹���Jy +sociation. We furthermore obtained a radio upper limit of∼15𝜇Jy beam−1 for the repeating FRBs 20190711A and 20171019A. We also performed UV, X-ray and VHE observations with the Swift and H.E.S.S. instruments and obtained upper limits in the three @@ -798,7 +798,7 @@ the Austrian Federal Ministry of Education, Science and Research and the Austrian Science Fund (FWF), the Australian Research Council (ARC), the Japan Society for the Promotion of Science and by the University of Amsterdam. 
We appreciate the excellent -work of the technical support staff in Berlin, Zeuthen, Heidelberg, +work of the technical support staffin Berlin, Zeuthen, Heidelberg, Palaiseau, Paris, Saclay, Tübingen and in Namibia in the construc- tion and operation of the equipment. This work benefited from services provided by the H.E.S.S. Virtual Organisation, supported @@ -889,7 +889,7 @@ Mereghetti S., et al., 2020, ApJ, 898, L29 Metzger B. D., Margalit B., Sironi L., 2019, MNRAS, 485, 4091 Offringa A. R., et al., 2014, MNRAS, 444, 606 Parsons R. D., Hinton J. A., 2014, Astroparticle Physics, 56, 26 -Petroff E., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p. +PetroffE., Hessels J. W. T., Lorimer D. R., 2021, arXiv e-prints, p. arXiv:2107.10113 Platts E., Weltman A., Walters A., Tendulkar S. P., Gordin J. E. B., Kandhai S., 2019, Phys. Rep., 821, 1 diff --git a/read/results/pymupdf/2201.00151.txt b/read/results/pymupdf/2201.00151.txt index 86ff84e..42faaeb 100644 --- a/read/results/pymupdf/2201.00151.txt +++ b/read/results/pymupdf/2201.00151.txt @@ -426,7 +426,7 @@ All the relevant properties of the galaxy are given in Table 1, including numbers of particles and total masses for both compo- nents, and details on the shape of the stellar component: the axis ratios minor to major (shortest to longest) c/a, intermediate to -major b/a, and the triaxiality parameter T = (a2 − b2)/(a2 − c2). +major b/a, and the triaxiality parameter T = (a2 −b2)/(a2 −c2). We distinguish between the half-mass radius provided in the Il- lustris database and the half-number radius r1/2, which we use for further calculations in this paper. The difference between the @@ -496,7 +496,7 @@ persion. Both populations show a weak rotation signal at large distances from the center. 
The velocity anisotropy parameter β(r) = -1 − (σ2 +1 −(σ2 θ + σ2 φ)/(2σ2 @@ -549,10 +549,10 @@ sity profile with the King formula (King 1962) I(R) = I0  1 -� +p 1 + (R/Rc)2 − 1 -� +p 1 + (Rt/Rc)2  2 @@ -673,11 +673,11 @@ where I0, Rc, and Rt are the model parameters. The profile can be analytically deprojected to obtain the 3D density ρ(r) = ρ0 z2 -�1 +"1 z arccos(z) − -� -1 − z2 -� +p +1 −z2 +# , (2) where @@ -687,7 +687,7 @@ I0 (3) and z = -� +s r2 + R2c R2c + R2 t @@ -704,10 +704,10 @@ We follow the approach introduced in Kowalczyk et al. (2018), namely we model the total mass profile with the mass-to-light ratio Υ varying with radius: log Υ(r) = -� +( log(Υ0) -r ≤ r0 -a(log r − log r0)c + log(Υ0) +r ≤r0 +a(log r −log r0)c + log(Υ0) r > r0 (5) Article number, page 5 of 12 @@ -778,8 +778,8 @@ where r is the distance from the center of the galaxy, r0 is a constant, while Υ0, a, and c are the parameters of a model. We have assumed log r0 = 0.33 which corresponds to three softening scales for stellar particles in the Illustris simulation. -We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04 -and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the require- +We probed the parameter a ∈[0 : 1.3] with a step ∆a = 0.04 +and c ∈[1.1 : 2.9] with a step ∆c = 0.2, imposing the require- ment on the total density profile to be monotonically decreasing with radius. For each set of parameters and for each line of sight we generated 1200 orbits using 100 values of energy (expressed @@ -795,7 +795,7 @@ of Υ0 were obtained with a simple transformation of velocities given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). 
In or- der to smooth out the numerical artifacts, the three-dimensional χ2 spaces were then interpolated with 12-order polynomials -(∼ a4c4Υ4 +(∼a4c4Υ4 0) that were further used to determine the global min- imums (identified as the best-fitting models) and 1, 2, 3 σ con- fidence levels which for three parameters correspond to ∆χ2 = @@ -1266,7 +1266,7 @@ and 4th velocity moments (top to bottom) for the three data sam- ples: all stars, population I, and population II (in red, orange, and blue, respectively). The error bars indicate 1 σ sampling errors. The parameter space for Υ(r) has been probed as follows: -a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a +a ∈[0 : 1.85] with a step ∆a = 0.05 and c ∈[1.2 : 6] with a step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the parameter c was fixed at c = 3 and now we fit it as a free pa- rameter. As for the mock data in Section 3.2, different values of @@ -1395,9 +1395,9 @@ panel) for the Fornax dSph. closed total mass at larger radii. In particular, for the mass en- closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48 −1.56 × 108 -M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87 +M⊙from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87 −1.13 × -108 M⊙ from the fit of populations, while previously we had +108 M⊙from the fit of populations, while previously we had Mold(< 1.8 kpc) = 3.7+1.4 −1.3 × 108 M⊙. Interestingly, despite the significant shift of the position of diff --git a/read/results/pymupdf/2201.00178.txt b/read/results/pymupdf/2201.00178.txt index 7b745f8..90b7e18 100644 --- a/read/results/pymupdf/2201.00178.txt +++ b/read/results/pymupdf/2201.00178.txt @@ -14,8 +14,8 @@ probe steady, near-surface flows in the Sun. 
Using Doppler cubes obtained from Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling measurements to show that the resulting divergence and radial vorticity maps at supergranular length scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre- -lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, -while ≥ 0.8 is obtained for the radial vorticity. +lation Tracking method. We find that the Pearson correlation coefficient is ≥0.9 for divergence flows, +while ≥0.8 is obtained for the radial vorticity. Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) 1. INTRODUCTION Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect @@ -73,8 +73,8 @@ complete derivation of the forward problem. Working in the plane-parallel atmosp denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood -of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the -horizontal wavenumber qR⊙ ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal +of the supergranular scale (∼30 Mm), we expect the measured spectral cross-correlation signal to peak around the +horizontal wavenumber qR⊙≈120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbation described in a horizontal Fourier domain. 
Supergranular velocities are subsonic (300-400 m/s, see Rincon & Rieutord 2018), permitting us to model the flow vector uuu = (ux, uy, uz) in the Cartesian domain like so (Unno et al. @@ -82,7 +82,7 @@ perturbation described in a horizontal Fourier domain. Supergranular velocities uσ = ∇×[∇×(P ez)] + ∇×(T ez), (1) where P = P σ(x) and T = T σ(x) are poloidal and toroidal scalar functions, varying with position x and temporal -frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying +frequency σ. ∇is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period of analysis. @@ -96,12 +96,12 @@ Fourier domain, and since we wish to image horizontal flows on a small patch of function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline, etc). This is expressed as -P ≡ Pq(z) = -� +P ≡Pq(z) = +X j fj(z) Pqj, -T ≡ Tq(z) = -� +T ≡Tq(z) = +X j fj(z) Tqj. (3) @@ -123,9 +123,9 @@ k+q thus to the flow coefficients Pqj and Tqj (see eq A7) ⟨φω∗ k φω -k+q⟩ = Hω +k+q⟩= Hω kk′nn′ -� +X j Cqj,kPqj + Dqj,kTqj. (4) @@ -146,13 +146,13 @@ k φω k+q (see Woodard 2006, 2014, 2016) results in the B-coefficients Bk,q, according to Bk,q = -� +P ω Hω∗ kk′nn′φω∗ k φω k+q -� +P ω |Hω kk′nn′|2 @@ -162,21 +162,21 @@ Multiplying eq 4 on both sides by Hω∗ kk′nn′ and substituting by eq 5 on the left-hand-side results in a concisely defined forward problem (compare with eq 4) Bk,q = -� +X j Cqj,kPqj + Dqj,kTqj. (6) In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. 
Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk, |ω| ∈ -� -ωnk − ϵΓnk/2, ωnk + ϵΓnk/2 -� + +ωnk −ϵΓnk/2, ωnk + ϵΓnk/2 + or |ω| ∈ -� -ωn′k′ − ϵΓn′k′/2, ωn′k′ + ϵΓn′k′/2 -� + +ωn′k′ −ϵΓn′k′/2, ωn′k′ + ϵΓn′k′/2 + . (7) Summing over ±ω guarantees that the parity Bk,q = B∗ @@ -186,7 +186,7 @@ Taking the complex conjugate on both sides of eq 6 and considering the negative −k, B∗ −k,−q = -� +X j C−qj,−kP ∗ −qj + D−qj,−kT ∗ @@ -208,11 +208,11 @@ and with finite lifetimes. This stochasticity leads to realization noise in rep Mani et al. Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and p2 (green). The shaded regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of -kR⊙ and ω/2π to which we have restricted ourselves in this analysis. Beyond kR⊙ of 2000, it is seen that the theoretical fitting +kR⊙and ω/2π to which we have restricted ourselves in this analysis. Beyond kR⊙of 2000, it is seen that the theoretical fitting of mode frequencies start deviating from the observed dispersion relation for the f-mode. such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model as in H21, which was motivated by the above discussion, -Gk,q ≡ ⟨|Bk,q|2⟩, +Gk,q ≡⟨|Bk,q|2⟩, (9) where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A for explanation). @@ -234,23 +234,23 @@ Eq 6, while short enough that supergranules do not substantially evolve (lifetim from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral profiles of the two modes [n, k] and [n′, k′] closely align in ω space. 
This implies that their mode frequencies should be -sufficiently close (|ωnk − ωn′k′| ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over +sufficiently close (|ωnk −ωn′k′| ≤δ, the separation parameter). Since Lorentzians decay rapidly, the summation over ±ω is significant only over a few linewidths (ϵ, the summation parameter; see eq 7). We have empirically found and -tabulated δ in Table 1 for the radial order couplings n-n′ ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only +tabulated δ in Table 1 for the radial order couplings n-n′ ∈f-f, p1-p1, and p2-p2 (the signal strength depends only weakly on ϵ; we set it to 3 line widths). Figure 1 shows that for any two adjacent ridges (adjacent n and n′), mode frequencies ωnk and ωn′k become spaced farther apart with increasing wavenumber kR⊙. It is also known that mode linewidth Γ grows with radial orders for a given kR⊙. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of -observation set the total number of modes within a range of kR⊙ (and ω/2π) that can be clearly observed, thereby +observation set the total number of modes within a range of kR⊙(and ω/2π) that can be clearly observed, thereby affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually -inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR⊙ at fixed -radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR⊙ ≤ 2000 and qR⊙ ≤ 300. Our -frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). +inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR⊙at fixed +radial order are different. In wavenumber, we restrict our analysis to within 200 ≤kR⊙≤2000 and qR⊙≤300. 
Our +frequency range is confined to span the range over which acoustic modes are observed (2 ≤ω/2π ≤5 in mHz). Imaging near-surface flows using mode-coupling analysis 5 Coupling -kR⊙ range +kR⊙range # of δ modes @@ -286,16 +286,16 @@ Bk,q from the linear relation in eq 6. We describe inversion using regularized-l leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis -functions fj(z) (J ≪ M; see eq 3 and section 3.1), whereas SOLA scales as M 2 (see Appendix B). For M > 5000, +functions fj(z) (J ≪M; see eq 3 and section 3.1), whereas SOLA scales as M 2 (see Appendix B). For M > 5000, computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are interested in only surface flows, we leave higher order coupling to future work. It bears mentioning that the slopes of the ridges in the kR⊙-ν spectrum (Figure 1) increase with radial order. This -limits us to low-to-intermediate kR⊙ (< 1000) for these higher radial orders if we are to remain under the acoustic cut- -off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals -from low kR⊙ - too large an observation region could possibly render invalid the Cartesian geometry approximation. +limits us to low-to-intermediate kR⊙(< 1000) for these higher radial orders if we are to remain under the acoustic cut- +offfrequency of 5.3mHz. 
It also becomes imperative to use a spatially larger observation patch to gain access to signals +from low kR⊙- too large an observation region could possibly render invalid the Cartesian geometry approximation. Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich helioseismic technique. @@ -303,9 +303,9 @@ helioseismic technique. For given q, the forward problem may be stated as KU = B, (10) -with the aim to minimize the misfit � +with the aim to minimize the misfit P k -||KU − B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed +||KU −B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed by the sensitivity kernels: {Cqj,k, Dqj,k}. U is a vector composed of the flow coefficients: {Pqj, Tqj} and B is a vector composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and toroidal flow. We use B-spline basis functions as our fj(z), comprising 11 knots spaced uniformly in acoustic radius, @@ -320,28 +320,28 @@ U =(K⊺Λ−1K)−1K⊺Λ−1B. 6 Mani et al. -Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙ = [−112, −45], -at the depth zo = −0.41 Mm. Right: L-curve for the mode qR⊙ = [−112, −45]; the knee (λ = 2.48) is marked by a blue +Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙= [−112, −45], +at the depth zo = −0.41 Mm. Right: L-curve for the mode qR⊙= [−112, −45]; the knee (λ = 2.48) is marked by a blue diamond. 
-Since the least-squares problem is typically ill-posed, we restate the minimization as � +Since the least-squares problem is typically ill-posed, we restate the minimization as P k -||KU − B||2 + λ||U||2 with -the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution +||KU −B||2 + λ||U||2 with +the regularization parameter λ which this results in a trade-offbetween misfit reduction (first term) and solution norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this regularization makes the problem better conditioned and is now defined as U = (K⊺Λ−1K + λI)−1K⊺Λ−1B, (13) where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed -by plotting ||U||2 vs ||KU − B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the +by plotting ||U||2 vs ||KU −B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal flow Pq are shown in Figure 3. 4. LCT To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by -examining the advection of convective granules (1.2 Mm, qR⊙ ≈ 3500; Hathaway et al. 2015) by underlying larger- -scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm), +examining the advection of convective granules (1.2 Mm, qR⊙≈3500; Hathaway et al. 2015) by underlying larger- +scale flow systems. 
Since granules are used as tracers, which are much smaller in size than supergranules (≈35 Mm), LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob- @@ -353,9 +353,9 @@ A Gaussian of width sigma allows to isolate a small region surrounding the grid moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in section 1.1. The two patches I1, I2 are then cross correlated for different values of position shifts ∆x, Cij(∆x, ∆y) = -� +Z dx I∗ -1(−x)I2(∆x − x). +1(−x)I2(∆x −x). (14) The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule. Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10 @@ -366,7 +366,7 @@ requires the input sigma, which we set to 4 pix, that captures the extent of loc Imaging near-surface flows using mode-coupling analysis 7 -Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR⊙ and +Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p1-p1, and p2-p2 as a function of qxR⊙and qyR⊙. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the mean. Total power appears to increase through the radial orders. Power is in units of m2/s4. dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the @@ -387,15 +387,15 @@ y = q2, div is given by, ∇h · uuu(q, z) = q2∂zP, (16) and curl is given by, -� -∇ × uuu(q, z) -� +h +∇× uuu(q, z) +i z = q2T. 
(17) We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The -essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR⊙ of +essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR⊙of interest (see Figure 4), and subsequently convert it to real space. -We seek to show comparisons (see Figures 5, 6, and 7) for qR⊙ = 100, 150, 200 and 250. To sufficiently delineate +We seek to show comparisons (see Figures 5, 6, and 7) for qR⊙= 100, 150, 200 and 250. To sufficiently delineate flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to obtain a real-space steady-flow map. @@ -403,16 +403,16 @@ obtain a real-space steady-flow map. 8 Mani et al. Figure 4. Left: Divergence-flow power spectrum |div|2, from eqn 16, obtained from inversion using all the couplings. The -power-spectrum is then filtered with a bandpass centered around qR⊙ = 150 (middle panel). The resulting spectra is shown in +power-spectrum is then filtered with a bandpass centered around qR⊙= 150 (middle panel). The resulting spectra is shown in the right panel. The units of |div|2 are in s−2. For illustration, we show the action of the filter on the power-spectrum |div|2 since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of -smoothing depends on the length scale qR⊙ to be compared with mode-coupling. +smoothing depends on the length scale qR⊙to be compared with mode-coupling. div and curl are then simply computed by div = ∂xvx + ∂yvy, (18) -curl = ∂xvy − ∂yvx. +curl = ∂xvy −∂yvx. 
(19) We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, and transform back to real space. @@ -421,10 +421,10 @@ performed for mode-coupling (M-C) and for LCT - M-C : φ(x, y; t) 3D FFT -=====⇒ φω +=====⇒φω k, Bk,q inversion -======⇒ P, T +======⇒P, T ∇h· ===⇒ ∇× @@ -436,7 +436,7 @@ div, curl LCT : I1, I2 FLCT -====⇒ vx, vy +====⇒vx, vy smooth, ======⇒ ∇h· ∇× @@ -448,11 +448,11 @@ Filtered, Fourier-space flows 2D FFT -=====⇒ div, curl +=====⇒div, curl 6. RESULTS Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5, where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from -the two methods near supergranular scale (qR⊙ ≈ 100). Near-surface flows are imaged most faithfully when all the +the two methods near supergranular scale (qR⊙≈100). Near-surface flows are imaged most faithfully when all the couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence @@ -460,7 +460,7 @@ the vortical flows as inferred from the two methods, despite being an order of insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical -flows at different scales (qR⊙ = 100, 150) can yield extremely good agreement with LCT. As the length scale of the +flows at different scales (qR⊙= 100, 150) can yield extremely good agreement with LCT. 
As the length scale of the inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases. An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to comment substantively on the flows at these scales. @@ -468,10 +468,10 @@ comment substantively on the flows at these scales. Imaging near-surface flows using mode-coupling analysis 9 -(a) qR⊙ = 100, f-f + p1-p1 + p2-p2 +(a) qR⊙= 100, f-f + p1-p1 + p2-p2 Figure 5. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around -qR⊙ = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges +qR⊙= 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum values. @@ -494,17 +494,17 @@ and the regularization parameter to be used in the inversion. We then separately 10 Mani et al. -(a) qR⊙ = 100, f-f -(b) qR⊙ = 150, p1-p1 +(a) qR⊙= 100, f-f +(b) qR⊙= 150, p1-p1 Figure 6. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around -qR⊙ = 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR⊙ = 150. We cut edges out from the flow +qR⊙= 100, and using (b) p1-p1 coupling (bottom row), bandpass filtered around qR⊙= 150. 
We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. -(a) qR⊙ = 200, f-f + p1-p1 + p2-p2 -(b) qR⊙ = 250, f-f + p1-p1 + p2-p2 +(a) qR⊙= 200, f-f + p1-p1 + p2-p2 +(b) qR⊙= 250, f-f + p1-p1 + p2-p2 Figure 7. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around -(a) qR⊙ = 200, and (b) qR⊙ = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. +(a) qR⊙= 200, and (b) qR⊙= 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired @@ -594,40 +594,40 @@ As described in section 1.1, we seek to describe the flow u as a function of q eq 3 into eq 2, uσ q(z) = -� +X j -� + q2 fjez + iq f ′ j -� + P σ jq + iq×ez fjT σ jq. (A1) -For flows in the anelastic limit (u ≪ speed of sound), we can denote the flow perturbation operator as δLσ = -−2iωρuσ · ∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, +For flows in the anelastic limit (u ≪speed of sound), we can denote the flow perturbation operator as δLσ = +−2iωρuσ · ∇(see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, δLσ q = −2iω ρ (iuσ q · k + uσ q · ez∂z), (A2) -= −2iωρ � +=−2iωρ P j -� + −k · q f ′ jP σ -jq − k · (q×ez) fjT σ +jq −k · (q×ez) fjT σ jq + q2 fjP σ jq ∂z -� + . (A3) 12 Mani et al. 
Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) -ξk ≡ ξnk(z) = iˆkHnk(z)ez + ˆzVnk(z), +ξk ≡ξnk(z) = iˆkHnk(z)ez + ˆzVnk(z), (A4) where H and V are real-valued functions; n and n′ are dropped for compactness of notation. Then the coupling of two modes ξk and ξk′ (k′ = k + q), by the flow perturbation operator δLσ @@ -636,17 +636,17 @@ k′(σ), is given by Λk k′(σ) ≡ -� +Z dx (δLσ qξk) · ξ∗ k′ = -� +Z dx -� -− 2iωρ -� +" +−2iωρ +X j -� +n q2 fjP σ jq (ˆk · ˆk ′ H′ @@ -655,24 +655,24 @@ k′ + V ′ kV ∗ k′) − -� + k · q f ′ jP σ jq + k · (q×ez) fjT σ jq -� + (ˆk · ˆk ′ HkH∗ k′ + VkV ∗ k′) -� � +o # (A5) We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by Cqj,k = -� +Z dz ρ -� +h q2 fj (ˆk · ˆk ′ H′ kH∗ @@ -684,10 +684,10 @@ j (ˆk · ˆk ′ HkH∗ k′ + VkV ∗ k′) -� +i , Dqj,k = k · (q×ez) -� +Z dz ρ fj (ˆk · ˆk ′ HkH∗ k′ + VkV ∗ @@ -698,7 +698,7 @@ measurement between modes k and k + q From eq 8 of Woodard (2014), we write the wavefield cross-correlation as ⟨φω∗ k φω+σ -k+q ⟩ = Hω +k+q ⟩= Hω kk′σΛk k′(σ), (A7) @@ -718,7 +718,7 @@ Rω k = 1 ω2 -nk − ω2 − iωγnk/2, +nk −ω2 −iωγnk/2, (A9) where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq @@ -732,13 +732,13 @@ are established. Mode normalization N is given by Nk = 1 Q Q -� +X k -� +P ω |φω k|2 -� +P ω Rω k @@ -748,7 +748,7 @@ where the 1 Q Q -� +P k on the right-hand-side implies average over all [kx, ky] (Q terms in all) such that k = |k| is constant. This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ωnk. @@ -769,16 +769,16 @@ Imaging near-surface flows using mode-coupling analysis B. 
SOLA INVERSIONS Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors for the mode q and depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion -� +P k αk,zoBk,q allows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an -’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen +’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆is chosen which the averaging kernel should resemble after performing inversion. B.1. Kernels in the integral form Since the kernels in eq A6 are manifest as coefficients on a basis fj(z), we first derive kernels that can be expressed as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: -P ≡ Pq(z), p ≡ Pqj, F ≡ fj(z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for +P ≡Pq(z), p ≡Pqj, F ≡fj(z), B ≡Bk,q C ≡Cqj,k and K ≡Kk,q(z), we write (assume only poloidal flow for simplicity, the same derivations hold true for toroidal flow as well) P = Fp (B11) @@ -796,11 +796,11 @@ where K = (F T F)−1F T C, i.e., Kk,q(z) = -� +X j,j′ -� � +h Z dz fj(z)fj′(z) -�−1 +i−1 fj′(z)Cqj′,k (B14) B.2. Obtaining the coefficients α @@ -809,23 +809,23 @@ T (z, zo) = 1 √ 2π∆2 exp -�z − zo +z −zo 2∆2 -� + . (B15) This can be achieved by solving the optimization problem minimize X = -� +Z dz -� -T (z, zo) − Θq(z, zo) -�2 +h +T (z, zo) −Θq(z, zo) +i2 , (B16) where we introduce the averaging kernel for mode q thus Θq(z, zo) = -� +X k αk,zoKk,q(z). (B17) @@ -834,46 +834,46 @@ and B14. 14 Mani et al. -Figure 8. 
Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR⊙ = -[−112, −45] and kR⊙ = [−853, −157] is chosen for all the radial order couplings for comparison. +Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p1-p1, and p2-p2. qR⊙= +[−112, −45] and kR⊙= [−853, −157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel -(eq B17) using SOLA, for qR⊙ = [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). +(eq B17) using SOLA, for qR⊙= [−112, −45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). Integral of the averaging kernel over z is 0.89. Setting ∂X -∂α → 0 gives us the matrix problem to be solved +∂α →0 gives us the matrix problem to be solved A{α} = v, {α} = -� +h A + µI -�−1 +i−1 v, (B18) where the square matrix A = -� +R dz Kk,q(z)Kk′,q(z) and v = -� +R dz Kk,q(z)T (z, zo). Here, k′ is just a dummy index for denoting elements in the matrix A, (k′ ̸= k+q). In the last line of eq B18, we introduce regularization using an Identity matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α -obtained from eq B18 into last line of eq B13, and � +obtained from eq B18 into last line of eq B13, and P k on both sides -� +X k αk,zoBσ k,q = -� +X k αk,zo -� +Z dz Kk,q(z)P σ q (z), = -� +Z dz Θq(z, zo)P σ q (z), -≈ ⟨P σ +≈⟨P σ q (zo)⟩ (B19) Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di- @@ -901,10 +901,10 @@ doi: 10.1007/s41116-020-00028-3 Imaging near-surface flows using mode-coupling analysis 15 -Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR⊙ and qyR⊙. Right: Corresponding power-spectrum -averaged over the azimuthal angle. 
Shaded region shows ±1 − σ error around the mean. Power is in units of m2/s4. +Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR⊙and qyR⊙. Right: Corresponding power-spectrum +averaged over the azimuthal angle. Shaded region shows ±1 −σ error around the mean. Power is in units of m2/s4. Figure 10. Real-space divergence flows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling, -and LCT, bandpass filtered around qR⊙ = 100. We cut edges out from the flow maps and compare a circular region of diameter +and LCT, bandpass filtered around qR⊙= 100. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is 1.05. For demonstration, we show inversions only for poloidal flow using SOLA. De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, diff --git a/read/results/pymupdf/2201.00200.txt b/read/results/pymupdf/2201.00200.txt index 34504e3..768dc6c 100644 --- a/read/results/pymupdf/2201.00200.txt +++ b/read/results/pymupdf/2201.00200.txt @@ -167,7 +167,7 @@ panel to a model with luminosity enhanced by a factor of ten. The dash-dotted red lines show ∆T/T0 (in %), the relative dif- ference between the time and space averages of the temperature, T, and the initial temperature, T0. The solid blue lines show the -time and space averages of the sub-adiabaticity (∇ − ∇ad). The +time and space averages of the sub-adiabaticity (∇−∇ad). The dashed black lines show the initial profile of the sub-adiabaticity, (∇−∇ad)init. The convective boundary is indicated by the vertical solid line (see details in B21) @@ -178,7 +178,7 @@ boundary found in the simulations of B21 is illustrated in Fig. cial enhancement in the luminosity by a factor of ten because the features are intensified in these ‘boosted’ models (upper panel). 
The figure shows the local heating in the overshooting layer and -its impact on the sub-adiabaticity (∇ − ∇ad), with ∇ = d log T +its impact on the sub-adiabaticity (∇−∇ad), with ∇= d log T d log P the 2 @@ -212,10 +212,10 @@ defined by A = 1 Γ1 d ln P -d ln r − d ln ρ +d ln r −d ln ρ d ln r , (1) -with Γ1 = (∂ ln P/∂ ln ρ)ad. Starting from a reference evolu- +with Γ1 = (∂ln P/∂ln ρ)ad. Starting from a reference evolu- tionary model, Buldgen et al. (2020) used an inversion pro- cedure to iteratively reconstruct a solar model. Successive in- versions of the Ledoux discriminant allowed them to obtain a @@ -243,22 +243,22 @@ the quantity (ASun - Aref). The second concerns the speed of sound. The same positive bump at the same location as for the Ledoux discriminant, A, is observed for the quantity (c2 -s,Sun − c2 +s,Sun −c2 s,ref)/c2 s,ref. The corrections applied to A during the reconstruction procedure also reduce the discrepancy in the speed of sound in the radiative region. The third concerns the entropy. Large discrepancies are ob- served in both the radiative region and the convective zone. The -1 Less sub-adiabatic means that |∇ − ∇ad| decreases compared to the +1 Less sub-adiabatic means that |∇−∇ad| decreases compared to the initial profile. -entropy discrepancy (S Sun − S ref)/S ref has two positive peaks in +entropy discrepancy (S Sun −S ref)/S ref has two positive peaks in the radiative zone, one just below the overshooting region and a -larger peak deeper at ∼ 40% of the stellar radius. This discrep- +larger peak deeper at ∼40% of the stellar radius. This discrep- ancy is negative in the convective zone. The corrections applied to A help reduce these entropy discrepancies in both regions. The fourth concerns the density. The quantity (ρSun − -ρref)/ρref has a negative peak in the radiative region, at ∼ 35% +ρref)/ρref has a negative peak in the radiative region, at ∼35% of the stellar radius, and is positive in the convective zone. 
Importantly, Buldgen et al. (2020) mention that their recon- struction procedure gives similar Ledoux discriminant profiles @@ -305,23 +305,23 @@ ature gradient in the overshooting layer that qualitatively repro- duces the behaviour displayed in Fig. 1. We define an overshoot- ing length dov = αovHP,CB, with HP,CB the pressure scale height at the convective boundary and αov a free parameter. We also de- -fine two radial locations, rov = rCB − dov and rmid = rCB − dov/2, +fine two radial locations, rov = rCB −dov and rmid = rCB −dov/2, with rCB the radial location of the convective boundary. The tem- -perature gradient is modified as follows. For rmid ≤ r < rCB, we +perature gradient is modified as follows. For rmid ≤r < rCB, we use -∇ = g(r)∇ad + (1 − g(r))∇rad, +∇= g(r)∇ad + (1 −g(r))∇rad, (2) with -g(r) = sin{[(r − rmid)/(rCB − rmid)]a × π/2}. +g(r) = sin{[(r −rmid)/(rCB −rmid)]a × π/2}. (3) 3 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -For rov ≤ r < rmid, we use -∇ = ∇rad − h(r)∇ad, +For rov ≤r < rmid, we use +∇= ∇rad −h(r)∇ad, (4) with -h(r) = b × sin{[(rmid − r)/(rmid − rov)] × π}. +h(r) = b × sin{[(rmid −r)/(rmid −rov)] × π}. (5) Sine functions are used in Eqs. (3) and (5) to reproduce the smooth variations in the temperature gradient below the convec- @@ -337,7 +337,7 @@ results, but we note that the results are insensitive to the value of b. 3.2.1. Thermal equilibrium models The details of the procedure for the first method are the follow- -ing. We calculate the evolution of a 1 M⊙ model with an initial +ing. We calculate the evolution of a 1 M⊙model with an initial helium mass fraction of 0.28, metallicity Z = 0.02, and a mix- ing length lmix = 1.9HP. 
We use a reference model that is in thermal equilibrium2 and has the luminosity and radius of the diff --git a/read/results/pymupdf/2201.00201.txt b/read/results/pymupdf/2201.00201.txt index 31d8868..88a2f93 100644 --- a/read/results/pymupdf/2201.00201.txt +++ b/read/results/pymupdf/2201.00201.txt @@ -50,7 +50,7 @@ rill 1942), and showed that the shorter periods are also accom- panied by a higher velocity dispersion. Furthermore, groups of LPVs with relatively short periods are characterized by a greater scale height above the Galactic plane. This was shown, using for -⋆ Corresponding +⋆Corresponding author: M. Trabucchi @@ -126,9 +126,9 @@ We employed PARSEC-COLIBRI isochrones (Marigo et al. sion 1.2S) for the preceding evolution. The adopted set of isochrones covers the range 0.001 to 0.016 in initial metal- licity (Zi), with a 0.001 step, while it spans the age interval -8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB +8.00 ≤log(τ/yr) ≤10.45 with a step of 0.05. Since the AGB phase is short-lived, it only spans a small range of initial masses -for each given isochrone, of order of 10−2 M⊙ at most. +for each given isochrone, of order of 10−2 M⊙at most. The adopted isochrones include linear pulsation periods from Trabucchi et al. (2019) for overtone modes and nonlinear periods computed with the period-mass-radius relation from Trabucchi @@ -183,7 +183,7 @@ Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters in the Galaxy and LMC, respectively, thereby ensuring that ages would be homogeneously derived for clusters in both galaxies. Age uncertainties from Baumgardt et al. (2013), provided for -each cluster, are generally around σlog(τ) ≃ 0.05. Kharchenko +each cluster, are generally around σlog(τ) ≃0.05. Kharchenko et al. (2016) do not provide age uncertainties, but a reasonable upper limit for their method should be σlog(τ) = 0.2 based on the analysis of Kharchenko et al. 
(2005) (the same value was @@ -191,7 +191,7 @@ adopted by Grady et al. 2019, in their Fig. 7). As discussed by Kamath et al. (2010), the age of the SMC cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij -et al. (2014), while it is as young as τ ≃ 0.89 ± 0.015 Gyr ac- +et al. (2014), while it is as young as τ ≃0.89 ± 0.015 Gyr ac- cording to Perren et al. (2017). Since an accurate estimate is not necessary for our exploratory analysis, we took a rough average and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846 @@ -207,8 +207,8 @@ their clusters we adopted the distance moduli µLMC = 18.49 ± (2017). We searched for data on interstellar extinction from sev- eral literature works (e.g., Nayak et al. 2016; Kharchenko et al. 2016; Perren et al. 2017), all of which suggest that extinction -in the Ks filter is smaller than ∼ 0.1 mag for most of the clus- -ters we considered, and at most as large as ∼ 0.3 mag, which is +in the Ks filter is smaller than ∼0.1 mag for most of the clus- +ters we considered, and at most as large as ∼0.3 mag, which is negligible for our purposes. Article number, page 2 of 9 @@ -247,14 +247,14 @@ riod of LPVs pulsating in the FM decreases with increasing age. Crosses mark the average properties of the three groups of C- rich LPVs from Feast et al. (2006, their table 4), which fit the general pattern with the exception of their group 3, estimated to -be older than what our models predict at P ≃ 650. +be older than what our models predict at P ≃650. We also show a linear best-fit to the models distribution (weighted by NFM), which shows a fairly good agreement with the best-fit to observations by Grady et al. (2019, also shown). However, the best-fit line does not fully capture the properties of the predictions, nor of the observed trend. Indeed, models are indicative of a substantial dispersion around the relation. 
For in- -stance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550 +stance, at 1 Gyr, the FM period ranges from ∼200 days to ∼550 days. Conversely, LPVs pulsating in the FM with a period of 350 days are predicted to be at least ∼200 Myr old, but they can be as old as ∼3 Gyr. Observed data are consistent with the predicted @@ -284,7 +284,7 @@ predictions and observations. We note that in both cases, the dis- tribution is skewed toward short periods, which seems to be true at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2, which is a version of the PA plane limited to an O-rich compo- -sition2. Indeed, although at τ ≲ 5 Gyr the observed sample is +sition2. Indeed, although at τ ≲5 Gyr the observed sample is very scarce, it appears to be consistent with models predicting a more densely populated region in the shorter-period half of the PA distribution. @@ -301,7 +301,7 @@ higher masses, so that younger C-rich models are more concen- trated at longer periods, leading to a steeper PA relation com- pared with the O-rich case. These predictions agree with ob- servations on the old side of the period distribution, while the -scarcity of C stars at τ ≃ 0.6 Gyr prevents us from performing a +scarcity of C stars at τ ≃0.6 Gyr prevents us from performing a comparison at younger ages. In appendix B, we provide analytic PA relations by fitting the high-density parts of the O- and C-rich models’ distribution. We @@ -343,7 +343,7 @@ a linear scale, normalized to maximum). Symbols represent observed LPVs (green: indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit to models and the best-fit by Grady et al. (2019), respectively. 
Period distributions at selected ages are compared in panels (b) and (c) and marked -in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is +in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼9.15 and ∼10.10, respectively). For clarity, the effect of the TP-AGB boosting is suppressed in panel (a). Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while dashed lines are best fits to the edges of the model distribution (see the text for more details). @@ -370,7 +370,7 @@ warmer and have smaller radii compared with metal-rich ones. As a consequence, the bulk of the period distribution of metal- poor LPVs is at periods shorter than Pb, so they only contribute to the global distribution (i.e., at all Zi at a given age) over a -small period range at P ≳ Pb. In contrast, metal-rich LPVs have +small period range at P ≳Pb. In contrast, metal-rich LPVs have periods well beyond Pb, so they contribute both at that value and at longer periods. The result is an excess of FM-dominated LPVs near Pb, that is to say on the short side of the overall period dis- @@ -684,7 +684,7 @@ Three of the sources without a spectral type lack Gaia pho- tometry, so they cannot be classified with the Gaia-2MASS. Two of them (LW5 and LW22 in 47 Tuc) have no match in Gaia EDR3, but they have NIR data and are probably O-rich based on -their position in the J − Ks versus Ks color-magnitude diagram. +their position in the J −Ks versus Ks color-magnitude diagram. The third source is one of the two stars in NGC 1903 from the list of Grady et al. (2019), which we identified with the 2MASS source J05171633-6920298. 
It is likely C-rich according to the @@ -745,7 +745,7 @@ log(τ/yr) = a0 + a1 (P/ ˜P) + a2 (P/ ˜P)2 , (where ˜P = 350 days) and employed a Lenvenberg-Marquardt nonlinear regression algorithm3 to derive the best-fit coefficients, which are listed in Table B.1. We remark that these best-fit ex- -pressions are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3 +pressions are only valid in the intervals 8.0 ≤log(τ/yr) ≤10.3 and 20 < P/days < 700 for O-rich composition, and within 3 We made use of the Python library SciPy to perform Gaussian KDE modeling and best-fit, respectively, by means of the gaussian_kde @@ -786,7 +786,7 @@ upper edge 8.498 -1.827 -0.9959 -8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich +8.6 ≤log(τ/yr) ≤9.3 and 140 < P/days < 620 in the C-rich case. Because of the connection between age and initial mass, the PA relation can be translated into a period-initial mass relation, @@ -803,7 +803,7 @@ namely the star-formation history and age-metallicity relation. Appendix C: The shape of the period distribution As an example case, we consider an isochrone of age log(τ/yr) = 8.3 and initial metallicity Zi = 0.006. Stars on the TP-AGB have -initial masses Mi ≃ 3.85 M⊙ over a small range of ∼ 10−3 M⊙. +initial masses Mi ≃3.85 M⊙over a small range of ∼10−3 M⊙. The relation between period and initial mass is displayed in panel (a) of Fig. C.1, where isochrone portions undergoing Table B.2. Best-fit coefficients for the period-initial mass relation and @@ -842,11 +842,11 @@ distributions for a few different cases. It is instructive, to begin with, to ignore the effect of thermal pulses and consider only the quiescent evolution (green lines in Fig. C.1). The smallest initial mass corresponds to a star that just -entered the TP-AGB, when the FM has a period of ∼ 240 days +entered the TP-AGB, when the FM has a period of ∼240 days but is not dominant. 
It only becomes dominant above a threshold radius Rdom,0, that is for periods longer than a (mass-dependent) critical period Pdom,0 (the solid gray line in Fig. C.1). The least -evolved (quiescent) model with dominant FM has PFM ≃ 360 +evolved (quiescent) model with dominant FM has PFM ≃360 days (green circle and horizontal line), corresponding to a sharp cut in the period distribution shown in panel (b) of Fig. C.1. As a star evolves along the AGB it expands, and its period be- @@ -855,8 +855,8 @@ a higher initial mass are more evolved, hence they have a larger radius and a longer period. The rate at which a period increases with radius is not fixed, but rather decreases with evolution. Ac- cording to the prescription of Trabucchi et al. (2021b), a period -grows with radius as a broken power-law with exponent α ≃ 1.8 -if R < Rb, and with α ≃ 1.25 at larger radii. +grows with radius as a broken power-law with exponent α ≃1.8 +if R < Rb, and with α ≃1.25 at larger radii. This is equivalent to saying that the period grows more slowly after it exceeds a critical value Pb = P(Rb), marked by the gray dotted line in Fig. C.1. The isochrone reaches it at @@ -867,7 +867,7 @@ Fig. B.1. Similar to Fig. 2, but showing initial mass Mi in place of age. The be PFM – Mi relation are shown. Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows period as a function of initial mass (current mass on the top axis) on the -TP-AGB for a ∼ 200 Myr old isochrone with Zi = 0.006. Red lines +TP-AGB for a ∼200 Myr old isochrone with Zi = 0.006. Red lines show full thermal pulses, while blue lines ignore luminosity spikes and green lines show only the quiescent evolution. 
The same color code is used for the period distributions (normalized to their maximum) on @@ -878,7 +878,7 @@ Gray lines mark the critical values of periods at which the FM becomes dominant (solid line), less sensitive to radius (dotted line, which occurs at the vertical line for this specific isochrone), and independent of radius (dashed line). -Mi ≃ 3.8524 M⊙ (vertical gray line), when PFM ≃ 420 days. In +Mi ≃3.8524 M⊙(vertical gray line), when PFM ≃420 days. In models with a smaller initial mass, the period is still increasing at a relatively large rate as the envelope expands, while in more massive models the period has already become less sensitive to @@ -890,7 +890,7 @@ this maximum, while limiting the selection to DFMP, produces a distribution skewed toward short periods, as found in Sect. 3. If the luminosity dips following thermal pulses are taken into account (blue lines), the corresponding envelope contrac- -tion causes the period to decrease, and the cut at ∼ 360 days +tion causes the period to decrease, and the cut at ∼360 days becomes less sharp. Because of mass loss, the threshold period Pdom,0 is lowered, so that the shortest period associated with DFMP does not correspond to the least evolved model (green diff --git a/read/results/pymupdf/2201.00214.txt b/read/results/pymupdf/2201.00214.txt index 24bbe18..956a7d5 100644 --- a/read/results/pymupdf/2201.00214.txt +++ b/read/results/pymupdf/2201.00214.txt @@ -129,7 +129,7 @@ has ten different wavelength channels, three in white light and UV, and the othe channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper- atures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015). Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 -). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK. +). These are covering the coronal temperature range from T ≈0.6 to T ≥16MK. 
The two below data sets are finally selected to study thermal variations and coronal loops oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions. Finally, these loops are chosen: @@ -155,7 +155,7 @@ at (230, 165) arcsec and its width and height are 450 ′′ × 456 ′′ /750 × 775 pixels. The flare occurring in this active region is an X2.1 class flare located close to the disk center at latitude -14◦ north and longitude 18◦ west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT, +14◦north and longitude 18◦west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT, ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection (CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for @@ -207,7 +207,7 @@ in terms of the logarithm of the temperature, which has three free parameters (A Boerner, 2011): DEMi = dEMi dT -= EMp,i exp (−[log (T) − log (Tp,i) += EMp,i exp (−[log (T) −log (Tp,i) 2σ2 T,i ). @@ -216,7 +216,7 @@ In which, Tp,i is the DEM peak temperature, EMp,i is the peak EM function, and logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes (for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below): F0λ = -� dEM(T) +Z dEM(T) dT Rλ(T)dT = ∑ k diff --git a/read/results/pymupdf/GeoTopo-book.txt b/read/results/pymupdf/GeoTopo-book.txt index 5e9a446..ca23581 100644 --- a/read/results/pymupdf/GeoTopo-book.txt +++ b/read/results/pymupdf/GeoTopo-book.txt @@ -33,7 +33,7 @@ Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werd in „Analysis I“ vermittelt. Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. 
In „Lineare -Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. +Algebra II“ wird der Begriffder Orthonormalbasis eingeführt. iii (a) S2 @@ -159,105 +159,105 @@ Stichwortverzeichnis 1 Topologische Grundbegriffe 1.1 Topologische Räume Definition 1 -Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P(X) +Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆P(X) mit folgenden Eigenschaften -(i) ∅, X ∈ T -(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T -(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist -� +(i) ∅, X ∈T +(ii) Sind U1, U2 ∈T, so ist U1 ∩U2 ∈T +(iii) Ist I eine Menge und Ui ∈T für jedes i ∈I, so ist +[ i∈I -Ui ∈ T +Ui ∈T Die Elemente von T heißen offene Teilmengen von X. -A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. +A ⊆X heißt abgeschlossen, wenn X \ A offen ist. Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es Mengen, die sowohl abgeschlossen als auch offen sind. Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) -Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }. -Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \ X = ∅ ∈ T und -X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. +Betrachte ∅und X mit der trivialen Topologie Ttriv = { ∅, X }. +Es gilt: X ∈T und ∅∈T, d. h. X und ∅sind offen. Außerdem XC = X \ X = ∅∈T und +X \ ∅= X ∈T, d. h. X und ∅sind als Komplement offener Mengen abgeschlossen. ■ Beispiel 1 (Topologien) 1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie TEuklid: -U ⊆ Rn offen ⇔ für jedes x ∈ U gibt es r > 0, -sodass Br(x) = { y ∈ Rn | d(x, y) < r } ⊆ U +U ⊆Rn offen ⇔für jedes x ∈U gibt es r > 0, +sodass Br(x) = { y ∈Rn | d(x, y) < r } ⊆U Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli- chem Mittelpunkt (vgl. Definition 1.ii). 
2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum. 3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie. -4) X := R, TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie +4) X := R, TZ := { U ⊆R | R \ U endlich } ∪{ ∅} heißt Zariski-Topologie Beobachtungen: -• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 } +• U ∈TZ ⇔∃f ∈R[X], sodass R \ U = V (f) = { x ∈R | f(x) = 0 } • Es gibt keine disjunkten offenen Mengen in TZ. 4 1.1. TOPOLOGISCHE RÄUME -5) X := Rn, TZ = {U ⊆ Rn|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass +5) X := Rn, TZ = {U ⊆Rn|Es gibt Polynome f1, . . . , fr ∈R[X1, . . . , Xn] sodass Rn \ U = V (f1, . . . , fr)} 6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum. ∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. Definition 2 -Sei (X, T) ein topologischer Raum und x ∈ X. -Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und -U0 ⊆ U. +Sei (X, T) ein topologischer Raum und x ∈X. +Eine Teilmenge U ⊆X heißt Umgebung von x, wenn es ein U0 ∈T gibt mit x ∈U0 und +U0 ⊆U. Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. Definition 3 -Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge. -a) M◦ := { x ∈ M | M ist Umgebung von x } = -� +Sei (X, T) ein topologischer Raum und M ⊆X eine Teilmenge. +a) M◦:= { x ∈M | M ist Umgebung von x } = +[ U⊆M U∈T U heißt Inneres oder offener Kern von M. b) M := -� +\ M⊆A A abgeschlossen A heißt abgeschlossene Hülle oder Abschluss von M. -c) ∂M := M \ M◦ heißt Rand von M. +c) ∂M := M \ M◦heißt Rand von M. d) M heißt dicht in X, wenn M = X ist. Beispiel 2 -1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = ∅ +1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦= ∅ 2) Sei X = R und M = (a, b). Dann gilt: M = [a, b] 3) Sei X = R, T = TZ und M = (a, b). Dann gilt: M = R Definition 4 Sei (X, T) ein topologischer Raum. 
-a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen +a) B ⊆T heißt Basis der Topologie T, wenn jedes U ∈T Vereinigung von Elementen aus B ist. -b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen +b) S ⊆T heißt Subbasis der Topologie T, wenn jedes U ∈T Vereinigung von endlichen Durchschnitten von Elementen aus S ist. Beispiel 3 (Basis und Subbasis) 1) Jede Basis ist auch eine Subbasis, z.B. -S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als +S = { (a, b) | a, b ∈R, a < b } ist für R mit der Standardtopologie sowohl Basis als auch Subbasis. 2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist -B = { Br(x) | r ∈ Q>0, x ∈ Qn } +B = { Br(x) | r ∈Q>0, x ∈Qn } ist eine abzählbare Basis von T. 3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }. Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt: 5 1.1. TOPOLOGISCHE RÄUME -• S ⊆ T -• ∅, { 0, 1 } und { 0, 2 } ∈ S -• { 0 } = { 0, 1 } ∩ { 0, 2 } -• X = { 0, 1 } ∪ { 0, 2 } +• S ⊆T +• ∅, { 0, 1 } und { 0, 2 } ∈S +• { 0 } = { 0, 1 } ∩{ 0, 2 } +• X = { 0, 1 } ∪{ 0, 2 } Allerings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen aus S erzeugt werden kann. Bemerkung 2 -Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S +Sei X eine Menge und S ⊆P(X). Dann gibt es genau eine Topologie T auf X, für die S Subbasis ist. Definition 5 -Sei (X, T) ein topologischer Raum und Y ⊆ X. -TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . +Sei (X, T) ein topologischer Raum und Y ⊆X. +TY := { U ∩Y | U ∈T } ist eine Topologie auf Y . TY heißt Teilraumtopologie und (Y, TY ) heißt ein Teilraum von (X, T). Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. Definition 6 Seien X1, X2 topologische Räume. 
-U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit -i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt. -T = { U ⊆ X1 × X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie. +U ⊆X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈U Umgebungen Ui um xi mit +i = 1, 2 gibt, sodass U1 × U2 ⊆U gilt. +T = { U ⊆X1 × X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie. B = { U1 × U2 | Ui offen in Xi, i = 1, 2 } ist eine Basis von T. U x @@ -267,10 +267,10 @@ U2 U1 X1 X2 -Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U +Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆U Beispiel 4 (Produkttopologien) 1) X1 = X2 = R mit euklidischer Topologie. -⇒ Die Produkttopologie auf R × R = R2 stimmt mit der euklidischen Topologie auf +⇒Die Produkttopologie auf R × R = R2 stimmt mit der euklidischen Topologie auf R2 überein. 2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R2: U1 × U2 (Siehe Abbildung 1.2) @@ -281,17 +281,17 @@ U1 = R \ N U2 = R \ N Abbildung 1.2: Zariski-Topologie auf R2 Definition 7 -Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ sei die Menge -der Äquivalenzklassen, π : X → X, -x �→ [x]∼. +Sei X ein topologischer Raum, ∼eine Äquivalenzrelation auf X, X = X/∼sei die Menge +der Äquivalenzklassen, π : X →X, +x 7→[x]∼. TX := -� -U ⊆ X -�� π−1(U) ∈ TX -� + +U ⊆X + π−1(U) ∈TX + (X, TX) heißt Quotiententopologie. Beispiel 5 -X = R, a ∼ b :⇔ a − b ∈ Z +X = R, a ∼b :⇔a −b ∈Z R -1 0 @@ -305,13 +305,13 @@ a U a π−1(u) -0 ∼ 1, d. h. [0] = [1] +0 ∼1, d. h. [0] = [1] Beispiel 6 -Sei X = R2 und (x1, y1) ∼ (x2, y2) ⇔ x1 − x2 ∈ Z und y1 − y2 ∈ Z. Dann ist X/∼ ein Torus. +Sei X = R2 und (x1, y1) ∼(x2, y2) ⇔x1 −x2 ∈Z und y1 −y2 ∈Z. Dann ist X/∼ein Torus. 
Beispiel 7 (Projektiver Raum) X = Rn+1 \ { 0 } , -x ∼ y ⇔ ∃λ ∈ R× mit y = λx -⇔ x und y liegen auf der gleichen +x ∼y ⇔∃λ ∈R× mit y = λx +⇔x und y liegen auf der gleichen Ursprungsgerade X = Pn(R) @@ -330,35 +330,35 @@ Also für n = 1: 4 1.2 Metrische Räume Definition 8 -Sei X eine Menge. Eine Abbildung d : X × X → R+ +Sei X eine Menge. Eine Abbildung d : X × X →R+ 0 heißt Metrik, wenn gilt: (i) Definitheit: -d(x, y) = 0 ⇔ x = y -∀x, y ∈ X +d(x, y) = 0 ⇔x = y +∀x, y ∈X (ii) Symmetrie: d(x, y) = d(y, x) -∀x, y ∈ X +∀x, y ∈X (iii) Dreiecksungleichung: -d(x, z) ≤ d(x, y) + d(y, z) -∀x, y, z ∈ X +d(x, z) ≤d(x, y) + d(y, z) +∀x, y, z ∈X Das Paar (X, d) heißt ein metrischer Raum. Bemerkung 3 Sei (X, d) ein metrischer Raum und -Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+ -B = { Br(x) ⊆ P(X) | x ∈ X, r ∈ R+ } ist Basis einer Topologie auf X. +Br(x) := { y ∈X | d(x, y) < r } für x ∈X, r ∈R+ +B = { Br(x) ⊆P(X) | x ∈X, r ∈R+ } ist Basis einer Topologie auf X. Definition 9 -Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit -∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2)) +Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X →Y eine Abbildung mit +∀x1, x2 ∈X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2)) Dann heißt ϕ eine Isometrie von X nach Y . Beispiel 8 (Skalarprodukt erzeugt Metrik) Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt ⟨·, ·⟩. Dann wird V durch d(x, y) := -� -⟨x − y, x − y⟩ zum metrischen Raum. +p +⟨x −y, x −y⟩zum metrischen Raum. Beispiel 9 (diskrete Metrik) Sei X eine Menge. Dann heißt d(x, y) = -� +( 0 falls x = y 1 @@ -368,7 +368,7 @@ die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. 8 1.2. METRISCHE RÄUME Beispiel 10 -X = R2 und d ((x1, y1), (x2, y2)) := max(∥x1 − x2∥, ∥y1 − y2∥) ist Metrik. +X = R2 und d ((x1, y1), (x2, y2)) := max(∥x1 −x2∥, ∥y1 −y2∥) ist Metrik. Beobachtung: d erzeugt die euklidische Topologie. 
Br(0) = r @@ -395,10 +395,10 @@ X = R2 4 Definition 10 Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x ̸= y in X -Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅. +Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩Uy = ∅. Bemerkung 4 (Trennungseigenschaft) Metrische Räume sind hausdorffsch, wegen -d(x, y) > 0 ⇒ ∃ε > 0 : Bε(x) ∩ Bε(y) = ∅ +d(x, y) > 0 ⇒∃ε > 0 : Bε(x) ∩Bε(y) = ∅ Beispiel 12 (Topologische Räume und Hausdorff-Räume) 1) (R, TZ) ist ein topologischer Raum, der nicht hausdorffsch ist. 2) (R, TEuklid) ist ein topologischer Hausdorff-Raum. @@ -407,15 +407,15 @@ Seien X, X1, X2 Hausdorff-Räume. a) Jeder Teilraum von X ist hausdorffsch. b) X1 × X2 ist hausdorffsch (vgl. Abbildung 1.4). Definition 11 -Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder -Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle -n ≥ n0. +Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈X heißt Grenzwert oder +Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈U für alle +n ≥n0. Bemerkung 6 Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge. -Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls -x ̸= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0 -⇒ x = y +Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩Uy = ∅falls +x ̸= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈Ux ∩Uy für alle n ≥n0 +⇒x = y ■ 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. @@ -432,42 +432,42 @@ X2 Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 × X2 1.3 Stetigkeit Definition 12 -Seien (X, TX), (Y, TY ) topologische Räume und f : X → Y eine Abbildung. -a) f heißt stetig :⇔ ∀U ∈ TY : f−1(U) ∈ TX. 
+Seien (X, TX), (Y, TY ) topologische Räume und f : X →Y eine Abbildung. +a) f heißt stetig :⇔∀U ∈TY : f−1(U) ∈TX. b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : -Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY . +Y →X gibt, sodass g ◦f = idX und f ◦g = idY . Bemerkung 72 -Seien X, Y metrische Räume und f : X → Y eine Abbildung. -Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für -alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε. -Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)). +Seien X, Y metrische Räume und f : X →Y eine Abbildung. +Dann gilt: f ist stetig ⇔zu jedem x ∈X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für +alle y ∈X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε. +Beweis: „⇒“: Sei x ∈X, ε > 0 gegeben und U := Bε(f(x)). Dann ist U offen in Y . Def. 12.a -=====⇒ f−1(U) ist offen in X. Dann ist x ∈ f−1(U). -⇒ ∃δ > 0, sodass Bδ(x) ⊆ f−1(U) -⇒ f(Bδ(x)) ⊆ U -⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh. -„⇐“: Sei U ⊆ Y offen, X ∈ f−1(U). -Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U +=====⇒f−1(U) ist offen in X. Dann ist x ∈f−1(U). +⇒∃δ > 0, sodass Bδ(x) ⊆f−1(U) +⇒f(Bδ(x)) ⊆U +⇒{ y ∈X | dX(x, y) < δ } ⇒Beh. +„⇐“: Sei U ⊆Y offen, X ∈f−1(U). +Dann gibt es ε > 0, sodass Bε(f(x)) ⊆U Vor. -==⇒ Es gibt δ > 0, sodass f(Bδ(x)) ⊆ Bε(f(x))) -⇒ Bδ(x) ⊆ f−1(Bε(f(x))) ⊆ f−1(U) +==⇒Es gibt δ > 0, sodass f(Bδ(x)) ⊆Bε(f(x))) +⇒Bδ(x) ⊆f−1(Bε(f(x))) ⊆f−1(U) ■ Bemerkung 8 -Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: +Seien X, Y topologische Räume und f : X →Y eine Abbildung. Dann gilt: f ist stetig -⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f−1(A) ⊆ X ist abgeschlossen. +⇔für jede abgeschlossene Teilmenge A ⊆Y gilt : f−1(A) ⊆X ist abgeschlossen. Beispiel 13 (Stetige Abbildungen und Homöomorphismen) -1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus. +1) Für jeden topologischen Raum X gilt: idX : X →X ist Homöomorphismus. 
2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. 11 1.3. STETIGKEIT 2) Ist (Y, TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung -f : X → Y stetig. -3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen +f : X →Y stetig. +3) Ist X diskreter topologischer Raum, so ist f : X →Y stetig für jeden topologischen Raum Y und jede Abbildung f. -4) Sei X = [0, 1), Y = S1 = { z ∈ C | ∥z∥ = 1 } und f(t) = e2πit. +4) Sei X = [0, 1), Y = S1 = { z ∈C | ∥z∥= 1 } und f(t) = e2πit. R 0 1 @@ -477,61 +477,61 @@ g Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung g nicht stetig ist. Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht offen ist (vgl. Abbildung 1.5). Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) -Seien X, Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. -Dann ist g ◦ f : X → Z stetig. +Seien X, Y, Z topologische Räume, f : X →Y und g : Y →Z stetige Abbildungen. +Dann ist g ◦f : X →Z stetig. X f -� +/ g◦f -� + Y g -� + Z -Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig +Beweis: Sei U ⊆Z offen ⇒(g ◦f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. ■ Bemerkung 10 a) Für jeden topologischen Raum X ist -Homöo(X) := { f : X → X | f ist Homöomorphismus } +Homöo(X) := { f : X →X | f ist Homöomorphismus } eine Gruppe. -b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. -c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für +b) Jede Isometrie f : X →Y zwischen metrischen Räumen ist ein Homöomorphismus. +c) Iso(X) := { f : X →X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für jeden metrischen Raum X. Bemerkung 11 (Projektionen sind stetig) -Seien X, Y topologische Räume. 
πX : X × Y → X und πY : X × Y → Y die Projektionen -πX : (x, y) �→ x und πY : (x, y) �→ y +Seien X, Y topologische Räume. πX : X × Y →X und πY : X × Y →Y die Projektionen +πX : (x, y) 7→x und πY : (x, y) 7→y Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig. -Beweis: Sei U ⊆ X offen -⇒ π−1 +Beweis: Sei U ⊆X offen +⇒π−1 X (U) = U × Y ist offen in X × Y . ■ Bemerkung 12 -Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum -versehen mit der Quotiententopologie, π : X → X, x �→ [x]∼. +Sei X ein topologischer Raum, ∼eine Äquivalenzrelation auf X, X = X/∼der Bahnenraum +versehen mit der Quotiententopologie, π : X →X, x 7→[x]∼. Dann ist π stetig. 12 1.4. ZUSAMMENHANG -Beweis: Nach Definition ist U ⊆ X offen ⇔ π−1(U) ⊆ X offen. +Beweis: Nach Definition ist U ⊆X offen ⇔π−1(U) ⊆X offen. ■ Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. Beispiel 14 (Stereographische Projektion) -Rn und Sn \ { N } sind homöomorph für beliebiges N ∈ Sn. Es gilt: +Rn und Sn \ { N } sind homöomorph für beliebiges N ∈Sn. Es gilt: Sn = -� -x ∈ Rn+1 �� ∥x∥ = 1 -� + +x ∈Rn+1 ∥x∥= 1 + = -� -x ∈ Rn+1 -����� +( +x ∈Rn+1 + n+1 -� +X i=1 x2 i = 1 -� +) O. B. d. A. sei N =   @@ -548,11 +548,11 @@ O. B. d. A. sei N =  . Die Gerade durch N und P schneidet die Ebene H in genau einem Punkt ˆP. P wird auf ˆP abgebildet. -f :Sn \ { N } → Rn -P �→ +f :Sn \ { N } →Rn +P 7→ genau ein Punkt -� �� � -LP ∩ H +z }| { +LP ∩H wobei Rn = H =   @@ -567,8 +567,8 @@ x1 xn+1   - ∈ Rn+1 -������� +∈Rn+1 + xn+1 = 0   @@ -592,8 +592,8 @@ Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. 1.4 Zusammenhang Definition 13 a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen -U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X. -b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit +U1, U2 von X gibt mit U1 ∩U2 = ∅und U1 ∪U2 = X. 
+b) Eine Teilmenge Y ⊆X heißt zusammenhängend, wenn Y als topologischer Raum mit der Teilraumtopologie zusammenhängend ist. 13 @@ -607,82 +607,82 @@ N P Abbildung 1.6: Visualisierung der stereographischen Projektion Bemerkung 13 -X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2 -mit A1 ∩ A2 = ∅ und A1 ∪ A2 = X. +X ist zusammenhängend ⇔Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2 +mit A1 ∩A2 = ∅und A1 ∪A2 = X. Beispiel 15 (Zusammenhang von Räumen) 1) (Rn, TEuklid) ist zusammenhängend, denn: -Annahme: Rn = U1 ˙∪ U2 mit ∅ ̸= U1, U2 ∈ TEuklid existieren. -Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun -betrachten wir V ⊊ Rn als (metrischen) Teilraum mit der Teilraumtopologie TV . -Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie. -Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder -Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen. -2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0 +Annahme: Rn = U1 ˙∪U2 mit ∅̸= U1, U2 ∈TEuklid existieren. +Sei x ∈U1, y ∈U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun +betrachten wir V ⊊Rn als (metrischen) Teilraum mit der Teilraumtopologie TV . +Somit gilt U1 ∩[x, y] ∈TV wegen der Definition der Teilraumtopologie. +Dann gibt es z ∈[x, y] mit z ∈∂(U1 ∩[x, y]), aber z /∈U1 ⇒z ∈U2. In jeder +Umgebung von z liegt ein Punkt von U1 ⇒Widerspruch zu U2 offen. +2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪R>0 3) R2 \ { 0 } ist zusammenhängend. -4) Q ⊊ R ist nicht zusammenhängend, da (Q ∩ R< +4) Q ⊊R ist nicht zusammenhängend, da (Q ∩R< √ -2) ∪ (Q ∩ R> +2) ∪(Q ∩R> √ 2) = Q -5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist. +5) { x } ist zusammenhängend für jedes x ∈X, wobei X ein topologischer Raum ist. 6) R mit Zariski-Topologie ist zusammenhängend. 
Bemerkung 14 -Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen- +Sei X ein topologischer Raum und A ⊆X zusammenhängend. Dann ist auch A zusammen- hängend. 14 1.4. ZUSAMMENHANG Beweis: durch Widerspruch -Annahme: A = A1 ∪ A2, Ai abgeschlossen, Ai ̸= ∅, A1 ∩ A2 = ∅ -⇒ A = (A ∩ A1) -� -�� -� +Annahme: A = A1 ∪A2, Ai abgeschlossen, Ai ̸= ∅, A1 ∩A2 = ∅ +⇒A = (A ∩A1) +| +{z +} abgeschlossen -˙∪ (A ∩ A2) -� -�� -� +˙∪(A ∩A2) +| +{z +} abgeschlossen -� -�� -� +| +{z +} disjunkt -Wäre A ∩ A1 = ∅ -⇒ A ⊆ A = A1 ˙∪ A2 -⇒ A ⊆ A2 ⇒ A ⊆ A2 -⇒ A1 = ∅ -⇒ Widerspruch zu A1 ̸= ∅ -⇒ A ∩ A1 ̸= ∅ und analog A ∩ A2 ̸= ∅ -⇒ Widerspruch zu A ist zusammenhängend. +Wäre A ∩A1 = ∅ +⇒A ⊆A = A1 ˙∪A2 +⇒A ⊆A2 ⇒A ⊆A2 +⇒A1 = ∅ +⇒Widerspruch zu A1 ̸= ∅ +⇒A ∩A1 ̸= ∅und analog A ∩A2 ̸= ∅ +⇒Widerspruch zu A ist zusammenhängend. ■ Bemerkung 15 -Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend. -Ist A ∩ B ̸= ∅, dann ist A ∪ B zusammenhängend. -Beweis: Sei A ∪ B = U1 ˙∪ U2, Ui ̸= ∅ offen +Sei X ein topologischer Raum und A, B ⊆X zusammenhängend. +Ist A ∩B ̸= ∅, dann ist A ∪B zusammenhängend. +Beweis: Sei A ∪B = U1 ˙∪U2, Ui ̸= ∅offen o. B. d. A. -======⇒ A = (A ∩ U1) ˙∪ (A ∩ U2) offen +======⇒A = (A ∩U1) ˙∪(A ∩U2) offen A zhgd. -====⇒ A ∩ U1 = ∅ +====⇒A ∩U1 = ∅ A∩B̸=∅ -====⇒ U1 ⊆ B -B = (B ∩ U1) -� -�� -� +====⇒U1 ⊆B +B = (B ∩U1) +| +{z +} =U1 -∪ (B ∩ U2) -� -�� -� +∪(B ∩U2) +| +{z +} =∅ ist unerlaubte Zerlegung. ■ Definition 14 Sei X ein topologischer Raum. -Für x ∈ X sei Z(x) ⊆ X definiert durch +Für x ∈X sei Z(x) ⊆X definiert durch Z(x) := -� +[ A⊆Xzhgd. x∈A A @@ -696,52 +696,52 @@ Beweis: 15 1.5. KOMPAKTHEIT -a) Sei Z(x) = A1 ˙∪ A2 mit Ai ̸= ∅ abgeschlossen. -O. B. d. A. sei x ∈ A1 und y ∈ A2. y liegt in einer zusammehängenden Teilmenge A, -die auch x enthält. ⇒ A = (A ∩ A1) -� -�� -� +a) Sei Z(x) = A1 ˙∪A2 mit Ai ̸= ∅abgeschlossen. +O. B. d. A. sei x ∈A1 und y ∈A2. y liegt in einer zusammehängenden Teilmenge A, +die auch x enthält. 
⇒A = (A ∩A1) +| +{z +} ∋x -∪ (A ∩ A2) -� -�� -� +∪(A ∩A2) +| +{z +} ∋y ist unerlaubte Zerlegung. -b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x) -c) Ist Z(y) ∩ Z(x) ̸= ∅ Bem. 15 -=====⇒ Z(y) ∪ Z(x) ist zusammenhängend. -⇒ Z(x) ∪ Z(y) ⊆ Z(x) ⇒ Z(y) ⊆ Z(x) -⊆ Z(y) ⇒ Z(x) ⊆ Z(y) +b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒Z(x) ⊆Z(x) ⇒Z(x) = Z(x) +c) Ist Z(y) ∩Z(x) ̸= ∅Bem. 15 +=====⇒Z(y) ∪Z(x) ist zusammenhängend. +⇒Z(x) ∪Z(y) ⊆Z(x) ⇒Z(y) ⊆Z(x) +⊆Z(y) ⇒Z(x) ⊆Z(y) ■ Bemerkung 17 -Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend. -Beweis: Sei f(A) = U1 ∪ U2, Ui ̸= ∅, offen, disjunkt. -⇒ f−1(f(A)) = f−1(U1) ∪ f−1(U2) -⇒ A = (A ∩ f−1(U1)) -� -�� -� +Sei f : X →Y stetig. Ist A ⊆X zusammenhängend, so ist f(A) ⊆Y zusammenhängend. +Beweis: Sei f(A) = U1 ∪U2, Ui ̸= ∅, offen, disjunkt. +⇒f−1(f(A)) = f−1(U1) ∪f−1(U2) +⇒A = (A ∩f−1(U1)) +| +{z +} ̸=∅ -∪ (A ∩ f−1(U2)) -� -�� -� +∪(A ∩f−1(U2)) +| +{z +} ̸=∅ ■ 1.5 Kompaktheit Definition 15 -Sei X eine Menge und U ⊆ P(X). +Sei X eine Menge und U ⊆P(X). U heißt eine Überdeckung von X, wenn gilt: -∀x ∈ X : ∃M ∈ U : x ∈ M +∀x ∈X : ∃M ∈U : x ∈M Definition 16 Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X U = { Ui }i∈I mit Ui offen in X eine endliche Teilüberdeckung -� +[ i∈J⊆I -Ui = X mit |J| ∈ N +Ui = X mit |J| ∈N besitzt. Bemerkung 18 Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. @@ -753,67 +753,67 @@ einem der Ui enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich v 1.5. KOMPAKTHEIT der Länge δ unterteilen und alle Ui in die endliche Überdeckung aufnehmen, die Teilintervalle enthalten. -Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1] -der Länge 1/n sodass In ⊊ Ui für alle i ∈ J. -Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann -gibt es i ∈ J mit x ∈ Ui. 
Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui. -Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : |x − xn| < ε/2, also -In ⊆ (x − ε, x + ε) ⊆ Ui für mindestens ein n ∈ N.4 -⇒ Widerspruch +Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈N ein Intervall In ⊆[0, 1] +der Länge 1/n sodass In ⊊Ui für alle i ∈J. +Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈[0, 1]. Dann +gibt es i ∈J mit x ∈Ui. Da Ui offen ist, gibt es ein ε > 0, sodass (x −ε, x + ε) ⊆Ui. +Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥n0 : |x −xn| < ε/2, also +In ⊆(x −ε, x + ε) ⊆Ui für mindestens ein n ∈N.4 +⇒Widerspruch Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in Uij enthalten. -⇒ Uj1, . . . , Ujd ist endliche Teilüberdeckung von U. +⇒Uj1, . . . , Ujd ist endliche Teilüberdeckung von U. ■ Beispiel 16 (Kompakte Räume) 1) R ist nicht kompakt. 2) (0, 1) ist nicht kompakt. -Un = (1/n, 1 − 1/n) ⇒ � +Un = (1/n, 1 −1/n) ⇒S n∈N Un = (0, 1) 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. Bemerkung 19 -Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. +Sei X kompakter Raum, A ⊆X abgeschlossen. Dann ist A kompakt. Beweis: Sei (Vi)i∈I offene Überdeckung von A. -Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩ A. -⇒ A ⊆ -� +Dann gibt es für jedes i ∈I eine offene Teilmenge Ui ⊆X mit Vi = Ui ∩A. +⇒A ⊆ +[ i∈I Ui -⇒ U = { Ui | i ∈ I } ∪ { X \ A } ist offene Überdeckung von X +⇒U = { Ui | i ∈I } ∪{ X \ A } ist offene Überdeckung von X X kompakt -=======⇒ es gibt i1, . . . , in ∈ I, sodass -n� +=======⇒es gibt i1, . . . , in ∈I, sodass +n[ j=1 -Uij ∪ (X \ A) = X +Uij ∪(X \ A) = X ⇒   -n� +n[ j=1 -Uij ∪ (X \ A) +Uij ∪(X \ A)  - ∩ A = A +∩A = A ⇒ -n� +n[ j=1 -(Uij ∩ A) -� -�� -� +(Uij ∩A) +| +{z +} =Vij -∪ ((X \ A) ∩ A) -� -�� -� +∪((X \ A) ∩A) +| +{z +} =∅ = A -⇒ Vi1, . . . , Vin überdecken A. 
+⇒Vi1, . . . , Vin überdecken A. ■ Bemerkung 20 Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie kompakt. -Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es -offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆ Wi. -3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. +Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈X × Y gibt es +offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈I, sodass Ux,y × Vx,y ⊆Wi. +3Dies gilt nicht für alle n ≥n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4Sogar für unendlich viele. 17 @@ -827,93 +827,92 @@ Ux,y Y X Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen -Die offenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × y. Da Y -kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit �m(x0) +Die offenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈Y überdecken { x0 } × y. Da Y +kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit Sm(x0) i=1 Ux0,yi × -Vx0,yi ⊇ { x0 } × Y . -Sei Ux0 := �m(x) -i=1 Ux0,yi. Da X kompakt ist, gibt es x1, . . . , xn ∈ X mit �n +Vx0,yi ⊇{ x0 } × Y . +Sei Ux0 := Tm(x) +i=1 Ux0,yi. Da X kompakt ist, gibt es x1, . . . , xn ∈X mit Sn j=1 Uxj = X -⇒ �k +⇒Sk j=1 -�m(xj) +Sm(xj) i=1 -� -Uxj,yi × Vxj,yi -� -� -�� -� +Uxj,yi × Vxj,yi + +| +{z +} Ein grün-oranges Kästchen -⊇ X × Y -⇒ � +⊇X × Y +⇒S j -� +S i Wi(xj, yi) = X × Y ■ Bemerkung 21 -Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. +Sei X ein Hausdorffraum und K ⊆X kompakt. Dann ist K abgeschlossen. Beweis: z. Z.: Komplement ist offen -Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \ K. Für jedes x ∈ K seien -Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩ Vy = ∅. +Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈X \ K. 
Für jedes x ∈K seien +Ux bzw. Vy Umgebungen von x bzw. von y, sodass Ux ∩Vy = ∅. Xi K x y -Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass �m -i=1 Uxi ⊇ K. +Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈K, sodass Sm +i=1 Uxi ⊇K. Sei V := -n� +n\ i=1 Vxi 18 1.6. WEGE UND KNOTEN -⇒ V ∩ -� n� +⇒V ∩ + n[ i=1 Uxi -� +! = ∅ -⇒ V ∩ K = ∅ -⇒ V ist Überdeckung von y, die ganz in X \ K enthalten ist. -⇒ X \ K ist offen +⇒V ∩K = ∅ +⇒V ist Überdeckung von y, die ganz in X \ K enthalten ist. +⇒X \ K ist offen Damit ist K abgeschlossen. ■ Bemerkung 22 -Seien X, Y topologische Räume, f : X → Y stetig. -Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt. +Seien X, Y topologische Räume, f : X →Y stetig. +Ist K ⊆X kompakt, so ist f(K) ⊆Y kompakt. Beweis: Sei (Vi)i∈I offene Überdeckung von f(K) f stetig -====⇒ (f−1(Vi))i∈I ist offene Überdeckung von K +====⇒(f−1(Vi))i∈I ist offene Überdeckung von K Kompakt -=====⇒ es gibt i1, . . . , in, sodass f−1(Vi1), . . . , f−1(Vin) Überdeckung von K ist. -⇒ f(f−1(Vi1)), . . . , f(f−1(Vin)) überdecken f(K). -Es gilt: f(f−1(V )) = V ∩ f(X) +=====⇒es gibt i1, . . . , in, sodass f−1(Vi1), . . . , f−1(Vin) Überdeckung von K ist. +⇒f(f−1(Vi1)), . . . , f(f−1(Vin)) überdecken f(K). +Es gilt: f(f−1(V )) = V ∩f(X) ■ Satz 1.1 (Heine-Borel) Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und abgeschlossen ist. -Beweis: „⇒“: Sei K ⊆ Rn (oder Cn) kompakt. +Beweis: „⇒“: Sei K ⊆Rn (oder Cn) kompakt. Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset- -zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist +zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒K ist beschränkt. -„⇐“ Sei A ⊆ Rn (oder Cn) beschränkt und abgeschlossen. +„⇐“ Sei A ⊆Rn (oder Cn) beschränkt und abgeschlossen. Dann gibt es einen Würfel W = [−N, N] × · · · × [−N, N] -� -�� -� +| +{z +} n mal -mit A ⊆ W bzw. „Polyzylinder“ -Z = { (z1, . . . 
, zn) ∈ Cn | zi ≤ N für i = 1, . . . , n } +mit A ⊆W bzw. „Polyzylinder“ +Z = { (z1, . . . , zn) ∈Cn | zi ≤N für i = 1, . . . , n } Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch kompakt. Genauso ist Z kompakt, weil -{ z ∈ C ∥ z| ≤ 1 } +{ z ∈C ∥z| ≤1 } homöomorph zu -� -(x, y) ∈ R2 �� ∥(x, y)∥ ≤ 1 -� + +(x, y) ∈R2 ∥(x, y)∥≤1 + ist. ■ 1.6 Wege und Knoten @@ -922,56 +921,56 @@ Sei X ein topologischer Raum. 19 1.6. WEGE UND KNOTEN -a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X. +a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] →X. b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. c) γ heißt einfach, wenn γ|[0,1) injektiv ist. Beispiel 17 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form -∀x ∈ [0, 1] : γ(x) = c, -c ∈ X +∀x ∈[0, 1] : γ(x) = c, +c ∈X Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ. Definition 18 Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten -x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ(0) = x und γ(1) = y. +x, y ∈X einen Weg γ : [0, 1] →X gibt mit γ(0) = x und γ(1) = y. Bemerkung 23 Sei X ein topologischer Raum. -a) X ist wegzusammenhängend ⇒ X ist zusammenhängend -b) X ist wegzusammenhängend ̸⇐ X ist zusammenhängend +a) X ist wegzusammenhängend ⇒X ist zusammenhängend +b) X ist wegzusammenhängend ̸⇐X ist zusammenhängend Beweis: a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte, -abgeschlossene Teilmengen von X mit A1 ∪ A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1] → X +abgeschlossene Teilmengen von X mit A1 ∪A2 = X. Sei x ∈A1, y ∈A2, γ : [0, 1] →X ein Weg von x nach y. -Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. -C = (C ∩ A1) -� -�� -� +Dann ist C := γ([0, 1]) ⊆X zusammenhängend, weil γ stetig ist. 
+C = (C ∩A1) +| +{z +} ∋x -∪ (C ∩ A2) -� -�� -� +∪(C ∩A2) +| +{z +} ∋y -ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch +ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒Widerspruch b) Sei X = -� -(x, y) ∈ R2 ��� x2 + y2 = 1 ∨ y = 1 + 2 · e− 1 -10 x � +n +(x, y) ∈R2 x2 + y2 = 1 ∨y = 1 + 2 · e−1 +10 x o . Abbildung 1.8a veranschaulicht diesen Raum. -Sei U1 ∪ U2 = X, U1 ̸= U2 = ∅, Ui offen. X = C ∪ S. Dann ist C ⊆ U1 oder C ⊆ U2, +Sei U1 ∪U2 = X, U1 ̸= U2 = ∅, Ui offen. X = C ∪S. Dann ist C ⊆U1 oder C ⊆U2, weil C und S zusammenhängend sind. Also ist C = U1 und S = U2 (oder umgekehrt). -Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist. -Aber: Bε(y) ∩ S ̸= ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht +Sei y ∈C = U1, ε > 0 und Bε(y) ⊆U1 eine Umgebung von y, die in U1 enthalten ist. +Aber: Bε(y) ∩S ̸= ∅⇒Widerspruch ⇒X ∪S ist zusammenhängend, aber nicht wegzusammenhängend. ■ Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbil- +Es gibt stetige, surjektive Abbildungen [0, 1] →[0, 1] × [0, 1]. Ein Beispiel ist die in Abbil- dung 1.9 dargestellte Hilbert-Kurve. Definition 19 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : -[0, 1] → C ⊆ X bzw. γ : S1 → C ⊆ X, wobei C := Bild γ. +[0, 1] →C ⊆X bzw. γ : S1 →C ⊆X, wobei C := Bild γ. 20 1.6. WEGE UND KNOTEN @@ -984,8 +983,8 @@ Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ X Y {(x, sin( 1 -x)) ∈ X × Y } -(−1, 1) ⊆ Y +x)) ∈X × Y } +(−1, 1) ⊆Y (b) Sinus Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend sind. 
@@ -1020,24 +1019,24 @@ Beispiel 19 (Knoten) (d) 62-Knoten Abbildung 1.11: Beispiele für verschiedene Knoten Definition 21 -Zwei Knoten γ1, γ2 : S1 → R3 heißen äquivalent, wenn es eine stetige Abbildung -H : S1 × [0, 1] → R3 +Zwei Knoten γ1, γ2 : S1 →R3 heißen äquivalent, wenn es eine stetige Abbildung +H : S1 × [0, 1] →R3 gibt mit H(z, 0) = γ1(z) -∀z ∈ S1 +∀z ∈S1 H(z, 1) = γ2(z) -∀z ∈ S1 -und für jedes feste t ∈ [0, 1] ist -Hz : S1 → R3, z �→ H(z, t) +∀z ∈S1 +und für jedes feste t ∈[0, 1] ist +Hz : S1 →R3, z 7→H(z, t) ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2. Definition 22 -Sei γ : [0, 1] → R3 ein Knoten, E eine Ebene und π : R3 → E eine Projektion auf E. +Sei γ : [0, 1] →R3 ein Knoten, E eine Ebene und π : R3 →E eine Projektion auf E. π heißt Knotendiagramm von γ, wenn gilt: -��π−1(x) -�� ≤ 2 -∀x ∈ π(γ) + π−1(x) + ≤2 +∀x ∈π(γ) Ist (π|γ([0,1]))−1(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt: -∃λ > 1 : (y1 − x) = λ(y2 − x) +∃λ > 1 : (y1 −x) = λ(y2 −x) Satz 1.3 (Satz von Reidemeister) Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. @@ -1065,20 +1064,20 @@ Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierp (b) Ist (X, TX) hausdorffsch? (c) Ist TX von einer Metrik erzeugt? Aufgabe 2 -Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie +Es sei Z mit der von den Mengen Ua,b := a + bZ(a ∈Z, b ∈Z \ { 0 }) erzeugten Topologie versehen. Zeigen Sie: (a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen. (b) { −1, 1 } ist nicht offen. (c) Es gibt unendlich viele Primzahlen. Aufgabe 3 (Cantorsches Diskontinuum) -Für jedes i ∈ N sei Pi := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := � +Für jedes i ∈N sei Pi := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := Q i∈N Pi. (a) Wie sehen die offenen Mengen von P aus? 
(b) Was können Sie über den Zusammenhang von P sagen? Aufgabe 4 (Kompaktheit) -(a) Ist GLn(R) = { A ∈ Rn×n | det(A) ̸= 0 } kompakt? -(b) Ist SLn(R) = { A ∈ Rn×n | det(A) = 1 } kompakt? +(a) Ist GLn(R) = { A ∈Rn×n | det(A) ̸= 0 } kompakt? +(b) Ist SLn(R) = { A ∈Rn×n | det(A) = 1 } kompakt? (c) Ist P(R) kompakt? Aufgabe 5 (Begriffe) Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. @@ -1097,18 +1096,18 @@ Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometr Simplizialkomplexe 2.1 Topologische Mannigfaltigkeiten Definition 24 -Sei (X, T) ein topologischer Raum und n ∈ N. -a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V -Homöomorphismus von U auf eine offene Teilmenge V ⊆ Rn. +Sei (X, T) ein topologischer Raum und n ∈N. +a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈T und ϕ : U →V +Homöomorphismus von U auf eine offene Teilmenge V ⊆Rn. b) Ein n-dimensionaler Atlas A auf X ist eine Familie (Ui, ϕi)i∈I von Karten auf X, -sodass � +sodass S i∈I Ui = X. c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn ähnlich. Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) -Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R. -Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ Rn, wobei +Jede n-dimensionale Mannigfaltigkeit mit n ≥1 ist mindestens so mächtig wie R. +Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈T und ϕ : U →V ⊆Rn, wobei V offen und ϕ ein Homöomorphismus ist, eine Karte auf X. 
Da jede offene Teilmenge des Rn genauso mächtig ist wie der Rn, ϕ als Homöomorphismus insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig @@ -1118,91 +1117,91 @@ hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn. Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können beliebig viele Elemente haben. Bemerkung 25 -a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1] +a) Es gibt surjektive, stetige Abbildungen [0, 1] →[0, 1] × [0, 1] b) Für n ̸= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz von der Gebietstreue“ (Brouwer): -Ist U ⊆ Rn offen und f : U → Rn stetig und injektiv, so ist f(U) offen. +Ist U ⊆Rn offen und f : U →Rn stetig und injektiv, so ist f(U) offen. Ist n < m und Rm homöomorph zu Rn, so wäre -f : Rn → Rm → Rn, -(x1, . . . , xn) �→ (x1, x2, . . . , xn, 0, . . . , 0) -eine stetige injektive Abbildung. Also müsste f(Rn) offen sein ⇒ Widerspruch +f : Rn →Rm →Rn, +(x1, . . . , xn) 7→(x1, x2, . . . , xn, 0, . . . , 0) +eine stetige injektive Abbildung. Also müsste f(Rn) offen sein ⇒Widerspruch 26 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN Beispiel 20 (Mannigfaltigkeiten) -1) Jede offene Teilmenge U ⊆ Rn ist eine n-dimensionale Mannigfaltigkeit mit einem +1) Jede offene Teilmenge U ⊆Rn ist eine n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte. 2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: -(z1, . . . , zn) �→ (ℜ(z1), ℑ(z1), . . . , ℜ(zn), ℑ(zn)) -3) Pn(R) = (Rn+1 \ { 0 })/∼ = Sn/∼ und Pn(C) sind Mannigfaltigkeiten der Dimension +(z1, . . . , zn) 7→(ℜ(z1), ℑ(z1), . . . , ℜ(zn), ℑ(zn)) +3) Pn(R) = (Rn+1 \ { 0 })/∼= Sn/∼und Pn(C) sind Mannigfaltigkeiten der Dimension n bzw. 2n, da gilt: -Sei Ui := { (x0 : · · · : xn) ∈ Pn(R) | xi ̸= 0 } ∀i ∈ 0, . . . , n. Dann ist Pn(R) = �n +Sei Ui := { (x0 : · · · : xn) ∈Pn(R) | xi ̸= 0 } ∀i ∈0, . . . , n. 
Dann ist Pn(R) = Sn i=0 Ui und die Abbildung -Ui → Rn -(x0 : · · · : xn) �→ -�x0 +Ui →Rn +(x0 : · · · : xn) 7→ +x0 xi , . . . , -� -�� + + xi xi , . . . , xn xi -� + (y1 : · · · : yi−1 : 1 : yi : · · · : yn) -�→ +7→ (y1, . . . , yn) ist bijektiv. Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: -x = (1 : 0 : 0) ∈ U0 → R2 -x �→ (0, 0) -y = (0 : 1 : 1) ∈ U2 → R2 -y �→ (0, 1) -Umgebung: B1(0, 1) → { (1 : u : v) | ∥(u, v)∥ < 1 } = V1 +x = (1 : 0 : 0) ∈U0 →R2 +x 7→(0, 0) +y = (0 : 1 : 1) ∈U2 →R2 +y 7→(0, 1) +Umgebung: B1(0, 1) →{ (1 : u : v) | ∥(u, v)∥< 1 } = V1 Umgebung: B1(0, 1) → -� + (w : z : 1) -�� w2 + z2 < 1 -� + w2 + z2 < 1 + = V2 -V1 ∩ V2 = ∅? -(a : b : c) ∈ V1 ∩ V2 -⇒ a ̸= 0 und ( b +V1 ∩V2 = ∅? +(a : b : c) ∈V1 ∩V2 +⇒a ̸= 0 und ( b a)2 + ( c -a)2 < 1 ⇒ c +a)2 < 1 ⇒c a < 1 -⇒ c ̸= 0 und ( a +⇒c ̸= 0 und (a c)2 + ( b -c)2 < 1 ⇒ a +c)2 < 1 ⇒a c < 1 -⇒ Widerspruch +⇒Widerspruch 4) Sn = -� -x ∈ Rn+1 �� ∥x∥ = 1 -� + +x ∈Rn+1 ∥x∥= 1 + ist n-dimensionale Mannigfaltigkeit. Karten: -Di := {(x1, . . . , xn+1) ∈ Sn|xi > 0} → B1(0, . . . , 0 -� �� � +Di := {(x1, . . . , xn+1) ∈Sn|xi > 0} →B1(0, . . . , 0 +| {z } ∈Rn ) -Ci := {(x1, . . . , xn+1) ∈ Sn|xi < 0} → B1(0, . . . , 0) -(x1, . . . , xn+1) �→ (x1, . . . , +Ci := {(x1, . . . , xn+1) ∈Sn|xi < 0} →B1(0, . . . , 0) +(x1, . . . , xn+1) 7→(x1, . . . ,  xi, . . . , xn+1)1 -(x1, . . . , xn) �→ (x1, . . . , xi−1, -� -1 − �n +(x1, . . . , xn) 7→(x1, . . . , xi−1, +q +1 −Pn k=1 x2 k, xi, . . . , xn), oder − -� -1 − �n +q +1 −Pn k=1 x2 k für Ci -Sn = �n+1 -i=1 (Ci ∪ Di) +Sn = Sn+1 +i=1 (Ci ∪Di) Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt. 5) [0, 1] ist keine Mannigfaltigkeit, denn: Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall @@ -1212,34 +1211,34 @@ ist. 27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN 6) V1 = -� -(x, y) ∈ R2 �� x · y = 0 -� + +(x, y) ∈R2 x · y = 0 + ist keine Mannigfaltigkeit. Das Problem ist (0, 0). 
Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen- hangskomponenten, wenn man einen Punkt entfernt. 7) V2 = -� -(x, y) ∈ R2 �� x3 = y2 � + +(x, y) ∈R2 x3 = y2 ist eine Mannigfaltigkeit. -8) X = (R \ { 0 }) ∪ (01, 02) -U ⊆ X offen ⇔ -� +8) X = (R \ { 0 }) ∪(01, 02) +U ⊆X offen ⇔ +( U offen in R \ { 0 } , -falls 01 /∈ U, 02 ∈ U -∃ε > 0 : (−ε, ε) ⊆ U -falls 01 ∈ U, 02 ∈ U -Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph +falls 01 /∈U, 02 ∈U +∃ε > 0 : (−ε, ε) ⊆U +falls 01 ∈U, 02 ∈U +Insbesondere sind (R \ { 0 }) ∪{ 01 } und (R \ { 0 }) ∪{ 02 } offen und homöomorph zu R. Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01 und 02. 9) GLn(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 eine Mannigfaltigkeit bilden. Definition 25 -Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho- -möomorphismus Z = (X ˙∪ Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation -und der von ∼ induzierten Quotiententopologie. +Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆X und V ⊆Y offen, Φ : U →V ein Ho- +möomorphismus Z = (X ˙∪Y )/∼mit der von u ∼Φ(u) ∀u ∈U erzeugten Äquivalenzrelation +und der von ∼induzierten Quotiententopologie. Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. Bemerkung 26 @@ -1257,7 +1256,7 @@ Mannigfaltigkeiten mit Dimension 2: 3) T 2 (1 Henkel) 4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1 Bemerkung 27 -Sei n ∈ N, F : Rn → R stetig differenzierbar und X = V (F) := { x ∈ Rn | F(x) = 0 } das +Sei n ∈N, F : Rn →R stetig differenzierbar und X = V (F) := { x ∈Rn | F(x) = 0 } das „vanishing set“. Dann gilt: @@ -1266,26 +1265,26 @@ Dann gilt: Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. 
a) X ist abgeschlossen in Rn b) Ist grad(F)(X) ̸= 0 -∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n − 1. +∀x ∈X, so ist X eine Mannigfaltigkeit der Dimension n −1. Beweis: -a) Sei y ∈ Rn \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆ Bε(F(y)) mit +a) Sei y ∈Rn \ V (F). Weil F stetig ist, gibt es δ > 0, sodass F(Bδ(y)) ⊆Bε(F(y)) mit ε = 1 -2∥F(y)∥. Folgt Bδ(y) ∩ V (F) = ∅ ⇒ Rn \ V (F) ist offen. -b) Sei x ∈ X mit grad(F)(x) ̸= 0, also o. B. d. A. +2∥F(y)∥. Folgt Bδ(y) ∩V (F) = ∅⇒Rn \ V (F) ist offen. +b) Sei x ∈X mit grad(F)(x) ̸= 0, also o. B. d. A. ∂F ∂X1 (x) ̸= 0, x = (x1, . . . , xn), -x′ := (x2, . . . , xn) ∈ Rn−1. Der Satz von der impliziten Funktion liefert nun: Es -gibt Umgebungen U von x′ und differenzierbare Funktionen g : U → R, sodass -G : U → Rn, u �→ (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x +x′ := (x2, . . . , xn) ∈Rn−1. Der Satz von der impliziten Funktion liefert nun: Es +gibt Umgebungen U von x′ und differenzierbare Funktionen g : U →R, sodass +G : U →Rn, u 7→(g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x in X ist. ■ Beispiel 22 -1) F : R3 → R, -(x, y, z) �→ x2+y2+z2−1, V (F) = S2, grad(F) = (2x, 2y, 2z) Bem. 27.b +1) F : R3 →R, +(x, y, z) 7→x2+y2+z2−1, V (F) = S2, grad(F) = (2x, 2y, 2z) Bem. 27.b ======⇒ Sn ist n-dimensionale Mannigfaltigkeit in Rn+1 -2) F : R2 → R, -(x, y) �→ y2 −x3 Es gilt: grad(F) = (−3x2, 2y). Also: grad(0, 0) = (0, 0). +2) F : R2 →R, +(x, y) 7→y2 −x3 Es gilt: grad(F) = (−3x2, 2y). Also: grad(0, 0) = (0, 0). −5 −4 −3 @@ -1312,7 +1311,7 @@ z 0 100 f(x, y) -(a) F(x, y) = y2 − x3 +(a) F(x, y) = y2 −x3 2 4 6 @@ -1329,7 +1328,7 @@ a = 1 3 a = 1 a = 2 -(b) y2 − ax3 = 0 +(b) y2 −ax3 = 0 Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. Daher ist Bemerkung 27.b nicht anwendbar, aber V (F) ist trotzdem eine 1-dimensionale topologische Mannigfaltigkeit. @@ -1338,10 +1337,10 @@ topologische Mannigfaltigkeit. 2.1. 
TOPOLOGISCHE MANNIGFALTIGKEITEN Definition 26 Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale -Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui, ϕi) gibt, wobei Ui ⊆ Xi offen und +Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui, ϕi) gibt, wobei Ui ⊆Xi offen und ϕi ein Homöomorphismus auf eine offene Teilmenge von Rn -+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 } ++,0 := { (x1, . . . , xn) ∈Rn | xn ≥0 } ist. Rn +,0 ist ein „Halbraum“. @@ -1356,17 +1355,17 @@ Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand Definition 27 Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt ∂X := -� +[ (U,ϕ)∈A -{ x ∈ U | ϕ(x) = 0 } +{ x ∈U | ϕ(x) = 0 } Rand von X. -∂X ist eine Mannigfaltigkeit der Dimension n − 1. +∂X ist eine Mannigfaltigkeit der Dimension n −1. Definition 28 Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I -Für i, j ∈ I mit Ui ∩ Uj ̸= ∅ heißt -ϕij := ϕj ◦ ϕ−1 +Für i, j ∈I mit Ui ∩Uj ̸= ∅heißt +ϕij := ϕj ◦ϕ−1 i -ϕi(Ui ∩ Uj) → ϕj(Ui ∩ Uj) +ϕi(Ui ∩Uj) →ϕj(Ui ∩Uj) Kartenwechsel oder Übergangsfunktion. 30 @@ -1385,54 +1384,54 @@ Abbildung 2.4: Kartenwechsel Definition 29 Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I. a) X heißt differenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten- -wechselabbildung ϕij, i, j ∈ I k-mal stetig differenzierbar ist. +wechselabbildung ϕij, i, j ∈I k-mal stetig differenzierbar ist. b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig- -faltigkeit der Klasse C∞ ist. -Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt. +faltigkeit der Klasse C∞ist. +Differenzierbare Mannigfaltigkeiten der Klasse C∞werden auch glatt genannt. Definition 30 -Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k ∈ N ∪ { ∞ }) mit Atlas +Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k ∈N ∪{ ∞}) mit Atlas A = (Ui, ϕi)i∈I. 
-a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ−1 +a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ϕ−1 i -und ϕi ◦ ϕ−1 (i ∈ I mit Ui ∩ U ̸= ∅) differenzierbar von Klasse Ck sind. +und ϕi ◦ϕ−1 (i ∈I mit Ui ∩U ̸= ∅) differenzierbar von Klasse Ck sind. b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der Klasse Ck. Er heißt Ck-Struktur auf X. Eine C∞-Struktur heißt auch differenzierbare Struktur auf X. Bemerkung 28 -Für n ≥ 4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten +Für n ≥4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten „exotische Sphären“. Definition 31 -Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X. -a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse Ck), wenn -es Karten (U, ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass -ψ ◦ f ◦ ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. -b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x ∈ X differenzierbar ist. -c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine -differenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und -f ◦ g = idY . +Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈X. +a) Eine stetige Abbildung f : X →Y heißt differenzierbar in x (von Klasse Ck), wenn +es Karten (U, ϕ) von X mit x ∈U und (V, ψ) von Y mit f(U) ⊆V gibt, sodass +ψ ◦f ◦ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. +b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x ∈X differenzierbar ist. +c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ist und es eine +differenzierbare Abbildung g : Y →X von Klasse C∞gibt mit g ◦f = idX und +f ◦g = idY . 31 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Bemerkung 29 Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. 
-Beweis: Seien (U ′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U ′) ⊆ V ′. -⇒ ψ′ ◦ f ◦ (ϕ′)−1 -= ψ′ ◦ (ψ−1 ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ′)−1 -ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 differenzierbar ist. +Beweis: Seien (U′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U′) ⊆V ′. +⇒ψ′ ◦f ◦(ϕ′)−1 += ψ′ ◦(ψ−1 ◦ψ) ◦f ◦(ϕ−1 ◦ϕ) ◦(ϕ′)−1 +ist genau dann differenzierbar, wenn ψ ◦f ◦ϕ−1 differenzierbar ist. Beispiel 23 -f : R → R, -x �→ x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := +f : R →R, +x 7→x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := 3√x -gilt: f ◦ g = idR, -g ◦ f = idR +gilt: f ◦g = idR, +g ◦f = idR Bemerkung 30 Sei X eine glatte Mannigfaltigkeit. Dann ist -Diffeo(X) := { f : X → X | f ist Diffeomorphismus } +Diffeo(X) := { f : X →X | f ist Diffeomorphismus } eine Untergruppe von Homöo(X). Definition 32 -S ⊆ R3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 ∃U ⊆ R2 offen: -∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 -∀u ∈ U. +S ⊆R3 heißt reguläre Fläche :⇔∀s ∈S ∃Umgebung V (s) ⊆R3 ∃U ⊆R2 offen: +∃differenzierbare Abbildung F : U →V ∩S: Rg(JF (u)) = 2 +∀u ∈U. F heißt (lokale) reguläre Parametrisierung von S. F(u, v) = (x(u, v), y(u, v), z(u, v)) JF (u, v) = @@ -1453,9 +1452,9 @@ JF (u, v) =   Beispiel 24 -1) Rotationsflächen: Sei r : R → R>0 eine differenzierbare Funktion. -F : R2 → R3 -(u, v) �→ (r(u) cos(u), r(v) sin(u), v) +1) Rotationsflächen: Sei r : R →R>0 eine differenzierbare Funktion. +F : R2 →R3 +(u, v) 7→(r(u) cos(u), r(v) sin(u), v) JF (u, v) =   @@ -1467,19 +1466,17 @@ r′(v) sin u 1   -hat Rang 2 für alle (u, v) ∈ R2. -2) Kugelkoordinaten: F : R2 → R3, -(u, v) �→ (R cos v cos u, R cos v sin u, R sin v) -Es gilt: F(u, v) ∈ S2 +hat Rang 2 für alle (u, v) ∈R2. 
+2) Kugelkoordinaten: F : R2 →R3, +(u, v) 7→(R cos v cos u, R cos v sin u, R sin v) +Es gilt: F(u, v) ∈S2 R, denn R2 cos2(v) cos2(u) + R2 cos2(v) sin2(u) + R2 sin2(v) =R2(cos2(v) cos2(u) + cos2(v) sin2(u) + sin2(v)) -=R2 � -cos2(v)(cos2(u) + sin2(u)) + sin2(v) -� -=R2 � -cos2(v) + sin2(v) -� +=R2 cos2(v)(cos2(u) + sin2(u)) + sin2(v) + +=R2 cos2(v) + sin2(v) + =R2 32 @@ -1533,14 +1530,14 @@ R cos v  hat Rang 2 für cos v ̸= 0. In N und S ist cos v = 0. Bemerkung 31 -Jede reguläre Fläche S ⊆ R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. +Jede reguläre Fläche S ⊆R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. Beweis: -S ⊆ R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆ R2, Fj : R2 → R3) von S mit -Ui ∩ Uj ̸= ∅ existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind. +S ⊆R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von +regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆R2, Fj : R2 →R3) von S mit +Ui ∩Uj ̸= ∅existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind. z.Z.: F −1 j -◦ Fi ist ein Diffeomorphismus. +◦Fi ist ein Diffeomorphismus. Ui Uj S @@ -1551,19 +1548,19 @@ F −1 j ◦Fi Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 -Idee: Finde differenzierbare Funktion � +Idee: Finde differenzierbare Funktion g F −1 j -in Umgebung W von s, sodass � +in Umgebung W von s, sodass g F −1 j |S∩W = F −1 j . -Ausführung: Sei u0 ∈ Ui, v0 ∈ Uj mit Fi(u0) = s = Fj(v0). +Ausführung: Sei u0 ∈Ui, v0 ∈Uj mit Fi(u0) = s = Fj(v0). Da Rg(JFj(v0)) = 2 ist, ist o. B. d. A. det -� ∂x + ∂x ∂u ∂x ∂v @@ -1571,16 +1568,16 @@ det ∂u ∂y ∂v -� + (v0) ̸= 0 und Fj(u, v) = (x(u, v), y(u, v), z(u, v)). 
-Definiere � -Fj : Uj × R → R3 durch -� +Definiere f +Fj : Uj × R →R3 durch +f Fj(u, v, t) := (x(u, v), y(u, v), z(u, v) + t) -Offensichtlich: � +Offensichtlich: f Fj|Uj×{ 0 } = Fj -J� +Jf Fj =   @@ -1600,11 +1597,11 @@ Fj = ∂v 1  - ⇒ det J� +⇒det Jf Fj(v0, 0) ̸= 0 Analysis II -======⇒ Es gibt Umgebungen W von Fj von � -Fj(v0, 0) = Fj(v0) = s, sodass � +======⇒Es gibt Umgebungen W von Fj von f +Fj(v0, 0) = Fj(v0) = s, sodass f Fj auf W eine differenzierbar Inverse F −1 j @@ -1613,26 +1610,26 @@ hat. 34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Weiter gilt: -� +f Fj −1|W∩S = F −1 j |W∩S -⇒ F −1 +⇒F −1 j -◦ Fi|F −1 +◦Fi|F −1 i (W∩S) = F −1 j -◦ Fi|F −1 +◦Fi|F −1 i (W∩S) ist differenzierbar. Definition 33 Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. -a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G → G und ι : G → G +a) G heißt topologische Gruppe, wenn die Abbildungen ◦: G×G →G und ι : G →G definiert durch -g ◦ h := g · h und ι(g) := g−1 +g ◦h := g · h und ι(g) := g−1 stetig sind. b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und (G, ι) differenzierbar sind. @@ -1641,7 +1638,7 @@ Beispiel 25 (Lie-Gruppen) 2) GLn(R) 3) (R×, ·) 4) (R>0, ·) -5) (Rn, +), denn A · B(i, j) = �n +5) (Rn, +), denn A · B(i, j) = Pn k=1 aikbkj ist nach allen Variablen differenzierbar (A−1)(i, j) = det(Aij) det A @@ -1660,44 +1657,44 @@ an1 ann   - ∈ R(n−1)×(n−1) +∈R(n−1)×(n−1) ist differenzierbar. det Aij kann 0 werden, da: -� 1 + 1 1 −1 0 -� -6) SLn(R) = { A ∈ GLn(R) | det(A) = 1 } + +6) SLn(R) = { A ∈GLn(R) | det(A) = 1 } Bemerkung 32 -Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung -lg : G → G -h �→ g · h +Ist G eine Lie-Gruppe und g ∈G, so ist die Abbildung +lg : G →G +h 7→g · h ein Diffeomorphismus. 35 2.3. SIMPLIZIALKOMPLEX 2.3 Simplizialkomplex Definition 34 -Seien v0, . . . , vk ∈ Rn Punkte. +Seien v0, . . . , vk ∈Rn Punkte. a) v0, . . . 
, vk sind in allgemeiner Lage -⇔ es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält -⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig. +⇔es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält +⇔v1 −v0, . . . , vk −v0 sind linear unabhängig. b) conv(v0, . . . , vk) := -� �k +n Pk i=0 λivi -��� λi ≥ 0, �k + λi ≥0, Pk i=0 λi = 1 -� +o heißt die konvexe Hülle von v0, . . . , vk. Definition 35 -a) Sei ∆n = conv(e0, . . . , en) ⊆ Rn+1 die konvexe Hülle der Standard-Basisvektoren +a) Sei ∆n = conv(e0, . . . , en) ⊆Rn+1 die konvexe Hülle der Standard-Basisvektoren e0, . . . , en. Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. b) Für Punkte v0, . . . , vk im Rn in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk) ein k-Simplex in Rn. -c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir := +c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆{ 0, . . . , k }, so ist si0,...,ir := conv(vi0, . . . , vir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. (a) 0-Simplex ∆0 1 @@ -1728,11 +1725,11 @@ Abbildung 2.6: Beispiele für k-Simplexe Definition 36 a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex, wenn gilt: -(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K. -(ii) Für ∆1, ∆2 ∈ K ist ∆1 ∩ ∆2 leer oder ein Teilsimplex von ∆1 und von ∆2. -b) |K| := � -∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. -c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K. +(i) Für ∆∈K und S ⊆∆Teilsimplex ist S ∈K. +(ii) Für ∆1, ∆2 ∈K ist ∆1 ∩∆2 leer oder ein Teilsimplex von ∆1 und von ∆2. +b) |K| := S +∆∈K ∆(mit Teilraumtopologie) heißt geometrische Realisierung von K. +c) Ist d = max { k ∈N0 | K enthält k-Simplex }, so heißt d die Dimension von K. 36 2.3. 
SIMPLIZIALKOMPLEX @@ -1749,10 +1746,10 @@ P Abbildung 2.7: Beispiele für Simplizialkomplexe Definition 37 Seien K, L Simplizialkomplexe. Eine stetige Abbildung -f : |K| → |L| -heißt simplizial, wenn für jedes ∆ ∈ K gilt: -a) f(∆) ∈ L -b) f|∆ : ∆ → f(∆) ist eine affine Abbildung. +f : |K| →|L| +heißt simplizial, wenn für jedes ∆∈K gilt: +a) f(∆) ∈L +b) f|∆: ∆→f(∆) ist eine affine Abbildung. Beispiel 26 (Simpliziale Abbildungen) 1) ϕ(e1) := b1, ϕ(e2) := b2 ϕ ist eine eindeutig bestimmte lineare Abbildung @@ -1766,7 +1763,7 @@ e1 b1 b2 ϕ -2) Folgende Abbildung ϕ : ∆n → ∆n−1 ist simplizial: +2) Folgende Abbildung ϕ : ∆n →∆n−1 ist simplizial: ϕ 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) M @@ -1818,58 +1815,58 @@ b b Abbildung 2.8: Abbildung eines Torus auf eine Sphäre Definition 38 -Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in +Sei K ein endlicher Simplizialkomplex. Für n ≥0 sei an(K) die Anzahl der n-Simplizes in K. Dann heißt χ(K) := dim K -� +X n=0 (−1)nan(K) Eulerzahl (oder Euler-Charakteristik) von K. Beispiel 27 -1) χ(∆1) = 2 − 1 = 1 -χ(∆2) = 3 − 3 + 1 = 1 -χ(∆3) = 4 − 6 + 4 − 1 = 1 -2) χ(Oktaeder-Oberfläche) = 6 − 12 + 8 = 2 +1) χ(∆1) = 2 −1 = 1 +χ(∆2) = 3 −3 + 1 = 1 +χ(∆3) = 4 −6 + 4 −1 = 1 +2) χ(Oktaeder-Oberfläche) = 6 −12 + 8 = 2 χ(Rand des Tetraeders) = 2 -χ(Ikosaeder) = 12 − 30 + 20 = 2 -3) χ(Würfel) = 8 − 12 + 6 = 2 -χ(Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 +χ(Ikosaeder) = 12 −30 + 20 = 2 +3) χ(Würfel) = 8 −12 + 6 = 2 +χ(Würfel, unterteilt in Dreiecksflächen) = 8 −(12 + 6) + (6 · 2) = 2 Bemerkung 33 -χ(∆n) = 1 für jedes n ∈ N0 +χ(∆n) = 1 für jedes n ∈N0 38 2.3. SIMPLIZIALKOMPLEX Beweis: ∆n ist die konvexe Hülle von (e0, . . . , en) in Rn+1. Jede (k + 1)-elementige Teilmenge von { e0, . . . , en } definiert ein k-Simplex. -⇒ ak(∆n) = -�n+1 +⇒ak(∆n) = +n+1 k+1 -� + , k = 0, . . . 
, n -⇒ χ(∆n) = �n -k=0(−1)k�n+1 +⇒χ(∆n) = Pn +k=0(−1)kn+1 k+1 -� + f(x) = (x + 1)n+1 Binomischer Lehrsatz = -�n+1 +Pn+1 k=0 -�n+1 +n+1 k -� + xk -⇒ 0 = �n+1 +⇒0 = Pn+1 k=0 -�n+1 +n+1 k -� -(−1)k = χ(∆n) − 1 -⇒ χ(∆n) = 1 + +(−1)k = χ(∆n) −1 +⇒χ(∆n) = 1 ■ Definition 39 a) Ein 1D-Simplizialkomplex heißt Graph. @@ -1888,45 +1885,45 @@ Beweis: Induktion über die Anzahl der Ecken. Bemerkung 35 a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ enthält.2 -b) Ist n = a1(Γ) − a1(T), so ist χ(Γ) = 1 − n. +b) Ist n = a1(Γ) −a1(T), so ist χ(Γ) = 1 −n. Beweis: a) Siehe „Algorithmus von Kruskal“. 2T wird „Spannbaum“ genannt. 39 2.3. SIMPLIZIALKOMPLEX -b) χ(Γ) = a0(Γ) − a1(Γ) -= a0(Γ) − (n + a1(T)) -= a0(T) − a1(T) − n -= χ(T) − n -= 1 − n +b) χ(Γ) = a0(Γ) −a1(Γ) += a0(Γ) −(n + a1(T)) += a0(T) −a1(T) −n += χ(T) −n += 1 −n Bemerkung 36 -Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ Rn. Sei K der Simplizialkomplex, der aus ∆ durch +Sei ∆ein n-Simplex und x ∈∆◦⊆Rn. Sei K der Simplizialkomplex, der aus ∆durch „Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1. (a) K (b) ∆, das aus K durch Unter- teilung entsteht Abbildung 2.10: Beispiel für Bemerkung 36. -Beweis: χ(K) = χ(∆) − (−1)n -� �� � +Beweis: χ(K) = χ(∆) −(−1)n +| {z } n-Simplex + n -� +X k=0 (−1)k -�n + 1 +n + 1 k -� -� -�� -� + +| +{z +} (1+(−1))n+1 = χ(∆) ■ Definition 40 Sei X ein topologischer Raum, K ein Simplizialkomplex und -h : |K| → X +h : |K| →X ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine Triangulierung von X. Beispiel 28 (Triangulierung des Torus) @@ -1936,11 +1933,11 @@ in Beispiel 28. Satz 2.1 (Eulersche Polyederformel) Sei P ein konvexes Polyeder in R3, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex, sodass gilt: -∀x, y ∈ ∂P : [x, y] ⊆ P +∀x, y ∈∂P : [x, y] ⊆P Dann ist χ(∂P) = 2. Beweis: 1) Die Aussage ist richtig für den Tetraeder. -2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S2. +2) O. B. d. A. 
sei 0 ∈P und P ⊆B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S2. Erhalte Triangulierung von S2. 40 @@ -1968,27 +1965,27 @@ P2 ein Tetraeder ist. Bemerkung 37 (Der Rand vom Rand ist 0) Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V . Sei An die Menge der n-Simplizes in K, d. h. -An(K) := { σ ∈ K | dim(σ) = n } +An(K) := { σ ∈K | dim(σ) = n } für n = 0, . . . , d = dim(K) und Cn(K) der R-Vektorraum mit Basis An(K), d. h. Cn(K) =    -� +X σ∈An(K) cσ · σ -������ -cσ ∈ R + +cσ ∈R    -Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn. +Sei σ = ∆(x0, . . . , xn) ∈An(K), sodass x0 < x1 < · · · < xn. Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , ˆxi, . . . , xn) die i-te Seite von σ und dσ = dnσ := -� -i=0(−1)i∂iσ ∈ Cn−1(K) und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare +P +i=0(−1)i∂iσ ∈Cn−1(K) und dn : Cn(K) →Cn−1(K) die dadurch definierte lineare Abbildung. -Dann gilt: dn−1 ◦ dn = 0 +Dann gilt: dn−1 ◦dn = 0 a b c @@ -1999,46 +1996,46 @@ e2 Abbildung 2.14: Simplizialkomplex mit Totalordnung Beispiel 29 Sei a < b < c. Dann gilt: -d2σ = e1 − e2 + e3 -d1(e1 − e2 + e3) = (c − b) − (c − a) + (b − a) +d2σ = e1 −e2 + e3 +d1(e1 −e2 + e3) = (c −b) −(c −a) + (b −a) 42 2.3. SIMPLIZIALKOMPLEX = 0 Sei a < b < c < d. Dann gilt für Tetraeder: -d3(∆(a, b, c, d)) = ∆(b, c, d) − ∆(a, c, d) + ∆(a, b, d) − ∆(a, b, c), wobei: +d3(∆(a, b, c, d)) = ∆(b, c, d) −∆(a, c, d) + ∆(a, b, d) −∆(a, b, c), wobei: d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c) d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c) d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b) d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b) -⇒ d2(d3(∆(a, b, c, d))) = 0 -Beweis: Sei σ ∈ An. Dann gilt: +⇒d2(d3(∆(a, b, c, d))) = 0 +Beweis: Sei σ ∈An. 
Dann gilt: dn−1(dnσ) = dn−1( n -� +X i=0 (−1)i∂iσ) = n -� +X i=0 (−1)idn−1(∂iσ) = n -� +X i=0 (−1)i n−1 -� +X j=0 ∂i(∂jσ)(−1)j = -� +X 0≤i≤j≤n−1 (−1)i+j∂j(∂i(σ)) + -� +X 0≤j d(P, C) = d(P, B) + d(B, C) = d(P, A) + d(B, C) ⇒ -d(A, C) > d(B, C) ⇒ Widerspruch zu Punkt (i) +d(A, C) > d(B, C) ⇒Widerspruch zu Punkt (i) b) C liegt zwischen P und B d(P, C) + d(C, A) > d(P, A) = d(P, B) = d(P, C) + d(C, B) -⇒ d(C, A) > d(C, B) -⇒ Widerspruch zu Punkt (i) +⇒d(C, A) > d(C, B) +⇒Widerspruch zu Punkt (i) 2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. PA. Dann liegen A und Q in derselben Halbebene bzgl. PB. -Tausche A und B ⇒ Fall 1 +Tausche A und B ⇒Fall 1 ■ Bemerkung 63 -Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P ̸= Q und ϕ eine Isometrie +Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈X mit P ̸= Q und ϕ eine Isometrie mit ϕ(P) = P und ϕ(Q) = Q. Dann gilt ϕ(S) = S -∀S ∈ PQ. +∀S ∈PQ. Beweis: -O. B. d. A. sei S ∈ PQ 2⇔ d(P, Q) = d(P, S) + d(S, Q) +O. B. d. A. sei S ∈PQ 2⇔d(P, Q) = d(P, S) + d(S, Q) ϕ∈Iso(X) ⇒ d(ϕ(P), ϕ(Q)) = d(ϕ(P), ϕ(S)) + d(ϕ(S), ϕ(Q)) P,Q∈Fix(ϕ) ⇒ d(P, Q) = d(P, ϕ(S)) + d(ϕ(S), Q) -⇒ ϕ(S) liegt zwischen P und Q -⇒ d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S)) +⇒ϕ(S) liegt zwischen P und Q +⇒d(P, S) = d(ϕ(P), ϕ(S)) = d(P, ϕ(S)) 3(i) -⇒ ϕ(S) = S +⇒ϕ(S) = S ■ Proposition 4.2 In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P ′, Q, Q′ mit d(P, Q) = d(P ′, Q′) @@ -3212,14 +3209,14 @@ Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometri ϕi(P) = P ′ und ϕi(Q) = Q′ gibt. Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P) = P ′, ϕi(Q) = Q′ mit i = 1, 2, 3. Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: -(Teil i) ∃R ∈ X \ PQ mit ϕ1(R) = ϕ2(R). +(Teil i) ∃R ∈X \ PQ mit ϕ1(R) = ϕ2(R). (Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX. 
Aus (Teil i) und (Teil ii) folgt, dass ϕ−1 2 -◦ ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem +◦ϕ1 = idX, also ϕ2 = ϕ1, da P, Q und R in diesem Fall Fixpunkte sind. Nun zu den Beweisen der Teilaussagen: -(Teil i) Sei R ∈ X \ PQ. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben +(Teil i) Sei R ∈X \ PQ. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben Halbebene bzgl. P ′Q′ = ϕi(PQ). O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene. Es gilt: d(P ′, ϕ1(R)) = d(ϕ1(P), ϕ1(R)) @@ -3227,19 +3224,19 @@ Es gilt: d(P ′, ϕ1(R)) = d(ϕ1(P), ϕ1(R)) = d(ϕ2(P), ϕ2(R)) = d(P ′, ϕ2(R)) und analog d(Q′, ϕ1(R)) = d(Q′, ϕ2(R)) -(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈ PQ und A /∈ PQ ∪ PR ∪ QR. Sei B ∈ +(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈PQ und A /∈PQ ∪PR ∪QR. Sei B ∈ PQ \ { P, Q }. Dann ist ϕ(B) = B wegen Bemerkung 63. -Ist R ∈ AB, so enthält AB 2 Fixpunkte von ϕ Bem. 63 -=====⇒ ϕ(A) = A. +Ist R ∈AB, so enthält AB 2 Fixpunkte von ϕ Bem. 63 +=====⇒ϕ(A) = A. P B Q C R A -Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ PQ \ { P, Q }, A /∈ PQ ∪ PR ∪ QR -Ist R /∈ AB, so ist AB ∩ PR ̸= ∅ oder AB ∈ RQ ̸= ∅ nach Satz 4.1. Der Schnittpunkt -C ist dann Fixpunkt von ϕ′ nach Bemerkung 63 ⇒ ϕ(A) = A. +Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈PQ \ { P, Q }, A /∈PQ ∪PR ∪QR +Ist R /∈AB, so ist AB ∩PR ̸= ∅oder AB ∈RQ ̸= ∅nach Satz 4.1. Der Schnittpunkt +C ist dann Fixpunkt von ϕ′ nach Bemerkung 63 ⇒ϕ(A) = A. Bemerkung 64 (SWS-Kongruenzsatz) Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem △ABC und △A′B′C′ Dreiecke, für die gilt: @@ -3252,13 +3249,13 @@ Dreiecke, für die gilt: Dann ist △ABC kongruent zu △A′B′C′ . Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(A′C′+) = AC+ und ϕ(A′B′+) = AB+. Diese Isometrie existiert wegen Punkt §4. -⇒ C ∈ ϕ(A′C′+) und B ∈ ϕ(A′B′+). +⇒C ∈ϕ(A′C′+) und B ∈ϕ(A′B′+). 
d(A′, C′) = d(ϕ(A′), ϕ(C′)) = d(A, ϕ(C′)) 3(i) -==⇒ ϕ(C′) = C +==⇒ϕ(C′) = C d(A′, B′) = d(ϕ(A′), ϕ(B′)) = d(A, ϕ(B′)) 3(i) -==⇒ ϕ(B′) = B +==⇒ϕ(B′) = B Also gilt insbesondere ϕ(△A′B′C′) = △ABC. ■ Bemerkung 65 (WSW-Kongruenzsatz) @@ -3270,20 +3267,21 @@ Dreiecke, für die gilt: Dann ist △ABC kongruent zu △A′B′C′ . Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(B′) = B und ϕ(C′) liegt in der selben Halbebene bzgl. AB wie C. Diese Isometrie existiert wegen §4. -Aus ∠CAB = ∠C′A′B′ = ∠ϕ(C′)ϕ(A′)ϕ(B′) = ∠ϕ(C′)AB folgt, dass ϕ(C′) ∈ AC+. +Aus ∠CAB = ∠C′A′B′ = ∠ϕ(C′)ϕ(A′)ϕ(B′) = ∠ϕ(C′)AB folgt, dass ϕ(C′) ∈AC+. Analog folgt aus ∠ABC = ∠A′B′C′ = ∠ϕ(A′)ϕ(B′)ϕ(C′) = ∠ABϕ(C′), dass ϕ(C′) ∈ BC+. -Dann gilt ϕ(C′) ∈ AC ∩ BC = { C } ⇒ ϕ(C′) = C. +Dann gilt ϕ(C′) ∈AC ∩BC = { C } ⇒ϕ(C′) = C. Es gilt also ϕ(△A′B′C′) = △ABC. ■ Definition 61 -a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P. +a) Ein Winkel ist ein Punkt P ∈X zusammen mit 2 Halbgeraden mit Anfangspunkt P. Man schreibt: ∠R1PR2 bzw. ∠R2PR12 b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den anderen abbildet. c) ∠R′ 1P ′R′ -ϕ(P ′R′+2 heißt kleiner als ∠R1PR2, wenn es eine Isometrie ϕ gibt, mit ϕ(P ′) = P, +2 heißt kleiner als ∠R1PR2, wenn es eine Isometrie ϕ gibt, mit ϕ(P ′) = P, +ϕ(P ′R′+ 1 ) = PR+ 1 und ϕ(R′ 2) liegt in der gleichen Halbebene bzgl. PR1 wie R2 und in @@ -3292,9 +3290,9 @@ d) Im Dreieck △PQR gibt es Innenwinkel und Außenwinkel. Bemerkung 66 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. Beweis: Zeige ∠PRQ < ∠RQP ′. -Sei M der Mittelpunkt der Strecke QR und P ′ ∈ PQ+ \ PQ. Sei A ∈ MP − mit d(P, M) = +Sei M der Mittelpunkt der Strecke QR und P ′ ∈PQ+ \ PQ. Sei A ∈MP −mit d(P, M) = d(M, A). -2Für dieses Skript gilt: ∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel ≤ 180◦. +2Für dieses Skript gilt: ∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel ≤180◦. 72 4.1. 
AXIOME FÜR DIE EUKLIDISCHE EBENE @@ -3330,16 +3328,16 @@ P (b) Innen- und Außenwin- kel von △PQR Abbildung 4.7: Situation aus Bemerkung 66 -Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒ △MRQ +Es gilt: d(Q, M) = d(M, R) und d(P, M) = d(M, A) sowie ∠PMR = ∠AMQ ⇒△MRQ ist kongruent zu △AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet, bildet R auf Q und P auf A ab. -⇒ ∠MQA = ∠MRP = ∠QRP = ∠PRQ. +⇒∠MQA = ∠MRP = ∠QRP = ∠PRQ. Noch zu zeigen: ∠MQA < ∠RQP ′, denn A liegt in der selben Halbebene bzgl. PQ wie M. Proposition 4.3 (Existenz der Parallelen) Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4. -Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine -Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. -Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P ′ ∈ f mit +Dann gibt es zu jeder Geraden g ∈G und jedem Punkt P ∈X \ g mindestens eine +Parallele h ∈G mit P ∈h und g ∩h = ∅. +Beweis: Seien P, Q ∈f ∈G und ϕ die Isometrie, die Q auf P und P auf P ′ ∈f mit d(P, P ′) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält. 73 @@ -3350,12 +3348,12 @@ f g P Abbildung 4.8: Situation aus Proposition 4.3 -Annahme: ϕ(g) ∩ g ̸= ∅ -⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g. +Annahme: ϕ(g) ∩g ̸= ∅ +⇒Es gibt einen Schnittpunkt { R } = ϕ(g) ∩g. Dann ist ∠RQP = ∠RQP ′ < ∠RPP ′ nach Bemerkung 66 und ∠RQP = ∠RPP ′, weil ϕ(∠RQP) = ∠RPP ′. -⇒ Widerspruch -⇒ ϕ(g) ∩ g = ∅ +⇒Widerspruch +⇒ϕ(g) ∩g = ∅ ■ Folgerung 4.4 Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. @@ -3367,13 +3365,14 @@ Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidisc Dreiecke mit drei 90◦-Winkeln. Proposition 4.5 In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der -Innenwinkel ≤ π. +Innenwinkel ≤π. 74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Sei im Folgenden „IWS“ die „Innenwinkelsumme“. 
-Beweis: Sei △ ein Dreieck mit IWS(△) = π + ε -β α +Beweis: Sei △ein Dreieck mit IWS(△) = π + ε +α +β γ P (a) Summe der Winkel α, β und γ @@ -3389,21 +3388,21 @@ A′ (b) Situation aus Proposition 4.5 Abbildung 4.10: Situation aus Proposition 4.5 Sei α ein Innenwinkel von △. -Beh.: Es gibt ein Dreieck △′ mit IWS(△′) = IWS(△) und einem Innenwinkel α′ ≤ α +Beh.: Es gibt ein Dreieck △′ mit IWS(△′) = IWS(△) und einem Innenwinkel α′ ≤α 2 . -Dann gibt es für jedes n ein △n mit IWS(△n) = IWS(△) und Innenwinkel α′ ≤ α +Dann gibt es für jedes n ein △n mit IWS(△n) = IWS(△) und Innenwinkel α′ ≤α 2n . Für α -2n < ε ist dann die Summe der beiden Innenwinkel um △n größer als π ⇒ Widerspruch +2n < ε ist dann die Summe der beiden Innenwinkel um △n größer als π ⇒Widerspruch zu Folgerung 4.4. -Beweis: Es seien A, B, C ∈ X und △ das Dreieck mit den Eckpunkten A, B, C und α sei +Beweis: Es seien A, B, C ∈X und △das Dreieck mit den Eckpunkten A, B, C und α sei der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C. Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM. -Sei weiter A′ ∈ MA− mit d(A′, M) = d(A, M). +Sei weiter A′ ∈MA−mit d(A′, M) = d(A, M). Die Situation ist in Abbildung 4.10b skizziert. -⇒ △(MA′C) und △(MAB) sind kongruent. ⇒ ∠ABM = ∠A′CM und ∠MA′C = -∠MAB. ⇒ α+β +γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A. -α1 ≤ α +⇒△(MA′C) und △(MAB) sind kongruent. ⇒∠ABM = ∠A′CM und ∠MA′C = +∠MAB. ⇒α+β +γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A. +α1 ≤α 2 Bemerkung 67 In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. @@ -3425,7 +3424,7 @@ Beweis: Sei g eine Parallele von AB durch C. 75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -⇒ IWS(△ABC) = γ + α′′ + β′ = π +⇒IWS(△ABC) = γ + α′′ + β′ = π Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. 
4.2 Weitere Eigenschaften einer euklidischen Ebene @@ -3500,14 +3499,14 @@ LA LC Abbildung 4.16: △ABLa und △CLCB sind ähnlich, weil IWS = π Strahlensatz -=======⇒ a +=======⇒a hc = c -ha → a · ha = c · hc +ha →a · ha = c · hc Satz 4.7 (Satz des Pythagoras) Im rechtwinkligen Dreieck gilt a2 + b2 = c2, wobei c die Hypotenuse und a, b die beiden Katheten sind. -Beweis: (a + b) · (a + b) = a2 + 2ab + b2 = c2 + 4 · ( 1 +Beweis: (a + b) · (a + b) = a2 + 2ab + b2 = c2 + 4 · (1 2 · a · b) 77 @@ -3543,13 +3542,13 @@ Beweis: (i) (R2, dEuklid) ist offensichtlich eine euklidische Ebene. (ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0 im rechten Winkel schneiden. -Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl. +Sei P ∈X \ (g1 ∪g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl. Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2. Sei xP := d(PX, 0) und yP := d(PY , 0). In Abbildung 4.19 wurde die Situation skizziert. -Sei h : X → R2 eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem +Sei h : X →R2 eine Abbildung mit h(P) := (xP , yP ) Dadurch wird h auf dem Quadranten definiert, in dem P liegt, d. h. -∀Q ∈ X mit PQ ∩ g1 = ∅ = PQ ∩ g2 +∀Q ∈X mit PQ ∩g1 = ∅= PQ ∩g2 Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. Im Folgenden werden zwei Aussagen gezeigt: (i) h ist surjektiv @@ -3577,7 +3576,7 @@ PY X (b) Schritt 2 Abbildung 4.18: Beweis zu Satz 4.8 -(i) Sei (x, y) ∈ R2, z. B. x ≥ 0, y ≥ 0. Sei P ′ ∈ g1 mit d(0, P ′) = x und P ′ auf der +(i) Sei (x, y) ∈R2, z. B. x ≥0, y ≥0. Sei P ′ ∈g1 mit d(0, P ′) = x und P ′ auf der gleichen Seite von g2 wie P. g1 g2 @@ -3592,21 +3591,21 @@ Abbildung 4.19: Beweis zu Satz 4.8 (ii) Zu Zeigen: d(P, Q) = d(h(P), h(Q)) d(P, Q)2 Pythagoras = -d(P, R)2 + d(R, Q)2 = (yQ − yP )2 + (xQ − xP )2. +d(P, R)2 + d(R, Q)2 = (yQ −yP )2 + (xQ −xP )2. 
h(Q) = (xQ, yQ) 4.3 Hyperbolische Geometrie Definition 63 Sei -H := { z ∈ C | ℑ(z) > 0 } = -� -(x, y) ∈ R2 �� y > 0 -� +H := { z ∈C | ℑ(z) > 0 } = + +(x, y) ∈R2 y > 0 + 79 4.3. HYPERBOLISCHE GEOMETRIE -die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪ G2 mit -G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z − m| = r } } -G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : ℜ(z) = x } } +die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪G2 mit +G1 = { g1 ⊆H | ∃m ∈R, r ∈R>0 : g1 = { z ∈H : | z −m| = r } } +G2 = { g2 ⊆H | ∃x ∈R : g2 = { z ∈H : ℜ(z) = x } } Die Elemente aus G heißen hyperbolische Geraden. Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) Die hyperbolischen Geraden erfüllen. . . @@ -3615,11 +3614,11 @@ b) . . . das Anordnungsaxiom §3 (ii) c) . . . nicht das Parallelenaxiom §5 Beweis: a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt: -Gegeben z1, z2 ∈ H +Gegeben z1, z2 ∈H Existenz: Fall 1 ℜ(z1) = ℜ(z2) -⇒ z1 und z2 liegen auf -g = { z ∈ C | ℜ(z) = ℜ(z1) ∧ H } +⇒z1 und z2 liegen auf +g = { z ∈C | ℜ(z) = ℜ(z1) ∧H } Siehe Abbildung 4.20a. Fall 2 ℜ(z1) ̸= ℜ(z2) Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech- @@ -3663,48 +3662,48 @@ Z2 (b) Fall 2 Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer Geraden -b) Sei g ∈ G1 ˙∪ G2 eine hyperbolische Gerade. +b) Sei g ∈G1 ˙∪G2 eine hyperbolische Gerade. 80 4.3. HYPERBOLISCHE GEOMETRIE Es existieren disjunkte Zerlegungen von H \ g: -Fall 1: g = { z ∈ H ∥ z − m| = r } ∈ G1 +Fall 1: g = { z ∈H ∥z −m| = r } ∈G1 Dann gilt: -H = { z ∈ H ∥ z − m| < r } -� -�� -� +H = { z ∈H ∥z −m| < r } +| +{z +} =:H1 (Kreisinneres) -˙∪ { z ∈ H ∥ z − m| > r } -� -�� -� +˙∪{ z ∈H ∥z −m| > r } +| +{z +} =:H2 (Kreisäußeres) -Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer. -Fall 2: g = { z ∈ H | ℜz = x } ∈ G2 +Da r > 0 ist H1 nicht leer, da r ∈R ist H2 nicht leer. 
+Fall 2: g = { z ∈H | ℜz = x } ∈G2 Die disjunkte Zerlegung ist: -H = { z ∈ H | ℜ(z) < x } -� -�� -� +H = { z ∈H | ℜ(z) < x } +| +{z +} =:H1 (Links) -˙∪ { z ∈ H | ℜ(z) > x } -� -�� -� +˙∪{ z ∈H | ℜ(z) > x } +| +{z +} =:H2 (Rechts) -Zu zeigen: ∀A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g ̸= ∅ ⇔ i ̸= j -„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g ̸= ∅ +Zu zeigen: ∀A ∈Hi, B ∈Hj mit i, j ∈{ 1, 2 } gilt: AB ∩g ̸= ∅⇔i ̸= j +„⇐“: A ∈H1, B ∈H2 : AB ∩g ̸= ∅ Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige -Abbildung f : R → R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g ̸= ∅ -„⇒“: A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g ̸= ∅ ⇒ i ̸= j +Abbildung f : R →R>0 auffassen kann, greift der Zwischenwertsatz ⇒AB ∩g ̸= ∅ +„⇒“: A ∈Hi, B ∈Hj mit i, j ∈{ 1, 2 } : AB ∩g ̸= ∅⇒i ̸= j Sei h die Gerade, die durch A und B geht. -Da A, B /∈ g, aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen +Da A, B /∈g, aber A, B ∈h gilt, haben g und h insbesondere mindestens einen unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt schneiden. Sei C dieser Punkt. -Aus A, B /∈ g folgt: C ̸= A und C ̸= B. Also liegt C zwischen A und B. Daraus folgt, +Aus A, B /∈g folgt: C ̸= A und C ̸= B. Also liegt C zwischen A und B. Daraus folgt, dass A und B bzgl. g in verschiedenen Halbebenen liegen. c) Siehe Abbildung 4.21. x @@ -3732,76 +3731,76 @@ Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 81 4.3. HYPERBOLISCHE GEOMETRIE Definition 64 -Es seien a, b, c, d ∈ R mit ad − bc ̸= 0 und σ : C → C eine Abbildung definiert durch +Es seien a, b, c, d ∈R mit ad −bc ̸= 0 und σ : C →C eine Abbildung definiert durch σ(z) := az + b cz + d σ heißt Möbiustransformation. 
Proposition 4.9 a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation σ(z) := -�a +a b c d -� -◦ z := az + b + +◦z := az + b cz + d b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H. -c) PSL2(R) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h. -zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1, +c) PSL2(R) operiert auf R ∪{ ∞}. Diese Gruppenoperation ist 3-fach transitiv, d. h. +zu x0 < x1 < x∞∈R gibt es genau ein σ ∈PSL2(R) mit σ(x0) = 0, σ(x1) = 1, σ(x∞) = ∞. d) SL2(R) wird von den Matrizen -�λ +λ 0 0 λ−1 -� -� -�� -� + +| +{z +} =:Aλ , -�1 +1 t 0 1 -� -� �� � + +| {z } =:Bt und -� 0 + 0 1 −1 0 -� -� -�� -� + +| +{z +} =:C -mit t, λ ∈ R× +mit t, λ ∈R× erzeugt. e) PSL2(R) operiert auf G. Beweis: -a) Sei z = x + iy ∈ H, d. h. y > 0 und σ = -�a +a) Sei z = x + iy ∈H, d. h. y > 0 und σ = +a b c d -� -∈ SL2(R) -⇒ σ(z) = a(x + iy) + b + +∈SL2(R) +⇒σ(z) = a(x + iy) + b c(x + iy) + d = (ax + b) + iay -(cx + d) + icy · (cx + d) − icy -(cx + d) − icy +(cx + d) + icy · (cx + d) −icy +(cx + d) −icy = (ax + b)(cx + d) + aycy (cx + d)2 + (cy)2 -+ i ay(cx + d) − (ax + b)cy ++ i ay(cx + d) −(ax + b)cy (cx + d)2 + (cy)2 = axcx + axd + bcx + bd + aycy (cx + d)2 + (cy)2 + i -(ad − bc)y +(ad −bc)y (cx + d)2 + (cy)2 SL2(R) = @@ -3810,47 +3809,47 @@ ac(x2 + y2) + adx + bcx + bd + i y (cx + d)2 + (cy)2 -⇒ ℑ(σ(z)) = +⇒ℑ(σ(z)) = y (cx+d)2+(cy)2 > 0 Die Abbildung bildet also nach H ab. Außerdem gilt: -�1 +1 0 0 1 -� -◦ z = x + iy + +◦z = x + iy 1 = x + iy = z 82 4.3. 
HYPERBOLISCHE GEOMETRIE und -�a +a b c d -� + ◦ -��a′ +a′ b′ c′ d′ -� -◦ z -� + +◦z + = -�a +a b c d -� -◦ a′z + b′ + +◦a′z + b′ c′z + d′ = -a a′z+b′ +aa′z+b′ c′z+d′ + b -c a′z+b′ +ca′z+b′ c′z+d′ + d = a(a′z+b′)+b(c′z+d′) @@ -3862,44 +3861,44 @@ c(a′z + b′) + d(c′z + d′) = (aa′ + bc′)z + ab′ + bd′ (ca′ + db′)z + cb′ + dd′ = -�aa′ + bc′ +aa′ + bc′ ab′ + bd′ ca′ + db′ cb′ + dd′ -� -◦ z + +◦z = -��a +a b c d -� + · -�a′ +a′ b′ c′ d′ -�� -◦ z -b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H. + +◦z +b) Es gilt σ(z) = (−σ)(z) für alle σ ∈SL2(R) und z ∈H. c) Ansatz: σ = -�a +a b c d -� + σ(x0) = ax0+b cx0+d -!= 0 ⇒ ax0 + b = 0 ⇒ b = −ax0 -σ(x∞) = ∞ ⇒ cx∞ + d = 0 ⇒ d = −cx∞ -σ(x1) = 1 ⇒ ax1 + b = cx1 + d -a(x1 − x0) = c(x1 − x∞) ⇒ c = a x1−x0 +!= 0 ⇒ax0 + b = 0 ⇒b = −ax0 +σ(x∞) = ∞⇒cx∞+ d = 0 ⇒d = −cx∞ +σ(x1) = 1 ⇒ax1 + b = cx1 + d +a(x1 −x0) = c(x1 −x∞) ⇒c = a x1−x0 x1−x∞ -⇒ −a2 · x∞ x1−x0 -x1−x∞ + a2x0 x1−x0 -x1−x∞ = 1 -⇒ a2 x1−x0 -x0−x∞ (x0 − x∞) = 1 ⇒ a2 = +⇒−a2 · x∞x1−x0 +x1−x∞+ a2x0 x1−x0 +x1−x∞= 1 +⇒a2 x1−x0 +x0−x∞(x0 −x∞) = 1 ⇒a2 = x1−x∞ (x1−x∞)(x1−x0) d) Es gilt: @@ -3916,33 +3915,33 @@ erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation Matrizen der Form Aλ, Bt und C die Einheitsmatrix zu generieren. Sei also M = -�a +a b c d -� -∈ SL2(R) + +∈SL2(R) beliebig. Fall 1: a = 0 -Da M ∈ SL2(R) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c ̸= 0. Es +Da M ∈SL2(R) ist, gilt det M = 1 = ad −bc = −bc. Daher ist insbesondere c ̸= 0. Es folgt: -� 0 + 0 1 −1 0 -� + · -�a +a b c d -� + = -� c + c d −a −b -� + 83 4.3. HYPERBOLISCHE GEOMETRIE @@ -3950,68 +3949,68 @@ Gehe zu Fall 2. Fall 2: a ̸= 0 Nun wird in M durch M · A 1 a an der Stelle von a eine 1 erzeugt: -�a +a b c d -� + · -� 1 + 1 a 0 0 a -� + = -�1 +1 ab c a ad -� + Gehe zu Fall 3. Fall 3: a = 1 -�1 +1 b c d -� + · -�1 +1 −b 0 1 -� + = -�1 +1 0 c -d − bc -� -Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M2,2 = 1. 
+d −bc + +Da wir det M = 1 = ad −bc = d −bc wissen, gilt sogar M2,2 = 1. Gehe zu Fall 4. Fall 4: a = 1, b = 0, d = 1 A−1CBcC -�1 +1 0 c 1 -� + = -�1 +1 0 0 1 -� + Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R. ■ e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. • σ = -�λ +λ 0 0 λ−1 -� + , also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in Abbildung 4.22a und Abbildung 4.22b dargestellt sind. x @@ -4054,21 +4053,21 @@ x (b) Fall 2 (Strahlensatz) Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix • Offensichtlich gilt die Aussage für σ = -�1 +1 a 0 1 -� + • Sei nun σ = -� 0 + 0 1 −1 0 -� -, also σ(z) = − 1 + +, also σ(z) = −1 z Bemerkung 69 -Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2. +Zu hyperbolischen Geraden g1, g2 gibt es σ ∈PSL2(R) mit σ(g1) = g2. 84 4.3. HYPERBOLISCHE GEOMETRIE @@ -4088,46 +4087,46 @@ Abbildung 4.23: Inversion am Kreis Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. Definition 65 -Seien z1, z2, z3, z4 ∈ C paarweise verschieden. +Seien z1, z2, z3, z4 ∈C paarweise verschieden. Dann heißt DV(z1, z2, z3, z4) := z1−z4 z1−z2 z3−z4 z3−z2 -= (z1 − z4) · (z3 − z2) -(z1 − z2) · (z3 − z4) += (z1 −z4) · (z3 −z2) +(z1 −z2) · (z3 −z4) Doppelverhältnis von z1, . . . , z4. Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a) DV(z1, . . . , z4) ∈ C \ { 0, 1 } +a) DV(z1, . . . , z4) ∈C \ { 0, 1 } b) DV(z1, z4, z3, z2) = 1 DV(z1,z2,z3,z4) c) DV(z3, z2, z1, z4) = 1 DV(z1,z2,z3,z4) -d) DV ist auch wohldefiniert, wenn eines der zi = ∞ oder wenn zwei der zi gleich sind. -e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈ { 0, 1, ∞ } ist zugelassen). -f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ { ∞ } ist +d) DV ist auch wohldefiniert, wenn eines der zi = ∞oder wenn zwei der zi gleich sind. +e) DV(0, 1, ∞, z4) = z4 (Der Fall z4 ∈{ 0, 1, ∞} ist zugelassen). 
+f) Für σ ∈PSL2(C) und z1, . . . , z4 ∈C ∪{ ∞} ist DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) und für σ(z) = 1 z gilt DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) -g) DV(z1, z2, z3, z4) ∈ R ∪ { ∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden. +g) DV(z1, z2, z3, z4) ∈R ∪{ ∞} ⇔z1, . . . , z4 liegen auf einer hyperbolischen Geraden. Beweis: a) DV(z1, . . . , z4) ̸= 0, da zi paarweise verschieden DV(z1, . . . , z4) ̸= 1, da: Annahme: DV(z1, . . . , z4) = 1 -⇔ (z1 − z2)(z3 − z4) = (z1 − z4)(z3 − z2) +⇔(z1 −z2)(z3 −z4) = (z1 −z4)(z3 −z2) 85 4.3. HYPERBOLISCHE GEOMETRIE -⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4 -⇔ z2z3 + z1z4 = z3z4 + z1z2 -⇔ z2z3 − z3z4 = z1z2 − z1z4 -⇔ z3(z2 − z4) = z1(z2 − z4) -⇔ z3 = z1 oder z2 = z4 -Alle zi sind paarweise verschieden ⇒ Widerspruch +⇔z1z3 −z2z3 −z1z4 + z2z4 = z1z3 −z3z4 −z1z2 + z2z4 +⇔z2z3 + z1z4 = z3z4 + z1z2 +⇔z2z3 −z3z4 = z1z2 −z1z4 +⇔z3(z2 −z4) = z1(z2 −z4) +⇔z3 = z1 oder z2 = z4 +Alle zi sind paarweise verschieden ⇒Widerspruch ■ b) DV(z1, z4, z3, z2) = (z1−z2)·(z3−z4) (z1−z4)·(z3−z2) = @@ -4141,31 +4140,31 @@ d) Zwei der zi dürfen gleich sein, da: Fall 1 z1 = z4 oder z3 = z2 In diesem Fall ist DV(z1, . . . , z4) = 0 Fall 2 z1 = z2 oder z3 = z4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞ gilt. +Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) = ∞gilt. Fall 3 z1 = z3 oder z2 = z4 Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1. -Im Fall, dass ein zi = ∞ ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞ +Im Fall, dass ein zi = ∞ist, ist entweder DV(0, 1, ∞, z4) = 0 oder DV(0, 1, ∞, z4)±∞ e) DV(0, 1, ∞, z4) = (0−z4)·(∞−1) (0−1)·(∞−z4) = z4·(∞−1) ∞−z4 = z4 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solches σ existiert, da man +g) Sei σ ∈PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. 
Ein solches σ existiert, da man drei Parameter von σ wählen darf. Bem. 70.f ⇒ DV(z1, . . . , z4) = DV(0, 1, ∞, σ(z4)) ⇒ -DV(z1, . . . , z4) ∈ R ∪ { ∞ } -⇔ σ(z4) ∈ R ∪ { ∞ } -Behauptung folgt, weil σ−1(R ∪ ∞) ein Kreis oder eine Gerade in C ist. +DV(z1, . . . , z4) ∈R ∪{ ∞} +⇔σ(z4) ∈R ∪{ ∞} +Behauptung folgt, weil σ−1(R ∪∞) ein Kreis oder eine Gerade in C ist. Definition 66 -Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die -„Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }. +Für z1, z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die +„Schnittpunkte“ von gz1,z2 mit R ∪{ ∞}. Dann sei dH(z1, z2) := 1 2| ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik. -Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 -die „Schnittpunkte“ von gz1,z2 mit R ∪ { ∞ }. +Beh.: Für z1, z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 +die „Schnittpunkte“ von gz1,z2 mit R ∪{ ∞}. Dann gilt: 1 2| ln DV(a1, z1, a2, z2)| = 1 @@ -4176,7 +4175,7 @@ DV(a1, z1, a2, z2) = DV(a2, z1, a1, z2) Außerdem gilt: ln 1 -x = ln x−1 = (−1) · ln x = − ln x +x = ln x−1 = (−1) · ln x = −ln x 86 4.3. HYPERBOLISCHE GEOMETRIE @@ -4191,21 +4190,21 @@ Beh.: Die hyperbolische Metrik ist eine Metrik auf H. Beweis: Wegen Bemerkung 70.f ist d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) = ∞ d. h. σ(gz1,z2) = iR (imaginäre Achse). -also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b. +also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈R und a < b. 2d(ia, ib) =| ln DV(0, ia, ∞, ib) | -=| ln (0 − ib)(∞ − ia) -(0 − ia)(∞ − ib) | +=| ln (0 −ib)(∞−ia) +(0 −ia)(∞−ib) | =| ln b a | -= ln b − ln a -Also: d(z1, z2) ≥ 0, d(z1, z2) = 0 ⇔ z1 = z2 += ln b −ln a +Also: d(z1, z2) ≥0, d(z1, z2) = 0 ⇔z1 = z2 2d(z2, z1) =| ln DV(a2, z2, a1, z1) | =| ln DV(∞, ib, 0, ia) | Bem. 
70.b = | ln DV(0, ib, ∞, ia) | = 2d(z1, z2) -Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) = +Liegen drei Punkte z1, z2, z3 ∈C auf einer hyperbolischen Geraden, so gilt d(z1, z3) = d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt). Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die Vorlesung „Hyperbolische Geometrie“ verwiesen. @@ -4218,60 +4217,60 @@ aber Axiom §5 ist verletzt. 4.3. HYPERBOLISCHE GEOMETRIE Übungsaufgaben Aufgabe 8 -Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels -∠PQR ist der Winkel, der aus den Halbgeraden QP − und QR− gebildet wird. Die -Nebenwinkel von ∠PQR sind die von QP + und QR− bzw. QP − und QR+ gebildeten +Seien (X, d) eine absolute Ebene und P, Q, R ∈X Punkte. Der Scheitelwinkel des Winkels +∠PQR ist der Winkel, der aus den Halbgeraden QP −und QR−gebildet wird. Die +Nebenwinkel von ∠PQR sind die von QP + und QR−bzw. QP −und QR+ gebildeten Winkel. Zeigen Sie: (a) Die beiden Nebenwinkel von ∠PQR sind gleich. (b) Der Winkel ∠PQR ist gleich seinem Scheitelwinkel. Aufgabe 9 -Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von -Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y . +Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆X von +Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈Y . Zeigen Sie: (a) Ist △ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die Winkel ∠ABC und ∠BCA gleich. (b) Ist △ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel gegenüber und umgekehrt. -(c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit -P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g +(c) Sind g eine Gerade und P /∈g ein Punkt, so gibt es eine eindeutige Gerade h mit +P ∈h und die g im rechten Winkel schneidet. 
Diese Grade heißt Lot von P auf g und der Schnittpunkt des Lots mit g heißt Lotfußpunkt. Aufgabe 10 -Seien f, g, h ∈ G und paarweise verschieden. -Zeigen Sie: f ∥ g ∧ g ∥ h ⇒ f ∥ h +Seien f, g, h ∈G und paarweise verschieden. +Zeigen Sie: f ∥g ∧g ∥h ⇒f ∥h Aufgabe 11 Beweise den Kongruenzsatz SSS. 5 Krümmung Definition 67 -Sei f : [a, b] → Rn eine eine Funktion aus C∞. Dann heißt f Kurve. +Sei f : [a, b] →Rn eine eine Funktion aus C∞. Dann heißt f Kurve. 5.1 Krümmung von Kurven Definition 68 -Sei γ : I = [a, b] → Rn eine Kurve. +Sei γ : I = [a, b] →Rn eine Kurve. a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: ∥γ′(t)∥2 = 1 -∀t ∈ I +∀t ∈I Dabei ist γ′(t) = (γ′ 1(t), γ′ 2(t), . . . , γ′ n(t)). b) l(γ) = -� b +R b a ∥γ′(t)∥dt heißt Länge von γ. Bemerkung 71 (Eigenschaften von Kurven I) -Sei γ : I = [a, b] → Rn eine C∞-Funktion. -a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b − a. -b) Ist γ durch Bogenlänge parametrisiert, so ist γ′(t) orthogonal zu γ′′(t) für alle t ∈ I. +Sei γ : I = [a, b] →Rn eine C∞-Funktion. +a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b −a. +b) Ist γ durch Bogenlänge parametrisiert, so ist γ′(t) orthogonal zu γ′′(t) für alle t ∈I. Beweis: a) l(γ) = -� b +R b a ∥γ′(t)∥dt = -� b -a 1dt = b − a. -b) Im Folgenden wird die Aussage nur für γ : [a, b] → R2 bewiesen. Allerdings funktioniert +R b +a 1dt = b −a. +b) Im Folgenden wird die Aussage nur für γ : [a, b] →R2 bewiesen. Allerdings funktioniert der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. -1 = ∥γ′(t)∥ = ∥γ′(t)∥2 = ⟨γ′(t), γ′(t)⟩ -⇒ 0 = d +1 = ∥γ′(t)∥= ∥γ′(t)∥2 = ⟨γ′(t), γ′(t)⟩ +⇒0 = d dt⟨γ′(t), γ′(t)⟩ = d dt(γ′ @@ -4286,145 +4285,145 @@ dt(γ′ 2(t)) = 2 · ⟨γ′′(t), γ′(t)⟩ Definition 69 -Sei γ : I → R2 eine durch Bogenlänge parametrisierte Kurve. -a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt: -⟨n(t), γ′(t)⟩ = 0, ∥n(t)∥ = 1 und det((γ′(t), n(t))) = +1 +Sei γ : I →R2 eine durch Bogenlänge parametrisierte Kurve. 
+a) Für t ∈I sei n(t) Normalenvektor an γ in t wenn gilt: +⟨n(t), γ′(t)⟩= 0, ∥n(t)∥= 1 und det((γ′(t), n(t))) = +1 89 5.1. KRÜMMUNG VON KURVEN -b) Seit κ : I → R so, dass gilt: +b) Seit κ : I →R so, dass gilt: γ′′(t) = κ(t) · n(t) Dann heißt κ(t) Krümmung von γ in t. Da n(t) und γ′′(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). Beispiel 45 Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt: γ(t) = -� + r · cos t r, r · sin t r -� -für t ∈ [0, 2πr] + +für t ∈[0, 2πr] ist parametrisiert durch Bogenlänge, da gilt: γ′(t) = -� + (r · 1 -r)(− sin t +r)(−sin t r), r1 r cos t r -� + = -� -− sin t + +−sin t r, cos t r -� + Der Normalenvektor von γ in t ist n(t) = -� -− cos t -r, − sin t + +−cos t +r, −sin t r -� + da gilt: -⟨n(t), γ′(t)⟩ = -��− cos t +⟨n(t), γ′(t)⟩= +−cos t r -− sin t +−sin t r -� + , -�− sin t +−sin t r cos t r -�� -= (− cos t -r) · (− sin t -r) + (− sin t + += (−cos t +r) · (−sin t +r) + (−sin t r) · (cos t r) = 0 -∥n(t)∥ = -����(− cos t -r, − sin t +∥n(t)∥= + (−cos t +r, −sin t r) -���� -= (− cos t -r)2 + (− sin t + += (−cos t +r)2 + (−sin t r)2 = 1 det(γ′ 1(t), n(t)) = -���� -�− sin t + +−sin t r -− cos t +−cos t r cos t r -− sin t +−sin t r -����� -= (− sin t -r)2 − (− cos t + += (−sin t +r)2 −(−cos t r) · cos t r = 1 Die Krümmung ist für jedes t konstant 1 r, da gilt: γ′′(t) = -� + −1 r cos t r, −1 r sin t r -� + = 1 r · -� -− cos t -r, − sin t + +−cos t +r, −sin t r -� -⇒ κ(t) = 1 + +⇒κ(t) = 1 r 90 5.2. TANGENTIALEBENE Definition 70 -Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve. -a) Für t ∈ I heißt κ(t) := ∥γ′′(t)∥ die Krümmung von γ in t. -b) Ist für t ∈ I die Ableitung γ′′(t) ̸= 0, so heißt +Sei γ : I →R3 eine durch Bogenlänge parametrisierte Kurve. +a) Für t ∈I heißt κ(t) := ∥γ′′(t)∥die Krümmung von γ in t. +b) Ist für t ∈I die Ableitung γ′′(t) ̸= 0, so heißt γ′′(t) -∥γ′′(t)∥ Normalenvektor an γ in t. +∥γ′′(t)∥Normalenvektor an γ in t. 
c) b(t) sei ein Vektor, der γ′(t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt. Also gilt: det(γ′(t), n(t), b(t)) = 1 b(t) heißt Binormalenvektor, die Orthonormalbasis -� + γ′(t), n(t), b(t) -� + heißt begleitendes Dreibein. Bemerkung 72 (Eigenschaften von Kurven II) -Sei γ : I → R3 durch Bogenlänge parametrisierte Kurve. +Sei γ : I →R3 durch Bogenlänge parametrisierte Kurve. a) n(t) ist orthogonal zu γ′(t). b) b(t) aus Definition 70.c ist eindeutig. 5.2 Tangentialebene Erinnerung Sie sich an Definition 32 „reguläre Fläche“. Äquivalent dazu ist: S ist lokal von der Form V (f) = -� -x ∈ R3 �� f(x) = 0 -� -für eine C∞-Funktion f : R3 → R. + +x ∈R3 f(x) = 0 + +für eine C∞-Funktion f : R3 →R. Definition 71 -Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um -s ∈ V : -(u, v) �→ (x(u, v), y(u, v), z(u, v)) -Für p = F −1(s) ∈ U sei +Sei S ⊆R3 eine reguläre Fläche, s ∈S, F : U →V ∩S eine lokale Parametrisierung um +s ∈V : +(u, v) 7→(x(u, v), y(u, v), z(u, v)) +Für p = F −1(s) ∈U sei JF (p) =   @@ -4442,8 +4441,8 @@ JF (p) = ∂v(p)   -und DpF : R2 → R3 die durch JF (p) definierte lineare Abbildung. -Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈ S. +und DpF : R2 →R3 die durch JF (p) definierte lineare Abbildung. +Dann heißt TsS := Bild(DpF) die Tangentialebene an s ∈S. Bemerkung 73 (Eigenschaften der Tangentialebene) a) TsS ist 2-dimensionaler Untervektorraum von R3. b) TsS = ⟨˜u, ˜v⟩, wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix JF (p) sind. @@ -4451,45 +4450,46 @@ c) TsS hängt nicht von der gewählten Parametrisierung ab. 91 5.2. TANGENTIALEBENE -d) Sei S = V (f) eine reguläre Fläche in R3, also f : V → R eine C∞-Funktion, V ⊆ R3 -offen, grad(f)(x) ̸= 0 für alle x ∈ S. -Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S. +d) Sei S = V (f) eine reguläre Fläche in R3, also f : V →R eine C∞-Funktion, V ⊆R3 +offen, grad(f)(x) ̸= 0 für alle x ∈S. +Dann ist TsS = (grad(f)(s))⊥für jedes s ∈S. 
Beweis: a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2. b) Hier kann man wie in Punkt a) argumentieren -c) TsS = {x ∈ R3|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ(0) = +c) TsS = {x ∈R3|∃parametrisierte Kurve γ : [−ε, +ε] →S für ein ε > 0 mit γ(0) = s und γ′(0) = x} Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -d) Sei x ∈ TsS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ′(0) = s, -sodass γ′(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 -⇒ 0 = (f ◦ γ)′(0) = ⟨grad(f)(γ(0)), γ′(0)⟩ -⇒ TsS ⊆ grad(f)(s)⊥ +d) Sei x ∈TsS, γ : [−ε, +ε] →S eine parametrisierte Kurve mit ε > 0 und γ′(0) = s, +sodass γ′(0) = x gilt. Da γ(t) ∈S für alle t ∈[−ε, ε], ist f ◦γ = 0 +⇒0 = (f ◦γ)′(0) = ⟨grad(f)(γ(0)), γ′(0)⟩ +⇒TsS ⊆grad(f)(s)⊥ dim=2 -====⇒ TsS = (grad(f)(s))⊥ +====⇒TsS = (grad(f)(s))⊥ Definition 72 -a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 ist eine Abbildung n : S → S2 ⊆ -R3 mit n(s) ∈ TsS⊥ für jedes s ∈ S. +a) Ein Normalenfeld auf der regulären Fläche S ⊆R3 ist eine Abbildung n : S →S2 ⊆ +R3 mit n(s) ∈TsS⊥für jedes s ∈S. b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. Im Folgenden werden diese Begriffe jedoch synonym benutzt. Bemerkung 74 (Eigenschaften von Normalenfeldern) a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞). -b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 von s und eine lokale Parametrisierung -F : U → V von S um s, sodass auf F(U) = V ∩ S ein stetiges Normalenfeld existiert. +b) Zu jedem s ∈S gibt es eine Umgebung V ⊆R3 von s und eine lokale Parametrisierung +F : U →V von S um s, sodass auf F(U) = V ∩S ein stetiges Normalenfeld existiert. 
c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen -Parametrisierungen Fi : Ui → Vi, i ∈ I gibt, sodass für alle i, j ∈ F und alle -s ∈ Vi ∩ Vj ∩ S gilt: +Parametrisierungen Fi : Ui →Vi, i ∈I gibt, sodass für alle i, j ∈F und alle +s ∈Vi ∩Vj ∩S gilt: det(Ds -� Vi→Vj -�� -� -Fj ◦ F −1 +Vi→Vj +z +}| +{ +Fj ◦F −1 i -� -�� -� +| +{z +} ∈R3×3 ) > 0 Beweis: Wird hier nicht geführt. @@ -4504,11 +4504,11 @@ lenfeld, aber kein stetiges Normalenfeld. Abbildung 5.1: Möbiusband 5.3 Gauß-Krümmung Bemerkung 75 -Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ TsS, ∥x∥ = 1. +Sei S eine reguläre Fläche, s ∈S, n(s) ist ein Normalenvektor in s, x ∈TsS, ∥x∥= 1. Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3. -Dann gibt es eine Umgebung V ⊆ R3 von s, sodass -C := (s + E) ∩ S ∩ V -das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] → S enthält mit γ(0) = s +Dann gibt es eine Umgebung V ⊆R3 von s, sodass +C := (s + E) ∩S ∩V +das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] →S enthält mit γ(0) = s und γ′(0) = x. Beweis: „Satz über implizite Funktionen“1 Definition 73 @@ -4517,28 +4517,28 @@ In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in d Man schreibt: κNor(s, x) := κγ(0) Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. Beispiel 47 (Gauß-Krümmung) -1) S = S2 = V (X2 + Y 2 + Z2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id, +1) S = S2 = V (X2 + Y 2 + Z2 −1) ist die Kugel um den Ursprung mit Radius 1, n = id, s = (0, 0, 1), x = (1, 0, 0) -⇒ E = R · x + R · n(s) (x, z-Ebene) -C = E ∩ S ist Kreislinie +⇒E = R · x + R · n(s) (x, z-Ebene) +C = E ∩S ist Kreislinie κNor(s, x) = 1 r = 1 -2) S = V (X2 + Z2 − 1) ⊆ R3 ist ein Zylinder (siehe Abbildung 5.2a). 
s = (1, 0, 0) -x1 = (0, 1, 0) ⇒ E1 = R · e1 + R · e2 (x, y-Ebene) -S ∩ E1 = V (X2 + Y 2 − 1) ∩ E, Kreislinie in E -⇒ κNor(s, x1) = ±1 +2) S = V (X2 + Z2 −1) ⊆R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0) +x1 = (0, 1, 0) ⇒E1 = R · e1 + R · e2 (x, y-Ebene) +S ∩E1 = V (X2 + Y 2 −1) ∩E, Kreislinie in E +⇒κNor(s, x1) = ±1 x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene) 1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II 93 5.3. GAUSS-KRÜMMUNG -V ∩ E2 ∩ S = -� -(1, 0, z) ∈ R3 �� z ∈ R -� +V ∩E2 ∩S = + +(1, 0, z) ∈R3 z ∈R + ist eine Gerade -⇒ κNor(s, x2) = 0 -3) S = V (X2 − Y 2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) +⇒κNor(s, x2) = 0 +3) S = V (X2 −Y 2 −Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) x1 = (1, 0, 0), n(s) = (0, 0, 1) x2 = (0, 1, 0) κNor(s, x1) = @@ -4563,7 +4563,7 @@ x2 = (0, 1, 0) x y z -(a) S = V (X2 + Z2 − 1) +(a) S = V (X2 + Z2 −1) −2 −1.5 −1 @@ -4590,30 +4590,30 @@ z 2 4 f(x, y) -(b) S = V (X2 − Y 2 − Z) +(b) S = V (X2 −Y 2 −Z) Abbildung 5.2: Beispiele für reguläre Flächen Definition 74 -Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. -γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und +Sei S ⊆R3 eine reguläre Fläche, s ∈S und n ein stetiges Normalenfeld auf S. +γ : [−ε, ε] →S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und γ′′(0) ̸= 0. Sei n(0) := γ′′(0) ∥γ′′(0)∥. Zerlege -n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS)⊥ -Dann ist n(0)⊥ = ⟨n(0), n(s)⟩ · n(s) -κNor(s, γ) := ⟨γ′′(0), n(s)⟩ die Normalkrümmung. +n(0) = n(0)t + n(0)⊥mit n(0)t ∈TsS und n(0)⊥∈(TsS)⊥ +Dann ist n(0)⊥= ⟨n(0), n(s)⟩· n(s) +κNor(s, γ) := ⟨γ′′(0), n(s)⟩die Normalkrümmung. Bemerkung 76 -Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ). +Sei γ(t) = γ(−t), t ∈[−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ). Beweis: γ′′(0) = γ′′(0), da γ′(0) = −γ′(0). 
Es gilt: κNor(s, γ) hängt nur von |γ′(0)| ab und ist gleich κNor(s, γ′(0)). Bemerkung 77 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. Sei T 1 -s S = { x ∈ TsS | ∥x∥ = 1 } ∼= S1. Dann ist +s S = { x ∈TsS | ∥x∥= 1 } ∼= S1. Dann ist κn Nor(s) : T 1 -s S → R, -x �→ κNor(s, x) +s S →R, +x 7→κNor(s, x) eine glatte Funktion und Bild κn Nor(s) ist ein abgeschlossenes Intervall. Definition 75 @@ -4623,21 +4623,21 @@ Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. 5.3. GAUSS-KRÜMMUNG a) κn 1(s) : = min -� + κn Nor(s, x) -�� x ∈ T 1 + x ∈T 1 s S -� + und κn 2(s) : = max -� + κn Nor(s, x) -�� x ∈ T 1 + x ∈T 1 s S -� + heißen Hauptkrümmungen von S in s. b) K(s) := κn 1(s) · κn @@ -4646,9 +4646,9 @@ Bemerkung 78 Ersetzt man n durch −n, so gilt: κ−n Nor(s, x) = −κn -Nor(x) ∀x ∈ T 1 +Nor(x) ∀x ∈T 1 s S -⇒ κ−n +⇒κ−n 1 (s) = −κn 2(s) κ−n @@ -4656,48 +4656,48 @@ s S 1(s) und K−n(s) = Kn(s) =: K(s) Beispiel 48 -1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S2 -⇒ K(s) = 1 +1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈S2 +⇒K(s) = 1 2) Zylinder: -κ1(s) = 0, κ2(s) = 1 ⇒ K(s) = 0 +κ1(s) = 0, κ2(s) = 1 ⇒K(s) = 0 3) Sattelpunkt auf hyperbolischem Paraboloid: -κ1(s) < 0, κ2(s) = 0 → K(s) < 0 +κ1(s) < 0, κ2(s) = 0 →K(s) < 0 4) S = Torus. Siehe Abbildung 5.3 s1 s2 s3 Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0 Bemerkung 79 -Sei S eine reguläre Fläche, s ∈ S ein Punkt. +Sei S eine reguläre Fläche, s ∈S ein Punkt. 95 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s. b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s. 5.4 Erste und zweite Fundamentalform -Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine +Sei S ⊆R3 eine reguläre Fläche, s ∈S, TsS die Tangentialebene an S in s und F : U →V eine lokale Parametrisierung von S um s. Weiter sei p := F −1(s). 
Definition 76 -Sei IS ∈ R2×2 definiert als +Sei IS ∈R2×2 definiert als IS : = -�g1,1(s) +g1,1(s) g1,2(s) g1,2(s) g2,2(s) -� + = -�E(s) +E(s) F(s) F(s) G(s) -� + mit gi,j = gs(DpF(ei), DpF(ej)) -= ⟨ ∂F += ⟨∂F ∂ui (p), ∂F ∂uj (p)⟩ -i, j ∈ { 1, 2 } +i, j ∈{ 1, 2 } Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F. Bemerkung 80 a) Die Einschränkung des Standardskalarproduktes des R3 auf TsS macht TsS zu einem @@ -4708,13 +4708,13 @@ kung 80.a die Darstellungsmatrix IS. d) gi,j(s) ist eine differenzierbare Funktion von s. Bemerkung 81 det(IS) = -���� + ∂F ∂u1 (p) × ∂F ∂u2 (p) -���� + 2 Beweis: Sei ∂F ∂u1 (p) = @@ -4724,7 +4724,7 @@ x1 x2 x3  - , +, ∂F ∂u2 (p) =  @@ -4743,31 +4743,31 @@ z1 z2 z3  - mit -z1 = x2y3 − x3y2 -z2 = x3y1 − x1y3 -z3 = x1y2 − x2y1 -⇒ ∥ ∂F +mit +z1 = x2y3 −x3y2 +z2 = x3y1 −x1y3 +z3 = x1y2 −x2y1 +⇒∥∂F ∂u1 (p) × ∂F ∂u2 -(p)∥ = z2 +(p)∥= z2 1 + z2 2 + z2 3 96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -det(IS) = g1,1g2,2 − g2 +det(IS) = g1,1g2,2 −g2 1,2 = -� +*  x1 x2 x3  - , +,   x1 @@ -4775,13 +4775,13 @@ x2 x3   -� � ++ *  y1 y2 y3  - , +,   y1 @@ -4789,15 +4789,15 @@ y2 y3   -� ++ − -� +*  x1 x2 x3  - , +,   y1 @@ -4805,78 +4805,78 @@ y2 y3   -�2 ++2 = (x2 1 + x2 2 + x2 3)(y2 1 + y2 2 + y2 -3) − (x1y1 + x2y2 + x3y3)2 +3) −(x1y1 + x2y2 + x3y3)2 Definition 77 a) Das Differential dA = -� +p det(I)du1du2 heißt Flächenelement von S bzgl. der Para- metrisierung F. -b) Für eine Funktion f : V → R heißt -� +b) Für eine Funktion f : V →R heißt +Z V fdA := -� +Z U f(F(u1, u2) -� -�� -� +| +{z +} =:s ) -� +p det I(s)du1du2 der Wert des Integrals von f über V , falls das Integral rechts existiert. Bemerkung 82 a) -� +R V fdA ist unabhängig von der gewählten Parametrisierung. -b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. +b) Sei f : S →R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. Dann ist -� +R S fdA wohldefiniert, falls (z. B.) 
S kompakt ist. Etwa: -� +Z S fdA = n -� +X i=1 -� +Z Vi fdA − -� +X i̸=j -� +Z Vi∩Vj fdA + -� +X i,j,k -� +Z Vi∩Vj∩Vk fdA -− . . . +−. . . Beweis: a) Mit Transformationsformel. b) Ist dem Leser überlassen. Proposition 5.1 -Sei S ⊆ R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S2. +Sei S ⊆R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S →S2. Dann gilt: -a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S2 durch +a) n induziert für jedes s ∈S eine lineare Abbildung dsn : TsS →Tn(s)S2 durch dsn(x) = d dtn(s„+“tx -� �� � +| {z } Soll auf Fläche S bleiben ) -��� + t=0 Die Abbildung dsn heißt Weingarten-Abbildung @@ -4891,110 +4891,110 @@ Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM Beweis: a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -b) Tn(S)S2 = ⟨n(s)⟩⊥ = TsS +b) Tn(S)S2 = ⟨n(s)⟩⊥= TsS c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus. -d) Zu zeigen: ∀x, y ∈ IsS : ⟨x, dsn(y)⟩ = ⟨dsn(x), y⟩ +d) Zu zeigen: ∀x, y ∈IsS : ⟨x, dsn(y)⟩= ⟨dsn(x), y⟩ Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. Sei xi = DpF(ei) = ∂F ∂ui (p) i = 1, 2 -Beh.: ⟨xi, dsn(xj)⟩ = ⟨ +Beh.: ⟨xi, dsn(xj)⟩= ⟨ ∂2F ∂ui∂uj (p), dsn(xi)⟩ -⇒ ⟨ +⇒⟨ ∂2F -∂ui∂uj (p), dsn(xi)⟩ = ⟨xj, dsn(xi)⟩ +∂ui∂uj (p), dsn(xi)⟩= ⟨xj, dsn(xi)⟩ Bew.: 0 = ⟨∂F ∂u (p + tej), n(p + tej)⟩ -⇒ 0 = d +⇒0 = d dt -� + ⟨∂F ∂u (p + tej), n(p + tej)⟩ -���� + t=0 -= ⟨ d += ⟨d dt ∂F ∂ui (p + tej) -� -�� -� +| +{z +} ∂2F ∂uj∂ui (p) -��� -t=0, n(s)⟩ + ⟨xi, dsn DpF(ej) -� -�� -� + +t=0, n(s)⟩+ ⟨xi, dsn DpF(ej) +| +{z +} xj ⟩ Definition 78 Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamental- form von S in s bzgl. F. 
-Man schreibt: IIs(x, y) = ⟨−dsn(x), y⟩ = Is(−dsn(x), y) +Man schreibt: IIs(x, y) = ⟨−dsn(x), y⟩= Is(−dsn(x), y) Bemerkung 83 Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix (h(s) -i,j )i,j=1,2 mit hi,j(s) = ⟨ ∂2F +i,j )i,j=1,2 mit hi,j(s) = ⟨∂2F ∂ui∂uj (p), n(s)⟩ Proposition 5.2 -Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: +Sei γ : [−ε, ε] →S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: κNor(s, γ) = IIs(γ′(0), γ′(0)) Beweis: Nach Definition 74 ist κNor(s, γ) = ⟨γ′′(0), n(s)⟩. Nach Voraussetzung gilt -n(γ(t)) ⊥ γ′(t) ⇔ ⟨γ′′(0), n(s)⟩ = 0 +n(γ(t)) ⊥γ′(t) ⇔⟨γ′′(0), n(s)⟩= 0 Die Ableitung nach t ergibt 0 = d dt(⟨n(γ(t)), γ′(t)) = -� d + d dtn(γ(t)) -��� + t=0, γ′(0) -� + + ⟨n(s), γ′′(0)⟩ 99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -= ⟨dsn(γ′(0)), γ′(0)⟩ + κNor(s, γ) += ⟨dsn(γ′(0)), γ′(0)⟩+ κNor(s, γ) = −IIs(γ′(0), γ′(0)) + κNor(s, γ) Folgerung 5.3 Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: κNor(s, γ) = κNor(s, γ′(0)) Satz 5.4 -Sei S ⊆ R3 eine reguläre, orientierbare Fläche und s ∈ S. +Sei S ⊆R3 eine reguläre, orientierbare Fläche und s ∈S. a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs. b) Für die Gauß-Krümmung gilt: K(s) = det(IIs) Beweis: a) IIs ist symmetrisch, IsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von -IIs. Ist x ∈ TsS, ∥x∥ = 1, so gibt es ϕ ∈ [0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2. +IIs. Ist x ∈TsS, ∥x∥= 1, so gibt es ϕ ∈[0, 2π) mit x = cos ϕ · y1 + sin ϕ · y2. Seien λ1, λ2 die Eigenwerte von IIs, also IIs(yi, yi) = λi. Dann gilt: IIs(x, x) = cos2 ϕλ1 + sin2 ϕλ2 -= (1 − sin2 ϕ)λ1 + sin2 ϕλ2 -= λ1 + sin2 ϕ(λ2 − λ1) ≥ λ1 -= cos2 ϕ + (1 − cos2 ϕ)λ2 -= λ2 − cos2 ϕ(λ2 − λ1) ≤ λ2 += (1 −sin2 ϕ)λ1 + sin2 ϕλ2 += λ1 + sin2 ϕ(λ2 −λ1) ≥λ1 += cos2 ϕ + (1 −cos2 ϕ)λ2 += λ2 −cos2 ϕ(λ2 −λ1) ≤λ2 Prop. 
5.2 -=====⇒ λ1 = min -� +=====⇒λ1 = min + κNor(s, x) -�� x ∈ T 1 + x ∈T 1 s S -� + λ2 = max -� + κNor(s, x) -�� x ∈ T 1 + x ∈T 1 s S -� + Satz 5.5 (Satz von Gauß-Bonnet) -Sei S ⊆ R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: -� +Sei S ⊆R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: +Z S K(s)dA = 2πχ(S) Dabei ist χ(S) die Euler-Charakteristik von S. @@ -5004,33 +5004,33 @@ Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen w Lösungen der Übungsaufgaben Lösung zu Aufgabe 1 Teilaufgabe a) Es gilt: -(i) ∅, X ∈ TX. +(i) ∅, X ∈TX. (ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈ -TX : U1 ∩ U2 ∈ TX. +TX : U1 ∩U2 ∈TX. (iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine -beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : � -i∈I Ui ∈ TX +beliebige Indexmenge I und alle Ui ∈TX für alle i ∈I : S +i∈I Ui ∈TX Also ist (X, TX) ein topologischer Raum. Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x ̸= y und die einzige Umgebung von x -ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. +ist X. Da y = 0 ∈X können also x und y nicht durch offene Mengen getrennt werden. (X, TX) ist also nicht hausdorffsch. Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, TX) nach (b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, TX) kein metrischer Raum sein kann. Lösung zu Aufgabe 2 Teilaufgabe a) -Beh.: ∀a ∈ Z : { a } ist abgeschlossen. -Sei a ∈ Z beliebig. Dann gilt: +Beh.: ∀a ∈Z : { a } ist abgeschlossen. +Sei a ∈Z beliebig. Dann gilt: Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. Teilaufgabe b) Beh.: { −1, 1 } ist nicht offen Bew.: durch Widerspruch Annahme: { −1, 1 } ist offen. -Dann gibt es T ⊆ B, sodass � -M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele +Dann gibt es T ⊆B, sodass S +M∈T M = { −1, 1 }. 
Aber alle U ∈B haben unendlich viele Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente -⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist +⇒keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒{ −1, 1 } ist nicht offen. ■ Teilaufgabe c) @@ -5039,97 +5039,96 @@ Beh.: Es gibt unendlich viele Primzahlen. 101 Lösungen der Übungsaufgaben Bew.: durch Widerspruch -Annahme: Es gibt nur endlich viele Primzahlen p ∈ P +Annahme: Es gibt nur endlich viele Primzahlen p ∈P Dann ist Z \ { −1, +1 } FS d. Arithmetik = -� +[ p∈P U0,p endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich. ■ Lösung zu Aufgabe 3 (a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form -� +Y j∈J Uj × -� +Y i∈N,i̸=j Pi -wobei J ⊆ N endlich und Uj ⊆ Pj offen ist. +wobei J ⊆N endlich und Uj ⊆Pj offen ist. Beweis: Nach Definition der Produkttopologie bilden Mengen der Form -� +Y i∈J Uj × -� +Y i∈N\J Pi -wobei J ⊆ N endlich und Uj ⊆ Pj offen ∀j ∈ J eine Basis der Topologie. +wobei J ⊆N endlich und Uj ⊆Pj offen ∀j ∈J eine Basis der Topologie. Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen Form. ■ (b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. -Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangs- -komponente Z ⊆ P. Da Z zusammenhängend ist und ∀i ∈ I : pi : P → Pi ist -stetig, ist pi(Z) ⊆ Pi zusammenhängend für alle i ∈ N. Die zusammenhängenden -Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder -pi(Z) ⊆ { 0 } oder pi(Z) ⊆ { 1 }. Es sei zi ∈ { 0, 1 } so, dass pi(Z) ⊆ { zi } für -alle i ∈ N. Dann gilt also: +Beweis: Es seinen x, y ∈P und x sowie y liegen in der gleichen Zusammenhangs- +komponente Z ⊆P. Da Z zusammenhängend ist und ∀i ∈I : pi : P →Pi ist +stetig, ist pi(Z) ⊆Pi zusammenhängend für alle i ∈N. Die zusammenhängenden +Mengen von Pi sind genau { 0 } und { 1 }, d. h. 
für alle i ∈N gilt entweder +pi(Z) ⊆{ 0 } oder pi(Z) ⊆{ 1 }. Es sei zi ∈{ 0, 1 } so, dass pi(Z) ⊆{ zi } für +alle i ∈N. Dann gilt also: pi(x) -� �� � +| {z } =xi = zi = pi(y) -� �� � +| {z } =yi -∀i ∈ N +∀i ∈N Somit folgt: x = y ■ Lösung zu Aufgabe 4 (a) Beh.: GLn(R) ist nicht kompakt. -Bew.: det : GLn(R) → R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 } +Bew.: det : GLn(R) →R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 } nicht kompakt. 22 -⇒ GLn(R) ist nicht kompakt. +⇒GLn(R) ist nicht kompakt. ■ (b) Beh.: SL1(R) ist nicht kompakt, für n > 1 ist SLn(R) kompakt. Bew.: Für SL1(R) gilt: SL1(R) = -� -A ∈ R1×1 �� det A = 1 -� + +A ∈R1×1 det A = 1 + = -� -1 -� ∼= { 1 }. 22 -⇒ SL1(R) +1 + ∼= { 1 }. 22 +⇒SL1(R) ist kompakt. 102 Lösungen der Übungsaufgaben -SLn(R) ⊆ GLn(R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1 +SLn(R) ⊆GLn(R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1 sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere -nun für für n ∈ N≥2, m ∈ N: +nun für für n ∈N≥2, m ∈N: Am = diagn(m, 1 m, . . . , 1) -Dann gilt: det Am = 1, d. h. Am ∈ SLn(R), und Am ist unbeschränkt, da ∥Am∥∞ = +Dann gilt: det Am = 1, d. h. Am ∈SLn(R), und Am ist unbeschränkt, da ∥Am∥∞= m −−−−→ -m→∞ ∞. +m→∞∞. ■ (c) Beh.: P(R) ist kompakt. Bew.: P(R) ∼= Sn/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil- dung stetig. Da Sn als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt ist 22 -⇒ P(R) ist kompakt. +⇒P(R) ist kompakt. ■ Lösung zu Aufgabe 5 Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. Definition 79 -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G →H eine Abbildung. ϕ heißt Homomorphismus, wenn -∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2) +∀g1, g2 ∈G : ϕ(g1 ∗g2) = ϕ(g1) ◦ϕ(g2) gilt. Es folgt direkt: 1) Sei X = R mit der Standarttopologie und ϕ1 : idR und R = (R, +). 
Dann ist ϕ1 ein Gruppenhomomorphismus und ein Homöomorphismus. -2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G → H, x �→ x mod 3 ein +2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ2 : G →H, x 7→x mod 3 ein Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomor- phismus. 3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine @@ -5141,7 +5140,7 @@ Lösung zu Aufgabe 6 Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf Seite 6. Definition 80 -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G →H eine Abbildung. ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. @@ -5150,82 +5149,82 @@ Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. Lösungen der Übungsaufgaben Lösung zu Aufgabe 7 (a) Vor.: Sei M eine topologische Mannigfaltigkeit. -Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend +Beh.: M ist wegzusammehängend ⇔M ist zusammenhängend Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung direkt aus Bemerkung 23. -„⇐“: Seien x, y ∈ M und -Z := { z ∈ M | ∃Weg von x nach z } +„⇐“: Seien x, y ∈M und +Z := { z ∈M | ∃Weg von x nach z } Es gilt: (i) Z ̸= ∅, da M lokal wegzusammenhängend ist (ii) Z ist offen, da M lokal wegzusammenhängend ist -(iii) ZC := { ˜z ∈ M | ∄Weg von x nach ˜z } ist offen -Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ ZC eine offene und -wegzusammenhängende Umgebung U˜z ⊆ M. -Es gilt sogar U˜z ⊆ ZC, denn gäbe es ein U˜z ∋ z ∈ Z, so gäbe es Wege γ2 : -[0, 1] → M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M, γ1(0) = ˜z, γ1(1) = z. +(iii) ZC := { ˜z ∈M | ∄Weg von x nach ˜z } ist offen +Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ZC eine offene und +wegzusammenhängende Umgebung U˜z ⊆M. 
+Es gilt sogar U˜z ⊆ZC, denn gäbe es ein U˜z ∋z ∈Z, so gäbe es Wege γ2 : +[0, 1] →M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] →M, γ1(0) = ˜z, γ1(1) = z. Dann wäre aber -γ : [0, 1] → M, +γ : [0, 1] →M, γ(x) = -� +( γ1(2x) -falls 0 ≤ x ≤ 1 +falls 0 ≤x ≤1 2 -γ2(2x − 1) +γ2(2x −1) falls 1 -2 < x ≤ 1 -ein stetiger Weg von ˜z nach x ⇒ Widerspruch. +2 < x ≤1 +ein stetiger Weg von ˜z nach x ⇒Widerspruch. Da M zusammenhängend ist und M = Z -���� +|{z} offen -∪ ZC -���� +∪ZC +|{z} offen -, sowie Z ̸= ∅ folgt ZC = ∅. +, sowie Z ̸= ∅folgt ZC = ∅. Also ist M = Z wegzusammenhängend. ■ (b) Beh.: X ist wegzusammenhängend. -Beweis: X := (R \ { 0 }) ∪ { 01, 02 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R. +Beweis: X := (R \ { 0 }) ∪{ 01, 02 } und (R \ { 0 }) ∪{ 02 } sind homöomorph zu R. Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte 01 und 02. Da (R\{ 0 })∪{ 01 } homöomorph zu R ist, exisitert ein Weg γ1 von 01 zu einem -beliebigen Punkt a ∈ R \ { 0 }. -Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein +beliebigen Punkt a ∈R \ { 0 }. +Da (R \ { 0 }) ∪{ 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach 02. ■ Lösung zu Aufgabe 9 -Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈ X und △ABC ein Dreieck. +Vor.: Sei (X, d) eine absolute Ebene, A, B, C ∈X und △ABC ein Dreieck. 104 Lösungen der Übungsaufgaben -(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB +(a) Beh.: AB ∼= AC ⇒∠ABC ∼= ∠ACB Bew.: Sei AB ∼= AC. -⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. -⇒ ϕ(∠ABC) = ∠ACB -⇒ ∠ABC ∼= ∠ACB +⇒∃Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. +⇒ϕ(∠ABC) = ∠ACB +⇒∠ABC ∼= ∠ACB ■ (b) Beh.: Der längeren Seite von △ABC liegt der größere Winkel gegenüber und umge- kehrt. -Bew.: Sei d(A, C) > d(A, B). Nach §3 (i) gibt es C′ ∈ AC+ mit d(A, C′) = d(A, B) -⇒ C′ liegt zwischen A und C. +Bew.: Sei d(A, C) > d(A, B). 
Nach §3 (i) gibt es C′ ∈AC+ mit d(A, C′) = d(A, B) +⇒C′ liegt zwischen A und C. Es gilt ∡ABC′ < ∡ABC und aus Aufgabe 9 (a) folgt: ∡ABC′ = ∡AC′B. ∠BC′A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66 -=====⇒ ∡BC′A > ∡BCA -⇒ ∡BCA < ∡BC′A = ∡ABC′ < ∡ABC Sei umgekehrt ∡ABC > ∡BCA, kann +=====⇒∡BC′A > ∡BCA +⇒∡BCA < ∡BC′A = ∡ABC′ < ∡ABC Sei umgekehrt ∡ABC > ∡BCA, kann wegen 1. Teil von Aufgabe 9 (b) nicht d(A, B) > d(A, C) gelten. Wegen Aufgabe 9 (a) kann nicht d(A, B) = d(A, C) gelten. -⇒ d(A, B) < d(A, C) +⇒d(A, B) < d(A, C) ■ -(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g +(c) Vor.: Sei g eine Gerade, P ∈X und P /∈g Beh.: ∃! Lot Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g. -⇒ ϕ(P)P schneidet g in F. +⇒ϕ(P)P schneidet g in F. Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g -⇒ ϕ(P)P schneidet g in F. -Sei A ∈ g\{ F }. Dann gilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒ ∠AFP ist rechter Winkel. -Gäbe es nun G ∈ g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre △PFG +⇒ϕ(P)P schneidet g in F. +Sei A ∈g\{ F }. Dann gilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒∠AFP ist rechter Winkel. +Gäbe es nun G ∈g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre △PFG ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). · · @@ -5236,16 +5235,16 @@ F g Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π -⇒ G gibt es nicht. +⇒G gibt es nicht. ■ Lösung zu Aufgabe 10 -Sei f ∥ h und o. B. d. A. f ∥ g. -f ∦ h ⇒ f ∩ h ̸= ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele -zu g durch x, da x /∈ g. Diese ist f, da x ∈ f und f ∥ g. Da aber x ∈ h, kann h nicht +Sei f ∥h und o. B. d. A. f ∥g. +f ∦h ⇒f ∩h ̸= ∅, sei also x ∈f ∩h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele +zu g durch x, da x /∈g. Diese ist f, da x ∈f und f ∥g. 
Da aber x ∈h, kann h nicht 105 Lösungen der Übungsaufgaben -parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f ̸= h). ⇒ g ∦ h ■ +parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f ̸= h). ⇒g ∦h ■ Lösung zu Aufgabe 11 Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem △ABC und △A′B′C′ Dreiecke, für die gilt: @@ -5257,7 +5256,7 @@ bzgl. AB wie C. Diese Isometrie existiert wegen §4. Es gilt d(A, C) = d(A′, C′) = d(ϕ(A′), ϕ(C′)) = d(A, ϕ(C′)) und d(B, C) = d(B′, C′) = d(ϕ(B′), ϕ(C′)) = d(B, ϕ(C′)). Bem. 62 -=====⇒ C = ϕ(C). +=====⇒C = ϕ(C). Es gilt also ϕ(△A′B′C′) = △ABC. ■ @@ -5312,42 +5311,42 @@ benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wu aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. Definition 81 -Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 } -mit xn → x0. +Sei D ⊆R und x0 ∈R. x0 heißt ein Häufungspunkt von D :⇔∃Folge xn in D \ { x0 } +mit xn →x0. Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra entnommen: Definition 82 Es seien V und W K-Vektorräume und A(V ) und A(W) die zugehörigen affinen Räume. -Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1 +Eine Abbildung f : V →W heißt affin, falls für alle a, b ∈V und alle λ, µ ∈K mit λ+µ = 1 gilt: f(λa + µb) = λf(a) + µf(b) Definition 83 -Sei V ein Vektorraum und S ⊆ V eine Teilmenge. +Sei V ein Vektorraum und S ⊆V eine Teilmenge. S heißt eine Orthonormalbasis von V , wenn gilt: (i) S ist eine Basis von V -(ii) ∀v ∈ S : ∥v∥ = 1 -(iii) ∀v1, v2 ∈ S : v1 ̸= v2 ⇒ ⟨v1, v2⟩ = 0 +(ii) ∀v ∈S : ∥v∥= 1 +(iii) ∀v1, v2 ∈S : v1 ̸= v2 ⇒⟨v1, v2⟩= 0 Satz (Zwischenwertsatz) Sei a < b und f ∈ -C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder -f(b) < y0 < f(a). 
Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0. +C[a, b] := C([a, b]), weiter sei y0 ∈R und f(a) < y0 < f(b) oder +f(b) < y0 < f(a). Dann existiert ein x0 ∈[a, b] mit f(x0) = y0. Definition 84 -Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. -v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv. -Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f. +Sei V ein Vektorraum über einem Körper K und f : V →V eine lineare Abbildung. +v ∈V \ { 0 } heißt Eigenvektor :⇔∃λ ∈K : f(v) = λv. +Wenn ein solches λ ∈K existiert, heißt es Eigenwert von f. Satz (Binomischer Lehrsatz) -Sei x, y ∈ R. Dann gilt: +Sei x, y ∈R. Dann gilt: (x + y)n = n -� +X k=0 -�n +n k -� + xn−kyk -∀n ∈ N0 +∀n ∈N0 Definition 85 -Seien a, b ∈ R3 Vektoren. +Seien a, b ∈R3 Vektoren. a × b :=   @@ -5355,19 +5354,19 @@ a1 b3 a3  - × +×   a1 b3 a3  - = +=   -a2b3 − a3b2 -a3b1 − a1b3 -a1b2 − a2b1 +a2b3 −a3b2 +a3b1 −a1b3 +a1b2 −a2b1   @@ -5385,14 +5384,14 @@ Rand der Menge M M◦ Inneres der Menge M A × B Kreuzprodukt -A ⊆ B Teilmengenbeziehung -A ⊊ B echte Teilmengenbeziehung +A ⊆B Teilmengenbeziehung +A ⊊B echte Teilmengenbeziehung A \ B Differenzmenge -A ∪ B Vereinigung -A ˙∪ B +A ∪B Vereinigung +A ˙∪B Disjunkte Vereinigung -A ∩ B +A ∩B Schnitt Geometrie AB @@ -5425,14 +5424,14 @@ Permutationsgruppe Sym(X) Symmetrische Gruppe Wege -Sei γ : I → X ein Weg. +Sei γ : I →X ein Weg. [γ] Homotopieklasse von γ -γ1 ∗ γ2 +γ1 ∗γ2 Zusammenhängen von Wegen -γ1 ∼ γ2 Homotopie von Wegen +γ1 ∼γ2 Homotopie von Wegen γ(x) -Inverser Weg, also γ(x) := γ(1 − x) +Inverser Weg, also γ(x) := γ(1 −x) C Bild eines Weges γ, also C := γ([0, 1]) @@ -5448,9 +5447,9 @@ A Atlas P Projektiver Raum -⟨·, ·⟩ Skalarprodukt -X/∼ X modulo ∼ -[x]∼ Äquivalenzklassen von x bzgl. ∼ +⟨·, ·⟩Skalarprodukt +X/∼X modulo ∼ +[x]∼Äquivalenzklassen von x bzgl. 
∼ ∥x∥ Norm von x |x| @@ -5461,7 +5460,7 @@ Sn Sphäre T n Torus -f ◦ g +f ◦g Verkettung von f und g πX Projektion auf X @@ -5493,45 +5492,45 @@ Symbolverzeichnis Zahlenmengen N = { 1, 2, 3, . . . } Natürliche Zahlen -Z = N ∪ { 0, −1, −2, . . . } +Z = N ∪{ 0, −1, −2, . . . } Ganze Zahlen Q = Z ∪ -� 1 + 1 2, 1 3, 2 3 -� + = -� z -n mit z ∈ Z und n ∈ Z \ { 0 } -� + z +n mit z ∈Z und n ∈Z \ { 0 } + Rationale Zahlen R = Q ∪ -� √ + √ 2, − 3√ 3, . . . -� + Reele Zahlen R+ Echt positive reele Zahlen Rn -+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 } ++,0 := { (x1, . . . , xn) ∈Rn | xn ≥0 } Halbraum R× = R \ { 0 } Einheitengruppe von R -C = { a + ib | a, b ∈ R } +C = { a + ib | a, b ∈R } Komplexe Zahlen P = { 2, 3, 5, 7, . . . } Primzahlen -H = { z ∈ C | ℑz > 0 } +H = { z ∈C | ℑz > 0 } obere Halbebene -I = [0, 1] ⊊ R +I = [0, 1] ⊊R Einheitsintervall -f : S1 �→ R2 Einbettung der Kreislinie in die Ebene +f : S1 ,→R2 Einbettung der Kreislinie in die Ebene π1(X, x) -Fundamentalgruppe im topologischen Raum X um x ∈ X +Fundamentalgruppe im topologischen Raum X um x ∈X Fix(f) Menge der Fixpunkte der Abbildung f -∥ · ∥2 +∥· ∥2 2-Norm; Euklidische Norm κ Krümmung @@ -5540,9 +5539,9 @@ Normalenkrümmung V (f) Nullstellenmenge von f2 Krümmung -DpF : R2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) +DpF : R2 →R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) TsS -Tangentialebene an S ⊆ R3 durch s ∈ S +Tangentialebene an S ⊆R3 durch s ∈S dsn(x) Weingarten-Abbildung 2von Vanishing Set diff --git a/read/results/pypdf/1601.03642.txt b/read/results/pypdf/1601.03642.txt index 16681de..6659a55 100644 --- a/read/results/pypdf/1601.03642.txt +++ b/read/results/pypdf/1601.03642.txt @@ -2,7 +2,7 @@ Creativity in Machine Learning Martin Thoma E-Mail: info@martin-thoma.de -Abstract —Recent machine learning techniques can be modified +Abstract—Recent machine learning techniques can be modified to produce creative results. 
Those results did not exist before; it is not a trivial combination of the data which was fed into the machine learning system. The obtained results come in multiple @@ -42,25 +42,30 @@ a lot of data has become available. The idea of machine learning is to make use of this data. A formal definition of the field of Machine Learning is given by Tom Mitchel [Mit97]: -A computer program is said to learn from experienceEwith - respect to some class of tasks Tand +A computer program is said to learn from experience + E with respect to some class of tasks T and performance measure P, if its performance at tasks -inT, as measured by P, improves with experience E.Σϕx0 +in T, as measured by P, improves with experience E. +Σ ϕ +x0 x1 x2 x3 -xnw0 +xn +w0 w1 w2 w3 -wn... -(a)Example of an artificial neuron unit. -xiare the input signals and wiare +wn +... +(a) Example of an artificial neuron unit. +xi are the input signals and wi are weights which have to get learned. Each input signal gets multiplied with its weight, everything gets -summed up and the activation functionϕis - applied.(b)A visualization of a simple feedforward +summed up and the activation function + ϕ is applied. +(b) A visualization of a simple feedforward neural network. The 5 input nodes are red, the 2 bias nodes are gray, the 3 hidden units are @@ -94,20 +99,21 @@ of time required to understand such a complex system from basic building blocks is a time-intensive and difficult task. An important group of machine learning algorithms was inspired by biological neurons and are thus called artificial -neural networks . Those networks are based on mathematical -functions called artificial neurons which take n∈Nnumbersx1, - . . . , x n∈Ras input, multiply them with weights -w1, . . . , w n∈R, add them and apply a so called activation -function ϕas visualized in Figure 1(a). One example of such -an activation function is the sigmoid function ϕ(x) =1 -1+e−x. +neural networks. 
Those networks are based on mathematical +functions called artificial neurons which take n ∈N numbers + x1, . . . , xn ∈R as input, multiply them with weights +w1, . . . , wn ∈R, add them and apply a so called activation +function ϕ as visualized in Figure 1(a). One example of such +an activation function is the sigmoid function ϕ(x) = 1 +1+e−x . Those functions act as building blocks for more complex systems as they can be chained and grouped in layers as visualized in Figure 1(b). The interesting question is how -the parameters wiare learned. This is usually done by an -optimization technique called gradient descent . The gradient +the parameters wi are learned. This is usually done by an +optimization technique called gradient descent. The gradient descent algorithm takes a function which has to be derivable, -starts at any point of the surface of this error function andarXiv:1601.03642v1 [cs.CV] 12 Jan 2016 +starts at any point of the surface of this error function and +arXiv:1601.03642v1 [cs.CV] 12 Jan 2016 makes a step in the direction which goes downwards. Hence it tries to find a minimum of this high-dimensional function. @@ -118,7 +124,7 @@ III. I MAGE DATA Applying a simple neural network on image data directly can work, but the number of parameters gets extraordinary large. One would take one neuron per pixel and channel. This means -for500px×500pxRGB images one would get 750,000input +for 500 px ×500 px RGB images one would get 750,000 input signals. To approach this problem, so called Convolutional Neural Networks (CNNs) were introduced. Instead of learning the full connection between the input layer and the first @@ -158,7 +164,7 @@ might be chosen is because neural networks are structured in layers. Recent publications tend to have more and more layers [HZRS15]. The used jargon is to say they get “deeper”. As this technique as published by Google engineers, the -technique is called Google DeepDream . +technique is called Google DeepDream. Fig. 
2: Aurelia aurita Fig. 3: DeepDream impression of Aurelia aurita It has become famous in the internet [Red]. Usually, the images @@ -182,14 +188,14 @@ different artists to an arbitrary image of their choice. (a) Original Image (b) Style image -(c)The artistic style of Van Gogh’s “Starry Night” applied to the photograph +(c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph of a Scottish Highland Cattle. Fig. 4: The algorithm takes both, the original image and the style image to produce the result. This artistic style imitation can be seen itself as creative work. An example is given by Figure 4. The code which created this example is available under [Joh16]. -Something similar was done by [SPB+14], where the style of +Something similar was done by [SPB +14], where the style of a portrait photograph was transferred to another photograph. A demo can be seen on [Shi14]. C. Drawing Robots @@ -198,7 +204,7 @@ AIKON (Automatic IKONic drawing) which can automatically generated sketches for portraits [TL05]. AIKON takes a digital photograph, detects faces on them and sketches them with a pen-plotter. -Tresset and Leymaire use k-means clustering [KMN+02] to +Tresset and Leymaire use k-means clustering [KMN +02] to segment regions of the photograph with similar color which, in turn, will get a similar shading. Such a drawing robot could apply machine learning techniques @@ -207,7 +213,8 @@ could apply self-learning techniques to draw results most similar to the artists impression of the image. However, the system described in [TL05] seems not to be a machine learning computer program according to the definition by Tom -Mitchell [Mit97].IV. T EXT DATA +Mitchell [Mit97]. +IV. T EXT DATA Digital text is the first form of natural communication which involved computers. It is used in the form of chats, websites, on collaborative projects like Wikipedia, in scientific literature. 
@@ -218,7 +225,7 @@ This list could be continued and most of these kinds of texts are now available in digital form. This digital form can be used to teach machines to generate similar texts. The most simple language model which is of use is an n-gram -model. This model makes use of sequences of the length nto +model. This model makes use of sequences of the length n to model language. It can be used to get the probability of a third word, given the previous two words. This way, a complete text can be generated word by word. Refinements and extensions @@ -251,7 +258,7 @@ A. Similar Texts Generation Karpathy trained multiple character RNNs on different datasets and gave an excellent introduction [Kar15b]. He trained it on Paul Graham’s essays, all the works of Shakespeare, the Hutter -Prize [hut] 100MB dataset of raw Wikipedia articles, the raw +Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw LATEX source file of a book about algebraic stacks and geometry and Linux C code. With that training data, the models can generate similar texts. @@ -299,16 +306,17 @@ subtitles of movies as well as the astonishing increase in computing power to train RNNs and language models similar to the ones described before. Interesting results like the following were obtained by [VL15]: -Human : what is the purpose of life ? -Machine : to serve the greater good . -Human : what is the purpose of living ? -Machine : to live forever . +Human: what is the purpose of life ? +Machine: to serve the greater good . +Human: what is the purpose of living ? +Machine: to live forever . V. A UDIO DATA Common machine learning tasks which involve audio data are speech recognition, speaker identification, identification of songs. This leads to some less-common, but interesting topics: The composition of music, the synthesizing of audio as art. 
-While the composition might be considered in Section IV,we will now investigate the work which was done in audio +While the composition might be considered in Section IV, +we will now investigate the work which was done in audio synthesization. A. Emily Howell David Cope created a project called “Experiments in Musical @@ -344,12 +352,12 @@ Recurrent neural networks — LSTM networks, to be exact (GRU) to build a network which can be trained to generate music. Instead of taking notes directly or MIDI files, Nayebi and Vitelli took raw audio waveforms as input. Those audio -waveforms are feature vectors given for time steps 0,1, . . . , t− -1, t. The network is given those feature vectors X1, . . . , X t +waveforms are feature vectors given for time steps 0, 1, . . . , t− +1, t. The network is given those feature vectors X1, . . . , Xt and has to predict the following feature vector Xt+1. This means it continues the music. As the input is continuous, the problem was modeled as a regression task. Discrete Fourier -Transformation (DFT) was used on chunks of length Nof the +Transformation (DFT) was used on chunks of length N of the music to obtain features in the frequency domain. An implementation can be found at [VN15] and a demonstration can be found at [Vit15]. @@ -387,10 +395,10 @@ REFERENCES [Cop05] ——, Computer models of musical creativity . MIT Press Cambridge, 2005. [Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. -Available: https://www.youtube.com/watch?v=jLR- cuCwI +Available: https://www.youtube.com/watch?v=jLR- c uCwI [Cop13] ——, “The well-programmed clavier: Style in computer music composition,” XRDS: Crossroads, The ACM Magazine for -Students , vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: +Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: http://dl.acm.org/citation.cfm?id=2460444 [Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online]. 
Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ @@ -410,7 +418,8 @@ arnumber=6795963 Available: http://prize.hutter1.net/ [HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning for image recognition,” arXiv preprint arXiv:1512.03385 , 2015. -[Online]. Available: http://arxiv.org/abs/1512.03385[Joh15a] D. Johnson, “Biaxial recurrent neural network for music +[Online]. Available: http://arxiv.org/abs/1512.03385 +[Joh15a] D. Johnson, “Biaxial recurrent neural network for music composition,” GitHub, Aug. 2015. [Online]. Available: https: //github.com/hexahedria/biaxial-rnn-music-composition [Joh15b] ——, “Composing music with recurrent neural @@ -428,7 +437,7 @@ http://karpathy.github.io/2015/05/21/rnn-effectiveness/ and A. Wu, “An efficient k-means clustering algorithm: analysis and implementation,” Pattern Analysis and Machine Intelligence, IEEE Transactions on , vol. 24, no. 7, pp. 881–892, Jul 2002. -[Mit97] T. M. Mitchell, Machine learning , ser. McGraw Hill series in +[Mit97] T. M. Mitchell, Machine learning, ser. McGraw Hill series in computer science. McGraw-Hill, 1997. [MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going deeper into neural networks,” googleresearch.blogspot.co.uk, @@ -448,7 +457,7 @@ com/r/deepdream/ Hj5lGFzlubU [SPB+14] Y . Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, “Style transfer for headshot portraits,” ACM Transactions on -Graphics (TOG) , vol. 33, no. 4, p. 148, 2014. [Online]. Available: +Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available: http://dl.acm.org/citation.cfm?id=2601137 [TL05] P. Tresset and F. F. Leymarie, “Generative portrait sketching,” in Proceedings of VSMM , 2005, pp. 739–748. @@ -468,7 +477,7 @@ Available: https://github.com/MattVitelli/GRUV [Wei76] J. Weizenbaum, Computer Power and Human Reason: From Judgement to Calculation . W.H.Freeman & Co Ltd, 1976. [ZF14] M. D. Zeiler and R. 
Fergus, “Visualizing and understanding convolutional - networks,” in Computer Vision–ECCV 2014 . Springer, + networks,” in Computer Vision–ECCV 2014. Springer, 2014, pp. 818–833. APPENDIX A @@ -516,15 +525,15 @@ was starting to signing a major tripad of aid exile.]] C. Linux Code, 1 /* -*Increment the size file of the new incorrect UI_FILTER group information -*of the size generatively. +* Increment the size file of the new incorrect UI_FILTER group information +* of the size generatively. */ static int indicate_policy(void) { int error; if (fd == MARN_EPT) { /* -*The kernel blank will coeld it to userspace. +* The kernel blank will coeld it to userspace. */ if (ss->segment < mem_total) unblock_graph_and_set_blocked(); @@ -537,7 +546,7 @@ selector = seg / 16; setup_works = true; for (i = 0; i < blocks; i++) { seq = buf[i++]; -bpf = bd->bd.next + i *search; +bpf = bd->bd.next + i * search; if (fd) { current = blocked; } @@ -549,21 +558,21 @@ return segtable; } D. Linux Code, 2 /* -*Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved. +* Copyright (c) 2006-2010, Intel Mobile Communications. All rights reserved. * * This program is free software; you can redistribute it and/or modify it -*under the terms of the GNU General Public License version 2 as published by -*the Free Software Foundation. +* under the terms of the GNU General Public License version 2 as published by +* the Free Software Foundation. * * This program is distributed in the hope that it will be useful, -*but WITHOUT ANY WARRANTY; without even the implied warranty of +* but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -*GNU General Public License for more details. +* GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, -*Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+* Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include diff --git a/read/results/pypdf/1602.06541.txt b/read/results/pypdf/1602.06541.txt index c5c1984..46adcb7 100644 --- a/read/results/pypdf/1602.06541.txt +++ b/read/results/pypdf/1602.06541.txt @@ -2,7 +2,7 @@ A Survey of Semantic Segmentation Martin Thoma info@martin-thoma.de -Abstract —This survey gives an overview over different +Abstract—This survey gives an overview over different techniques used for pixel-level semantic segmentation. Metrics and datasets for the evaluation of segmentation algorithms and traditional approaches for segmentation @@ -18,10 +18,10 @@ Semantic segmentation is the task of clustering parts of images together which belong to the same object class. This type of algorithm has several usecases such as detecting road signs [ MBLAGJ+07], -detecting tumors [ MBVLG02 ], detecting medical instruments - in operations [ WAH97 ], colon crypts segmentation - [ CRSS14 ], land use and land cover classification - [ HDT02 ]. In contrast, non-semantic segmentation +detecting tumors [ MBVLG02], detecting medical instruments + in operations [W AH97], colon crypts segmentation + [ CRSS14], land use and land cover classification + [HDT02]. In contrast, non-semantic segmentation only clusters pixels together based on general characteristics of single objects. Hence the task of non-semantic segmentation is not well-defined, as many different @@ -48,7 +48,8 @@ brief, non-exhaustive summary of recently published semantic segmentation algorithms which are based on neural networks in Section VI. Finally, Section VII informs the reader about typical problematic cases for -segmentation algorithms.II. T AXONOMY OF SEGMENTATION ALGORITHMS +segmentation algorithms. +II. T AXONOMY OF SEGMENTATION ALGORITHMS The computer vision community has published a wide range of segmentation algorithms so far. 
Those algorithms can be grouped by the kind of data they @@ -68,16 +69,16 @@ such, the classes on which the algorithm is trained is a central design decision. Most algorithms work with a fixed set of classes; some even only work on binary classes like foreground - vs background [RM07 ], [CS10 ] or street vs + vs background [RM07], [ CS10] or street vs no street [BKTT15]. However, there are also unsupervised segmentation algorithms which do not distinguish classes at all (see Section V-B) as well as segmentation algorithms which are able to recognize when they don’t know a class. -For example, in [ GRC+08] avoid class was added +For example, in [ GRC+08] a void class was added for classes which were not in the training set. Such a void class was also used in the MSRCv2 dataset -(see Section III-B 2) to make it possible to make more +(see Section III-B2) to make it possible to make more coarse segmentations and thus having to spend less time annotating the image. B. Class affiliation of pixels @@ -90,31 +91,32 @@ we simultaneously two labels to the coordinates of the glass: Glass and table. Although there is much more work being done on single class affiliation segmentation algorithms, there is a publication about multiple -class affiliation segmentation [ LRAL08 ]. Similarly, +class affiliation segmentation [ LRAL08]. Similarly, recent publications in pixel-level object segmentation -used layered models [YHRF12].arXiv:1602.06541v2 [cs.CV] 11 May 2016 +used layered models [YHRF12]. +arXiv:1602.06541v2 [cs.CV] 11 May 2016 C. Input Data The available data which can be used for the inference of a segmentation varies by application. -•Grayscale vs colored : Grayscale images are +• Grayscale vs colored : Grayscale images are commonly used in medical imaging such as magnetic resonance (MR) imaging or ultrasonography whereas colored photographs are obviously widespread. 
-•Excluding or including depth data : RGB-D, +• Excluding or including depth data : RGB-D, sometimes also called range [ HJBJ+96] is available in robotics, autonomous cars and recently also in consumer electronics such as Microsoft Kinect [Zha12]. -•Single image vs stereo images vs cosegmentation - : Single image segmentation is the +• Single image vs stereo images vs cosegmentation: + Single image segmentation is the most wide-spread kind of segmentation, but using -stereo images was already tried in [ BVZ01 ]. It can +stereo images was already tried in [ BVZ01]. It can be seen as a more natural way of segmentation as most mammals have two eyes. It can also be seen as being related to having depth data. -Co-segmentation as in [ RMBK06 ], [CXGS12 ] is +Co-segmentation as in [ RMBK06], [CXGS12] is the problem of finding a consistent segmentation for multiple images. This problem can be seen in two ways: One the one hand, it can be seen @@ -124,25 +126,25 @@ after the first can be used as an additional source of information to find a meaningful segmentation. This idea can be extended to time series such as videos. -•2D vs 3D : Segmenting images is a 2D segmentation - task where the smallest unit is called a pixel . +• 2D vs 3D : Segmenting images is a 2D segmentation + task where the smallest unit is called a pixel. In 3D data, such as volumetric X-ray CT images -as they were used in [ HHR01 ], the smallest unit +as they were used in [ HHR01], the smallest unit is called a voxel. D. Operation state The operation state of the classifying machine can -either be active as in [ SUM+11], [SSA12 ] where robots -can move objects to find a segmentation or passive , +either be active as in [SUM+11], [SSA12] where robots +can move objects to find a segmentation or passive, where the received image cannot be influenced. Among the passive algorithms, some segment in a completely automatic fashion, others work in an interactive mode. 
One example would be a system where the user clicks on the background or marks a coarse segmentation and -the algorithm finds a fine-grained segmentation. [ BJ00 ], -[RKB04 ], [PS07 ] describe systems which work in an +the algorithm finds a fine-grained segmentation. [ BJ00], +[RKB04], [PS07] describe systems which work in an interactive mode. (a) Example Scene - (b)Visualization of a found segmentation + (b) Visualization of a found segmentation Figure 1: An example of a scene and a possible visualization of a found segmentation. @@ -166,29 +168,31 @@ However, this can only support the explanation of particular problems or showcase special situation. For meaningful information about the overall accuracy, there are a couple of metrics how accuracy can be defined. -For this section, let k∈Nbe the number of classes, -nij∈N0withi,j∈1,...,k be the number of pixels -which belong to class iand were labeled as class j. -(nij)is called a confusion matrix . Letti=∑k -j=1nij +For this section, let k∈N be the number of classes, +nij ∈N0 with i,j ∈1,...,k be the number of pixels +which belong to class i and were labeled as class j. +(nij) is called a confusion matrix. Let ti = ∑k +j=1 nij be the total number of pixels of class i. One way to compare segmentation algorithms is by the pixel-wise accuracy of the predicted segmentation -as done in many publications [ SWRC06 ], [CP08 ], -[LSD14 ]. This is also called per-pixel rate and defined - as∑k -i=1nii∑k -i=1ti. Taking the pixel-wise classification +as done in many publications [ SWRC06], [ CP08], +[LSD14]. This is also called per-pixel rate and defined + as +∑k +i=1 nii∑k +i=1 ti +. Taking the pixel-wise classification accuracy has two major drawbacks: -P1Tasks like segmenting images for autonomous cars +P1 Tasks like segmenting images for autonomous cars have large regions which have one class. This makes achieving classification accuracies of more -than30%with a priori knowledge only possible. 
+than 30 % with a priori knowledge only possible. For example, a system might learn that a certain position of the image is most of the time “sky” while another position is most of the time “road”. -P2The manually labeled images could have a more +P2 The manually labeled images could have a more coarse labeling. For example, a human classifier could have labeled a region as “car” and the algorithm could have split that region into the @@ -196,50 +200,56 @@ general “car” and the more specific “wheel of a car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: -•mean accuracy :1 -k·∑k -i=1nii -ti∈[0,1] -•mean intersection over union : +• mean accuracy: 1 +k ·∑k +i=1 +nii +ti +∈[0,1] +• mean intersection over union : 1 -k·∑k -i=1nii +k ·∑k +i=1 +nii ti−nii+∑k -j=1nji∈[0,1] -•frequency weighted intersection over union : +j=1 nji +∈[0,1] +• frequency weighted intersection over union : (∑k -i=1ti)−1∑k -i=1ti·nii +i=1 ti) +−1 ∑k +i=1 ti · nii ti−nii+∑k -j=1nji∈[0,1] +j=1 nji +∈[0,1] Another problem might be pixels which cannot be assigned to one of the known classes. For this reason, -[SWRC06 ] makes use of a void class. This class gets +[SWRC06] makes use of a void class. This class gets completely ignored for all quality measures. Hence the total number of pixels is assumed to be width·height− number of void pixels. One way to deal with problem P1 and problem P2 -is giving the confusion matrix as done in [ SWRC06 ]. +is giving the confusion matrix as done in [ SWRC06]. However, this approach is not feasible if many classes are given. -TheF-measure is useful for binary classification +The F-measure is useful for binary classification task such as the KITTI road segmentation -benchmark [ FKG13 ] or crypt segmentation as done -by [CRSS14 ]. It is calculated as “the harmonic mean +benchmark [ FKG13] or crypt segmentation as done +by [CRSS14]. 
It is calculated as “the harmonic mean of the precision and recall” [PH05]: -Fβ= (1 +β)2 tp -(1 +β2)·tp+β2·fn+fp -whereβ= 1 is chosen in most cases and tpmeans -true positive ,fnmeans false negative andfpmeans -false positive . +Fβ = (1 + β)2 tp +(1 + β2) ·tp + β2 ·fn + fp +where β = 1 is chosen in most cases and tp means +true positive, fn means false negative and fp means +false positive. Finally, it should be noted that a lot of other measures for the accuracy of segmentations were proposed for non-semantic segmentation. One of those accuracy measures is Normalized Probabilistic Rand (NPR) -index which was introduced in [ UPH05 ] and evaluated +index which was introduced in [ UPH05] and evaluated in [ CSI+09] on dermoscopy images. Other non-semantic segmentation measures were introduced -in [MFTM01 ], but the reason for creating them seems to +in [MFTM01], but the reason for creating them seems to be to deal with the under-defined task description of nonsemantic segmentation. These accuracy measures try to deal with different levels of coarsity of the segmentation. @@ -249,16 +259,17 @@ and thus those measures are not explained here. time for the inference on a single image is a hard requirement for some applications. For example, in the case of autonomous cars an algorithm which classifies -pixel as street or no-street and thus makes a semanticsegmentation, every image needs to be processed within -20ms [BKTT15]. This time is called latency . +pixel as street or no-street and thus makes a semantic +segmentation, every image needs to be processed within +20 ms [BKTT15]. This time is called latency. Most papers do not give exact values for the time their application needs. One reason might be that this is very hardware, implementation and in some cases even data specific. For example, [ HJBJ+96] notes that their -algorithm needs 10son a Sun SparcStation 20. The -fastest CPU ever produced for this system had 200MHz . +algorithm needs 10 s on a Sun SparcStation 20. 
The +fastest CPU ever produced for this system had 200 MHz. Comparing this directly with results which were obtained - using an Intel i7-4820K with 3.9GHz would not + using an Intel i7-4820K with 3.9 GHz would not be meaningful. However, it does still make sense to mention the execution time as well as the hardware in individual @@ -294,10 +305,10 @@ the following, only the most widely used ones as well as three medical databases are described. An overview over the quantity and the kind of data is given by Table I. -1) PASCAL VOC: The PASCAL1VOC2challenge +1) PASCAL VOC: The PASCAL1 VOC2 challenge was organized eight times with different datasets: Once every year from 2005 to 2012 [ EVGW+b]. -1pattern analysis, statistical modelling and comput ational learning, +1pattern analysis, statistical modelling and computational learning, an EU network of excellence 2Visual Object Classes @@ -322,7 +333,7 @@ database of 591 photographs with pixel-level annotation of 21 classes: aeroplane, bike, bird, boat, body, book, building, car, cat, chair, cow, dog, face, flower, grass, road, sheep, sign, sky, tree, water. Additionally, there -is avoid label for pixels which do not belong to +is a void label for pixels which do not belong to any of the 21 classes or which are close to the segmentation boundary. This allows a “rough and quick hand-segmentation which does not align exactly with @@ -331,7 +342,7 @@ the object boundaries” [SWRC06]. consists of 165 images with pixel-level annotation of 5 classes: “healthy, adenomatous, moderately differentiated, moderately-to-poorly differentiated, and poorly -differentiated” [ CSM09 ]. This dataset is part of the +differentiated” [CSM09]. This dataset is part of the Gland Segmentation (GlaS) challenge. The DIARETDB1 [ KKV+14] is a dataset of 89 images fundus images. Those images show the interior @@ -356,11 +367,16 @@ a majority vote on a pixel basis of 10 segmentations given by 10 different KWs. 
Training Prediction -PostprocessingWindow-wise +Postprocessing -ClassificationWindow -extractionData -augmentationFeature extractionPreprocessingFigure 2: A typical segmentation pipeline gets raw +Window-wise +Classification +Window +extraction +Data +augmentationFeature extraction +Preprocessing +Figure 2: A typical segmentation pipeline gets raw pixel data, applies preprocessing techniques like scaling and feature extraction like HOG features. For training, data augmentation @@ -375,18 +391,18 @@ Fields (MRFs). IV. S EGMENTATION PIPELINE Typically, semantic segmentation is done with a classifier which operates on fixed-size feature inputs -and a sliding-window approach [ DT05 ], [YBCK10 ], +and a sliding-window approach [ DT05], [ YBCK10], [SCZ08]. This means a classifier is trained on images of a fixed size. The trained classifier is then fed with -rectangular regions of the image which are called windows - . Although the classifier gets an image patch of e.g. -51px×51pxof the environment, it might only classify +rectangular regions of the image which are called windows. + Although the classifier gets an image patch of e.g. +51 px ×51 px of the environment, it might only classify the center pixel or a subset of the complete window. This segmentation pipeline is visualized in Figure 2. -This approach was taken by [ BKTT15 ] and a majority +This approach was taken by [ BKTT15] and a majority of the VOC2007 participants [ EVGW+a]. As this -approach has to apply the patch classifier 512·512 = -262 144 times for images of size 512px×512px, there +approach has to apply the patch classifier 512 ·512 = +262 144 times for images of size 512 px×512 px, there are techniques for speeding it up such as applying a stride and interpolating the results. Neural networks are able to apply the sliding window @@ -411,7 +427,7 @@ Fields in Section V-E and Support Vector Machines (SVMs) in Section V-D. Postprocessing is covered in Section V-G. 
It should be noted that algorithms can use combination - of methods. For example, [ TNL14 ] makes use of a + of methods. For example, [ TNL14] makes use of a combination of a SVM and a MRF. Also, auto-encoders can be used to learn features which in turn can be used by any classifier. @@ -426,49 +442,50 @@ for the gray-value) are the most widely used features. A typical image is in the RGB color space, but depending on the classifier and the problem another color space might result in better segmentations. RGB, YcBcr, HSL, -Lab and YIQ are some examples used by [ CRSS14 ]. +Lab and YIQ are some examples used by [ CRSS14]. No single color space has been proven to be superior -to all others in all contexts [ CJSW01 ]. However, the +to all others in all contexts [ CJSW01]. However, the most common choices seem to be RGB and HSI. Reasons for choosing RGB is simplicity and the support by programming languages, whereas the choice of the HSI color space might make it simpler for the classifier to become invariant to illumination. One reason for choosing CIE-L*a*b* color space is that it -approximates human perception of brightness [ KP92 ]. +approximates human perception of brightness [ KP92]. It follows that choosing the L*a*b color space helps algorithms to detect structures which are seen by humans. Another way of improving the structure within an image is histogram equalization, which can be -applied to improve contrast [PAA+87], [RM07]. +applied to improve contrast [PAA +87], [RM07]. 2) Histogram of oriented Gradients: Histogram of oriented gradients (HOG) features interpret the image -as a discrete function I:N2→{0,..., 255}which -maps the position (x,y)to a color. For each pixel, there -are two gradients: The partial derivative of xandy. +as a discrete function I : N2 →{ 0,..., 255 }which +maps the position (x,y) to a color. For each pixel, there +are two gradients: The partial derivative of x and y. 
Now the original image is transformed to two feature maps of equal size which represents the gradient. These -feature maps are splitted into patches and a histogram ofthe directions is calculated for each patch. HOG features -were proposed in [ DT05 ] and are used in [ BMBM10 ], +feature maps are splitted into patches and a histogram of +the directions is calculated for each patch. HOG features +were proposed in [ DT05] and are used in [ BMBM10], [FGMR10] for segmentation tasks. 3) SIFT: Scale-invariant feature transform (SIFT) feature descriptors describe keypoints in an image. The -image patch of the size 16×16around the keypoint -is taken. This patch is divided in 16distinct parts of -the size 4×4. For each of those parts a histogram of +image patch of the size 16 ×16 around the keypoint +is taken. This patch is divided in 16 distinct parts of +the size 4 ×4. For each of those parts a histogram of 8 orientations is calculated similar as for HOG features. This results in a 128-dimensional feature vector for each keypoint. It should be emphasized that SIFT is a global feature for a complete image. -SIFT is described in detail in [ Low04 ] and are used +SIFT is described in detail in [ Low04] and are used in [PTN09]. 4) BOV: Bag-of-visual-words (BOV), also called bag of keypoints , is based on vector quantization. Similar to HOG features, BOV features are histograms which count the number of occurrences of certain patterns within a patch of the image. BOV are described -in [CDF+04] and used in combination with SIFT +in [ CDF+04] and used in combination with SIFT feature descriptors in [CP08]. 5) Poselets: Poselets rely on manually added extra keypoints such as “right shoulder”, “left shoulder”, @@ -480,10 +497,10 @@ like airplanes, ships, organs or cells where the human annotators do not know the keypoints. Additionally, the keypoints have to be chosen for every single class. There are strategies to deal with those problems like viewpointdependent - keypoints. 
Poselets were used in [ BMBM10 ] -to detect people and in [ BBMM11 ] for general object + keypoints. Poselets were used in [ BMBM10] +to detect people and in [ BBMM11] for general object detection of the PASCAL VOC dataset. -6) Textons: Atexton is the minimal building block +6) Textons: A texton is the minimal building block of vision. The computer vision literature does not give a strict definition for textons, but edge detectors could be one example. One might argue that deep learning techniques @@ -499,7 +516,7 @@ contain much more information. A simple approach to deal with this is downsampling the high-resolution image to a low-resolution variant. Another way of doing dimensionality reduction is principal component -analysis (PCA), which is applied by [ COWR11 ]. The +analysis (PCA), which is applied by [ COWR11]. The idea behind PCA is to find a hyperplane on which all feature vectors can be projected with a minimal loss @@ -526,18 +543,18 @@ consistent regions or region boundaries. directly be applied on the pixels, when one gives a feature vector per pixel. Two clustering algorithms are k-means and the mean-shift algorithm. -Thek-means algorithm is a general-purpose clustering +The k-means algorithm is a general-purpose clustering algorithm which requires the number of clusters to -be given beforehand. Initially, it places the kcentroids +be given beforehand. Initially, it places the k centroids randomly in the feature space. Then it assigns each data point to the nearest centroid, moves the centroid to the center of the cluster and continues the process until a stopping criterion is reached. A faster variant is described in [Har75]. -k-means was applied by [ CLP98 ] for medical image +k-means was applied by [ CLP98] for medical image segmentation. Another clustering algorithm is the mean-shift algorithm - which was introduced by [ CM02 ] for segmentation + which was introduced by [ CM02] for segmentation tasks. 
The algorithm finds the cluster centers by initializing centroids at random seed points and iteratively shifting them to the mean coordinate within @@ -550,8 +567,9 @@ points. 2) Graph Based Image Segmentation: Graph-based image segmentation algorithms typically interpret pixels as vertices and an edge weight is a measure of -dissimilarity such as the difference in color [ FH04 ], -[Fel]. There are several different candidates for edges.The 4-neighborhood (north, east, south west) or an 8neighborhood +dissimilarity such as the difference in color [ FH04], +[Fel]. There are several different candidates for edges. +The 4-neighborhood (north, east, south west) or an 8neighborhood (north, north-east, east, south-east, south, south-west, west, north-west) are plausible choices. One way to cut the edges is by building a minimum @@ -559,11 +577,11 @@ spanning tree and removing edges above a threshold. This threshold can either be constant, adapted to the graph or adjusted by the user. After the edge-cutting step, the connected components are the segments. -A graph-based method which ranked 2ndin the +A graph-based method which ranked 2 nd in the Pascal VOC 2010 challenge [ EVGW+10] is described -in [CS10 ]. The system makes heavy use of the multicue - contour detector globalPb [ MAFM08 ] and needs -about 10GB of main memory [CS11]. +in [CS10]. The system makes heavy use of the multicue + contour detector globalPb [ MAFM08] and needs +about 10 GB of main memory [CS11]. 3) Random Walks: Random walks belong to the graph-based image segmentation algorithms. Random walk image segmentation usually works as follows: @@ -582,7 +600,7 @@ segmentation methods output as seed points. along edges, but also try to find a border which is smooth. This is done by defining a so called energy function which will be minimized. They were initially -described in [ KWT88 ]. ACMs can be used to segment +described in [ KWT88]. 
ACMs can be used to segment an image or to refine segmentation as it was done in [AM98] for brain MR images. 5) Watershed Segmentation: The watershed algorithm @@ -598,7 +616,7 @@ watershed is found. The algorithm stops when the highest point is reached. A detailed description of the watershed segmentation algorithm is given in [RM00]. -The watershed segmentation was used in [ JLD03 ] to +The watershed segmentation was used in [ JLD03] to segment white blood cells. As the authors describe, the segmentation by watershed transform has two flaws: Over-segmentation due to local minima and thick @@ -606,13 +624,13 @@ watersheds due to plateaus. C. Random Decision Forests Random Decision Forests were first proposed -in [Ho95 ]. This type of classifier applies techniques +in [ Ho95]. This type of classifier applies techniques called ensemble learning , where multiple classifiers are trained and a combination of their hypotheses is used. One ensemble learning technique is the random subspaces method where each classifier is trained on a random subspace of the feature space. Another -ensemble learning technique is bagging , which is +ensemble learning technique is bagging, which is training the trees on random subsets of the training set. In the case of Random Decision Forests, the classifiers are decision trees. A decision tree is a tree where each @@ -626,233 +644,268 @@ ordinal, interval, ratio) can be arbitrary. Another advantage for example, is the speed of training and classification. Decision trees were extensively studied in the past 20 years and a multitude of training algorithms have -been proposed (e.g. ID3 in [ Qui86 ], C4.5 in [ Qui93 ]). +been proposed (e.g. ID3 in [ Qui86], C4.5 in [ Qui93]). Possible training hyperparameters are the measure to -evaluate the “goodness of split” [ Min89 ], the number of +evaluate the “goodness of split” [Min89], the number of decision trees being used, and if the depth of the trees is restricted. 
Typically in the context of classification, decision trees are trained by adding new nodes until each leaf contains only nodes of a single class or until it is not possible to split further. This is called a stopping -criterion . +criterion. There are two typical training modes: Central axis projection and perceptron training . In training, for each node a hyperplane is searched which is optimal according to an error function. Random Decision Forests with texton features (see -Section V-A6) are applied in [ SJC08 ] for segmentation. -In the [ MSC ] dataset, they report a per-pixel accuracy -rate of 66.9%for their best system. This system -requires 415msfor the segmentation of 320px×213px -images on a single 2.7GHz core. On the Pascal +Section V-A6) are applied in [ SJC08] for segmentation. +In the [ MSC] dataset, they report a per-pixel accuracy +rate of 66.9 % for their best system. This system +requires 415 ms for the segmentation of 320 px×213 px +images on a single 2.7 GHz core. On the Pascal VOC 2007 dataset, they report an average per-pixel -accuracy for their best segmentation system of 42%. +accuracy for their best segmentation system of 42 %. An excellent introduction to Random Decision -Forests for semantic segmentation is given by [ SCZ08 ]. +Forests for semantic segmentation is given by [SCZ08]. D. SVMs SVMs are well-studied binary classifiers which can be described by five central ideas. For those ideas, the -training data is represented as (xi,yi)where xiis the -feature vector and yi∈{− 1,1}the binary label for -training example i∈{1,...,m}.1)If data is linearly separable, it can be separated +training data is represented as (xi,yi) where xi is the +feature vector and yi ∈{− 1,1 }the binary label for +training example i∈{1,...,m }. +1) If data is linearly separable, it can be separated by a hyperplane. There is one hyperplane which maximizes the distance to the next datapoints -(support vectors ). This hyperplane should be taken: +(support vectors). 
This hyperplane should be taken: minimize -w,b1 +w,b +1 2∥w∥2 -s.t.∀m -i=1yi·(⟨w,xi⟩+b) -sgn applied to this gives the classification≥1 -2)Even if the underlying process which generates the +s.t. ∀m +i=1yi ·(⟨w,xi⟩+ b)   +sgn applied to this gives the classification +≥1 +2) Even if the underlying process which generates the features for the two classes is linearly separable, noise can make the data not separable. The introduction of slack variables to relax the requirement of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter -C∈R+ -0. The bigger C, the more errors are +C ∈ R+ +0 . The bigger C, the more errors are accepted. The new optimization problem is: minimize -w1 -2∥w∥2+C·m∑ -i=1ξi -s.t.∀m -i=1yi·(⟨w,xi⟩+b)≥1−ξi -Note that 0≤ξi≤1means that the data point -is within the margin, whereas ξi≥1means it is -misclassified. An SVM with C > 0is also called -asoft-margin SVM . -3)The primal problem is to find the normal vector -wand the bias b. The dual problem is to express -was a linear combination of the training data xi: -w=m∑ -i=1αiyixi -whereyi∈{− 1,1}represents the class of the -training example and αiare Lagrange multipliers. +w +1 +2∥w∥2 + C· +m∑ +i=1 +ξi +s.t. ∀m +i=1yi ·(⟨w,xi⟩+ b) ≥1 −ξi +Note that 0 ≤ξi ≤1 means that the data point +is within the margin, whereas ξi ≥1 means it is +misclassified. An SVM with C >0 is also called +a soft-margin SVM. +3) The primal problem is to find the normal vector +w and the bias b. The dual problem is to express +w as a linear combination of the training data xi: +w = +m∑ +i=1 +αiyixi +where yi ∈{− 1,1 }represents the class of the +training example and αi are Lagrange multipliers. The usage of Lagrange multipliers is explained -with some examples in [ Smi04 ]. The usage of the -Lagrange multipliers αichanges the optimization -problem depend on the αiwhich are weights for -the feature vectors. 
It turns out that most αiwill +with some examples in [ Smi04]. The usage of the +Lagrange multipliers αi changes the optimization +problem depend on the αi which are weights for +the feature vectors. It turns out that most αi will be zero. The non-zero weighted vectors are called -support vectors . +support vectors. The optimization problem is now, according to [Bur98]: maximize -αim∑ -i=1αi−1 -2m∑ -i=1m∑ -j=1αiαjyiyj⟨xi,xj⟩ -s.t.∀m -i=10≤αi≤C -s.t.m∑ -i=1αiyi= 0 +αi +m∑ +i=1 +αi −1 +2 +m∑ +i=1 +m∑ +j=1 +αiαjyiyj⟨xi,xj⟩ +s.t. ∀m +i=10 ≤αi ≤C +s.t. +m∑ +i=1 +αiyi = 0 -4)Not every dataset is linearly separable. This problem +4) Not every dataset is linearly separable. This problem is approached by transforming the feature -vectors xwith a non-linear mapping Φinto +vectors x with a non-linear mapping Φ into a higher dimensional (probably ∞-dimensional) -space. As the feature vectors xare only used +space. As the feature vectors x are only used within scalar product ⟨xi,xj⟩, it is not necessary to do the transformation. It is enough to do the calculation -K(xi,xj) =⟨xi,xj⟩ -This function Kis called a kernel . The idea of -never explicitly transforming the vectors xito the -higher dimensional space is called the kernel trick . +K(xi,xj) = ⟨xi,xj⟩ +This function K is called a kernel. The idea of +never explicitly transforming the vectors xi to the +higher dimensional space is called the kernel trick. Common kernels include the polynomial kernel -KP(xi,xj) = (⟨xi,xj⟩+r)p -of degreepand coefficient r, the Gaussian radial +KP(xi,xj) = (⟨xi,xj⟩+ r)p +of degree p and coefficient r, the Gaussian radial basis function (RBF) kernel -KGauss(xi,xj) =e−γ∥xi−xj∥2 +KGauss(xi,xj) = e +−γ∥xi−xj∥2 2σ2 and the sigmoid kernel Ktanh(xi,xj) = tanh(γ⟨xi,xj⟩−r) -where the parameter γdetermines how much +where the parameter γ determines how much influence single training examples have. 
-5)The described SVMs can only distinguish between +5) The described SVMs can only distinguish between two classes. Common strategies to expand those binary classifiers to multi-class classification is -theone-vs-all and the one-vs-one strategy. In the -one-vs-all strategy nclassifiers have to be trained -which can distinguish one of the nclasses against -all other classes. In the one-vs-one strategyn2−n +the one-vs-all and the one-vs-one strategy. In the +one-vs-all strategy n classifiers have to be trained +which can distinguish one of the n classes against +all other classes. In the one-vs-one strategy n2−n 2 classifiers are trained; one classifier for each pair of classes. A detailed description of SVMs can be found in [Bur98]. -SVMs are used by [ YHRF12 ] on the 2009 and 2010 +SVMs are used by [ YHRF12] on the 2009 and 2010 PASCAL segmentation challenge [ EVGW+10]. They did not hand their classifier in to the challenge itself, but calculated an average rank of 7 among the different categories. -[FGMR10 ] also used an SVM based method with -HOG features and achieved the 7thrank in the 2010 +[FGMR10] also used an SVM based method with +HOG features and achieved the 7 th rank in the 2010 PASCAL segmentation challenge by mean accuracy. It -needs about 2s on a 2.8GHz 8-core Intel processor. +needs about 2 s on a 2.8 GHz 8-core Intel processor. E. Markov Random Fields MRFs are undirected probabilistic graphical models which are wide-spread model in computer vision. The overall idea of MRFs is to assign a random variable for -each feature and a random variable for each pixel whichx1x2x3x4x5x6x7x8x9 -y1y2y3y4y5y6y7y8y9 -x1x2x3x4x5x6x7x8x9 -y1y2y3y4y5y6y7y8y9 +each feature and a random variable for each pixel which +x1 x2 x3 +x4 x5 x6 +x7 x8 x9 +y1 y2 y3 +y4 y5 y6 +y7 y8 y9 +x1 x2 x3 +x4 x5 x6 +x7 x8 x9 +y1 y2 y3 +y4 y5 y6 +y7 y8 y9 Figure 3: CRF with 4-neighborhood. 
Each node xi -represents a pixel and each node yirepresents +represents a pixel and each node yi represents a label. gets labeled as shown in Figure 3. For example, a MRF -which is trained on images of the size 224px×224pixel +which is trained on images of the size 224 px×224 pixel and gets the raw RGB values as features has -224·224·3 -input+ 224·224 -output= 200 704 +224 ·224 ·3   +input ++ 224·224   +output += 200 704 random variables. Those random variables are conditionally independent, given their local neighborhood. These (in)dependencies can be expressed with a graph. -LetG= (V,E)be the associated undirected graph -of an MRF andCbe the set of all maximal cliques in -that graph. Nodes represent random variables x,yand +Let G= (V,E) be the associated undirected graph +of an MRF and Cbe the set of all maximal cliques in +that graph. Nodes represent random variables x,y and edges represent conditional dependencies. Just like in -he 4-neighborhood [ SWRC06 ] and the 8-neighborhood +he 4-neighborhood [ SWRC06] and the 8-neighborhood are reasonable choices for constructing the graph. -Typically, random variables yrepresent the class of a -single pixel, random variables xrepresent a pixel values +Typically, random variables y represent the class of a +single pixel, random variables x represent a pixel values and edges represent pixel neighborhood in computer vision problems segmentation problems where MRFs -are used. Accordingly, the random variables ylive -on1,..., nr of classes and the random variables x -typically live on 0,..., 255or[0,1]. -The probability of x,ycan be expressed as -P(x,y) =1 +are used. Accordingly, the random variables y live +on 1,..., nr of classes and the random variables x +typically live on 0,..., 255 or [0,1]. +The probability of x,y can be expressed as +P(x,y) = 1 Ze−E(x,y) -whereZ=∑ -x,ye−E(x,y)is a normalization term -called the partition function andEis called the energy -function . 
A common choice for the energy function is -E(x,y) =∑ -c∈Cψc(x,y) -whereψis called a clique potential . One choice for -cliques of size two x,y= (x1,x2)is [KP06] -ψc(x1,x2) =wδ(x1,x2) ={ -+wifx1̸=x2 -−wifx1=x2 -According to [ Mur12 ], the most common way of +where Z = ∑ +x,y e−E(x,y) is a normalization term +called the partition function and E is called the energy +function. A common choice for the energy function is +E(x,y) = +∑ +c∈C +ψc(x,y) +where ψ is called a clique potential . One choice for +cliques of size two x,y = (x1,x2) is [KP06] +ψc(x1,x2) = wδ(x1,x2) = +{ ++w if x1 ̸= x2 +−w if x1 = x2 +According to [ Mur12], the most common way of inference over the posterior MRF in computer vision problems is Maximum A Posteriori (MAP) estimation. Detailed introductions to MRFs are given by -[BKR11 ], [Mur12 ]. MRFs are used by [ ZBS01 ] and +[BKR11], [ Mur12]. MRFs are used by [ ZBS01] and [MSB12] for image segmentation. F . Conditional Random Fields CRFs are MRFs where all clique potentials are -conditioned on input features [ Mur12 ]. This means, +conditioned on input features [ Mur12]. This means, instead of learning the distribution P(y,x), the task is reformulated to learn the distribution P(y|x). One consequence of this reformulation is that CRFs need -much less parameters as the distribution of xdoes +much less parameters as the distribution of x does not have to be estimated. Another advantage of CRFs compared to MRFs is that no distribution assumption -about xhas to be made. +about x has to be made. A CRF has the partition function Z: -Z(x) =∑ -yP(x,y) +Z(x) = +∑ +y +P(x,y) and joint probability distribution -P(y|x) =1 -Z(x)∏ -c∈Cψc(yc|x) -The simplest way to define the clique potentials ψis -the count of the class ycgivenxadded with a positive +P(y|x) = 1 +Z(x) +∏ +c∈C +ψc(yc|x) +The simplest way to define the clique potentials ψ is +the count of the class yc given x added with a positive smoothing constant to prevent the complete term from getting zero. 
-CRFs as described in [ LRKT09 ] have reached top -performance in PASCAL VOC 2010 [ VOC10 ] and -are also used in [ HZCP04 ], [SWRC06 ] for semantic +CRFs as described in [ LRKT09] have reached top +performance in PASCAL VOC 2010 [ VOC10] and +are also used in [ HZCP04], [ SWRC06] for semantic segmentation. A method similar to CRFs was proposed in [ GBVdW+10]. The system of Gonfaus et.al. -ranked 1stby mean accuracy in the segmentation task -of the PASCAL VOC 2010 challenge [EVGW+10]. +ranked 1st by mean accuracy in the segmentation task +of the PASCAL VOC 2010 challenge [EVGW +10]. An introduction to CRFs is given by [SM11]. G. Post-processing methods Post-processing refine a found segmentation and remove obvious errors. For example, the morphological -operations opening andclosing can remove noise. The +operations opening and closing can remove noise. The opening operation is a dilation followed by a erosion. This removes tiny segments. The closing operation is a erosion followed by a dilation. This removes tiny gaps -in otherwise filled regions. They were used in [ CLP98 ] +in otherwise filled regions. They were used in [ CLP98] for biomedical image segmentation. Another way of refinement of the found segmentation is by adjusting the segmentation to match close edges. -This was used in [ BBMM11 ] with an ultra-metric +This was used in [ BBMM11] with an ultra-metric contour map [AMFM09]. Active contour models are another example of a -post-processing method [KWT88].VI. N EURAL NETWORKS FOR SEMANTIC +post-processing method [KWT88]. +VI. N EURAL NETWORKS FOR SEMANTIC SEGMENTATION Artificial neural networks are classifiers which are inspired by biologic neurons. Every single artificial @@ -862,7 +915,7 @@ function to the weighted sum and gives an output. Those neurons can take either a feature vector as input or the output of other neurons. In this way, they build up feature hierarchies. -The parameters they learn are the weightsw∈R. 
+The parameters they learn are the weights w ∈R. They are learned by gradient descent. To do so, an error function — usually cross-entropy or mean squared error — is necessary. For the gradient descent algorithm, one @@ -879,39 +932,39 @@ CNNs are neural networks which learn image filters. They drastically reduce the number of parameters which have to be learned while being still general enough for the problem domain of images. This was shown by Alex -Krizhevsky et al. in [ KSH12 ]. One major idea was a -clever regularization called dropout training , which set +Krizhevsky et al. in [ KSH12]. One major idea was a +clever regularization called dropout training, which set the output of neurons while training randomly to zero. Another contribution was the usage of an activation function called rectified linear unit : ϕReLU(x) = max(0,x) Those are much faster to train than the commonly used sigmoid activation functions -ϕSigmoid (x) =1 -e−x+ 1 +ϕSigmoid(x) = 1 +e−x + 1 Krizhevsky et al. implemented those ideas and participated in the ImageNet Large-Scale Visual Recognition Challenge (ILSVRC). The best other system, which used SIFT features and Fisher Vectors, had a performance - of about 25.7%while the network by Alex -Krizhevsky et al. got 17.0%error rate on the ILSVRC2010 + of about 25.7 % while the network by Alex +Krizhevsky et al. got 17.0 % error rate on the ILSVRC2010 dataset. As a preprocessing step, they downsampled - all images to a fixed size of 256px×256pxbefore + all images to a fixed size of 256 px×256 px before they fed the features into their network. This network -is commonly known as AlexNet . +is commonly known as AlexNet. Since AlexNet was developed, a lot of different neural networks have been proposed. One interesting -example is [ PC13 ], where a recurrent CNN for semantic +example is [PC13], where a recurrent CNN for semantic segmentation is presented. -Another notable paper is [ LSD14 ]. The algorithm +Another notable paper is [ LSD14]. 
The algorithm presented there makes use of a classifying network such as AlexNet, but applies the complete network as an image filter. This way, each pixel gets a probability distribution for each of the trained classes. By taking the most likely class, a semantic segmentation can be done with arbitrary image sizes. -A very recent publication by Dai et al. [ DHS15 ] +A very recent publication by Dai et al. [ DHS15] showed that segmentation with much deeper networks is possible and achieves better results. More detailed explanations to neural networks for @@ -928,7 +981,7 @@ I am not aware of any systematic work which examined A. Lens Flare Lens flare is the effect of light getting scattered in the lens system of the camera. The testing data set of -the KITTI road evaluation benchmark [ FKG13 ] has a +the KITTI road evaluation benchmark [ FKG13] has a couple of photos with this problem. Figure 4(a) shows an extreme example of lens flare. B. Vignetting @@ -984,7 +1037,7 @@ not have photos from the point of view of a child. This is visualized in Figure 4(f). VIII. D ISCUSSION -Ohta et al. wrote [ OKS78 ] 38 years ago. It is one +Ohta et al. wrote [ OKS78] 38 years ago. It is one of the first papers mentioning semantic segmentation. In this time, a lot of work was done and many different directions have been explored. Different kinds @@ -1005,61 +1058,62 @@ A combination of different classifiers in an ensemble would be an interesting option to explore in order to improve accuracy. Another direction which is currently studied is combining classifiers such as neural networks -with CRFs [ZJRP+15].REFERENCES +with CRFs [ZJRP +15]. +REFERENCES [AM98] M. S. Atkins and B. T. Mackiewich, “Fully automatic segmentation of the brain in mri,” Medical Imaging, IEEE Transactions on, vol. 17, no. 1, pp. 98–107, Feb. 1998. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=668699 +[Online]. 
Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=668699 [AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and J. Malik, “From contours to regions: An empirical evaluation,” in Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE -Conference on . IEEE, Jun. 2009, pp. 2294–2301. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=5206707 +Conference on. IEEE, Jun. 2009, pp. 2294–2301. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5206707 [AP11] G. Azzopardi and N. Petkov, “Detection of retinal vascular bifurcations by trainable v4-like filters,” in Computer Analysis of Images and -Patterns . Springer, 2011, pp. 451–459. [Online]. -Available: http://www .cs.rug .nl/~imaging/databases/ -retina_database/retinalfeatures_database .html +Patterns. Springer, 2011, pp. 451–459. [Online]. +Available: http://www.cs.rug.nl/~imaging/databases/ +retina_database/retinalfeatures_database.html [BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik, “Object segmentation by alignment of poselet activations to image contours,” in Computer Vision and Pattern Recognition (CVPR), 2011 IEEE -Conference on . IEEE, Jun. 2011, pp. 2225–2232. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=5995659 +Conference on. IEEE, Jun. 2011, pp. 2225–2232. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5995659 [BJ00] Y . Boykov and M.-P. Jolly, “Interactive organ segmentation using graph cuts,” in Medical Image Computing and Computer-Assisted Intervention– MICCAI 2000 . Springer, 2000, pp. 276– -286. [Online]. Available: http://link .springer .com/ -chapter/10 .1007/978-3-540-40899-4_28 +286. [Online]. Available: http://link .springer.com/ +chapter/10.1007/978-3-540-40899-4_28 [BKR11] A. Blake, P. Kohli, and C. Rother, Markov random fields for vision and image processing . Mit Press, 2011. [BKTT15] S. Bittel, V . Kaiser, M. Teichmann, and M. 
Thoma, “Pixel-wise segmentation of street with neural networks,” arXiv preprint arXiv:1511.00513 , 2015. -[Online]. Available: http://arxiv .org/abs/1511 .00513 +[Online]. Available: http://arxiv.org/abs/1511.00513 [BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik, “Detecting people using mutually consistent poselet activations,” in Computer Vision–ECCV -2010 . Springer, 2010, pp. 168–181. [Online]. -Available: http://link .springer .com/chapter/10 .1007/ +2010. Springer, 2010, pp. 168–181. [Online]. +Available: http://link.springer.com/chapter/10.1007/ 978-3-642-15567-3_13#page-1 [Bur98] C. J. Burges, “A tutorial on support vector machines -for pattern recognition,” Data mining and knowledge -discovery , vol. 2, no. 2, pp. 121–167, 1998. +for pattern recognition,”Data mining and knowledge +discovery, vol. 2, no. 2, pp. 121–167, 1998. [BVZ01] Y . Boykov, O. Veksler, and R. Zabih, “Fast approximate energy minimization via graph cuts,” Pattern Analysis and Machine Intelligence, IEEE -Transactions on , vol. 23, no. 11, pp. 1222–1239, -2001. [Online]. Available: http://ieeexplore .ieee .org/ -xpls/abs_all .jsp?arnumber=969114 +Transactions on, vol. 23, no. 11, pp. 1222–1239, +2001. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=969114 [CDF+04] G. Csurka, C. Dance, L. Fan, J. Willamowski, and C. Bray, “Visual categorization with bags of keypoints,” in Workshop on statistical learning in @@ -1067,47 +1121,47 @@ computer vision, ECCV , vol. 1, no. 1-22. Prague, 2004, pp. 1–2. [CJSW01] H.-D. Cheng, X. Jiang, Y . Sun, and J. Wang, “Color image segmentation: advances and prospects,” -Pattern recognition , vol. 34, no. 12, pp. 2259–2281, +Pattern recognition, vol. 34, no. 12, pp. 2259–2281, 2001. [CLP98] C. W. Chen, J. Luo, and K. J. Parker, “Image segmentation via adaptive k-mean clustering and knowledge-based morphological operations with biomedical applications,” Image Processing, IEEE -Transactions on , vol. 7, no. 12, pp. 1673–1683, Dec. 
+Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec. -1998. [Online]. Available: http://ieeexplore .ieee .org/ -xpls/abs_all .jsp?arnumber=730379 +1998. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=730379 [CM02] D. Comaniciu and P. Meer, “Mean shift: A robust approach toward feature space analysis,” Pattern Analysis and Machine Intelligence, IEEE -Transactions on , vol. 24, no. 5, pp. 603–619, 2002. -[Online]. Available: http://ieeexplore .ieee .org/xpl/ -login .jsp?tp=&arnumber=1000236 +Transactions on, vol. 24, no. 5, pp. 603–619, 2002. +[Online]. Available: http://ieeexplore .ieee.org/xpl/ +login.jsp?tp=&arnumber=1000236 [COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, “A pixel classification system for segmenting biomedical images using intensity neighborhoods and dimension reduction,” in Biomedical Imaging: From Nano to Macro, 2011 IEEE International Symposium on . IEEE, 2011, pp. 1649–1652. -[Online]. Available: https://www .andrew .cmu .edu/ -user/gustavor/chen_isbi_11 .pdf +[Online]. Available: https://www.andrew.cmu.edu/ +user/gustavor/chen_isbi_11.pdf [CP08] G. Csurka and F. Perronnin, “A simple high performance approach to semantic segmentation.” -inBMVC , 2008, pp. 1–10. [Online]. Available: - http://www .xrce .xerox .com/layout/set/print/ -content/download/16654/118653/file/2008-023 .pdf +in BMVC, 2008, pp. 1–10. [Online]. Available: + http://www .xrce.xerox.com/layout/set/print/ +content/download/16654/118653/file/2008-023.pdf [CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and E. Sabo, “Colon crypt segmentation website.” [Online]. - Available: http://mis .haifa .ac.il/~ishimshoni/ -SegmentCrypt/Download .htm + Available: http://mis.haifa.ac.il/~ishimshoni/ +SegmentCrypt/Download.htm [CRSS14] ——, “Memory based active contour algorithm using pixel-level classified images for colon crypt segmentation,” Computerized Medical Imaging and Graphics , Nov. 2014. [Online]. 
Available: -http://mis .haifa .ac.il/~ishimshoni/SegmentCrypt/ +http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/ Active%20contour%20based%20on%20pixellevel%20classified%20image%20for%20colon% -20crypts%20segmentation .pdf +20crypts%20segmentation.pdf [CS10] J. Carreira and C. Sminchisescu, “Constrained parametric min-cuts for automatic object segmentation,” in Computer Vision and Pattern Recognition @@ -1115,61 +1169,62 @@ parametric min-cuts for automatic object segmentation,” pp. 3241–3248. [CS11] ——, “Cpmc: Constrained parametric min-cuts for automatic object segmentation,” Feb. 2011. [Online]. -Available: http://www .maths .lth.se/matematiklth/ +Available: http://www .maths.lth.se/matematiklth/ personal/sminchis/code/cpmc/ [CSI+09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V . Stoecker, J. M. Malters, and J. M. Grichnik, “An improved objective evaluation measure for border detection in dermoscopy images,” Skin Research -and Technology , vol. 15, no. 4, pp. 444–450, 2009. -[Online]. Available: http://arxiv .org/abs/1009 .1020 +and Technology, vol. 15, no. 4, pp. 444–450, 2009. +[Online]. Available: http://arxiv.org/abs/1009.1020 [CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear segmentation in microscope cell images: a handsegmented dataset and comparison of algorithms,” -inBiomedical Imaging: From Nano to Macro, +in Biomedical Imaging: From Nano to Macro, 2009. ISBI’09. IEEE International Symposium on . IEEE, 2009, pp. 518–521. [Online]. Available: -http://murphylab .web .cmu .edu/data +http://murphylab.web.cmu.edu/data [CXGS12] M. D. Collins, J. Xu, L. Grady, and V . Singh, “Random walks based multi-image segmentation: Quasiconvexity results and gpu-based solutions,” -inComputer Vision and Pattern Recognition +in Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on . IEEE, 2012, pp. 1656–1663. [Online]. Available: http: -//pages .cs.wisc .edu/~jiaxu/pub/rwcoseg .pdf +//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf [DHS15] J. Dai, K. 
He, and J. Sun, “Instance-aware semantic segmentation via multi-task network cascades,” arXiv preprint arXiv:1512.04412 , 2015. [DT05] N. Dalal and B. Triggs, “Histograms of oriented gradients for human detection,” in Computer Vision and Pattern Recognition, 2005. CVPR -2005. IEEE Computer Society Conference on ,vol. 1, June 2005, pp. 886–893 vol. 1. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=1467360 +2005. IEEE Computer Society Conference on , +vol. 1, June 2005, pp. 886–893 vol. 1. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=1467360 [EVGW+a] M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, and A. Zisserman, “The PASCAL Visual Object Classes Challenge 2007 (VOC2007) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2007/workshop/index.html. -[Online]. Available: http://host .robots .ox.ac.uk: -8080/pascal/VOC/voc2007/index .html +[Online]. Available: http://host .robots.ox.ac.uk: +8080/pascal/VOC/voc2007/index.html [EVGW+b] ——, “The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2012/workshop/index.html. -[Online]. Available: http://host .robots .ox.ac.uk: -8080/pascal/VOC/voc2012/index .html +[Online]. Available: http://host .robots.ox.ac.uk: +8080/pascal/VOC/voc2012/index.html [EVGW+10] M. Everingham, L. Van Gool, C. K. Williams, J. Winn, and A. Zisserman, “The pascal visual object classes (voc) challenge,” International journal of -computer vision , vol. 88, no. 2, pp. 303–338, 2010. +computer vision, vol. 88, no. 2, pp. 303–338, 2010. [EVGW+12] M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, and A. Zisserman, “Visual object classes challenge 2012 (voc2012),” 2012. [Online]. -Available: http://host .robots .ox.ac.uk:8080/pascal/ -VOC/voc2012/index .html +Available: http://host.robots.ox.ac.uk:8080/pascal/ +VOC/voc2012/index.html [Fel] P. F. Felzenszwalb, “Graph based image segmentation.” [Online]. 
Available: http: -//cs .brown .edu/~pff/segment/ +//cs.brown.edu/~pff/segment/ [FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, and D. Ramanan, “Object detection with discriminatively trained part-based models,” Pattern Analysis @@ -1179,14 +1234,14 @@ vol. 32, no. 9, pp. 1627–1645, 2010. “Efficient graph-based image segmentation,” International Journal of Computer Vision , vol. 59, no. 2, pp. 167–181, 2004. [Online]. -Available: http://link .springer .com/article/10 .1023/ -B:VISI .0000022288 .19776 .77 +Available: http://link.springer.com/article/10.1023/ +B:VISI.0000022288.19776.77 [FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A new performance measure and evaluation benchmark for road detection algorithms,” in International Conference on Intelligent Transportation Systems (ITSC) , 2013. [Online]. Available: -http://www .cvlibs .net/datasets/kitti/eval_road .php +http://www.cvlibs.net/datasets/kitti/eval_road.php [GBVdW+10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials for joint classification and segmentation,” in @@ -1196,23 +1251,23 @@ Computer Vision and Pattern Recognition (CVPR), [GRC+08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and D. Koller, “Multi-class segmentation with relative location prior,” International Journal of Computer -Vision , vol. 80, no. 3, pp. 300–316, Apr. 2008. +Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008. [GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.Z. Yang, “Probabilistic tracking of affine-invariant anisotropic regions,” Pattern Analysis and Machine Intelligence, IEEE Transactions on , vol. 35, no. 1, pp. 130–143, 2013. -[Har75] J. A. Hartigan, Clustering algorithms . John Wiley +[Har75] J. A. Hartigan, Clustering algorithms. John Wiley & Sons, Inc., 1975. [HDT02] C. Huang, L. Davis, and J. Townshend, “An assessment of support vector machines for land -cover classification,” International Journal of remote -sensing , vol. 23, no. 4, pp. 
725–749, 2002. +cover classification,”International Journal of remote +sensing, vol. 23, no. 4, pp. 725–749, 2002. [HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic lung segmentation for accurate quantitation of -volumetric x-ray ct images,” Medical Imaging, IEEE +volumetric x-ray ct images,”Medical Imaging, IEEE -Transactions on , vol. 20, no. 6, pp. 490–498, Jun. +Transactions on, vol. 20, no. 6, pp. 490–498, Jun. 2001. [HJBJ+96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, @@ -1221,105 +1276,106 @@ Fisher, “An experimental comparison of range image segmentation algorithms,” Pattern Analysis and Machine Intelligence, IEEE Transactions on, vol. 18, no. 7, pp. 673–689, Jul. 1996. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=506791 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=506791 [Ho95] T. K. Ho, “Random decision forests,” in Document Analysis and Recognition, 1995., Proceedings of the Third International Conference on, vol. 1. IEEE, 1995, pp. 278–282. -[Online]. Available: http://ect .bell-labs .com/who/ -tkh/publications/papers/odt .pdf +[Online]. Available: http://ect .bell-labs.com/who/ +tkh/publications/papers/odt.pdf [Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia Commons, Nov. 2007. [Online]. Available: - https://commons .wikimedia .org/wiki/File: -CCTV_Lens_flare .jpg + https://commons .wikimedia.org/wiki/File: +CCTV_Lens_flare.jpg [HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, “Multiscale conditional random fields for image labeling,” in Computer Vision and Pattern Recognition, 2004. CVPR 2004. Proceedings of the 2004 IEEE Computer Society Conference on, vol. 2, Jun. 2004, pp. II–695–II–702 V ol.2. -[Online]. Available: http://ieeexplore .ieee .org/xpl/ -login .jsp?tp=&arnumber=1315232 +[Online]. Available: http://ieeexplore .ieee.org/xpl/ +login.jsp?tp=&arnumber=1315232 [JLD03] K. Jiang, Q.-M. Liao, and S.-Y . 
Dai, “A novel white blood cell segmentation scheme using scale-space filtering and watershed clustering,” in Machine Learning and Cybernetics, 2003 International -Conference on , vol. 5, Nov 2003, pp. 2820–2825 -V ol.5. [Online]. Available: http://ieeexplore .ieee .org/ -xpl/login .jsp?tp=&arnumber=1260033 +Conference on, vol. 5, Nov 2003, pp. 2820–2825 +V ol.5. [Online]. Available: http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=1260033 [Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,” Wikipedia Commons, Jul. 2007. [Online]. -Available: https://commons .wikimedia .org/wiki/File: -Great_male_Leopard_in_South_Afrika-JD .JPG +Available: https://commons.wikimedia.org/wiki/File: +Great_male_Leopard_in_South_Afrika-JD.JPG [KKV+14] V . Kalesnykiene, J.-k. Kamarainen, R. V outilainen, J. Pietilä, H. Kälviäinen, and H. Uusitalo, “Diaretdb1 diabetic retinopathy database and evaluation protocol,” 2014. [Online]. Available: -http://www2 .it.lut.fi/project/imageret/diaretdb1/ +http://www2.it.lut.fi/project/imageret/diaretdb1/ [KP92] J. M. Kasson and W. Plouffe, “An analysis of selected computer interchange color spaces,” ACM -Transactions on Graphics (TOG) , vol. 11, no. 4, pp. +Transactions on Graphics (TOG), vol. 11, no. 4, pp. 373–405, 1992. [KP06] Z. Kato and T.-C. Pong, “A markov random field image segmentation model for color textured images,” Image and Vision Computing , vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. -Available: http://www .sciencedirect .com/science/ +Available: http://www .sciencedirect.com/science/ article/pii/S0262885606001223 [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification with deep convolutional neural networks,” in Advances in neural information -processing systems , 2012, pp. 1097–1105. +processing systems, 2012, pp. 1097–1105. [KWT88] M. Kass, A. Witkin, and D. Terzopoulos, “Snakes: Active contour models,” International journal of computer vision , vol. 1, no. 4, pp. 321–331, Jan. 1988. 
[Online]. Available: http: -//link .springer .com/article/10 .1007/BF00133570 +//link.springer.com/article/10.1007/BF00133570 [LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, “CS231n: Convolutional neural networks for visual recognition,” 2015. [Online]. Available: -http://cs231n .stanford .edu/ -[Low04] D. Lowe, “Distinctive image features from scale-invariant keypoints,” International Journal of -Computer Vision , vol. 60, no. 2, pp. 91–110, 2004. -[Online]. Available: http://dx .doi .org/10 .1023/B% -3A VISI .0000029664 .99615 .94 +http://cs231n.stanford.edu/ +[Low04] D. Lowe, “Distinctive image features from scaleinvariant + keypoints,” International Journal of +Computer Vision, vol. 60, no. 2, pp. 91–110, 2004. +[Online]. Available: http://dx .doi.org/10.1023/B% +3A VISI.0000029664.99615.94 [LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, “Spectral matting,” Pattern Analysis and Machine Intelligence, IEEE Transactions on , vol. 30, no. 10, pp. 1699–1712, 2008. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=4547428 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4547428 [LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr, “Associative hierarchical crfs for object class image segmentation,” in Computer Vision, 2009 IEEE 12th International Conference on , 2009, pp. 739–746. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=5459248 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5459248 [LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for semantic segmentation,” arXiv preprint arXiv:1411.4038 , 2014. [Online]. -Available: http://arxiv .org/abs/1411 .4038 +Available: http://arxiv.org/abs/1411.4038 [MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and J. Malik, “Using contours to detect and localize junctions in natural images,” in Computer Vision and Pattern Recognition, 2008. CVPR 2008. 
IEEE Conference on , June 2008, pp. 1–8. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=4587420 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587420 [Man12] M. Manske, “File:randabschattung mikroskop kamera 6.jpg,” Wikipedia Commons, Dec. 2012. [Online]. Available: - https://commons .wikimedia .org/wiki/File: -Randabschattung_Mikroskop_Kamera_6 .JPG + https://commons .wikimedia.org/wiki/File: +Randabschattung_Mikroskop_Kamera_6.JPG [MBLAGJ+07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez, H. Gomez-Moreno, and F. LopezFerreras, “Road-sign detection and recognition based on support vector machines,” Intelligent Transportation Systems, IEEE Transactions on , vol. 8, no. 2, pp. 264–278, Jun. 2007. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=4220659 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4220659 [MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig, “Automatic brain and tumor segmentation,” in Medical Image Computing and Computer-Assisted Intervention—MICCAI @@ -1331,9 +1387,9 @@ images and its application to evaluating segmentation algorithms and measuring ecological statistics,” in Computer Vision, 2001. ICCV 2001. Proceedings. Eighth IEEE International -Conference on , vol. 2. IEEE, 2001, pp. 416–423. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=937655 +Conference on, vol. 2. IEEE, 2001, pp. 416–423. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=937655 [MHMK+14] L. Maier-Hein, S. Mersmann, D. Kondermann, S. Bodenstedt, A. Sanchez, C. Stock, H. G. Kenngott, M. Eisenmann, and S. Speidel, “Can @@ -1341,11 +1397,11 @@ masses of non-experts train highly accurate image classifiers?” in Medical Image Computing and Computer-Assisted Intervention–MICCAI 2014 . Springer, 2014, pp. 438–445. [Online]. 
Available: -http://opencas .webarchiv .kit.edu/?q=node/26 +http://opencas.webarchiv.kit.edu/?q=node/26 [Min89] J. Mingers, “An empirical comparison of selection measures for decision-tree induction,” Machine -Learning , vol. 3, no. 4, pp. 319–342, 1989. -[Online]. Available: http://dx .doi .org/10 .1023/A% +Learning, vol. 3, no. 4, pp. 319–342, 1989. +[Online]. Available: http://dx.doi.org/10.1023/A% 3A1022645801436 [MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson, “Markov random field models for supervised land @@ -1353,22 +1409,22 @@ Learning , vol. 3, no. 4, pp. 319–342, 1989. cover classification from very high resolution multispectral remote sensing images,” in Advances in Radar and Remote Sensing (TyWRRS), 2012 -Tyrrhenian Workshop on . IEEE, 2012, pp. 235– -242. [Online]. Available: http://ieeexplore .ieee .org/ -xpl/login .jsp?tp=&arnumber=6381135 +Tyrrhenian Workshop on. IEEE, 2012, pp. 235– +242. [Online]. Available: http://ieeexplore .ieee.org/ +xpl/login.jsp?tp=&arnumber=6381135 [MSC] “Object class recognition image database.” -[Online]. Available: http://research .microsoft .com/ +[Online]. Available: http://research.microsoft.com/ vision/cambridge/recognition/ [MSR] “Image understanding - research data,” Microsoft Research. [Online]. Available: - http://research .microsoft .com/en-us/projects/ + http://research.microsoft.com/en-us/projects/ objectclassrecognition/ [Mur12] K. P. Murphy, Machine learning: a probabilistic -perspective . MIT press, 2012. +perspective. MIT press, 2012. [OKS78] Y .-i. Ohta, T. Kanade, and T. Sakai, “An analysis system for scenes containing objects with substructures,” in Proceedings of the Fourth International -Joint Conference on Pattern Recognitions , 1978, pp. +Joint Conference on Pattern Recognitions, 1978, pp. 752–754. [PAA+87] S. M. Pizer, E. P. Amburn, J. D. Austin, R. Cromartie, A. Geselowitz, T. Greer, B. ter @@ -1376,168 +1432,169 @@ Haar Romeny, J. B. Zimmerman, and K. 
Zuiderveld, “Adaptive histogram equalization and its variations,” Computer vision, graphics, and image processing , vol. 39, no. 3, pp. 355–368, 1987. [Online]. -Available: http://www .sciencedirect .com/science/ +Available: http://www .sciencedirect.com/science/ article/pii/S0734189X8780186X [PC13] P. H. Pinheiro and R. Collobert, “Recurrent convolutional neural networks for scene parsing,” arXiv preprint arXiv:1306.2795 , 2013. [Online]. -Available: http://arxiv .org/abs/1306 .2795v1 +Available: http://arxiv.org/abs/1306.2795v1 [PH05] C. Pantofaru and M. Hebert, “A comparison of image segmentation algorithms,” Robotics Institute , p. 336, 2005. [Online]. -Available: http://riweb-backend .ri.cmu .edu/ -pub_files/pub4/pantofaru _caroline _2005 _1/ -pantofaru_caroline_2005_1 .pdf +Available: http://riweb-backend .ri.cmu.edu/ +pub_files/pub4/pantofaru_caroline_2005_1/ +pantofaru_caroline_2005_1.pdf [PS07] A. Protiere and G. Sapiro, “Interactive image segmentation via adaptive weighted distances,” Image Processing, IEEE Transactions on, vol. 16, no. 4, pp. 1046–1057, 2007. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=4130436 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4130436 [PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass image segmentation using conditional random fields and global classification,” in Proceedings of the 26th Annual International Conference on -Machine Learning . ACM, 2009, pp. 817–824. +Machine Learning. ACM, 2009, pp. 817–824. [PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A survey of current methods in medical image segmentation,” Annual Review of Biomedical -Engineering , vol. 2, no. 1, pp. 315–337, 2000, +Engineering, vol. 2, no. 1, pp. 315–337, 2000, pMID: 11701515. [Online]. Available: http:// -dx.doi .org/10 .1146/annurev .bioeng .2.1.315 +dx.doi.org/10.1146/annurev.bioeng.2.1.315 [Qui86] J. R. Quinlan, “Induction of decision trees,” Machine learning , vol. 1, no. 
1, pp. 81–106, -Aug. 1986. [Online]. Available: http://dx .doi .org/ +Aug. 1986. [Online]. Available: http://dx .doi.org/ 10.1023/A%3A1022643204877 -[Qui93] ——, C4.5: Programs for Machine Learning , P. Langley, +[Qui93] ——, C4.5: Programs for Machine Learning, P. Langley, Ed. Morgan Kaufmann Publishers, Inc., 1993. [RKB04] C. Rother, V . Kolmogorov, and A. Blake, “Grabcut: Interactive foreground extraction using iterated graph cuts,” ACM Transactions on Graphics -(TOG) , vol. 23, no. 3, pp. 309–314, 2004. [Online]. -Available: http://delivery .acm .org/10 .1145/1020000/ -1015720/p309-rother .pdf +(TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online]. +Available: http://delivery.acm.org/10.1145/1020000/ +1015720/p309-rother.pdf [RM00] J. B. Roerdink and A. Meijster, “The watershed -transform: Definitions, algorithms and paralleliza-tion strategies,” Fundam. Inform. , vol. 41, no. 1-2, +transform: Definitions, algorithms and parallelization + strategies,” Fundam. Inform., vol. 41, no. 1-2, pp. 187–228, 2000. [RM07] J. Reynolds and K. Murphy, “Figure-ground segmentation using a hierarchical conditional random field,” in Computer and Robot Vision, 2007. CRV ’07. Fourth Canadian Conference on , May 2007, pp. 175–182. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=4228537 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4228537 [RMBK06] C. Rother, T. Minka, A. Blake, and V . Kolmogorov, “Cosegmentation of image pairs by histogram matching - incorporating a global constraint into mrfs,” in Computer Vision and Pattern Recognition, 2006 IEEE Computer Society Conference on , vol. 1, June 2006, pp. 993– -1000. [Online]. Available: http://ieeexplore .ieee .org/ -xpls/abs_all .jsp?arnumber=1640859 +1000. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=1640859 [SAN+04] J. Staal, M. D. Abràmoff, M. Niemeijer, -M. Viergever, B. Van Ginneken et al. , “Ridge-based +M. Viergever, B. 
Van Ginneken et al., “Ridge-based vessel segmentation in color images of the retina,” Medical Imaging, IEEE Transactions on , vol. 23, no. 4, pp. 501–509, 2004. [Online]. Available: -http://www .isi.uu.nl/Research/Databases/DRIVE/ +http://www.isi.uu.nl/Research/Databases/DRIVE/ [SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, “Object class segmentation using random -forests.” in BMVC , 2008, pp. 1–10. [Online]. - Available: http://research .microsoft .com/pubs/ -72423/Criminisi_bmvc2008 .pdf +forests.” in BMVC, 2008, pp. 1–10. [Online]. + Available: http://research.microsoft.com/pubs/ +72423/Criminisi_bmvc2008.pdf [SJC08] J. Shotton, M. Johnson, and R. Cipolla, “Semantic texton forests for image categorization and segmentation,” in Computer vision and pattern recognition, 2008. CVPR 2008. IEEE Conference on . IEEE, Jun. 2008, pp. 1–8. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=4587503 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587503 [SM11] C. Sutton and A. McCallum, “An introduction -to conditional random fields,” Machine Learning , +to conditional random fields,” Machine Learning, vol. 4, no. 4, pp. 267–373, 2011. [Online]. -Available: http://homepages .inf .ed.ac.uk/csutton/ -publications/crftutv2 .pdf +Available: http://homepages .inf.ed.ac.uk/csutton/ +publications/crftutv2.pdf [Smi02] L. I. Smith, “A tutorial on principal components -analysis,” Cornell University, USA , vol. 51, p. 52, +analysis,” Cornell University, USA, vol. 51, p. 52, 2002. [Smi04] B. T. Smith, “Lagrange multipliers tutorial in the context of support vector machines,” Memorial University of Newfoundland St. John’s, Newfoundland, -Canada , Jun. 2004. +Canada, Jun. 2004. [SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery, segmentation and reactive grasping of unknown -objects.” in Humanoids , 2012, pp. 71–77. [Online]. 
- Available: http://h2t .anthropomatik .kit.edu/ -pdf/Schiebener2012 .pdf +objects.” in Humanoids, 2012, pp. 71–77. [Online]. + Available: http://h2t .anthropomatik.kit.edu/ +pdf/Schiebener2012.pdf [SUM+11] D. Schiebener, A. Ude, J. Morimotot, T. Asfour, and R. Dillmann, “Segmentation and learning of unknown objects through physical interaction,” in Humanoid Robots (Humanoids), 2011 11th IEEE-RAS International Conference on. IEEE, 2011, pp. 500–506. [Online]. -Available: http://ieeexplore .ieee .org/ielx5/6086637/ -6100798/06100843 .pdf +Available: http://ieeexplore.ieee.org/ielx5/6086637/ +6100798/06100843.pdf [SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, “Textonboost: Joint appearance, shape and context modeling for multi-class object recognition and segmentation,” in Computer Vision–ECCV 2006 . Springer, 2006, pp. 1–15. [Online]. Available: http: -//link .springer .com/chapter/10 .1007/11744023_1 +//link.springer.com/chapter/10.1007/11744023_1 [TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, “Scene parsing with object instances and occlusion ordering,” in Computer Vision and Pattern Recognition (CVPR), 2014 IEEE Conference on . IEEE, 2014, pp. 3748–3755. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=6909874 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=6909874 [UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, “A measure for objective evaluation of image segmentation algorithms,” in Computer Vision and Pattern Recognition-Workshops, 2005. CVPR Workshops. IEEE Computer Society Conference on . IEEE, 2005, pp. 34–34. -[Online]. Available: http://repository .cmu .edu/cgi/ -viewcontent .cgi?article=1365&context=robotics +[Online]. Available: http://repository.cmu.edu/cgi/ +viewcontent.cgi?article=1365&context=robotics [vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. van den Herik, “Dimensionality reduction: A comparative review,” Journal of Machine Learning -Research , vol. 
10, no. 1-41, pp. 66–71, 2009. +Research, vol. 10, no. 1-41, pp. 66–71, 2009. [VOC10] “V oc2010 preliminary results,” 2010. [Online]. -Available: http://host .robots .ox.ac.uk/pascal/VOC/ -voc2010/results/index .html -[WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic +Available: http://host.robots.ox.ac.uk/pascal/VOC/ +voc2010/results/index.html +[W AH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic tracking of laparoscopic instruments by color -coding,” in CVRMed-MRCAS’97 , ser. Lecture +coding,” in CVRMed-MRCAS’97, ser. Lecture Notes in Computer Science, J. Troccaz, E. Grimson, and R. Mösges, Eds. Springer Berlin Heidelberg, 1997, vol. 1205, pp. 357–366. [Online]. Available: -http://dx .doi .org/10 .1007/BFb0029257 +http://dx.doi.org/10.1007/BFb0029257 [YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell segmentation in microscopy imagery using a bag of local bayesian classifiers,” in Biomedical Imaging: From Nano to Macro, 2010 IEEE International Symposium on , Apr. 2010, pp. 125– -128. [Online]. Available: http://ieeexplore .ieee .org/ -xpls/abs_all .jsp?arnumber=5490399 +128. [Online]. Available: http://ieeexplore .ieee.org/ +xpls/abs_all.jsp?arnumber=5490399 [YHRF12] Y . Yang, S. Hallman, D. Ramanan, and C. C. Fowlkes, “Layered object models for image segmentation,” Pattern Analysis and Machine Intelligence, IEEE Transactions on , vol. 34, no. 9, pp. 1731–1743, Sep. 2012. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=6042883 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=6042883 [ZBS01] Y . Zhang, M. Brady, and S. Smith, “Segmentation of brain MR images through a hidden Markov random field model and the expectationmaximization algorithm,” Medical Imaging, IEEE -Transactions on , vol. 20, no. 1, pp. 45–57, 2001. -[Online]. Available: http://ieeexplore .ieee .org/xpls/ -abs_all .jsp?arnumber=906424 +Transactions on, vol. 20, no. 1, pp. 45–57, 2001. +[Online]. 
Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=906424 [ZGWX05] S.-C. Zhu, C.-E. Guo, Y . Wang, and Z. Xu, “What are textons?” International Journal of Computer -Vision , vol. 62, no. 1-2, pp. 121–143, 2005. +Vision, vol. 62, no. 1-2, pp. 121–143, 2005. [Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,” MultiMedia, IEEE , vol. 19, no. 2, pp. 4–10, Feb. 2012. @@ -1546,9 +1603,10 @@ V . Vineet, Z. Su, D. Du, C. Huang, and P. H. Torr, “Conditional random fields as recurrent neural networks,” in Proceedings of the IEEE International Conference on -Computer Vision , 2015, pp. 1529–1537. [Online]. -Available: http://www .robots .ox.ac.uk/~szheng/ -papers/CRFasRNN .pdfGLOSSARY +Computer Vision, 2015, pp. 1529–1537. [Online]. +Available: http://www .robots.ox.ac.uk/~szheng/ +papers/CRFasRNN.pdf +GLOSSARY ACM active contour model. 6 BOV bag-of-visual-words. 5 CNN Convolution Neuronal Network. 5, 9 @@ -1567,17 +1625,20 @@ SVM Support Vector Machine. 4, 6–8 APPENDIX A TABLES -Database Image Resolution (width ×height)Number +Database Image Resolution (width × height) +Number of -ImagesNumber +Images +Number of -ClassesChannels Data source -Colon Crypt DB (302 px−1116 px)×(349 px−875px) 389 2 3 [CRSS] -DIARETDB1 1500 px×1500 px 89 4 3 [KKV+14] -KITTI Road (1226 px−1242 px)×(370 px−376px) 289 2 3 [FKG13] -MSRCv1 (213 px−320px)×(213 px−320px) 240 9 3 [MSR] -MSRCv2 (213 px−320px)×(162 px−320px) 591 23 3 [MSR] -Open-CAS Endoscopic Datasets 640px×480px 120 2 3 [MHMK+14] -PASCAL VOC 2012 (142 px−500px)×( 71 px−500px) 2913 20 3 [EVGW+12] -Warwick-QU (567 px−775px)×(430 px−522px) 165 5 3 [CSM09] +Classes +Channels Data source +Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS] +DIARETDB1 1500 px × 1500 px 89 4 3 [KKV +14] +KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13] +MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR] +MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR] +Open-CAS Endoscopic Datasets 640 
px × 480 px 120 2 3 [MHMK +14] +PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW +12] +Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] Table I: An overview over publicly available image databases with a semantic segmentation ground trouth. \ No newline at end of file diff --git a/read/results/pypdf/1707.09725.txt b/read/results/pypdf/1707.09725.txt index cef8e8c..edaac58 100644 --- a/read/results/pypdf/1707.09725.txt +++ b/read/results/pypdf/1707.09725.txt @@ -11,7 +11,8 @@ Reviewer: Prof. Dr.–Ing. R. Dillmann Second reviewer: Prof. Dr.–Ing. J. M. Zöllner Advisor: Dipl.–Inform. Michael Weber Research Period: 03. May 2017 – 03. August 2017 -KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.eduarXiv:1707.09725v1 [cs.CV] 31 Jul 2017 +KIT – University of the State of Baden-Wuerttemberg and National Research Center of the Helmholtz Association www.kit.edu +arXiv:1707.09725v1 [cs.CV] 31 Jul 2017 Analysis and Optimization of Convolutional Neural Network Architectures @@ -36,7 +37,7 @@ August 2017 Abstract Convolutional Neural Networks (CNNs) dominate various computer vision tasks since Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error -from 26.2 %to15.3 %on the ImageNet large scale visual recognition challenge. Many +from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many aspects of CNNs are examined in various publications, but literature about the analysis and construction of neural network architectures is rare. This work is one step to close this gap. A comprehensive overview over existing techniques for CNN analysis and topology @@ -46,7 +47,7 @@ evaluated. Additionally, some results are confirmed and quantified for CIFAR-100 example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation and test-time transformations on the accuracy. 
Other results, such as the positive impact of learned color transformation on the test accuracy could not be confirmed. A model which -has only one million learned parameters for an input size of 32×32×3and 100 classes and +has only one million learned parameters for an input size of32 ×32 ×3 and 100 classes and which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and STL-10 was developed. @@ -54,7 +55,7 @@ Zusammenfassung Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual -recognition challenge Benchmark von 26.2 %auf15.3 %drücken konnte. Viele Aspekte +recognition challenge Benchmark von26.2 % auf 15.3 % drücken konnte. Viele Aspekte von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine @@ -184,20 +185,20 @@ low-level information given by raw pixels from digital images. Robots, search engines, self-driving cars, surveillance agencies and many others have applications which include one of the following six problems in computer vision as subproblems: -•Classification :1The algorithm is given an image and kpossible classes. The task is -to decide which of the kclasses the image belongs to. For example, an image from -a self-driving cars on-board camera contains either paved road ,unpaved road or +• Classification:1 The algorithm is given an image andk possible classes. The task is +to decide which of thek classes the image belongs to. For example, an image from +a self-driving cars on-board camera contains eitherpaved road, unpaved road or no road: Which of those given three classes is in the image? 
-•Localization : The algorithm is given an image and one class k. The task is to find -bounding boxes for all instances of k. -•Detection : Given an image and kclasses, find bounding boxes for all instances of +• Localization: The algorithm is given an image and one classk. The task is to find +bounding boxes for all instances ofk. +• Detection: Given an image andk classes, find bounding boxes for all instances of those classes. -•Semantic Segmentation : Given an image and kclasses, classify each pixel. -•Instance segmentation : Given an image and kclasses, classify each pixel as one of -thekclasses, but distinguish different instances of the classes. -•Content-based Image Retrieval : Given an image xandnimages in a database, -find the top uimages which are most similar to x. -There are many techniques to approach those problems, but since AlexNet [ KSH12] was +• Semantic Segmentation: Given an image andk classes, classify each pixel. +• Instance segmentation: Given an image andk classes, classify each pixel as one of +the k classes, but distinguish different instances of the classes. +• Content-based Image Retrieval: Given an imagex and n images in a database, +find the topu images which are most similar tox. +There are many techniques to approach those problems, but since AlexNet [KSH12] was published, all of those problems have high-quality solutions which make use of Convolutional Neural Networks (CNNs) [HZRS15a, LAE+16, RFB15, DHS16, SKP15]. Today, most neural networks are constructed by rules of thumb and gut feeling. The @@ -205,7 +206,7 @@ architectures evolved and got deeper, more hyperparameters were added. Although are methods for analyzing CNNs, those methods are not enough to determine all steps in the development of network architectures without gut feeling. A detailed introduction to CNNs as well as nine methods for analysis of CNNs is given in Chapter 2. -1Classification is also called identification if the classes are humans. 
Another name is object recognition , +1Classification is also calledidentification if the classes are humans. Another name isobject recognition, although the classes can be humans and animals as well. 1. Introduction @@ -232,125 +233,139 @@ This chapter introduces linear image filters in Section 2.1, then standard layer CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. 2.1. Linear Image Filters -Alinear image filter (also called a filter bank or akernel) is an element F∈Rkw×kh×d, -wherekwrepresents the filter’s width, khthe filter’s height and dthe number of input -channels. The filter Fis convolved with the image I∈Rw×h×dto produce a new image I′. -The output image I′has only one channel. Each pixel I′(x,y)of the output image gets +A linear image filter(also called afilter bank or akernel) is an elementF ∈Rkw×kh×d, +where kw represents the filter’s width,kh the filter’s height andd the number of input +channels. The filterF is convolved with the imageI ∈Rw×h×d to produce a new imageI′. +The output imageI′ has only one channel. Each pixelI′(x,y) of the output image gets calculated by point-wise multiplication of one filter element with one element of the original -imageI: -I′(x,y) =⌊kw -2⌋∑ +image I: +I′(x,y) = +⌊kw +2 ⌋∑ ix=1−⌈kw -2⌉⌊kh -2⌋∑ +2 ⌉ +⌊kh +2 ⌋∑ iy=1−⌈kh -2⌉d∑ -ic=1I(x+ix,y+iy,ic)·F(ix,iy,ic) -This procedure is explained by Figure 2.1. It is essentially a discrete convolution.I∈R7×7 -Filter kernel -F∈R3×3Result of point-wise -multiplicationI′∈R7×7 -104116116112584747 +2 ⌉ +d∑ +ic=1 +I(x+ ix,y + iy,ic) ·F(ix,iy,ic) +This procedure is explained by Figure 2.1. It is essentially a discrete convolution. 
+I∈R7×7 +Filter kernelF∈R3×3 +Result of point-wisemultiplication +I′∈R7×7 +10411611611258 47 47 1099711411610511045 -1161041111099746100 +11610411110997 46100 1014710997115116101 -1144799971169997 -116999711646112104 -11263118614946489-3-1 --653 -2-80936-333-109 +11447 99 9711699 97 +11699 9711646112104 +1126311861 49 46 48 +9 -3 -1 +-6 5 3 +2 -8 0 +936-333-109 -282545291 -94-7920-4-254-498-662-849-642187 +94-7920 +-4-254-498-662-849-642187 -52045240211388215-861 -340559-105185-138-180503 -718429350173251268-655 -567-53-7580571-12824 -408596-55036826976156 30264787922381154660 -Figure 2.1.: Visualization of the application of a linear k×k×1image filter. For each pixel of the -output image, k2multiplications and k2additions of the products have to be calculated. +Figure 2.1.:Visualization of the application of a lineark×k×1 image filter. For each pixel of the +output image,k2 multiplications andk2 additions of the products have to be calculated. 2. Convolutional Neural Networks One important detail is how boundaries are treated. There are four common ways of boundary treatment: -•don’t compute : The image I′will be smaller than the original image. I′∈ -R(w−kw+1)×(h−kh+1)×d3, to be exact. -•zero padding : The image Iis padded by zeros where the filter would access elements +• don’t compute: The image I′ will be smaller than the original image. I′ ∈ +R(w−kw+1)×(h−kh+1)×d3 , to be exact. +• zero padding: The imageI is padded by zeros where the filter would access elements which do not exist. This will result in edges being detected at the border if the border pixels are not black, but doesn’t need any computation. -•nearest: Repeat the pixel which is closest to the boundary. -•reflect: Reflect the image at the boundaries. +• nearest: Repeat the pixel which is closest to the boundary. +• reflect: Reflect the image at the boundaries. 
Common tasks that can be done with linear filters include edge detection, corner detection, smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples. Please note that the result of a filtering operation is again an image. This means filters -can be applied successively. While each pixel after one filtering operation with a 3×3 -filter got influenced by 3·3 = 9pixels of the original image, two successively applied 3×3 +can be applied successively. While each pixel after one filtering operation with a3 ×3 +filter got influenced by3 ·3 = 9 pixels of the original image, two successively applied3 ×3 filters increase the area of the original image which influenced the output. The output is -then influenced by 25 pixel. This is called the receptive field . The kind of pattern which is -detected by a filter is called a feature. The bigger the receptive field is, the more complex +then influenced by 25 pixel. This is called thereceptive field. The kind of pattern which is +detected by a filter is called afeature. The bigger the receptive field is, the more complex can features get as they are able to consider more of the original image. Instead of taking -one5×5filter with 25 parameters, one might consider to take two successive 3×3filters -with 2·(3·3) = 18parameters. The 5×5filter is a strict superset of possible filtering -operations compared to the two 3×3filters, but the relevance of this technique will become +one 5 ×5 filter with 25 parameters, one might consider to take two successive3 ×3 filters +with 2 ·(3 ·3) = 18 parameters. The 5 ×5 filter is a strict superset of possible filtering +operations compared to the two3 ×3 filters, but the relevance of this technique will become clear in Section 2.2. 2.2. 
CNN Layer Types -While the idea behind deep MLPs is that feature hierarchies capture the important parts -of the input more easily, CNNs are inspired by the idea of translational invariance : Many +While the idea behind deep MLPs is thatfeature hierarchiescapture the important parts +of the input more easily, CNNs are inspired by the idea oftranslational invariance: Many features in an image are translationally invariant. For example, if a car is developed, one -could try to detect it by its parts [ FGMR10 ]. But then there are many positions at which +could try to detect it by its parts [FGMR10]. But then there are many positions at which the wheels could be. Combining those, it is desirable to capture low-level, translationally invariant features at lower layers of an artificial neural network (ANN) and in higher layers high-level features which are combinations of the low-level features. Also, models should utilize the fact that the pixels of images are ordered. One way to use -this is by learning image filters in so called convolutional layers . -While MLPs vectorize the input, the input of a layer in a CNN are feature maps . A feature -map is a matrix m∈Rw×h, but typically the width equals the height ( w=h). For an RGB +this is by learning image filters in so calledconvolutional layers. +While MLPs vectorize the input, the input of a layer in a CNN arefeature maps. A feature +map is a matrixm∈Rw×h, but typically the width equals the height (w= h). For an RGB 2.2. CNN Layer Types -input image, the number of feature maps is d= 3. Each color channel is a feature map. -Since AlexNet [ KSH12] almost halved the error in the ImageNet challenge, CNNs are +input image, the number of feature maps isd= 3. Each color channel is a feature map. +Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are state-of-the-art in various computer vision tasks. 
Traditional CNNs have three important building tools: -•Convolutional layers with a non-linear activation function as described in Section 2.2.1, -•pooling layers as described in Section 2.2.2 and -•normalization layers as described in Section 2.2.4. +• Convolutional layers with a non-linear activation function as described in Section 2.2.1, +• pooling layers as described in Section 2.2.2 and +• normalization layers as described in Section 2.2.4. 2.2.1. Convolutional Layers -Convolutional layers take several feature maps as input and produce nfeature maps1as -output, where nis the number of filters in the convolution layer. The filter weights of +Convolutional layers take several feature maps as input and producen feature maps1 as +output, where n is the number of filters in the convolution layer. The filter weights of the linear convolutions are the parameters which are adapted to the training data. The -numbernof filters as well as the filter’s size kw×khare hyperparameters of convolutional -layers. Sometimes, it is denoted as n@kw×kh. Although the filter depth is usually omitted -in the notation, the filters are of dimension kw×kh×d(i−1), whered(i−1)is the number of -feature maps of the input layer (i−1). -Another hyperparameter of convolution layers is the stride s∈N≥1and the padding. -Padding (usually zero-padding [ SCL12,SEZ+13,HZRS15a ]) is used to make sure that the +number n of filters as well as the filter’s sizekw ×kh are hyperparameters of convolutional +layers. Sometimes, it is denoted asn@kw×kh. Although the filter depth is usually omitted +in the notation, the filters are of dimensionkw ×kh ×d(i−1), whered(i−1) is the number of +feature maps of the input layer(i−1). +Another hyperparameter of convolution layers is the strides ∈N≥1 and the padding. +Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the size of the feature maps doesn’t change. 
The hyperparameters of convolutional layers are -•the number of filters n∈N≥1, -•kw,kh∈N≥1of the filter size kw×kh×d(i−1), -•the activation function of the layer (see Table B.3) and -•the strides∈N≥1 -Typical choices are n∈{32,64,128},kw=kh=k∈{1,3,5,11}such as in [ KSH12, -SZ14, SLJ+15], rectified linear unit (ReLU) activation and s= 1. +• the number of filtersn∈N≥1, +• kw,kh ∈N≥1 of the filter sizekw ×kh ×d(i−1), +• the activation function of the layer (see Table B.3) and +• the strides∈N≥1 +Typical choices aren ∈{ 32,64,128 }, kw = kh = k ∈{ 1,3,5,11 }such as in [KSH12, +SZ14, SLJ+15], rectified linear unit (ReLU) activation ands= 1. TheconceptofweightsharingiscrucialforCNNs. Thisconceptwasintroducedin[ WHH+89]. With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just like MLPs. In fact, every CNN has an equivalent MLP which computes the same function if only the flattened output is compared. -1also called activation maps orchannels +1also calledactivation maps or channels 2. Convolutional Neural Networks This is easier to see when the filtering operation is denoted formally: -o(i)(x) =b+k∑ -j=1wij·xjwithi∈{1,...,w}×{ 1,...,h}×{ 1,...,d}[2.1] -o(x,y,z )(I) =b+⌊kw -2⌋∑ +o(i)(x) = b+ +k∑ +j=1 +wij ·xj with i∈{1,...,w }×{ 1,...,h }×{ 1,...,d } [2.1] +o(x,y,z)(I) = b+ +⌊kw +2 ⌋∑ ix=1−⌈kw -2⌉⌊kh -2⌋∑ +2 ⌉ +⌊kh +2 ⌋∑ iy=1−⌈kh -2⌉d∑ -ic=1Fz(ix,iy,ic)·I(x+ix,y+iy,ic) [2.2] -with a bias b∈R,x∈{1,...,w},y∈{1,...,h}andz∈{1,...,d} +2 ⌉ +d∑ +ic=1 +Fz(ix,iy,ic) ·I(x+ ix,y + iy,ic) [2.2] +with a biasb∈R, x∈{1,...,w }, y∈{1,...,h }and z∈{1,...,d } One can see that most weights of the equivalent MLP are zero and many weights are equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. The effect of fewer parameters is that less training data is necessary to get suitable @@ -358,112 +373,134 @@ estimations for those. 
This means a MLP which is able to compute the same functi CNN will likely have worse results on the same dataset, if a CNN architecture is suitable for the dataset. See Figure 2.2 for a visualization of the application of a convolutional layer. -3feature maps -(e.g. RGB)nfeature mapsnfilters of -sizek×k×3 -widthwwidthw heighth heighthneural +3 feature maps +(e.g. RGB) n feature maps +n filters of +size k× k× 3 +width +w +width +w +height h +height h +neural network -dataapply +data +apply ... ... -...... ... ... -Figure 2.2.: Application of a single convolutional layer with nfilters of size k×k×3with stride -s= 1to input data of size width ×height with three channels. +... +... +Figure 2.2.:Application of a single convolutional layer withn filters of sizek×k×3 with stride +s= 1 to input data of size width×height with three channels. 2.2. CNN Layer Types -A convolutional layer with nfilters of size kw×khand SAMEpadding after d(i−1)feature -maps of size sx×syhasn·d(i−1)·(kw·kh)parameters if no bias is used. In contrast, a fully +A convolutional layer withn filters of sizekw ×kh and SAME padding afterd(i−1) feature +maps of sizesx×sy has n·d(i−1) ·(kw·kh) parameters if no bias is used. In contrast, a fully connected layer which produces the same output size and does not use a bias would have -n·d(i−1)·(sx×sy)2parameters. This means a convolutional layer has drastically fewer +n·d(i−1) ·(sx ×sy)2 parameters. This means a convolutional layer has drastically fewer parameters. One the one hand, this means it can learn less complex decision boundaries. On the other hand, it means fewer parameters have to be learned and hence the optimization procedure needs fewer examples and the optimization objective is simpler. -It is particularly interesting to notice that even a convolutional layer of 1×1filters does -learn a linear combination of the dinput feature maps. 
This can be used for dimensionality -reduction, if there are fewer 1×1filters in a convolutional layer than input feature maps. +It is particularly interesting to notice that even a convolutional layer of1 ×1 filters does +learn a linear combination of thedinput feature maps. This can be used for dimensionality +reduction, if there are fewer1 ×1 filters in a convolutional layer than input feature maps. Another insight recently got important: Every fully connected layer has an equivalent -convolutional layer which has the same weights.2This way, one can use the complete +convolutional layer which has the same weights.2 This way, one can use the complete classification network as a very complex non-linear image filter which can be used for semantic segmentation. -A fully connected layer with d∈N≥1inputs and n∈N≥1nodes can be interpreted as a -convolutional layer with an input of shape 1×1×dandnfilters of size 1×1. This will -produce an output shape 1×1×n. Every single output is connected to all of the inputs. +A fully connected layer withd∈N≥1 inputs andn∈N≥1 nodes can be interpreted as a +convolutional layer with an input of shape1 ×1 ×d and n filters of size1 ×1. This will +produce an output shape1 ×1 ×n. Every single output is connected to all of the inputs. When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize -to feature maps. If the 1×1convolutional filter layer is applied to the vectorized output, +to feature maps. If the1 ×1 convolutional filter layer is applied to the vectorized output, it is completely equivalent to a fully connected layer. However, the vectorization can be omitted if a convolution layer without padding and a filter size equal to the feature maps size is applied. This was used by [LSD15]. 2.2.2. Pooling Layers -Pooling summarizes a p×parea of the input feature map. Just like convolutional layers, -pooling can be used with a stride of s∈N>1. 
Ass≥2is the usual choice, pooling layers -are sometimes also called subsampling layers . Typically, p∈{2,3,4,5}ands= 2such as +Pooling summarizes ap×p area of the input feature map. Just like convolutional layers, +pooling can be used with a stride ofs∈N>1. As s≥2 is the usual choice, pooling layers +are sometimes also calledsubsampling layers. Typically,p∈{ 2,3,4,5 }and s= 2 such as for AlexNet [KSH12] and VGG-16 [SZ14]. -The type of summary for the set of activations Avaries between the functions listed -in Table 2.1, spatial pyramid pooling as introduced in [ HZRS14] and generalizing pooling +The type of summary for the set of activationsA varies between the functions listed +in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling functions as introduced in [LGT16]. -2But convolutional layers only have equivalent fully connected layers if the output feature map is 1×1 +2But convolutional layers only have equivalent fully connected layers if the output feature map is1 × 1 2. Convolutional Neural Networks Name Definition Used by -Max pooling max{a∈A}[BPL10, KSH12] -Average / mean pooling1 -|A|∑ -a∈AaLeNet-5 [LBBH98] and [KSlB+10] -ℓ2pooling√∑ -a∈Aa2[Le13] +Max pooling max {a∈A} [BPL10, KSH12] +Average / mean pooling 1 +|A| +∑ +a∈Aa LeNet-5 [LBBH98] and [KSlB+10] +ℓ2 pooling +√∑ +a∈Aa2 [Le13] Stochastic pooling * [ZF13] -Table 2.1.: Pooling types for a set Aof activations a∈R. -(*) For stochastic pooling, each of the p×pactivation values aiin the pooling region gets -picked with probability pi=ai∑ -aj∈Aaj. This assumes the activations aiare non-negative. +Table 2.1.: Pooling types for a setA of activationsa∈R. +(*) For stochastic pooling, each of thep×pactivation valuesai in the pooling region gets +picked with probabilitypi = ai∑ +aj∈A aj +. This assumes the activationsai are non-negative. 
Pooling is applied for three reasons: To get local translational invariance, to get invariance against minor local changes and, most important, for data reduction to1 -s2th of the data by -using strides of s>1. +s2 th of the data by +using strides ofs> 1. See Figure 2.3 for a visualization of max pooling. -793594070090509375929643 -2×2max pooling -95999722 -Figure 2.3.: 2×2max pooling applied to a feature map of size 6×4with stride s= 2and padding. -Average pooling of p×pareas with stride scan be replaced by a convolutional layer. If -the input of the pooling layer are d(i−1)feature maps, the convolutional layer has to have -d(i−1)filters of size p×pand strides. Theith filter has the values +7 9 3 5 9 4 +0 7 0 0 9 0 +5 0 9 3 7 5 +9 2 9 6 4 3 +2 × 2 max pooling +9 5 9 +9 9 7 +2 +2 +Figure 2.3.:2 ×2 max pooling applied to a feature map of size6 ×4 with strides= 2 and padding. +Average pooling ofp×p areas with strides can be replaced by a convolutional layer. If +the input of the pooling layer ared(i−1) feature maps, the convolutional layer has to have +d(i−1) filters of sizep×p and strides. The ith filter has the values  -1 -p2...1 + +1 +p2 ... 1 p2 -......... +... ... ... 1 -p2...1 -p2 +p2 ... 1 +p2 +  -for the dimension iand the zero matrix +for the dimensioni and the zero matrix  -0...0 -......... -0...0 + +0 ... 0 +... ... ... +0 ... 0 +  -for all other dimensions i= 1,...,d(i−1). +for all other dimensionsi= 1,...,d (i−1). 2.2. CNN Layer Types 2.2.3. Dropout Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting -the output of any neuron to zero with probability p. It was introduced in [ HSK+12] and is +the output of any neuron to zero with probabilityp. It was introduced in [HSK+12] and is well-described in [SHK+14]. 
-A Dropout layer can be implemented as follows: For an input inof any shape s, a tensor of -the same shape D∈{0,1}sis sampled, where each element diis sampled independently +A Dropout layer can be implemented as follows: For an inputin of any shapes, a tensor of +the same shapeD∈{ 0,1 }s is sampled, where each elementdi is sampled independently from a Bernoulli distribution. The results are element-wise multiplied to calculate the -output outof the Dropout layer: -out=D⊙in with di∼B(1,p) -where⊙is the Hadamard product -(A⊙B)i,j:= (A)i,j(B)i,j -Hence every value of the input gets set to zero with a dropout probability of p. Typically, -Dropout is used with p= 0.5. Layers closer to the input usually have a lower dropout probability +output out of the Dropout layer: +out = D⊙in with di ∼B(1,p) +where ⊙is the Hadamard product +(A⊙B)i,j := (A)i,j(B)i,j +Hence every value of the input gets set to zero with a dropout probability ofp. Typically, +Dropout is used withp= 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the output of a dropout layer is multiplied with1 -1−pwhen dropout is enabled [ Las17,tf-16b]. +1−p when dropout is enabled [Las17, tf-16b]. At inference time, dropout is disabled. Dropout is usually only applied after fully connected layers, but not after convolutional layers as it usually increases the test error as pointed out in [GG16]. @@ -476,78 +513,81 @@ in [HSL+16] dropout only complete layers. This can be done by having Residual ne which have one identity connection and one residual feature connection. Hence the residual features can be dropped out and the identity connection remains. 2.2.4. 
Normalization Layers -One problem when training deep neural networks is internal covariate shift : While the +One problem when training deep neural networks isinternal covariate shift: While the parameters of layers close to the output are adapted to some input produced by lower layers, those lower layers parameters are also adapted. This leads to the parameters in the upper layers being worse. A very low learning rate has to be chosen to adjust for the fact that the input features might drastically change over time. 2. Convolutional Neural Networks -One way to approach this problem is by normalizing mini-batches as described in [ IS15]. A -Batch Normalization layer with d-dimensional input x= (x(1),...,x(d))is first normalized +One way to approach this problem is by normalizing mini-batches as described in [IS15]. A +Batch Normalization layer withd-dimensional inputx= (x(1),...,x (d)) is first normalized point-wise to -ˆx(k)=x(k)−¯x(k) +ˆx(k) = x(k) −¯x(k) √ -s′[x(k)]2+ε -with ¯x(k)=1 -m∑m -i=1x(k) -ibeing the sample mean and s′[x(k)]2=1 -m∑m +s′[x(k)]2 + ε +with ¯x(k) = 1 +m +∑m +i=1 x(k) +i being the sample mean ands′[x(k)]2 = 1 +m +∑m i=1(x(k) -i−¯x(k))the -sample variance where m∈N≥1is the number of training samples per mini-batch, ε>0 -being a small constant to prevent division by zero and x(k) -iis the activation of neuron kfor -training sample i. -Additionally, for each activation x(k)two parameters γ(k),β(k)are introduced which scale +i −¯x(k)) the +sample variance wherem∈N≥1 is the number of training samples per mini-batch,ε> 0 +being a small constant to prevent division by zero andx(k) +i is the activation of neuronk for +training samplei. +Additionally, for each activationx(k) two parametersγ(k),β(k) are introduced which scale and shift the feature: -y(k)=γ(k)·ˆx(k)+β(k) +y(k) = γ(k) ·ˆx(k) + β(k) In the case of fully connected layers, this is applied to the activation, before the non-linearity is applied. 
If it is applied after the activation, it harms the training in early stages. For -convolution, only one γand oneβis learned per feature map. -One important special case is γ(k)=√ -s′[x(k)]2+εandβ(k)=¯x(k), which would make the +convolution, only oneγ and oneβ is learned per feature map. +One important special case isγ(k) = +√ +s′[x(k)]2 + ε and β(k) = ¯x(k), which would make the Batch Normalization layer an identity layer. -During evaluation time,3the expected value and the variance are calculated once for the +During evaluation time,3 the expected value and the variance are calculated once for the complete dataset. An unbiased estimate of the empirical variance is used. The question where Batch Normalization layers (BN) should be applied and for which reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the activation function. Considering this, the possible options for the order are: -1. CONV / FC→BN→activation function →Dropout→... -2. CONV / FC→activation function →BN→Dropout→... -3. CONV / FC→activation function →Dropout→BN→... -4. CONV / FC→Dropout→BN→activation function →... -The authors of [ IS15] suggest to use Batch Normalization before the activation function +1. CONV / FC→BN →activation function→Dropout →... +2. CONV / FC→activation function→BN →Dropout →... +3. CONV / FC→activation function→Dropout →BN →... +4. CONV / FC→Dropout →BN →activation function→... +The authors of [IS15] suggest to use Batch Normalization before the activation function as in Items 1 and 4. Batch Normalization after the activation lead to better results in -https://github .com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm .md -Another normalization layer is Local Response Normalization as described in [ KSH12], -which includes ℓ2normalization as described in [ WWQ13 ]. 
Those two normalization layers, +https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md +Another normalization layer is Local Response Normalization as described in [KSH12], +which includesℓ2 normalization as described in [WWQ13]. Those two normalization layers, however, are superseded by Batch Normalization. -3also called inference time +3also calledinference time 2.3. CNN Blocks 2.3. CNN Blocks This section describes more complex building blocks than simple layers. CNN blocks act similar to a layer, but they are themselves composed of layers. 2.3.1. Residual Blocks -Residual blocks as introduced in [ HZRS15a ] are a milestone in computer vision. They +Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They enabled the computer vision community to go from about 16 layers as in VGG 16-D (see Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets) -as introduced in [ HZRS15a ] is to add an identity connection which skips two layers. This +as introduced in [HZRS15a] is to add an identity connection which skips two layers. This identity connection adds the feature maps onto the other feature maps and thus requires the output of the input layer of the residual block to be of the same dimension as last layer of the residual block. -Formally, it can be described as follows. If xiare the feature maps after layer iandx0is -the input image, His a non-linear transformation of feature maps, then -y=H(x) +Formally, it can be described as follows. Ifxi are the feature maps after layeri and x0 is +the input image,H is a non-linear transformation of feature maps, then +y= H(x) describes a traditional CNN. Note that this could be multiple layers. A residual block as visualized in Figure 2.4 is described by -y=H(x) +x -In [HZRS15a ], they only used residual skip connections to skip two layers. 
Hence, if -convi(xi)describes the application of the convolutional layer ito the input xiwithout the +y= H(x) + x +In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if +convi(xi) describes the application of the convolutional layeri to the inputxi without the nonlinearity, then such a residual block is -xi+2= convi+1(ReLU(conv i(xi))) +xi +xi+2 = conv i+1(ReLU(conv i(xi))) + xi Figure 2.4.: ResNet module Image source: [HZRS15a] [HM16] provides some insights why deep residual networks are successful. @@ -556,77 +596,82 @@ Image source: [HZRS15a] 2.3.2. Aggregation Blocks Two common ways to add more parameters to neural networks are increasing their depth by adding more layers or increasing their width by adding more neurons / filters. Inception -blocks [AM15] implicitly started a new idea which was explicitly described in [ XGD+16] as -“ResNeXt block”: Increasing the cardinality C∈N≥1. By cardinality, the authors describe -the concept of having Csmall convolutional networks with the same topology but different +blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as +“ResNeXt block”: Increasing the cardinalityC ∈N≥1. By cardinality, the authors describe +the concept of havingC small convolutional networks with the same topology but different weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not combine aggregation blocks with residual blocks as the authors did. 256-d in -concatenatetotal 32 +concatenate +total 32 groups ... -128-d out4 @1×1×256 -4 @3×3×44 @1×1×256 -4 @3×3×44 @1×1×256 -4 @3×3×4 -Figure 2.5.: Aggregation block with a cardinality of C= 32. Each of the 32 groups is a 2-layer -convolutional network. The first layer receives 256 feature maps and applies four 1×1 -filters to it. The second layer applies four 3×3filters. 
Although every group has +128-d out +4 @1 ×1 ×256 +4 @3 ×3 ×4 +4 @1 ×1 ×256 +4 @3 ×3 ×4 +4 @1 ×1 ×256 +4 @3 ×3 ×4 +Figure 2.5.:Aggregation block with a cardinality ofC = 32. Each of the 32 groups is a 2-layer +convolutional network. The first layer receives 256 feature maps and applies four1 ×1 +filters to it. The second layer applies four3 ×3 filters. Although every group has the same topology, the learned weights are different. The outputs of the groups are concatenated. The hyperparameters of an aggregation block are: -•The topology of the group members. -•The cardinality C∈N≥1. Note that a cardinality of C= 1is equivalent in every +• The topology of the group members. +• The cardinalityC ∈N≥1. Note that a cardinality ofC = 1 is equivalent in every aspect to using the group network without an aggregation block. 2.3. CNN Blocks 2.3.3. Dense Blocks -Dense blocks are collections of convolutional layers which are introduced in [ HLW16]. The +Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The idea is to connect each convolutional layer directly to subsequent convolutional layers. -Traditional CNNs with Llayers and one input layer have Lconnections between layers, +Traditional CNNs withL layers and one input layer haveL connections between layers, but dense blocks haveL(L+1) -2connections between layers. The input feature maps are +2 connections between layers. The input feature maps are concatenated in depth. According to the authors, this prevents features from being relearned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors used only on the order of 12 feature maps per layer. A dense block is visualized in Figure 2.6. 
256-d in -k@3×3 +k @ 3 ×3 +concatenate +k @ 3 ×3 concatenate -k@3×3 -concatenate256-d +256-d k-d -(256 +k)-d +(256 + k)-d k-d -(256 +L·k)-d out -Figure 2.6.: Dense block with L= 2layers and a growth factor of k. +(256 + L·k)-d out +Figure 2.6.: Dense block withL= 2 layers and a growth factor ofk. Dense block have five hyperparameters: -•The activation function being used. The authors use ReLU. -•The sizekw×khof filters. The authors use kw=kh= 3. -•The number of layers L, whereL= 2is a simple convolutional layer. -•The number kof filters added per layer (called growth rate in the paper) -It might be necessary use 1×1convolutions to reduce the number of L·kfeature maps. +• The activation function being used. The authors use ReLU. +• The sizekw ×kh of filters. The authors usekw = kh = 3. +• The number of layersL, whereL= 2 is a simple convolutional layer. +• The numberk of filters added per layer (calledgrowth ratein the paper) +It might be necessary use1 ×1 convolutions to reduce the number ofL·k feature maps. 2. Convolutional Neural Networks 2.4. Transition Layers Transition layers are used to overcome constraints imposed by resource limitations or architectural design choices. One constraint is the number of feature maps (see Appendix C.3 for details). In order to reduce the number of feature maps while still keeping as much -relevant information as possible in the network, a convolutional layer iwithkifilters of -the shape 1×1×ki−1is added. The number of filters kidirectly controls the number of +relevant information as possible in the network, a convolutional layeri with ki filters of +the shape1 ×1 ×ki−1 is added. The number of filterski directly controls the number of generated feature maps. In order to reduce the dimensionality (width and height) of the feature maps, one typically applies pooling. Global pooling is another type of transition layer. 
It applies pooling over the complete -feature map size to shrink the input to a constant 1×1feature map and hence allows one +feature map size to shrink the input to a constant1 ×1 feature map and hence allows one network to have different input sizes. 2.5. Analysis Techniques 2.5. Analysis Techniques CNNs have dozens of hyperparameters and ways to tune them. Although there are -automatic methods like random search [ BB12], grid search [ LBOM98 ], gradient-based -hyperparameter optimization [ MDA15] and Hyperband [ LJD+16] some actions need a +automatic methods like random search [BB12], grid search [LBOM98], gradient-based +hyperparameter optimization [MDA15] and Hyperband [LJD+16] some actions need a manual investigation to improve the model’s quality. For this reason, analysis techniques which guide developers and researchers to the important hyperparameters are necessary. In the following, nine diagnostic techniques are explained. @@ -656,43 +701,49 @@ are not covered by the training set and thus indicate the need to collect more d 2. Convolutional Neural Networks 2.5.2. Confusion Matrices -Aconfusion matrix is a matrix (c)ij∈NK×K -≥0, whereK∈N≥2is the number of classes, -which contains all correct and wrong classifications. The item cijis the number of times -items of class iwere classified as class j. This means the correct classification is on the -diagonalciiand all wrong classifications are of the diagonal. The sum∑K -i=1∑K -j=1cijis the -total number of samples which were evaluated and∑ -i=1cii∑K -i=1∑K -j=1cijis the accuracy. -The sumsr(i) =∑K -j=1cijof each class iare worth being investigated as they show if the +A confusion matrix is a matrix(c)ij ∈NK×K +≥0 , whereK ∈N≥2 is the number of classes, +which contains all correct and wrong classifications. The itemcij is the number of times +items of classi were classified as classj. This means the correct classification is on the +diagonal cii and all wrong classifications are of the diagonal. 
The sum∑K +i=1 +∑K +j=1 cij is the +total number of samples which were evaluated and +∑ +i=1 cii∑K +i=1 +∑K +j=1 cij +is the accuracy. +The sumsr(i) = ∑K +j=1 cij of each classi are worth being investigated as they show if the classes are skewed. If the number of samples of one class dominates the data set, then the classifier can get a high accuracy by simply always prediction the most common class. If the accuracy of the classifier is close to the a priory probability of the most common class, techniques to deal with skewed classes might help. An automatic criterion to check for this problem is -accuracy≤max({r(i)|i= 1,...,k})∑k -i=1r(i)+ε -whereεis a small value to compensate the fact that some examples might be correct just +accuracy ≤max({r(i) |i= 1,...,k })∑k +i=1 r(i) ++ ε +where ε is a small value to compensate the fact that some examples might be correct just by chance. Other values which should be checked are the class-wise sensitivities: -s(k) =# correctly identified instances of class k -# instances of class k=ckk -r(k)∈[0,1] -Ifs(i)is much lower than s(j), it is an indicator that more or cleaner training data is -necessary for s(i). +s(k) = # correctly identified instances of classk +# instances of classk = ckk +r(k) ∈[0,1] +If s(i) is much lower thans(j), it is an indicator that more or cleaner training data is +necessary fors(i). The class-wise confusion -fconfusability (k1,k2) =ck1k2∑K -j=1ck1j -indicates if class k1gets often classified as class k2. The highest values here can indicate +fconfusability(k1,k2) = ck1k2 +∑K +j=1 ck1j +indicates if classk1 gets often classified as classk2. The highest values here can indicate if two classes should be merged or a specialized model for separating those classes could improve the overall system. 2.5.3. Validation Curves: Accuracy, loss and other metrics Validation curves display a hyperparameter (e.g., the training epoch) on the horizontal -axis and a quality metric on the vertical axis. 
Accuracy, error = (1−accuracy )or loss are +axis and a quality metric on the vertical axis. Accuracy,error = (1 −accuracy) or loss are typical quality metrics. Other quality metrics can be found in [OHIL16]. In case that the number of training epochs are used as the examined hyperparameter, validation curves give an indicator if training longer improves the model’s performance. By @@ -700,12 +751,17 @@ validation curves give an indicator if training longer improves the model’s pe 2.5. Analysis Techniques plotting the error on the training set as well as the error on a validation set, one can also estimate if overfitting might become a problem. See Figure 2.7 for an example. -10 20 30 40 50 60 70 80 90 1000.20.40.60.8 +10 20 30 40 50 60 70 80 90 100 +0.2 +0.4 +0.6 +0.8 overfitting -EpochsErrorTraining set +Epochs +Error Training set Validation set -Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs -and the quality metric is the error (1−accuracy ). The longer the network is trained, +Figure 2.7.:A typical validation curve: In this case, the hyperparameter is the number of epochs +and the quality metric is the error(1 −accuracy). The longer the network is trained, the better it gets on the training set. At some point the network is fit too well to the training data and loses its capability to generalize. At this point the quality curve of the training set and the validation set diverge. While the classifier is still improving on @@ -715,36 +771,46 @@ When the epoch-loss validation curve has plateaus as in Figure 2.8, this means t problem of plateaus are (i) to change weight initialization if the plateau was at the beginning, (ii) regularizing the model or (iii) changing the optimization algorithm. 
Loss functions -The loss function (also called error function orcost function ) is a function which assigns a +The loss function (also callederror functionor cost function) is a function which assigns a real value to a complex event like the predicted class of a feature vector. It is used to define -theobjective function . For classification problems the loss function is typically cross-entropy -withℓ1orℓ2regularization, as it was described in [NH92]: -ECE(W) =−∑ -x∈XK∑ -k=1[tx +the objective function. For classification problems the loss function is typically cross-entropy +with ℓ1 or ℓ2 regularization, as it was described in [NH92]: +ECE(W) = − +∑ +x∈X +K∑ +k=1 +[tx klog(ox k) + (1−tx k) log(1−ox k)]    -cross-entropy data loss+λ1·ℓ1∑ -w∈W|w|+λ2·ℓ2∑ -w∈Ww2 - +cross-entropy data loss ++ λ1 · +ℓ1 +  ∑ +w∈W +|w|+λ2 · +ℓ2 +  ∑ +w∈W +w2 +   model complexity loss -whereWare the weights, Xis the training data set, K∈N≥0is the number of classes and +where W are the weights,X is the training data set,K ∈N≥0 is the number of classes and tx -kindicates if the training example xis of classk.ox -kis the output of the classification -algorithm which depends on the weights. λ1,λ2∈[0,∞)weights the regularization and is -typically smaller than 0.1. +k indicates if the training examplex is of classk. ox +k is the output of the classification +algorithm which depends on the weights.λ1,λ2 ∈[0,∞) weights the regularization and is +typically smaller than0.1. 2. Convolutional Neural Networks -Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange +Figure 2.8.:Example for a validation curve (plotted loss function) with plateaus. The dark orange curve is smoothed, but the non-smoothed curve is also plotted in light orange. The data loss is positive whenever the classification is not correct, whereas the model complexity loss is higher for more complex models. 
The model complexity loss exists due -to the intuition of Occam’s razor : If two models explain the same data with an accuracy of +to the intuition ofOccam’s razor: If two models explain the same data with an accuracy of 100 %, the simpler model is to be preferred. A reason to show the loss for the validation curve technique instead of other quality metrics is that it contains more information about the quality of the model. A reason against the @@ -753,46 +819,52 @@ loss only shows relative learning progress whereas the accuracy shows absolute p human readers. There are three observations in the loss validation curve which can help to improve the network: -•If the loss does not decrease for several epochs, the learning rate might be too low. +• If the loss does not decrease for several epochs, the learning rate might be too low. The optimization process might also be stuck in a local minimum. -•Loss being NAN might be due to too high learning rates. Another reason is division +• Loss being NAN might be due to too high learning rates. Another reason is division by zero or taking the logarithm of zero. In both cases, adding a small constant like -10−7fixes the problem. -•If the loss-epoch validation curve has a plateau at the beginning, the weight initialization +10−7 fixes the problem. +• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization might be bad. 2.5. Analysis Techniques Quality criteria There are several quality criteria for classification models. Most quality criteria are based -the confusion matrix cwhich denotes at cijthe number of times the real class was iandj +the confusion matrixc which denotes atcij the number of times the real class wasi and j was predicted. This means the diagonal contains the number of correct predictions. For -the following, let ti=∑k -j=1cijbe the number of training samples for class i. The most +the following, letti = ∑k +j=1 cij be the number of training samples for classi. 
The most common quality criterion is accuracy: -accuracy (c) =∑k -i=1cii∑k -i=1ti∈[0,1] +accuracy(c) = +∑k +i=1 cii +∑k +i=1 ti +∈[0,1] One problem of accuracy as a quality criterion are skewed classes. If one class is by far more common than all other classes, then the simplest way to achieve a high score is to always classify everything as the most common class. In order to fix this problem, one can use the mean accuracy: -mean-accuracy (c) =1 -k·k∑ -i=1cii -ti∈[0,1] -For two-class problems there are many other metrics like precision, recall and Fβ-score. +mean-accuracy(c) = 1 +k · +k∑ +i=1 +cii +ti +∈[0,1] +For two-class problems there are many other metrics like precision, recall andFβ-score. Quality criteria for semantic segmentation are explained in [Tho16]. Besides the quality of the classification result, several other quality criteria are important in practice: -•Speed of evaluation for new images, -•latency, -•power consumption, -•robustness against (non)random perturbations in the training data (see [ SZS+13, +• Speed of evaluation for new images, +• latency, +• power consumption, +• robustness against (non)random perturbations in the training data (see [SZS+13, PMW+15]), -•robustness against (non)random perturbations in the training labels (see [ NDRT13 , +• robustness against (non)random perturbations in the training labels (see [NDRT13, XXE12]), -•model size -As reducing the floating point accuracy allows to process more data on a given device [ Har15], +• model size +As reducing the floating point accuracy allows to process more data on a given device [Har15], analysis under this aspect is also highly relevant in some scenarios. However, the following focuses on the quality of the classification result. @@ -807,16 +879,21 @@ the networks performance. Having the training set’s learning curve, it is poss if the capacity of the model to fit the data is high enough for the desired classification error. 
The error on the validation set should never be expected to be significantly lower than the error on the training set. If the error on the training set is too high, then more data will -nothelp. Instead, the model or the training algorithm need to be adjusted. +not help. Instead, the model or the training algorithm need to be adjusted. If the training set’s learning curve is significantly higher than the validation set’s learning curve, then removing features (e.g., by decreasing the images resolution), more training samples or more regularization will help. -10 20 30 40 50 60 70 80 90 1000.20.40.6 -avoidable biasvariance -human-levelerror -Training samplesErrorValidation set +10 20 30 40 50 60 70 80 90 100 +0.2 +0.4 +0.6 +avoidable bias +variance +human-level error +Training samples +Error Validation set Training set -Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given +Figure 2.9.:A typical learning curve: The more data is used for training, the more errors a given architecture will make to fit the given training data. At the same time, it is expected that the training data gets more similar to the true distribution of the data which should be captured by the test data. At some point, the error on the training and @@ -838,59 +915,59 @@ well. However, it is not the desired solution. For classification problems in computer vision, there are two types of visualizations which help to diagnose such problems. Both color superpixels of the original image to convey information how the model used those superpixels: -•Correct class heatmap : The probability of the correct class is encoded to give a +• Correct class heatmap: The probability of the correct class is encoded to give a heat map which superpixels are important for the correct class. This can also be done by setting the opacity accordingly. 
-•Most-likely class image : Each of the most likely classes for all superpixels is +• Most-likely class image: Each of the most likely classes for all superpixels is represented by a color. The colored image thus gives clues why different predictions were assigned a high probability. Two methods to generate such images are explained in the following. Occlusion Sensitivity Analysis -Occlusion sensitivity analysis is described in [ ZF14]. The idea is to occlude a part of the -image by something. This could be a gray square as in [ ZF14] or a black superpixel as +Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the +image by something. This could be a gray square as in [ZF14] or a black superpixel as in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., superpixel or position of the square) and the regions are then colored to generate either a correct class heatmap of the most-likely class image. It is important to note that the color -at regionridenotes the result if riis occluded. +at regionri denotes the result ifri is occluded. Both visualizations are shown in Figure 2.10. One can see that the network makes sensible predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. Gradient-based approaches -In [SVZ13], a gradient-based approach was used to generate image-specific class saliency +In [SVZ13], a gradient-based approach was used to generateimage-specific class saliency maps. The authors describe the problem as a ranking problem, where each pixel of the -imageI0is assigned a score Sc(I0)for a classcof interest. CNNs are non-linear functions, -but they can be approximated by the first order Taylor expansion Sc(I)≈wTI+bwhere -wis the derivative of ScatI0. +image I0 is assigned a scoreSc(I0) for a classc of interest. 
CNNs are non-linear functions, +but they can be approximated by the first order Taylor expansionSc(I) ≈wTI+ b where +w is the derivative ofSc at I0. 2. Convolutional Neural Networks 2.5.6. Argmax Method -Theargmax method has two variants: -•Fixed class argmax : Propagate all elements of a given class through the network +The argmax methodhas two variants: +• Fixed class argmax: Propagate all elements of a given class through the network and analyze which neurons are activated most often / have the highest activation. -•Fixed neuron argmax : Propagate the data through the network and find the n +• Fixed neuron argmax: Propagate the data through the network and find then data elements which cause the highest activation for a given neuron. -Note that a “neuron” is a filter in a CNN. The amount of activation of a filter Fby an -imageIis calculated by applying FtoIand calculating the element-wise sum of the result. -Fixed-neuron argmax was applied in [ ZF14]. However, they did not stop with that. Besides -showingthe9imageswhichcausedthehighestactivation, theyalsotrainedadeconvolutional +Note that a “neuron” is a filter in a CNN. The amount of activation of a filterF by an +image I is calculated by applyingF to I and calculating the element-wise sum of the result. +Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides +showing the 9 images which caused the highest activation, they also trained a deconvolutional neural network to project the activation of the filter back into pixel space. The fixed neuron argmax can be used qualitatively to get an impression of the kind of -features which are learned. This is useful to diagnose problems, for example in [ AM15] it is +features which are learned. This is useful to diagnose problems, for example in [AM15] it is described that the network recognized the class “dumbbell” only if a hand was present, too. 
Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters being shared between classes or how many parameters are mainly assigned to which classes. Going one step further from the fixed neuron argmax method is using an optimization algorithm to change an initial image minimally in such a way that any desired class gets -predicted. This is called caricaturization in [MV16]. +predicted. This is calledcaricaturization in [MV16]. 2.5.7. Feature Map Reconstructions -Feature map visualizations such as the ones made in [ ZF14] (see Figure 2.11) give insights +Feature map visualizations such as the ones made in [ZF14] (see Figure 2.11) give insights into the learned features. This shows what the network emphasizes. However, it is not necessarily the case that the feature maps allow direct and easy conclusions about the -learned features. This technique is called inversion in [MV16]. +learned features. This technique is calledinversion in [MV16]. A key idea of feature map visualizations is to reconstruct a layers input, given its activation. This makes it possible find which inputs would cause neurons to activate with extremely high or low values. -More recent work like [ NYC16] tries to make the reconstructions appearance look more +More recent work like [NYC16] tries to make the reconstructions appearance look more natural. 2.5. Analysis Techniques @@ -901,28 +978,34 @@ initializations, the learned weights should still be comparable. If the set of learned filters changes with initialization, this might be an indicator for too little capacity of that layer. Hence adding more filters to that layer could improve the performance. 
-Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: +Filters can be compared with thek-translation correlation as introduced in [ZCZL16]: ρk(Wi,Wj) = max -(x,y)∈{−k,...,k}2\(0,0)⟨Wi,T(Wj,x,y)⟩f -∥Wi∥2∥Wj∥2∈[−1,1], -whereT(·,x,y)denotes the translation of the first operand by (x,y), with zero padding at -the borders to keep the shape. ⟨·,·⟩fdenotes the flattened inner product, where the two +(x,y)∈{−k,...,k}2\(0,0) +⟨Wi,T(Wj,x,y )⟩f +∥Wi∥2 ∥Wj∥2 +∈[−1,1], +where T(·,x,y ) denotes the translation of the first operand by(x,y), with zero padding at +the borders to keep the shape.⟨·,·⟩f denotes the flattened inner product, where the two operands are flattened into column vectors before applying the standard inner product. The -closer the absolute value of the k-translation correlation to one, the more similar two filters -Wi,Wjare. According to [ ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and +closer the absolute value of thek-translation correlation to one, the more similar two filters +Wi,Wj are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found -this by comparing the averaged maximum k-translational correlation of the networks with -Gaussian-distributed initialized filters. The averaged maximum k-translational correlation +this by comparing theaveraged maximumk-translational correlationof the networks with +Gaussian-distributed initialized filters. The averaged maximumk-translational correlation is defined as -¯ρk(W) =1 -NN∑ -i=1Nmax -j=1,j̸=iρk(Wi,Wj) -whereNis the number of filters in the layer WandWidenotes the ith filter. +¯ρk(W) = 1 +N +N∑ +i=1 +N +max +j=1,j̸=i +ρk(Wi,Wj) +where N is the number of filters in the layerW and Wi denotes theith filter. 2.5.9. Weight update tracking Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if the learning rate is well-chosen. 
He suggests that the weight update should be in the order -of10−3. If the weight update is too high, then the learning rate has to be decreased. If the +of 10−3. If the weight update is too high, then the learning rate has to be decreased. If the weight update is too low, then the learning rate has to be increased. The order of the weight updates as well as possible implications highly depend on the model and the training algorithm. See Appendix B.5 for a short overview of training algorithms @@ -932,24 +1015,24 @@ for neural networks. 2.6. Accuracy boosting techniques There are techniques which can almost always be applied to improve accuracy of CNN classifiers: -•Ensembles [CMS12] -•Training-time augmentation (see Appendix B.2) -•Test-time transformations [DDFK16, How13, HZRS15b] -•Pre-training and fine-tuning [ZDGD14, GDDM14] -One of the most simple ensemble techniques which was introduced in [ CMS12] is averaging -the prediction of nclassifiers. This improves the accuracy even if the classifiers use exactly +• Ensembles [CMS12] +• Training-time augmentation (see Appendix B.2) +• Test-time transformations [DDFK16, How13, HZRS15b] +• Pre-training and fine-tuning [ZDGD14, GDDM14] +One of the most simple ensemble techniques which was introduced in [CMS12] is averaging +the prediction ofn classifiers. This improves the accuracy even if the classifiers use exactly the same training setup by reducing variance. Data augmentation techniques give the optimizer the possibility to take invariances like rotation into account by generating artificial training samples from real training samples. Data augmentation hence reduces bias and variance with no cost at inference time. Data augmentation at inference time reduces the variance of the classifier. Similar to using an ensemble, it increases the computational cost of inference. 
-Pretrainingtheclassifieronanotherdatasettoobtainstartfromagoodpositionorfinetuning +Pretraining the classifier on another dataset to obtain start from a good position or finetuning a model which was originally created for another task is also a common technique. 2.6. Accuracy boosting techniques -Figure 2.10.: Occlusion sensitivity analysis by [ ZF14]: The left column shows three example images, -where a gray square occluded a part of the image. This gray squares center (x,y)was +Figure 2.10.:Occlusion sensitivity analysis by [ZF14]: The left column shows three example images, +where a gray square occluded a part of the image. This gray squares center(x,y) was moved over the complete image and the classifier was run on each of the occluded images. The probability of the correct class, depending on the gray squares position, is showed in the middle column. One can see that the predicted probability of the @@ -959,7 +1042,7 @@ it always predicts the correct class if the head is visible. However, if the hea dog is occluded, it predicts other classes. 2. Convolutional Neural Networks -Figure 2.11.: Filter visualization from [ ZF14]: The filters themselves as well as the input feature +Figure 2.11.:Filter visualization from [ZF14]: The filters themselves as well as the input feature maps which caused the highest activation are displayed. 3. Topology Learning @@ -977,64 +1060,73 @@ layers / neurons into the network. In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization are introduced. 3.1.1. Cascade-Correlation -Cascade-Correlation was introduced in [ FL89]. It generates a cascading architecture which +Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which is similar to dense block described in Section 2.3.3. Cascade-Correlation works as follows: -1.Initialization : The number of input nodes and the number of output nodes are +1. 
Initialization: The number of input nodes and the number of output nodes are defined by the problem. Create a minimal, fully connected network for those. -2.Training : Train the network until the error no longer decreases. -3.Candidate Generation : Generate candidate nodes. Each candidate node is connected +2. Training: Train the network until the error no longer decreases. +3. Candidate Generation: Generate candidate nodes. Each candidate node is connected to all inputs. They are not connected to other candidate nodes and not connected to the output nodes. 3. Topology Learning -4.Correlation Maximization : Train the weights of the candidates by maximizing S, -the correlation between candidates output value Vwith the networks residual error: -S=∑ -o∈O⏐⏐⏐⏐⏐⏐∑ -p∈T( -Vp−¯V) -(Ep,o−¯Eo)⏐⏐⏐⏐⏐⏐ -whereOis the set of output nodes, Tis the training set, Vpis the candidate neurons -activation for a training pattern p.Ep,ois the residual output error at node ofor -patternp.¯Vand ¯Eoare averaged values over all elements of T. This step is finished +4. Correlation Maximization: Train the weights of the candidates by maximizingS, +the correlation between candidates output valueV with the networks residual error: +S = +∑ +o∈O +⏐⏐⏐⏐⏐⏐ +∑ +p∈T +( +Vp −¯V +) +(Ep,o − ¯Eo) +⏐⏐⏐⏐⏐⏐ +where O is the set of output nodes,T is the training set,Vp is the candidate neurons +activation for a training patternp. Ep,o is the residual output error at nodeo for +pattern p. ¯V and ¯Eo are averaged values over all elements ofT. This step is finished when the correlation no longer increases. -5.Candidate selection : Keep the candidate node with the highest correlation, freeze +5. Candidate selection: Keep the candidate node with the highest correlation, freeze its incoming weights and add connections to the output nodes. -6.Continue : If the error is higher than desired, continue with step 2. +6. Continue: If the error is higher than desired, continue with step 2. 
One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1. 1 -Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray) +Figure 3.1.:A Cascade-Correlation network with three input nodes (red) and one bias node (gray) to the left, three hidden nodes (green) in the middle and two output nodes in the upper right corner. The black squares represent frozen weights which are found by correlation maximization whereas the white squares are trainable weights. 3.1.2. Meiosis Networks -Meiosis Networks are introduced in [ Han89]. In contrast to most MLPs and CNNs, where -weights are deterministic and fixed at prediction time, each weight wijin Meiosis networks +Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where +weights are deterministic and fixed at prediction time, each weightwij in Meiosis networks follows a normal distribution: -wij∼N(µij,σ2 +wij ∼N(µij,σ2 ij) 3.2. Pruning approaches -Hence every connection has two learned parameters: µijandσ2 +Hence every connection has two learned parameters:µij and σ2 ij. The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell -division. A node jis splitted, when the random part dominates the value of the sampled +division. A nodej is splitted, when the random part dominates the value of the sampled weights: ∑ iσij∑ -iµij>1and∑ +iµij +>1 and +∑ kσjk∑ -kµjk>1 +kµjk +>1 The mean of the new nodes is sampled around the old mean, half the variance is assigned to the new connections. Hence Meiosis networks only change the number of neurons per layer. They do not add layers or add skip connections. 3.1.3. Automatic Structure Optimization -Automatic Structure Optimization (ASO) was introduced in [ BM93] for the task of online - handwriting recognition. 
It makes use of the confusion matrix C= (cij)∈Nk×k +Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of online + handwriting recognition. It makes use of the confusion matrixC = ( cij) ∈Nk×k ≥0 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix -Swithsij=sji=cij·cji. The maximum of Sdefines where the ASO algorithm adds +S with sij = sji = cij ·cji. The maximum ofS defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. 3.2. Pruning approaches Pruning approaches start with a network which is bigger than necessary and prune it. The @@ -1047,41 +1139,43 @@ Pruning generally works as follows: 2. prune weights according to a pruning criterion and 3. retrain the pruned network. This procedure can be repeated. -One family of pruning criterions uses the Hessian matrix . For example, Optimal Brain -Damage (OBD) as introduced in [ LDS+89]. For every single parameter k, OBD calculates -the effect on the objective function of deleting k. The authors call the effect of the deletion +One family of pruning criterions uses theHessian matrix. For example, Optimal Brain +Damage (OBD) as introduced in [LDS+89]. For every single parameterk, OBD calculates +the effect on the objective function of deletingk. The authors call the effect of the deletion 3. Topology Learning -of parameter kthe saliency sk. The parameters with the lowest saliency are deleted, which +of parameterk the saliencysk. The parameters with the lowest saliency are deleted, which means they are set to 0 and are not updated anymore. -A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights +A follow-up method calledOptimal Brain Surgeon[HSW93] claims to choose the weights in a much better way. This requires, however, to calculate the inverse Hessian matrix -H−1∈Rn×nwheren∈Nis typically n>106. 
-A much simpler and computationally cheaper pruning criterion is the weight magnitude . -[HPTD15] prunes all weights wwhich are below a threshold θ: -w← +H−1 ∈Rn×n where n∈N is typicallyn> 106. +A much simpler and computationally cheaper pruning criterion is theweight magnitude. +[HPTD15] prunes all weightsw which are below a thresholdθ: +w← +  -wifw≥θ -0otherwise + +w if w≥θ +0 otherwise 3.3. Genetic approaches The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which can recombine themselves via crossover and inversion. An introduction to such algorithms is given in [ES03]. -Commonly used techniques to generate neural networks by GAs are NEAT [ SM02] and its +Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and its successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10]. The results, however, are of unacceptable quality: On MNIST (see Appendix E), where -random chance gives 10 %accuracy, even simple topologies trained with SGD achieve -about 92 %accuracy [ TF-16a] and state of the art is 99.79 %[WZZ+13], the HyperNEAT -algorithm achieves only 23.9 %accuracy [VH13]. -Kocmánek shows in [ Koc15] that HyperNEAT approaches can achieve 96.47 %accuracy +random chance gives10 % accuracy, even simple topologies trained with SGD achieve +about 92 % accuracy [TF-16a] and state of the art is99.79 % [WZZ+13], the HyperNEAT +algorithm achieves only23.9 % accuracy [VH13]. +Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve96.47 % accuracy on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer so that not more than three hidden layers could be trained. At the same time, VGG19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers in [HZRS15a]. [LX17] shows that Genetic algorithms can achieve competitive results on MNIST and -SVHN, but the best results on CIFAR-10 were 7.10 %error whereas the state of the art is -at3.74 %[HLW16]. 
Similarly, the Genetic algorithm achieves 29.03 %error on CIFAR-100, -but the state of the art is 17.18 %[HLW16]. +SVHN, but the best results on CIFAR-10 were7.10 % error whereas the state of the art is +at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves29.03 % error on CIFAR-100, +but the state of the art is17.18 % [HLW16]. 3.4. Reinforcement Learning Reinforcement learning is a sub-field of machine learning, which focuses on the question how to choose actions that lead to high rewards. @@ -1091,22 +1185,22 @@ One can think of the search for good neural network topologies as a reinforcemen problem. The agent is a recurrent neural network which can generate bitstrings. Those variable-length bitstrings encode neural network topologies. In 2016, this approach was applied to construct neural networks for computer vision. -In [BGNR16], Q-learning with an ε-greedy exploration was applied. -In [ZL16], the REINFORCE algorithm from [ Wil92] was used to train state of the art models +In [BGNR16], Q-learning with anε-greedy exploration was applied. +In [ZL16], theREINFORCE algorithm from [Wil92] was used to train state of the art models for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous amounts of computational resources were used to obtain those results. 3.5. Convolutional Neural Fabrics -Convolutional Neural Fabrics are introduced in [ SV16]. They side-step hard decisions +Convolutional Neural Fabrics are introduced in [SV16]. They side-step hard decisions about topologies by learning an ensemble of different CNN architectures. The idea is to define a single architecture as a trellis through a 3D grid of nodes. Each node represents a convolutional layer. One dimension is the index of the layer, the other two dimensions are the amount of filters and the feature size. 
Each node is connected to nine other nodes and thus represents nine possible choices of convolutional layers: -•Resolution : (i) convolution with stride=1 or (ii) convolution with stride=2 or +• Resolution: (i) convolution with stride=1 or (ii) convolution withstride=2 or (iii) deconvolution (doubling the resolution) -•Channels : (i) half the number of filters than the layer before (ii) the same number +• Channels: (i) half the number of filters than the layer before (ii) the same number of filters as the layer before (iii) double the number of filters than the layer before -They always use ReLU as an activation function and they always use filters of size 3×3. +They always use ReLU as an activation function and they always use filters of size3 ×3. They don’t use pooling at all. 3. Topology Learning @@ -1114,19 +1208,19 @@ They don’t use pooling at all. 4. Hierarchical Classification Designing a classifier for a new dataset is hard for two main reasons: Many design choices are not clearly superior to others and evaluating one design choice takes much time. Especially -CNNs are known to take several days [ KSH12,SLJ+15] or even weeks [ SZ14] to train. +CNNs are known to take several days [KSH12, SLJ+15] or even weeks [SZ14] to train. Additionally, some methods for analyzing a dataset become harder to use with more classes and more training samples. Examples are t-SNE, the manual inspection of errors and confusion matrices, and the argmax method. One idea to approach this problem is by building a hierarchy of classifiers. The root classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single classes. Figure 4.1 gives an example for an hierarchy of classifiers. -Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. -The root classifier C0has to distinguish six coarse classes (pedestrian, four+-wheelers, -traffic signs, two-wheelers, street, other) or 17 fine-grained classes. 
If C0predicts a -pedestrian , another classifier has to predict if it is an adult or a child. Similar, if C0 -predicts traffic sign , then another classifier has to predict if it is a speed limit, a -sign indicating danger or something else. If C0, however, predicts road, then no other +Figure 4.1.:Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. +The root classifierC0 has to distinguish six coarse classes (pedestrian, four+-wheelers, +traffic signs, two-wheelers, street, other) or 17 fine-grained classes. IfC0 predicts a +pedestrian, another classifier has to predict if it is an adult or a child. Similar, ifC0 +predicts traffic sign, then another classifier has to predict if it is a speed limit, a +sign indicating danger or something else. IfC0, however, predictsroad, then no other classifier will become active. In this example, the problem has 17 classes. The hierarchical approach introduces 7 clusters of classes and thus uses 8 classifiers. @@ -1135,37 +1229,37 @@ Such a hierarchy of classifiers needs clusters of classes. 4. Hierarchical Classification 4.1. Advantages of classifier hierarchies Having a classifier hierarchy has five advantages: -•Division of labor : Different teams can work together. Instead of having a monolithic +• Division of labor: Different teams can work together. Instead of having a monolithic task, the solutions can be combined. -•Guarantees : Changing a classifier will only change the prediction of itself and its +• Guarantees: Changing a classifier will only change the prediction of itself and its children. Siblings are not affected. In the example from Figure 4.1, the classifier -which distinguishes traffic signs can be changed while the classification as pedestrian , -four+-wheelers ,traffic sign ,street,otherwill not be affected. 
Also, the +which distinguishes traffic signs can be changed while the classification aspedestrian, +four+-wheelers, traffic sign, street, other will not be affected. Also, the classification between speed limits, danger signs and other signs will not change. -•Faster training : Except for the root classifier C0, each other classifier will have +• Faster training: Except for the root classifierC0, each other classifier will have less than the total amount of training data. Depending on the combined classes, the models could also be simpler. Hence the training time is reduced. -•Weighting of errors : In practice, some errors are more severe than others. For -example, it could be acceptable if the two-wheelers classifier has an error rate of -40 %. But it is not acceptable if the speed limit classifier has such a high error rate. -•Post-hoc explanations : The simpler a model is, the easier it is to explain why a +• Weighting of errors: In practice, some errors are more severe than others. For +example, it could be acceptable if thetwo-wheelers classifier has an error rate of +40 %. But it is not acceptable if thespeed limit classifier has such a high error rate. +• Post-hoc explanations: The simpler a model is, the easier it is to explain why a classification is made the way it is made. 4.2. Clustering classes There are two ways to cluster classes: By similarity or by semantics. While semantic clustering needs either additional information or manual work, the similarity can be -automatically inferred from the data. As pointed out in [ XZY+14], semantically similar +automatically inferred from the data. As pointed out in [XZY+14], semantically similar classes are often also visually similar. For example, in the ImageNet dataset most dogs are semantically and visually more similar to each other than to non-dogs. 
An example -where this is obviously not the case are symbols: The summation symbol \sumis identical -in appearance to the Greek letter \Sigma, but semantically much closer to the addition +where this is obviously not the case are symbols: The summation symbol\sum is identical +in appearance to the Greek letter\Sigma, but semantically much closer to the addition operator +. One approach to cluster classes by similarity is to train a classifier and examine its predictions. Each class is represented in the confusion matrix by one row. Those rows -can be directly with standard clustering algorithms such as k-means, DBSCAN [ EKS+96], -OPTICS [ ABKS99 ], CLARANS [ NH02], DIANA [ KR09], AHC (see [ HPK11]) or spectral -clustering as in [ XZY+14]. Those clusterings, however, are hard to interpret and most of +can be directly with standard clustering algorithms such ask-means, DBSCAN [EKS+96], +OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral +clustering as in [XZY+14]. Those clusterings, however, are hard to interpret and most of them do not allow a human to improve the found clustering manually. -The confusion matrix (c)ij∈Nk×kstates how often class iwas present and class jwas +The confusion matrix(c)ij ∈Nk×k states how often classi was present and classj was 4.2. Clustering classes predicted. The more often this confusion happens, the more similar those two classes are to @@ -1176,34 +1270,37 @@ diminish after a critical point of classes is reached. Hence a binary tree might good choice. As an alternative, an approach which allows building arbitrary many clusters, is proposed. The proposed algorithm has two main ideas: -•The order of columns and rows in the confusion matrix is arbitrary. This means one -can swap rows and columns. If row iandjare swapped, then the columns iandj +• The order of columns and rows in the confusion matrix is arbitrary. This means one +can swap rows and columns. 
If rowi and j are swapped, then the columnsi and j have to be swapped to in order to keep the same confusion matrix. -•If two classes are confused often, then they are similar to the classifier. +• If two classes are confused often, then they are similar to the classifier. Hence the order of the classes is permutated in such a way that the highest errors are close to the diagonal. One possible objective function to be minimized is -f(C) =n∑ -i=1n∑ -j=1Cij·|i−j| [4.1] +f(C) = +n∑ +i=1 +n∑ +j=1 +Cij ·|i−j| [4.1] which punishes errors linearly with the distance to the diagonal. This method is called CMO in the following. As pointed out by Tobias Ribizel (personal communication), this optimization problem -is a weighted version of Optimal Linear Arrangement problem . That problem is NPcomplete - [ GJ02,GJS76]. Simulated Annealing as described in Algorithm 1, however, +is a weighted version ofOptimal Linear Arrangement problem. That problem is NPcomplete + [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, produces reasonable clusterings as well as visually appealing confusion matrices. The -algorithm works as follows: First, decide with probability 0.5if only two random rows are +algorithm works as follows: First, decide with probability0.5 if only two random rows are swapped or a block is swapped. If two rows are swapped, choose both of them randomly. If a block is swapped, then choose the start randomly and the end of the block randomly after the start. The insert position has to be a valid position considering the block length, but besides that it is also chosen uniformly random. Simple row-swapping can exploit local improvements. For example, in the context of -ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier -and both dog classes Dalmatian andGreyhound next to each other. Both the two clusters -of dog breeds could be separated by carandbusdue to random chance. 
Moving any single +ImageNet, it can swap the dog-classSilky Terrier to the dog-classYorkshire terrier +and both dog classesDalmatian and Greyhound next to each other. Both the two clusters +of dog breeds could be separated bycar and bus due to random chance. Moving any single class increases the score, but moving either one of the dog breed clusters or the vehicle cluster decreases the score. Hence it is beneficial to implement block moving. One advantage of permutating the classes in order to minimize Equation (4.1) in comparison -to spectral clustering as used in [ XZY+14] is that the adjusted confusion matrix can be +to spectral clustering as used in [XZY+14] is that the adjusted confusion matrix can be 4. Hierarchical Classification split into many much smaller matrices along the diagonal. In the case of many classes (e.g., @@ -1213,15 +1310,15 @@ confusions are not made and thus many elements of the confusion matrix are close Those will be moved to the corners of the confusion matrix by optimizing Equation (4.1). Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. With -such a permutation, only n−1binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions C′ -i,i+1+C′ -i+1,ifor +such a permutation, onlyn−1 binary decisions have to be made and hence only the list of +classes has to be read. Alternatively, one can calculate the confusionsC′ +i,i+1 + C′ +i+1,i for each pair of classes which are neighbors in the confusion matrix. The higher this value, the -more similar are the classes according to the classifier. Hence a threshold θcan be applied. -θcan either be set automatically (e.g., such that 10 %of all pairs are above the threshold) +more similar are the classes according to the classifier. Hence a thresholdθ can be applied. 
+θ can either be set automatically (e.g., such that10 % of all pairs are above the threshold) or semi-automatically by asking the user for information if two classes belong to the same -cluster. Such an approach only needs log(n)binary decisions from the user where nis the +cluster. Such an approach only needslog(n) binary decisions from the user wheren is the number of classes. Please note that CMO only works if the classifier is neither too bad nor too good. A classifier which does not solve the task at all might just give almost uniform predictions whereas the @@ -1231,147 +1328,159 @@ the prediction of the class in contrast to using only the argmax in order to fin permutation. 5. Experimental Evaluation -All experiments are implemented using Keras 2.0 [ Cho15] with Tensorflow 1.0 [ AAB+16] -and cuDNN 5.1 [ CWV+14] as the backend. The experiments were run on different machines +All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+16] +and cuDNN 5.1 [CWV+14] as the backend. The experiments were run on different machines with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce GTX 970 and GeForce 940MX. -The GTSRB [ SSSI12], SVHN [ NWC+11b], CIFAR-10 and CIFAR-100 [ Kri], MNIST [ YL98], -HASYv2 [ Tho17a], STL-10 [ CLN10] dataset are used for the evaluation. Those datasets are +The GTSRB [SSSI12], SVHN [NWC+11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], +HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are used as their size is small enough to be trained within a day. Other classification datasets which were considered are listed in Appendix E. -CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color -images of the size 32 px×32 px. Its ten classes are airplane, automobile, bird, cat, deer, -dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 %[HLW16]. -According to [Kar11], human accuracy is at about 94 %. 
-CIFAR-100 is a 100-class dataset of color images of the size 32 px×32 px. Its 100 classes +CIFAR-10(Canadian Institute for Advanced Research 10) is a 10-class dataset of color +images of the size32 px×32 px. Its ten classes are airplane, automobile, bird, cat, deer, +dog, frog, horse, ship, truck. The state of the art achieves an accuracy of96.54 % [HLW16]. +According to [Kar11], human accuracy is at about94 %. +CIFAR-100is a 100-class dataset of color images of the size32 px×32 px. Its 100 classes are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain -the class airplane . The state of the art achieves an accuracy of 82.82 %[HLW16]. +the classairplane. The state of the art achieves an accuracy of82.82 % [HLW16]. GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. -The51 839images are in color and of a minimum size of 25 px×25 pxup to 266 px×232 px. -The state of the art achieves 99.46 %accuracy with an ensemble of 25 CNNs [ SL11]. -According to [SSSI], human performance is at 98.84 %. -HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images -of the size 32 px×32 px. The 369 classes contain the Latin and Greek letters, arrows, -mathematical symbols. The state of the art achieves an accuracy of 82.00 %[Tho17a]. -STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px×96 px. +The 51 839 images are in color and of a minimum size of25 px×25 px up to266 px×232 px. +The state of the art achieves99.46 % accuracy with an ensemble of 25 CNNs [SL11]. +According to [SSSI], human performance is at98.84 %. +HASYv2(Handwritten Symbols version 2) is a 369 class dataset of black-and-white images +of the size32 px×32 px. The 369 classes contain the Latin and Greek letters, arrows, +mathematical symbols. 
The state of the art achieves an accuracy of82.00 % [Tho17a]. +STL-10(self-taught learning 10) is a 10-class dataset of color images of the size96 px×96 px. Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state -of the art achieves an accuracy of 74.80 %[ZMGL15 ]. It contains 100 000unlabeled images -for unsupervised training and 500images per class for supervised training. -SVHN(Street View House Numbers) exists in two formats. For the following experiments, +of the art achieves an accuracy of74.80 % [ZMGL15]. It contains100 000 unlabeled images +for unsupervised training and500 images per class for supervised training. +SVHN (Street View House Numbers) exists in two formats. For the following experiments, the cropped digit format was used. It contains the 10 digits cropped from photos of Google -Street View. The images are in color and of size 32 px×32 px. The state of the art +Street View. The images are in color and of size32 px ×32 px. The state of the art 5. Experimental Evaluation -achieves an accuracy of 98.41 %[HLW16]. According to [ NWC+11a], human performance -is at 98.0 %. -As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0,1]. -For GTSRB, the training and test data was scaled to 32 px×32 px. +achieves an accuracy of98.41 % [HLW16]. According to [NWC+11a], human performance +is at98.0 %. +As a preprocessing step, the pixel-features were divided by 255 to obtain values in[0,1]. +For GTSRB, the training and test data was scaled to32 px×32 px. 5.1. Baseline Model and Training setup -The baseline model is trained with Adam [ KB14], an initial learning rate of 10−4, a batch +The baseline model is trained with Adam [KB14], an initial learning rate of10−4, a batch size of 64 for at most 1000 epochs with data augmentation. 
The kind of data augmentation depends on the dataset: -•CIFAR-10 ,CIFAR-100 and STL-10: Random width and height shift by at most -±3pixels in either direction; Random horizontal flip. -•GTSRB ,MNIST : Random width and height shift by at most ±5pixels in either -direction; random rotation by at most ±15degrees; random channel shift; random -zoom in [0.5,1.5]; random shear by at most 6 degrees. -•HASYv2 : Random width and height shift by at most ±5pixels in either direction; -random rotation by at most ±5degree. -•SVHN: No data augmentation. -If the dataset does not define a training/test set, a stratified 67 %/33 %split is applied. If +• CIFAR-10, CIFAR-100and STL-10: Random width and height shift by at most +±3 pixels in either direction; Random horizontal flip. +• GTSRB, MNIST: Random width and height shift by at most±5 pixels in either +direction; random rotation by at most±15 degrees; random channel shift; random +zoom in[0.5,1.5]; random shear by at most 6 degrees. +• HASYv2: Random width and height shift by at most±5 pixels in either direction; +random rotation by at most±5 degree. +• SVHN: No data augmentation. +If the dataset does not define a training/test set, a stratified67 % / 33 % split is applied. If the dataset does not define a validation set, the training set is split in a stratified manner -into90 %training set / 10 %test set. -Early stopping [ Pre98] with the validation accuracy as a stopping criterion and a patience of +into 90 % training set /10 % test set. +Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of 10 epochs is applied. After this, the model is trained without data augmentation for at most 1000 epochs with early stopping and the validation accuracy as a stopping criterion and a patience of 10 epochs. Kernel weights are initialized according to the uniform initialization scheme of He [HZRS15b] (see Appendix B.3). 
The architecture of the baseline model uses a pattern of -Conv-Block (n) = (Convolution−Batch Normalization −Activation )n−Pooling +Conv-Block(n) = (Convolution−Batch Normalization−Activation)n −Pooling The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for the last layer where softmax is used. Before the last two convolutional layer, a dropout -layer with dropout probability 0.5is applied. The architecture is given in detail in Table 5.1. +layer with dropout probability0.5 is applied. The architecture is given in detail in Table 5.1. Please note that the number of input- and output channels of the network depends on -the dataset. If the input image is larger than 32 px×32 px, for each power of two a -Conv-Block (2)is added at the input. For MNIST, the images are bilinearly upsampled to +the dataset. If the input image is larger than 32 px ×32 px, for each power of two a +Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to 32 px×32 px. 5.1. 
Baseline Model and Training setup # Type Filters @ -Patch size / strideParameters FLOPs Output size -Input 0 0 3@32 ×32 -1 Convolution 32@ 3×3×3/1 896 1736704 32@32×32 -2 BN + ELU 64 163904 32@32×32 -3 Convolution 32@ 3×3×32/1 9248 18841600 32@32×32 -4 BN + ELU 64 163904 32@32×32 -Max pooling 2×2/2 0 40960 32@16 ×16 -5 Convolution 64@ 3×3×32/1 18496 9420800 64@16 ×16 -6 BN + ELU 128 82048 64@16 ×16 -7 Convolution 64@ 3×3×64/1 36928 18 857 984 64@16×16 -8 BN + ELU 128 82048 64@16 ×16 -Max pooling 2×2/2 20480 64@ 8 ×8 -9 Convolution 64@ 3×3×64/1 36928 4714496 64@ 8 ×8 -10 BN + ELU 128 20608 64@ 8 ×8 -Max pooling 2×2/2 5120 64@ 4 ×4 -11 Convolution (v) 512@ 4×4×64/1 524 800 1048064 512@ 1 ×1 -12 BN + ELU 1024 3584 512@ 1 ×1 -Dropout 0.5 0 0 512@ 1 ×1 -13 Convolution 512@ 1×1×512/1 262656 523776 512@ 1 ×1 -14 BN + ELU 1024 3584 512@ 1 ×1 -Dropout 0.5 0 0 512@ 1 ×1 -15 Convolution k @ 1×1×512/1k·(512 + 1) 1024 ·kk @ 1×1 -Global avg Pooling 1×1 0 k k @ 1×1 -16 BN + Softmax 2k 7k k @ 1×1 +Patch size / stride +Parameters FLOPs Output size +Input 0 0 3@32 × 32 +1 Convolution 32@ 3 ×3 ×3 /1 896 1736704 32 @32 × 32 +2 BN + ELU 64 163904 32 @32 × 32 +3 Convolution 32@ 3 ×3 ×32 /1 9248 18841600 32 @32 × 32 +4 BN + ELU 64 163904 32 @32 × 32 +Max pooling 2 ×2 /2 0 40960 32@16 × 16 +5 Convolution 64@ 3 ×3 ×32 /1 18496 9420800 64@16 × 16 +6 BN + ELU 128 82048 64@16 × 16 +7 Convolution 64@ 3 ×3 ×64 /1 36928 18 857 984 64@16 × 16 +8 BN + ELU 128 82048 64@16 × 16 +Max pooling 2 ×2 /2 20480 64@ 8 × 8 +9 Convolution 64@ 3 ×3 ×64 /1 36928 4714496 64@ 8 × 8 +10 BN + ELU 128 20608 64@ 8 × 8 +Max pooling 2 ×2 /2 5120 64@ 4 × 4 +11 Convolution (v) 512@ 4 ×4 ×64 /1 524 800 1048064 512@ 1 × 1 +12 BN + ELU 1024 3584 512@ 1 × 1 +Dropout 0.5 0 0 512@ 1 × 1 +13 Convolution 512@ 1 ×1 ×512 /1 262656 523776 512@ 1 × 1 +14 BN + ELU 1024 3584 512@ 1 × 1 +Dropout 0.5 0 0 512@ 1 × 1 +15 Convolution k @ 1 ×1 ×512 /1 k·(512 + 1) 1024 ·k k @ 1 × 1 +Global avg Pooling 1 ×1 0 k k @ 1 × 1 +16 BN + Softmax 2k 
7k k @ 1 × 1 ∑ 515k -+8925121032k -+55729664103424+ 2k -Table 5.1.: Baseline architecture with 3 input channels of size 32×32. All convolutional layers -useSAMEpadding, except for layer 11 which used VALIDpadding in order to decrease -the feature map size to 1×1. If the input feature map is bigger than 32×32, for -each power of two there are two Convolution + BN + ELU blocks and one Max pooling -block added. This is the framed part in the table.32×32Input ++892512 +1032k ++55729664 103424+2k +Table 5.1.:Baseline architecture with 3 input channels of size32 ×32. All convolutional layers +use SAME padding, except for layer 11 which usedVALID padding in order to decrease +the feature map size to1 ×1. If the input feature map is bigger than32 ×32, for +each power of two there are twoConvolution + BN + ELU blocks and oneMax pooling +block added. This is the framed part in the table. +32×32 +Input C32@3×3/1 BN + ELU C32@3×3/1 -BN + ELU16×16max pooling 2×2/2 +BN + ELU +16×16 +max pooling2×2/2 C64@3×3/1 BN + ELU C64@3×3/1 -BN + ELU8×8max pooling 2×2/2 +BN + ELU +8×8 +max pooling2×2/2 C64@3×3/1 -BN + ELU4×4max pooling 2×2/2 +BN + ELU +4×4 +max pooling2×2/2 C512@4×4/1(V) BN + ELU -Dropout,p= 0.51×1C512@1×1/1 +Dropout,p= 0.5 +1×1 +C512@1×1/1 BN + ELU Dropout,p= 0.5 Ck@1×1/1 Global AVG pooling BN + Softmax -Figure 5.1.: Architecture of the baseline model. C32@3×3/1is a convolutional layer with 32 filters -of kernel size 3×3with stride 1. +Figure 5.1.:Architecture of the baseline model.C 32@3 ×3/1 is a convolutional layer with 32 filters +of kernel size3 ×3 with stride 1. 5. Experimental Evaluation 5.1.1. Baseline Evaluation The results for the baseline model evaluated on eight datasets are given in Table 5.2. The speed for inference for different GPUs is given in Table 5.3. 
-DatasetSingle Model Accuracy Ensemble of 10 +Dataset Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set -Asirra 94.22 %σ= 3.49 94.37 %σ= 3.47 97 .07 % 97.37 % -CIFAR-10 91.23 %σ= 1.10 85.84 %σ= 0.87 92 .36 % 86.75 % -CIFAR-100 76.64 %σ= 1.48 63.38 %σ= 0.55 78 .30 % 64.70 % -GTSRB 100.00 %σ= 0.00 99.18 %σ= 0.11 100 .00 % 99.46 % -HASYv2 89.49 %σ= 0.42 85.35 %σ= 0.10 89 .94 % 86.03 % -MNIST 99.93 %σ= 0.07 99.53 %σ= 0.06 99 .99 % 99.58 % -STL-10 94.12 %σ= 0.87 75.67 %σ= 0.34 96 .35 % 77.62 % -SVHN 99.02 %σ= 0.07 96.28 %σ= 0.10 99 .42 % 97.20 % -Table 5.2.: Baseline model accuracy on eight datasets. The single model actuary is the 10 models -used in the ensemble. The empirical standard deviation σof the accuracy is also given. +Asirra 94.22 % σ= 3.49 94 .37 % σ= 3.47 97 .07 % 97 .37 % +CIFAR-10 91.23 % σ= 1.10 85 .84 % σ= 0.87 92 .36 % 86 .75 % +CIFAR-100 76.64 % σ= 1.48 63 .38 % σ= 0.55 78 .30 % 64 .70 % +GTSRB 100.00 % σ= 0.00 99 .18 % σ= 0.11 100 .00 % 99 .46 % +HASYv2 89.49 % σ= 0.42 85 .35 % σ= 0.10 89 .94 % 86 .03 % +MNIST 99.93 % σ= 0.07 99 .53 % σ= 0.06 99 .99 % 99 .58 % +STL-10 94.12 % σ= 0.87 75 .67 % σ= 0.34 96 .35 % 77 .62 % +SVHN 99.02 % σ= 0.07 96 .28 % σ= 0.10 99 .42 % 97 .20 % +Table 5.2.:Baseline model accuracy on eight datasets. The single model actuary is the 10 models +used in the ensemble. The empirical standard deviationσ of the accuracy is also given. CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For HASYv2 no test time transformations are used. 
-Network GPU TensorflowInference per Training +Network GPU Tensorflow Inference per Training 1 Image 128 images time / epoch Baseline Default Intel i7-4930K 3 ms 244 ms 231 .0 s Baseline Optimized Intel i7-4930K 2 ms 143 ms 149 .0 s @@ -1383,9 +1492,9 @@ Baseline Default GTX 1070 2 ms 15 ms 14 .4 s-14.5 s Baseline Default Titan Black 4 ms 25 ms 28 .1 s-28.1 s Baseline Optimized Titan Black 3 ms 22 ms 24 .4 s-24.4 s DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms — -Table 5.3.: SpeedcomparisonofthebaselinemodelonCIFAR-10. Thebaselinemodelisevaluatedon -six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [ Maj17]. -Weights the baseline model can be found at [ Tho17b]. The optimized Tensorflow build +Table 5.3.:Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on +six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Maj17]. +Weights the baseline model can be found at [Tho17b]. The optimized Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. 5.1. Baseline Model and Training setup @@ -1394,55 +1503,55 @@ The distribution of filter weights by layer is visualized in Figure 5.2 and the of bias weights by layer is shown in Figure 5.3. Although both figures only show the distribution for one specific model trained on CIFAR-100, the following observed patterns are consistent for 70 models (7 datasets and 10 models per dataset): -•The empiric [0.5−percentile,99.5−percentile ]interval which contains 99 %of the +• The empiric[0.5 −percentile,99.5 −percentile] interval which contains99 % of the filter weights is almost symmetric around zero. The same is true for the bias weights. -•The farther a layer is from the input away, the smaller the 99-percentile interval is, +• The farther a layer is from the input away, the smaller the 99-percentile interval is, except for the last layer (see Table A.1). 
-•The 99-percentile interval of the first layers filter weights is about [−0.5,+0.5], except -for MNIST and HASYv2 where it is in [−0.8,0.8]. -•The 99-percentile interval of the first layers bias weights is always in [−0.2,0.2]. -•The distribution of filter weights of the last convolutional layer is not symmetric. In +• The 99-percentile interval of the first layers filter weights is about[−0.5,+0.5], except +for MNIST and HASYv2 where it is in[−0.8,0.8]. +• The 99-percentile interval of the first layers bias weights is always in[−0.2,0.2]. +• The distribution of filter weights of the last convolutional layer is not symmetric. In some cases the distribution is also not unimodal. -•The bias weights of the last three layers are very close to zero. The absolute value of -most of them is smaller than 10−2. -Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γand theβparameter of -Batch Normalization. It is expected that γis close to 1 and βis close to 0. In those cases, +• The bias weights of the last three layers are very close to zero. The absolute value of +most of them is smaller than10−2. +Similarly, Figure 5.4 and Figure 5.5 show the distribution of theγ and theβ parameter of +Batch Normalization. It is expected thatγ is close to 1 andβ is close to 0. In those cases, the Batch Normalization layer equals the identity and thus is only relevant for the training. -Whileγandβdo not show as clear patterns as the filter and bias weights of convolutional +While γ and β do not show as clear patterns as the filter and bias weights of convolutional layers, some observations are also consistent through all models even for different datasets: -•γof the last layer (layer 16) is bigger than 1.3. -•The 99-percentile interval for βof the last layer is longer than the other 99-percentile +• γ of the last layer (layer 16) is bigger than 1.3. +• The 99-percentile interval forβ of the last layer is longer than the other 99-percentile intervals. 
-•The 99-percentile interval for βof the fourth-last (layer 14 for STL-10, layer 10 for +• The 99-percentile interval forβ of the fourth-last (layer 14 for STL-10, layer 10 for all other models) is more negative then all other layers. Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional layer. The ranges are calculated for each channel and filter separately. The smaller the values are, the less information is lost if the filters are replaced by smaller filters. 5. Experimental Evaluation -Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR100. - The weights of the first layer are relatively evenly spread in the interval [−0.4,+0.4]. -With every layer the interval which contains 95 %of the weights and is centered around +Figure 5.2.:Violin plots of the distribution of filter weights of a baseline model trained on CIFAR100. + The weights of the first layer are relatively evenly spread in the interval[−0.4,+0.4]. +With every layer the interval which contains95 % of the weights and is centered around the mean becomes smaller, especially with layer 11 where the feature maps are of -size1×1. In contrast to the other layers, the last convolutional layer has a bimodal +size 1 ×1. In contrast to the other layers, the last convolutional layer has a bimodal distribution. This plot indicates that the network might benefit from bigger filters in the first layer, whereas the filters in layers 7 – 11 could potentially be smaller. -Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. -While the first layers biases are in [−0.1,+0.1], after each max-pooling layer the interval -which contains 95 %of the weights and is centered around the mean becomes smaller. -In the last three convolutional layer, most bias weights are in [−0.005,+0.005]. 
+Figure 5.3.:Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. +While the first layers biases are in[−0.1,+0.1], after each max-pooling layer the interval +which contains95 % of the weights and is centered around the mean becomes smaller. +In the last three convolutional layer, most bias weights are in[−0.005,+0.005]. 5.1. Baseline Model and Training setup -Figure 5.4.: Violin plots of the distribution of the γparameter of Batch Normalization layers of a +Figure 5.4.:Violin plots of the distribution of theγ parameter of Batch Normalization layers of a baseline model trained on CIFAR-100. -Figure 5.5.: The distribution of the βparameter of Batch Normalization layers of a baseline model +Figure 5.5.:The distribution of theβ parameter of Batch Normalization layers of a baseline model trained on CIFAR-100. 5. Experimental Evaluation -Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For +Figure 5.6.:The distribution of the range of values (max - min) of filters by channel and layer. For each filter, the range of values is recorded by channel. The smaller this range is, the -less information is lost if a n×nfilter is replaced by a (n−1)×(n−1)filter. +less information is lost if an×n filter is replaced by a(n−1) ×(n−1) filter. 5.1. Baseline Model and Training setup 5.1.3. Training behavior @@ -1456,27 +1565,36 @@ the start are also better at the end. In order to check this hypothesis, the rel validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering stays approximately the same, then it can be considered to run the first few epochs many times and only train the best models to the end. For 10 models, there can be102−10 -2= 45 +2 = 45 pair-wise changes in the ordering at maximum if the relative order of validation accuracies is reversed. 
For the baseline model, 21.8 changes in the relative order of accuracies occurred -in average for each pair of epochs (i,i+ 1). This means if one knows only the relative order -of the validation accuracy of two models mandm′in epochi, it is doubtful if one can -make any statement about the ordering of mandm′in epochi+ 1. -01020304050607080901001101201301400.20.30.40.50.60.7 -epochvalidation accuracy +in average for each pair of epochs(i,i + 1). This means if one knows only the relative order +of the validation accuracy of two modelsm and m′ in epoch i, it is doubtful if one can +make any statement about the ordering ofm and m′in epochi+ 1. +0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 +0.2 +0.3 +0.4 +0.5 +0.6 +0.7 +epoch +validation accuracy maximum validation accuracy -minimum validation accuracy1.5 +minimum validation accuracy +1.5 2 2.5 3 3.5 4 -4.5loss +4.5 +loss maximum validation accuracy minimum validation accuracy mean loss -Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The -differences do not exceed 1 %and does not increase by training epoch. Four models +Figure 5.7.:Minimum and maximum validation accuracy of the 10 trained models by epoch. The +differences do not exceed1 % and does not increase by training epoch. Four models stopped the first training stage at epoch 133 which causes the shift in the loss and the maximum validation accuracy. Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was @@ -1506,50 +1624,50 @@ of the task is hard. For more than about 10 classes, however, it becomes hard to and read. For CIFAR-10, the proposed method groups the four object classes and the six animal classes together (see Figure 5.11a). -(a)CIFAR-10 Test set - (b)Random -Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal +(a) CIFAR-10 Test set + (b) Random +Figure 5.11.:Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. 
The diagonal elements are set to 0 in order to make other elements easier to see. Figure 5.11b shows a confusion matrix with random mistakes. -The first image of Figure 5.12 shows one example of a classifier with only 97.13 %test +The first image of Figure 5.12 shows one example of a classifier with only97.13 % test accuracy where a good permutation was found. Please note that this is not the best classifier. -The confusion matrix which resulted from a baseline classifier with 99.32 %test accuracy is +The confusion matrix which resulted from a baseline classifier with99.32 % test accuracy is displayed in as the second image. Those results suggest that the ordering of classes is a valuable tool to make patterns easier to see. Humans, however, are good at finding patterns even if they come from random noise. -Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 %accuracy -and40 %uniformly random errors of a balanced dataset is created, optimized according to +Hence, for comparison, a confusion matrix of a classifier with 30 classes,60 % accuracy +and 40 % uniformly random errors of a balanced dataset is created, optimized according to Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a. On the HASYv2 dataset the class-ordering is necessary to see anything as most possible confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a -maximum size of 50×50are displayed, the ordered method can show only 8 matrices +maximum size of50 ×50 are displayed, the ordered method can show only 8 matrices because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be displayed. 5.2. 
Confusion Matrix Ordering -Figure 5.12.: ThefirstimageshowstheconfusionmatrixforthetestofGTSRBsetafteroptimization +Figure 5.12.:The first image shows the confusion matrix for the test of GTSRB set after optimization to Equation (4.1). The diagonal elements are set to 0 in order to make other elements easier to see. The symbols next to the label on the vertical axis indicate the shape and the color of the signs. The second image shows the same, but with baseline model. -Best viewed in electronic form. -Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal +Best viewed in electronic form. +Figure 5.13.:The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal elements are set to 0 in order to make other elements easier to see. The top image shows arbitrary class ordering, the bottom image shows the optimized ordering. 5.3. Spectral Clustering vs CMO 5.3. Spectral Clustering vs CMO This section evaluates the clustering quality of CMO in comparison to the clustering quality of spectral clustering. -The evaluated model achieves 70.50 %training accuracy and 53.16 %test accuracy on +The evaluated model achieves70.50 % training accuracy and 53.16 % test accuracy on CIFAR-100. Figure 5.14 shows the sorted confusion matrix. -Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The +Figure 5.14.:The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The diagonal elements are set to 0 in order to make other elements easier to see. Best viewed in electronic form. CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters -which are to be found. The number of errors is determined by (i) Join all nclusters which -contain the classes of the coarse class Cto a setM. The error is n. (ii) Within M, find the -set of classes M−which do not belong to C. (iii) The final error is n+|M−|. As can be +which are to be found. 
The number of errors is determined by (i) Join alln clusters which +contain the classes of the coarse classC to a setM. The error isn. (ii) WithinM, find the +set of classesM−which do not belong toC. (iii) The final error isn + |M−|. As can be seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has only half the error of spectral clustering. The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be @@ -1559,101 +1677,109 @@ based on CMO as described in Section 4.2. 5. Experimental Evaluation Cluster Spectral clustering Errors CMO Errors fish aquarium fish, orchid + flatfish -+ ray, shark + trout, lion5 aquarium fish, orchid + flatfish -+ ray + shark, trout4 ++ ray, shark + trout, lion +5 aquarium fish, orchid + flatfish ++ ray + shark, trout +4 flowers orchid, aquarium fish + sunflower + poppy, tulip + rose, -train5orchid, aquarium fish + sunflower, - poppy, tulip, rose2 +train +5 orchid, aquarium fish + sunflower, + poppy, tulip, rose +2 people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 reptiles crocodile, plain, road, table, wardrobe + dinosaur + lizard -+ snake, worm + turtle9crocodile, lizard, lobster, caterpillar ++ snake, worm + turtle +9 crocodile, lizard, lobster, caterpillar + dinosaur + snake + turtle, - crab6 + crab +6 trees maple, oak, pine+willow, forest -+ palm3 palm, willow, pine, maple, oak 0 ++ palm +3 palm, willow, pine, maple, oak 0 Total 24 12 -Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , -whereas clusters are separated by +. +Table 5.4.:Differences in spectral clustering and CMO. Classes in a cluster are separated by, +whereas clusters are separated by+. 
Cluster Spectral clustering Errors CMO Errors -AA,A,A 0A,A,A, Å 1 -BB,B 0B,B 0 -CC,c,⊂andC,ξ,EandC 4C,c,⊂,CandC 1 -DD,D,D,⊿ 1D,D,D 0 -EEandE,ε 2EandE,ε,ϵ,∈ 4 -FFandF,F 1FandF,F 1 -HHandH,κandH 3HandH,H 1 -KK,κ 0K,κ 0 -LL,⌊andL,L 1L,⌊andL,L 1 -MMandMandM 2Mandµ,MandM 3 -NNandN,NandN 2NandN,NandN,ℵ 3 -OO,O,0,◦,°,#ando 1O,O,0,◦,°and#ando 2 -PP,Pandp,ρandPand℘ 3PandP,P,℘andp,ρ 2 -QQ,Q,Q,ι,⊔,≳,ℓ,ℑ, Æ, 1 7QandQ,Q 1 -RR,RandR,R,kandℜ 3Randℜ,R,R,R 1 -SS,s,S 0S,s,S 0 -TT,⊤andT,τ 1T,⊤andT,τ 1 -UU,∪andu,U,A 1U,u,U,Aand∪ 2 -VV,v,∨ 0V,v,∨ 0 -WW,w,ω 0W,wandω 1 -XX,x,X,χ,× 0X,x,X,χ,× 0 -YYandy 1Y,y 0 -ZZ,z,ZandZ,Z 1Z,z,Z,Z,Z 0 +A A, A, A 0 A, A, A , Å 1 +B B, B 0 B, B 0 +C C, c, ⊂and C, ξ, E and C 4 C, c, ⊂, Cand C 1 +D D, D, D, ⊿ 1 D, D, D 0 +E E and E, ε 2 E and E, ε, ϵ, ∈ 4 +F F and F, F 1 F and F, F 1 +H H and H , κ and H 3 H and H, H 1 +K K, κ 0 K, κ 0 +L L, ⌊and L, L 1 L, ⌊and L, L 1 +M M and Mand M 2 M and µ, Mand M 3 +N N and N, N and N 2 N and N, N and N, ℵ 3 +O O, O, 0, ◦, °, # and o 1 O, O, 0, ◦, ° and # and o 2 +P P, Pand p, ρ and P and ℘ 3 P and P, P, ℘ and p, ρ 2 +Q Q, Q, Q, ι, ⊔, ≳, ℓ, ℑ, Æ,1 7 Q and Q, Q 1 +R R, Rand R, R, k and ℜ 3 R and ℜ, R, R, R 1 +S S, s, S 0 S, s, S 0 +T T, ⊤and T, τ 1 T, ⊤and T, τ 1 +U U, ∪and u, U, A 1 U, u, U, A and ∪ 2 +V V, v, ∨ 0 V, v, ∨ 0 +W W, w, ω 0 W, w and ω 1 +X X, x, X, χ, × 0 X, x, X, χ, × 0 +Y Y and y 1 Y, y 0 +Z Z, z, Zand Z, Z 1 Z, z, Z, Z, Z 0 Total 34 25 Table 5.5.: Differences in spectral clustering and CMO. 5.4. Hierarchy of Classifiers 5.4. Hierarchy of Classifiers In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root -classifier achieves an accuracy of 65.29 %with test-time transformations. The accuracy on +classifier achieves an accuracy of65.29 % with test-time transformations. The accuracy on the found sub-classes are listed in Table 5.6. 
The fact that the root classifier achieves better results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either -be due to limited training data, overfitting or the small size of 32 px×32 pxof the data. +be due to limited training data, overfitting or the small size of32 px×32 px of the data. The experiment also shows that most of the errors are due to not identifying the correct cluster. Hence, in this case, more work in improving the root classifier is necessary rather than improving the discrimination of classes within a cluster. Although the classes within a cluster capture most of the classifications, many misclassifications happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would -push the accuracy in the fullcolumn only to 63.50 %due to errors of the root classifier +push the accuracy in thefull column only to63.50 % due to errors of the root classifier where the root classifier does not predict the correct cluster. The leaf classifiers use the same topology as the root classifier. By initializing them with -the root classifiers weights their performance can be pushed at about the inneraccuracy. -They are, however, only useful if their accuracy is well above the inneraccuracy of the root +the root classifiers weights their performance can be pushed at about theinner accuracy. +They are, however, only useful if their accuracy is well above theinner accuracy of the root classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. 
-Cluster Classesaccuracy +Cluster Classes +accuracy root classifier leaf classifier cluster identified class identified | cluster class identified | cluster -1 3 69.67 % 84 .27 % 72.98 % -2 5 46.60 % 58 .54 % 43.47 % -3 2 58.50 % 92 .13 % 83.46 % -4 2 50.50 % 87 .83 % 81.74 % -5 3 44.67 % 79 .29 % 71.01 % -6 2 29.50 % 78 .67 % 72.00 % -7 2 52.50 % 92 .11 % 87.72 % -8 2 59.50 % 86 .23 % 81.88 % -9 2 59.00 % 90 .08 % 87.79 % -10 2 62.00 % 85 .52 % 73.10 % -11 2 67.00 % 87 .01 % 75.32 % -12 2 72.50 % 94 .77 % 76.77 % -13 2 64.00 % 82 .58 % 86.27 % -14 2 79.67 % 89 .85 % 89.10 % -Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on -14 clusters of classes. Each class has 100 elements to test. The column cluster identified +1 3 69.67 % 84 .27 % 72 .98 % +2 5 46.60 % 58 .54 % 43 .47 % +3 2 58.50 % 92 .13 % 83 .46 % +4 2 50.50 % 87 .83 % 81 .74 % +5 3 44.67 % 79 .29 % 71 .01 % +6 2 29.50 % 78 .67 % 72 .00 % +7 2 52.50 % 92 .11 % 87 .72 % +8 2 59.50 % 86 .23 % 81 .88 % +9 2 59.00 % 90 .08 % 87 .79 % +10 2 62.00 % 85 .52 % 73 .10 % +11 2 67.00 % 87 .01 % 75 .32 % +12 2 72.50 % 94 .77 % 76 .77 % +13 2 64.00 % 82 .58 % 86 .27 % +14 2 79.67 % 89 .85 % 89 .10 % +Table 5.6.:Accuracies of the root classifier trained on the full set of 100 classes evaluated on +14 clusters of classes. Each class has 100 elements to test. The columncluster identified gives the percentage that the root classifiers argmax prediction is within the correct -cluster, but not necessarily the correct class. The columns class identified | cluster only +cluster, but not necessarily the correct class. The columnsclass identified | clusteronly consider data points where the root classifier correctly identified the cluster. 5. Experimental Evaluation 5.5. Increased width for faster learning More filters in one layer could simplify the optimization problem as each filter needs smaller -updates. 
Hence a CNN Nwithnifilters in layer iis expected to take more epochs than a -CNNN′with 2·nifilters in layer ito achieve the same validation accuracy. -This hypothesis can be falsified by training a CNN Nand a CNN N′and comparing the +updates. Hence a CNNN with ni filters in layeri is expected to take more epochs than a +CNN N′with 2 ·ni filters in layeri to achieve the same validation accuracy. +This hypothesis can be falsified by training a CNNN and a CNNN′and comparing the trained number of epochs. As more filters can lead to different results depending on the layer where they are added, five models are trained. The details about those models are given in Table 5.7 -Name LayerFilter count Total +Name Layer Filter count Total Baseline New parameters m9 9 64 638 5 978 566 m′ @@ -1662,29 +1788,30 @@ m11 11 512 3786 5 982 698 m′ 11 11 512 1024 1 731 980 m13 13 512 8704 5 982 092 -Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer +Table 5.7.:Models which are identical to the baseline, except that the number of filters of one layer was increased. The detailed results are given in Table 5.8. As expected, the number of training epochs of the models with increased numbers of parameters is lower. The wall-clock time, however, is higher due to the increase in computation per forward- and backward-pass. -Form9,m11andm13, the filter weight range of the layer with increased capacity decreases +For m9, m11 and m13, the filter weight range of the layer with increased capacity decreases compared to Figure 5.6, the filter weights of the layer with increased capacity are more -concentrated around zero compared to Figure 5.2. For model m13, the distribution of +concentrated around zero compared to Figure 5.2. For modelm13, the distribution of weight of the output layer changed to a more bell-shaped distribution. 
Except for this, the distribution of filter weights in other layers did not change for all three models compared to the baseline. -Model ParametersAccuracy Training +Model Parameters +Accuracy Training Single Model Ensemble Mean Epochs Mean Time Mean std -baseline 944 012 63 .38 %0.55 64.70 % 154.7 3856 s -m9 5 978 566 65 .53 %0.37 66.72 % 105.7 4472 s +baseline 944 012 63 .38 % 0.55 64.70 % 154.7 3856 s +m9 5 978 566 65 .53 % 0.37 66.72 % 105.7 4472 s m′ -9 8 925 622 65 .10 %1.09 66.54 % 95.6 5261 s -m11 5 982 698 65.73 %0.77 67.38 % 149.2 5450 s +9 8 925 622 65 .10 % 1.09 66.54 % 95.6 5261 s +m11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s m′ -11 1 731 980 62 .12 %0.48 62.89 % 143.6 3665 s -m13 5 982 092 62 .39 %0.66 63.77 % 147.8 4485 s -Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9,m11,m13 +11 1 731 980 62 .12 % 0.48 62.89 % 143.6 3665 s +m13 5 982 092 62 .39 % 0.66 63.77 % 147.8 4485 s +Table 5.8.:Training time in epochs and wall-clock time for the baseline and modelsm9, m11, m13 as well as their accuracies. 5.6. Weight updates @@ -1705,28 +1832,31 @@ the mean weight updates of layers 1 and 3 are higher, the range of the mean weig from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is higher. For the maximum and the sum, no similar pattern could be observed (see Figures A.3 and A.4). -Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but +Figure 5.15.:Mean weight updates between epochs by layer. The model is the baseline model, but with layer 5 reduced to 3 filters. 5. Experimental Evaluation 5.7. Multiple narrow layers vs One wide layer On a given feature map size one can have an arbitrary number of convolutional layers with -SAMEpadding and each layer can have an arbitrary number of filters. 
A convolutional layer -with more filters is called wider[ZK16], a convolutional layer with fewer filters is thus called -narrower and the number of filters in a convolutional layer is the layers width. +SAME padding and each layer can have an arbitrary number of filters. A convolutional layer +with more filters is calledwider [ZK16], a convolutional layer with fewer filters is thus called +narrower and the number of filters in a convolutional layer is the layerswidth. If the number of parameters which may be used for the feature map scale is fixed and high -enough, there are still many combinations. If niwithi= 0,...,kis the number of output -feature maps of layer iwherei= 0is the input layer and all filters are 3×3filters without +enough, there are still many combinations. Ifni with i= 0,...,k is the number of output +feature maps of layeri where i= 0 is the input layer and all filters are3 ×3 filters without a bias, then the number of parameters is -Parameters =k∑ -i=1( -(ni−1·32+ 1)·ni) +Parameters = +k∑ +i=1 +( +(ni−1 ·32 + 1) ·ni +) Hence the width of one layer does not only influence the parameters in this layer, but also in the next layer. The number of possible subsequent layers of one feature map size is enormous, even if constraints are placed on the number of parameters. For example, the first convolutional layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per -layer are not desirable, one keeps all layers having a bias and all layers only use 3×3filters, +layer are not desirable, one keeps all layers having a bias and all layers only use3 ×3 filters, then the maximum depth is 10. If one furthermore assumes that at least 800 parameters should be used, there are still 120 possible layer combinations. As experimentally evaluating one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible @@ -1734,52 +1864,52 @@ to evaluate all layer combinations. 
In the following, a couple of changes to the width / depth will be evaluated. Each layer expands the perceptive field. Hence deeper layer can use more of the input for every single output value. But deeper networks need more time for inference as the output -of layerihas to be computed before the output of i+ 1can be computed. Hence there is +of layeri has to be computed before the output ofi+ 1 can be computed. Hence there is less potential to parallelize computations. Each filter can be seen as a concept which can be learned. The deeper the filter is in the network, the higher is the abstraction level of the concept. In most cases, both is necessary: Many different concepts (width) and high-level concepts (depth). Reducing the two first convolutional layers of the baseline model (see Page 39) to one -convolutional layer of 48 filters ( 944 396parameters in total, whereas the baseline model -has944 012parameters) resulted in a mean accuracy of 61.64 %(-1.74 %) and a standard -deviation of σ= 1.12(+0.57). The ensemble achieved 63.18 %(-1.52 %). As expected, -the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 sof -the baseline model to 15 sof the model with one less convolutional layer, one less Batch -Normalization and one less activation layer. The inference time was also reduced from 6 ms +convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model +has 944 012 parameters) resulted in a mean accuracy of61.64 % (-1.74 %) and a standard +deviation of σ = 1.12 (+0.57). The ensemble achieved63.18 % (-1.52 %). As expected, +the training time per epoch was reduced. For the GTX 980, it was reduced from22.0 s of +the baseline model to15 s of the model with one less convolutional layer, one less Batch +Normalization and one less activation layer. The inference time was also reduced from6 ms 5.8. Batch Normalization -to4 msfor 1 image and from 32 msto23 msfor 128 images. 
Due to the loss in accuracy of +to 4 ms for 1 image and from32 ms to 23 ms for 128 images. Due to the loss in accuracy of more then one percentage point of the mean model and the increased standard deviation of -the models performance, at least two convolutional layers are on the 32 px×32 pxfeature +the models performance, at least two convolutional layers are on the32 px×32 px feature map scale are recommendable for CIFAR-100. Changing the baseline to have less filters but more layers is another option. This was tried -for the first block at the 32 px×32 pxfeature map scale. The two convolutional layers +for the first block at the32 px×32 px feature map scale. The two convolutional layers (layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one -convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model -has944 132parameters. Compared to the baseline model, the time for inference was the +convolutional layer with 26 filters in theconvolution - BN - ELU pattern. The model +has 944 132 parameters. Compared to the baseline model, the time for inference was the same. This is unexpected, because the inference time changed when a layer was removed at -this scale. The mean test accuracy was 63.66 %(+0.28) and the standard deviation was -σ= 1.03(+0.48). The ensemble achieved 64.91 %test accuracy (+0.21). +this scale. The mean test accuracy was63.66 % (+0.28) and the standard deviation was +σ= 1.03 (+0.48). The ensemble achieved64.91 % test accuracy (+0.21). Having two nonlinearities at each feature map scale could be important to learn nonlinear transformations at that scale. As the baseline model does only have one nonlinearity at the -8×8feature maps scale, another convolutional layer with 64 filters, Batch Normalization +8 ×8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization and ELU was added. 
To keep the number of parameters constant, layer 11 of the baseline model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy -of63.09 %(-0.29) with a standard deviation of σ= 0.70(+0.15). The ensemble achieves -an accuracy of 64.39 %(+0.31). This could indicate that having two convolutional layers +of 63.09 % (-0.29) with a standard deviation ofσ= 0.70 (+0.15). The ensemble achieves +an accuracy of64.39 % (+0.31). This could indicate that having two convolutional layers is more important for layers close to the input than intermediate layer. Alternatively, the parameters could be more important in layer 11 than having a new convolutional layer after layer 9. In order to control the hypothesis that having two convolutional layers are less important in -the middle of a network, the second convolutional layer at the 16×16feature map scale is +the middle of a network, the second convolutional layer at the16 ×16 feature map scale is removed. The first convolutional layer was increased from 32 filters to 59 filters, the second convolutional layer was increased from 32 filter s to 58 filters in order to keep the amount of -parameters of the model constant. The adjusted model achieved 62.72 %(-0.66) mean test -accuracy with a standard deviation of σ= 0.84(+0.29). The ensemble achieved 63.88 % +parameters of the model constant. The adjusted model achieved62.72 % (-0.66) mean test +accuracy with a standard deviation ofσ= 0.84 (+0.29). The ensemble achieved63.88 % test accuracy (-0.66). -Even more extreme, if both convolutional layers are removed from the 16×16feature map -scale, the mean test accuracy drops to 61.21 %(-2.17) with a standard deviation of σ= 0.51 -(-0.04). The ensemble achieves a test accuracy of 63.07 %(-1.63). Thus it is very important +Even more extreme, if both convolutional layers are removed from the16 ×16 feature map +scale, the mean test accuracy drops to61.21 % (-2.17) with a standard deviation ofσ= 0.51 +(-0.04). 
The ensemble achieves a test accuracy of63.07 % (-1.63). Thus it is very important to have at least one convolutional layer at this feature map scale. 5.8. Batch Normalization In [CUH15], the authors write that Batch Normalization does not improve ELU networks. @@ -1787,31 +1917,31 @@ Hence the effect of removing Batch Normalization from the baseline is investigat 5. Experimental Evaluation experiment. -As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn -are identical to the baseline model m, except that in mno-bnthe Batch Normalization layers +As before, 10 models are trained on CIFAR-100. The training setup and the modelmno-bn +are identical to the baseline modelm, except that inmno-bn the Batch Normalization layers are removed. -One notable difference is the training time: While mneeds 21 msper epoch in average on -a GTX 980, mno-bnonly needs 21 msper epoch. The number of epochs used for training, +One notable difference is the training time: Whilem needs 21 ms per epoch in average on +a GTX 980,mno-bn only needs21 ms per epoch. The number of epochs used for training, however, also increased noticeably from 149 epochs to 178 epochs in average. The standard -deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for mno-bn. -The mean accuracy of mno-bnis62.86 %and hence 0.52 percentage points worse. The +deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs formno-bn. +The mean accuracy ofmno-bn is 62.86 % and hence 0.52 percentage points worse. The standard deviation between models increased from 0.55 to 0.61. This is likely a result of the early stopping policy and the differences in training epochs. This can potentially be fixed by retraining the models which stopped earlier than the model which was trained for the -biggest amount of epochs. The ensemble test accuracy is 63.88 %and hence 0.82 percentage +biggest amount of epochs. 
The ensemble test accuracy is63.88 % and hence 0.82 percentage points worse than the baseline. The filter weight range and distribution is approximately the same as Figure 5.6 and Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of the baseline are spread out in the first layer and much more concentrated in subsequent layers (see Figure 5.3), the model without Batch Normalization has rather concentrated weights in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). -Another model m′ -no-bnwhich has one more filter in the convolutional layer 1, 3, 5, and 7 to +Another modelm′ +no-bn which has one more filter in the convolutional layer 1, 3, 5, and 7 to compensate for the loss of parameters in Batch Normalization. The mean test accuracy of -10 such models is 62.87 %which is 0.51 percentage points worse than the baseline. The -ensemble of m′ -no-bnachieves 64.33 %which is 0.37 percentage points worse than the baseline. -The mean training time was 14 sper epoch and 157.4 epochs with a standard deviation of +10 such models is62.87 % which is 0.51 percentage points worse than the baseline. The +ensemble ofm′ +no-bn achieves64.33 % which is 0.37 percentage points worse than the baseline. +The mean training time was14 s per epoch and 157.4 epochs with a standard deviation of 20.7 epochs. Hence it is not advisable to remove Batch Normalization for the final model. It could, however, be possible to remove Batch Normalization for the experiments to iterate quicker @@ -1820,75 +1950,75 @@ Batch Normalization. 5.9. Batch size 5.9. Batch size -The mini-batch size m∈N≥1influences -•Epochs until convergence : The smaller m, the more often the model is updated +The mini-batch sizem∈N≥1 influences +• Epochs until convergence: The smallerm, the more often the model is updated in one epoch. Those updates, however, are based on fewer samples of the dataset. 
Hence the gradients of different mini-batches can noticeably differ. In the literature, this is referred to as gradient noise [KMN+16]. -•Training time per epoch : The smaller the batch size, the higher the training time +• Training time per epoch: The smaller the batch size, the higher the training time per epoch as the hardware is not optimally utilized. -•Resulting model quality : The choice of the hyperparameter minfluences the -accuracy of the classifier when training is finished. [ KMN+16] supports the view that -smallermresult in less sharp minima. Hence smaller mlead to better generalization. +• Resulting model quality: The choice of the hyperparameter m influences the +accuracy of the classifier when training is finished. [KMN+16] supports the view that +smaller m result in less sharp minima. Hence smallerm lead to better generalization. Empiric evaluation results can be found in Table 5.9. Those results confirm the claim of [KMN+16] that lower batch sizes generalize better. -mTrainingEpochsMean total Single model Ensemble +m Training Epochs Mean total Single model Ensemble time training time Accuracy std Accuracy -8118s -epoch81–153 14 131 s 61 .93 %σ= 1.03 65.68 % -16 62s -epoch103 – 173 8349 s 64.16 %σ= 0.81 66.98 % -32 35s -epoch119 – 179 5171 s 64 .11 %σ= 0.75 65.89 % -64 25s -epoch133 – 195 2892 s 63.38 %σ= 0.55 64.70 % -128 18s -epoch145 – 239 3126 s 62 .23 %σ= 0.73 63.55 % -Table 5.9.: Trainingtimeperepochandsinglemodeltestsetaccuracy(meanandstandarddeviation) -of baseline models trained with different mini-batch sizes mon GTX 970 GPUs on +8 118 s +epoch 81 – 153 14 131 s 61 .93 % σ= 1.03 65 .68 % +16 62 s +epoch 103 – 173 8349 s 64.16 % σ= 0.81 66.98 % +32 35 s +epoch 119 – 179 5171 s 64 .11 % σ= 0.75 65 .89 % +64 25 s +epoch 133 – 195 2892 s 63.38 % σ= 0.55 64.70 % +128 18 s +epoch 145 – 239 3126 s 62 .23 % σ= 0.73 63 .55 % +Table 5.9.:Training time per epoch and single model test set accuracy (mean and standard deviation) +of baseline models 
trained with different mini-batch sizesm on GTX 970 GPUs on CIFAR-100. 5.10. Bias Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a -modelmno-biasis created which is identical to the baseline model m, except that the bias of +model mno-bias is created which is identical to the baseline modelm, except that the bias of layers 11, 13 and 15 is removed. -The mean test accuracy of 10 trained mno-biasis63.74 %which is an improvement of -0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 % +The mean test accuracy of 10 trainedmno-bias is 63.74 % which is an improvement of +0.36 percentage points over the baseline. The ensemble achieves a test accuracy of65.13 % which is 0.43 percentage points better than the baseline. Hence the bias can safely be removed. Removing the biases did not have a noticeable effect on the filter weight range, the filter -weight distribution or the distribution of the remaining biases. Also, the γandβparameters +weight distribution or the distribution of the remaining biases. Also, theγ and β parameters of the Batch Normalization layers did not noticeably change. 5. Experimental Evaluation 5.11. Learned Color Space Transformation -In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1×1 -directly after the input and then another convolutional layer with 3 filters of size 1×1acts +In [MSM16] it is described that placing one convolutional layer with 10 filters of size1 ×1 +directly after the input and then another convolutional layer with 3 filters of size1 ×1 acts as a learned transformation in another color space and boosts the accuracy. This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation and 10 filters followed by another convolutional layer with ELU activation and -3 filters. The mean accuracy of 10 models was 63.31 %with a standard deviation of 1.37. +3 filters. 
The mean accuracy of 10 models was63.31 % with a standard deviation of 1.37. The standard deviation is noticeable higher than the standard deviation of the baseline model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of -the ensemble is at 64.77 %and hence 0.07 percentage points higher than the accuracy of +the ensemble is at64.77 % and hence 0.07 percentage points higher than the accuracy of the baseline models. The inference time for 1 image and for 128 images did not change compared to the baseline. -The training time per epoch increased from 26 sto30 son the GTX 970. +The training time per epoch increased from26 s to 30 s on the GTX 970. Hence it is not advisable to use the learned color space transformation. 5.12. Pooling -An alternative to max pooling with stride 2 with a 2×2kernel is using a 3×3kernel with +An alternative to max pooling with stride 2 with a2 ×2 kernel is using a3 ×3 kernel with stride 2. This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the -3×3kernel max pooling (and SAMEpadding). The mean accuracy of 10 models was 63.32 % -(−0.06) and the standard deviation was 0.57 ( +0.02). The ensemble achieved 65.15 %test -accuracy ( +0.45). -The training time per epoch decreased from 20.5 s-21.1 sto18.6 s(mean of 10 training runs) -on the Nvidia GTX 970. The time for inference increased from 25 msto26 msfor a batch +3 ×3 kernel max pooling (andSAME padding). The mean accuracy of 10 models was63.32 % +(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved65.15 % test +accuracy (+0.45). +The training time per epoch decreased from20.5 s-21.1 s to 18.6 s (mean of 10 training runs) +on the Nvidia GTX 970. The time for inference increased from25 ms to 26 ms for a batch of 128 images. 5.13. 
Activation Functions -Nonlinear, differentiableactivationfunctionsareimportantforneuralnetworkstoallowthem +Nonlinear, differentiable activation functions are important for neural networks to allow them to learn nonlinear decision boundaries. One of the simplest and most widely used activation -functions for CNNs is ReLU [ KSH12], but others such as ELU [ CUH15], parametrized -rectified linear unit (PReLU) [ HZRS15b ], softplus [ ZYL+15] and softsign [ BDLB09 ] have +functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized +rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+15] and softsign [BDLB09] have been proposed. The baseline uses ELU. 5.13. Activation Functions @@ -1896,16 +2026,16 @@ Activation functions differ in the range of values and the derivative. The defin other comparisons of eleven activation functions are given in Table B.3. Theoretical explanations why one activation function is preferable to another in some scenarios are the following: -•Vanishing Gradient : Activation functions like tanh and the logistic function saturate - outside of the interval [−5,5]. This means weight updates are very small for +• Vanishing Gradient: Activation functions like tanh and the logistic function saturate + outside of the interval[−5,5]. This means weight updates are very small for preceding neurons, which is especially a problem for very deep or recurrent networks as -described in [ BSF94]. Even if the neurons learn eventually, learning is slower [ KSH12]. -•Dying ReLU : The dying ReLU problem is similar to the vanishing gradient problem. +described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12]. +• Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem. The gradient of the ReLU function is 0 for all non-positive values. 
This means if all elements of the training set lead to a negative input for one neuron at any point in the training process, this neuron does not get any update and hence does not participate in the training process. This problem is addressed in [MHN13]. -•Mean unit activation : Some publications like [ CUH15,IS15] claim that mean +• Mean unit activation: Some publications like [CUH15, IS15] claim that mean unit activations close to 0 are desirable. They claim that this speeds up learning by reducing the bias shift effect. The speedup of learning is supported by many experiments. Hence the possibility of negative activations is desirable. @@ -1918,14 +2048,14 @@ tanh and softplus performed worse than the identity and it is unclear why the pu network performed so much better than the logistic function. One hypothesis why the logistic function performs so bad is that it cannot produce negative outputs. Hence the logistic−function was developed: -logistic−(x) =1 -1 +e−x−0.5 +logistic−(x) = 1 +1 + e−x −0.5 The logistic−function has the same derivative as the logistic function and hence still suffers from the vanishing gradient problem. The network with the logistic−function achieves an -accuracy which is 11.30 %better than the network with the logistic function, but is still -5.54 %worse than the ELU. +accuracy which is11.30 % better than the network with the logistic function, but is still +5.54 % worse than the ELU. Similarly, ReLU was adjusted to have a negative output: -ReLU−(x) = max(−1,x) =ReLU (x+ 1)−1 +ReLU−(x) = max(−1,x) = ReLU(x+ 1) −1 The results of ReLU−are much worse on the training set, but perform similar on the test 5. Experimental Evaluation @@ -1935,16 +2065,18 @@ This contradicts [GBB11, SMGS14]. A key difference between the logistic−function and ELU is that ELU does neither suffers from the vanishing gradient problem nor is its range of values bound. 
For this reason, the S2ReLU activation function, defined as -S2ReLU (x) =ReLU (x -2+ 1)−ReLU (−x -2+ 1) = +S2ReLU(x) = ReLU(x +2 + 1) −ReLU(−x +2 + 1) = +  -−x -2+ 1ifx≤−2 -x if−2≤x≤2 + +−x +2 + 1 if x≤−2 +x if −2 ≤x≤2 x -2+ 1ifx>−2 -This function is similar to SReLUs as introduced in [ JXF+16]. The difference is that S2ReLU +2 + 1 if x> −2 +This function is similar to SReLUs as introduced in [JXF+16]. The difference is that S2ReLU does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be the identity close to zero and have a smaller absolute value than the identity farther away. It is easy to compute and easy to implement. @@ -1956,11 +2088,11 @@ test accuracy. Function Vanishing Gradient Negative Activation possible Bound activation Identity No Yes No Logistic Yes No Yes -Logistic−Yes Yes Yes +Logistic− Yes Yes Yes Softmax Yes Yes Yes tanh Yes Yes Yes Softsign Yes Yes Yes -ReLU Yes1No Half-sided +ReLU Yes1 No Half-sided Softplus No No Half-sided S2ReLU No Yes No LReLU/PReLU No Yes No @@ -1969,189 +2101,201 @@ Table 5.10.: Properties of activation functions. 1The dying ReLU problem is similar to the vanishing gradient problem. 5.13. 
Activation Functions -FunctionSingle model Ensemble of 10 +Function Single model Ensemble of 10 Training set Test set Training set Test set -Identity 66.25 %σ= 0.77 56.74 %σ= 0.51 68.77 % 58 .78 % -Logistic 51.87 %σ= 3.64 46.54 %σ= 3.22 61.19 % 54 .58 % -Logistic−66.49 %σ= 1.99 57.84 %σ= 1.15 69.04 % 60 .10 % -Softmax 75.22 %σ= 2.41 59.49 %σ= 1.25 78.87 % 63 .06 % -Tanh 67.27 %σ= 2.38 55.70 %σ= 1.44 70.21 % 58 .10 % -Softsign 66.43 %σ= 1.74 55.75 %σ= 0.93 69.78 % 58 .40 % -ReLU 78.62 %σ= 2.15 62.18 %σ= 0.99 81.81 % 64 .57 % -ReLU−76.01 %σ= 2.31 62.87 %σ= 1.08 78.18 % 64 .81 % -Softplus 66.75 %σ= 2.45 56.68 %σ= 1.32 71.27 % 60 .26 % -S2ReLU 63.32 %σ= 1.69 56.99 %σ= 1.14 65.80 % 59 .20 % -LReLU 74.92 %σ= 2.49 61.86 %σ= 1.23 77.67 % 64 .01 % -PReLU 80.01 %σ= 2.03 62.16 %σ= 0.73 83.50 % 64.79 % -ELU 76.64 %σ= 1.48 63.38 %σ= 0.55 78.30 % 64 .70 % -Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation -functions on CIFAR-100. For LReLU, α= 0.3was chosen. -FunctionInference per TrainingEpochsMean total +Identity 66.25 % σ= 0.77 56.74 % σ= 0.51 68 .77 % 58 .78 % +Logistic 51.87 % σ= 3.64 46 .54 % σ= 3.22 61 .19 % 54 .58 % +Logistic− 66.49 % σ= 1.99 57 .84 % σ= 1.15 69 .04 % 60 .10 % +Softmax 75.22 % σ= 2.41 59 .49 % σ= 1.25 78 .87 % 63 .06 % +Tanh 67.27 % σ= 2.38 55 .70 % σ= 1.44 70 .21 % 58 .10 % +Softsign 66.43 % σ= 1.74 55 .75 % σ= 0.93 69 .78 % 58 .40 % +ReLU 78.62 % σ= 2.15 62 .18 % σ= 0.99 81 .81 % 64 .57 % +ReLU− 76.01 % σ= 2.31 62 .87 % σ= 1.08 78 .18 % 64 .81 % +Softplus 66.75 % σ= 2.45 56 .68 % σ= 1.32 71 .27 % 60 .26 % +S2ReLU 63.32 % σ= 1.69 56 .99 % σ= 1.14 65 .80 % 59 .20 % +LReLU 74.92 % σ= 2.49 61 .86 % σ= 1.23 77 .67 % 64 .01 % +PReLU 80.01 % σ= 2.03 62 .16 % σ= 0.73 83.50 % 64 .79 % +ELU 76.64 % σ= 1.48 63.38 % σ= 0.55 78 .30 % 64 .70 % +Table 5.11.:Training and test accuracy of adjusted baseline models trained with different activation +functions on CIFAR-100. For LReLU,α= 0.3 was chosen. 
+Function Inference per Training Epochs Mean total 1 Image 128 time training time -Identity 8 ms 42 ms 31s -epoch108 –148 3629 s -Logistic 6 ms 31 ms 24s -epoch101– 167 2234 s -Logistic−6 ms 31 ms 22s -epoch133 – 255 3421 s -Softmax 7 ms 37 ms 33s -epoch127 – 248 5250 s -Tanh 6 ms 31 ms 23s -epoch125 – 211 3141 s -Softsign 6 ms 31 ms 23s -epoch122 – 205 3505 s -ReLU 6 ms 31 ms 23s -epoch118 – 192 3449 s -Softplus 6 ms 31 ms 24s -epoch101– 165 2718 s -S2ReLU 5 ms 32 ms 26s -epoch108 – 209 3231 s -LReLU 7 ms 34 ms 25s -epoch109 – 198 3388 s -PReLU 7 ms 34 ms 28s -epoch131 – 215 3970 s -ELU 6 ms 31 ms 23s -epoch146 – 232 3692 s -Table 5.12.: Training time and inference time of adjusted baseline models trained with different +Identity 8 ms 42 ms 31 s +epoch 108 –148 3629 s +Logistic 6 ms 31 ms 24 s +epoch 101 – 167 2234 s +Logistic− 6 ms 31 ms 22 s +epoch 133 – 255 3421 s +Softmax 7 ms 37 ms 33 s +epoch 127 – 248 5250 s +Tanh 6 ms 31 ms 23 s +epoch 125 – 211 3141 s +Softsign 6 ms 31 ms 23 s +epoch 122 – 205 3505 s +ReLU 6 ms 31 ms 23 s +epoch 118 – 192 3449 s +Softplus 6 ms 31 ms 24 s +epoch 101 – 165 2718 s +S2ReLU 5 ms 32 ms 26 s +epoch 108 – 209 3231 s +LReLU 7 ms 34 ms 25 s +epoch 109 – 198 3388 s +PReLU 7 ms 34 ms 28 s +epoch 131 – 215 3970 s +ELU 6 ms 31 ms 23 s +epoch 146 – 232 3692 s +Table 5.12.:Training time and inference time of adjusted baseline models trained with different activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the identity is the fastest function. This result is likely an implementation specific problem of Keras 2.0.4 or Tensorflow 1.1.0. 5. 
Experimental Evaluation -FunctionSingle model Ensemble Epochs +Function Single model Ensemble Epochs Accuracy std Accuracy Range Mean -Identity 99.45 %σ= 0.09 99.63 %55 – 77 62.2 -Logistic 97.27 %σ= 2.10 99.48 %37– 7654.5 -Softmax 99.60 %σ= 0.03 99.63 %44 – 73 55.6 -Tanh 99.40 %σ= 0.09 99.57 %56 – 80 67.6 -Softsign 99.40 %σ= 0.08 99.57 %72 – 101 84.0 -ReLU 99.62 %σ= 0.04 99.73 %51 – 94 71.7 -Softplus 99.52 %σ= 0.05 99.62 %62 –7068.9 -PReLU 99.57 %σ= 0.07 99.73 %44 – 89 71.2 -ELU 99.53 %σ= 0.06 99.58 %45 – 111 72.5 -Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions +Identity 99.45 % σ= 0.09 99 .63 % 55 – 77 62.2 +Logistic 97.27 % σ= 2.10 99 .48 % 37 – 76 54.5 +Softmax 99.60 % σ= 0.03 99.63 % 44 – 73 55.6 +Tanh 99.40 % σ= 0.09 99 .57 % 56 – 80 67.6 +Softsign 99.40 % σ= 0.08 99 .57 % 72 – 101 84.0 +ReLU 99.62 % σ= 0.04 99.73 % 51 – 94 71.7 +Softplus 99.52 % σ= 0.05 99 .62 % 62 – 70 68.9 +PReLU 99.57 % σ= 0.07 99.73 % 44 – 89 71.2 +ELU 99.53 % σ= 0.06 99 .58 % 45 – 111 72.5 +Table 5.13.:Test accuracy of adjusted baseline models trained with different activation functions on MNIST. 5.14. Label smoothing -Ensembles consisting of nmodels trained by the same procedure on the same data but +Ensembles consisting ofn models trained by the same procedure on the same data but initialized with different weights and trained with a different order of the training data perform consistently better than single models. One drawback of ensembles in applications -such as self-driving cars is that they increase the computation by a factor of n. One idea +such as self-driving cars is that they increase the computation by a factor ofn. One idea why they improve the test accuracy is by reducing the variance. The idea of label smoothing is to use the ensemble prediction of the training data as labels -for another classifier. 
For every element xof the training set, the one-hot encoded target -t(x)is smoothed by the ensemble prediction yE(x) -t′(x) =α·t(x) + (1−α)yE(x) -whereα∈[0,1]is the smoothing factor. +for another classifier. For every elementx of the training set, the one-hot encoded target +t(x) is smoothed by the ensemble predictionyE(x) +t′(x) = α·t(x) + (1−α)yE(x) +where α∈[0,1] is the smoothing factor. There are three reasons why label smoothing could be beneficial: -•Training speed : The ensemble prediction contains more information about the +• Training speed: The ensemble prediction contains more information about the image than binary class decisions. Classifiers in computer vision predict how similar the input looks to other input of the classes they are trained on. By smoothing the labels, the information that one image could also belong to another class is passed to the optimizer. In early stages of the optimization this could lead to a lower loss on the non-smoothed validation set. -•Higher accuracy : Using smoothed labels for the optimization could lead to a higher +• Higher accuracy: Using smoothed labels for the optimization could lead to a higher accuracy of the base-classifier due to a smoothed error surface. It might be less likely 5.14. Label smoothing that the classifier gets into bad local minima. -•Label noise : Depending on the way how the labels are obtained, it might not always +• Label noise: Depending on the way how the labels are obtained, it might not always be clear which label is the correct one. Also, labeling errors can be present in training datasets. Those errors severely harm the training. By smoothing the labels errors could be relaxed. -10 models msmoothare trained with the α= 0.5smoothed labels from the prediction +10 models msmooth are trained with the α = 0 .5 smoothed labels from the prediction of an ensemble of 10 baseline models. 
The mean accuracy of the models trained on the -smoothed training set labels was 63.61 %(+0.23 %) and the standard deviation was σ= 0.72 -(+0.17 %). The ensemble of 10 msmoothmodels achieved 64.79 %accuracy ( +0.09 %). Hence +smoothed training set labels was63.61 % (+0.23 %) and the standard deviation wasσ= 0.72 +(+0.17 %). The ensemble of 10msmooth models achieved64.79 % accuracy (+0.09 %). Hence the effect of this kind of label smoothing on the final accuracy is questionable. The training speed didn’t noticeably change either: The number of trained epochs ranged from 144 to 205, the mean number of epochs was 177. The baseline training ranged from 146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training methods accuracy differed by less than one percentage point. Hence it is unlikely that label smoothing has a positive effect on the training speed. -Hinton et al. called this method distillation in [HVD15]. Hinton et al. used smooth and +Hinton et al. called this methoddistillation in [HVD15]. Hinton et al. used smooth and hard labels for training, this work only used smoothed labels. 5. Experimental Evaluation 5.15. Optimized Classifier In comparison to the baseline classifier, the following changes are applied to the optimized classifier: -•Remove the bias for the last layers : For all layers which output a 1×1feature +• Remove the bias for the last layers: For all layers which output a1 ×1 feature map, the bias is removed -•Increase the max pooling kernel to 3×3 -•More filters in the first layers +• Increase the max pooling kernel to3 ×3 +• More filters in the first layers The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation is given in Table 5.15 and the timing comparison is given in Table 5.16. 
# Type Filters @ -Patch size / strideParameters FLOPs Output size -Input 0 0 3@32 ×32 -1 Convolution 69@ 3×3×3/1 1932 3744768 69@32×32 -2 BN + ELU 138 353418 69@32×32 -3 Convolution 69@ 3×3×32/1 42918 37684096 69@32×32 -4 BN + ELU 138 353418 69@32×32 -Max pooling 2×2/2 0 40960 32@16 ×16 -5 Convolution 64@ 3×3×32/1 39808 20332544 64@16 ×16 -6 BN + ELU 128 82048 64@16 ×16 -7 Convolution 64@ 3×3×64/1 36928 18 857 984 64@16×16 -8 BN + ELU 128 82048 64@16 ×16 -Max pooling 2×2/2 20480 64@ 8 ×8 -9 Convolution 64@ 3×3×64/1 36928 4714496 64@ 8 ×8 -10 BN + ELU 128 20608 64@ 8 ×8 -Max pooling 2×2/2 5120 64@ 4 ×4 -11 Convolution (v) 512@ 4×4×64/1 524 288 1048064 512@ 1 ×1 -12 BN + ELU 1024 3584 512@ 1 ×1 -Dropout 0.5 0 0 512@ 1 ×1 -13 Convolution 512@ 1×1×512/1 262144 523776 512@ 1 ×1 -14 BN + ELU 1024 3584 512@ 1 ×1 -Dropout 0.5 0 0 512@ 1 ×1 -15 Convolution k @ 1×1×512/1 512·k 512·kk @ 1×1 -Global avg Pooling 1×1 0k k @ 1×1 -16 BN + Softmax 2k 7k k @ 1×1 +Patch size / stride +Parameters FLOPs Output size +Input 0 0 3@32 × 32 +1 Convolution 69@ 3 ×3 ×3 /1 1932 3744768 69 @32 × 32 +2 BN + ELU 138 353418 69 @32 × 32 +3 Convolution 69@ 3 ×3 ×32 /1 42918 37684096 69 @32 × 32 +4 BN + ELU 138 353418 69 @32 × 32 +Max pooling 2 ×2 /2 0 40960 32@16 × 16 +5 Convolution 64@ 3 ×3 ×32 /1 39808 20332544 64@16 × 16 +6 BN + ELU 128 82048 64@16 × 16 +7 Convolution 64@ 3 ×3 ×64 /1 36928 18 857 984 64@16 × 16 +8 BN + ELU 128 82048 64@16 × 16 +Max pooling 2 ×2 /2 20480 64@ 8 × 8 +9 Convolution 64@ 3 ×3 ×64 /1 36928 4714496 64@ 8 × 8 +10 BN + ELU 128 20608 64@ 8 × 8 +Max pooling 2 ×2 /2 5120 64@ 4 × 4 +11 Convolution (v) 512@ 4 ×4 ×64 /1 524 288 1048064 512@ 1 × 1 +12 BN + ELU 1024 3584 512@ 1 × 1 +Dropout 0.5 0 0 512@ 1 × 1 +13 Convolution 512@ 1 ×1 ×512 /1 262144 523776 512@ 1 × 1 +14 BN + ELU 1024 3584 512@ 1 × 1 +Dropout 0.5 0 0 512@ 1 × 1 +15 Convolution k @ 1 ×1 ×512 /1 512 ·k 512 ·k k @ 1 × 1 +Global avg Pooling 1 ×1 0 k k @ 1 × 1 +16 BN + Softmax 2k 7k k @ 1 × 1 ∑ 514k -+947654520k 
-+87870996179200+ 2k -Table 5.14.: Optimized architecture with 3 input channels of size 32×32. All convolutional layers -useSAMEpadding, except for layer 11 which used VALIDpadding in order to decrease -the feature map size to 1×1. If the input feature map is bigger than 32×32, for each -power of two there are two Convolution + BN + ELU blocks and one Max pooling ++947654 +520k ++87870996 179200+2k +Table 5.14.:Optimized architecture with 3 input channels of size32 ×32. All convolutional layers +use SAME padding, except for layer 11 which usedVALID padding in order to decrease +the feature map size to1 ×1. If the input feature map is bigger than32 ×32, for each +power of two there are twoConvolution + BN + ELU blocks and oneMax pooling block added. This is the framed part in the table. -5.15. Optimized Classifier32×32Input +5.15. Optimized Classifier +32×32 +Input C69@3×3/1 BN + ELU C69@3×3/1 -BN + ELU16×16max pooling 3×3/2 +BN + ELU +16×16 +max pooling3×3/2 C64@3×3/1 BN + ELU C64@3×3/1 -BN + ELU8×8max pooling 3×3/2 +BN + ELU +8×8 +max pooling3×3/2 C64@3×3/1 -BN + ELU4×4max pooling 3×3/2 +BN + ELU +4×4 +max pooling3×3/2 C*512@4×4/1(V) BN + ELU -Dropout,p= 0.51×1C*512@1×1/1 +Dropout,p= 0.5 +1×1 +C*512@1×1/1 BN + ELU Dropout,p= 0.5 C*k@1×1/1 Global AVG pooling BN + Softmax -Figure 5.16.: Architecture of the optimized model. C32@3×3/1is a convolutional layer with -32 filters of kernel size 3×3with stride 1. The * indicates that no bias is used. -DatasetSingle Model Accuracy Ensemble of 10 +Figure 5.16.:Architecture of the optimized model. C 32@3 ×3/1 is a convolutional layer with +32 filters of kernel size3 ×3 with stride 1. The * indicates that no bias is used. 
+Dataset Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set -Asirra 95.83 %σ= 4.70 90.75 %σ= 4.73 98 .78 % 93.09 % -CIFAR-10 94.58 %σ= 0.70 87.92 %σ= 0.46 96 .47 % 89.86 % -CIFAR-100 77.96 %σ= 2.18 64.42 %σ= 0.73 81 .44 % 67.03 % -GTSRB 100.00 %σ= 0.00 99.28 %σ= 0.10 100 .00 % 99.51 % -HASYv2 88.79 %σ= 0.45 85.36 %σ= 0.15 89 .36 % 85.92 % -MNIST 99.88 %σ= 0.10 99.48 %σ= 0.13 99 .99 % 99.67 % -STL-10 95.43 %σ= 3.57 75.09 %σ= 2.39 98 .54 % 78.66 % -SVHN 99.08 %σ= 0.07 96.37 %σ= 0.12 99 .50 % 97.47 % -Table 5.15.: Optimized model accuracy on eight datasets. The single model actuary is the 10 models -used in the ensemble. The empirical standard deviation σof the accuracy is also given. +Asirra 95.83 % σ= 4.70 90 .75 % σ= 4.73 98 .78 % 93 .09 % +CIFAR-10 94.58 % σ= 0.70 87 .92 % σ= 0.46 96 .47 % 89 .86 % +CIFAR-100 77.96 % σ= 2.18 64 .42 % σ= 0.73 81 .44 % 67 .03 % +GTSRB 100.00 % σ= 0.00 99 .28 % σ= 0.10 100 .00 % 99 .51 % +HASYv2 88.79 % σ= 0.45 85 .36 % σ= 0.15 89 .36 % 85 .92 % +MNIST 99.88 % σ= 0.10 99 .48 % σ= 0.13 99 .99 % 99 .67 % +STL-10 95.43 % σ= 3.57 75 .09 % σ= 2.39 98 .54 % 78 .66 % +SVHN 99.08 % σ= 0.07 96 .37 % σ= 0.12 99 .50 % 97 .47 % +Table 5.15.:Optimized model accuracy on eight datasets. The single model actuary is the 10 models +used in the ensemble. The empirical standard deviationσ of the accuracy is also given. CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN and HASY, no test time transformations are used. 
-Network GPU TensorflowInference per Training +Network GPU Tensorflow Inference per Training 1 Image 128 images time / epoch Optimized Default Intel i7-4930K 5 ms 432 ms 386 s Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s @@ -2161,9 +2305,9 @@ Optimized Default GTX 980 3 ms 35 ms 27 s Optimized Default GTX 980 Ti 6 ms 36 ms 26 s Optimized Default GTX 1070 2 ms 24 ms 21 s Optimized Default Titan Black 4 ms 46 ms 43 s -Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is +Table 5.16.:Speed comparison of the optimized model on CIFAR-10. The baseline model is evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken -from [Maj17]. Weights the baseline model can be found at [ Tho17b]. The optimized +from [Maj17]. Weights the baseline model can be found at [Tho17b]. The optimized Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. 5. Experimental Evaluation @@ -2172,7 +2316,7 @@ A separate validation set is necessary for two reasons: (1) Early stopping and ( overfitting due to many experiments. To prevent overfitting, a different dataset can be used. For example, all decisions about hyperparameters in this thesis are based on CIFAR-100, but the network is finally trained and evaluated with the same hyperparameters on all -datasets.2The validation set can hence be removed if early stopping is removed. Instead, +datasets.2 The validation set can hence be removed if early stopping is removed. Instead, the validation data is used in a first run to determine the number of epochs necessary for training. In a second training run the validation data is added to the training set. The number of used epochs for the second run is given in Table 5.17. @@ -2185,7 +2329,7 @@ HASYv2 92 136 116 369 369 GTSRB 97 35 288 43 821 STL-10 116 4500 10 450 CIFAR-100 155 45 000 100 450 -Table 5.17.: Mean number of training epochs for the optimized model. 
For comparison, the total +Table 5.17.:Mean number of training epochs for the optimized model. For comparison, the total amount of used training data, the number of classes of the dataset and the average amount of data per class is given. Alternatively, the model can be trained with early stopping (ES) purely on the training @@ -2197,7 +2341,7 @@ improve the results when the number of epochs is fixed, but notably improved the when the training loss was used as the early stopping criterion. 5.17. Regularization Stronger regularization might even improve the results when using the training loss as an -early stopping criterion. ℓ2regularization with a weighting factor of λ= 0.0001is used in +early stopping criterion.ℓ2 regularization with a weighting factor ofλ= 0.0001 is used in all other experiments. While the accuracy as shown in Table 5.19 does not show a clear pattern, the number of epochs increases with lower model regularization (see Table 5.20). 2Except data augmentation and test time transformations. @@ -2205,31 +2349,31 @@ pattern, the number of epochs increases with lower model regularization (see Tab 4Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. 5.17. Regularization -DatasetEarly Stopping Fixed epochs +Dataset Early Stopping Fixed epochs val. 
acc train loss -Asirra 93.09 % 96.01 %396.01 % -CIFAR-10 89.86 % 91.75 % 88 .88 % -CIFAR-100 67.03 % 71.01 % 69 .08 % -HASYv2 85.92 % 82.89 %485.05 % -MNIST 99.67 % 99.64 % 99 .57 % -STL-10 78.66 % 83.25 % 78 .64 % -Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy +Asirra 93.09 % 96 .01 %3 96.01 % +CIFAR-10 89.86 % 91 .75 % 88 .88 % +CIFAR-100 67.03 % 71 .01 % 69 .08 % +HASYv2 85.92 % 82 .89 %4 85.05 % +MNIST 99.67 % 99 .64 % 99 .57 % +STL-10 78.66 % 83 .25 % 78 .64 % +Table 5.18.:Comparisons of trained optimized models with early stopping on the validation accuracy compared training setups without a validation set and thus more training data. The second column uses the training loss as a stopping criterion, the third column uses a fixed number of epochs which is equal to the mean number of training epochs of the models with early stopping on the validation set accuracy. -λSingle Model Accuracy Ensemble of 10 +λ Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set -λ= 0.01 73.83 %σ= 1.78 58.94 %σ= 1.33 87 .78 % 69.98 % -λ= 0.001 82.86 %σ= 0.89 63.03 %σ= 0.67 91 .86 % 71.02 % -λ= 0.0001 77.96 %σ= 2.18 64.42 %σ= 0.73 81 .44 % 67.03 % -Table 5.19.: Different choices of ℓ2model regularization applied to the optimized model. +λ= 0.01 73 .83 % σ= 1.78 58 .94 % σ= 1.33 87 .78 % 69 .98 % +λ= 0.001 82 .86 % σ= 0.89 63 .03 % σ= 0.67 91 .86 % 71 .02 % +λ= 0.0001 77 .96 % σ= 2.18 64 .42 % σ= 0.73 81 .44 % 67 .03 % +Table 5.19.: Different choices ofℓ2 model regularization applied to the optimized model. λ min max mean std -λ= 0.01457 503 404.6 37.2 -λ= 0.001516 649 588.4 41.6 -λ= 0.0001579 833 696.1 79.1 -Table 5.20.: Training time in epochs of models with early stopping on training loss by different -choices ofℓ2model regularization applied to the optimized model. 
+λ= 0.01 457 503 404.6 37.2 +λ= 0.001 516 649 588.4 41.6 +λ= 0.0001 579 833 696.1 79.1 +Table 5.20.:Training time in epochs of models with early stopping on training loss by different +choices ofℓ2 model regularization applied to the optimized model. 5. Experimental Evaluation @@ -2240,16 +2384,16 @@ algorithms in Chapter 3. Confusion Matrix Ordering (CMO), originally developed as a method to make visualizations of confusion matrices easier to read (see Figure 5.13), was introduced as a class clustering algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are: -•Ordering the classes in the confusion matrix allows to display the relevant parts even +• Ordering the classes in the confusion matrix allows to display the relevant parts even for several hundred classes. -•A hierarchy of classifiers based on the classes does not improve the results on CIFAR100. +• A hierarchy of classifiers based on the classes does not improve the results on CIFAR100. There are three possible reasons for this: -–32 px×32 pxis too low dimensional -–100 classes are not enough for this approach -–More classes are always easier to distinguish if each new class comes with more +– 32 px×32 px is too low dimensional +– 100 classes are not enough for this approach +– More classes are always easier to distinguish if each new class comes with more data. One reason why this might be the case is that distinguishing the object from background has similar properties even for different classes. -•Label smoothing had only a minor effect on the accuracy and no effect on the training +• Label smoothing had only a minor effect on the accuracy and no effect on the training time when a single base classifier was used to train with the smoothed labels by an ensemble of base classifiers. A baseline model was defined and evaluated on eight publicly available datasets. 
The @@ -2257,57 +2401,57 @@ baselines topology and training setup are described in detail as well as its beh training and properties of the weights of the trained model. The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100. The insights of those experiments are: -•Averaging ensembles of 10 base classifiers of the same architecture and trained with the +• Averaging ensembles of 10 base classifiers of the same architecture and trained with the same setup consistently improve the accuracy. The amount of improvement depends on the base classifiers, but the ensemble tends to improve the test accuracy by about one percentage point. -•Wider networks learn in fewer epochs. This, however, does not mean that the +• Wider networks learn in fewer epochs. This, however, does not mean that the 6. Conclusion and Outlook wall-clock time is lower due to increased computation in forward- and backward passes. -•Batch Normalization increases the training time noticeably. For the described ELU +• Batch Normalization increases the training time noticeably. For the described ELU baseline model it also increases accuracy, which contradicts [CUH15]. -•The lower the batch size, the longer the time for each epoch of training and the less +• The lower the batch size, the longer the time for each epoch of training and the less epochs need to be trained. Higher accuracy by lower batch sizes was empirically confirmed. The batch size, however, can also be too low. -•An analysis of the weights of the baseline indicated that the bias of layers close to +• An analysis of the weights of the baseline indicated that the bias of layers close to the output layer can be removed. This was experimentally confirmed. -•It could not be confirmed that learned color space transformation, as described +• It could not be confirmed that learned color space transformation, as described in [MSM16], improves the network. 
Neither with ELU nor with leaky rectified linear -unit (LReLU) and α= 0.3. -•It could be confirmed that ELU networks gives better results than any other activation +unit (LReLU) andα= 0.3. +• It could be confirmed that ELU networks gives better results than any other activation function on CIFAR-100. For the character datasets MNIST and HASYv2, however, ReLU, LReLU, PReLU, Softplus and ELU all performed similar. -•Changing the activation functions to the identity had very little impact on the HASYv2 +• Changing the activation functions to the identity had very little impact on the HASYv2 and MNIST classifiers. Note that those networks are still able to learn nonlinear -decision boundaries due to max-pooling and SAMEpadding. For CIFAR-100, however, -the accuracy drops by 6.64 %when ELU is replaced by the identity. +decision boundaries due to max-pooling andSAME padding. For CIFAR-100, however, +the accuracy drops by6.64 % when ELU is replaced by the identity. Based on the results of those experiments, an optimized classifier was developed and evaluated on all eight datasets. -The state of the art of STL-10 was improved from 74.80 %[ZMGL15 ] to78.66 %without +The state of the art of STL-10 was improved from74.80 % [ZMGL15] to78.66 % without using the unlabeled part of the dataset. The state of the art of HASYv2 was improved -from 81.00 %[Tho17a] to85.92 %, for GTSRB the state of the art was improved from -99.46 %[SL11] to99.51 %, for Asirra it was improved from 82.7 %[Gol08] to93.09 %.1 +from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from +99.46 % [SL11] to 99.51 %, for Asirra it was improved from82.7 % [Gol08] to 93.09 %.1 This was mainly achieved by the combination of ELU, Dropout, ensembles, training data augmentation and test-time transformations. 
The removal of the bias of layers close to the -output and re-usage of those parameters in layers close to the input as well as using 3×3 -pooling instead of 2×2pooling improved the baseline. +output and re-usage of those parameters in layers close to the input as well as using3 ×3 +pooling instead of2 ×2 pooling improved the baseline. While writing this masters thesis, several related questions could not be answered: -•Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting +• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting is not a problem. But at which subsampling-level does having more layers have the biggest effect? Can this question be answered before a deeper network is trained? -•Is label smoothing helpful for noisy labels? +• Is label smoothing helpful for noisy labels? 1The baseline is better than the optimized model on Asirra and on HASYv2. -•How does the choice of activation functions influence residual architectures? Could the +• How does the choice of activation functions influence residual architectures? Could the results be the same for different activation functions in architectures with hundreds of layers? -•The results for the pooling kernel were inconclusive. Larger pooling kernels might be +• The results for the pooling kernel were inconclusive. Larger pooling kernels might be advantageous as well as fractional max pooling [Gra15]. -•Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that +• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that can and should be fixed? -•Why is softmax so much better than the logistic function? Can the reason be used to +• Why is softmax so much better than the logistic function? Can the reason be used to further improve ELU? 
Besides those questions, the influence of optimizers on time per epoch, epochs until convergence, total training time, memory consumption, accuracy of the models and standard @@ -2316,14 +2460,14 @@ might be crucial for the models quality. A. Figures, Tables and Algorithms -(a)Original image - (b)Smoothing filter - (c)Laplace edge detection filter -(d)Sobel edge detection filter - (e)Prewitt edge detection filter - (f)Canny filter +(a) Original image + (b) Smoothing filter + (c) Laplace edge detection filter +(d) Sobel edge detection filter + (e) Prewitt edge detection filter + (f) Canny filter Figure A.1.: Examples of image filters. Best viewed in electronic form. -Layer99-percentile interval +Layer 99-percentile interval filter bias 1 [-0.50, 0.48] [-0.06, 0.07] 3 [-0.21, 0.19] [-0.07, 0.07] @@ -2333,75 +2477,75 @@ filter bias 11 [-0.08, 0.08] [-0.00, 0.00] 13 [-0.08, 0.08] [-0.00, 0.00] 15 [-0.10, 0.11] [-0.01, 0.01] -Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model +Table A.1.:99-percentile intervals for filter weights and bias weights by layer of a baseline model trained on CIFAR-100. -Figure A.2.: The distribution of bias weights of a model without batch normalization trained on +Figure A.2.:The distribution of bias weights of a model without batch normalization trained on CIFAR-100. -Algorithm 1 Simulated Annealing for minimizing Equation (4.1). -Require:C∈Nn×n, steps∈N,T∈R+,c∈(0,1) -procedure SimulatedAnnealing (C, steps,T,c) -bestScore←accuracy (C) -bestC←C -fori= 0;ibestScore then -bestScore←s -bestC←C -T←T·c -else ⊿Move Block -s←randomInteger (1,...,n ) ⊿Block start -e←randomInteger (s,...,n ) ⊿Block end -i←randomInteger (1,...,n−(e−s)) ⊿Block insert position -Move Block (s, ..., e) to position i -returnbestM - -Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model, +Algorithm 1Simulated Annealing for minimizing Equation (4.1). 
+Require: C ∈Nn×n, steps∈N, T ∈R+, c∈(0,1) +procedure SimulatedAnnealing(C, steps,T, c) +bestScore ←accuracy(C) +bestC ←C +for i= 0; i< steps; i←i+ 1 do +p←randomFloat(0,1) +if p< 0.5 then ⊿ Swap rows +i←randomInteger(1,...,n ) +j ←randomInteger(1,...,n ) \{i} +p←randomUniform(0,1) +C′←swap(C,i,j ) +s←accuracy(C′) +if p< exp(s−bestScore +T ) then +C ←C′ +if s> bestScore then +bestScore ←s +bestC ←C +T ←T ·c +else ⊿ Move Block +s←randomInteger(1,...,n ) ⊿ Block start +e←randomInteger(s,...,n ) ⊿ Block end +i←randomInteger(1,...,n −(e−s)) ⊿ Block insert position +Move Block (s, ..., e) to positioni +return bestM + +Figure A.3.:Maximum weight updates between epochs by layer. The model is the baseline model, but with layer 5 reduced to 3 filters. -FunctionSingle model Ensemble of 10 Epochs +Function Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean -Identity 87.92 %σ= 0.40 84.69 %σ= 0.08 88.59 % 85.43 %92 – 140 114.5 -Logistic 81.46 %σ= 5.08 79.67 %σ= 4.85 86.38 % 84.60 %58–91 77.3 -Softmax 88.19 %σ= 0.31 84.70 %σ= 0.15 88.69 % 85.43 %124 – 171 145.8 -Tanh 88.41 %σ= 0.36 84.46 %σ= 0.27 89.24 % 85.45 %89 – 123 108.7 -Softsign 88.00 %σ= 0.47 84.46 %σ= 0.23 88.77 % 85.33 %77 – 119 104.1 -ReLU 88.93 %σ= 0.4685.35 %σ= 0.21 89.35 % 85.95 %96 – 132 102.8 -Softplus 88.42 %σ= 0.2985.16 %σ= 0.15 88.90 % 85.73 %108 – 143 121.0 -LReLU 88.61 %σ= 0.41 85.21 %σ= 0.0589.07 % 85.83 %87 – 117 104.5 -PReLU 89.62 %σ= 0.4185.35 %σ= 0.1790.10 % 86.01 %85 – 111 100.5 -ELU 89.49 %σ= 0.4285.35 %σ= 0.10 89.94 % 86.03 %73 – 113 92.4 -Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on -HASYv2. For LReLU, α= 0.3was chosen. - -Figure A.4.: Sum of weight updates between epochs by layer. 
The model is the baseline model, but +Identity 87.92 % σ= 0.40 84 .69 % σ= 0.08 88 .59 % 85 .43 % 92 – 140 114.5 +Logistic 81.46 % σ= 5.08 79 .67 % σ= 4.85 86 .38 % 84 .60 % 58 – 91 77.3 +Softmax 88.19 % σ= 0.31 84 .70 % σ= 0.15 88 .69 % 85 .43 % 124 – 171 145.8 +Tanh 88.41 % σ= 0.36 84 .46 % σ= 0.27 89 .24 % 85 .45 % 89 – 123 108.7 +Softsign 88.00 % σ= 0.47 84 .46 % σ= 0.23 88 .77 % 85 .33 % 77 – 119 104.1 +ReLU 88.93 % σ= 0.46 85.35 % σ= 0.21 89 .35 % 85 .95 % 96 – 132 102.8 +Softplus 88.42 % σ= 0.29 85.16 % σ= 0.15 88 .90 % 85 .73 % 108 – 143 121.0 +LReLU 88.61 % σ= 0.41 85 .21 % σ= 0.05 89.07 % 85 .83 % 87 – 117 104.5 +PReLU 89.62 % σ= 0.41 85.35 % σ= 0.17 90.10 % 86.01 % 85 – 111 100.5 +ELU 89.49 % σ= 0.42 85.35 % σ= 0.10 89 .94 % 86.03 % 73 – 113 92.4 +Table A.2.:Test accuracy of adjusted baseline models trained with different activation functions on +HASYv2. For LReLU,α= 0.3 was chosen. + +Figure A.4.:Sum of weight updates between epochs by layer. The model is the baseline model, but with layer 5 reduced to 3 filters. -FunctionSingle model Ensemble of 10 Epochs +Function Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean -Identity 87.49 %σ= 2.50 69.86 %σ= 1.41 89.78 % 71.90 %51 – 65 53.4 -Logistic 45.32 %σ= 14.88 40.85 %σ= 12.56 51.06 % 45.49 %38 – 93 74.6 -Softmax 87.90 %σ= 3.58 67.91 %σ= 2.32 91.51 % 70.96 %108 – 150 127.5 -Tanh 85.38 %σ= 4.04 67.65 %σ= 2.01 90.47 % 71.29 %48 – 92 65.2 -Softsign 88.57 %σ= 4.00 69.32 %σ= 1.68 93.04 % 72.40 %55 – 117 83.2 -ReLU 94.35 %σ= 3.38 71.01 %σ= 1.63 98.20 % 74.85 %52 – 98 75.5 -Softplus 83.03 %σ= 2.07 68.28 %σ= 1.74 93.04 % 75.99 %56 – 89 68.9 -LReLU 93.83 %σ= 3.89 74.66 %σ= 2.11 97.56 % 78.08 %52 – 120 80.1 -PReLU 95.53 %σ= 1.92 71.69 %σ= 1.37 98.17 % 74.69 %59 – 101 78.8 -ELU 95.42 %σ= 3.57 75.09 %σ= 2.39 98.54 % 78.66 %66 – 72 67.2 -Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on -STL-10. For LReLU, α= 0.3was chosen. 
+Identity 87.49 % σ= 2.50 69 .86 % σ= 1.41 89 .78 % 71 .90 % 51 – 65 53.4 +Logistic 45.32 % σ= 14.88 40 .85 % σ= 12.56 51 .06 % 45 .49 % 38 – 93 74.6 +Softmax 87.90 % σ= 3.58 67 .91 % σ= 2.32 91 .51 % 70 .96 % 108 – 150 127.5 +Tanh 85.38 % σ= 4.04 67 .65 % σ= 2.01 90 .47 % 71 .29 % 48 – 92 65.2 +Softsign 88.57 % σ= 4.00 69 .32 % σ= 1.68 93 .04 % 72 .40 % 55 – 117 83.2 +ReLU 94.35 % σ= 3.38 71 .01 % σ= 1.63 98 .20 % 74 .85 % 52 – 98 75.5 +Softplus 83.03 % σ= 2.07 68 .28 % σ= 1.74 93 .04 % 75 .99 % 56 – 89 68.9 +LReLU 93.83 % σ= 3.89 74 .66 % σ= 2.11 97 .56 % 78 .08 % 52 – 120 80.1 +PReLU 95.53 % σ= 1.92 71 .69 % σ= 1.37 98 .17 % 74 .69 % 59 – 101 78.8 +ELU 95.42 % σ= 3.57 75 .09 % σ= 2.39 98 .54 % 78 .66 % 66 – 72 67.2 +Table A.3.:Test accuracy of adjusted baseline models trained with different activation functions on +STL-10. For LReLU,α= 0.3 was chosen. B. Hyperparameters Hyperparameters are parameters of models which are not optimized automatically (e.g., by -gradient descent), but by methods like random search [ BB12], grid search [ LBOM98 ] or +gradient descent), but by methods like random search [BB12], grid search [LBOM98] or manual search. B.1. Preprocessing Preprocessing used to be of major importance in machine learning. However, with the @@ -2409,21 +2553,21 @@ availability of data sets with hundreds of examples per class and the possibilit learn features themselves, most models today rely on raw pixel values. The only common preprocessing is size normalization. In order to get a fixed input-size for a CNN, the following procedure can be used: -•Take one or multiple crops of the image which have the desired aspect ratio. -•Scale the crop(s) to the desired size. -•In training, all crops can be used independently. In testing, all crops can be passed +• Take one or multiple crops of the image which have the desired aspect ratio. +• Scale the crop(s) to the desired size. +• In training, all crops can be used independently. 
In testing, all crops can be passed through the network and the output probability distributions can get fusioned, for example by averaging. Other preprocessing methods are: -•Color space transformations (RGB, HSV, etc.) -•Mean subtraction -•Standardization of pixel-values to [0,1]by dividing through 255(used by [HLW16]) -•Dimensionality reduction -–Principal component analysis (PCA): An unsupervised linear transformation +• Color space transformations (RGB, HSV, etc.) +• Mean subtraction +• Standardization of pixel-values to[0,1] by dividing through255 (used by [HLW16]) +• Dimensionality reduction +– Principal component analysis (PCA): An unsupervised linear transformation which can be learned in the first hidden layer. It is hence doubtful if PCA improves the network. -–Linear discriminant analysis (LDA) -•Zero Components Analysis (ZCA) whitening (used by [KH09]) +– Linear discriminant analysis (LDA) +• Zero Components Analysis (ZCA) whitening (used by [KH09]) B.2. Data augmentation Data augmentation techniques aim at making artificially more data from real data items by @@ -2431,278 +2575,316 @@ applying invariances. For computer vision, they include: Name Augmentation Factor Used by Horizontal flip 2 [KSH12, WYS+15] Vertical flip 2 [DWD15]1 -Rotation∼40(δ= 20) [DSRB14] -Scaling∼14(δ∈[0.7,1.4]) [DSRB14] -Crops 322= 1024 [KSH12, WYS+15] +Rotation ∼40 (δ= 20) [DSRB14] +Scaling ∼14 (δ∈[0.7,1.4]) [DSRB14] +Crops 322 = 1024 [KSH12, WYS+15] Shearing [Gra15] -GANs [BCW+17] -Brightness∼20(δ∈[0.5,1.5]) [How13] -Hue 51(δ= 0.1) [MRM15, DSRB14] -Saturation∼20(δ= 0.5) [DSRB14] -Contrast∼20(δ∈[0.5,1.5]) [How13] +GANs [BCW +17] +Brightness ∼20 (δ∈[0.5,1.5]) [How13] +Hue 51 (δ= 0.1) [MRM15, DSRB14] +Saturation ∼20 (δ= 0.5) [DSRB14] +Contrast ∼20 (δ∈[0.5,1.5]) [How13] Channel shift [KSH12] -Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for +Table B.1.:Overview of data augmentation techniques. 
The augmentation factor is calculated for typical situations. For example, the augmentation factor for random crops is calculated -for256 px×256 pximages which are cropped to 224 px×224 px. +for 256 px×256 px images which are cropped to224 px×224 px. Taking several scales if the original is of higher resolution than desired is another technique. Combinations of the techniques above can also be applied. Please note that the order of operations does matter in many cases and hence the order is another augmentation factor. Less common, but also reasonable are: -•Adding noise -•Elastic deformations -•Color casting (used by [WYS+15]) -•Vignetting (used by [WYS+15]) -•Lens distortion (used by [WYS+15]) -1Vertical flipping combined with 180◦rotation is equivalent to horizontal flipping +• Adding noise +• Elastic deformations +• Color casting (used by [WYS+15]) +• Vignetting (used by [WYS+15]) +• Lens distortion (used by [WYS+15]) +1Vertical flipping combined with180◦ rotation is equivalent to horizontal flipping B.3. Initialization Weight initializations are usually chosen to be small and centered around zero. One way to characterize many initialization schemes is by -w∼α·U[−1,1] +β·N(0,1) +γwithα,β,γ≥0 +w∼α·U[−1,1] + β·N(0,1) + γ with α,β,γ ≥0 Table B.2 shows six commonly used weight initialization schemes. Several schemes use the -same idea, that unit-variance is desired for each layer as the training converges faster [ IS15]. +same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. 
Name α β γ Reference -Constant α= 0 β= 0 γ≥0used by [ZF14] -Xavier/Glorot uniform α=√ +Constant α= 0 β = 0 γ ≥0 used by [ZF14] +Xavier/Glorot uniform α= +√ 6 -nin+noutβ= 0 γ= 0[GB10] -Xavier/Glorot normal α= 0 β=( +nin+nout β = 0 γ = 0 [GB10] +Xavier/Glorot normal α= 0 β = +( 2 -(nin+nout))2 -γ= 0[GB10] -He α= 0 β=2 -ninγ= 0[HZRS15b] -Orthogonal — — γ= 0[SMG13] -LSUV — — γ= 0[MM15] -Table B.2.: Weight initialization schemes of the form w∼α·U[−1,1] +β·N(0,1) +γ. -nin,noutare the number of units in the previous layer and the next layer. Typically, +(nin+nout) +)2 +γ = 0 [GB10] +He α= 0 β = 2 +nin +γ = 0 [HZRS15b] +Orthogonal — — γ = 0 [SMG13] +LSUV — — γ = 0 [MM15] +Table B.2.: Weight initialization schemes of the formw∼α·U[−1,1] + β·N(0,1) + γ. +nin,nout are the number of units in the previous layer and the next layer. Typically, biases are initialized with constant 0 and weights by one of the other schemes to prevent unit-coadaptation. However, dropout makes it possible to use constant initialization for all parameters. LSUV and Orthogonal initialization cannot be described with this simple pattern. B.4. Objective function For classification tasks, the cross-entropy -ECE(W) =−∑ -x∈XK∑ -k=1[tx +ECE(W) = − +∑ +x∈X +K∑ +k=1 +[tx klog(ox k) + (1−tx k) log(1−ox k)] -is by far the most commonly used objective function (e.g., used by [ ZF14]). In this equation, -Xis the set of training examples, Kis the number of classes, tx -k∈{0,1}indicates if the -training example xis of classk,ox -kis the output of the classifier for the training example x +is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, +X is the set of training examples,K is the number of classes,tx +k ∈{ 0,1 }indicates if the +training examplex is of classk, ox +k is the output of the classifier for the training examplex and classk. 
-However, regularization terms weighted with a constant λ∈(0,+∞)are sometimes added: -•LASSO:ℓ1(e.g., used in [HPTD15]) -•Weight decay: ℓ2(e.g.,λ= 0.0005as in [MSM16]) -•Orthogonality regularization ( |(WT·W−I)|, see [VTKP17]) +However, regularization terms weighted with a constantλ∈(0,+∞) are sometimes added: +• LASSO: ℓ1 (e.g., used in [HPTD15]) +• Weight decay:ℓ2 (e.g., λ= 0.0005 as in [MSM16]) +• Orthogonality regularization (|(WT ·W −I)|, see [VTKP17]) B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule -wji←wji+ ∆wjiwith ∆wji=−η∂Ex +wji ←wji + ∆wji with ∆wji = −η∂Ex ∂wji -whereη∈(0,1), typically 0.01(e.g., [MSM16]), is called the learning rate . -A slight variation of SGD is mini-batch gradient descent with the mini-batch B(typically -mini-batch sizes are |B|∈{ 32,64,128,256,512}, e.g. [ZF14]). Larger mini-batch sizes -lead to sharp minima and thus poor generalization [ KMN+16]. Smaller mini-batch sizes +where η∈(0,1), typically0.01 (e.g., [MSM16]), is called thelearning rate. +A slight variation of SGD is mini-batch gradient descent with the mini-batchB (typically +mini-batch sizes are|B|∈{ 32,64,128,256,512 }, e.g. [ZF14]). Larger mini-batch sizes +lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch sizes lead to longer training times due to computational overhead and to more training steps due to gradient noise. -wji←wji+ ∆wjiwith ∆wji=−η∂EB +wji ←wji + ∆wji with ∆wji = −η∂EB ∂wji Nine variations which adjust the learning rate during training are: -•Momentum: +• Momentum: w(t+1) -ji←w(t) -ji+ ∆w(t+1) -jiwith ∆w(t+1) -ji =−η∂EB -∂wji+α∆w(t) +ji ←w(t) +ji + ∆w(t+1) +ji with ∆w(t+1) +ji = −η∂EB +∂wji ++ α∆w(t) ji -withα∈[0,1], typically 0.9(e.g., [ZF14, MSM16]) -•Adagrad [DHS11] -•RProp and the mini-batch version RMSProp [TH12] -•Adadelta [Zei12] -•Power Scheduling [ Xu11]:η(t) =η(0)(1 +a·t)−c, wheret∈N0is the training step, -a,care constants. 
-•Performance Scheduling [ SHY+13]: Measure the error on the cross validation set and +with α∈[0,1], typically0.9 (e.g., [ZF14, MSM16]) +• Adagrad [DHS11] +• RProp and the mini-batch version RMSProp [TH12] +• Adadelta [Zei12] +• Power Scheduling [Xu11]: η(t) = η(0)(1 + a·t)−c, wheret∈N0 is the training step, +a,c are constants. +• Performance Scheduling [SHY+13]: Measure the error on the cross validation set and decrease the learning rate when the algorithms improvement is below a threshold. -•Exponential Decay Learning Rate [ SHY+13]:η(t) =η(0)·10−t -kwheret∈N0is the -training step, η(0)is the initial learning rate, k∈N≥1is the number of training steps +• Exponential Decay Learning Rate [SHY+13]: η(t) = η(0) ·10−t +k where t∈N0 is the +training step,η(0) is the initial learning rate,k∈N≥1 is the number of training steps until the learning rate is decreased by1 -10th. -•Newbob Scheduling [ new00]: Start with Performance Scheduling, then use Exponential +10 th. +• Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential Decay Scheduling. -•Adam and AdaMax [KB14] +• Adam and AdaMax [KB14] -•Nadam [Doz15] +• Nadam [Doz15] Some of those are explained in [Rud16]. Other first-order gradient optimization methods are: -•Quickprop [Fah88] -•Nesterov Accellerated Momentum (NAG) [Nes83] -•Conjugate Gradient method [ Cha92]: Combines a line search for the step size with +• Quickprop [Fah88] +• Nesterov Accellerated Momentum (NAG) [Nes83] +• Conjugate Gradient method [Cha92]: Combines a line search for the step size with the gradients direction. Higher-order gradient methods like Newtons method or quasi-Newton methods like BFGS and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs. 
However, there are alternatives which do not use gradient information: -•Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [ SM02] -•Simulated Annealing [vLA87] -•Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described +• Genetic algorithms such as NeuroEvolution of Augmenting Topologies (NEAT) [SM02] +• Simulated Annealing [vLA87] +• Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described on [Tho14b] There are also approaches which learn the optimization algorithm [ADG+16, LM16]. B.6. Network Design CNNs have the following hyperparameters: -•Depth: The number of layers -•Width: The number of filters per layer -•Layer and block connectivity graph -•Layer and block hyperparameters : -–Activation Functions as shown in Table B.3 -–For more, see Sections 2.2 and 2.3. +• Depth: The number of layers +• Width: The number of filters per layer +• Layer and block connectivity graph +• Layer and block hyperparameters: +– Activation Functions as shown in Table B.3 +– For more, see Sections 2.2 and 2.3. 
Name Function ϕ(x) Range of Values ϕ′(x) Used by -Sign function† +Sign function† +  -+1ifx≥0 -−1ifx<0{−1,1} 0 [KS02] + ++1 if x≥0 +−1 if x< 0 +{−1,1 } 0 [KS02] Heaviside -step function† +step function† +  -+1ifx>0 -0ifx<0{0,1} 0 [MP43] -Logistic function1 -1+e−x [0,1]ex + ++1 if x> 0 +0 if x< 0 +{0,1 } 0 [MP43] +Logistic function 1 +1+e−x [0,1] ex (ex+1)2 [DJ99] -Tanhex−e−x -ex+e−x= tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a] -ReLU†max(0,x) [0 ,+∞) +Tanh ex−e−x +ex+e−x = tanh(x) [ −1,1] sech 2(x) [LBBH98, Tho14a] +ReLU† max(0,x) [0 ,+∞) +  -1ifx>0 -0ifx<0[KSH12] + +1 if x> 0 +0 if x< 0 +[KSH12] LReLU†2 -(PReLU)ϕ(x) = max(αx,x ) (−∞,+∞) +(PReLU) +ϕ(x) = max(αx,x) ( −∞,+∞) +  -1ifx>0 -αifx<0[MHN13, HZRS15b] -Softplus log(ex+ 1) (0 ,+∞)ex -ex+1[DBB+01, GBB11] -ELU + +1 if x> 0 +α if x< 0 +[MHN13, HZRS15b] +Softplus log(ex + 1) (0 ,+∞) ex +ex+1 [DBB+01, GBB11] +ELU +  -x ifx>0 -α(ex−1)ifx≤0(−∞,+∞) + +x if x> 0 +α(ex −1) if x≤0 +(−∞,+∞) +  -1ifx>0 -αexotherwise[CUH15] -Softmax‡o(x)j=exj∑K -k=1exk[0,1]Ko(x)j·∑K -k=1exk−exj + +1 if x> 0 +αex otherwise +[CUH15] +Softmax‡ o(x)j = exj ∑K -k=1exk[KSH12, Tho14a] -Maxout‡o(x) = maxx∈xx (−∞,+∞) +k=1 exk [0,1]K o(x)j · +∑K +k=1 exk −exj +∑K +k=1 exk [KSH12, Tho14a] +Maxout‡ o(x) = maxx∈x x (−∞,+∞) +  -1ifxi= max x -0otherwise[GWFM+13] -Table B.3.: Overview of activation functions. Functions marked with †are not differentiable at 0 -and functions marked with ‡operate on all elements of a layer simultaneously. The -hyperparameters α∈(0,1)of Leaky ReLU and ELU are typically α= 0.01. Other -activation function like randomized leaky ReLUs exist [ XWCL15 ], but are far less + +1 if xi = max x +0 otherwise +[GWFM+13] +Table B.3.:Overview of activation functions. Functions marked with†are not differentiable at 0 +and functions marked with‡operate on all elements of a layer simultaneously. The +hyperparameters α ∈(0,1) of Leaky ReLU and ELU are typicallyα = 0.01. 
Other +activation function like randomized leaky ReLUs exist [XWCL15], but are far less commonly used. Some functions are smoothed versions of others, like the logistic function for the Heaviside step function, tanh for the sign function, softplus for ReLU. Softmax is the standard activation function for the last layer of a classification network as it produces a probability distribution. See Figure B.1 for a plot of some of them. -2αis a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. - -−2.0−1.5−1.0−0.5 0.5 1.0 1.5 2.0 -−1.0−0.50.51.01.52.0 -xy -ϕ1(x) =1 +2α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. + +−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0 +−1.0 +−0.5 +0.5 +1.0 +1.5 +2.0 +x +y +ϕ1(x) = 1 1+e−x ϕ2(x) = tanh(x) ϕ3(x) = max(0,x) -ϕ4(x) = log(ex+ 1) -ϕ5(x) = max(x,ex−1) -Figure B.1.: Activation functions plotted in [−2,+2].tanhand ELU are able to produce negative +ϕ4(x) = log(ex + 1) +ϕ5(x) = max(x,ex −1) +Figure B.1.:Activation functions plotted in[−2,+2]. tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, -whereas tanhand the logistic function are always below 1. +whereas tanh and the logistic function are always below 1. B.7. Regularization Regularization techniques aim to make the fitted function smoother and reduce overfitting. Regularization techniques are: -•ℓ1,ℓ2, and Orthogonality regularization: See Appendix B.4 -•Max-norm regularization (e.g. used ins [SHK+14]) -•Dropout (introduced in [ SHK+14]), DropConnect (see [ WZZ+13]), Stochastic Depth +• ℓ1, ℓ2, and Orthogonality regularization: See Appendix B.4 +• Max-norm regularization (e.g. 
used ins [SHK+14]) +• Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth (see [HSL+16]) -•Feature scale clipping (see [ZF14]) -•Data augmentation (according to [ZBH+16]) -•Global average pooling (according to [ZKL+15]) -•Dense-Sparse-Dense training (see [HPN+16]) -•Soft targets (see [HVD15]) +• Feature scale clipping (see [ZF14]) +• Data augmentation (according to [ZBH+16]) +• Global average pooling (according to [ZKL+15]) +• Dense-Sparse-Dense training (see [HPN+16]) +• Soft targets (see [HVD15]) C. Calculating Network Characteristics C.1. Parameter Numbers -•A fully connected layer with nnodes,kinputs hasn·(k+ 1)parameters. The +1is +• A fully connected layer withn nodes, k inputs hasn·(k+ 1) parameters. The +1 is due to the bias. -•A convolutional layer iwithkifilters of size n×mbeing applied to ki−1feature maps -haski·ki−1(n·m+ 1)parameters. The +1is due to the bias. -•A fully connected layer with nnodes after kfeature maps of size m1×m2has -n·(k·m1·m2+ 1)parameters. -•A dense block with a depth of L, a growth rate of nand3×3filters hasL+n·32+ -32·n2∑L -i=0(L−i) =L+ 9n+ 9n2L2−L -2parameters. -According to [ HPTD15 ], AlexNet has 60 million parameters which is roughly the number +• A convolutional layeriwith ki filters of sizen×mbeing applied toki−1 feature maps +has ki ·ki−1(n·m+ 1) parameters. The +1 is due to the bias. +• A fully connected layer withn nodes after k feature maps of size m1 ×m2 has +n·(k·m1 ·m2 + 1) parameters. +• A dense block with a depth ofL, a growth rate ofn and 3 ×3 filters hasL+ n·32 + +32 ·n2 ∑L +i=0(L−i) = L+ 9n+ 9n2 L2−L +2 parameters. +According to [HPTD15], AlexNet has 60 million parameters which is roughly the number calculated in Table D.2. C.2. FLOPs The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence the following number are only giving rough estimates. -In the following, nϕdenotes the number of FLOPs to compute the non-linearity ϕ. For -simplicity,nϕ= 5was chosen. 
-•A fully connected layer with nnodes andkinputs has to calculate ϕ(W·x+b)with -W∈Rn×k,x∈Rk×1,b∈Rn×1. It hence needs about n·(k+ (k−1) + 1) = 2 nk -additions / multiplications before the non-linearity ϕis calculated. The total number -of FLOPs is 2·n·k+n·nϕ. -•In the following, biases are ignored. A convolutional layer with kifilters of size n×m -being applied to ki−1filter maps of size w×hresults inkifilter maps of size w×hif -padding is applied. For each element of each filter map, n·m·ki−1multiplications and -(n·m·ki−1−1)additions have to be made. This results in (2nmki−1−1)·(ki·w·h) -operations. The total number of FLOPs is (2·n·m·ki−1−1)·(ki·w·h)+ki·w·h·nϕ. +In the following,nϕ denotes the number of FLOPs to compute the non-linearityϕ. For +simplicity,nϕ = 5 was chosen. +• A fully connected layer withn nodes andk inputs has to calculateϕ(W ·x+ b) with +W ∈Rn×k, x ∈Rk×1, b ∈Rn×1. It hence needs aboutn·(k+ (k−1) + 1) = 2nk +additions / multiplications before the non-linearityϕ is calculated. The total number +of FLOPs is2 ·n·k+ n·nϕ. +• In the following, biases are ignored. A convolutional layer withki filters of sizen×m +being applied toki−1 filter maps of sizew×hresults inki filter maps of sizew×hif +padding is applied. For each element of each filter map,n·m·ki−1 multiplications and +(n·m·ki−1 −1) additions have to be made. This results in(2nmki−1 −1) ·(ki·w·h) +operations. The total number of FLOPs is(2·n·m·ki−1 −1)·(ki·w·h)+ ki·w·h·nϕ. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. -•A fully connected layer with nnodes after kfeature maps of size w×hneeds 2n(k·w·h) -FLOPs. The total number of FLOPs is 2n·(k·w·h) +n·nϕ. -•As Dropout is only calculated during training, the number of FLOPs was set to 0. -•The number of FLOPs for max pooling is dominated by the number of positions to -which the pooling kernel is applied. 
For a feature map of size w×ha max pooling -filter with stride sgets appliedw·h -s2. The number of FLOPs per application depends -on the kernel size. A 2×2kernel is assumed to need 5 FLOPs. -•The number of FLOPs for Batch Normalization is the same as the number of its +• A fully connected layer withnnodes afterkfeature maps of sizew×hneeds 2n(k·w·h) +FLOPs. The total number of FLOPs is2n·(k·w·h) + n·nϕ. +• As Dropout is only calculated during training, the number of FLOPs was set to 0. +• The number of FLOPs for max pooling is dominated by the number of positions to +which the pooling kernel is applied. For a feature map of sizew×h a max pooling +filter with strides gets applied w·h +s2 . The number of FLOPs per application depends +on the kernel size. A2 ×2 kernel is assumed to need 5 FLOPs. +• The number of FLOPs for Batch Normalization is the same as the number of its parameters. Here are some references which give information for the FLOPs: -•AlexNet -–1.5B in total [HPTD15]. -–725M in total [KPY+15]. -–3300M in total in Table D.2 -•VGG-16: -–15484M in total [HPTD15]. -–31000M in total in Table D.3. -•GoogleNet: 1566M in total [HPTD15]. +• AlexNet +– 1.5B in total [HPTD15]. +– 725M in total [KPY+15]. +– 3300M in total in Table D.2 +• VGG-16: +– 15484M in total [HPTD15]. +– 31000M in total in Table D.3. +• GoogleNet: 1566M in total [HPTD15]. One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same network. C.3. Memory Footprint The memory footprint of CNNs determines when networks can be used at all and if they can be trained efficiently. In order to be able to train CNNs efficiently, one weight update step has to fit in the memory of the GPU. This includes the following: -•Activations : All activations of one mini-batch in order to calculate the gradients +• Activations: All activations of one mini-batch in order to calculate the gradients in the backward pass. 
This is the number of floats in the feature maps of all weight layers combined. -•Weights -•Optimization algorithm : The optimization algorithm introduces some overhead. +• Weights +• Optimization algorithm: The optimization algorithm introduces some overhead. For example, Adam stores two parameters per weights. At inference time, every two consecutive layers have to fit into memory. When the forward pass of layer A to layer B is calculated, the memory can be freed if no skip connections are @@ -2719,168 +2901,183 @@ The summation row gives the sum of all floats for the output size column. This a conclusions about the maximum mini-batch size which can be in memory for training. D.1. LeNet-5 -One of the first CNNs used was LeNet-5 [ LBBH98 ]. LeNet-5 uses two times the common -pattern of a single convolutional layer with tanhas a non-linear activation function followed +One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common +pattern of a single convolutional layer withtanh as a non-linear activation function followed by a pooling layer and three fully connected layers. One fully connected layer is used to get the right output dimension, another one is necessary to allow the network to learn a non-linear combination of the features of the feature maps. Its exact architecture is shown in Figure D.1 and described in Table D.1. It reaches a test -error rate of 0.8 %on MNIST. +error rate of0.8 % on MNIST. Figure D.1.: Architecture of LeNet-5 as shown in [LBBH98]. 
# Type Filters @ -Patch size / strideParameters FLOPs Output size +Patch size / stride +Parameters FLOPs Output size Input 0 0 1@32 ×32 -1 Convolution 6@ 5×5×1/1 156 307800 6@28×28 -2 Scaled average pooling 2×2/2 2 336 6@14 ×14 -3 Convolution 16@ 5×5×6/1 2416 942 400 16@10×10 -4 Scaled average pooling 2×2/2 2 1600 16@ 5 ×5 +1 Convolution 6@ 5 ×5 ×1 /1 156 307800 6 @28 ×28 +2 Scaled average pooling 2 ×2 /2 2 336 6@14 ×14 +3 Convolution 16@ 5 ×5 ×6 /1 2416 942 400 16@10 ×10 +4 Scaled average pooling 2 ×2 /2 2 1600 16@ 5 × 5 5 Fully Connected 120 neurons 48 120 240000 120 6 Fully Connected 84 neurons 10164 20580 84 7 Fully Connected (output) 10 neurons 850 1730 10 -∑61710 15144446 9118 -Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanhactivation function is applied. +∑ 61710 15144446 9118 +Table D.1.:LeNet-5 architecture: After layers 1, 3, 5 and 6 thetanh activation function is applied. After layer 7, the softmax function is applied. One can see that convolutional layer need much fewer parameters, but an order of magnitude more FLOPs per parameter than fully connected layers. D.2. AlexNet ThefirstCNNwhichachievedmajorimprovementsontheImageNetdatasetwasAlexNet[ KSH12]. -ItsarchitectureisshowninFigureD.2anddescribedinTableD.2. Ithasabout 60·106parameters. +ItsarchitectureisshowninFigureD.2anddescribedinTableD.2. Ithasabout 60·106 parameters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜ guerzhoy/tf_alexnet. -Note that the uncompressed size is at least 60 965 224 floats·32bit -float≈244 MB. -Figure D.2.: Architecture of AlexNet as shown in [ KSH12]: Convolutional Layers are followed +Note that the uncompressed size is at least60 965 224 floats·32 bit +float ≈244 MB. +Figure D.2.:Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). 
# Type Filters @ -Patch size / strideParameters FLOPs Output size -Input 3 @ 224×224 -1 Convolution 96 @ 11×11×3/ 4 34 944 211 M 96@ 55×55 -LCN 12 M96@ 55×55 -2 Max pooling 3×3/ 2 0 301 k 96 @ 27×27 -3 Convolution 256 @ 5×5×48/ 1 307 456 448 M 256 @ 13×13 -LCN 3 M256 @ 13×13 -4 Max pooling 3×3/ 2 0 50 k 256 @ 13×13 -5 Convolution 384 @ 3×3×256/ 1 885 120 299 M 384 @ 13×13 -7 Convolution 384 @ 3×3×192/ 1 663 936 224 M 384 @ 13×13 -9 Convolution 256 @ 3×3×192/ 1 442 624 150 M 256 @ 13×13 -10 Max pooling 3×3/ 2 0 50 k 256 @ 6×6 -11 FC 4096 neurons 37 752 832 75 M4096 +Patch size / stride +Parameters FLOPs Output size +Input 3 @ 224 ×224 +1 Convolution 96 @ 11 ×11 ×3 / 4 34 944 211 M 96@ 55 × 55 +LCN 12 M 96@ 55 × 55 +2 Max pooling 3 ×3 / 2 0 301 k 96 @ 27 × 27 +3 Convolution 256 @ 5 ×5 × 48 / 1 307 456 448 M 256 @ 13 × 13 +LCN 3 M 256 @ 13 × 13 +4 Max pooling 3 ×3 / 2 0 50 k 256 @ 13 × 13 +5 Convolution 384 @ 3 ×3 ×256 / 1 885 120 299 M 384 @ 13 × 13 +7 Convolution 384 @ 3 ×3 ×192 / 1 663 936 224 M 384 @ 13 × 13 +9 Convolution 256 @ 3 ×3 ×192 / 1 442 624 150 M 256 @ 13 × 13 +10 Max pooling 3 ×3 / 2 0 50 k 256 @ 6 × 6 +11 FC 4096 neurons 37 752 832 75 M 4096 12 FC 4096 neurons 16 781 312 34 M 4096 13 FC 1000 neurons 4 097 000 8 M 1000 -∑60 965 224 3300 M 1 122 568 -Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of convolutions due to +∑ 60 965 224 3300 M 1 122 568 +Table D.2.:AlexNet architecture: One special case of AlexNet is grouping of convolutions due to computational restrictions at the time of its development. This also reduces the number of parameters and allows parallel computation on separate GPUs. However, to make the architecture easier to compare, this grouping was ignored for the parameter count. -The FLOPs are taken from [ HPTD15 ] and combined with rough estimates for Local +The FLOPs are taken from [HPTD15] and combined with rough estimates for Local Contrast Normalization and max pooling. 
The calculated number of parameters was checked against the downloaded version. It -also has 60 965 224 parameters. +also has60 965 224parameters. D.3. VGG-16 D -Another widespread architecture is the VGG-16 (D) [ SZ14]. VGG comes from the Visual -GeometryGroup in Oxford which developed this architecture. It has 16layers which can -learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3×3 +Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from theVisual +Geometry Group in Oxford which developed this architecture. It has16 layers which can +learn parameters. A major difference compared to AlexNet is that VGG-16 uses only3 ×3 filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a detailed textual description is given in Table D.3. -AtrainedVGG-16DforTensorflowcanbedownloadedat https://github .com/machrisaa/ -tensorflow-vgg . Note that the uncompressed size is at least 138 357 544 floats ·32bit -float≈ -520 MB. The downloaded Numpy binary file npzneeds 553 MBwithout compression and -514 MBwith compression.224×224Input +A trained VGG-16 D for Tensorflow can be downloaded athttps://github.com/machrisaa/ +tensorflow-vgg. Note that the uncompressed size is at least138 357 544 floats·32 bit +float ≈ +520 MB. The downloaded Numpy binary filenpz needs 553 MB without compression and +514 MB with compression. +224×224 +Input +C64@3×3/1 C64@3×3/1 -C64@3×3/1112×112max pooling 2×2/1 +112×112 +max pooling2×2/1 +C128@3×3/1 C128@3×3/1 -C128@3×3/156×56max pooling 2×2/1 +56×56 +max pooling2×2/1 C256@3×3/1 C256@3×3/1 -C256@3×3/128×28max pooling 2×2/1 +C256@3×3/1 +28×28 +max pooling2×2/1 +C512@3×3/1 +C512@3×3/1 C512@3×3/1 +14×14 +max pooling2×2/1 C512@3×3/1 -C512@3×3/114×14max pooling 2×2/1 C512@3×3/1 C512@3×3/1 -C512@3×3/17×7max pooling 2×2/1 +7×7 +max pooling2×2/1 Fully Connected 4096 Dropout,p= 0.5 Fully Connected 4096 Dropout,p= 0.5 Fully Connected 1000 -Figure D.3.: Architecture of VGG-16 D. 
C512@3×3/1is a convolutional layer with 512 filters of -kernel size 3×3with stride 1. All convolutional layers use SAMEpadding. +Figure D.3.:Architecture of VGG-16 D.C 512@3 ×3/1 is a convolutional layer with 512 filters of +kernel size3 ×3 with stride 1. All convolutional layers useSAME padding. # Type Filters @ -Patch size / strideParameters FLOPs Output size -Input 3 @ 224×224 -1 Convolution 64 @ 3×3×3/ 1 1 792 186 M 64@ 224×224 -2 Convolution 64 @ 3×3×64/ 1 36 928 3712 M 64@ 224×224 -Max pooling 2×2/ 2 0 2 M 64 @ 112×112 -3 Convolution 128 @ 3×3×64/ 1 73 856 1856 M 128 @ 112×112 -4 Convolution 128 @ 3×3×128/ 1 147 584 3705 M 128 @ 112×112 -Max pooling 2×2/ 2 0 1 M 128 @ 56×56 -5 Convolution 256 @ 3×3×128/ 1 295 168 1853 M 256 @ 56×56 -6 Convolution 256 @ 3×3×256/ 1 590 080 3703 M 256 @ 56×56 -7 Convolution 256 @ 3×3×256/ 1 590 080 3703 M 256 @ 56×56 -Max pooling 2×2/ 2 0<1 M256 @ 28×28 -8 Convolution 512 @ 3×3×256/ 1 1 180 160 1851 M 512 @ 28×28 -9 Convolution 512 @ 3×3×512/ 1 2 359 808 3701 M 512 @ 28×28 -10 Convolution 512 @ 3×3×512/ 1 2 359 808 3701 M 512 @ 28×28 -Max pooling 2×2/ 2 0<1 M512 @ 14×14 -11 Convolution 512 @ 3×3×512/ 1 2 359 808 925 M 512 @ 14×14 -12 Convolution 512 @ 3×3×512/ 1 2 359 808 925 M 512 @ 14×14 -13 Convolution 512 @ 3×3×512/ 1 2 359 808 925 M 512 @ 14×14 -Max pooling 2×2/ 2 0<1 M512 @ 7×7 -14 FC 4096 neurons 102 764 544 206 M4096 +Patch size / stride +Parameters FLOPs Output size +Input 3 @ 224 ×224 +1 Convolution 64 @ 3 ×3 × 3 / 1 1 792 186 M 64@ 224 ×224 +2 Convolution 64 @ 3 ×3 × 64 / 1 36 928 3712 M 64@ 224 ×224 +Max pooling 2 ×2 / 2 0 2 M 64 @112 ×112 +3 Convolution 128 @ 3 ×3 × 64 / 1 73 856 1856 M 128 @112 ×112 +4 Convolution 128 @ 3 ×3 ×128 / 1 147 584 3705 M 128 @112 ×112 +Max pooling 2 ×2 / 2 0 1 M 128 @ 56 × 56 +5 Convolution 256 @ 3 ×3 ×128 / 1 295 168 1853 M 256 @ 56 × 56 +6 Convolution 256 @ 3 ×3 ×256 / 1 590 080 3703 M 256 @ 56 × 56 +7 Convolution 256 @ 3 ×3 ×256 / 1 590 080 3703 M 256 @ 56 × 56 +Max pooling 2 ×2 / 2 0 
<1 M 256 @ 28 × 28 +8 Convolution 512 @ 3 ×3 ×256 / 1 1 180 160 1851 M 512 @ 28 × 28 +9 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 3701 M 512 @ 28 × 28 +10 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 3701 M 512 @ 28 × 28 +Max pooling 2 ×2 / 2 0 <1 M 512 @ 14 × 14 +11 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 925 M 512 @ 14 × 14 +12 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 925 M 512 @ 14 × 14 +13 Convolution 512 @ 3 ×3 ×512 / 1 2 359 808 925 M 512 @ 14 × 14 +Max pooling 2 ×2 / 2 0 <1 M 512 @ 7 × 7 +14 FC 4096 neurons 102 764 544 206 M 4096 Dropout 0 0 4096 15 FC 4096 neurons 16 781 312 34 M 4096 Dropout 0 0 4096 16 FC 1000 neurons 4 097 000 8 M 1000 -∑138 357 544 31 000 M 15 245 800 -Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have +∑ 138 357 544 31 000 M 15 245 800 +Table D.3.:VGG-16 D architecture: The authors chose to give only layers a number which have learnable parameters. All convolutions are zero padded to prevent size changes and use ReLU activation functions. The channels mean is subtracted from each pixel as -a preprocessing step ( −103.939,−116.779,−123.68). As Dropout is only calculated -during training time, the number of FLOPs is 0. The dropout probability is 0.5. +a preprocessing step (−103.939,−116.779,−123.68). As Dropout is only calculated +during training time, the number of FLOPs is 0. The dropout probability is0.5. The calculated number of parameters was checked against the downloaded version. It -also has 138 357 544 parameters. +also has138 357 544parameters. D.4. GoogleNet, Inception v2 and v3 The large number of parameters and operations is a problem when such models should get applied in practice to thousands of images. In order to reduce the computational cost while -maintaining the classification quality, GoogleNet [ SLJ+15] and the Inception module were -developed. 
The Inception module essentially only computes 1×1filters, 3×3filters and -5×5filters in parallel, but applied bottleneck 1×1filters before to reduce the number of +maintaining the classification quality, GoogleNet [SLJ+15] and the Inception module were +developed. The Inception module essentially only computes1 ×1 filters, 3 ×3 filters and +5 ×5 filters in parallel, but applied bottleneck1 ×1 filters before to reduce the number of parameters. It is shown in Figure D.4. Figure D.4.: Inception module Image source: [SLJ+15] -Compared to GoogleNet, Inception v2 [ SVI+15] removed the 5×5filters and replaced -them by two successive layers of 3×3filters. A visualization of an Inception v2 module +Compared to GoogleNet, Inception v2 [SVI+15] removed the5 ×5 filters and replaced +them by two successive layers of3 ×3 filters. A visualization of an Inception v2 module is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to -approximate symmetric filters with fewer parameters. The authors call this approach filter -factorization . +approximate symmetric filters with fewer parameters. The authors call this approachfilter +factorization. Inception v3 introduced Batch Normalization to the network [SVI+15]. Figure D.5.: Inception v2 module Image source: [SVI+15] D.5. Inception-v4 -Inception-v4 as described in [ SIV16] consists of four main building blocks: The stem, +Inception-v4 as described in [SIV16] consists of four main building blocks: The stem, Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper, wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use average pooling. The stem, module B and module C use separable convolutions. 
-#×Type Parameters Output size -Input 3 @ 299×299 -1 Stem 605 728 384 @ 35×35 -24×Inception A 317 632 384 @ 35×35 -3 Reduction A 2 306 112 1024 @ 17×17 -47×Inception B 2 936 256 1024 @ 17×17 -5 Reduction B 2 747 392 1536 @ 8×8 -63×Inception C 4 553 088 1536 @ 8×8 -Global Average Pooling 0 1536 @ 1×1 -Dropout (p=0.8) 0 1536 @ 1×1 +# × Type Parameters Output size +Input 3 @ 299 ×299 +1 Stem 605 728 384 @ 35 × 35 +2 4× Inception A 317 632 384 @ 35 × 35 +3 Reduction A 2 306 112 1024 @ 17 × 17 +4 7× Inception B 2 936 256 1024 @ 17 × 17 +5 Reduction B 2 747 392 1536 @ 8 × 8 +6 3× Inception C 4 553 088 1536 @ 8 × 8 +Global Average Pooling 0 1536 @ 1 × 1 +Dropout (p=0.8) 0 1536 @ 1 × 1 7 Softmax 1 537 000 1000 -∑42 679 816 +∑ 42 679 816 Table D.4.: Inception-v4 network. @@ -2889,69 +3086,72 @@ Well-known benchmark datasets for classification problems in computer vision are in Table E.1. The best results known to me are given in Table E.2. However, every semantic segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers using Algorithm 2. 
-DatabaseImage Resolution -(width×height)Number +Database Image Resolution +(width ×height) +Number of -ImagesNumber +Images +Number of -ClassesChannels Data source +Classes +Channels Data source MNIST 28 px×28 px 70 000 10 1 [YL98, LBBH98] HASYv2 32 px×32 px 168 233 369 1 [Tho17a] -SVHN 32 px×32 px 630 420 10 3[NWC+11b], +SVHN 32 px×32 px 630 420 10 3 [NWC+11b], [NWC+11a] CIFAR-10 32 px×32 px 60 000 10 3 [Kri, KH09] CIFAR-100 32 px×32 px 60 000 100 3 [Kri, KH09] STL-10 96 px×96 px 13 000 10 3 [CLN11, CLN10] -Caltech-101(80 px−3481 px) -×(92 px−3999 px)9144 102 3 [FFP03, FFFP06] -Caltech-256(75 px−7913 px) -×(75 px−7913 px)30 607 257 3 [Gri06, GG07] +Caltech-101 (80 px−3481 px) +×(92 px−3999 px) 9144 102 3 [FFP03, FFFP06] +Caltech-256 (75 px−7913 px) +×(75 px−7913 px) 30 607 257 3 [Gri06, GG07] ILSVRC 20121 (8 px−9331 px) -×(10 px−6530 px)1.2·1061000 3 [Ima12, RDS+14] +×(10 px−6530 px) 1.2 ·106 1000 3 [Ima12, RDS +14] Places3652 (290px−3158px) -×(225px−2630px)1.8·106365 3 [Zho16, ZKL+16] -GTSRB(25 px−266 px) -×(25 px−232 px)51 839 43 3 [SSSI, SSSI12] +×(225px−2630px) 1.8 ·106 365 3 [Zho16, ZKL +16] +GTSRB (25 px−266 px) +×(25 px−232 px) 51 839 43 3 [SSSI, SSSI12] Asirra3 (4 px−500 px) -×(4 px−500 px)25 000 2 3 [Asi17, EDHS07] -Graz-02480 px×640 px -and640 px×480 px1096 3 3 [Mar08, MS07] -Table E.1.: An overview over publicly available image databases for classification. The number +×(4 px−500 px) 25 000 2 3 [Asi17, EDHS07] +Graz-02 480 px×640 px +and 640 px×480 px 1096 3 3 [Mar08, MS07] +Table E.1.:An overview over publicly available image databases for classification. The number of images row gives the sum of the training and the test images. Some datasets, like SVHN, have additional unlabeled data which is not given in this table. 1ImageNet Large Scale Visual Recognition Competition 2The dimensions are only calculated for the validation set. 
3Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle -Dataset Model type / name Result ScoreAchieved / +Dataset Model type / name Result Score Achieved / Claimed by -MNIST — 0.21 % error [WZZ+13] +MNIST — 0.21 % error [WZZ +13] HASYv2 TF-CNN 81.00 % accuracy [Tho17a] SVHN DenseNet ( k= 24) 1.59 % error [HLW16] -CIFAR-10 DenseNet-BC ( k= 40)3.46 % error [HLW16] +CIFAR-10 DenseNet-BC ( k= 40) 3.46 % error [HLW16] CIFAR-100 WRN-28-10 16.21 % error [LH16] STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15] -Caltech-101 SPP-net (pretrained) 93.42 %±0.5 %accuracy [HZRS14] -Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 %accuracy [ZF14] +Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14] +Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14] ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a] GTSRB MCDNN 99.46 % accuracy [SL11] Asirra SVM 82.7 % accuracy [Gol08] Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10] Table E.2.: An overview over state of the art results achieved in computer vision datasets. -Algorithm 2 Create a classification dataset from a semantic segmentation dataset -Require: Semantic segmentation dataset ( DS) -procedure CreateDataset (Annotated dataset DS) -DC←List +Algorithm 2Create a classification dataset from a semantic segmentation dataset +Require: Semantic segmentation dataset (DS) +procedure CreateDataset(Annotated datasetDS) +DC ←List w←desired image width h←desired image height -forImage and associated label (x,y)inDSdo -i←randint (0,L.width−w) -j←randint (0,L.height−h) -cL←crop (y,(i,j),(i+w,j+h)) -ifat least 50% of sare of one class then -cI←crop (x,(i,j),(i+w,j+h)) -D.append ((cI,cL)) -return(DC) +for Image and associated label(x,y) in DS do +i←randint(0,L.width−w) +j ←randint(0,L.height−h) +cL ←crop(y,(i,j),(i+ w,j + h)) +if at least 50% ofs are of one classthen +cI ←crop(x,(i,j),(i+ w,j + h)) +D.append((cI,cL)) +return (DC) F. List of Tables 2.1 Pooling types . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . 8 @@ -3006,8 +3206,8 @@ G. List of Figures 5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42 5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42 -5.4 Baseline model γdistribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 -5.5 Baseline model βdistribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44 5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45 5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46 @@ -3032,596 +3232,596 @@ D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 H. Bibliography -[AAB+16]M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on -heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467 , Mar. -2016. [Online]. Available: https://arxiv .org/abs/1603 .04467 -[ABKS99] M. Ankerst, M. M. Breunig et al., “OPTICS: Ordering points to identify the -clustering structure,” in ACM Sigmod record , vol. 28, no. 2. ACM, 1999, pp. +[AAB+16] M. Abadi, A. Agarwalet al., “Tensorflow: Large-scale machine learning on +heterogeneous distributed systems,”arXiv preprint arXiv:1603.04467, Mar. +2016. [Online]. Available: https://arxiv.org/abs/1603.04467 +[ABKS99] M. Ankerst, M. M. Breuniget al., “OPTICS: Ordering points to identify the +clustering structure,” inACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp. 49–60. -[ADG+16]M. Andrychowicz, M. 
Denil et al., “Learning to learn by gradient descent by -gradient descent,” in Advances in Neural Information Processing Systems 29 -(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar. -2016, pp. 3981–3989. [Online]. Available: http://papers .nips.cc/paper/6461learning-to-learn-by-gradient-descent-by-gradient-descent - .pdf +[ADG+16] M. Andrychowicz, M. Denilet al., “Learning to learn by gradient descent by +gradient descent,” inAdvances in Neural Information Processing Systems 29 +(NIPS), D. D. Lee, M. Sugiyamaet al., Eds. Curran Associates, Inc., Mar. +2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461learning-to-learn-by-gradient-descent-by-gradient-descent.pdf + [AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism: Going deeper into neural networks,” Jun. 2015. [Online]. Available: - https://research .googleblog.com/2015/06/inceptionism-going-deeperinto-neural.html + https://research.googleblog.com/2015/06/inceptionism-going-deeperinto-neural.html [Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https: -//www.microsoft.com/en-us/download/details .aspx?id=54765 +//www.microsoft.com/en-us/download/details.aspx?id=54765 [BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,” -Journal of Machine Learning Research , vol. 13, no. Feb, pp. 281–305, -Feb. 2012. [Online]. Available: http://jmlr .csail.mit.edu/papers/volume13/ -bergstra12a/bergstra12a .pdf -[BCW+17]J. Bao, D. Chen et al., “CVAE-GAN: Fine-grained image generation through -asymmetric training,” arXiv preprint arXiv:1703.10155 , Mar. 2017. [Online]. -Available: https://arxiv .org/abs/1703 .10155 -[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better image +Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305, +Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/ +bergstra12a/bergstra12a.pdf +[BCW+17] J. Bao, D. 
Chenet al., “CVAE-GAN: Fine-grained image generation through +asymmetric training,”arXiv preprint arXiv:1703.10155, Mar. 2017. [Online]. +Available: https://arxiv.org/abs/1703.10155 +[BDLB09] J. Bergstra, G. Desjardinset al., “Quadratic polynomials learn better image features,” Département d’Informatique et de Recherche Opérationnelle, Université de Montréal, Tech. Rep. 1337, 2009. -[BGNR16] B. Baker, O. Gupta et al., “Designing neural network architectures using -reinforcement learning,” arXiv preprint arXiv:1611.02167 , Nov. 2016. [Online]. -Available: https://arxiv .org/abs/1611 .02167 +[BGNR16] B. Baker, O. Guptaet al., “Designing neural network architectures using +reinforcement learning,”arXiv preprint arXiv:1611.02167, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.02167 [BM93] U. Bodenhausen and S. Manke, Automatically Structured Neural -Networks For Handwritten Character And Word Recognition . London: +Networks For Handwritten Character And Word Recognition. London: Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http: //dx.doi.org/10.1007/978-1-4471-2063-6_283 [BMDP10] R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest -neighbor,” in European Conference on Computer Vision (ECCV) . Springer, +neighbor,” inEuropean Conference on Computer Vision (ECCV). Springer, 2010, pp. 171–184. [BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of feature pooling in visual recognition,” in International Conference on -Machine Learning (ICML) , no. 27, 2010, pp. 111–118. [Online]. Available: -http://yann .lecun.com/exdb/publis/pdf/boureau-icml-10 .pdf +Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available: +http://yann.lecun.com/exdb/publis/pdf/boureau-icml-10.pdf [BSF94] Y. Bengio, P. Simard, and P. 
Frasconi, “Learning long-term dependencies -with gradient descent is difficult,” IEEE transactions on neural networks , +with gradient descent is difficult,”IEEE transactions on neural networks, vol. 5, no. 2, pp. 157–166, 1994. [Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training of artificial neural networks,” IEEE Proceedings G-Circuits, Devices -and Systems , vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: -http://ieeexplore .ieee.org/document/143326/ +and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: +http://ieeexplore.ieee.org/document/143326/ [Cho15] F. Chollet, “Keras,” https://github .com/fchollet/keras, 2015. [CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks -in unsupervised feature learning,” Ann Arbor , vol. 1001, no. 48109, +in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109, p. 2, 2010. [Online]. Available: http://cs .stanford.edu/~acoates/papers/ -coatesleeng_aistats_2011 .pdf +coatesleeng_aistats_2011.pdf [CLN11] A. Coates, H. Lee, and A. Y. Ng, “STL-10 dataset,” 2011. [Online]. Available: http://cs.stanford.edu/~acoates/stl10 [CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural -networks for image classification,” in Conference on Computer Vision and -Pattern Recognition (CVPR) . IEEE, Feb. 2012, pp. 3642–3649. [Online]. -Available: https://arxiv .org/abs/1202 .2745v1 +networks for image classification,” inConference on Computer Vision and +Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online]. +Available: https://arxiv.org/abs/1202.2745v1 [CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate deep network learning by exponential linear units (ELUs),” arXiv preprint arXiv:1511.07289 , Nov. 2015. [Online]. Available: https: -//arxiv.org/abs/1511 .07289 -[CWV+14]S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep -learning,” arXiv preprint arXiv:1410.0759 , Oct. 2014. [Online]. 
Available: -https://arxiv .org/abs/1410 .0759 +//arxiv.org/abs/1511.07289 +[CWV+14] S. Chetlur, C. Woolley et al., “cuDNN: Efficient primitives for deep +learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.0759 -[DBB+01]C. Dugas, Y. Bengio et al., “Incorporating second-order functional +[DBB+01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional knowledge for better option pricing,” in Advances in Neural Information - Processing Systems 13 (NIPS) , T. K. Leen, T. G. Dietterich, + Processing Systems 13 (NIPS), T. K. Leen, T. G. Dietterich, and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online]. -Available: http://papers .nips.cc/paper/1920-incorporating-second-orderfunctional-knowledge-for-better-option-pricing - .pdf +Available: http://papers .nips.cc/paper/1920-incorporating-second-orderfunctional-knowledge-for-better-option-pricing.pdf + [DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry -in convolutional neural networks,” arXiv preprint arXiv:1602.02660 , Feb. -2016. [Online]. Available: https://arxiv .org/abs/1602 .02660 +in convolutional neural networks,”arXiv preprint arXiv:1602.02660, Feb. +2016. [Online]. Available: https://arxiv.org/abs/1602.02660 [DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for -online learning and stochastic optimization,” Journal of Machine Learning -Research , vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: -http://www .jmlr.org/papers/volume12/duchi11a/duchi11a .pdf +online learning and stochastic optimization,”Journal of Machine Learning +Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: +http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf [DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via -multi-task network cascades,” in Conference on Computer Vision and Pattern -Recognition (CVPR) . IEEE, 2016, pp. 3150–3158. [Online]. 
Available: -https://arxiv .org/abs/1512 .04412 -[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural -Computing Surveys , vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: -ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6 .pdf +multi-task network cascades,” inConference on Computer Vision and Pattern +Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available: +https://arxiv.org/abs/1512.04412 +[DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,”Neural +Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: +ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf [Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford -University, Tech. Rep., 2015. [Online]. Available: http://cs229 .stanford.edu/ -proj2015/054_report .pdf +University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/ +proj2015/054_report.pdf [DSRB14] A. Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised -feature learning with convolutional neural networks,” in Advances in Neural -Information Processing Systems 27 (NIPS) , Z. Ghahramani, M. Welling +feature learning with convolutional neural networks,” inAdvances in Neural +Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. -Available: http://papers .nips.cc/paper/5548-discriminative-unsupervisedfeature-learning-with-convolutional-neural-networks - .pdf +Available: http://papers.nips.cc/paper/5548-discriminative-unsupervisedfeature-learning-with-convolutional-neural-networks.pdf + [DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional -neural networks for galaxy morphology prediction,” Monthly notices of the -royal astronomical society , vol. 450, no. 2, pp. 1441–1459, 2015. -[EDHS07] J. Elson, J. J. 
Douceur et al., “Asirra: A CAPTCHA that +neural networks for galaxy morphology prediction,”Monthly notices of the +royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015. +[EDHS07] J. Elson, J. J. Douceur et al. , “Asirra: A CAPTCHA that exploits interest-aligned manual image categorization,” in ACM Conference on Computer and Communications Security (CCS) , no. 14. Association for Computing Machinery, Inc., Oct. 2007. [Online]. -Available: https://www .microsoft.com/en-us/research/publication/asirra-acaptcha-that-exploits-interest-aligned-manual-image-categorization/ +Available: https://www.microsoft.com/en-us/research/publication/asirra-acaptcha-that-exploits-interest-aligned-manual-image-categorization/ -[EKS+96]M. Ester, H.-P. Kriegel et al., “A density-based algorithm for discovering -clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996, +[EKS+96] M. Ester, H.-P. Kriegelet al., “A density-based algorithm for discovering +clusters in large spatial databases with noise.” inKdd, vol. 96, no. 34, 1996, pp. 226–231. -[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing . -Springer, 2003, vol. 53. [Online]. Available: https://dx .doi.org/10.1007/978-3662-44874-8 +[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing. +Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3662-44874-8 [Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation networks,” 1988. [Online]. Available: http://repository .cmu.edu/cgi/ -viewcontent .cgi?article=2799&context=compsci +viewcontent.cgi?article=2799&context=compsci [FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of object -categories,” IEEE transactions on pattern analysis and machine intelligence , +categories,”IEEE transactions on pattern analysis and machine intelligence, vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. 
Available: http: -//vision.stanford.edu/documents/Fei-FeiFergusPerona2006 .pdf +//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf [FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http: -//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101 .html -[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Object detection with discriminatively - trained part-based models,” IEEE transactions on pattern analysis and -machine intelligence , vol. 32, no. 9, pp. 1627–1645, 2010. +//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html +[FGMR10] P. F. Felzenszwalb, R. B. Girshicket al., “Object detection with discriminatively + trained part-based models,”IEEE transactions on pattern analysis and +machine intelligence, vol. 32, no. 9, pp. 1627–1645, 2010. [FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,” -1989. [Online]. Available: http://repository .cmu.edu/compsci/1938/ +1989. [Online]. Available: http://repository.cmu.edu/compsci/1938/ [GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep -feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online]. -Available: http://jmlr .org/proceedings/papers/v9/glorot10a/glorot10a .pdf +feedforward neural networks.” inAistats, vol. 9, 2010, pp. 249–256. [Online]. +Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf [GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural -networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available: -http://www .jmlr.org/proceedings/papers/v15/glorot11a/glorot11a .pdf -[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate object -detection and semantic segmentation,” in Conference on Computer Vision -and Pattern Recognition (CVPR) . IEEE, 2014, pp. 580–587. [Online]. -Available: https://arxiv .org/abs/1311 .2524 +networks.” inAistats, vol. 15, no. 106, 2011, p. 275. [Online]. 
Available: +http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf +[GDDM14] R. Girshick, J. Donahueet al., “Rich feature hierarchies for accurate object +detection and semantic segmentation,” inConference on Computer Vision +and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online]. +Available: https://arxiv.org/abs/1311.2524 [GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 object category dataset,” Apr. -2007. [Online]. Available: http://authors .library.caltech.edu/7694/ +2007. [Online]. Available: http://authors.library.caltech.edu/7694/ [GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with -Bernoulli approximate variational inference,” arXivpreprintarXiv:1506.02158 , -Jan. 2016. [Online]. Available: https://arxiv .org/abs/1506 .02158v6 -[GJ02] M. R. Garey and D. S. Johnson, Computers and intractability . wh freeman +Bernoulli approximate variational inference,”arXiv preprint arXiv:1506.02158, +Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6 +[GJ02] M. R. Garey and D. S. Johnson,Computers and intractability. wh freeman New York, 2002, vol. 29. -[GJS76] M.R.Garey, D.S.Johnson, andL.Stockmeyer, “SomesimplifiedNP-complete -graph problems,” Theoretical computer science , vol. 1, no. 3, pp. 237–267, +[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete +graph problems,”Theoretical computer science, vol. 1, no. 3, pp. 237–267, 1976. -[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM -conference on Computer and communications security (CCS) , no. 15. ACM, +[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” inACM +conference on Computer and communications security (CCS), no. 15. ACM, 2008, pp. 535–542. -[Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071 , May -2015. [Online]. Available: https://arxiv .org/abs/1412 .6071 +[Gra15] B. 
Graham, “Fractional max-pooling,”arXiv preprint arXiv:1412.6071, May +2015. [Online]. Available: https://arxiv.org/abs/1412.6071 [Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: -http://www .vision.caltech.edu/Image_Datasets/Caltech256/ -[GWFM+13]I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML, +http://www.vision.caltech.edu/Image_Datasets/Caltech256/ +[GWFM+13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML, vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http: -//www.jmlr.org/proceedings/papers/v28/goodfellow13 .pdf +//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf [HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for -transfer learning?” arXiv preprint arXiv:1608.08614 , Aug. 2016. [Online]. -Available: https://arxiv .org/abs/1608 .08614 -[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online]. -Available: http://papers .nips.cc/paper/227-meiosis-networks .pdf +transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online]. +Available: https://arxiv.org/abs/1608.08614 +[Han89] S. J. Hanson, “Meiosis networks.” inNIPS, 1989, pp. 533–541. [Online]. +Available: http://papers.nips.cc/paper/227-meiosis-networks.pdf [Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available: -https://devblogs .nvidia.com/parallelforall/new-features-cuda-7-5/ +https://devblogs.nvidia.com/parallelforall/new-features-cuda-7-5/ [HLW16] G. Huang, Z. Liu, and K. Q. Weinberger, “Densely connected convolutional -networks,” arXiv preprint arXiv:1608.06993 , Aug. 2016. [Online]. Available: -https://arxiv .org/abs/1608 .06993v1 +networks,”arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.06993v1 [HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv preprint arXiv:1611.04231 , Nov. 2016. [Online]. Available: https: -//arxiv.org/abs/1611 .04231 +//arxiv.org/abs/1611.04231 [How13] A. G. 
Howard, “Some improvements on deep convolutional neural network -based image classification,” arXiv preprint arXiv:1312.5402 , Dec. 2013. -[Online]. Available: https://arxiv .org/abs/1312 .5402 +based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013. +[Online]. Available: https://arxiv.org/abs/1312.5402 -[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques . +[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques. Elsevier, 2011. -[HPN+16]S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with -dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381 , Jul. 2016. -[Online]. Available: https://arxiv .org/abs/1607 .04381 -[HPTD15] S. Han, J. Pool et al., “Learning both weights and connections for efficient -neural network,” in Advances in Neural Information Processing Systems 28 -(NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun. -2015, pp. 1135–1143. [Online]. Available: http://papers .nips.cc/paper/5784learning-both-weights-and-connections-for-efficient-neural-network +[HPN+16] S. Han, J. Pool et al., “DSD: Regularizing deep neural networks with +dense-sparse-dense training flow,”arXiv preprint arXiv:1607.04381, Jul. 2016. +[Online]. Available: https://arxiv.org/abs/1607.04381 +[HPTD15] S. Han, J. Poolet al., “Learning both weights and connections for efficient +neural network,” inAdvances in Neural Information Processing Systems 28 +(NIPS), C. Cortes, N. D. Lawrenceet al., Eds. Curran Associates, Inc., Jun. +2015, pp. 1135–1143. [Online]. Available: http://papers.nips.cc/paper/5784learning-both-weights-and-connections-for-efficient-neural-network .pdf -[HSK+12]G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing -co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580 , Jul. -2012. [Online]. Available: https://arxiv .org/abs/1207 .0580 -[HSL+16]G. Huang, Y. 
Sun et al., “Deep networks with stochastic depth,” -arXiv preprint arXiv:1603.09382 , Mar. 2016. [Online]. Available: https: -//arxiv.org/abs/1603 .09382 +[HSK+12] G. E. Hinton, N. Srivastavaet al., “Improving neural networks by preventing +co-adaptation of feature detectors,”arXiv preprint arXiv:1207.0580, Jul. +2012. [Online]. Available: https://arxiv.org/abs/1207.0580 +[HSL+16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,” +arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https: +//arxiv.org/abs/1603.09382 [HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon and general network pruning,” in International Conference on Neural -Networks . IEEE, 1993, pp. 293–299. [Online]. Available: http: -//ee.caltech.edu/Babak/pubs/conferences/00298572 .pdf +Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http: +//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf [HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural -network,” arXiv preprint arXiv:1503.02531 , Mar. 2015. [Online]. Available: -https://arxiv .org/abs/1503 .02531 +network,”arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available: +https://arxiv.org/abs/1503.02531 [HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional networks for visual recognition,” in European Conference on Computer Vision (ECCV) . Springer, 2014, pp. 346–361. [Online]. Available: -https://arxiv .org/abs/1406 .4729 +https://arxiv.org/abs/1406.4729 [HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,” -arXiv preprint arXiv:1512.03385 , Dec. 2015. [Online]. Available: https: -//arxiv.org/abs/1512 .03385v1 -[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level +arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https: +//arxiv.org/abs/1512.03385v1 +[HZRS15b] K. He, X. 
Zhanget al., “Delving deep into rectifiers: Surpassing human-level performance on imagenet classification,” in International Conference on -Computer Vision (ICCV) , Feb. 2015, pp. 1026–1034. [Online]. Available: -https://arxiv .org/abs/1502 .01852 +Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. Available: +https://arxiv.org/abs/1502.01852 [Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),” -2012. [Online]. Available: http://www .image-net.org/challenges/LSVRC/ +2012. [Online]. Available: http://www.image-net.org/challenges/LSVRC/ 2012/nonpub-downloads [IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network -training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167 , -Feb. 2015. [Online]. Available: https://arxiv .org/abs/1502 .03167 -[JXF+16]X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation -units,” in Thirtieth AAAI Conference on Artificial Intelligence , Dec. 2016. -[Online]. Available: https://arxiv .org/abs/1512 .07030 +training by reducing internal covariate shift,”arXiv preprint arXiv:1502.03167, +Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167 +[JXF+16] X. Jin, C. Xuet al., “Deep learning with s-shaped rectified linear activation +units,” inThirtieth AAAI Conference on Artificial Intelligence, Dec. 2016. +[Online]. Available: https://arxiv.org/abs/1512.07030 [Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr. -2011. [Online]. Available: http://karpathy .github.io/2011/04/27/manuallyclassifying-cifar10/ +2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manuallyclassifying-cifar10/ [KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,” -arXiv preprint arXiv:1412.6980 , Dec. 2014. [Online]. Available: https: -//arxiv.org/abs/1412 .6980 +arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https: +//arxiv.org/abs/1412.6980 [KH09] A. Krizhevsky and G. 
Hinton, “Learning multiple layers of features from tiny -images,” Apr. 2009. [Online]. Available: https://www .cs.toronto.edu/~kriz/ -learning-features-2009-TR .pdf -[KMN+16]N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning: -Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836 , -Sep. 2016. [Online]. Available: https://arxiv .org/abs/1609 .04836 +images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/ +learning-features-2009-TR.pdf +[KMN+16] N. S. Keskar, D. Mudigereet al., “On large-batch training for deep learning: +Generalization gap and sharp minima,”arXiv preprint arXiv:1609.04836, +Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836 [Koc15] T. Kocmánek, “HyperNEAT and novelty search for image recognition,” Ph.D. dissertation, Master’s thesis, Czech Technical University in Prague, 2015. -[Online]. Available: http://kocmi .tk/photos/DiplomaThesis .pdf -[KPY+15]Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks -for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530 , -Nov. 2015. [Online]. Available: https://arxiv .org/abs/1511 .06530 -[KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to -cluster analysis . John Wiley & Sons, 2009, vol. 344. +[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf +[KPY+15] Y.-D. Kim, E. Parket al., “Compression of deep convolutional neural networks +for fast and low power mobile applications,”arXiv preprint arXiv:1511.06530, +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530 +[KR09] L. Kaufman and P. J. Rousseeuw,Finding groups in data: an introduction to +cluster analysis. John Wiley & Sons, 2009, vol. 344. [Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https: -//www.cs.toronto.edu/~kriz/cifar .html +//www.cs.toronto.edu/~kriz/cifar.html [KS02] V. Kurkova and M. 
Sanguineti, “Comparison of worst case errors in linear -and neural network approximation,” IEEE Transactions on Information +and neural network approximation,”IEEE Transactions on Information Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: -http://ieeexplore .ieee.org/abstract/document/971754/ +http://ieeexplore.ieee.org/abstract/document/971754/ [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification with deep convolutional neural networks,” in Advances in Neural -Information Processing Systems 25 (NIPS) , F. Pereira, C. J. C. Burges +Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. -Available: http://papers .nips.cc/paper/4824-imagenet-classification-withdeep-convolutional-neural-networks - .pdf -[KSlB+10]K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature +Available: http://papers.nips.cc/paper/4824-imagenet-classification-withdeep-convolutional-neural-networks.pdf + +[KSlB+10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature hierarchies for visual recognition,” in Advances in Neural Information Processing Systems 23 (NIPS) , J. D. Lafferty, C. K. I. Williams et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. -Available: http://papers .nips.cc/paper/4133-learning-convolutional-featurehierarchies-for-visual-recognition - .pdf -[LAE+16]W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in -European Conference on Computer Vision (ECCV) . Springer, 2016, pp. -21–37. [Online]. Available: https://arxiv .org/abs/1512 .02325 -[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne .readthedocs .io/ -en/latest/modules/layers/noise .html#lasagne .layers.DropoutLayer -[LBBH98] Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document -recognition,” Proceedings of the IEEE , vol. 86, no. 11, pp. 2278–2324, Nov. -1998. [Online]. 
Available: http://yann .lecun.com/exdb/publis/pdf/lecun01a.pdf +Available: http://papers.nips.cc/paper/4133-learning-convolutional-featurehierarchies-for-visual-recognition.pdf + +[LAE+16] W. Liu, D. Anguelov et al., “SSD: Single shot multibox detector,” in +European Conference on Computer Vision (ECCV). Springer, 2016, pp. +21–37. [Online]. Available: https://arxiv.org/abs/1512.02325 +[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/ +en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer +[LBBH98] Y. LeCun, L. Bottouet al., “Gradient-based learning applied to document +recognition,”Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov. +1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun01a.pdf [LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature, vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available: -http://www .nature.com/nature/journal/v521/n7553/abs/nature14539 .html -[LBOM98] Y. A. LeCun, L. Bottou et al.,Efficient BackProp , ser. Lecture Notes in +http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html +[LBOM98] Y. A. LeCun, L. Bottouet al., Efficient BackProp, ser. Lecture Notes in Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol. -1524, pp. 9–50. [Online]. Available: http://dx .doi.org/10.1007/3-540-49430-8 -[LDS+89]Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989, -pp. 598–605. [Online]. Available: http://yann .lecun.com/exdb/publis/pdf/ +1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8 +[LDS+89] Y. LeCun, J. S. Denkeret al., “Optimal brain damage.” inNIPs, vol. 2, 1989, +pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/ lecun-90b.pdf [Le13] Q. V. Le, “Building high-level features using large scale unsupervised learning,” in International conference on acoustics, speech and signal -processing . IEEE, 2013, pp. 8595–8598. [Online]. 
Available: http: -//ieeexplore .ieee.org/stamp/stamp .jsp?arnumber=6639343 +processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http: +//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343 [LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in -Conference on Computer Vision and Pattern Recognition (CVPR) . IEEE, Sep. -2016, pp. 4013–4021. [Online]. Available: https://arxiv .org/abs/1509 .09308 +Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. +2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308 [LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in -convolutional neural networks: Mixed, gated, and tree,” in International -Conference on Artificial Intelligence and Statistics , 2016. [Online]. Available: -https://arxiv .org/abs/1509 .08985v2 +convolutional neural networks: Mixed, gated, and tree,” inInternational +Conference on Artificial Intelligence and Statistics, 2016. [Online]. Available: +https://arxiv.org/abs/1509.08985v2 [LH16] I. Loshchilov and F. Hutter, “SGDR: stochastic gradient descent -with warm restarts,” Learning , Aug. 2016. [Online]. Available: https: -//arxiv.org/abs/1608 .03983 -[LJD+16]L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to -hyperparameter optimization,” arXiv preprint arXiv:1603.06560 , Mar. 2016. -[Online]. Available: https://arxiv .org/abs/1603 .06560 -[LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885 , -Jun. 2016. [Online]. Available: https://arxiv .org/abs/1606 .01885 +with warm restarts,” Learning, Aug. 2016. [Online]. Available: https: +//arxiv.org/abs/1608.03983 +[LJD+16] L. Li, K. Jamiesonet al., “Hyperband: A novel bandit-based approach to +hyperparameter optimization,”arXiv preprint arXiv:1603.06560, Mar. 2016. +[Online]. Available: https://arxiv.org/abs/1603.06560 +[LM16] K. Li and J. Malik, “Learning to optimize,”arXiv preprint arXiv:1606.01885, +Jun. 2016. 
[Online]. Available: https://arxiv.org/abs/1606.01885 [LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for -semantic segmentation,” in Conference on Computer Vision and Pattern -Recognition (CVPR) . IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available: -https://arxiv .org/abs/1411 .4038v2 -[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513 , Mar. -2017. [Online]. Available: https://arxiv .org/abs/1703 .01513 +semantic segmentation,” inConference on Computer Vision and Pattern +Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available: +https://arxiv.org/abs/1411.4038v2 +[LX17] A. Y. Lingxi Xie, “Genetic CNN,”arXiv preprint arXiv:1703.01513, Mar. +2017. [Online]. Available: https://arxiv.org/abs/1703.01513 [Maj17] S. Majumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available: -https://github .com/titu1994/DenseNet +https://github.com/titu1994/DenseNet [Mar08] M. Marszałek, “INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online]. -Available: http://lear .inrialpes.fr/people/marszalek/data/ig02/ +Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/ [MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter -optimization through reversible learning,” in International Conference on -Machine Learning (ICML) , 2015, pp. 2113–2122. -[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of -Machine Learning Research , vol. 9, no. Nov, pp. 2579–2605, 2008. +optimization through reversible learning,” inInternational Conference on +Machine Learning (ICML), 2015, pp. 2113–2122. +[MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,”Journal of +Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008. [MHN13] A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities -improve neural network acoustic models,” in Proc. ICML , vol. 30, -no. 1, 2013. [Online]. 
Available: https://web .stanford.edu/~awni/papers/ -relu_hybrid_icml2013_final .pdf +improve neural network acoustic models,” in Proc. ICML, vol. 30, +no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/ +relu_hybrid_icml2013_final.pdf [MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv preprint arXiv:1511.06422 , Nov. 2015. [Online]. Available: https: -//arxiv.org/abs/1511 .06422 +//arxiv.org/abs/1511.06422 [MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in -nervous activity,” The bulletin of mathematical biophysics , vol. 5, no. 4, pp. +nervous activity,”The bulletin of mathematical biophysics, vol. 5, no. 4, pp. 115–133, 1943. [MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for -reducing dataset bias in person re-identification,” in International Conference -on Advanced Video and Signal Based Surveillance (AVSS) , no. 12, Aug. 2015, -pp. 1–6. [Online]. Available: http://ieeexplore .ieee.org/abstract/document/ +reducing dataset bias in person re-identification,” inInternational Conference +on Advanced Video and Signal Based Surveillance (AVSS), no. 12, Aug. 2015, +pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/ 7301739/ [MS07] M. Marszalek and C. Schmid, “Accurate object localization with shape masks,” in Conference on Computer Vision and Pattern -Recognition (CVPR) . IEEE, 2007, pp. 1–8. [Online]. Available: http: -//ieeexplore .ieee.org/document/4270110/ +Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http: +//ieeexplore.ieee.org/document/4270110/ [MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN -advances on the ImageNet,” arXiv preprint arXiv:1606.02228 , Jun. 2016. -[Online]. Available: https://arxiv .org/abs/1606 .02228 +advances on the ImageNet,”arXiv preprint arXiv:1606.02228, Jun. 2016. +[Online]. Available: https://arxiv.org/abs/1606.02228 [MV16] A. Mahendran and A. 
Vedaldi, “Visualizing deep convolutional neural -networks using natural pre-images,” InternationalJournal of Computer Vision , -pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv .org/abs/1512 .02017 -[NDRT13] N. Natarajan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances -in Neural Information Processing Systems 26 (NIPS) , C. J. C. Burges, -L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online]. -Available: http://papers .nips.cc/paper/5073-learning-with-noisy-labels .pdf +networks using natural pre-images,”International Journal of Computer Vision, +pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017 +[NDRT13] N. Natarajan, I. S. Dhillonet al., “Learning with noisy labels,” inAdvances +in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges, +L. Bottouet al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online]. +Available: http://papers.nips.cc/paper/5073-learning-with-noisy-labels.pdf [Nes83] Y. Nesterov, “A method of solving a convex programming problem with -convergence rate o (1/k2),” in Soviet Mathematics Doklady , vol. 27, no. 2, +convergence rate o (1/k2),” inSoviet Mathematics Doklady, vol. 27, no. 2, 1983, pp. 372–376. [new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available: -http://www1 .icsi.berkeley.edu/Speech/faq/nn-train .html +http://www1.icsi.berkeley.edu/Speech/faq/nn-train.html [Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS Talk, Dec. 2016. [NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft -weight-sharing,” Neural computation , vol. 4, no. 4, pp. 473–493, 1992. -[Online]. Available: https://www .cs.toronto.edu/~hinton/absps/sunspots .pdf +weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992. +[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf [NH02] R. T. Ng and J. 
Han, “CLARANS: A method for clustering objects for spatial -data mining,” IEEE transactions on knowledge and data engineering , vol. 14, +data mining,”IEEE transactions on knowledge and data engineering, vol. 14, no. 5, pp. 1003–1016, 2002. -[NWC+11a]Y. Netzer, T. Wang et al., “Reading digits in natural images with +[NWC+11a] Y. Netzer, T. Wang et al., “Reading digits in natural images with unsupervised feature learning,” in NIPS workshop on deep learning and -unsupervised feature learning , vol. 2011, no. 2, 2011, p. 5. [Online]. Available: -http://ufldl .stanford.edu/housenumbers/nips2011_housenumbers .pdf -[NWC+11b]Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,” -2011. [Online]. Available: http://ufldl .stanford.edu/housenumbers/ +unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available: +http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf +[NWC+11b] Y. Netzer, T. Wanget al., “The street view house numbers (SVHN) dataset,” +2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/ [NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization: Uncovering the different types of features learned by each neuron in deep -neural networks,” arXiv preprint arXiv:1602.03616 , May 2016. [Online]. -Available: https://arxiv .org/abs/1602 .03616 +neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online]. +Available: https://arxiv.org/abs/1602.03616 [OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive classifiers for unbalanced classification problems: A study on the performance -scores,”arXiv preprint arXiv:1608.08984 , Aug. 2016. [Online]. Available: -https://arxiv .org/abs/1608 .08984 -[PMW+15]N. Papernot, P. McDaniel et al., “Distillation as a defense to adversarial -perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508 , -Nov. 2015. [Online]. 
Available: https://arxiv .org/abs/1511 .04508 +scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.08984 +[PMW+15] N. Papernot, P. McDanielet al., “Distillation as a defense to adversarial +perturbations against deep neural networks,”arXiv preprint arXiv:1511.04508, +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508 [Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer -Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx .doi.org/ +Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/ 10.1007/3-540-49430-8_3 -[RDS+14]O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition -challenge,” arXiv preprint arXiv:1409.0575 , vol. 115, no. 3, pp. 211–252, Sep. -2014. [Online]. Available: https://arxiv .org/abs/1409 .0575 +[RDS+14] O. Russakovsky, J. Denget al., “Imagenet large scale visual recognition +challenge,”arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep. +2014. [Online]. Available: https://arxiv.org/abs/1409.0575 [RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks -for biomedical image segmentation,” in International Conference on Medical -Image Computing and Computer-Assisted Intervention . Springer, 2015, pp. -234–241. [Online]. Available: https://arxiv .org/abs/1505 .04597 +for biomedical image segmentation,” inInternational Conference on Medical +Image Computing and Computer-Assisted Intervention. Springer, 2015, pp. +234–241. [Online]. Available: https://arxiv.org/abs/1505.04597 [RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density - of neurons in the hyperneat substrate,” in Conference on Genetic and -evolutionary computation , no. 12. ACM, 2010, pp. 563–570. + of neurons in the hyperneat substrate,” inConference on Genetic and +evolutionary computation, no. 12. ACM, 2010, pp. 563–570. [RSG16] M. T. Ribeiro, S. Singh, and C. 
Guestrin, “"why should i trust you?": -Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938 , -Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602 .04938 +Explaining the predictions of any classifier,”arXiv preprint arXiv:1602.04938, +Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.04938 [Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,” -arXiv preprint arXiv:1609.04747 , Sep. 2016. [Online]. Available: https: -//arxiv.org/abs/1609 .04747 +arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https: +//arxiv.org/abs/1609.04747 [SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks -applied to house numbers digit classification,” in International Conference -on Pattern Recognition (ICPR) , no. 21. IEEE, Apr. 2012, pp. 3288–3291. -[Online]. Available: https://arxiv .org/abs/1204 .3968 +applied to house numbers digit classification,” inInternational Conference +on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291. +[Online]. Available: https://arxiv.org/abs/1204.3968 [SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding -for evolving large-scale neural networks,” Artificial life , vol. 15, no. 2, pp. 185– -212, 2009. [Online]. Available: http://ieeexplore .ieee.org/document/6792316/ -[SEZ+13]P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization -and detection using convolutional networks,” arXiv preprint arXiv:1312.6229 , -Feb. 2013. [Online]. Available: https://arxiv .org/abs/1312 .6229v4 -[SHK+14]N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to -prevent neural networks from overfitting.” Journal of Machine Learning -Research , vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: -https://www .cs.toronto.edu/~hinton/absps/JMLRdropout .pdf -[SHY+13]A. Senior, G. 
Heigold et al., “An empirical study of learning rates in deep +for evolving large-scale neural networks,”Artificial life, vol. 15, no. 2, pp. 185– +212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/ +[SEZ+13] P. Sermanet, D. Eigenet al., “Overfeat: Integrated recognition, localization +and detection using convolutional networks,”arXiv preprint arXiv:1312.6229, +Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4 +[SHK+14] N. Srivastava, G. E. Hinton et al. , “Dropout: a simple way to +prevent neural networks from overfitting.”Journal of Machine Learning +Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: +https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf +[SHY+13] A. Senior, G. Heigoldet al., “An empirical study of learning rates in deep neural networks for speech recognition,” in International Conference on -Acoustics, Speech and Signal Processing . IEEE, 2013, pp. 6724–6728. [Online]. -Available: http://ieeexplore .ieee.org/document/6638963/?arnumber=6638963 -[SIV16] C.Szegedy, S.Ioffe, andV.Vanhoucke, “Inception-v4, inception-resnetandthe -impact of residual connections on learning,” arXiv preprint arXiv:1602.07261 , -Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602 .07261 +Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online]. +Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963 +[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the +impact of residual connections on learning,”arXiv preprint arXiv:1602.07261, +Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261 [SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding for face recognition and clustering,” in Conference on Computer Vision -and Pattern Recognition (CVPR) . IEEE, Mar. 2015, pp. 815–823. [Online]. -Available: https://arxiv .org/abs/1503 .03832 +and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 815–823. 
[Online]. +Available: https://arxiv.org/abs/1503.03832 [SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale convolutional networks,” in International Joint Conference on Neural Networks (IJCNN) , Jul. 2011, pp. 2809–2813. [Online]. Available: -http://ieeexplore .ieee.org/document/6033589/ -[SLJ+15]C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference -on Computer Vision and Pattern Recognition (CVPR) . IEEE, Sep. 2015, pp. -1–9. [Online]. Available: https://arxiv .org/abs/1409 .4842 +http://ieeexplore.ieee.org/document/6033589/ +[SLJ+15] C. Szegedy, W. Liuet al., “Going deeper with convolutions,” inConference +on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp. +1–9. [Online]. Available: https://arxiv.org/abs/1409.4842 [SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through -augmenting topologies,” Evolutionary computation , vol. 10, no. 2, pp. 99–127, -2002. [Online]. Available: http://www .mitpressjournals .org/doi/abs/10 .1162/ +augmenting topologies,”Evolutionary computation, vol. 10, no. 2, pp. 99–127, +2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/ 106365602320169811 [SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to the nonlinear dynamics of learning in deep linear neural networks,” -arXiv preprint arXiv:1312.6120 , Dec. 2013. [Online]. Available: https: -//arxiv.org/abs/1312 .6120 +arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https: +//arxiv.org/abs/1312.6120 [SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive -networks,” arXiv preprint arXiv:1410.1165 , Oct. 2014. [Online]. Available: -https://arxiv .org/abs/1410 .1165 +networks,”arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.1165 [SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition -benchmark.” [Online]. 
Available: http://benchmark .ini.rub.de/?section= +benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section= gtsrb&subsection=news [SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking -machine learning algorithms for traffic sign recognition,” Neural Networks , -no. 0, pp. –, 2012. [Online]. Available: http://www .sciencedirect .com/science/ +machine learning algorithms for traffic sign recognition,”Neural Networks, +no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/ article/pii/S0893608012000457 -[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint -arXiv:1606.02492 , 2016.[Online].Available: https://arxiv .org/abs/1606 .02492 -[SVI+15]C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture -for computer vision,” arXiv preprint arXiv:1512.00567 , Dec. 2015. [Online]. -Available: https://arxiv .org/abs/1512 .00567v3 +[SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,”arXiv preprint +arXiv:1606.02492, 2016.[Online].Available: https://arxiv.org/abs/1606.02492 +[SVI+15] C. Szegedy, V. Vanhouckeet al., “Rethinking the inception architecture +for computer vision,”arXiv preprint arXiv:1512.00567, Dec. 2015. [Online]. +Available: https://arxiv.org/abs/1512.00567v3 [SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional networks: Visualising image classification models and saliency maps,” -arXiv preprint arXiv:1312.6034 , Dec. 2013. [Online]. Available: https: -//arxiv.org/abs/1312 .6034 +arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https: +//arxiv.org/abs/1312.6034 [SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for -large-scale image recognition,” arXiv preprint arXiv:1409.1556 , Sep. 2014. -[Online]. Available: https://arxiv .org/abs/1409 .1556 -[SZS+13]C. Szegedy, W. Zaremba et al., “Intriguing properties of neural -networks,” arXiv preprint arXiv:1312.6199 , Dec. 2013. [Online]. 
Available: -https://arxiv .org/abs/1312 .6199v4 +large-scale image recognition,”arXiv preprint arXiv:1409.1556, Sep. 2014. +[Online]. Available: https://arxiv.org/abs/1409.1556 +[SZS+13] C. Szegedy, W. Zaremba et al. , “Intriguing properties of neural +networks,”arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available: +https://arxiv.org/abs/1312.6199v4 [TF-16a] “MNIST for ML beginners,” Dec. 2016. [Online]. Available: https: //www.tensorflow.org/tutorials/mnist/beginners/ -[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www .tensorflow.org/ +[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/ api_docs/python/nn/activation_functions_#dropout [TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude,” COURSERA: Neural -Networks for Machine Learning , vol. 4, no. 2, 2012. [Online]. Available: -http://www .cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6 .pdf +Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available: +http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf [Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,” Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martinthoma.com/write-math [Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available: -https://martin-thoma .com/twiddle/ +https://martin-thoma.com/twiddle/ [Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint -arXiv:1602.06541 , Feb. 2016. [Online]. Available: https://arxiv .org/abs/ +arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/ 1602.06541 -[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380 , Jan. -2017. [Online]. Available: https://arxiv .org/abs/1701 .08380 +[Tho17a] M. Thoma, “The HASYv2 dataset,”arXiv preprint arXiv:1701.08380, Jan. +2017. [Online]. Available: https://arxiv.org/abs/1701.08380 [Tho17b] M. 
Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available: -https://martin-thoma .com/msthesis +https://martin-thoma.com/msthesis [VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep -learning,” arXiv preprint arXiv:1312.5355 , Dec. 2013. [Online]. Available: -https://arxiv .org/abs/1312 .5355 -[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing . +learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available: +https://arxiv.org/abs/1312.5355 +[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing. Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available: http://dx.doi.org/10.1007/978-94-015-7744-1_2 -[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent -networks with long term dependencies,” arXiv preprint arXiv:1702.00071 , -Jan. 2017. [Online]. Available: https://arxiv .org/abs/1702 .00071 -[WHH+89]A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay +[VTKP17] E. Vorontsov, C. Trabelsiet al., “On orthogonality and learning recurrent +networks with long term dependencies,”arXiv preprint arXiv:1702.00071, +Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071 +[WHH+89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay neural networks,” IEEE transactions on acoustics, speech, and signal -processing , vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: -http://ieeexplore .ieee.org/document/21701/ +processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: +http://ieeexplore.ieee.org/document/21701/ [Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist - reinforcement learning,” Machine learning , vol. 8, no. 3-4, pp. 229–256, + reinforcement learning,”Machine learning, vol. 8, no. 3-4, pp. 229–256, 1992. -[WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling -and Normalization Methods for Action Recognition . 
Berlin, Heidelberg: +[WWQ13] X. Wang, L. Wang, and Y. Qiao,A Comparative Study of Encoding, Pooling +and Normalization Methods for Action Recognition. Berlin, Heidelberg: Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online]. -Available: http://dx .doi.org/10.1007/978-3-642-37431-9_44 -[WYS+15]R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv -preprint arXiv:1501.02876 , vol. 7, no. 8, Jul. 2015. [Online]. Available: -https://arxiv .org/abs/1501 .02876v4 -[WZZ+13]L.Wan, M.Zeiler etal., “Regularizationofneuralnetworksusingdropconnect,” -inInternational Conference on Machine Learning (ICML) , no. 30, 2013, -pp. 1058–1066. [Online]. Available: http://www .matthewzeiler .com/pubs/ -icml2013/icml2013 .pdf -[XGD+16]S. Xie, R. Girshick et al., “Aggregated residual transformations for deep -neural networks,” arXiv preprint arXiv:1611.05431 , Nov. 2016. [Online]. -Available: https://arxiv .org/abs/1611 .05431v1 +Available: http://dx.doi.org/10.1007/978-3-642-37431-9_44 +[WYS+15] R. Wu, S. Yanet al., “Deep image: Scaling up image recognition,”arXiv +preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available: +https://arxiv.org/abs/1501.02876v4 +[WZZ+13] L. Wan, M. Zeileretal., “Regularization of neural networks using dropconnect,” +in International Conference on Machine Learning (ICML), no. 30, 2013, +pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/ +icml2013/icml2013.pdf +[XGD+16] S. Xie, R. Girshicket al., “Aggregated residual transformations for deep +neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.05431v1 [Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged -stochastic gradient descent,” arXiv preprint arXiv:1107.2490 , Jul. 2011. -[Online]. Available: https://arxiv .org/abs/1107 .2490 +stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011. +[Online]. 
Available: https://arxiv.org/abs/1107.2490 [XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in -convolutional network,” arXiv preprint arXiv:1505.00853 , May 2015. [Online]. -Available: https://arxiv .org/abs/1505 .00853 +convolutional network,”arXiv preprint arXiv:1505.00853, May 2015. [Online]. +Available: https://arxiv.org/abs/1505.00853 [XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on -support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available: -https://www .sec.in.tum.de/assets/Uploads/ecai2 .pdf -[XZY+14]T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolutional - neural network for large-scale image classification,” in International -Conference on Multimedia , no. 22. ACM, 2014, pp. 177–186. +support vector machines.” inECAI, 2012, pp. 870–875. [Online]. Available: +https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf +[XZY+14] T. Xiao, J. Zhanget al., “Error-driven incremental learning in deep convolutional + neural network for large-scale image classification,” inInternational +Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186. [YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten -digits,” 1998. [Online]. Available: http://yann .lecun.com/exdb/mnist/ -[ZBH+16]C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking -generalization,” arXiv preprint arXiv:1611.03530 , Nov. 2016. [Online]. -Available: https://arxiv .org/abs/1611 .03530 +digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/ +[ZBH+16] C. Zhang, S. Bengioet al., “Understanding deep learning requires rethinking +generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.03530 [ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in -Advances in Neural Information Processing Systems 29 (NIPS) , D. D. Lee, -M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 
2016, pp. 1082–1090. -[Online]. Available: http://papers .nips.cc/paper/6340-doubly-convolutionalneural-networks - .pdf +Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee, +M. Sugiyamaet al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090. +[Online]. Available: http://papers.nips.cc/paper/6340-doubly-convolutionalneural-networks.pdf + -[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category -detection,” in European Conference on Computer Vision (ECCV) . Springer, -Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv .org/abs/1407 .3867 -[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint -arXiv:1212.5701 , Dec. 2012. [Online]. Available: https://arxiv .org/abs/ +[ZDGD14] N. Zhang, J. Donahueet al., “Part-based R-CNNs for fine-grained category +detection,” inEuropean Conference on Computer Vision (ECCV). Springer, +Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867 +[Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,”arXiv preprint +arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv .org/abs/ 1212.5701v1 [ZF13] M. D. Zeiler and R. Fergus, “Stochastic pooling for regularization of deep -convolutional neural networks,” arXiv preprint arXiv:1301.3557 , Jan. 2013. -[Online]. Available: https://arxiv .org/abs/1301 .3557v1 +convolutional neural networks,”arXiv preprint arXiv:1301.3557, Jan. 2013. +[Online]. Available: https://arxiv.org/abs/1301.3557v1 [ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional -networks,” in European Conference on Computer Vision (ECCV) . Springer, -Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv .org/abs/1311 .2901 +networks,” inEuropean Conference on Computer Vision (ECCV). Springer, +Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901 [Zho16] B. Zhou, “Places2 download,” 2016. [Online]. 
Available: http:// -places2.csail.mit.edu/download .html +places2.csail.mit.edu/download.html [ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv preprint arXiv:1605.07146 , May 2016. [Online]. Available: https: -//arxiv.org/abs/1605 .07146 -[ZKL+15]B. Zhou, A. Khosla et al., “Learning deep features for discriminative -localization,” arXiv preprint arXiv:1512.04150 , Dec. 2015. [Online]. Available: -https://arxiv .org/abs/1512 .04150 -[ZKL+16]B. Zhou, A. Khosla et al., “Places: An image database for deep scene -understanding,” arXiv preprint arXiv:1610.02055 , Oct. 2016. [Online]. -Available: https://arxiv .org/abs/1610 .02055 +//arxiv.org/abs/1605.07146 +[ZKL+15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative +localization,”arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available: +https://arxiv.org/abs/1512.04150 +[ZKL+16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene +understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online]. +Available: https://arxiv.org/abs/1610.02055 [ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement -learning,” arXiv preprint arXiv:1611.01578 , Nov. 2016. [Online]. Available: -https://arxiv .org/abs/1611 .01578 -[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,” -arXiv preprint arXiv:1506.02351 , Jun. 2015. [Online]. Available: https: -//arxiv.org/abs/1506 .02351v1 -[ZYL+15]H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus -units,” in International Joint Conference on Neural Networks (IJCNN) , Jul. +learning,”arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available: +https://arxiv.org/abs/1611.01578 +[ZMGL15] J. Zhao, M. Mathieu et al. , “Stacked what-where auto-encoders,” +arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https: +//arxiv.org/abs/1506.02351v1 +[ZYL+15] H. Zheng, Z. 
Yanget al., “Improving deep neural networks using softplus +units,” inInternational Joint Conference on Neural Networks (IJCNN), Jul. 2015, pp. 1–4. I. Glossary -ANNartificial neural network. 4 -ASOAutomatic Structure Optimization. 29 -CMOConfusion Matrix Ordering. 2, 35, 36, 51, 52, 71 -CNNConvolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60, +ANN artificial neural network. 4 +ASO Automatic Structure Optimization. 29 +CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71 +CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60, 71, 72, 79, 82–84, 88–91 -ELUExponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84 -ESearly stopping. 68 -FCFully Connected. 91, 93 -FLOPfloating point operation. 27, 29, 87, 88, 90, 91, 93 -GAgenetic algorithm. 30 -GANGenerative Adverserial Network. 80 -GPUgraphics processing unit. 37, 40, 59, 63, 67, 88, 91 -HSVhue, saturation, value. 79 -LCNLocal Contrast Normalization. 91 -LDAlinear discriminant analysis. 79 -LReLUleaky rectified linear unit. 63, 72, 77, 78, 84 -MLPmultilayer perceptron. 3–6, 28 -NAGNesterov Accellerated Momentum. 83 -NEATNeuroEvolution of Augmenting Topologies. 83 -OBDOptimal Brain Damage. 29 - -PCAprincipal component analysis. 79 +ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84 +ES early stopping. 68 +FC Fully Connected. 91, 93 +FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93 +GA genetic algorithm. 30 +GAN Generative Adverserial Network. 80 +GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91 +HSV hue, saturation, value. 79 +LCN Local Contrast Normalization. 91 +LDA linear discriminant analysis. 79 +LReLU leaky rectified linear unit. 63, 72, 77, 78, 84 +MLP multilayer perceptron. 3–6, 28 +NAG Nesterov Accellerated Momentum. 83 +NEAT NeuroEvolution of Augmenting Topologies. 83 +OBD Optimal Brain Damage. 29 + +PCA principal component analysis. 79 PReLU parametrized rectified linear unit. 
60, 61, 63, 64, 72, 77, 78, 84 -ReLUrectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 -SGDstochastic gradient descent. 5, 30, 45, 46, 82 -ZCAZero Components Analysis. 79 +ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 +SGD stochastic gradient descent. 5, 30, 45, 46, 82 +ZCA Zero Components Analysis. 79 diff --git a/read/results/pypdf/2201.00021.txt b/read/results/pypdf/2201.00021.txt index e75180c..79fc9c1 100644 --- a/read/results/pypdf/2201.00021.txt +++ b/read/results/pypdf/2201.00021.txt @@ -1,38 +1,38 @@ -Astronomy &Astrophysics manuscript no. mainArxiv ©ESO 2022 +Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022 April 12, 2022 Discovery of ammonia (9,6) masers in two high-mass star-forming regions -Y . T. Yan ( 闫耀庭)1,⋆, C. Henkel1,2,3, K. M. Menten1, Y . Gong ( 龚龑)1, J. Ott4, T. L. Wilson1, A. Wootten4, A. -Brunthaler1, J. S. Zhang (张江水 )5, J. L. Chen ( 陈家梁)5, and K. Yang ( 杨楷)6,7 -1Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany +Y . T. Yan (闫耀庭)1, ⋆, C. Henkel1,2,3, K. M. Menten1, Y . Gong (龚龑)1, J. Ott4, T. L. Wilson1, A. Wootten4, A. +Brunthaler1, J. S. Zhang (张江水)5, J. L. Chen (陈家梁)5, and K. Yang (杨楷)6,7 +1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany e-mail: yyan@mpifr-bonn.mpg.de -2Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. 
Box 80203, Jeddah 21589, Saudi Arabia -3Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China -4National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, V A 22903-2475, USA -5Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China -6School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China -7Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s +2 Astronomy Department, Faculty of Science, King Abdulaziz University, P. O. Box 80203, Jeddah 21589, Saudi Arabia +3 Xinjiang Astronomical Observatory, Chinese Academy of Sciences, 830011 Urumqi, PR China +4 National Radio Astronomy Observatory, 520 Edgemont Road, Charlottesville, V A 22903-2475, USA +5 Center for Astrophysics, Guangzhou University, 510006 Guangzhou, People’s Republic of China +6 School of Astronomy and Space Science, Nanjing University, 163 Xianlin Avenue, Nanjing 210023, People’s Republic of China +7 Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s Republic of China -Received 13 December 2021 /Accepted 30 December 2021 +Received 13 December 2021 / Accepted 30 December 2021 ABSTRACT Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements. -Aims. Only a few NH 3(9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH 3(9,6) +Aims. Only a few NH3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH 3 (9,6) masers to provide a better observational basis for studying their role in high-mass star-forming regions. -Methods. 
We carried out NH 3(9,6) observations toward Cepheus A and G34.26 +0.15 with the E ffelsberg 100-meter telescope (beam +Methods. We carried out NH3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam size 49′′) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1′′.2). -Results. We discovered new NH 3(9,6) masers in Cep A and G34.26 +0.15, which increases the number of known high-mass starforming - regions hosting NH 3(9,6) masers from five to seven. Long-term monitoring (20 months) at E ffelsberg shows that the intensity -of the (9,6) maser in G34.26 +0.15 is decreasing, while the Cep A maser remains stable. Compared to the E ffelsberg data and assuming -linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH 3(9,6) emission +Results. We discovered new NH3 (9,6) masers in Cep A and G34.26 +0.15, which increases the number of known high-mass starforming + regions hosting NH3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity +of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the Effelsberg data and assuming +linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH3 (9,6) emission arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the -NH 3(9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0′′.28±0′′.10) of the peak position -of the 1.36 cm continuum object, HW2. 
In G34.26 +0.15, three NH 3(9,6) maser spots are observed: one is close to the head of the -cometary ultracompact H iiregion C, and the other two are emitted from a compact region to the west of the hypercompact H iiregion +NH3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0 ′′.28 ±0′′.10) of the peak position +of the 1.36 cm continuum object, HW2. In G34.26 +0.15, three NH3 (9,6) maser spots are observed: one is close to the head of the +cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact Hii region A. Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios. -Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26 +0.15 – ISM: H iiregions – Radio lines: ISM +Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM 1. Introduction Since its discovery more than five decades ago (Cheung et al. 1968), ammonia (NH 3) has been a most valuable molecule for @@ -42,15 +42,16 @@ the centimeter-wavelength inversion transitions of ammonia are regarded as a reliable thermometer of molecular clouds (e.g., Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia masers have attracted attention since the first detection of maser -action in the ( J,K)=(3,3) metastable ( J=K) line toward the +action in the ( J,K) = (3,3) metastable ( J = K) line toward the massive star-forming region W33 (Wilson et al. 1982). Subsequent observations have led to the detection of new metastable -ammonia masers, including15NH 3(3,3) (Mauersberger et al. -1986), NH 3(1,1) (Gaume et al. 1996), NH 3(2,2) (Mills et al. -2018), NH 3(5,5) (Cesaroni et al. 
1992), NH 3(6,6) (Beuther -⋆Member of the International Max Planck Research School (IMPRS) +ammonia masers, including 15NH3 (3,3) (Mauersberger et al. +1986), NH3 (1,1) (Gaume et al. 1996), NH 3 (2,2) (Mills et al. +2018), NH 3 (5,5) (Cesaroni et al. 1992), NH 3 (6,6) (Beuther +⋆ Member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the universities of Bonn and -Cologne.et al. 2007), NH 3(7,7), NH 3(9,9), and NH 3(12,12) (Henkel +Cologne. +et al. 2007), NH 3 (7,7), NH 3 (9,9), and NH 3 (12,12) (Henkel et al. 2013). These have led to the discovery of metastable maser lines in 22 di fferent regions (Mauersberger et al. 1986, 1987; Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; @@ -61,18 +62,19 @@ et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh et al. 2011; Wang et al. 2012; Henkel et al. 2013; Ho ffman & Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al. 2019; Mei et al. 2020; Towner et al. 2021). Compared with the -metastable ammonia masers, detected non-metastable ( J>K) +metastable ammonia masers, detected non-metastable ( J > K) ammonia maser transitions are more numerous. The first highly excited non-metastable ammonia maser was detected by Madden - et al. (1986) in the ( J,K)=(9,6) and (6,3) lines. Thereafter, -many other NH 3non-metastable inversion transition lines have + et al. (1986) in the (J,K) = (9,6) and (6,3) lines. Thereafter, +many other NH3 non-metastable inversion transition lines have been identified as masers, including the (5,3), (5,4), (6,1), (6,2), (6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3), (9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transiArticle - number, page 1 of 10arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 -A&A proofs: manuscript no. mainArxiv + number, page 1 of 10 +arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 +A&A proofs:manuscript no. mainArxiv tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 
2007; -Henkel et al. 2013; Mei et al. 2020). Except for the NH 3(3,3) +Henkel et al. 2013; Mei et al. 2020). Except for the NH 3 (3,3) masers proposed to be associated with four supernova remnants (McEwen et al. 2016), almost all the other ammonia masers are detected in high-mass star-forming regions (HMSFRs). However, @@ -84,94 +86,95 @@ high-mass star plays in their excitation remains unclear. Therefore, indispensable in regard to their overall incidence and association with di fferent environments, which can provide additional constraints on the pumping mechanism of ammonia masers. -So far, a total of 32 NH 3inversion transitions ( ∆K=0 -and∆J=0) have been identified as masers. Among these, and +So far, a total of 32 NH 3 inversion transitions ( ∆K = 0 +and ∆J = 0) have been identified as masers. Among these, and despite arising from energy levels as high as 1090 K above -the ground state, the NH 3(9,6) maser stands out as being the +the ground state, the NH 3 (9,6) maser stands out as being the strongest and most variable one in W51-IRS2 (e.g., Henkel et al. 2013). Maser emission in this line has only been detected in five HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. -1986), and Sgr B2(N) (Mei et al. 2020). The NH 3(3,3) masers +1986), and Sgr B2(N) (Mei et al. 2020). The NH 3 (3,3) masers are thought to be collisionally excited (e.g., Flower et al. 1990; Mangum & Wootten 1994); in contrast, the pumping mechanism - of NH 3(9,6) masers is less well constrained (Madden et al. + of NH3 (9,6) masers is less well constrained (Madden et al. 1986). Brown & Cragg (1991) have studied ortho-ammonia and found that it could possibly pump the (6,3) inversion line, but they did not extend their model to the (9,6) transition due to the fact that collision rates are only known for inversion levels up to -J=6 (e.g., Danby et al. 1988). -NH 3(9,6) masers are found to be strongly variable, similar to +J = 6 (e.g., Danby et al. 1988). 
+NH3 (9,6) masers are found to be strongly variable, similar to H2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. 2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) line showed significant variation in line shape within a time interval of only two days. Mapping of the (9,6) maser toward W51 with very long baseline interferometry (VLBI) suggests that the masers are closer to the H 2O masers than to the OH masers or -to ultracompact (UC) H iiregions (Pratap et al. 1991). While +to ultracompact (UC) H ii regions (Pratap et al. 1991). While Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO -and NH 3masers in W51-IRS2 are very close to each other, their -positions, di ffering by 0′′.065 (∼0.015 pc), do not fully coincide. -In this paper we report the discovery of NH 3(9,6) masers +and NH3 masers in W51-IRS2 are very close to each other, their +positions, differing by 0′′.065 (∼0.015 pc), do not fully coincide. +In this paper we report the discovery of NH 3 (9,6) masers in two HMSFRs, Cepheus A and G34.26 +0.15. This increases the number of (9,6) maser detections in our Galaxy from five to seven. In Sect. 2 observations with the E ffelsberg 100-meter telescope and the Karl G. Jansky Very Large Array (JVLA) are described. Results are presented in Sect. 3. The morphology of -Cep A and G34.26 +0.15 as well as a comparison of the emission -distributions of di fferent tracers with the NH 3(9,6) masers are +Cep A and G34.26+0.15 as well as a comparison of the emission +distributions of di fferent tracers with the NH 3 (9,6) masers are presented in Sect. 4. Our main results are summarized in Sect. 5. 2. Observations and data reduction 2.1. 
Effelsberg observations and data reduction -The NH 3(9,6) line was observed toward Cep A and -G34.26 +0.15 with the 100-meter E ffelsberg telescope1in 2020 +The NH 3 (9,6) line was observed toward Cep A and +G34.26+0.15 with the 100-meter E ffelsberg telescope1 in 2020 January and 2021 February, July, and August. The S14mm double beam secondary focus receiver was employed. The full width -at half maximum (FWHM) beam size is 49′′at 18.5 GHz, the +at half maximum (FWHM) beam size is 49 ′′ at 18.5 GHz, the frequency of the target line. The observations were performed in -position switching mode, and the o ffposition was 10′in azimuth -1Based on observations with the 100-meter telescope of the MPIfR -(Max-Planck-Institut für Radioastronomie) at E ffelsberg.away from the source. For observations made before 2021 August, +position switching mode, and the off position was 10′in azimuth +1 Based on observations with the 100-meter telescope of the MPIfR +(Max-Planck-Institut für Radioastronomie) at Effelsberg. +away from the source. For observations made before 2021 August, we used a spectrometer that covered 2 GHz wide backends -with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1 +with a channel width of 38.1 kHz, corresponding to∼0.62 km s−1 at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar 1975). A high spectral resolution backend with 65536 channels and a bandwidth of 300 MHz was employed in 2021 August, -providing a channel width of 0.07 km s−1at 18.5 GHz. Pointing +providing a channel width of 0.07 km s −1 at 18.5 GHz. Pointing was checked every 2 hours using 3C 286 or NGC 7027. Focus calibrations were done at the beginning of the observations and during sunset and sunrise toward the abovementioned pointing sources. The system temperatures were 100–130 K on a main-beam brightness temperature, TMB, scale. 
This flux density - was calibrated assuming a TMB/Sratio of 1.95 K /Jy, derived + was calibrated assuming aTMB/S ratio of 1.95 K/Jy, derived from continuum cross scans of NGC 7027 (the flux density was adopted from Ott et al. 1994). Calibration uncertainties are estimated - to be∼10%. -We used the GILDAS /CLASS2package (Pety 2005) to reduce + to be ∼10%. +We used the GILDAS /CLASS2 package (Pety 2005) to reduce the spectral line data. A first-order polynomial was subtracted from each spectrum for baseline removal. 2.2. JVLA observations and data reduction -Observations of the NH 3(9,6) line toward Cep A and -G34.26 +0.15 were obtained on 2021 July 13 with the JVLA -of the National Radio Astronomy Observatory3(NRAO) in the +Observations of the NH 3 (9,6) line toward Cep A and +G34.26+0.15 were obtained on 2021 July 13 with the JVLA +of the National Radio Astronomy Observatory 3 (NRAO) in the C configuration (project ID: 21A-157, PI: Yaoting Yan). We employed 27 antennas for the observations. The primary beam -of the JVLA antennas is 150′′(FWHM) at 18.5 GHz. A mixture +of the JVLA antennas is 150 ′′ (FWHM) at 18.5 GHz. A mixture of mixed three-bit and eight-bit samplers were used to perform - the observations. For the NH 3(9,6) line observations, we + the observations. For the NH 3 (9,6) line observations, we used one subband with the eight-bit sampler covering a bandwidth of 16 MHz with full polarization, eight recirculations, and four baseline board pairs (BIBPs) to provide a velocity range -of 260 km s−1with a channel spacing of 0.13 km s−1. Two +of 260 km s −1 with a channel spacing of 0.13 km s −1. Two additional subbands of bandwidth 16 MHz were used to cover -the NH 3(8,5) and (10,7) lines. The three-bit sampler with 32 +the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32 subbands, each with a bandwidth of 128 MHz to cover a total range of 4 GHz between 20–24 GHz, was used to measure the continuum emission. 
3C 286 with a flux density of 2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a calibrator for pointing, flux density, bandpass, and polarization. -J2230 +6946 and J1851 +0035 served as gain calibrators for Cep +J2230+6946 and J1851+0035 served as gain calibrators for Cep A and G34.26 +0.15, respectively. The on-source times were -4m30sand 4m50stoward Cep A and G34.26 +0.15, respectively. +4m30s and 4m50s toward Cep A and G34.26+0.15, respectively. Data from two antennas were lost due to technical issues. The data from the remaining 25 antennas were reduced through the Common Astronomy Software Applications package @@ -183,56 +186,56 @@ the calibrated visibility data to search for additional artifacts before imaging. Then, the uvcontsub task in CASA was used to separate the calibrated visibilities into two parts, one with lineonly data and the other with the continuum data. The tclean task -with a cell size of 0′′.2 and Briggs weighting with robust =0 was +with a cell size of 0′′.2 and Briggs weighting with robust=0 was used to produce the images of spectral line and continuum emission. - The synthesized beams for NH 3(9,6) are 1′′.47×0′′.99 at -2https: //www.iram.fr /IRAMFR /GILDAS / -3The National Radio Astronomy Observatory is a facility of the National + The synthesized beams for NH 3 (9,6) are 1 ′′.47 ×0′′.99 at +2 https://www.iram.fr/IRAMFR/GILDAS/ +3 The National Radio Astronomy Observatory is a facility of the National Science Foundation operated under cooperative agreement by Associated Universities, Inc. -4https: //casa.nrao.edu / +4 https://casa.nrao.edu/ Article number, page 2 of 10 -Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -P.A.=58◦.79 and 1′′.33×1′′.06 at P.A. =5◦.36 toward Cep A +Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +P.A. = 58◦.79 and 1 ′′.33 ×1′′.06 at P.A. = 5◦.36 toward Cep A and G34.26 +0.15, respectively. 
For the 1.36 cm (20–24 GHz) -continuum emission, the synthesized beams are 1′′.08×0′′.67 at -P.A.=60◦.64 and 0′′.95×0′′.71 at P.A. =5◦.91 toward Cep A and -G34.26 +0.15. The typical absolute astrometric accuracy of the -JVLA is∼10% of the synthesized beam5. The flux density scale +continuum emission, the synthesized beams are 1 ′′.08 ×0′′.67 at +P.A. = 60◦.64 and 0′′.95 ×0′′.71 at P.A. = 5◦.91 toward Cep A and +G34.26+0.15. The typical absolute astrometric accuracy of the +JVLA is ∼10% of the synthesized beam5. The flux density scale calibration accuracy is estimated to be within 15%. -Fig. 1. Spectra from NH 3(9,6) transition lines. Left:Top to bottom: -Time sequence of NH 3(9,6) profiles observed toward Cep A with the +Fig. 1. Spectra from NH 3 (9,6) transition lines. Left: Top to bottom: +Time sequence of NH 3 (9,6) profiles observed toward Cep A with the Effelsberg 100-meter telescope (after subtracting a first-order polynomial baseline). A JVLA spectrum is interspersed. The systemic velocity - from CO and HCO+lines is indicated by a dashed blue line. The -two dashed red lines at LSR velocities, VLSR, of−0.90 km s−1and -−0.28 km s−1indicate the central velocities of the two major components. - Right : NH 3(9,6) spectra from G34.26 +0.15. The systemic velocity - from C17O is indicated by a dashed blue line. The three dashed -red lines at VLSR=54.1 km s−1, 55.8 km s−1, and 62.5 km s−1show the + from CO and HCO + lines is indicated by a dashed blue line. The +two dashed red lines at LSR velocities, VLSR, of −0.90 km s −1 and +−0.28 km s −1 indicate the central velocities of the two major components. + Right: NH3 (9,6) spectra from G34.26 +0.15. The systemic velocity + from C 17O is indicated by a dashed blue line. The three dashed +red lines at VLSR = 54.1 km s−1, 55.8 km s−1, and 62.5 km s−1 show the central velocities of the main ammonia emission components. 3. Results The spectra from di fferent epochs are shown in Figs. 1 and 2. 
-Toward Cep A, the NH 3(9,6) line profile from the JVLA is extracted - from an E ffelsberg-beam-sized region (FWHM, 49′′). In -the case of G34.26 +0.15, the NH 3spectrum is below the noise +Toward Cep A, the NH3 (9,6) line profile from the JVLA is extracted + from an Effelsberg-beam-sized region (FWHM, 49′′). In +the case of G34.26 +0.15, the NH3 spectrum is below the noise level if a similarly large beam size is used. Therefore, we derived - the JVLA NH 3(9,6) spectrum from a smaller region, with -radius 3′′.5, that contains all the detected NH 3(9,6) emission. In -Table A.1, the observed NH 3(9,6) line parameters obtained by -Gaussian fits are listed. NH 3(8,5) and (10,7) emission is not detected - by our JVLA observations. The 3 σupper limits for the -NH 3(8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 -5https: //science.nrao.edu /facilities /vla/docs/manuals /oss/performance/positional-accuracy + the JVLA NH3 (9,6) spectrum from a smaller region, with +radius 3′′.5, that contains all the detected NH3 (9,6) emission. In +Table A.1, the observed NH 3 (9,6) line parameters obtained by +Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not detected + by our JVLA observations. The 3 σ upper limits for the +NH3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam −1 +5 https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance/positional-accuracy -Fig. 2. NH 3(9,6) line profiles emphasizing, in contrast to the spectra +Fig. 2.NH3 (9,6) line profiles emphasizing, in contrast to the spectra in Fig. 1, weaker features. Cep A spectra are presented on the left, -G34.26 +0.15 spectra on the right. The two dashed red lines in the left -panels indicate VLSR=1.48 km s−1and 2.89 km s−1. In the right panels, -the two dashed red lines refer to 54.1 km s−1and 55.8 km s−1. -and 27.2 mJy beam−1, respectively. In G34.26 +0.15, the corresponding - 3σupper limits for the NH 3(8,5) and (10,7) lines are -22.1 mJy beam−1and 30.4 mJy beam−1. 
For both sources, sensitivity +G34.26+0.15 spectra on the right. The two dashed red lines in the left +panels indicate VLSR = 1.48 km s−1 and 2.89 km s−1. In the right panels, +the two dashed red lines refer to 54.1 km s−1 and 55.8 km s−1. +and 27.2 mJy beam −1, respectively. In G34.26+0.15, the corresponding + 3σupper limits for the NH3 (8,5) and (10,7) lines are +22.1 mJy beam−1 and 30.4 mJy beam −1. For both sources, sensitivity levels refer to emission from a single channel of width 0.13 km s−1. Taking the larger measured line widths of the (9,6) maser features (see Table A.1), these limits could be further lowered @@ -242,66 +245,67 @@ The 1.36 cm continuum, derived from our JVLA observations, toward Cep A is presented in Fig. 3. Six published compact sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected in our observations. Figure 4 shows the 1.36 cm continuum - in G34.26 +0.15. Three main continuum objects, A, B, and + in G34.26+0.15. Three main continuum objects, A, B, and C, are detected. By using the imfit task in CASA, we measured the continuum flux at 1.36 cm toward individual compact source -components in Cep A and G34.26 +0.15. Details are given in Table +components in Cep A and G34.26+0.15. Details are given in Table A.2. -3.2. NH 3(9,6) emission in Cep A -In 2020 January, NH 3(9,6) emission with a peak flux density of -0.67±0.07 Jy was first detected with the E ffelsberg 100-meter +3.2. NH3 (9,6) emission in Cep A +In 2020 January, NH3 (9,6) emission with a peak flux density of +0.67 ±0.07 Jy was first detected with the E ffelsberg 100-meter telescope in Cep A. Emission with similar strength was also detected in 2021 February and August with the same telescope. Higher velocity resolution data, which were obtained in 2021 August, again with the E ffelsberg 100-meter telescope, show that the (9,6) emission contains two main velocity components. 
-Overall, the flux densities of the NH 3(9,6) emission line measured - with the E ffelsberg 100-meter telescope are, within the calibration +Overall, the flux densities of the NH 3 (9,6) emission line measured + with the Effelsberg 100-meter telescope are, within the calibration uncertainties, unchanged. This is valid for the time interval between 2020 January and August 2021, when we smoothed the obtained spectra to the same velocity resolution. We also see another two weaker components. Figure 2 emphasizes these weak components with an expanded flux density scale. Higher angular resolution data from the JVLA pinpoint the -position of the NH 3(9,6) emission with an o ffset of (−0′′.28, +position of the NH 3 (9,6) emission with an o ffset of ( −0′′.28, 0′′.02) relative to the 1.36 cm continuum peak of Cep A HW2 -(Fig. 3). The deconvolved NH 3(9,6) component size is (0′′.29± -0′′.15)×(0′′.19±0′′.14) at P.A. =174◦, derived with the imfit task +(Fig. 3). The deconvolved NH3 (9,6) component size is (0′′.29 ± +0′′.15) ×(0′′.19 ±0′′.14) at P.A.= 174◦, derived with the imfit task in CASA, and can thus be considered, accounting for the uncertainties, as unresolved. Article number, page 3 of 10 -A&A proofs: manuscript no. mainArxiv +A&A proofs:manuscript no. mainArxiv Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, -and 110×0.125 mJy beam−1. The background image is the Spitzer 4.5µm emission, taken from the Galactic Legacy Infrared Mid-Plane -Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 =22h56m17s.972, and -δJ2000=62◦01′49′′.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black -ellipse denoting the position of the NH 3(9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H 2O (Sobolev et al. 2018), -and CH 3OH (Sanna et al. 
2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates +and 110 ×0.125 mJy beam −1. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane +Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is αJ2000 = 22h56m17s.972, and +δJ2000 = 62◦01′49′′.587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black +ellipse denoting the position of the NH 3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H 2O (Sobolev et al. 2018), +and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates the LSR velocity range of the maser spots. -Fig. 4. 1.36 cm JVLA continuum map of G34.26 +0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, -150, 180, and 200 ×5.0 mJy beam−1. The background image is the Spitzer 4.5µm emission, taken from GLIMPSE. The reference position is -αJ2000=18h53m18s.560, andδJ2000=01◦14′58′′.201, the peak position, is marked by a black cross. The black ellipses show the positions of NH 3 -(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H 2O (Imai et al. 2011), and CH 3OH (Bartkiewicz et al. -2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range ( VLSR) of maser spots. +Fig. 4.1.36 cm JVLA continuum map of G34.26 +0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, +150, 180, and 200 ×5.0 mJy beam −1. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is +αJ2000 = 18h53m18s.560, and δJ2000 = 01◦14′58′′.201, the peak position, is marked by a black cross. 
The black ellipses show the positions of NH 3 +(9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H 2O (Imai et al. 2011), and CH3OH (Bartkiewicz et al. +2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (VLSR) of maser spots. In view of the constancy of the flux densities obtained at Effelsberg and the similar JVLA flux density, measured in 2021 July, there is no missing interferometric flux density in the JVLA data. -3.3. NH 3(9,6) emission in G34.26 +0.15 -The NH 3(9,6) emission was first detected toward G34.26 +0.15 -in 2020 January with the E ffelsberg 100-meter telescope. Highervelocity resolution data from 2021 August show the NH 3(9,6) -emission to be composed of two di fferent components. The spectra +3.3. NH3 (9,6) emission in G34.26+0.15 +The NH3 (9,6) emission was first detected toward G34.26 +0.15 +in 2020 January with the Effelsberg 100-meter telescope. Higher +velocity resolution data from 2021 August show the NH 3 (9,6) +emission to be composed of two different components. The spectra of weak components on a smaller flux density scale are presented in Fig. 2. -Three di fferent locations showing NH 3(9,6) emission are -found toward G34.26 +0.15 (Fig. 4). The deconvolved NH 3(9,6) -component sizes are (1′′.42±0′′.43)×(0′′.54±0′′.62) at P.A. =97◦ -(M1), (0′′.42±0′′.27)×(0′′.15±0′′.27) at P.A. =150◦(M2), and +Three di fferent locations showing NH 3 (9,6) emission are +found toward G34.26+0.15 (Fig. 4). The deconvolved NH3 (9,6) +component sizes are (1′′.42 ±0′′.43) ×(0′′.54 ±0′′.62) at P.A.= 97◦ +(M1), (0′′.42 ±0′′.27) ×(0′′.15 ±0′′.27) at P.A. = 150◦(M2), and Article number, page 4 of 10 -Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -(1′′.17±0′′.34)×(0′′.27±0′′.46) at P.A. =53◦(M3) and are thus +Y . T. 
Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +(1′′.17 ±0′′.34) ×(0′′.27 ±0′′.46) at P.A. = 53◦(M3) and are thus comparable to or smaller than the beam size. -Overall, the NH 3(9,6) line from G34.26 +0.15 weakened +Overall, the NH 3 (9,6) line from G34.26 +0.15 weakened during the time interval from 2020 January to 2021 August by about 70%. A comparison between the JVLA spectrum and the Effelsberg data, assuming a linear decrease in the integrated intensity @@ -309,66 +313,67 @@ Effelsberg data, assuming a linear decrease in the integrated intensity 100-meter observations, suggests there is no missing flux in the JVLA data. This is similar to the situation in Cep A. 4. Discussion -4.1. Morphology of Cep A and G34.26 +0.15 +4.1. Morphology of Cep A and G34.26+0.15 Cep A, at a trigonometric parallax distance of 0.70 ±0.04 kpc (Moscadelli et al. 2009; Dzib et al. 2011), is the second closest -HMSFR (after Orion) and by far the closest NH 3(9,6) maser -known. About 16 compact ( ∼1′′) radio sources (e.g., Hughes & +HMSFR (after Orion) and by far the closest NH 3 (9,6) maser +known. About 16 compact (∼1′′) radio sources (e.g., Hughes & Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been identified in Cep A. Hughes & Wouterloot (1984) discovered these targets at radio wavelengths, which are UC and hypercompact - (HC) H iiregions and /or stellar wind sources, subsequently + (HC) H ii regions and/or stellar wind sources, subsequently named as HW sources. The HW2 object is one of the best known examples of a protostellar jet or disk system driving a powerful outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). -The observed NH 3(9,6) emission is slightly o ffset (−0′′.28, 0′′.02) +The observed NH3 (9,6) emission is slightly offset (−0′′.28, 0′′.02) from the center of HW2 (see Fig. 3). 
-G34.26 +0.15 is an HMSFR located at a distance of 3.3 kpc +G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc (Kuchar & Bania 1994). It hosts four radio continuum components named A, B, C, and D. Component C is a prototypical -cometary UC H iiregion containing a compact head and a di ffuse +cometary UC Hii region containing a compact head and a diffuse tail that extends from east to west (e.g., Reid & Ho 1985; Garay et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components -A and B are HC H iiregions, located to the east of component -C. An extended ring-like H iiregion, called component D, is located +A and B are HC H ii regions, located to the east of component +C. An extended ring-like H ii region, called component D, is located southeast of components A-C. One of the three observed -NH 3(9,6) emission line sources, M1, is close to the head of component +NH3 (9,6) emission line sources, M1, is close to the head of component C, whereas M2 and M3 originate from another compact -region in the west of the HC H iicomponent A (see Fig. 4). -4.2. NH 3(9,6) emission possibly caused by maser action -As shown in Fig. 1, the NH 3(9,6) profiles in Cep A and -G34.26 +0.15 are narrow ( ∆V1/2≤2.0 km s−1), much narrower -than the expected line widths ( ≳4 km s−1) of thermal lines observed +region in the west of the HC H ii component A (see Fig. 4). +4.2. NH3 (9,6) emission possibly caused by maser action +As shown in Fig. 1, the NH 3 (9,6) profiles in Cep A and +G34.26+0.15 are narrow ( ∆V1/2 ≤2.0 km s −1), much narrower +than the expected line widths ( ≳4 km s −1) of thermal lines observed at a similar angular resolution (e.g., Torrelles et al. 1985, 1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity shifts with respect to the systemic velocities of the two -sources are both observed, that is, V∼10 km s−1in Cep A and -V∼4 km s−1in G34.26 +0.15 (see details in Sect. 4.3). 
Furthermore, +sources are both observed, that is, V ∼10 km s−1 in Cep A and +V ∼4 km s−1 in G34.26+0.15 (see details in Sect. 4.3). Furthermore, time variability is observed in the case of G34.26 +0.15, which is also a characteristic feature of maser emission. Additional evidence of their maser nature is the high brightness temperatures of the (9,6) emission spots toward Cep A and -G34.26 +0.15. The spectral parameters are listed in Table A.3. -Because at least a significant part of the NH 3(9,6) emission +G34.26+0.15. The spectral parameters are listed in Table A.3. +Because at least a significant part of the NH 3 (9,6) emission is not resolved by our JVLA observations, the derived brightness temperatures are only lower limits. Nevertheless, the lower limits on the brightness temperature are >800 K in Cep A (see Table A.3), which is much higher than the expected thermal gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito -et al. 2007; Beuther et al. 2018). This strongly suggests thatthe NH 3(9,6) emission in Cep A is due to maser action. Because - G34.26 +0.15 is located at about five times the distance to -Cep A, beam dilution e ffects reduce the lower main beam brightness +et al. 2007; Beuther et al. 2018). This strongly suggests that +the NH 3 (9,6) emission in Cep A is due to maser action. Because + G34.26+0.15 is located at about five times the distance to +Cep A, beam dilution effects reduce the lower main beam brightness temperature limit to 400 K in G34.26 +0.15 (M2) (see Table - A.3). We also note that the luminosity of the NH 3(9,6) emission - in G34.26 +0.15 is higher than or comparable to that in Cep + A.3). We also note that the luminosity of the NH3 (9,6) emission + in G34.26+0.15 is higher than or comparable to that in Cep A, depending on the epoch of our observations. Finally, the non-detections of the (8,5) and (10,7) lines also indicate that the (9,6) line is special. 
This allows us to derive -lower 3σlimits of the (9,6) /(8,5) and (9,6) /(10,7) line intensity -ratios. The (9,6) line arises from ortho-NH 3(K=3n), whereas -the NH 3(8,5) and (10,7) lines are para-NH 3(K,3n) lines. +lower 3σlimits of the (9,6) /(8,5) and (9,6)/(10,7) line intensity +ratios. The (9,6) line arises from ortho-NH 3 (K = 3n), whereas +the NH 3 (8,5) and (10,7) lines are para-NH 3 (K , 3n) lines. The minimum ortho-to-para ratios are in the range 12–42 and 1– 8 toward Cep A and G34.26 +0.15, respectively. The statistical weights for the ortho states are twice as large as those for the @@ -377,112 +382,113 @@ et al. 2013). In Cep A, the line intensity ratios are far higher than this factor of two. Thus, at least in Cep A the higher main beam brightness peak temperature of the (9,6) emission is caused by maser action, perhaps involving exponential amplification, and -the case of G34.26 +0.15 is likely similar. -4.3. Comparison of NH 3(9,6) masers with previously -published (quasi-)thermal NH 3emission +the case of G34.26+0.15 is likely similar. +4.3. Comparison of NH3 (9,6) masers with previously +published (quasi-)thermal NH3 emission The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines show thermal emission toward Cep A over a velocity range of -−13 km s−1≤VLSR≤−4 km s−1(Brown et al. 1981; Güsten +−13 km s −1 ≤VLSR ≤−4 km s −1 (Brown et al. 1981; Güsten et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average -NH 3column density of ∼5×1015cm−2was estimated for a region -of 3′′around HW2 (Torrelles et al. 1999). This high NH 3abundance +NH3 column density of∼5×1015 cm−2 was estimated for a region +of 3′′around HW2 (Torrelles et al. 1999). This high NH 3 abundance could provide a suitable environment for maser species. -Large line widths ( ∆V1/2≃7.0 km s−1) with VLSR∼ −10 km s−1 +Large line widths (∆V1/2 ≃7.0 km s−1) with VLSR ∼ −10 km s−1 in both (1,1) and (2,2) lines were found toward HW2 (Torrelles et al. 1993). 
The velocity is similar to the cloud’s systemic local - standard of rest (LSR) velocity of −11.2 km s−1, which -is based on CO (Narayanan & Walker 1996) and HCO+observations + standard of rest (LSR) velocity of −11.2 km s −1, which +is based on CO (Narayanan & Walker 1996) and HCO + observations (Gómez et al. 1999). Our (9,6) maser is redshifted -(−0.9 km s−1≤VLSR≤2.9 km s−1) and shares positions with -the outflowing gas seen in CO and HCO+with similarly redshifted +(−0.9 km s −1 ≤VLSR ≤2.9 km s −1) and shares positions with +the outflowing gas seen in CO and HCO + with similarly redshifted velocities. Therefore, we argue that the (9,6) masers are related to outflowing gas. -In G34.26 +0.15, a large NH 3column density, -1018.5±0.2cm−2, and a kinetic temperature of 225 ±75 K +In G34.26 +0.15, a large NH 3 column density, +1018.5±0.2 cm−2, and a kinetic temperature of 225 ±75 K were derived by Henkel et al. (1987) based on measurements -of 15 NH 3inversion transitions in the frequency range of +of 15 NH 3 inversion transitions in the frequency range of 22.0–26.0 GHz. These did not include the (9,6) transition. While these lines were measured with a beam size of about 40′′, a comparison of the peak intensities of the optically thick lines with the kinetic temperature reveals the size of the hot, ammonia-emitting core to be only ∼2.5′′. All those measured -NH 3lines were quasi-thermal and had LSR velocities of +NH3 lines were quasi-thermal and had LSR velocities of ∼58.5 km s−1, close to the systemic velocity of ∼58.1 km s−1 -obtained from C17O observations (Wyrowski et al. 2012). -Their line widths ( ∆V1/2≥3.6 km s−1) are larger than what -we find (0.35 km s−1≤∆V1/2≤0.94 km s−1) for each (9,6) +obtained from C 17O observations (Wyrowski et al. 2012). +Their line widths ( ∆V1/2 ≥3.6 km s −1) are larger than what +we find (0.35 km s −1 ≤ ∆V1/2 ≤0.94 km s −1) for each (9,6) maser component (see details in Table A.3). 
In all, we may have observed four di fferent (9,6) velocity features. Three -are blueshifted at VLSR∼53.8 km s−1, 55.8 km s−1, and -56.8 km s−1, and a fourth, tentatively detected, at 62.5 km s−1. +are blueshifted at VLSR ∼ 53.8 km s −1, 55.8 km s −1, and +56.8 km s−1, and a fourth, tentatively detected, at 62.5 km s −1. This tentative redshifted feature was only potentially detected -with E ffelsberg in 2020 January. The velocity is similar to that -of the JVLA measurements on the NH 3(1,1) absorption line -against continuum source C ( ∼7′′resolution; Keto et al. 1987) +with Effelsberg in 2020 January. The velocity is similar to that +of the JVLA measurements on the NH 3 (1,1) absorption line +against continuum source C ( ∼7′′ resolution; Keto et al. 1987) Article number, page 5 of 10 -A&A proofs: manuscript no. mainArxiv -and the NH 3(3,3) emission surrounding continuum source B as -well as the head of C (1′′.4×1′′.2 resolution; Heaton et al. 1989). +A&A proofs:manuscript no. mainArxiv +and the NH3 (3,3) emission surrounding continuum source B as +well as the head of C (1 ′′.4×1′′.2 resolution; Heaton et al. 1989). However, we did not find this redshifted component in our -JVLA observations. Therefore, its position within G34.26 +0.15 +JVLA observations. Therefore, its position within G34.26+0.15 cannot be determined. The blueshifted (9,6) masers with a -velocity range of 53.8–56.8 km s−1(M1, M2, and M3) show -velocities compatible with those of the NH 3(3,3) emission at +velocity range of 53.8–56.8 km s −1 (M1, M2, and M3) show +velocities compatible with those of the NH 3 (3,3) emission at the proper positions (Heaton et al. 1989), which might be a suitable environment for maser species. -4.4. Comparison of NH 3(9,6) masers with other maser lines -To characterize the environment of NH 3(9,6) masers, we can +4.4. 
Comparison of NH3 (9,6) masers with other maser lines +To characterize the environment of NH 3 (9,6) masers, we can compare their positions with respect to those of other maser species (i.e., OH, H 2O, and CH 3OH). Toward Cep A HW2, -many CH 3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna +many CH3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna et al. 2017) and H 2O maser spots (e.g., Torrelles et al. 1998, 2011; Sobolev et al. 2018) are detected and are associated with its disk. Sobolev et al. (2018) also found that most of the H 2O -maser flux is associated with the compact H iiregion HW3d. OH -maser features close to the H iiregions are also seen in HW2 +maser flux is associated with the compact Hii region HW3d. OH +maser features close to the H ii regions are also seen in HW2 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These three kinds of masers in Cep A have a large velocity range of -−25 km s−1≤VLSR≤−2 km s−1and are widespread around -HW2 and HW3, while NH 3(9,6) emission is only detected at -−0.9 km s−1≤VLSR≤2.9 km s−1toward a sub-arcsecondsized +−25 km s −1 ≤VLSR ≤−2 km s −1 and are widespread around +HW2 and HW3, while NH 3 (9,6) emission is only detected at +−0.9 km s −1 ≤ VLSR ≤2.9 km s −1 toward a sub-arcsecondsized region to the west of the peak continuum position of HW2 -(see Fig. 3). This suggests that the NH 3(9,6) maser in Cep A +(see Fig. 3). This suggests that the NH 3 (9,6) maser in Cep A is unique and not related to maser spots seen in other molecular species. -In G34.26 +0.15, OH (Zheng et al. 2000), H 2O (Imai et al. -2011), and CH 3OH (Bartkiewicz et al. 2016) masers have been +In G34.26+0.15, OH (Zheng et al. 2000), H 2O (Imai et al. +2011), and CH3OH (Bartkiewicz et al. 2016) masers have been detected east of source C (Fig. 4), and none of them coincides -with the head of C. The NH 3(9,6) maser M1 is also found -slightly o ffthe head of source C. This could suggest that M1 +with the head of C. 
The NH 3 (9,6) maser M1 is also found +slightly o ff the head of source C. This could suggest that M1 is powered by continuum source C or by an outflow. Near component B, there are some OH and CH 3OH masers but no H 2O -or NH 3masers. A group of H 2O masers, well-known tracers -of outflows, with a large velocity distribution of 43 km s−1≤ -VLSR≤54 km s−1, was found to the west of the centimetercontinuum +or NH 3 masers. A group of H 2O masers, well-known tracers +of outflows, with a large velocity distribution of 43 km s −1 ≤ +VLSR ≤54 km s −1, was found to the west of the centimetercontinuum source A and close to the peak of the millimetercontinuum emission (see details in our Fig. A.2 and also in Fig. 5 -of Imai et al. 2011). The closeness of NH 3(9,6) maser spots M2 +of Imai et al. 2011). The closeness of NH3 (9,6) maser spots M2 and M3 to this group of water masers and their similar velocities -again suggest an association of NH 3(9,6) masers with outflow +again suggest an association of NH 3 (9,6) masers with outflow activity. 4.5. Constraints on pumping scenarios -Our observations have resulted in the detection of NH 3(9,6) +Our observations have resulted in the detection of NH 3 (9,6) masers in Cep A and G34.26 +0.15. The new detections could provide additional constraints on the maser line’s pumping mechanism. As mentioned in Sect. 1, the pumping mechanism of the (9,6) maser is unclear (Madden et al. 1986; Brown & Cragg 1991). Previous studies have suggested that there are three -main pumping scenarios to explain the observed NH 3maser +main pumping scenarios to explain the observed NH 3 maser lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation from the dust continuum emission, (2) line overlap, and (3) collisional pumping. For the first mechanism, infrared photons near 10 µm are needed for vibrational excitation. 
The high dust temperature (∼300 K) of W51-IRS2 can provide substantial infrared photons - near 10 µm, which is used for radiative pumping (Henkelet al. 2013). Both Cep A and G34.26 +0.15 have similar kinetic + near 10 µm, which is used for radiative pumping (Henkel +et al. 2013). Both Cep A and G34.26 +0.15 have similar kinetic temperatures of ≳200 K (Henkel et al. 1987; Patel et al. 2005; Comito et al. 2007; Beuther et al. 2018). This suggests that -high kinetic temperatures are needed to excite NH 3(9,6) masers. +high kinetic temperatures are needed to excite NH3 (9,6) masers. However, it should be noted that the silicate dust absorption feature might dominate at 10 µm (see the spectral energy distribution of Cep A in De Buizer et al. 2017). Additionally, there is @@ -490,17 +496,17 @@ no bright infrared emission around the two (9,6) masers, M2 and M3, in G34.26 +0.15 (see Fig. 4; see also Fig. 11 in De Buizer et al. 2003 for a 10.5 µm map). This indicates that the pumping mechanism via infrared photons near 10 µm may not be viable -to explain the (9,6) masers in Cep A and G34.26 +0.15. Furthermore, +to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore, Wilson & Schilke (1993) argued that radiative pumping by dust emission tends to excite multiple adjacent ammonia maser transitions, which appears to contradict our failure to detect the adjacent (8,5) and (10,7) lines (with respect to quantum numbers and frequency) and to only measure the (9,6) transitions in Cep -A and G34.26 +0.15. Therefore, we suggest that infrared radiation +A and G34.26+0.15. Therefore, we suggest that infrared radiation from dust is not the main pumping source. Madden et al. (1986) suggested that there might be some -line overlaps between the rotational NH 3transitions in the farinfrared - band. However, this would be unlikely to a ffect only the +line overlaps between the rotational NH 3 transitions in the farinfrared + band. 
However, this would be unlikely to affect only the (9,6) line. Nevertheless, far-infrared spectral observations will be needed to clarify this scenario. Based on our observations, the (9,6) maser spots are close @@ -510,39 +516,39 @@ masers show velocity o ffsets with respect to their systemic velocities. This indicates that the (9,6) masers are located at the base of outflows, similar to the H 2O masers. This is supported by VLBI observations that show that (9,6) masers tend to be -closely associated with H 2O masers (Pratap et al. 1991). The observed - time variability in G34.26 +0.15 and W51-IRS2 can also +closely associated with H2O masers (Pratap et al. 1991). The observed + time variability in G34.26+0.15 and W51-IRS2 can also be attributed to episodic molecular outflows. This indicates that collisional pumping could be the driver of the (9,6) maser. On the other hand, collisional pumping has been successfully used -to explain the NH 3(3,3) maser (Walmsley & Ungerechts 1983; +to explain the NH3 (3,3) maser (Walmsley & Ungerechts 1983; Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to pump from the K=0 level to the K=3 level with parity changes, that is, the upper level of the (3,3) metastable transition will be -overpopulated. NH 3(9,6) arises from the ortho species, so a similar +overpopulated. NH3 (9,6) arises from the ortho species, so a similar mechanism might also occur in the case of the (9,6) transition. Further measurements of collisional rates of ammonia will allow us to test this scenario. 5. Summary -We report the discovery of NH 3(9,6) masers in two HMSFRs, +We report the discovery of NH 3 (9,6) masers in two HMSFRs, Cep A and G34.26 +0.15. The narrow line width of the emission - features ( ∆V1/2≤2.0 km s−1) and their high brightness temperatures - (>400 K) indicate the maser nature of the lines. + features (∆V1/2 ≤2.0 km s−1) and their high brightness temperatures + ( > 400 K) indicate the maser nature of the lines. 
The intensity of the (9,6) maser in G34.26 +0.15 is decreasing with time, while toward Cep A the maser is stable based on 20 months of monitoring at E ffelsberg. Linearly interpolating the -integrated intensities obtained at E ffelsberg as a function of time, +integrated intensities obtained at Effelsberg as a function of time, the JVLA measurements show that there is no missing flux density - on scales on the order of 1.2 arcsec (4 ×10−3and 2×10−2pc) + on scales on the order of 1.2 arcsec (4×10−3 and 2 ×10−2 pc) to the total single-dish flux. The JVLA-detected emission indicates - that the NH 3(9,6) maser in Cep A originates from a -sub-arcsecond-sized region slightly (0′′.28±0′′.10) to the west + that the NH 3 (9,6) maser in Cep A originates from a +sub-arcsecond-sized region slightly (0 ′′.28 ±0′′.10) to the west of the peak position of the 1.36 cm continuum object, HW2. In -G34.26 +0.15, three NH 3(9,6) maser spots are observed: one is -close to the head of the cometary UC H iiregion C, and the other -two are emitted from a compact region to the west of the HC H ii +G34.26+0.15, three NH3 (9,6) maser spots are observed: one is +close to the head of the cometary UC Hii region C, and the other +two are emitted from a compact region to the west of the HC Hii Article number, page 6 of 10 -Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions region A. We suggest that the (9,6) masers may be connected to outflowing gas. Higher angular resolution JVLA and VLBI observations are planned to provide more accurate positions and @@ -551,11 +557,11 @@ Acknowledgements. We would like to thank the anonymous referee for the useful comments that improve the manuscript. Y .T.Y . is a member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the Universities of Bonn and Cologne. Y .T.Y . 
would like to thank the China -Scholarship Council (CSC) for its support. We would like to thank the sta ffat -the Effelsberg for their help provided during the observations. We thank the sta ff +Scholarship Council (CSC) for its support. We would like to thank the sta ff at +the Effelsberg for their help provided during the observations. We thank the staff of the JVLA, especially Tony Perreault and Edward Starr, for their assistance with the observations and data reduction. This research has made use of the -NASA /IPAC Infrared Science Archive, which is funded by the National Aeronautics +NASA/IPAC Infrared Science Archive, which is funded by the National Aeronautics and Space Administration and operated by the California Institute of Technology. References @@ -586,7 +592,7 @@ De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598 1127 Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M. 2011, ApJ, 733, 71 -Flower, D. R., O ffer, A., & Schilke, P. 1990, MNRAS, 244, 4P +Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036 Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ, 459, 193 @@ -616,7 +622,8 @@ Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D. 1986, ApJ, 300, L79 Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33 Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352 -Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 +Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 +Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 McEwen, B. C., Pihlström, Y . M., & Sjouwerman, L. O. 2016, ApJ, 826, 189 McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical Society of the Pacific Conference Series, V ol. 
376, Astronomical @@ -672,74 +679,75 @@ Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63 Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117 Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192 Article number, page 7 of 10 -A&A proofs: manuscript no. mainArxiv +A&A proofs:manuscript no. mainArxiv Appendix A: -Table A.1. Summary of NH 3(9, 6) maser observations. -Source Telescope Beam Epoch Channel Sν rms∫ -Sνdv V LSR ∆V1/2 +Table A.1.Summary of NH3 (9, 6) maser observations. +Source Telescope Beam Epoch Channel S ν rms +∫ +S νdv V LSR ∆V1/2 size spacing -(km s−1) (Jy) (mJy) (Jy km s−1) (km s−1) -Cep A E ffelsberg 49′′2020, Jan. 04 0.62 0.67 3.41 1.19 ±0.02 -1.11±0.02 1.67±0.04 -Effelsberg 49′′2021, Feb. 11 0.62 0.59 5.97 1.08 ±0.02 -0.74±0.02 1.70±0.04 -Effelsberg 49′′2021, Feb. 15 0.62 0.65 10.98 1.11 ±0.03 -0.75±0.02 1.60±0.05 -JVLAa1′′.47×0′′.99 2021, Jul. 13 0.13 1.13 144 0.89 ±0.09 -0.86±0.03 0.74±0.12 -Effelsberg 49′′2021, Aug. 11 0.07 0.98 13.36 0.49 ±0.02 -0.90±0.01 0.47±0.01 -0.35 0.26 ±0.02 -0.28±0.02 0.69±0.05 -Effelsberg 49′′2021, Aug. 12 0.07 0.98 13.35 0.50 ±0.01 -0.89±0.07 0.48±0.07 -0.35 0.20 ±0.01 -0.29±0.07 0.54±0.07 -0.06 0.07 ±0.01 0.51±0.07 1.09±0.07 -0.02 0.02 ±0.01 2.15±0.07 0.80±0.07 -0.07 0.06 ±0.01 2.89±0.07 0.92±0.07 -G34.26 +0.15 E ffelsberg 49′′2020, Jan. 03 0.62 0.30 1.26 0.65 ±0.03 62.50±0.05 2.05±0.13 -Effelsberg 49′′2021, Feb. 11 0.62 0.24 2.42 0.40 ±0.02 55.76±0.04 1.60±0.12 -Effelsberg 49′′2021, Feb. 15 0.62 0.20 4.86 0.38 ±0.02 55.71±0.05 1.80±0.14 -JVLAb1′′.33×1′′.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ±0.02 54.41±0.03 0.38±0.09 -0.22 0.22 ±0.02 55.82±0.05 0.95±0.12 -0.15 0.06 ±0.01 57.21±0.04 0.35±0.08 -Effelsberg 49′′2021, Aug. 11 0.07 0.08 13.92 0.06 ±0.007 54.10±0.05 0.68±0.12 -0.07 0.02 ±0.006 54.82±0.03 0.31±0.09 -0.12 0.10 ±0.006 55.85±0.02 0.75±0.06 -Effelsberg 49′′2021, Aug. 12 0.07 0.16 27.40 0.09 ±0.008 55.83±0.02 0.56±0.05 -Notes. 
The spectral parameters are obtained from Gaussian fitting.(a)The JVLA spectrum toward Cep A is extracted from the E ffelsberg-beamsized - region (FWHM 49′′).(b)For G34.26 +0.15, the JVLA beam samples the NH 3(9,6) spectrum over a region of radius 3′′.5, which contains all -detected NH 3(9,6) emissions. -Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. -Source R.A. Dec. Size P.A. Sν -(h m s ) (◦ ′ ′′) (arcsec) (deg) (mJy) -Cep A HW2 22 56 17.972 ±0.003 +62 01 49.587±0.015 (0.45±0.19)×(0.22±0.10) 50.0 20.2 ±1.4 -HW3a 22 56 17.420 ±0.022 +62 01 44.576±0.076 (2.35±0.45)×(0.55±0.14) 66.6 4.75 ±0.74 -HW3b 22 56 17.578 ±0.009 +62 01 45.041±0.043 (1.43±0.24)×(0.45±0.10) 59.9 3.19 ±0.36 -HW3c 22 56 17.956 ±0.016 +62 01 46.224±0.038 (1.44±0.37)×(0.36±0.19) 86.0 9.90 ±1.7 -HW3d 22 56 18.195 ±0.005 +62 01 46.325±0.014 (1.26±0.12)×(0.30±0.19) 102.5 13.75 ±0.92 -HW9 22 56 18.626 ±0.014 +62 01 47.851±0.137 (1.53±0.51)×(0.29±0.30) 28.0 3.26 ±0.78 -G34.26 +0.15 A 18 53 18.774 ±0.005 +01 14 56.208±0.125 (0.66±0.49)×(0.50±0.33) 10.0 94 ±33 -B 18 53 18.649 ±0.005 +01 15 00.071±0.180 (2.31±0.49)×(0.85±0.21) 17.4 597 ±110 -C 18 53 18.560 ±0.004 +01 14 58.201±0.112 (2.03±0.30)×(1.34±0.20) 178.0 5070 ±660 +(km s−1) (Jy) (mJy) (Jy km s −1) (km s −1) +Cep A E ffelsberg 49 ′′ 2020, Jan. 04 0.62 0.67 3.41 1.19 ±0.02 -1.11 ±0.02 1.67 ±0.04 +Effelsberg 49 ′′ 2021, Feb. 11 0.62 0.59 5.97 1.08 ±0.02 -0.74 ±0.02 1.70 ±0.04 +Effelsberg 49 ′′ 2021, Feb. 15 0.62 0.65 10.98 1.11 ±0.03 -0.75 ±0.02 1.60 ±0.05 +JVLAa 1′′.47 ×0′′.99 2021, Jul. 13 0.13 1.13 144 0.89 ±0.09 -0.86 ±0.03 0.74 ±0.12 +Effelsberg 49 ′′ 2021, Aug. 11 0.07 0.98 13.36 0.49 ±0.02 -0.90 ±0.01 0.47 ±0.01 +0.35 0.26 ±0.02 -0.28 ±0.02 0.69 ±0.05 +Effelsberg 49 ′′ 2021, Aug. 
12 0.07 0.98 13.35 0.50 ±0.01 -0.89 ±0.07 0.48 ±0.07 +0.35 0.20 ±0.01 -0.29 ±0.07 0.54 ±0.07 +0.06 0.07 ±0.01 0.51 ±0.07 1.09 ±0.07 +0.02 0.02 ±0.01 2.15 ±0.07 0.80 ±0.07 +0.07 0.06 ±0.01 2.89 ±0.07 0.92 ±0.07 +G34.26+0.15 E ffelsberg 49 ′′ 2020, Jan. 03 0.62 0.30 1.26 0.65 ±0.03 62.50 ±0.05 2.05 ±0.13 +Effelsberg 49 ′′ 2021, Feb. 11 0.62 0.24 2.42 0.40 ±0.02 55.76 ±0.04 1.60 ±0.12 +Effelsberg 49 ′′ 2021, Feb. 15 0.62 0.20 4.86 0.38 ±0.02 55.71 ±0.05 1.80 ±0.14 +JVLAb 1′′.33 ×1′′.06 2021, Jul. 13 0.13 0.23 37.1 0.09 ±0.02 54.41 ±0.03 0.38 ±0.09 +0.22 0.22 ±0.02 55.82 ±0.05 0.95 ±0.12 +0.15 0.06 ±0.01 57.21 ±0.04 0.35 ±0.08 +Effelsberg 49 ′′ 2021, Aug. 11 0.07 0.08 13.92 0.06 ±0.007 54.10 ±0.05 0.68 ±0.12 +0.07 0.02 ±0.006 54.82 ±0.03 0.31 ±0.09 +0.12 0.10 ±0.006 55.85 ±0.02 0.75 ±0.06 +Effelsberg 49 ′′ 2021, Aug. 12 0.07 0.16 27.40 0.09 ±0.008 55.83 ±0.02 0.56 ±0.05 +Notes. The spectral parameters are obtained from Gaussian fitting. (a) The JVLA spectrum toward Cep A is extracted from the E ffelsberg-beamsized + region (FWHM 49′′). (b) For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius 3′′.5, which contains all +detected NH3 (9,6) emissions. +Table A.2.1.36 cm JVLA flux densities of individual continuum sources. +Source R.A. Dec. Size P.A. 
S ν +(h m s ) ( ◦ ′ ′′ ) (arcsec) (deg) (mJy) +Cep A HW2 22 56 17.972 ±0.003 +62 01 49.587 ±0.015 (0.45 ±0.19) ×(0.22 ±0.10) 50.0 20.2 ±1.4 +HW3a 22 56 17.420 ±0.022 +62 01 44.576 ±0.076 (2.35 ±0.45) ×(0.55 ±0.14) 66.6 4.75 ±0.74 +HW3b 22 56 17.578 ±0.009 +62 01 45.041 ±0.043 (1.43 ±0.24) ×(0.45 ±0.10) 59.9 3.19 ±0.36 +HW3c 22 56 17.956 ±0.016 +62 01 46.224 ±0.038 (1.44 ±0.37) ×(0.36 ±0.19) 86.0 9.90 ±1.7 +HW3d 22 56 18.195 ±0.005 +62 01 46.325 ±0.014 (1.26 ±0.12) ×(0.30 ±0.19) 102.5 13.75 ±0.92 +HW9 22 56 18.626 ±0.014 +62 01 47.851 ±0.137 (1.53 ±0.51) ×(0.29 ±0.30) 28.0 3.26 ±0.78 +G34.26+0.15 A 18 53 18.774 ±0.005 +01 14 56.208 ±0.125 (0.66 ±0.49) ×(0.50 ±0.33) 10.0 94 ±33 +B 18 53 18.649 ±0.005 +01 15 00.071 ±0.180 (2.31 ±0.49) ×(0.85 ±0.21) 17.4 597 ±110 +C 18 53 18.560 ±0.004 +01 14 58.201 ±0.112 (2.03 ±0.30) ×(1.34 ±0.20) 178.0 5070 ±660 Article number, page 8 of 10 -Y . T. Yan ( 闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -Table A.3. NH 3(9,6) maser positions derived from the JVLA observations. -Source R.A. Dec. Sν TMB VLSR ∆V1/2 -(h m s ) (◦ ′ ′′) (mJy beam−1) (K) (km s−1) -Cep A M 22 56 17.933 ±0.002 +62 01 49.608±0.011 985.2 2464.8 -0.88 ±0.01 0.51±0.02 -343.2 829.5 -0.24 ±0.03 0.63±0.05 -G34.26 +0.15 M1 18 53 18.569 ±0.007 +01 14 57.997±0.056 37.1 94.5 56.82 ±0.06 0.68±0.14 -M2 18 53 18.696 ±0.002 +01 14 55.807±0.034 48.4 122.4 53.77 ±0.05 0.35±0.08 -57.8 146.2 54.35 ±0.07 0.83±0.14 -180.8 457.6 55.83 ±0.01 0.59±0.03 -M3 18 53 18.667 ±0.005 +01 14 55.348±0.066 78.1 197.2 54.22 ±0.04 0.94±0.08 -73.7 186.3 55.78 ±0.04 0.79±0.08 -Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000=22h56m17s.972, -andδJ2000=62◦01′49′′.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse -denoting the position of the NH 3(9,6) emission with a purple star at its center. 
The red contours show the NOrthern Extended Millimeter Array +Y . T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Table A.3.NH3 (9,6) maser positions derived from the JVLA observations. +Source R.A. Dec. S ν TMB VLSR ∆V1/2 +(h m s ) ( ◦ ′ ′′ ) (mJy beam −1) (K) (km s −1) +Cep A M 22 56 17.933 ±0.002 +62 01 49.608 ±0.011 985.2 2464.8 -0.88 ±0.01 0.51 ±0.02 +343.2 829.5 -0.24 ±0.03 0.63 ±0.05 +G34.26+0.15 M1 18 53 18.569 ±0.007 +01 14 57.997 ±0.056 37.1 94.5 56.82 ±0.06 0.68 ±0.14 +M2 18 53 18.696 ±0.002 +01 14 55.807 ±0.034 48.4 122.4 53.77 ±0.05 0.35 ±0.08 +57.8 146.2 54.35 ±0.07 0.83 ±0.14 +180.8 457.6 55.83 ±0.01 0.59 ±0.03 +M3 18 53 18.667 ±0.005 +01 14 55.348 ±0.066 78.1 197.2 54.22 ±0.04 0.94 ±0.08 +73.7 186.3 55.78 ±0.04 0.79 ±0.08 +Fig. A.1.Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is αJ2000 = 22h56m17s.972, +and δJ2000 = 62◦01′49′′.587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse +denoting the position of the NH 3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array (NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 ×2.43 mJy beam−1. -OH (Bartkiewicz et al. 2005), H 2O (Sobolev et al. 2018), and CH 3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, -respectively. The color bar on the right-hand side indicates the velocity range ( VLSR) of maser spots. +OH (Bartkiewicz et al. 2005), H2O (Sobolev et al. 2018), and CH3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, +respectively. The color bar on the right-hand side indicates the velocity range (VLSR) of maser spots. Article number, page 9 of 10 -A&A proofs: manuscript no. mainArxiv -Fig. A.2. 
1.36 cm JVLA continuum map of G34.26 +0.15 presented as gray shaded areas. The reference position is αJ2000=18h53m18s.560, and -δJ2000=01◦14′58′′.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH 3(9,6) emission with stars at their +A&A proofs:manuscript no. mainArxiv +Fig. A.2.1.36 cm JVLA continuum map of G34.26 +0.15 presented as gray shaded areas. The reference position is αJ2000 = 18h53m18s.560, and +δJ2000 = 01◦14′58′′.201, the peak position, is marked by a red cross. The red ellipses show the positions of NH 3 (9,6) emission with stars at their center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from -Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 ×20 mJy beam−1. OH (Zheng et al. 2000), H 2O (Imai +Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140×20 mJy beam−1. OH (Zheng et al. 2000), H2O (Imai et al. 2011), and CH 3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates -the velocity range ( VLSR) of maser spots. +the velocity range (VLSR) of maser spots. Article number, page 10 of \ No newline at end of file diff --git a/read/results/pypdf/2201.00022.txt b/read/results/pypdf/2201.00022.txt index 069fe3c..b90ac7c 100644 --- a/read/results/pypdf/2201.00022.txt +++ b/read/results/pypdf/2201.00022.txt @@ -1,18 +1,18 @@ Draft version July 7, 2022 -Typeset using L ATEXtwocolumn style in AASTeX631 +Typeset using LATEX twocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei -Sanaea C. Rose,1, 2Smadar Naoz,1, 2Re’em Sari,3and Itai Linial3 +Sanaea C. Rose,1, 2 Smadar Naoz,1, 2 Re’em Sari,3 and Itai Linial3 1Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA 2Mani L. 
Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70 M ⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +50 −70 M⊙, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite -efficient, forming IMBHs as massive as 104M⊙. This upper limit assumes that (1) the BHs accrete a +efficient, forming IMBHs as massive as 10 4 M⊙. This upper limit assumes that (1) the BHs accrete a substantial fraction of the stellar mass captured during each collision and (2) that the rate at which new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our @@ -23,14 +23,14 @@ Additionally, formed through this channel, both black holes in the mass gap and with the supermassive black hole at the center of a galactic nucleus through gravitational waves. These gravitational wave events are extreme and intermediate mass ratio inspirals (EMRIs and IMRIs, respectively). -1.INTRODUCTION +1. INTRODUCTION The recently detected gravitational wave source GW190521 (The LIGO Scientific Collaboration et al. 2020a,b) produced an intermediate mass black hole of -approximately 142 M ⊙. 
This event may have also had a -85 M ⊙progenitor, which falls within the pair-instability +approximately 142 M⊙. This event may have also had a +85 M⊙ progenitor, which falls within the pair-instability mass gap that limits stellar black holes (BHs) to no -more than∼<50 M ⊙(e.g., Heger et al. 2003; Woosley +more than ∼< 50 M ⊙ (e.g., Heger et al. 2003; Woosley 2017)1. Similarly, the merger products of GW150914, GW170104, and GW170814 fall within the mass gap (e.g., Abbott et al. 2016, 2017a,b). BH mergers that @@ -39,10 +39,11 @@ form second generation BHs and, in some cases, intermediate (GW) events can occur in globular clusters, young stelCorresponding author: Sanaea C. Rose srose@astro.ucla.edu -1Note that the exact lower and upper limits may be sensitive to +1 Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli 2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski -et al. 2020a; Renzo et al. 2020; Vink et al. 2021).lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez +et al. 2020a; Renzo et al. 2020; Vink et al. 2021). +lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez et al. 2019; Fishbach et al. 2020; Mapelli et al. 2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. 2021; Arca Sedda et al. 2021). However, IMBHs are @@ -63,7 +64,8 @@ Valiante et al. 2016) or from direct collapse of accumulated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to survive - galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 + galaxy evolution and mergers to present day (e.g., +arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 Rose et al. Rashkov & Madau 2014), with significant effects on their stellar and even dark matter surroundings (e.g., Bertone @@ -96,7 +98,7 @@ clusters without a SMBH. 
They considered BH binarysingle merger recoil kicks. The post-kick merger product sinks back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that -103−104M⊙IMBHs can form efficiently over the lifetime +103 −104 M⊙ IMBHs can form efficiently over the lifetime of a cluster. However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision @@ -113,10 +115,11 @@ red giant populations (e.g., Dale & Davies 2006; Dale et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti et al. 2021). We propose that IMBHs can form naturally within the central pc of a galactic center through repeated - collisions between BHs and main sequence stars . + collisions between BHs and main sequence stars. During a collision, the BH can accrete some portion of the star’s mass. Over many collisions, it can grow appreciably - in size. We demonstrate that this channel cancreate IMBHs with masses as large as 104M⊙, an upper + in size. We demonstrate that this channel can +create IMBHs with masses as large as 104 M⊙, an upper limit that depends on the density profile of the surrounding stars and the efficiency of the accretion. The paper is structured as follows: we describe relevant @@ -133,9 +136,9 @@ and 2.8 discuss implications for GW merger events between processes and discuss the subsequent results in Section 2.9. Finally, we discuss and summarize our findings in Section 3. -2.METHODOLOGY +2. METHODOLOGY We consider a population of stellar mass BHs embedded - in a cluster of 1 M ⊙stars. When stars and BHs + in a cluster of 1 M ⊙ stars. When stars and BHs collide, the BHs can accrete mass. The growth rate depends on the physical processes outlined below. We use a statistical approach to estimate the stellar encounters @@ -145,13 +148,13 @@ We consider a population of BHs within the inner few parsecs of the SMBH in a galactic nucleus (GN). 
We assume that the BH mass distribution follows that of the stars from which they originate, a Kroupa initial mass -functiondN/dm∝m−2.35. While this choice represents +function dN/dm∝m−2.35. While this choice represents a gross oversimplification, it has very little bearing on our final results. Future work may address the particulars of the BH mass distribution, but we do not expect that it will significantly alter the outcome. The upper and lower limits of the BH mass distribution are 5 and -50M⊙, respectively. We select the upper limit to encompass +50 M⊙, respectively. We select the upper limit to encompass the range of upper bounds predicted by stellar evolution models, which vary between 40 and 125 M⊙ depending on the metallicity (Heger et al. 2003; Woosley @@ -159,7 +162,7 @@ depending on the metallicity (Heger et al. 2003; Woosley Belczynski et al. 2020b; Renzo et al. 2020). We assume that the orbits of the BHs follow a thermal eccentricity distribution. We draw their semimajor axes, a•, from a -uniform distribution in log distance, dN/d (logr) being +uniform distribution in log distance, dN/d(log r) being constant. While this distribution is not necessarily representative of actual conditions in the GN, we use it to build a comprehensive physical picture of BH growth at @@ -174,7 +177,7 @@ from the SMBH. For the collision timescale, we assume the BH is on a circular orbit. The timescales depend on the density, so we adopt a range of density profiles, bounded by α= 1 (dashed curve) to α= 2 (dark, solid curve). The dark -blue line represents the time for a 105M⊙BH to merge with +blue line represents the time for a 105 M⊙ BH to merge with the SMBH through GW emission. observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribution’s @@ -183,59 +186,67 @@ but reserve a more detailed examination of the distribution’s BHs in the GN can undergo direct collisions with other objects. 
The timescale for this process, tcoll, can be estimated using a simple rate calculation: t−1 -coll=nσA, -wherenis the number density of objects, σis the velocity +coll = nσA, +where n is the number density of objects, σ is the velocity dispersion, and Ais the cross-section. We use the collision timescale from Rose et al. (2020): t−1 -coll=πn(a•)σ(a•) -×( +coll = πn(a•)σ(a•) +× +( f1(e•)r2 -c+f2(e•)rc2G(mBH+m⋆) -σ(a•)2) -.(1) -whereGis the gravitational constant and rcis the sum +c + f2(e•)rc +2G(mBH + m⋆) +σ(a•)2 +) +. (1) +where Gis the gravitational constant and rc is the sum of the radii of the interacting objects, a black hole with -massmBHand a star with mass m⋆. Detailed in Rose -et al. (2020), f1(e•) andf2(e•) account for the effect of +mass mBH and a star with mass m⋆. Detailed in Rose +et al. (2020), f1(e•) and f2(e•) account for the effect of the eccentricity of the BH’s orbit about the SMBH on -the collision rate, while nandσare simply evaluated +the collision rate, while n and σ are simply evaluated at the semimajor axis of the orbit (see below). Note that this timescale equation includes the effects of gravitational focusing, which enhances the cross-section of interaction. Assuming a circular orbit for simplicity, we plot the timescale for a BH orbiting in the GN to collide with -a 1M⊙star as a function of distance from the SMBHin Figure 1.2As this timescale depends on the density +a 1 M⊙ star as a function of distance from the SMBH +in Figure 1. 2 As this timescale depends on the density of surrounding stars, we adopt a density profile of the form: -ρ(r•) =ρ0(r• -r0)−α +ρ(r•) = ρ0 +(r• +r0 +)−α , (2) -wherer•denotes the distance from the SMBH. We adopt -a SMBH mass of 4 ×106M⊙such that our fiducial GN +where r• denotes the distance from the SMBH. We adopt +a SMBH mass of 4 ×106 M⊙ such that our fiducial GN matches our own galactic center (e.g., Ghez et al. 2005; Genzel et al. 2003). In this case, the normalization in -Eq. 
(2) isρ0= 1.35×106M⊙/pc3atr0= 0.25 pc (Genzel - et al. 2010). Additionally, in Eq. (2), αgives the +Eq. (2) is ρ0 = 1.35 ×106 M⊙/pc3 at r0 = 0.25 pc (Genzel + et al. 2010). Additionally, in Eq. (2), α gives the slope of the power law. We assume that a uniform population of solar mass stars account for most of the mass in the GN, making the stellar number density: -n(r•) =ρ(r•) -1M⊙. (3) +n(r•) = ρ(r•) +1 M⊙ +. (3) The collision timescale also depends on the velocity dispersion, which we express as: -σ(r•) =√ +σ(r•) = +√ GM• -r•(1 +α), (4) -whereαis the slope of the density profile and M•denotes +r•(1 + α), (4) +where α is the slope of the density profile and M• denotes the mass of the SMBH (Alexander 1999; Alexander & Pfuhl 2014). As mentioned above, Eq. (1) depends on the sum of the radii of the colliding objects, rc. We -takerc= 1R⊙because these interactions involve a BH +take rc = 1 R⊙ because these interactions involve a BH and a star, and the former has a much smaller physical cross-section. For example, the Schwarzschild radius -of a 10M⊙BH is only 30 km, or 4 .31×10−5R⊙. For +of a 10 M⊙ BH is only 30 km, or 4 .31 ×10−5 R⊙. For this reason, direct collisions between compact objects are very rare and not included in our model. We note that direct collisions between BHs, via GW @@ -251,26 +262,26 @@ than the BH-BH GW collision timescale (for the relevant 2018, for example). Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the GN. -2We note that the eccentricity has a very minor effect on the +2 We note that the eccentricity has a very minor effect on the collision timescale (Rose et al. 2020). Rose et al. 2.3. Statistical Approach to Collisions We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an -increment ∆ tof 106yr, we calculate the probability of +increment ∆t of 106 yr, we calculate the probability of a collision occurring, given by ∆ t/tcoll. 
This choice of -∆tis motivated by our galactic center’s star formation +∆t is motivated by our galactic center’s star formation timescale (e.g., Lu et al. 2009), allowing for regular replenishment of the stellar population in the GN. We have checked that the results are not sensitive to this choice of ∆t, omitted here to avoid clutter. We draw a number between 0 and 1 using a random number generator. If that number is less than or equal to the probability, we -increase the BH’s mass by ∆ m, the mass that the BH is +increase the BH’s mass by ∆m, the mass that the BH is expected to accrete in a single collision (see Section 2.4 for details). We recalculate the collision timescale using the updated BH mass and repeat this process until the -time elapsed equals the simulation time of 10 Gyr3. +time elapsed equals the simulation time of 10 Gyr 3. 2.4. Mass Growth When a BH collides with a star, it may accrete material and grow in mass. The details of the accretion @@ -294,21 +305,21 @@ assume that the BH accretes all of the material that it captures. The details of the accretion are uncertain, however, and it may be much less efficient than our results imply. We discuss accretion in Section 2.5. -To estimate ∆ m, we begin with the Bondi-Hoyle accretion - rate, ˙ m, given by: -˙m=4πG2m2 +To estimate ∆m, we begin with the Bondi-Hoyle accretion + rate, ˙m, given by: +˙m= 4πG2m2 BHρstar -(c2s+σ2)3/2, (5) -3Closer to the SMBH, ∆ tmay exceed the collision timescale by +(c2s + σ2)3/2 , (5) +3 Closer to the SMBH, ∆ t may exceed the collision timescale by a factor of a few for steep density profiles. We include a safeguard - in our code which takes the ratio tcoll/∆tand rounds it + in our code which takes the ratio tcoll/∆t and rounds it to the nearest integer. We take this integer to be the number of collisions and increase the BH mass accordingly. -Figure 2. 
We consider an example that highlights the mass +Figure 2.We consider an example that highlights the mass growth as a function of distance from the SMBH. Grey dots represent the initial masses and distances from the SMBH of the BHs involved in the simulation. For simplicity, we set -the inital mass equal to 10 M⊙for all of the BHs. Assuming +the inital mass equal to 10 M⊙ for all of the BHs. Assuming the density profile of stars has α= 1, we consider two cases: BHs accrete all of the star’s mass during a collision (red) and only a portion of the star’s mass is accreted during a collision @@ -316,26 +327,26 @@ given by Eq. 6 (blue). The latter case results in less growth closer to the SMBH where the velocity dispersion becomes high. The shaded regions and dashed lines represent the analytical predictions detailed in Section 2.4. -wherecsis the speed of sound in the star and ρstaris its +where cs is the speed of sound in the star and ρstar is its density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). We approximate the density as 1 M⊙/(4πR3 ⊙/3) and take -the conservative value of cs= 500km s−1, which is -consistent with the sound speed inside a 1 M⊙star +the conservative value of cs = 500 km s−1, which is +consistent with the sound speed inside a 1 M⊙ star (Christensen-Dalsgaard et al. 1996) and allows us to set a lower limit on ∆ m. To find ∆ m, at each collision, we have: -∆m= min( ˙m×t⋆,cross,1 M⊙), (6) -wheret⋆,cross∼R⊙/σis the crossing time of the BH in -the star. We take the minimum between ˙ m×t⋆,crossand -1 M⊙because the BH cannot accrete more mass than +∆m= min( ˙m×t⋆,cross,1 M⊙) , (6) +where t⋆,cross ∼R⊙/σ is the crossing time of the BH in +the star. We take the minimum between ˙m×t⋆,cross and +1 M⊙ because the BH cannot accrete more mass than one star at each collision. 
Figure 2 juxtaposes the expected growth using BondiHoyle-Lyttleton accretion (blue small points) with a much simpler model in which the BH accretes the star’s -entire mass, 1 M⊙(red large points). Both examples -start with identical populations of 10 M⊙BHs (grey) +entire mass, 1 M⊙ (red large points). Both examples +start with identical populations of 10 M⊙ BHs (grey) and simulate growth through collisions using a statistical approach. As the BHs grow, the collision timescale, which depends on mBH, decreases. Simultaneously, @@ -348,17 +359,18 @@ IMBH Formation in Galactic Nuclei 5 ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: -mfinal(tcoll→const.) =minitial + ∆mT -tcoll,(7) -in whichTrepresents the simulation time and ∆ mand -tcollremain constant, approximated as their initial values. +mfinal(tcoll →const.) =minitial + ∆m T +tcoll +, (7) +in which T represents the simulation time and ∆ m and +tcoll remain constant, approximated as their initial values. This equation is plotted in Figure 2 for both cases, -∆m= 1M⊙(red) and ∆ mfrom Bondi-Hoyle-Lyttleton +∆m= 1 M⊙ (red) and ∆mfrom Bondi-Hoyle-Lyttleton accretion (blue), and the curves coincide with the corresponding simulated results. The shaded regions represent one standard deviation from Eq. (7), calculated -using the square root of the number of collisions, T/tcoll. +using the square root of the number of collisions,T/tcoll. As indicated by the results in red, in the absence of Bondi-Hoyle-Lyttleton accretion, the BHs closest to the SMBH experience the most growth because they have @@ -366,26 +378,26 @@ shorter collision timescales. However, Bondi-HoyleLyttleton accretion becomes important closer to the SMBH, where the velocity dispersion is large compared with the stars’ escape velocity, and curtails the mass -growth for BHs in this region. Outside of 10−2pc, a BH +growth for BHs in this region. 
Outside of 10−2 pc, a BH consumes the star’s entire mass: the accretion-limited -∆mgoverned by Eq. (7) is greater than or equal to the +∆m governed by Eq. (7) is greater than or equal to the star’s mass. Eq. 7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger -indexαin the density profile (see Figure 1), the growth -is very efficient and ∆ mquickly approaches 1 M ⊙. Consequently, - while we can now assume ∆ m= 1 M ⊙, we +index αin the density profile (see Figure 1), the growth +is very efficient and ∆mquickly approaches 1 M⊙. Consequently, + while we can now assume ∆ m = 1 M⊙, we can no longer assume the collision timescale is constant. The final mass grows exponentially as a result. For -∆m= 1M ⊙, the general solution is reached by solving -the differential equation dm/dt = 1M⊙/tcoll(m), which +∆m = 1M⊙, the general solution is reached by solving +the differential equation dm/dt= 1 M⊙/tcoll(m), which gives: -mfinal(∆m→1 M⊙) =−A+ (minitial +A)eCT(8) -whereA=σ2Rstar/GandC= 2πGn starRstar/σ. As an +mfinal(∆m→1 M⊙) =−A+ (minitial + A) eCT (8) +where A= σ2Rstar/Gand C = 2πGnstarRstar/σ. As an example, we plot this curve in purple for the α= 2 case, in Figure 3, which agrees with the simulated masses. 2.5. Uncertainties in Accretion -We note that the ∆ Mcalculated in this proof-ofconcept +We note that the ∆ M calculated in this proof-ofconcept study assumes that the BH accretes all of the material that it captures. Estimating the true fraction of the material accreted by the BH is very challenging; @@ -393,7 +405,8 @@ of the material accreted by the BH is very challenging; the generalized GR fluid equations with cooling, heating, and radiative transfer, etc. and remains an active field of research (e.g., Blandford & Begelman 1999; Park -& Ostriker 2001; Narayan et al. 2003; Igumenshchevet al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +& Ostriker 2001; Narayan et al. 2003; Igumenshchev +et al. 
2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang et al. 2014; McKinney et al. 2014; Narayan et al. 2022). Heuristically, if a collision between a BH and a star results in an accretion disk, the disk’s viscous timescale @@ -411,13 +424,13 @@ even if it accretes very little in a single one. We explore invoked momentum-driven winds in BH accretion (e.g., Murray et al. 2005; Ostriker et al. 2010; Brennan et al. 2018). We thus estimate the fraction of captured mass -accreted to be approximately vesc/(cη), wherevescis -the escape velocity from the BH at 1 R ⊙andηis the -accretion efficiency at the ISCO. We take ηto be 0.1 +accreted to be approximately vesc/(cη), where vesc is +the escape velocity from the BH at 1 R ⊙ and η is the +accretion efficiency at the ISCO. We take η to be 0 .1 (e.g., Yu & Tremaine 2002). This expression for the fraction accreted is consistent with Kremer et al. (2022) equation 19 for s= 0.5, which is a reasonable value for -s, a free parameter between 0 .2 and 0.8. We discuss +s, a free parameter between 0 .2 and 0 .8. We discuss the results of the momentum-driven winds estimate in Section 3. We note that the accretion process may be more efficient than this estimate implies if, for example, @@ -433,36 +446,42 @@ circularize and shrink its orbit. We implement the effects eccentricity following Peters & Mathews (1963a). The characteristic timescale to merge a BH with an SMBH is given by: -tGW≈2.9×1012yr(M• -106M⊙)−1(mBH -106M⊙)−1 -×(M•+mBH -2×106M⊙)−1(a• -10−2pc)4 -×f(e•)(1−e2 -•)7/2, (9) -wheref(e•) is a function of e•. For all values of e•, -f(e•) is between 0 .979 and 1.81 (Blaes et al. 2002). We -plot this timescale for a 1 ×105M⊙BH in Figure 1 in +tGW ≈2.9 ×1012 yr +( M• +106 M⊙ +)−1 ( mBH +106 M⊙ +)−1 +× +(M• + mBH +2 ×106 M⊙ +)−1 ( a• +10−2 pc +)4 +×f(e•)(1 −e2 +•)7/2 , (9) +where f(e•) is a function of e•. For all values of e•, +f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). 
We +plot this timescale for a 1 ×105 M⊙ BH in Figure 1 in blue. Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of αin the density profile, shallow ( α= 1) to +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow ( α = 1) to cuspy (α= 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking minitial to be the average mass of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and merger times of these BHs. In our simulations, we assume a BH has merged with -the SMBH when the condition tGW< telapsed is met. +the SMBH when the condition tGW < telapsed is met. When this condition is satisfied, we terminate mass -growth through collisions for that BH.4 +growth through collisions for that BH. 4 2.7. IMBH growth As detailed above, BH-stellar collisions can increase the BH masses as a function of time. Here, we examine the sensitivity of the BH growth to the density power law. From Eq. (1), it is clear that the growth rate depends - on the stellar density profile, governed by the indexα. - We expect that higher values of α, or steeper + on the stellar density profile, governed by the index + α. We expect that higher values of α, or steeper profiles, will result in more efficient mass growth. In -Figure 1, larger values of αlead to collision timescales +Figure 1, larger values of α lead to collision timescales in the GN’s inner region, inwards of 0 .25 pc, that are much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of @@ -475,12 +494,13 @@ and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. 
Following the method detailed in Section 2.6, when a -given BH meets the criterion tGW< telapsed , we mark -4For comparison, we also incrementally changed the semimajor +given BH meets the criterion tGW < telapsed, we mark +4 For comparison, we also incrementally changed the semimajor axis and eccentricity from GW emission following the equations in Peters & Mathews (1963b). This method leads to a slight increase in the final IMBH masses because it accounts for the -collisions that take place while the orbit is gradually shrinking.it as merged with the SMBH. We assume that at this +collisions that take place while the orbit is gradually shrinking. +it as merged with the SMBH. We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit until it undergoes an extreme or intermediate mass ratio @@ -496,11 +516,12 @@ A BH orbiting the SMBH experiences weak gravitational relaxation time, these interactions alter its orbit about the SMBH. The two-body relaxation timescale for a single-mass system is: -trelax= 0.34σ3 -G2ρ⟨M∗⟩ln Λrlx, (10) -where ln Λ rlxis the Coulomb logarithm and ⟨M∗⟩is the +trelax = 0.34 σ3 +G2ρ⟨M∗⟩ln Λrlx +, (10) +where ln Λrlx is the Coulomb logarithm and ⟨M∗⟩is the average mass of the surrounding objects, here assumed -to be 1M⊙(Spitzer 1987; Binney & Tremaine 2008, +to be 1 M⊙ (Spitzer 1987; Binney & Tremaine 2008, Eq. (7.106)). This equation represents the approximate timescale for a BH on a semi-circular orbit to change its orbital energy and angular momentum by order of @@ -518,7 +539,7 @@ Alexander 2005; Aharon & Perets 2016; Stone & Metzger 2016; Amaro-Seoane 2018; Sari & Fragione 2019; Naoz et al. 2022). The relaxation process is therefore crucial to our study. In Figure 1, we plot the relaxation -timescale in gold for a range of α. We note that the Bahcall +timescale in gold for a range ofα. 
We note that the Bahcall & Wolf (1976) profile, α= 7/4, corresponds to zero net flux and therefore does not preferentially migrate objects inward. @@ -528,7 +549,7 @@ to segregate inwards in the GN (e.g., Shapiro & Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; Miralda-Escud´ e & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation -timescale,tseg≈⟨M∗⟩/mBH×trelax (e.g., Spitzer 1987; +timescale, tseg ≈⟨M∗⟩/mBH ×trelax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an order of magnitude smaller than the relaxation timescale plotted in Figure 1. @@ -536,11 +557,13 @@ We incorporate relaxation processes by introducing a small change in the BH’s energy and angular momentum each time it orbits the SMBH. We apply a small instantaneous velocity kick to the BH, denoted as ∆ v. -We draw ∆ vfrom a Guassian distribution with average -of zero and a standard deviation of ∆ vrlx/√ +We draw ∆v from a Guassian distribution with average +of zero and a standard deviation of ∆ vrlx/ +√ 3, where -∆vrlx=v•√ -P•/trlx(see Bradnick et al. 2017, for an +∆vrlx = v• +√ +P•/trlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). The new orbital parameters can be calculated following Lu & Naoz (2019), and see Naoz et al. (2022) for the full @@ -558,14 +581,15 @@ scattering for both black holes and stars. Within this radius, BH self-interaction dominates over two-body scatterings with the now rarer main-sequence stars. The BHs will then settle onto a Bahcall-Wolf profile, while -the stars may follow a shallower profile, with approximatelyn⋆∝r−1.5, - inwards of the transition radius +the stars may follow a shallower profile, with approximately + n⋆ ∝ r−1.5, inwards of the transition radius (Linial & Sari in prep.). Therefore, after the initial mass segregation, we allow the BHs to begin diffusing over a relaxation timescale, their orbital parameters changing slowly through a random process. 
In this random process, some of the BHs -may migrate closer to the SMBH. We terminate massgrowth when the BH enters the inner 200 au of the GN, +may migrate closer to the SMBH. We terminate mass +growth when the BH enters the inner 200 au of the GN, within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest known star to the SMBH (e.g., Ghez et al. 2005). @@ -598,21 +622,21 @@ BHs grow, but the maximum mass is lower compared to the scenario that ignores two-body relaxation. The histogram in Figure 4 presents the final BH mass distributions for different power law indices α. As expected, -the two-body relaxation suppresses the αdependence +the two-body relaxation suppresses the α dependence highlighted in Figure 3. In fact, using a KS test, we find that we cannot reject the hypothesis that the two distributions were drawn from the same sample for the -α= 1.75 andα= 2 results. Interestingly, a BH mass -IMF with an average of 10 M ⊙leads to a final distribution - with an average of ∼200 M ⊙and a median of -∼45 M ⊙, which lies within the mass gap. -3.DISCUSSION AND PREDICTIONS +α = 1.75 and α = 2 results. Interestingly, a BH mass +IMF with an average of 10 M ⊙ leads to a final distribution + with an average of ∼200 M⊙ and a median of +∼45 M⊙, which lies within the mass gap. +3. DISCUSSION AND PREDICTIONS We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as -103−4M⊙and may result in many IMBH-SMBH mergers +103−4 M⊙ and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspirals, or IMRIs) and EMRIs. Rose et al. @@ -634,7 +658,7 @@ limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accretion. 
In our statistical analysis, we account for BondiHoyle-Lyttleton accretion and find that BHs outside of -10−2pc from the SMBH can capture the entire star (see +10−2 pc from the SMBH can capture the entire star (see Figure 2). The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to @@ -647,7 +671,7 @@ However, the inclusion of relaxation processes in the simulations dampens the influence of the stellar density profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in -mass, but their maximum mass is smaller ( ∼104M⊙). +mass, but their maximum mass is smaller ( ∼104 M⊙). Additionally, the final masses have no apparent dependence on distance from the SMBH (see Figure 4). Most simulations in our study assume that the BHs @@ -658,15 +682,16 @@ the accretion is a highly uncertain process and represents 1999; Park & Ostriker 2001; Narayan et al. 2003; Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang et al. 2014; McKinney et al. 2014; -Narayan et al. 2022). To assess the limits of our model,we also consider a physically motivated accretion model, +Narayan et al. 2022). To assess the limits of our model, +we also consider a physically motivated accretion model, momentum-driven winds (Section 2.5). We present the final mass distribution for momentum-driven winds in Figure 4. Importantly, we find that BHs within the mass gap still form naturally despite the substantially reduced accretion. About 5% of the BHs grow by 10 -to 100 M ⊙. Furthermore, if we increase this ∆ Mestimate - by a factor of 2 (i.e., use η= 0.05), the simulation - produces a 3 .5×103M⊙IMBH for the same initial +to 100 M ⊙. Furthermore, if we increase this ∆ M estimate + by a factor of 2 (i.e., use η = 0.05), the simulation + produces a 3.5 ×103 M⊙ IMBH for the same initial conditions. 
Our proof-of-concept demonstrates that collisions between BH and stars are an important process that should be taken into account in dense places such @@ -706,19 +731,19 @@ Center as close as 0.04 pc from the SMBH (e.g., Levin & Beloborodov 2003; Paumard et al. 2006), and star formation episodes can occur as often as every ∼5 Myr (e.g. Lu et al. 2009). Therefore, we expect that after -the first Gyr, stars within ≲0.01 pc will be replenished +the first Gyr, stars within ≲ 0.01 pc will be replenished at intervals consistent with the star formation episodes; the infalling populations of stars are separated by ∼ 5−10 Myr, which is shorter than the collision timescale. However, star-star collisions may complicate this picture - within∼0.01 pc. As discussed above, regular star + within ∼0.01 pc. As discussed above, regular star formation ensures the BHs always have a stellar population - to interact with outside of ∼0.01 pc.5At 0.01 pc, + to interact with outside of ∼0.01 pc.5 At 0.01 pc, however, the kinetic energy during a collision between -two 1 M ⊙stars is larger than their binding energies. +two 1 M ⊙ stars is larger than their binding energies. Collisions can therefore thin out the stellar populations during the time it takes them to diffuse to these small -radii,≲0.01 pc, and may reduce the BH growth in the +radii, ≲ 0.01 pc, and may reduce the BH growth in the innermost region. We reserve the inclusion of star-star collisions for future work. We also note that the disruption of binary stars by the SMBH may help replenish @@ -740,27 +765,28 @@ subsequent BH growth may occur in bursts, coinciding with replenishment of the stars. 
While there are many competing dynamical processes that shape the stellar density profile, we stress that α -5In fact, the star-star collision timescale is greater than 10 Myr +5 In fact, the star-star collision timescale is greater than 10 Myr for the entire parameter space, save at 0 .001 pc for larger values -ofα; the BH-star collision timescale plotted in Fig. 1 is the same -order of magnitude as the star-star collision timescale.can simply be chosen to encapsulate all of the relevant -physics. A value for αthat is constrained by observations +of α; the BH-star collision timescale plotted in Fig. 1 is the same +order of magnitude as the star-star collision timescale. +can simply be chosen to encapsulate all of the relevant +physics. A value for α that is constrained by observations must already reflect ongoing processes like starstar collisions and replenishment. Sch¨ odel et al. (2018) find the observed stellar mass enclosed within 0.01 pc of the Milky Way’s Galactic Center to be approximately -180 M ⊙. This estimate is consistent to order of magnitude +180 M⊙. This estimate is consistent to order of magnitude with our α= 1.25 case. In a simulation like those depicted in Figure 4, which include relaxation, α= 1.25 leads to a maximum IMBH mass of 140 M ⊙. Furthermore, while the stellar mass within 0.01 pc may be a -few hundred M ⊙, Do et al. (2019) and GRAVITY Collaboration +few hundred M⊙, Do et al. (2019) and GRAVITY Collaboration et al. (2020) set an upper limit on the mass enclosed within the orbit of S0-2 to be about a few thousand - M ⊙, or 0.1% of the central mass. This upper limit + M⊙, or 0.1% of the central mass. This upper limit can include mass that was previously in stars but is now -in BHs. In that case, the 180 M ⊙is what remains of the -stars, while BHs and IMBHs make up the ∼1000 M ⊙ +in BHs. In that case, the 180 M ⊙ is what remains of the +stars, while BHs and IMBHs make up the ∼1000 M⊙ in the innermost region. 
Also not included in this study, collisions between the BH and other compact objects will increase the BH @@ -800,7 +826,7 @@ stars may contribute to the x-ray emission from our galactic centre (e.g., Muno et al. 2005, 2009; Hailey et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer et al. (2022) for a discussion of electromagnetic signatures - from BH-star collisions)6. These interactions, + from BH-star collisions) 6. These interactions, in particular grazing collisions, may also result in tidal disruption events (e.g., Baumgardt et al. 2006b; Perets et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer @@ -836,10 +862,11 @@ Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, doi: 10.1088/0004-637X/780/2/148 Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, doi: 10.1007/s41114-018-0013-8 -6The connection between the observed X-ray sources at the Galactic +6 The connection between the observed X-ray sources at the Galactic Center and tidal capture has been suggested by Generozov et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for -alternative channels.Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. +alternative channels. +Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. https://arxiv.org/abs/2109.12119 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, @@ -909,7 +936,8 @@ Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, doi: 10.1111/j.1365-2966.2005.09937.x Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. 2009, MNRAS, 393, 1016, -doi: 10.1111/j.1365-2966.2008.14254.xDall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +doi: 10.1111/j.1365-2966.2008.14254.x +Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. C. N. 2021a, MNRAS, 505, 2186, @@ -1008,7 +1036,8 @@ https://arxiv.org/abs/2201.12368 Kremer, K., Lu, W., Piro, A. L., et al. 
2021, ApJ, 911, 104, doi: 10.3847/1538-4357/abeb14 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, -45, doi: 10.3847/1538-4357/abb945Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +45, doi: 10.3847/1538-4357/abb945 +Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, doi: 10.1086/376675 @@ -1108,7 +1137,8 @@ Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, Phys. Rev. D, 100, 043027, doi: 10.1103/PhysRevD.100.043027 Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, -113, doi: 10.3847/1538-4357/abc557Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., +113, doi: 10.3847/1538-4357/abc557 +Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. https://arxiv.org/abs/2009.01213 Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, @@ -1167,7 +1197,8 @@ doi: 10.3847/1538-4357/836/2/244 Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, doi: 10.1046/j.1365-8711.2002.05532.x Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, -doi: 10.1088/0004-637X/761/2/129Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +doi: 10.1088/0004-637X/761/2/129 +Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 2014, Monthly Notices of the Royal Astronomical Society, 440, 1263, doi: 10.1093/mnras/stu351 Zheng, X., Lin, D. N. C., & Mao, S. 
2020, arXiv e-prints, diff --git a/read/results/pypdf/2201.00029.txt b/read/results/pypdf/2201.00029.txt index 78813f7..7f07ac2 100644 --- a/read/results/pypdf/2201.00029.txt +++ b/read/results/pypdf/2201.00029.txt @@ -1,21 +1,369 @@ - Exploring new techniques for analyzing variability in white dwarf KIC 8626021 Thomas Huckans, Peter Stine Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St., Bloomsburg, PA 17815 - Abstract As is common with the collection of astronomical data, signals are frequently dominated by noise. However, when performing FTs of light curves, re-binning data can improve the signal-to-noise ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope, we sequentially re-binned data three times to investigate the SNR improvement of lower frequency (< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8 µHz greatly improved through this process, and we postulate that this frequency is linked to the rotation of KIC 8626021. Introduction First detected in 1862, white dwarfs long posed a mystery for early observers. When the companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and densities baffled astronomers. Lacking full understanding of atomic structures and the energy states of electrons, these early researchers believed white dwarfs too dense to exist. However, new discoveries at the turn of the 20th century explained the existence of these stars, and between the world wars white dwarfs were increasingly studied and modeled (Holberg, 2009). As stars age, those that lack the mass to become neutron stars and black holes become white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). They are composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the light emanating from these stars is a result of thermal energy. 
White dwarf stars are no longer supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from collapsing entirely. For many years, accurate detection of light variability in white dwarfs was difficult due to a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler was initially developed with the intention of surveying our region of the Milky Way galaxy in order to find potentially habitable planets. The purpose of the mission was to identify key traits for such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler observed approximately 1.5 x 105 stars (Johnson, 2018), affording scientists excellent opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013, NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and astrophysics. Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use the telescope to collect photometry data of certain sections of our galaxy, although the number of targets was significantly reduced. In addition, the K2 mission was designed to be community-oriented, with the scientific community having an influence on the fields observed and serving as the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations Center (KASOC). 
- The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon previous studies, this research investigated novel techniques of analyzing variability in white dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on the star, allowing for the validation of results using our methods. KIC 8626021 has an effective temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research has found that this white dwarf is the DBV with the highest known temperature, and its helium layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too noisy to draw many conclusions, other FTs of short-cadence data have been performed to find variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz (Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777 Herculis, although our research focuses on low frequencies using long-cadence data. Methods All data were downloaded from the KASOC database, and the long-cadence (data sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs were performed in Mathematica. The re-binning process consisted of summing adjacent light curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one hour, and then repeating this process on the data sample for a total of three times. In addition, a significant detection was defined as being 3𝝈 above the mean of the relative flux, and 0 on the graphs below represents this 3𝝈 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify improvement in signal strength. 
Results Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the same for Q13. Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 and Q13. - FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012. Both graphs were constructed by plotting corrected flux magnitude (flux corrected for instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating between points. Q7 had forty-three interpolated points, and Q13 had sixty-six. - FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The disappearance of the frequency in the last FT is most likely a byproduct of the method, and the spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re-binning process. +Exploring new techniques for analyzing variability in white dwarf KIC 8626021 +Thomas Huckans, Peter Stine +Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd St., +Bloomsburg, PA 17815 - FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. 
In addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3𝝈 and are nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a starspot (Santos et al., 2017). - Q7 Significant Data Points Light Variability Frequency (µHz) Corrected Flux Magnitude (ppm) Period (days) Signal-to-Noise (dB) Q7 First Iteration 5.886 -1.198 1.966 9.9 Q7 Re-bin 1 5.886 -1.477 1.966 12.8 Q7 Re-bin 2 5.889 0.597 1.965 19.2 TABLE I: The table displays the various frequencies collected from Q7 and the information found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, and therefore was not used in any calculations determining the average period of rotation. The values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff. Q13 Significant Data Points Light Variability Frequency (µHz) Corrected Flux Magnitude (ppm) Period (days) Signal-to-Noise (dB) Q13 First Iteration 5.784 1.555 2.001 15.6 Q13 Re-bin 1 5.784 2.873 2.001 17.7 Q13 Re-bin 2 5.787 4.938 2.000 22.6 Q13 Re-bin 3 5.787 6.909 2.000 26.3 Q13 Re-bin 3 11.641 7.073 0.994 26.4 Q13 Re-bin 3 16.823 2.299 0.688 24.1 TABLE II: The table displays the various frequencies collected from Q13 and the information found through calculations to find period and SNR. The last two significant frequencies (11.641 µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in further detail in the Conclusions section of this paper. The values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff. 
- First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 0.933 0.933 0.215 0.216 1.148 1.148 0.575 0.575 1.364 1.364 0.934 0.935 1.507 1.507 1.005 1.006 12.561 12.561 1.149 1.150 16.581 16.581 1.221 1.222 1.364 1.366 1.508 1.509 1.580 1.582 1.724 1.725 1.795 1.797 5.889 2.085 6.822 5.392 9.192 5.464 9.479 7.476 11.203 9.489 12.568 11.215 14.291 12.581 16.230 13.084 16.589 13.443 13.659 14.018 14.809 15.097 16.031 16.463 16.894 TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a by-product of the method, and we calculated for such errors when finding our average. - First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 3.094 2.018 2.019 1.951 5.784 3.094 3.095 2.019 9.080 5.784 5.787 2.442 13.519 7.667 7.671 2.759 15.671 9.080 9.084 3.095 16.209 11.165 11.641 3.634 16.411 13.519 13.526 4.374 15.469 15.477 4.778 15.671 15.679 4.912 16.209 15.881 5.047 16.411 16.419 5.787 8.479 9.084 10.565 11.641 13.526 15.544 15.881 16.823 TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a by-product of the method, and we calculated for such errors when finding our average. Conclusions As our research used the long-cadence data from Kepler, much of the high-frequency variability due to gravitational wave pulsations is lost. However, this presents an opportunity to verify our results with the work of research groups that analyzed short-cadence data.With the data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the periods and accounting for a 1𝝈 margin of error, our research hypothesizes that the rotation period of KIC 8626021 is 1.99 ± 0.02 days. 
Other short-cadence research has found the rotation period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and these periods indicate that the more precise significant period identified through our re-binning relates to the rotation of the white dwarf. Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous - significant frequency disappears, which becomes increasingly likely after successive re-binning processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR improvement ranging from 1.1 dB to 1.3 dB. Through the re-binning process, more lines, or significant frequencies, appeared above the 3𝝈 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to short-cadence analysis, the re-binning process of long-cadence data can be used to identify significant lower frequencies in white dwarfs. The methods we used are also simple and replicable, which allows even those with less experience to quickly analyze the large amounts of data being collected by orbiting telescopes, such as the currently active TESS (Transiting Exoplanet Survey Satellite) telescope. The presence of possible harmonics in the third re-bin of Q13 also indicates the possible presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and contrast (Santos et al., 2017). 
Using the process of re-binning, a starspot signal, previously dominated by noise, may have been discovered. - Acknowledgments We wish to thank Bloomsburg University of Pennsylvania for its continued support of our research. This paper includes data collected by the Kepler mission and obtained from the MAST data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is provided by the NASA Science Mission Directorate. STScI is operated by the Association of Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555. References Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010). PHOTOMETRIC V ARIABILITY IN KEPLER TARGET stars: THE SUN AMONG stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. https://doi.org/10.1088/2041-8205/713/2/L155 Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven-Period asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. https://doi.org/10.1051/epjconf/201510106009 Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal for the History of Astronomy, 40(2), 137-154. https://doi.org/10.1177%2F002182860904000201 Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: Characterization and early results. Publications of the Astronomical Society of the Pacific, 126(938), 398-408. https://doi.org/10.1086/676406 Johnson, M. (Ed.). (2018, October 30). Mission overview. 
National Aeronautics and Space Administration. Retrieved September 2, 2021, from https://www.nasa.gov/mission_pages/kepler/overview/index.html Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen-dalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. https://dx.doi.org/10.1088/2041-8205/713/2/L79 Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39 Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot signature on the light curve. Astronomy & Astrophysics, 599, A1. https://doi.org/10.1051/0004-6361/201629923 - Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. Annual Review of Astronomy and Astrophyics, 46(1), 157-199. https://doi.org/10.1146/annurev.astro.46.060407.145250 Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021). \ No newline at end of file +Abstract + +As is common with the collection of astronomical data, signals are frequently dominated +by noise. However, when performing FTs of light curves, re-binning data can improve the signalto-noise + ratio (SNR) at lower frequencies. Using data collected from the K epler space telescope, +we sequentially re-binned data three times to investigate the SNR improvement of lower frequency +(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8 +µHz greatly improved through this process, and we postulate that this frequen cy is linked to the +rotation of KIC 8626021. 
+ + +Introduction + +First detected in 1862, white dwarfs long posed a mystery for early observ ers. When the +companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and +densities baffled astronomers. Lacking full understanding of atom ic structures and the energy +states of electrons, these early researchers believed white dwarfs too dense to exist. However, new +discoveries at the turn of the 20th century explained the existence of these stars, and between the +world wars white dwarfs were increasingly studied and modeled (Holberg, 2009). +As stars age, those that lack the mass to become neutron stars and black holes become +white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). They are +composed of a core o f carbon and oxygen ions that slowly cools over billions of years, and the +light emanating from these stars is a result of thermal energy. White dwarf stars are no longer +supported against the force of gravity by fusion, so the stars collapse into an elect ron-degenerate +state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two +electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from +collapsing entirely. +For many years, accurate detection of light variability in white dwarfs was difficult due to +a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made +capturing the light of distant stars much more efficient and effective (Basri et al., 2010 ). Kepler +was initially developed with the intention of surveying our region of the Milky Way galaxy in +order to find potentially habitable planets. The purpose of the mission was to identify key traits for +such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, +and the characteristics of the stars being orbited. 
Over the lifespan of its first mission, Kepler +observed approximately 1.5 x 10 5 stars (Johnson, 2018) , affording scientists excellent +opportunities to research stel lar variability. Due to the loss of a second reaction wheel in 2013, +NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and +astrophysics. +Utilizing Kepler’s ability to maintain three -dimensional control, NASA proceeded to use +the telescope to collect photometry data of certain sections of our galaxy, although the number of +targets was significantly reduced. In addition, the K2 mission was designed to be community oriented, + with the scientific community having an influence on the fields observed and serving as +the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was +deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of +white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations +Center (KASOC). + +The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon +previous studies, this research investigated novel techniques of analyzing variability in white +dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on +the star, allowing for the validation of results using our methods. KIC 8626021 has an effective +temperature of 29,700 K, log g = 7.890, and mass of 0.56 M☉ (Córsico, 2020). Other research +has found that this white dwarf is the DBV with the highest known temperature, and its helium +layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too +noisy to draw many conclusions, other FTs of short-cadence data have been performed to find +variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with +frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz +(Østensen et al., 2011). 
These findings confirm the classification of the white dwarf as a V777 +Herculis, although our research focuses on low frequencies using long-cadence data. + + + +Methods + +All data were downloaded from the KASOC database, and the long-cadence (data +sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were +analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs +were performed in Mathematica. The re-binning process consisted of summing adjacent light +curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one +hour, and then repeating this process on the data sample for a total of three times. In addition, a +significant detection was defined as being 3𝝈 above the mean of the relative flux, and 0 on the +graphs below represents this 3𝝈 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To +find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify +improvement in signal strength. + + +Results + + Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), +with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs +of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the +same for Q13. + Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of +KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and +Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 +and Q13. + + + + + + + + + + +FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 +lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012. 
+Both graphs were constructed by plotting corrected flux magnitude (flux corrected for +instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating +between points. Q7 had forty-three interpolated points, and Q13 had sixty-six. + + + + + + + + + + + + + + +FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of +the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The +disappearance of the frequency in the last FT is most likely a byproduct of the method, and the +spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re-binning +process. + + + + + + + + + + + + + + +FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins +of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In +addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3𝝈 and are +nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a +starspot (Santos et al., 2017). + + + + + + + + + + + + + + + +Q7 Significant +Data Points +Light +Variability +Frequency +(µHz) +Corrected Flux +Magnitude +(ppm) +Period (days) Signal-to-Noise +(dB) +Q7 First +Iteration +5.886 -1.198 1.966 9.9 +Q7 Re-bin 1 5.886 -1.477 1.966 12.8 +Q7 Re-bin 2 5.889 0.597 1.965 19.2 +TABLE I: The table displays the various frequencies collected from Q7 and the information +found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, +and therefore was not used in any calculations determining the average period of rotation. The +values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus +negative numbers are under the cutoff. 
+ + + +Q13 Significant +Data Points +Light +Variability +Frequency +(µHz) +Corrected Flux +Magnitude +(ppm) +Period (days) Signal-to-Noise +(dB) +Q13 First +Iteration +5.784 1.555 2.001 15.6 +Q13 Re-bin 1 5.784 2.873 2.001 17.7 +Q13 Re-bin 2 5.787 4.938 2.000 22.6 +Q13 Re-bin 3 5.787 6.909 2.000 26.3 +Q13 Re-bin 3 11.641 7.073 0.994 26.4 +Q13 Re-bin 3 16.823 2.299 0.688 24.1 +TABLE II: The table displays the various frequencies collected from Q13 and the information +found through calculations to find period and SNR. The last two significant frequencies (11.641 +µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in +further detail in the Conclusions section of this paper. The values under corrected flux magnitude +are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff. + + + + + + + + + + +First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) +0.933 0.933 0.215 0.216 +1.148 1.148 0.575 0.575 +1.364 1.364 0.934 0.935 +1.507 1.507 1.005 1.006 +12.561 12.561 1.149 1.150 +16.581 16.581 1.221 1.222 + 1.364 1.366 + 1.508 1.509 + 1.580 1.582 + 1.724 1.725 + 1.795 1.797 + 5.889 2.085 + 6.822 5.392 + 9.192 5.464 + 9.479 7.476 + 11.203 9.489 + 12.568 11.215 + 14.291 12.581 + 16.230 13.084 + 16.589 13.443 + 13.659 + 14.018 + 14.809 + 15.097 + 16.031 + 16.463 + 16.894 +TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) +above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a byproduct + of the method, and we calculated for such errors when finding our average. 
+ + + + + +First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) +3.094 2.018 2.019 1.951 +5.784 3.094 3.095 2.019 +9.080 5.784 5.787 2.442 +13.519 7.667 7.671 2.759 +15.671 9.080 9.084 3.095 +16.209 11.165 11.641 3.634 +16.411 13.519 13.526 4.374 + 15.469 15.477 4.778 + 15.671 15.679 4.912 + 16.209 15.881 5.047 + 16.411 16.419 5.787 + 8.479 + 9.084 + 10.565 + 11.641 + 13.526 + 15.544 + 15.881 + 16.823 +TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) +above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a byproduct +of the method, and we calculated for such errors when finding our average. + + +Conclusions + +As our research used the long-cadence data from Kepler, much of the high-frequency +variability due to gravitational wave pulsations is lost. However, this presents an opportunity to +verify our results with the work of research groups that analyzed short-cadence data. With the +data analyzed, the lower frequencies between 5-6 µHz emerged. After finding the average of the +periods and accounting for a 1𝝈 margin of error, our research hypothesizes that the rotation +period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation +period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et +al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and +these periods indicate that the more precise significant period identified through our re-binning +relates to the rotation of the white dwarf. +Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it +improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous + +significant frequency disappears, which becomes increasingly likely after successive re-binning +processes. 
The frequency 5.464 µHz rises as another significant frequency; however, we believe +that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR +improvement ranging from 1.1 dB to 1.3 dB. +Through the re-binning process, more lines, or significant frequencies, appeared above +the 3𝝈 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to +short-cadence analysis, the re-binning process of long-cadence data can be used to identify +significant lower frequencies in white dwarfs. The methods we used are also simple and +replicable, which allows even those with less experience to quickly analyze the large amounts of +data being collected by orbiting telescopes, such as the currently active TESS (Transiting +Exoplanet Survey Satellite) telescope. +The presence of possible harmonics in the third re-bin of Q13 also indicates the possible +presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These +spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, +the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and +contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously +dominated by noise, may have been discovered. + +Acknowledgments + +We wish to thank Bloomsburg University of Pennsylvania for its continued support of our +research. +This paper includes data collected by the Kepler mission and obtained from the MAST +data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is +provided by the NASA Science Mission Directorate. STScI is operated by the Association of +Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555. + + +References + + Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., +Caldwell, D., Dupree, A. K., Latham, D. 
W., Meibom, S., Howell, S., & Brown, T. (2010). +PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG +stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. +https://doi.org/10.1088/2041-8205/713/2/L155 +Bischoff-Kim, A., Østensen, R. H., Hermes, J. J., & Provencal, J. L. (2015). Seven-Period +asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. +https://doi.org/10.1051/epjconf/201510106009 +Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers +in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 +Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal +for the History of Astronomy, 40(2), 137-154. +https://doi.org/10.1177%2F002182860904000201 +Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., +Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., +Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: +Characterization and early results. Publications of the Astronomical Society of the Pacific, +126(938), 398-408. https://doi.org/10.1086/676406 +Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space +Administration. Retrieved September 2, 2021, from +https://www.nasa.gov/mission_pages/kepler/overview/index.html +Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen-Dalsgaard, +J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., +Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, +G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC +performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. +https://dx.doi.org/10.1088/2041-8205/713/2/L79 +Østensen, R. 
H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & +Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The +Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39 +Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot +signature on the light curve. Astronomy & Astrophysics, 599, A1. +https://doi.org/10.1051/0004-6361/201629923 + +Winget, D. E., & Kepler, S. O. (2008). Pulsating white dwarf stars and precision asteroseismology. +Annual Review of Astronomy and Astrophysics, 46(1), 157-199. +https://doi.org/10.1146/annurev.astro.46.060407.145250 +Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021). \ No newline at end of file diff --git a/read/results/pypdf/2201.00037.txt b/read/results/pypdf/2201.00037.txt index ecb4993..96fe280 100644 --- a/read/results/pypdf/2201.00037.txt +++ b/read/results/pypdf/2201.00037.txt @@ -1,17 +1,18 @@ Confidential manuscript submitted to JGR-Planets The influence of a fluid core and a solid inner core on the Cassini sate of Mercury -Mathieu Dumberry1 +Mathieu Dumberry 1 1Department of Physics, University of Alberta, Edmonton, Alberta, Canada. Key Points: -•The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid +• The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. -•For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid +• For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid cores into a common precession motion. -•The larger the inner core is, the more the obliquity of the polar moment of inertia approaches +• The larger the inner core is, the more the obliquity of the polar moment of inertia approaches that expected for a rigid planet. 
Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca -–1–arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 202 +–1– +arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 202 Confidential manuscript submitted to JGR-Planets Abstract We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core @@ -44,51 +45,66 @@ offset smaller than the present-day error in measurements. We also show that the solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body. 1 Introduction Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spinsymmetry - axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo , -1966; Peale , 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but -its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot , + axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo, +1966; Peale, 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but +its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot, 2006; Baland et al. , 2017]. Likewise, the rate of precession is also not observed directly, but is reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 -yr with an inclination angle of I= 8.5330◦between the orbit and Laplace plane normals [ Baland +yr with an inclination angle of I = 8.5330◦between the orbit and Laplace plane normals [ Baland et al. , 2017]. Measurements of the obliquity εm, defined as the angle of misalignment between the spin-symmetry axis and the orbit normal, have been obtained by different techniques, including ground based radar observations [ Margot et al. , 2007, 2012], and stereo digital terrain - images [ Stark et al. , 2015a] and radio tracking data [ Mazarico et al. , 2014; Verma and Margot, + images [Stark et al. 
, 2015a] and radio tracking data [ Mazarico et al. , 2014; Verma and Margot, 2016; Genova et al. , 2019; Konopliv et al. , 2020] from the MErcury Surface Space ENvironment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals -and consistent with a Cassini state. Furthermore, the observed obliquity angle (2 .042±0.08 +and consistent with a Cassini state. Furthermore, the observed obliquity angle (2 .042 ±0.08 –2– Confidential manuscript submitted to JGR-Planets I descending -node of orbitΩpê3I -Iê3L εmI ê3p -ascendingnode of orbit -descendingnode of equatorequatorial plane -orbitaldirection -Sê3Iê3L +node of orbit +Ωp +ê3 +I +I +ê3 +LεmI ê3 +p +ascending +node of orbit +descending +node of equator +equatorial +plane +orbital +direction +S +ê3 +Iê3 +L M -εmorbital plane +εm +orbital plane Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded rectangle) and the Cassini state of Mercury. The normal to the orbital plane ( ˆeI 3) is offset from the normal to the Laplace plane ( ˆeL -3) by an angle I= 8.5330◦. The symmetry axis of the mantle ˆep -3is offset +3 ) by an angle I = 8 .5330◦. The symmetry axis of the mantle ˆep +3 is offset from ˆeI -3byεm≈2 arcmin. ˆeI -3andˆep -3are coplanar with, and precess about, ˆeL -3in a retrograde direction -at frequency Ω p= 2π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit +3 by εm ≈ 2 arcmin. ˆeI +3 and ˆep +3 are coplanar with, and precess about, ˆeL +3 in a retrograde direction +at frequency Ωp = 2 π/325,513 yr−1. The blue (orange) shaded region indicates the portion of the orbit when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. -arcmin [ Margot et al. , 2012], 2.029±0.085 arcmin [ Stark et al. , 2015a] and 1 .968±0.027 [ Genova +arcmin [Margot et al. , 2012], 2.029±0.085 arcmin [Stark et al. 
, 2015a] and 1 .968±0.027 [Genova et al. , 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. The prediction of Mercury’s obliquity is based on the assumption that the whole planet precesses as a single body. However, we know that Mercury has a fluid core from two main lines of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by -dynamo action [ Anderson et al. , 2011, 2012; Johnson et al. , 2012]. This requires fluid motion +dynamo action [Anderson et al. , 2011, 2012; Johnson et al. , 2012]. This requires fluid motion in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed amplitude of the 88-day longitudinal libration is approximately twice as large as that expected if Mercury were librating as a rigid body [ Margot et al. , 2007, 2012; Stark et al. , 2015a]. @@ -101,7 +117,7 @@ approximate limit of 800 km on the inner core radius [ Grott et al. , 2011]. How core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. –3– Confidential manuscript submitted to JGR-Planets -With a fluid core, and possibly a solid inner core, the observed obliquity εmreflects the +With a fluid core, and possibly a solid inner core, the observed obliquity εm reflects the orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dissipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spinsymmetry axis of the inner core should both also precess about the normal to the Laplace plane @@ -110,7 +126,7 @@ plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek , their obliquity angles may be different than εm. 
Whether the spin axis of the fluid core is brought into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the -misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´ e , 1910]. The more flattened +misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´ e, 1910]. The more flattened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s CMB is not known. But if one assumes that the topography of the CMB coincides with an equipotential @@ -132,7 +148,7 @@ mantle. Conversely, if the pressure torque at the ICB is the largest, the inner at the ICB should also enforce a closer alignment between the rotation vectors of the inner core and fluid core. It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury - is inferred [e.g. Peale , 1976; Margot et al. , 2018]. Inherent in this calculation is the builtin + is inferred [e.g. Peale, 1976; Margot et al. , 2018]. Inherent in this calculation is the builtin assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial amount. However, the recent study by Peale et al. [2016] suggests that the inner core can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the @@ -143,7 +159,7 @@ does not coincide with the orientation of the polar moment of inertia of the who can introduce a systematic offset between different types of obliquity measurements. Those based on tracking topographic features [ Margot et al. , 2007, 2012; Stark et al. , 2015a] capture the obliquity of the mantle spin axis. While those based on the orientation of the gravity field [ Mazarico -et al. , 2014; Verma and Margot , 2016; Genova et al. 
, 2019; Konopliv et al. , 2020] are instead +et al., 2014; Verma and Margot , 2016; Genova et al. , 2019; Konopliv et al. , 2020] are instead tied to the orientation of the principal moment of inertia of the whole planet. An offset of the obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the size of the inner core, even though this is difficult to do at present because the different estimates @@ -164,134 +180,163 @@ differ from that of an entirely rigid Mercury, and third, by how much the obliqu 2.1 The interior structure of Mercury Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted -byrs,rf,rm, andR, and their densities by ρs,ρf,ρm, andρc, respectively. The inner core radiusrscorresponds - to the ICB radius, the fluid core radius rfto the CMB radius, and R= +by rs, rf , rm, and R, and their densities by ρs, ρf , ρm, and ρc, respectively. The inner core radius + rs corresponds to the ICB radius, the fluid core radius rf to the CMB radius, and R= 2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure with depth are not negligible in the core of Mercury. However adopting uniform densities simplifies the analytical expressions of the model while still capturing the first order rotational dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same strategy facilitates comparisons between our results. -We build our interior model as detailed in Peale et al. [2016]. We first specify rs,ρs(or -a density contrast at the ICB), the crustal density ρcand crustal thickness h=R−rm. The -three unknowns rf,ρfandρmare then solved such that the interior model is consistent with -the known mass Mand chosen values of the moments of inertia of the whole planet Cand that +We build our interior model as detailed in Peale et al. 
[2016]. We first specify rs, ρs (or +a density contrast at the ICB), the crustal density ρc and crustal thickness h= R−rm. The +three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with +the known mass M and chosen values of the moments of inertia of the whole planet C and that of the mantle and crust Cm. Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) -byϵi, defined as the difference between the mean equatorial and polar radii, divided by the mean +by ϵi, defined as the difference between the mean equatorial and polar radii, divided by the mean spherical radius. Likewise, we denote the equatorial flattening by the variable ξi, defined as the difference between the maximum and minimum equatorial radii, divided by the mean spherical - radius. As above, we use the subscript i=s,f,mandr, to denote the polar or equatorial + radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equatorial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic equilibrium with the imposed gravitational potential induced by the flattenings at the CrMB and surface. The flattenings at all interior boundaries are specified such that they are -consistent with the observed degree 2 spherical harmonic coefficients of gravity J2andC22; their -numerical values are given in Table 1. Specifically, J2andC22are connected to the principal -moments of inertia of Mercury ( C >B >A ) and to the polar and equatorial flattenings by -J2=C−¯A -MR2=8π -151 -MR2[ -(ρs−ρf)r5 -sϵs+ (ρf−ρm)r5 -fϵf+ (ρm−ρc)r5 -mϵm+ρcR5ϵr] +consistent with the observed degree 2 spherical harmonic coefficients of gravity J2 and C22; their +numerical values are given in Table 1. 
Specifically, J2 and C22 are connected to the principal +moments of inertia of Mercury ( C >B >A) and to the polar and equatorial flattenings by +J2 = C−¯A +MR2 = 8π +15 +1 +MR2 +[ +(ρs −ρf )r5 +sϵs + (ρf −ρm)r5 +f ϵf + (ρm −ρc)r5 +mϵm + ρcR5ϵr +] , (1a) -C22=B−A -4MR2=8π -151 -4MR2[ -(ρs−ρf)r5 -sξs+ (ρf−ρm)r5 -fξf+ (ρm−ρc)r5 -mξm+ρcR5ξr] -.(1b) -where ¯Ais the mean equatorial moment of inertia defined below. The same procedure was used -inPeale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry +C22 = B−A +4MR2 = 8π +15 +1 +4MR2 +[ +(ρs −ρf )r5 +sξs + (ρf −ρm)r5 +f ξf + (ρm −ρc)r5 +mξm + ρcR5ξr +] +. (1b) +where ¯A is the mean equatorial moment of inertia defined below. The same procedure was used +in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry –5– Confidential manuscript submitted to JGR-Planets Mercury Parameter Numerical value Reference -mean motion, n 2π/87.96935 day−1Stark et al. [2015b] -rotation rate, Ω o= 1.5n 2π/58.64623 day−1Stark et al. [2015b] -orbit precession rate, Ω p 2π/325,513 yr−1Baland et al. [2017] -Poincar´ e number, δω= Ωp/Ωo4.9327×10−7 +mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] +rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b] +orbit precession rate, Ω p 2π/325,513 yr−1 Baland et al. [2017] +Poincar´ e number,δω = Ωp/Ωo 4.9327 ×10−7 orbital eccentricity, ec 0.20563 Baland et al. [2017] -orbital inclination, I 8.5330◦Baland et al. [2017] +orbital inclination, I 8.5330◦ Baland et al. [2017] mean planetary radius, R 2439.360 km Perry et al. [2015] -mass,M 3.3012×1023kg Genova et al. [2019] -mean density, ¯ ρ 5429.5 kg m−3 -J2 5.0291×10−5Genova et al. [2019] -C22 8.0415×10−6Genova et al. [2019] -polar surface flattening, ϵr 6.7436×10−4Perry et al. [2015] -equatorial surface flattening, ξr5.1243×10−4Perry et al. [2015] -Table 1. Reference parameters for Mercury. The mass Mis computed from GM = 22031.8636×109 -m3/s2taken from Genova et al. 
[2019]. The mean density is calculated from4π -3¯ρR3=M. The numerical -values ofϵrandξrare calculated from ϵr= (¯a−c)/Randξr= (a−b)/R, where ¯a=1 -2(a+b) and where -a= 2440.53 km,b= 2439.28 km and c= 2438.26 km are the semimajor, intermediate and semiminor -axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015].J2andC22are -computed from Equation (4) in the Supporting Information of Genova et al. [2019]. +mass, M 3.3012 ×1023 kg Genova et al. [2019] +mean density, ¯ρ 5429.5 kg m−3 +J2 5.0291 ×10−5 Genova et al. [2019] +C22 8.0415 ×10−6 Genova et al. [2019] +polar surface flattening, ϵr 6.7436 ×10−4 Perry et al. [2015] +equatorial surface flattening, ξr 5.1243 ×10−4 Perry et al. [2015] +Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031 .8636 × 109 +m3/s2 taken from Genova et al.[2019]. The mean density is calculated from 4π +3 ¯ρR3 = M. The numerical +values of ϵr and ξr are calculated from ϵr = (¯a− c)/R and ξr = (a− b)/R, where ¯a= 1 +2 (a+ b) and where +a = 2440 .53 km, b = 2439 .28 km and c = 2438 .26 km are the semimajor, intermediate and semiminor +axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al.[2015]. J2 and C22 are +computed from Equation (4) in the Supporting Information of Genova et al.[2019]. and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography and the axes of the principal moments of inertia, which amount to a polar offset of ∼2◦ and an equatorial offset of ∼15◦[Perry et al. , 2015]. 
Once the densities and flattenings of all interior regions are known, we can specify the moments - of inertia of the fluid core ( Cf> B f> A f) and solid inner core ( Cs> B s> A s) + of inertia of the fluid core ( Cf > Bf > Af ) and solid inner core ( Cs > Bs > As) along with the mean equatorial moments of inertia -¯A=1 -2(A+B), ¯Af=1 -2(Af+Bf), ¯As=1 -2(As+Bs). (2) -From these, we define the polar ( e,ef,es) and equatorial ( γ,γs) dynamical ellipticities of the +¯A= 1 +2(A+ B) , ¯Af = 1 +2(Af + Bf ) , ¯As = 1 +2(As + Bs) . (2) +From these, we define the polar ( e, ef , es) and equatorial ( γ, γs) dynamical ellipticities of the whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which enter our rotational model, -e=C−¯A -¯Aef=Cf−¯Af -¯Afes=Cs−¯As -¯As, (3a) -γ=B−A -¯Aγs=Bs−As -¯As. (3b) -We further note that eandγare connected to J2andC22by -e=MR2 -¯AJ2, γ =4MR2 -¯AC22. (4) +e= C−¯A +¯A ef = Cf −¯Af +¯Af +es = Cs −¯As +¯As +, (3a) +γ = B−A +¯A γs = Bs −As +¯As +. (3b) +We further note that e and γ are connected to J2 and C22 by +e= MR2 +¯A J2 , γ = 4MR2 +¯A C22 . (4) –6– Confidential manuscript submitted to JGR-Planets θm θn θs -θfΩ +θf +Ω Ωs -Ωfê3p -ê3s ê3I -Iεm -θpê3L -ê1pê2p +Ωf +ê3 +p +ê3 +sê3 +I +I +εm +θp +ê3 +L +ê1 +p +ê2 +p Cassini plane -ωΩotê3I -Iεmê3p -ê1ê2pê3La) b) +ωΩot +ê3 +I +I εm +ê3 +p +ê1 +ê2 +p +ê3 +L +a) b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) in a frame attached to the rotating mantle. The orbit normal ( ˆeI -3) is tilted by an angle I= 8.533◦from +3) is tilted by an angle I = 8 .533◦ from the Laplace normal ( ˆeL -3) and the symmetry axis of Mercury’s mantle ( ˆep +3 ) and the symmetry axis of Mercury’s mantle ( ˆep 3) is tilted by an obliquity εm with respect to ˆeI 3. 
Shown in (a) are the orientations of the symmetry axis of the inner core ( ˆes 3), the -rotation rate vectors of the mantle ( Ω), fluid core ( Ωf) and inner core ( Ωf) and angles θp,θn,θm,θf -andθsin their Cassini state equilibrium. All vectors and angles are in a common plane which we refer +rotation rate vectors of the mantle ( Ω), fluid core ( Ωf) and inner core ( Ωf) and angles θp, θn, θm, θf +and θs in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial mantle axes ˆep -1andˆep -2with respect to the Cassini plane. Viewed in the frame attached to the rotating -mantle (b), the Cassini plane is rotating at frequency ωΩo=−Ωo−ΩpcosIin the longitudinal direction. +1 and ˆep +2 with respect to the Cassini plane. Viewed in the frame attached to the rotating +mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωpcos I in the longitudinal direction. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration. –7– @@ -300,73 +345,73 @@ Confidential manuscript submitted to JGR-Planets Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is 87.96935 day and the sidereal rotation period is 58.64623 day [ Stark et al. , 2015b]. These define - the mean motion n= 2π/87.96935 day−1and the sidereal frequency Ω o= 2π/58.64623 -day−1, with Ω o= 1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby + the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω o = 2π/58.64623 +day−1, with Ωo = 1.5 n. 
Mercury’s rotational state is also characterized by a Cassini state whereby the orientations of the orbit normal ( ˆeI 3) and of the mantle symmetry axis ( ˆep 3) are both coplanar with, and precess about, the normal to the Laplace plane ( ˆeL -3). The orientation of the Laplace +3 ). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆeL -3andˆeI +3 and ˆeI 3 -is the orbital inclination I= 8.5330◦[Baland et al. , 2017], the angle between ˆeI -3andˆep -3is the -obliquityεmand the angle between ˆeL -3andˆep -3isθp=I+εm. The precession of ˆeI -3andˆep +is the orbital inclination I = 8.5330◦[Baland et al. , 2017], the angle between ˆeI +3 and ˆep +3 is the +obliquity εm and the angle between ˆeL +3 and ˆep +3 is θp = I + εm. The precession of ˆeI +3 and ˆep 3 -about the Laplace pole is retrograde with frequency Ω p= 2π/325,513 yr−1[Baland et al. , 2017]. +about the Laplace pole is retrograde with frequency Ω p = 2π/325,513 yr−1 [Baland et al. , 2017]. The mantle and crust are welded together and form a single rotating region which we refer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes of the mantle are expected to remain in close alignment, but they do not coincide exactly. We define the rotation rate vector of the mantle by Ω, and its misalignment from ˆep -3by an angle -θm. Note that θm≪εmand it is often the spin axis of Mercury which is used to define the -obliquityεm[e.g. Baland et al. , 2017]. If Mercury were an entirely rigid planet, ˆep -3andΩwould +3 by an angle +θm. Note that θm ≪εm and it is often the spin axis of Mercury which is used to define the +obliquity εm [e.g. Baland et al. , 2017]. 
If Mercury were an entirely rigid planet, ˆep +3 and Ω would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and -the anglesI,εmandθmwould completely describe the Cassini state. The presence of a fluid +the angles I, εm and θm would completely describe the Cassini state. The presence of a fluid outer core and solid inner core require three additional orientation vectors and angles. The symmetry axis of the inner core is defined by unit vector ˆes -3and its misalignment from ˆep -3by an -angleθn. The rotation vectors of the fluid core and inner core are defined as ΩfandΩs, respectively, - and their misalignment from the rotation vector of the mantle Ωare defined by anglesθfandθs(see - Figure 2a). The rotation and symmetry axes of the inner core remain in close -alignment, so θn≈θs. To be formal in our definition of the different angles of misalignment, -forIdefined positive as depicted on Figure 2a, all angles are defined positive in the clockwise +3 and its misalignment from ˆep +3 by an +angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, respectively, + and their misalignment from the rotation vector of the mantle Ω are defined by angles + θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close +alignment, so θn ≈θs. To be formal in our definition of the different angles of misalignment, +for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. At equilibrium in the Cassini state, the three orientation vectors ( ˆeI -3,ˆep -3,ˆes +3, ˆep +3, ˆes 3) and three -rotation vectors ( Ω,Ωf,Ωs) are forced to precess about ˆeL -3at the same frequency. If we neglect +rotation vectors (Ω, Ωf, Ωs) are forced to precess about ˆeL +3 at the same frequency. If we neglect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. 
Viewed in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω p. Viewed in the frame attached to the mantle rotating at sidereal frequency Ω o, the Cassini plane is rotating - in a retrograde direction at frequency ωΩo(see Figure 2b), where ω, expressed in cycles + in a retrograde direction at frequency ωΩo (see Figure 2b), where ω, expressed in cycles per Mercury day, is equal to -ω=−1−δωcos(θp). (5) -The factor δω= Ω p/Ωo= 4.933×10−7is the Poincar´ e number, expressing the ratio of the +ω= −1 −δωcos(θp) . (5) +The factor δω = Ωp/Ωo = 4.933 ×10−7 is the Poincar´ e number, expressing the ratio of the forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal as seen in the mantle frame is expressed as d dtˆeL -3+Ω׈eL -3=0, (6) +3 + Ω ׈eL +3 = 0 , (6) or equivalently, by Equation (19e) of Stys and Dumberry [2018], -ωsin(θp) + sin(θm+θp) = 0. (7) +ωsin(θp) + sin(θm + θp) = 0 . (7) –8– Confidential manuscript submitted to JGR-Planets -This expresses a formal connection between θpandθmwhich is independent of the interior structure - of Mercury. Using Equation (5) and cos( θm)→1, this connection can be rewritten as -sin(θm) =δωsin(θp). (8) -and thus the relative amplitudes of θmandθpdepend of the Poincar´ e number δω. +This expresses a formal connection between θp and θm which is independent of the interior structure + of Mercury. Using Equation (5) and cos( θm) →1, this connection can be rewritten as +sin(θm) = δω sin(θp) . (8) +and thus the relative amplitudes of θm and θp depend of the Poincar´ e numberδω. To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage of the framework developed in Mathews et al. [1991] to model the forced nutations of Earth [see also Mathews et al. , 2002; Dehant and Mathews , 2015]. This model takes into account @@ -374,9 +419,9 @@ Earth [see also Mathews et al. , 2002; Dehant and Mathews , 2015]. 
This model ta of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been -incorporated into the framework [e.g Buffett , 1992; Buffett et al. , 2002; Mathews and Guo , 2005; +incorporated into the framework [e.g Buffett, 1992; Buffett et al. , 2002; Mathews and Guo , 2005; Deleplace and Cardin , 2006]. The framework was adapted to model the Cassini state of the Moon -inDumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski +in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury. Because the forced precession period is much longer than the rotation and orbital periods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean @@ -384,24 +429,24 @@ torque averaged over one orbit. This mean torque is perpendicular to the Cassini in the same direction as the vector connecting the Sun to the descending node of Mercury’s orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque is periodic, rotating at frequency ωΩo. Setting the equatorial directions ˆep -1andˆep -2to correspond +1 and ˆep +2 to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial components of this periodic applied torque in a compact form as -Γ1(t) +iΓ2(t) =−i˜Γ(ω) exp[iωΩot], (9) +Γ1(t) + iΓ2(t) = −i˜Γ(ω) exp[iωΩot] , (9) where ˜Γ(ω) represents the amplitude of the torque at frequency ωΩo. In response to this torque, -the axes defining all angles ( θp,εm,θm,θf,θs,θn) as viewed in the mantle frame are also rotating - at frequency ωΩo(see Figure 2). 
The longitudinal direction of each of these angles at -a specific time tcan then also be written in the equatorial complex plane and is proportional -to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1andθm2of the -angleθm, as seen in the mantle frame, can be written as -θm1(t) +iθm2(t) = ˜mexp[iωΩot], (10a) +the axes defining all angles ( θp, εm, θm, θf , θs, θn) as viewed in the mantle frame are also rotating + at frequency ωΩo (see Figure 2). The longitudinal direction of each of these angles at +a specific time t can then also be written in the equatorial complex plane and is proportional +to exp[iωΩot]. For instance, the two equatorial time-dependent components θm1 and θm2 of the +angle θm, as seen in the mantle frame, can be written as +θm1(t) + iθm2(t) = ˜m exp[iωΩot] , (10a) where -˜m≡˜m(ω) =Re[ ˜m] +iIm[ ˜m], (10b) +˜m≡˜m(ω) = Re[ ˜m] + iIm[ ˜m] , (10b) is the amplitude at frequency ωΩo. Equivalent definitions apply for all other angles, with the connection as follows: -θm⇔˜m, θ f⇔˜mf, θ s⇔˜ms, θ n⇔˜ns, θ p⇔˜p, ε m⇔˜εm. (11) -The notation ˜ m, ˜mf, ˜ms, ˜nsfollows that introduced in the original model of Mathews et al. [1991]. +θm ⇔˜m, θ f ⇔˜mf , θ s ⇔˜ms , θ n ⇔˜ns , θ p ⇔˜p, ε m ⇔˜εm . (11) +The notation ˜m, ˜mf , ˜ms, ˜ns follows that introduced in the original model of Mathews et al. [1991]. Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response to the applied torque as a result of dissipation, for instance from viscous or EM coupling –9– @@ -411,8 +456,8 @@ real. We concentrate our analysis in this work on the real part of the solutions to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ εm corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to εm, though we keep the tilde notation in the presentation of our results to emphasize that it represents - the real part of the solution from our system. 
Furthermore, since ˜ m≪˜εm, we often -refer to ˜εmas the orientation of spin axis of the mantle, since the Cassini state of Mercury is + the real part of the solution from our system. Furthermore, since ˜ m ≪˜εm, we often +refer to ˜εm as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. The model of Mathews et al. [1991] is developed under the assumption of small angles as appropriate for the nutations on Earth. The details on how the equations of the model are derived @@ -420,120 +465,163 @@ appropriate for the nutations on Earth. The details on how the equations of the describe, respectively, the time rate of change of the angular momenta of the whole of Mercury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three equations are -(ω−e) ˜m+ (1 +ω)[¯Af -¯A˜mf+¯As -¯A˜ms+α3es¯As -¯A˜ns] -=1 -iΩ2o¯A( -˜Γsun) +(ω−e) ˜m+ (1 +ω) +[¯Af +¯A ˜mf + +¯As +¯A ˜ms + α3es +¯As +¯A ˜ns +] += 1 +iΩ2o ¯A +( +˜Γsun +) , (12a) -ω˜m+ (1 +ω+ef) ˜mf−ωα1es¯As -¯Af˜ns=1 -iΩ2o¯Af( -−˜Γcmb−˜Γicb) +ω˜m+ (1 +ω+ ef ) ˜mf −ωα1es +¯As +¯Af +˜ns = 1 +iΩ2o ¯Af +( +−˜Γcmb −˜Γicb +) , (12b) -(ω−α3es) ˜m+α1es˜mf+ (1 +ω) ˜ms+ (1 +ω−α2)es˜ns=1 -iΩ2o¯As( +(ω−α3es) ˜m+ α1es ˜mf + (1 +ω) ˜ms + (1 +ω−α2) es˜ns = 1 +iΩ2o ¯As +( ˜Γs -sun+˜Γicb) +sun + ˜Γicb +) , (12c) and a fourth equation consists of a kinematic relation that expresses the change in the orientation of the inner core figure as a result of its own rotation, -˜ms+ω˜ns= 0. (12d) -In these equations, the parameters α1,α2andα3involve the density contrast at the ICB +˜ms + ω˜ns = 0 . 
(12d) +In these equations, the parameters α1, α2 and α3 involve the density contrast at the ICB and are given by -α1=ρf -ρs, α 3= 1−α1, α 2=α1−α3αg, (13a) -where the parameter αgis a measure of the ratio of the gravitational to inertial torque applied +α1 = ρf +ρs +, α 3 = 1 −α1 , α 2 = α1 −α3αg , (13a) +where the parameter αg is a measure of the ratio of the gravitational to inertial torque applied on the inner core, -αg=8πG -5Ω2o[ρc(ϵr−ϵm) +ρm(ϵm−ϵf) +ρfϵf], (13b) -whereGis the gravitational constant. -˜Γsunis the amplitude of the gravitational torque by the Sun on the whole of Mercury. For -a small mantle obliquity ˜ εmand a small inner core tilt ˜ ns, it is given by -˜Γsun=−iΩ2 -o¯A( -φm˜εm+¯As -¯Aα3φs˜ns) +αg = 8πG +5Ω2o +[ρc(ϵr −ϵm) + ρm(ϵm −ϵf ) + ρf ϵf ] , (13b) +where G is the gravitational constant. +˜Γsun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For +a small mantle obliquity ˜εm and a small inner core tilt ˜ns, it is given by +˜Γsun = −iΩ2 +o ¯A +( +φm˜εm + +¯As +¯A α3φs˜ns +) , (14) where –10– Confidential manuscript submitted to JGR-Planets -φm=3 -2n2 -Ω2o[ -G210e+1 -2G201γ] +φm = 3 +2 +n2 +Ω2o +[ +G210 e+ 1 +2G201 γ +] , (15a) -φs=3 -2n2 -Ω2o[ -G210es+1 -2G201γs] +φs = 3 +2 +n2 +Ω2o +[ +G210 es + 1 +2G201 γs +] , (15b) -and whereG210andG201are functions of the orbital eccentricity ec, -G210=1 -(1−e2c)3/2, (16a) -G201=7 -2ec−123 -16e3 -c+489 +and where G210 and G201 are functions of the orbital eccentricity ec, +G210 = 1 +(1 −e2c)3/2 , (16a) +G201 = 7 +2ec −123 +16 e3 +c + 489 128e5 -c. (16b) +c . (16b) The gravitational torque by the Sun acting on the inner core alone, ˜Γs sun, is ˜Γs -sun=−iΩ2 -o¯Asα3φs(˜εm+ ˜ns). (17) -˜Γcmband˜Γicbare the torques from tangential stresses by the fluid core on the mantle at the +sun = −iΩ2 +o ¯Asα3φs(˜εm + ˜ns) . (17) +˜Γcmb and ˜Γicb are the torques from tangential stresses by the fluid core on the mantle at the CMB and on the inner core at the ICB, respectively. 
These torques can be parameterized in -terms of dimensionless complex coupling constants KicbandKcmband the differential angular - velocities at each boundary [e.g Buffett , 1992; Buffett et al. , 2002], -˜Γicb=iΩ2 -o¯AsKicb( ˜mf−˜ms), (18a) -˜Γcmb=iΩ2 -o¯AfKcmb˜mf. (18b) -Specific expressions for KicbandKcmbare delayed to sections 4 and 5 when we consider the +terms of dimensionless complex coupling constants Kicb and Kcmb and the differential angular + velocities at each boundary [e.g Buffett, 1992; Buffett et al. , 2002], +˜Γicb = iΩ2 +o ¯AsKicb( ˜mf −˜ms) , (18a) +˜Γcmb = iΩ2 +o ¯Af Kcmb ˜mf . (18b) +Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the effects of viscous and EM coupling, respectively. A fifth equation is required to connect this interior model to the obliquity of the mantle, -and this is provided by Equation (7). For small angles θmandθp, this gives [e.g. Mathews et al. , +and this is provided by Equation (7). For small angles θm and θp, this gives [e.g. Mathews et al. , 1991; Dumberry and Wieczorek , 2016; Baland et al. , 2019] -˜m+ (1 +ω)˜p= 0. (19) -For Mercury, it is more convenient to connect the internal model with ˜ εminstead of ˜p. This -is becauseθp≈8.567◦whereas ˜εm≈2 arcmin and thus the latter obeys more strictly the +˜m+ (1 +ω)˜p= 0 . (19) +For Mercury, it is more convenient to connect the internal model with ˜εm instead of ˜p. This +is because θp ≈ 8.567◦whereas ˜εm ≈ 2 arcmin and thus the latter obeys more strictly the condition of small angles assumed in our framework. Furthermore, the external torques acting on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ εm. Written - in terms of ˜ εm, and with the approximation of ˜ εm≪1 and ˜m≪1, Equation (7) becomes -˜m+ (1 +ω)˜εm=−(1 +ω) tanI. (20) -Likewise, the frequency ωfrom Equation (5) can be written simply in terms of I, -ω=−1−δωcosI. 
(21) + in terms of ˜εm, and with the approximation of ˜εm ≪1 and ˜m≪1, Equation (7) becomes +˜m+ (1 +ω)˜εm = −(1 + ω) tanI. (20) +Likewise, the frequency ω from Equation (5) can be written simply in terms of I, +ω= −1 −δωcos I. (21) The set of four Equations (12) with the addition of Equation (20) form a linear system -of equations for the five rotational variables ˜ m, ˜mf, ˜ms, ˜nsand ˜εm. It captures the response -of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequencyω. - The system can be written in a matrix form as +of equations for the five rotational variables ˜m, ˜mf , ˜ms, ˜ns and ˜εm. It captures the response +of Mercury, in the frequency domain, when subject to a periodic solar torque applied at frequency + ω. The system can be written in a matrix form as –11– Confidential manuscript submitted to JGR-Planets -M·x=y, (22a) +M·x = y , (22a) where the solution ( x) and forcing ( y) vectors are -xT= [ ˜m,˜mf,˜ms,˜ns,˜εm], (22b) -yT= [0,0,0,0,−(1 +ω) tanI], (22c) -and the elements of matrix Mare -M= -ω−e (1 +ω)¯Af -¯A(1 +ω)¯As¯A¯As¯Aα3( -(1 +ω)es+φs) +xT = [ ˜m, ˜mf , ˜ms,˜ns,˜εm] , (22b) +yT = [0,0,0,0,−(1 + ω) tanI] , (22c) +and the elements of matrix M are +M = + + +ω−e (1 + ω) +¯Af +¯A (1 + ω) +¯As +¯A +¯As +¯A α3 +( +(1 + ω)es + φs +) φm -ω 1 +ω+ef+Kcmb+¯As¯AfKicb−¯As¯AfKicb−ωesα1¯As¯Af0 -ω−α3es α1es−Kicb 1 +ω+Kicb(1 +ω−α2)es+α3φsα3φs +ω 1 + ω+ ef + Kcmb + +¯As +¯Af +Kicb − +¯As +¯Af +Kicb −ωesα1 +¯As +¯Af +0 +ω−α3es α1es −Kicb 1 + ω+ Kicb (1 + ω−α2)es + α3φs α3φs 0 0 1 ω 0 -1 0 0 0 (1 + ω) -. +1 0 0 0 (1 + ω) + + +. (22d) -Solutions of the homogeneous system (i.e. y=0) represent free modes of precession. Three +Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three modes have periods which, when seen in inertial space, are typically in the range of a few hundred to a few thousand years. 
The first is the free axial precession of Mercury maintained by -the solar torque acting on its elliptical figure [e.g. Peale , 2005]. The second is the free core nutation +the solar torque acting on its elliptical figure [e.g. Peale, 2005]. The second is the free core nutation (FCN), which is the free precession of the spin axis of the fluid core about the symmetry axis of the CMB [e.g. Mathews et al. , 1991]. The third is the free inner core nutation (FICN), a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al. , @@ -543,7 +631,7 @@ although we have retained the triaxial shape of Mercury in the expression of the we treat its angular momentum response as if it were an axially symmetric body. This is convenient as the two equatorial angular momentum equations for each region can be combined into a single equation. To first order, the frequency of the free precession of Mercury is not largely -altered by triaxiality [e.g. Peale , 2005]. Baland et al. [2019] showed that the frequencies of the +altered by triaxiality [e.g. Peale, 2005]. Baland et al. [2019] showed that the frequencies of the FCN and FICN for a triaxial planetary body may be slightly different than those for an axially symmetric body, but not by large factor. As the response of Mercury to the solar torque is largely determined by the resonant amplification due to the presence of these three modes, @@ -568,175 +656,212 @@ Confidential manuscript submitted to JGR-Planets 2.3.1 The Cassini state of a single-body, rigid Mercury For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations (12a) and (20), -(ω−e) ˜m+φm˜εm= 0, (23a) -˜m+ (1 +ω)˜εm=−(1 +ω) tanI. (23b) -Using Equation (21), δω≪1, and the approximation ¯A(1 +e+δωcosI) =C+¯AδωcosI≈ +(ω−e) ˜m+ φm ˜εm = 0 , (23a) +˜m+ (1 +ω)˜εm = −(1 + ω) tanI. 
(23b) +Using Equation (21), δω ≪1, and the approximation ¯A(1 +e+ δωcos I) = C+ ¯Aδωcos I ≈ C, these can be written as -C˜m=¯Aφm˜εm, (24a) -˜m=δω( -sinI+ cosI˜εm) +C˜m= ¯Aφm ˜εm , (24a) +˜m= δω +( +sin I+ cosI˜εm +) . (24b) -Equation (24b) gives a direct relationship between ˜ mand ˜εm. ForI= 8.5330◦,δω= -4.9327×10−7and taking ˜ εm= 2.04 arcmin, this gives ˜ m= 2.52×10−4arcmin, much smaller +Equation (24b) gives a direct relationship between ˜m and ˜εm. For I = 8 .5330◦, δω = +4.9327×10−7 and taking ˜εm = 2.04 arcmin, this gives ˜m= 2.52×10−4 arcmin, much smaller than ˜εm: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. Substituting Equation (24b) in Equation (24a) gives -CΩp( -sinI+ cosI˜εm) -=¯AΩoφm˜εm, (25) -and isolating for ˜ εm, -˜εm=CΩpsinI -−CΩpcosI+¯AΩoφm. (26) -Upon using Equations (4), (15a), and Ω o=3 -2n, we can write -˜εm=CΩpsinI -−CΩpcosI+nMR2(G210J2+ 2G201C22). (27) +CΩp +( +sin I+ cosI˜εm +) += ¯AΩoφm˜εm , (25) +and isolating for ˜εm, +˜εm = CΩp sin I +−CΩp cos I+ ¯AΩoφm +. (26) +Upon using Equations (4), (15a), and Ω o = 3 +2 n, we can write +˜εm = CΩp sin I +−CΩp cos I+ nMR2 (G210J2 + 2G201C22) . (27) This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 -[see for instance Equation (1) of Baland et al. , 2017, where their definition of ˙Ω is equal to−Ωp]. +[see for instance Equation (1) of Baland et al. , 2017, where their definition of ˙Ω is equal to −Ωp]. Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment of inertia ˆC, -ˆC=C -MR2=n -ΩpG210J2+ 2G201C22 -cosI+ sinI/˜εm. (28) +ˆC = C +MR2 = n +Ωp +G210J2 + 2G201C22 +cos I+ sinI/˜εm +. (28) which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation that a measurement of the obliquity gives a constraint on ˆC. 
-Two free modes of precession are found by setting y=0in Equation (23). One mode corresponds +Two free modes of precession are found by setting y = 0 in Equation (23). One mode corresponds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession of Mercury. As seen in the inertial frame, its frequency is given by –13– Confidential manuscript submitted to JGR-Planets -ωfp=nMR2 -C( -G210J2+ 2G201C22) +ωfp = nMR2 +C +( +G210J2 + 2G201C22 +) , (29) which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component. Note that in Peale [2005] it was assumed that only the mantle was involved in the solidbody - precession and hence Cwas replaced by Cm. UsingC= 0.346·MR2[Margot et al. , -2012] and the numerical values for n,J2,C22andecgiven in Table 1, we obtain a free precession - period of Tfp= 2π/ωfp= 1298 yr. If we use Cminstead ofCin Equation (29), and take -Cm= 0.431·C= 0.431·0.346·MR2[Margot et al. , 2012], we obtain Tfp= 2π/ωfp= 560 yr. + precession and hence C was replaced by Cm. Using C = 0 .346 ·MR2 [Margot et al. , +2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free precession + period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take +Cm = 0.431·C = 0.431·0.346·MR2 [Margot et al. , 2012], we obtain Tfp = 2π/ωfp = 560 yr. These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, the free precession period is much shorter than the forcing period of 325 kyr. Using Equation (29), Equation (27) can be written as [e.g. 
Baland et al. , 2017] -˜εm=ΩpsinI -−ΩpcosI+ωfp. (30) -The obliquity of Mercury is thus determined by how the forcing frequency Ω pcompares with -the free precession frequency ωfp. Becauseωfp>Ωp, Mercury occupies Cassini state 1 [ Peale , +˜εm = Ωp sin I +−Ωp cos I+ ωfp +. (30) +The obliquity of Mercury is thus determined by how the forcing frequency Ω p compares with +the free precession frequency ωfp . Because ωfp >Ωp, Mercury occupies Cassini state 1 [ Peale, 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant -amplification if Ω p≈ωfp. Sinceωfp≫Ωp, resonant amplification is minimal and the resulting - obliquity, ˜ εm≈2 arcmin, is much smaller than the inclination angle I≈8.5◦. +amplification if Ωp ≈ ωfp . Since ωfp ≫ Ωp, resonant amplification is minimal and the resulting + obliquity, ˜εm ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦. 2.3.2 The misalignment of the fluid and solid cores -Withω=−1−δωcosIandδω≪1, Equation (12d) gives ˜ ns≈˜ms; as for the mantle, +With ω= −1 −δωcos I and δω ≪1, Equation (12d) gives ˜ns ≈˜ms; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. -The relationship between ˜ mand ˜εmof Equation (24b) is independent of the interior structure, +The relationship between ˜m and ˜εm of Equation (24b) is independent of the interior structure, so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equation - (12a), and setting ˜ ns= ˜ms, the angular momentum equation of the whole planet becomes -CΩp( -sinI+ cosI˜εm) -+ (¯AfcosIΩp) ˜mf+¯As(cosIΩp−Ωoα3φs)˜ns=¯AΩoφm˜εm. (31) + (12a), and setting ˜ns = ˜ms, the angular momentum equation of the whole planet becomes +CΩp +( +sin I+ cosI˜εm +) ++ ( ¯Af cos IΩp) ˜mf + ¯As(cos IΩp −Ωoα3φs)˜ns = ¯AΩoφm˜εm . (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modification - of the mantle obliquity ˜ εm. 
Approximate analytical solutions of ˜ nsand ˜mfare given by -˜ns≈Ωp -κλs( -1 +Ωo(Kicb−α1es) -λf)( -sinI+ cosI˜εm) + of the mantle obliquity ˜εm. Approximate analytical solutions of ˜ns and ˜mf are given by +˜ns ≈ Ωp +κλs +( +1 + Ωo(Kicb −α1es) +λf +)( +sin I+ cosI˜εm +) −Ωoα3φs -κλs˜εm, (32a) -˜mf≈Ωp -λf( -sinI+ cosI˜εm) -+Ωo -λf¯As -¯Af( -Kicb−α1es) -˜ns, (32b) +κλs +˜εm , (32a) +˜mf ≈Ωp +λf +( +sin I+ cosI˜εm +) ++ Ωo +λf +¯As +¯Af +( +Kicb −α1es +) +˜ns , (32b) where -κ= 1−¯As -¯AfΩ2 -o( -Kicb−α1es)2 -λsλf, (33a) -λf= ¯σf−ΩpcosI, (33b) -λs= ¯σs−ΩpcosI, (33c) +κ= 1 − +¯As +¯Af +Ω2 +o +( +Kicb −α1es +)2 +λs λf +, (33a) +λf = ¯σf −Ωp cos I, (33b) +λs = ¯σs −Ωp cos I, (33c) –14– Confidential manuscript submitted to JGR-Planets and where we have introduced the frequencies -¯σf= Ωo( -ef+Kcmb+¯As -¯AfKicb) +¯σf = Ωo +( +ef + Kcmb + +¯As +¯Af +Kicb +) , (33d) -¯σs= Ωo( -esα3αg−esα1+α3φs+Kicb) +¯σs = Ωo +( +esα3αg −esα1 + α3φs + Kicb +) . (33e) These solutions are good approximations for all the results that we present in section 3. For -an observed mantle obliquity ˜ εmand for a chosen set of interior model parameters, they provide - useful predictions of ˜ nsand ˜mf. -In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σs≫ -Ωpand ¯σf≫Ωp, so that ˜ns→0, ˜mf→0 and Equation (31) reverts back to Equation (25) +an observed mantle obliquity ˜εm and for a chosen set of interior model parameters, they provide + useful predictions of ˜ns and ˜mf . +In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σs ≫ +Ωp and ¯σf ≫Ωp, so that ˜ns →0, ˜mf →0 and Equation (31) reverts back to Equation (25) for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and -mantle (i.e. for spherical internal boundaries, ef=es=γs= 0 and no viscous or EM coupling,Kcmb=Kicb= - 0), then -φs= 0, κ = 1, λ f=λs=−ΩpcosI, ˜mf= ˜ns=−(tanI+ ˜εm). 
(34) -Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm= -C−¯Af−¯As, we obtain -CmΩp( -sinI+ cosI˜εm) -=¯AΩoφm˜εm. (35) +mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM coupling, + Kcmb = Kicb = 0), then +φs = 0 , κ = 1 , λ f = λs = −Ωp cos I, ˜mf = ˜ns = −(tan I+ ˜εm) . (34) +Inserting these in Equation (31), and with the moment of inertia of the mantle equal to Cm = +C−¯Af −¯As, we obtain +Cm Ωp +( +sin I+ cosI˜εm +) += ¯AΩoφm˜εm . (35) which describes, as expected, a forced precession of the mantle alone. If this was the case for -Mercury, taking Cm/C= 0.431, the obliquity should be ˜ εm≈0.88 arcmin, substantially smaller -than the observed obliquity of ˜ εm≈2 arcmin. -If ¯σf≈Ωp(and thusλf→0) and/or ¯σs≈Ωp(and thusλs→0) resonant amplification - leads to large amplitudes for ˜ mf, ˜nsand the mantle obliquity ˜ εm. The frequencies ¯ σfand -¯σsare closely related to the FCN and FICN frequencies ωfcnandωficn, respectively. Hence, +Mercury, taking Cm/C = 0.431, the obliquity should be ˜εm ≈0.88 arcmin, substantially smaller +than the observed obliquity of ˜εm ≈2 arcmin. +If ¯σf ≈Ωp (and thus λf →0) and/or ¯σs ≈Ωp (and thus λs →0) resonant amplification + leads to large amplitudes for ˜mf , ˜ns and the mantle obliquity ˜εm. The frequencies ¯σf and +¯σs are closely related to the FCN and FICN frequencies ωfcn and ωficn , respectively. Hence, just as a large mantle obliquity can result from resonant amplification when the forcing frequency approaches the free precession frequency, a large mantle obliquity can likewise result from resonant amplification when the forcing frequency approaches the FCN or FICN frequencies. These frequencies depend on the interior density structure and are not known. However, we will show that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of a few hundred yr. 
This is sufficiently far from the forcing period (325 kyr) that we do not expect - an important amplification effect. Furthermore, since ωfcn,ωficn≫Ωp, then ¯σf≫Ωp -and ¯σs≫Ωp, and we are in the strong coupling limit. The mantle obliquity should be close -to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜ mfand -˜nsshould be of the order of ˜ εmor smaller. This further justifies the assumption of small angles + an important amplification effect. Furthermore, since ωfcn ,ωficn ≫Ωp, then ¯σf ≫Ωp +and ¯σs ≫Ωp, and we are in the strong coupling limit. The mantle obliquity should be close +to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜ mf and +˜ns should be of the order of ˜εm or smaller. This further justifies the assumption of small angles that we have adopted. 3 Results 3.1 Geodetic constraints and interior density structure -All our interior models are constrained to match the mass Mof Mercury and specific choices -ofˆC=C/MR2andCm/C. The choice of ˆCis determined from Equation (28). For the parameters - listed in Table 1, and an observed obliquity of εm= 2.04 arcmin [ Margot et al. , 2012], -this gives ˆC=C/MR2= 0.3455 and all our interior models are consistent with this choice. +All our interior models are constrained to match the mass M of Mercury and specific choices +of ˆC = C/MR2 and Cm/C. The choice of ˆC is determined from Equation (28). For the parameters + listed in Table 1, and an observed obliquity of εm = 2.04 arcmin [Margot et al. , 2012], +this gives ˆC = C/MR2 = 0.3455 and all our interior models are consistent with this choice. Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are –15– Confidential manuscript submitted to JGR-Planets perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating - ˆCfrom Equation (28), or conversely in predicting εmbased on a given choice for ˆC. 
-Part of the objective of our study is to estimate how large this error is. The ratio Cm/Cis obtained + ˆC from Equation (28), or conversely in predicting εm based on a given choice for ˆC. +Part of the objective of our study is to estimate how large this error is. The ratio Cm/C is obtained from the amplitude of the 88-day longitudinal mantle libration φo, which is given by -φo= 6·f(ec)C22MR2 -CC -Cm1 -1 +ζ, (36) +φo = 6 ·f(ec)C22 +MR2 +C +C +Cm +1 +1 + ζ , (36) where -f(ec) = 1−11e2 -c+959 -48e4 -c, (37) -and whereζis a correction that takes into account the entrainment of the inner core in the libration - [ Van Hoolst et al. , 2012; Dumberry et al. , 2013; Dumberry and Rivoldini , 2015]; this correction +f(ec) = 1 −11e2 +c + 959 +48 e4 +c , (37) +and where ζ is a correction that takes into account the entrainment of the inner core in the libration + [Van Hoolst et al. , 2012; Dumberry et al. , 2013; Dumberry and Rivoldini , 2015]; this correction is small and, to simplify, we neglect it here. Taking the observed libration amplitude -to be 38.5 arcsec [ Margot et al. , 2012], ˆC=C/MR2= 0.3455 andC22andecfrom Table 1, -this corresponds to a ratio Cm/C= 0.4269, or equivalently ˆCm=Cm/MR2= 0.1475. -For all results presented in our study, the crustal density is set at ρc= 2974 kg m−3[Sori, -2018]. Our standard choice for the crustal thickness is h= 26 km [ Sori, 2018], although in +to be 38.5 arcsec [ Margot et al. , 2012], ˆC = C/MR2 = 0.3455 and C22 and ec from Table 1, +this corresponds to a ratio Cm/C = 0.4269, or equivalently ˆCm = Cm/MR2 = 0.1475. +For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3 [Sori, +2018]. Our standard choice for the crustal thickness is h = 26 km [ Sori, 2018], although in section 3.2 we also present some results with other choices of h. We have considered two possible prescriptions connected to the density of the inner core. 
First, for all the results presented -in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs= 8800 kg m−3approximately +in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 approximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure Fe composition in face-centered cubic phase. This captures an end-member scenario where the core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively @@ -749,80 +874,111 @@ our Mercury model with uniform density layers. To capture this other end-member scenario, in section 3.5 we present results where we instead prescribe a fixed density contrast between the fluid and solid core; specifically, we set the numerical value of α3. For a given choice of inner core radius rs, the densities of the mantle ( ρm) and fluid core -(ρf) and the radius of the CMB ( rf) are determined such that the interior model matches M, -ˆC= 0.3455 and ˆCm= 0.1475. Figure 3a shows how ρm,ρfandrfvary as a function of inner - core radius rsfor each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When +(ρf ) and the radius of the CMB ( rf ) are determined such that the interior model matches M, +ˆC = 0.3455 and ˆCm = 0.1475. Figure 3a shows how ρm, ρf and rf vary as a function of inner + core radius rs for each of the two inner core density scenarios: a fixed ρs, or a fixed α3. When the inner core is small, its presence has a limited influence on the resulting density structure, -and we find ρm= 3197 kg m−3,ρf= 7263 kg m−3andrf= 2000 km in each of the two -scenarios. When ρsis fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, rfincreases - to above 2100 km, ρmapproaches 4000 kg m−3andρfis reduced to below 5000 kg m−3. +and we find ρm = 3197 kg m −3, ρf = 7263 kg m −3 and rf = 2000 km in each of the two +scenarios. 
When ρs is fixed to 8800 kg m −3, as the inner core reaches 1500 km in size, rf increases + to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m −3. Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core -size, as otherwise ρmgets unreasonably large and ρfgets inappropriately small (as it would +size, as otherwise ρm gets unreasonably large and ρf gets inappropriately small (as it would require an excessively large concentration of light elements). When adopting instead a fixed density - contrast, with α3= 0.1, the changes in rf,ρmandρfwith inner core radius are more modest, - allowing larger possible inner core sizes. Different assumptions on ρcandhwould alter the + contrast, with α3 = 0.1, the changes in rf , ρm and ρf with inner core radius are more modest, + allowing larger possible inner core sizes. Different assumptions on ρc and h would alter the numerical values shown on Figure 3a but not their trends with rs. -Figure 3b shows how the FCN and FICN periods vary with rsfor each of the two inner -core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb=Kicb= +Figure 3b shows how the FCN and FICN periods vary with rs for each of the two inner +core density scenarios and in the absence of viscous and EM coupling (i.e. Kcmb = Kicb = –16– Confidential manuscript submitted to JGR-Planets -0200400600800100012001400period (yr) +0 +200 +400 +600 +800 +1000 +1200 +1400period (yr) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)300040005000600070008000density (kg/m3) +Inner core radius (km) +3000 +4000 +5000 +6000 +7000 +8000density (kg/m3) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)200020202040206020802100 -Fluid core radius (km)fluid core density +Inner core radius (km) +2000 +2020 +2040 +2060 +2080 +2100 +Fluid core radius (km) +fluid core density CMB radius -FICNFCN int -mantle densitya b +FICN +FCNint +mantle density +a b FCN Figure 3. 
a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN -period when the external torque is set to zero (FCNint) is shown in orange. Solid lines correspond to -a scenario where the density of the inner core is set to 8800 kg m−3; thin dashed lines correspond to a -scenario where the density contrast between the fluid and solid cores is set to α3= 0.1. +period when the external torque is set to zero (FCN int) is shown in orange. Solid lines correspond to +a scenario where the density of the inner core is set to 8800 kg m −3; thin dashed lines correspond to a +scenario where the density contrast between the fluid and solid cores is set to α3 = 0.1. 0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner core, increasing to approximately 600 yr at the largest rs. The FICN period is shorter, close to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the -largestrsunder the fixed ρs(fixedα3) scenario. This confirms that the FCN and FICN periods +largest rs under the fixed ρs (fixed α3) scenario. This confirms that the FCN and FICN periods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away -from it that we do not expect large ˜ mfand ˜nsfrom resonant amplification. +from it that we do not expect large ˜mf and ˜ns from resonant amplification. The FCN and FICN periods that we have computed include the influence of the external torque. As shown by Baland et al. [2019], the external torque allow solid regions to have a free motion in inertial space thereby affecting the free rotational modes. 
To a good approximation, - the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb=Kicb= 0 + the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = Kicb = 0 are given by -ωfcn≈−Ωo(¯A -¯Am+¯As)( -ef+φm) -+ Ωoefφm -(ef+φm), (38a) -ωficn≈Ωo(¯A+¯As -¯A−¯As)( -esα1−esα3αg−α3φs) +ωfcn ≈−Ωo +( ¯A +¯Am + ¯As +)( +ef + φm +) ++ Ωo +ef φm +(ef + φm) , (38a) +ωficn ≈Ωo +(¯A+ ¯As +¯A−¯As +)( +esα1 −esα3αg −α3φs +) . (38b) The expression of the FICN frequency involves the inertial torque (term esα1) and the gravitational torque from the rest of Mercury ( esα3αg) and the Sun ( α3φs) acting on the inner core. -For both of our inner core density scenarios (and our choices of ρs= 8800 kg m−3andα3= -0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg≫α1; +For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 = +0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg ≫α1; the gravitational torque dominates the inertial torque, in large part because of the slow rotation rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek , 2016; Stys and -Dumberry , 2018], but it is different for Earth, where α1>α3αgbecause of its faster rotation +Dumberry, 2018], but it is different for Earth, where α1 >α3αg because of its faster rotation and the FICN mode is prograde [ Mathews et al. , 1991]. Note also that our approximate expres–17– Confidential manuscript submitted to JGR-Planets -sion for the FICN differs by a factor ( ¯A+¯As)/(¯A−¯As) compared to that given in Dumberry +sion for the FICN differs by a factor ( ¯A+ ¯As)/( ¯A−¯As) compared to that given in Dumberry and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. 
First, it -involves the external torque from the Sun captured by the parameter φm. If we setφm= 0, +involves the external torque from the Sun captured by the parameter φm. If we set φm = 0, we obtain the FCN frequency for a decoupled model in which only interior torques contribute, -ωfcn,int≈−Ωo(¯A -¯Am+¯As) -ef. (38c) -This frequency is slightly different from the usual expression for Earth, involving the ratio ¯A/(¯Am+ +ωfcn,int ≈−Ωo +( ¯A +¯Am + ¯As +) +ef . (38c) +This frequency is slightly different from the usual expression for Earth, involving the ratio ¯A/( ¯Am+ ¯As) rather than ¯A/¯Am. This is because of the relatively thin mantle of Mercury; for the largest -rsconsidered, the moment of inertia of the inner core can get close to 40% of that of the mantle +rs considered, the moment of inertia of the inner core can get close to 40% of that of the mantle and is not negligible. The period of the FCN when only interior torques contribute is shown in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr at the largest rs. Hence, the influence of the solar torque reduces the FCN period by a factor @@ -830,151 +986,191 @@ of approximately 3. We note that the FICN period, in contrast, is not altered su the external torque is set to zero. 3.2 Gravitational and inertial coupling Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium - Cassini state. We assume a fixed inner core density scenario in this section, with ρs= + Cassini state. We assume a fixed inner core density scenario in this section, with ρs = 8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of gravitational - and inertial coupling. Figure 4 shows how ˜ εm, ˜mfand ˜nsvary as functions of inner + and inertial coupling. Figure 4 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius. 
We show calculations for three different choices of crustal thickness, but let us concentrate - first on the case for h= 26 km. For small rs, we retrieve an obliquity of ˜ εm= 2.0494 -arcmin (Figure 4a). ˜ εmdecreases with rs, but not substantially; at the largest rs(1500 km), -˜εm= 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ εm= 2.04 -arcmin, the obliquity that we used in setting the constraint for ˆC– and hence the prediction + first on the case for h= 26 km. For small rs, we retrieve an obliquity of ˜εm = 2.0494 +arcmin (Figure 4a). ˜εm decreases with rs, but not substantially; at the largest rs (1500 km), +˜εm = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ εm = 2.04 +arcmin, the obliquity that we used in setting the constraint for ˆC – and hence the prediction we should recover for a rigid planet – is an overestimate of approximately 0 .01 arcmin which occurs for small inner cores. -The deviation of ˜ εmfrom that of a rigid planet is due to the misalignments of the fluid -core ( ˜mf) and solid inner core (˜ ns) with respect to the mantle (Figure 4b). The misalignment -of the fluid core spin axis from the mantle is significant: ˜ mfis approximately 4.02 arcmin for +The deviation of ˜εm from that of a rigid planet is due to the misalignments of the fluid +core ( ˜mf ) and solid inner core (˜ns) with respect to the mantle (Figure 4b). The misalignment +of the fluid core spin axis from the mantle is significant: ˜ mf is approximately 4.02 arcmin for a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin -at the largest rs. Recall that ˜ mfis measured with respect to the mantle rotation axis (which +at the largest rs. Recall that ˜mf is measured with respect to the mantle rotation axis (which coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with -respect to the orbit normal is ˜ εm+ ˜mf≈6 arcmin. 
The reason why the obliquity of the spin +respect to the orbit normal is ˜εm+ ˜mf ≈6 arcmin. The reason why the obliquity of the spin axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), -which shows that ˜ mfis determined by the resonant amplification of the FCN mode at the forcing +which shows that ˜mf is determined by the resonant amplification of the FCN mode at the forcing frequency. When the FCN frequency is much larger than the forcing frequency, as is the -case for Mercury, the resonant amplification is very weak but remains present and ˜ mfis larger +case for Mercury, the resonant amplification is very weak but remains present and ˜mf is larger than zero. -In contrast to ˜ mf, the misalignment of the inner core with respect to the mantle is much -smaller; ˜nsis approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ εm. +In contrast to ˜mf , the misalignment of the inner core with respect to the mantle is much +smaller; ˜ns is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜εm. Physically, this is because the gravitational torque acting on the inner core when it is tilted from the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner core must remain in close alignment with the mantle. 
Presented differently, since the FICN period is more than 3000 times shorter than the forced precession period, the inner core can eas–18– Confidential manuscript submitted to JGR-Planets -2.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin) +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)1.52.02.53.03.54.04.5Obliquity angle (arcmin) +Inner core radius (km) +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)crustal thickness +Inner core radius (km) +crustal thickness 16 km -36 km26 kmcrustal thickness +36 km +26 km +crustal thickness 16 km -36 km26 kmεm +36 km +26 km +εm εg - for a rigid planetεmmf -ns(x100)a b -Figure 4. a) Obliquity of the mantle (˜ εm, solid lines) and of the principal moment of inertia (˜ εg, -dashed line) b) ˜ mf(solid lines) and ˜ ns(dashed lines, x100) as a function of inner core radius and for + for a rigid planetεm +mf +ns (x100) +a b +Figure 4. a) Obliquity of the mantle (˜εm, solid lines) and of the principal moment of inertia (˜εg, +dashed line) b) ˜mf (solid lines) and ˜ns (dashed lines, x100) as a function of inner core radius and for different choices of crustal thickness. -ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ nsdoes +ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜ ns does not change substantially as the inner core increases in size. -WhenKicb=Kcmb= 0, a good approximation of ˜ εmis given by -˜εm=C′ΩpsinI -−C′ΩpcosI+¯AΩoφm, (39) -which is identical to the prediction of Equation (26) for a rigid Mercury, except Cis replaced -byC′. 
The latter represents an effective moment of inertia that accounts for the coupling of +When Kicb = Kcmb = 0, a good approximation of ˜εm is given by +˜εm = C′Ωp sin I +−C′Ωp cos I+ ¯AΩoφm +, (39) +which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced +by C′. The latter represents an effective moment of inertia that accounts for the coupling of the core to the mantle, -C′=C+¯Acχ, (40) -where ¯Ac=¯Af+¯Asand -χ=ΩpcosI -¯Ac(¯Af -(¯σf−ΩpcosI)+¯As -(¯σs−ΩpcosI)) -−¯As -¯AcΩoα3φs -(¯σs−ΩpcosI). (41) -The frequencies ¯ σfand ¯σsare given in Equations (33d-33e) and closely approximate the FCN -and FICN frequencies of Equations (38c) and (38b), respectively. The factor χcaptures then +C′= C+ ¯Acχ, (40) +where ¯Ac = ¯Af + ¯As and +χ= Ωp cos I +¯Ac +( ¯Af +(¯σf −Ωp cos I) + +¯As +(¯σs −Ωp cos I) +) +− +¯As +¯Ac +Ωoα3φs +(¯σs −Ωp cos I) . (41) +The frequencies ¯σf and ¯σs are given in Equations (33d-33e) and closely approximate the FCN +and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then how the core is entrained to precess with the mantle, with the coupling between the two expressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit -of ¯σf,¯σs→0, thenχ=−1,C′=Cm, the core is fully decoupled from the mantle and we -retrieve Equation (35). If instead ¯ σf,¯σs→∞ , thenχ= 0,C′=Cand we retrieve the prediction +of ¯σf ,¯σs →0, then χ = −1, C′ = Cm, the core is fully decoupled from the mantle and we +retrieve Equation (35). If instead ¯σf ,¯σs →∞, then χ = 0, C′= C and we retrieve the prediction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω p, -as is the case here, resonant amplification is weak, χis small and positive, C′> C and this -leads to a slightly larger ˜ εmcompared to a rigid planet. 
Because the inner core core is gravitationally +as is the case here, resonant amplification is weak, χ is small and positive, C′ > Cand this +leads to a slightly larger ˜εm compared to a rigid planet. Because the inner core core is gravitationally locked to the mantle, deviations from a rigid planet are dominantly caused by the -misalignment of the fluid core. In Equation (41), ¯ σs≫¯σf, so to a good approximation +misalignment of the fluid core. In Equation (41), ¯σs ≫¯σf , so to a good approximation –19– Confidential manuscript submitted to JGR-Planets -χ≈¯Af -¯AcΩocosI -(¯σf−ΩpcosI). (42) -For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯Afdecreases, and the combination - ¯Acχalso decreases. This implies that C′decreases with inner core size and, consequently, -˜εmalso decreases with inner core size, as seen in Figure 4a, though it remains larger than the +χ≈ +¯Af +¯Ac +Ωo cos I +(¯σf −Ωp cos I) . (42) +For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯Af decreases, and the combination + ¯Acχ also decreases. This implies that C′decreases with inner core size and, consequently, +˜εm also decreases with inner core size, as seen in Figure 4a, though it remains larger than the prediction for a rigid planet. -The specific predictions of ˜ εm, ˜mfand ˜nson Figure 4 depend sensitively on the assumed +The specific predictions of ˜εm, ˜mf and ˜ns on Figure 4 depend sensitively on the assumed interior density model and on the dynamical ellipticities of the inner core ( es) and fluid core -(ef). Hence, it depends on the choices we have made for the inner core density ρs, the crustal -densityρcand its thickness h. Changing ρs,ρcand/orhrequires a different combination of ρf, -ρmandrfin order to match M,ˆCand ˆCm. In turn, this leads to different ellipticities at interior - boundary in order to match J2andC22, and thus different predictions for ˜ εm, ˜mfand +(ef ). 
Hence, it depends on the choices we have made for the inner core density ρs, the crustal +density ρc and its thickness h. Changing ρs, ρc and/or h requires a different combination of ρf , +ρm and rf in order to match M, ˆC and ˆCm. In turn, this leads to different ellipticities at interior + boundary in order to match J2 and C22, and thus different predictions for ˜εm, ˜mf and ˜ns. To illustrate this, we show on Figure 4 two additional predictions computed with crustal -thicknesses changed to h= 16 and 36 km. The change in ˜ εmremains modest,∼0.025%, but -the changes in ˜ mfand ˜nsare more substantial, ∼5% and∼10%, respectively. +thicknesses changed to h= 16 and 36 km. The change in ˜εm remains modest, ∼0.025%, but +the changes in ˜mf and ˜ns are more substantial, ∼5% and ∼10%, respectively. We also show on Figure 4a (only for h= 26 km) the obliquity of the principal moment -of inertia of the whole planet, which we denote by ˜ εg. A difference between ˜ εgand ˜εmoccurs +of inertia of the whole planet, which we denote by ˜εg. A difference between ˜εg and ˜εm occurs if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core -(with ˜nsassumed small) leads to an off-diagonal component of the moment of inertia tensor -of (Cs−¯As)α3˜ns=¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that -the moment of inertia of the whole planet is purely diagonal is ( ¯Asesα3˜ns)/(¯Ae), and hence a -good approximation of ˜ εgis -˜εg= ˜εm+¯Ases -¯Aeα3˜ns. (43) +(with ˜ns assumed small) leads to an off-diagonal component of the moment of inertia tensor +of (Cs−¯As)α3˜ns = ¯Asesα3˜ns. The angle by which the mantle frame must be rotated so that +the moment of inertia of the whole planet is purely diagonal is ( ¯Asesα3˜ns)/( ¯Ae), and hence a +good approximation of ˜εg is +˜εg = ˜εm + +¯Ases +¯Ae α3˜ns . 
(43) Since the inner core is gravitationally forced into a close alignment with the mantle, the difference - between ˜ εgand ˜εmremains very small. For the largest inner core radius that we have -considered, ˜ εgdiffers from ˜ εmonly by approximately 0.001 arcmin. + between ˜εg and ˜εm remains very small. For the largest inner core radius that we have +considered, ˜εg differs from ˜εm only by approximately 0.001 arcmin. 3.3 Viscous coupling We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini state. Peale et al. [2014] present two different parameterizations of viscous coupling based on the timescale of attenuation of the differential rotation between the fluid core and mantle. More complete analytical solutions for the flow resulting from a differentially precessing shell have -been derived [e.g. Stewartson and Roberts , 1963; Busse , 1968; Rochester , 1976] and we exploit -these solutions here. The parametrization of the viscous coupling constants KcmbandKicbbased +been derived [e.g. Stewartson and Roberts , 1963; Busse, 1968; Rochester, 1976] and we exploit +these solutions here. The parametrization of the viscous coupling constants Kcmb and Kicb based on them are given in Mathews and Guo [2005], -Kcmb=πρfr4 +Kcmb = +πρf r4 f -¯Af√ν -2Ωo( -0.195−1.976i) +¯Af +√ ν +2Ωo +( +0.195 −1.976i +) , (44a) -Kicb=πρfr4 +Kicb = πρf r4 s -¯As√ν -2Ωo( -0.195−1.976i) +¯As +√ ν +2Ωo +( +0.195 −1.976i +) , (44b) -whereνis the kinematic viscosity. The appropriate numerical value for νin planetary interior +where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary interior is not well known but based on theoretical and experimental studies it is expected to be -of the order of 10−6m2s−1[e.g. Gans , 1972; de Wijs et al. , 1998; Alf` e et al. , 2000; Rutter et al. , +of the order of 10 −6 m2 s−1 [e.g. Gans, 1972; de Wijs et al. , 1998; Alf` e et al., 2000; Rutter et al. , 2002a,b]. 
–20– Confidential manuscript submitted to JGR-Planets The above parameterizations are valid only under the assumption that the flow in the boundary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds -numberRe=rf∆uf/ν, associated with the differential velocity ∆ uf=rfΩo˜mfat the CMB. -Forrf= 2000 km, and taking ˜ mf= 4 arcmin≈0.001 rad from the results in the previous -section, we get ∆ uf∼2 mm/s and Re∼6×109. Such a large Reynolds number indicates +number Re= rf ∆uf /ν, associated with the differential velocity ∆ uf = rf Ωo ˜mf at the CMB. +For rf = 2000 km, and taking ˜mf = 4 arcmin ≈0.001 rad from the results in the previous +section, we get ∆ uf ∼2 mm/s and Re ∼6 ×109. Such a large Reynolds number indicates that the viscous friction between the fluid core and mantle should induce turbulent flows, as -is the case for the Cassini state of the Moon [ Yoder , 1981; Williams et al. , 2001; C´ ebron et al. , +is the case for the Cassini state of the Moon [ Yoder, 1981; Williams et al. , 2001; C´ ebron et al., 2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent of the fluid viscosity and proportional to the square of the differential velocity. The -coupling constant Kcmbshould be in the form -Kcmb=fcmb⏐⏐˜mf⏐⏐( -0.195−1.976i) +coupling constant Kcmb should be in the form +Kcmb = fcmb +⏐⏐˜mf +⏐⏐ +( +0.195 −1.976i +) , (45) -wherefcmbis a numerical factor that depends among other things on surface roughness. Incorporating +where fcmb is a numerical factor that depends among other things on surface roughness. Incorporating a viscous coupling of this form in our rotational model is more challenging not only -becausefcmbis not known but also because the viscous torque is no longer linear in ˜ mf. One +because fcmb is not known but also because the viscous torque is no longer linear in ˜ mf . One strategy is to find solutions through an iterative process. 
The simpler alternative strategy that we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν represents an effective turbulent viscosity. @@ -982,69 +1178,91 @@ To give an estimate of an appropriate turbulent value for ν, we turn to the Cas of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) [Williams et al. , 2001, 2014; Williams and Boggs , 2015]. Viscous dissipation is reported in terms -of a coupling parameter Kand a recent estimate is K/CL= (1.41±0.34)×10−8day−1[Williams -and Boggs , 2015], where CLis the lunar polar moment of inertia. The connection between K -andKcmbis -⏐⏐⏐Im[Kcmb]⏐⏐⏐=K -CLCL -CfL1 -ΩL, (46) -whereCfLis the moment of inertia of the lunar core and Ω L= 2.66×10−6s−1the lunar -rotation rate. With CfL/CL∼7×10−4[e.g. Williams et al. , 2014], this gives |Im[Kcmb]|∼ +of a coupling parameter Kand a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 [Williams +and Boggs , 2015], where CL is the lunar polar moment of inertia. The connection between K +and Kcmb is +⏐⏐⏐Im[Kcmb] +⏐⏐⏐= K +CL +CL +CfL +1 +ΩL +, (46) +where CfL is the moment of inertia of the lunar core and Ω L = 2 .66 ×10−6 s−1 the lunar +rotation rate. With CfL /CL ∼7 ×10−4 [e.g. Williams et al. , 2014], this gives |Im[Kcmb]|∼ 9×10−5. In order to match this amplitude in Equation (44a), with lunar parameters and assuming - a lunar core radius of 400 km, the required turbulent viscosity is ν≈5×10−4m2 + a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 ×10−4 m2 s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the -CMB of the Moon is closer to 3 cm/s [ Yoder , 1981; Williams et al. , 2001], more than 10 times +CMB of the Moon is closer to 3 cm/s [ Yoder, 1981; Williams et al. , 2001], more than 10 times larger than our estimate for Mercury above. 
Since the effective turbulent coupling constant Kcmb is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury - should be smaller. Thus, ν≈5×10−4m2s−1gives a conservative upper bound for the + should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the possible effective turbulent viscosity that can be expected for Mercury. -Figure 5 shows how ˜ εm, ˜mfand ˜nsvary as functions of inner core radius for different choices -of effective viscosities. For ν= 10−5m2s−1, viscous coupling is too weak to affect ˜ εmand -˜mfand they are essentially unchanged from the solutions shown in Figure 4. With increasing +Figure 5 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices +of effective viscosities. For ν = 10 −5 m2 s−1, viscous coupling is too weak to affect ˜εm and +˜mf and they are essentially unchanged from the solutions shown in Figure 4. With increasing ν, the stronger viscous coupling between the core and the mantle reduces their differential velocity, - and ˜ mfis reduced. With the reduced differential velocity at the CMB, the prediction -of ˜εmgets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB -viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ εm -and ˜mfare qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the + and ˜mf is reduced. With the reduced differential velocity at the CMB, the prediction +of ˜εm gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB +viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜εm +and ˜mf are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the fluid spin axis from the mantle symmetry axis. 
Considering the upper bound in turbulent viscosity - that we have identified above (i.e ν≈5×10−4m2s−1), the influence of viscous cou–21– + that we have identified above (i.e ν ≈5 ×10−4 m2 s−1), the influence of viscous cou–21– Confidential manuscript submitted to JGR-Planets -εmεg +εm +εg mf ns -2.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin) +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)0.00.51.01.52.02.53.03.54.04.5Obliquity angle (arcmin) +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1 +Inner core radius (km) +kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-10.0001 m2 s-10.0005 m2 s-10.001 m2 s-1 a b for a rigid planetεm -Figure 5. a) Obliquity of the mantle (˜ εm, solid lines) and gravity field (˜ εg, dashed lines) b) ˜ mf -(solid lines) and ˜ ns(dashed lines) as a function of inner core radius and for different choices of kinematic +Figure 5. a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf +(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of kinematic viscosity (color in legend). -pling on ˜εmremains modest, reducing its amplitude by a maximum of approximately 0.0015 +pling on ˜εm remains modest, reducing its amplitude by a maximum of approximately 0.0015 arcmin. The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner core with the fluid core spin axis. The viscous coupling strength is inversely proportional -tors, so a larger viscosity results in a larger inner core radius at which viscous coupling is of -a similar magnitude to gravitational coupling. 
Taking again an upper bound of ν= 5×10−4 -m2s−1, Figure 5 indicates that ˜ nsmay be 1 arcmin or larger only if the inner core radius is +to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of +a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4 +m2 s−1, Figure 5 indicates that ˜ns may be 1 arcmin or larger only if the inner core radius is smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravitational coupling is much larger than viscous coupling, and the inner core tilt is limited to a fraction of 1 arcmin. The larger inner core tilt observed with increasing effective viscosity results in a larger -offset between the obliquity of the principal moment of inertia ˜ εgand that of the mantle ˜ εm, -though it remains limited. For the upper bound of ν= 5×10−4m2s−1, and forrs= 1500 -km, the difference between ˜ εgand ˜εmis limited to 0.0013 arcmin. +offset between the obliquity of the principal moment of inertia ˜εg and that of the mantle ˜εm, +though it remains limited. For the upper bound of ν = 5 ×10−4 m2 s−1, and for rs = 1500 +km, the difference between ˜εg and ˜εm is limited to 0.0013 arcmin. The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller the misalignments of both the fluid core and inner core are with respect to the mantle. This implies that the larger the inner core is, the more we approach a planet precessing as a rigid body, although the misalignment of the spin axis of the fluid core remains important, approximately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ εm, ˜mf -and ˜nschange with inner core size would certainly be different for a turbulent model of viscous +and ˜ns change with inner core size would certainly be different for a turbulent model of viscous coupling. 
But the general conclusion remains that the addition of viscous coupling at the CMB and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. –22– @@ -1057,64 +1275,85 @@ electrically conducting regions stretches existing magnetic field lines that thr This induces a secondary magnetic field (or equivalently, an electrical current) and an associated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength -of the radial magnetic field Brand the electrical conductivity σon either side of the boundary - [ Rochester , 1960, 1962, 1968]. -The parametrization of EM coupling in terms of the coupling constants KcmbandKicb -has been developed in a few studies [e.g. Buffett , 1992; Buffett et al. , 2002; Dumberry and Koot , +of the radial magnetic field Br and the electrical conductivity σ on either side of the boundary + [Rochester, 1960, 1962, 1968]. +The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb +has been developed in a few studies [e.g. Buffett, 1992; Buffett et al. , 2002; Dumberry and Koot , 2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given -byBr=√ -3⟨ +by Br = +√ +3 +⟨ Bd -r⟩ -cosθ, where⟨ +r +⟩ +cos θ, where +⟨ Bd -r⟩ +r +⟩ is the r.m.s. strength of the field, the coupling constant -Kcmbcan be written is the form -Kcmb= 3(1−i)Fcmb⟨ +Kcmb can be written is the form +Kcmb = 3(1 −i)Fcmb +⟨ Bd -r⟩2, (47) +r +⟩2 +, (47) where -Fcmb=1 -Ωoρfrf(1 -σmδm+1 -σfδf)−1 +Fcmb = 1 +Ωoρf rf +( 1 +σmδm ++ 1 +σf δf +)−1 , (48) -and whereσm,δm=√ -2/(σmµΩo) andσf,δf=√ -2/(σfµΩo) are the electrical conductivities +and where σm, δm = +√ +2/(σmµΩo) and σf , δf = +√ +2/(σf µΩo) are the electrical conductivities and magnetic skin depths in the mantle and fluid core, respectively, with µ= 4π×10−7 -N A−2the magnetic permeability of free space. The r.m.s. 
field strength⟨ +N A−2 the magnetic permeability of free space. The r.m.s. field strength +⟨ Bd -r⟩ +r +⟩ is connected to the Gauss coefficient g0 -1of the surface magnetic field by +1 of the surface magnetic field by ⟨ Bd -r⟩ -=2√ -3(R -rf)3⏐⏐g0 -1⏐⏐. (49) +r +⟩ += 2√ +3 +(R +rf +)3 ⏐⏐g0 +1 +⏐⏐. (49) We can readily build an estimate of the amplitude of Kcmb. The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding -to the CMB of Mercury is in the range of σm∼0.01−1 S m−1[Constable , 2015]. In contrast, - the electrical conductivity of Fe in planetary cores is expected to be close σf∼106S -m−1[Pozzo et al. , 2012; de Koker et al. , 2012]. This implies that ( σmδm)−1≫(σfδf)−1. Takingσm= - 1 S m−1,⏐⏐g0 -1⏐⏐= 190 nT for Mercury’s dipole field [ Anderson et al. , 2012],rf= -2000 km,ρf= 7000 kg m−3, this gives Kcmb≈(3.1×10−11)·(1−i). To put this amplitude -in perspective, taking a molecular viscosity of ν= 10−6m2s−1in Equation (44a) gives a viscous - coupling constant of Kcmb≈(6.0×10−7)·(0.195−1.976i). Hence, EM coupling at the +to the CMB of Mercury is in the range of σm ∼ 0.01 −1 S m−1 [Constable, 2015]. In contrast, + the electrical conductivity of Fe in planetary cores is expected to be close σf ∼106 S +m−1 [Pozzo et al. , 2012; de Koker et al. , 2012]. This implies that ( σmδm)−1 ≫(σf δf )−1. Taking + σm = 1 S m −1, +⏐⏐g0 +1 +⏐⏐ = 190 nT for Mercury’s dipole field [ Anderson et al. , 2012], rf = +2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈(3.1 ×10−11) ·(1 −i). To put this amplitude +in perspective, taking a molecular viscosity of ν = 10−6 m2 s−1 in Equation (44a) gives a viscous + coupling constant of Kcmb ≈(6.0 ×10−7) ·(0.195 −1.976i). Hence, EM coupling at the CMB is much weaker than viscous coupling, even if we include other spherical harmonic components of the radial magnetic field. 
EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by -CMB cavities [ Buffett , 2010; Glane and Buffett , 2018], in which case the effective σmcould be -closer toσf. Likewise, σmcan be increased if a more electrically conducting layer has formed +CMB cavities [Buffett, 2010; Glane and Buffett , 2018], in which case the effective σm could be +closer to σf . Likewise, σm can be increased if a more electrically conducting layer has formed at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al. , 2013]. However, even -in the extreme case of σm=σf= 106S m−1,Kcmb≈(1.6×10−8)·(1−i), which remains +in the extreme case of σm = σf = 106 S m−1, Kcmb ≈(1.6 ×10−8) ·(1 −i), which remains –23– Confidential manuscript submitted to JGR-Planets smaller by a factor ∼60 than the smallest possible viscous coupling constant. Viscous forces @@ -1125,42 +1364,45 @@ coupling can be much larger and dominate viscous coupling. We assume that the ma morphology at the ICB is dominantly comprised of small spatial scales for example as predicted by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in terms of an equivalent uniform radial magnetic field ⟨Br⟩capturing its r.m.s. strength [ Buffett - et al. , 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σequal in the -fluid and solid core, the coupling constant Kicbcan be written in the form -Kicb=5 -4(1−i)Ficb⟨Br⟩2, (50) + et al. , 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the +fluid and solid core, the coupling constant Kicb can be written in the form +Kicb = 5 +4(1 −i)Ficb ⟨Br⟩2 , (50) where -Ficb=σδ -Ωoρsrs, (51) -and whereδ=√ -2/(σµΩo) is the magnetic skin depth. As Ficbis inversely proportional to -rs,Kicbis inversely proportional to inner core size. 
Note that computing the EM coupling based +Ficb = σδ +Ωoρsrs +, (51) +and where δ = +√ +2/(σµΩo) is the magnetic skin depth. As Ficb is inversely proportional to +rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based on the r.m.s. strength ⟨Br⟩rather than a true field morphology tends to overestimate the strength of the coupling [ Koot and Dumberry , 2013]. However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are absorbed in the range of possible ⟨Br⟩values. The parametrization of Equation (50) is only valid in a ’weak field’ regime [ Buffett et al. , 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. -When⟨Br⟩is sufficiently large, this is no longer the case. EM coupling then enters a ’strong -field’ regime [ Buffett et al. , 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which -Kicbincreases linearly with ⟨Br⟩instead of quadratically. A good approximation of Kicbcalculated +When ⟨Br⟩is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +field’ regime [Buffett et al. , 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which +Kicb increases linearly with ⟨Br⟩instead of quadratically. A good approximation of Kicb calculated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], KE -icb= (0.175−i0.138)⟨Br⟩, (52) -where⟨Br⟩is in units of Tesla. The superscript Eemphasizes that the numerical factors are +icb = (0.175 −i0.138) ⟨Br⟩, (52) +where ⟨Br⟩is in units of Tesla. The superscript E emphasizes that the numerical factors are appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. 
To adapt these numerical factors to Mercury, we write, -Kicb= (0.175−i0.138)Ficb +Kicb = (0.175 −i0.138)Ficb FE -icb⟨Br⟩, (53) -whereFE -icbis defined as in Equation (51) but using the parameters for Earth as defined in Dumberry - and Koot [2012]. These are Ω o= 7.292×10−5s−1,ρs= 12846 kg m−3,rs= 1221.5 -km,σ= 5×105S m−1, which givesFE -icb= 90.36 T−2. -To computeFicb, we assume an electrical conductivity of σ= 106S m−1in the core of +icb +⟨Br⟩, (53) +where FE +icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry + and Koot [2012]. These are Ω o = 7 .292 ×10−5 s−1, ρs = 12846 kg m −3, rs = 1221 .5 +km, σ= 5 ×105 S m−1, which gives FE +icb = 90.36 T−2. +To compute Ficb, we assume an electrical conductivity of σ= 106 S m−1 in the core of Mercury [e.g. de Koker et al. , 2012; Deng et al. , 2013]. The transition between the weak and -strong field regime occurs when ⟨Br⟩ ≈ 1.53 mT for the real part of Kicb.⟨Br⟩at the ICB +strong field regime occurs when ⟨Br⟩ ≈1.53 mT for the real part of Kicb. ⟨Br⟩at the ICB of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geometry inside the core could be dominated by small length scales, yet only the weaker lower harmonics of the field would penetrate through a thermally stratified layer in the upper region of @@ -1168,82 +1410,106 @@ of Mercury is unknown. The dynamo model of Christensen [2006] showed that the fi Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. If so, the field strength inside the core can exceed the surface field strength by a factor 1000. Taking a surface field strength equal to ∼300 nT [e.g Anderson - et al. , 2012],⟨Br⟩at the ICB could be as large as 0.3 mT, corresponding to approximately + et al. , 2012], ⟨Br⟩at the ICB could be as large as 0.3 mT, corresponding to approximately 10% of the field strength within Earth’s core. 
Given that it is perhaps unlikely that Mercury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of Mercury remains in the weak field regime. -Figure 6 shows how ˜ εm, ˜mfand ˜nsvary as functions of inner core radius for different choices -of⟨Br⟩. The larger⟨Br⟩is, the stronger is the EM coupling at the ICB, and the smaller is the +Figure 6 shows how ˜εm, ˜mf and ˜ns vary as functions of inner core radius for different choices +of ⟨Br⟩. The larger ⟨Br⟩is, the stronger is the EM coupling at the ICB, and the smaller is the differential rotation between the fluid core and inner core. The inner core and fluid core are virtually locked into a common precession motion when ⟨Br⟩>0.3 mT. Further increasing ⟨Br⟩ above 1 mT does not change the solution as EM coupling already dominates all other torques on the inner core. This is the case even when EM coupling transitions into the strong field regime. -EM coupling at the CMB is included in these calculations, with σm= 1 S m−1and⏐⏐g0 -1⏐⏐= +EM coupling at the CMB is included in these calculations, with σm = 1 S m −1 and +⏐⏐g0 +1 +⏐⏐ = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core -we retrieved the solutions of ˜ εmand ˜mfshown in Figure 4. -As the inner core radius is increased, both ˜ εmand ˜mfget smaller, as it was the case with +we retrieved the solutions of ˜εm and ˜mf shown in Figure 4. +As the inner core radius is increased, both ˜εm and ˜mf get smaller, as it was the case with viscous coupling alone, although the addition of EM coupling lead to more substantial changes. The inner core needs to be larger than approximately 500 km for changes in the Cassini state -equilibrium to be noticeable. It is important to point out that ˜ mfis reduced not because of +equilibrium to be noticeable. 
It is important to point out that ˜ mf is reduced not because of EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the -greater is the reduction in ˜ εmand ˜mf. +greater is the reduction in ˜εm and ˜mf . When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are -locked into a common precession motion, a good approximation of ˜ εmis given by the same prediction - as Equations (39-40) involving the effective moment of inertia C′, exceptχis now given +locked into a common precession motion, a good approximation of ˜εm is given by the same prediction + as Equations (39-40) involving the effective moment of inertia C′, except χ is now given by -χ=¯AcΩpcosI−¯AsΩoα3φs -¯AfΩo(ef+Kcmb) +¯AsΩoesα3αg−¯AcΩpcosI. (54) -For a small inner core, ¯AcΩpcosI > ¯AsΩoα3φsandχis positive. Because ¯AsΩoα3φsincreases -with inner core size, χgets smaller, and so do C′and ˜εm. The mantle obliquity drops from 2.049 +χ= +¯AcΩp cos I−¯AsΩoα3φs +¯Af Ωo(ef + Kcmb) + ¯AsΩoesα3αg −¯AcΩp cos I . (54) +For a small inner core, ¯AcΩp cos I >¯AsΩoα3φs and χ is positive. Because ¯AsΩoα3φs increases +with inner core size, χ gets smaller, and so do C′and ˜εm. The mantle obliquity drops from 2.049 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 -arcmin. For an inner core larger than ≈1000 km, ¯AcΩpcosI < ¯AsΩoα3φs, soχbecomes negative,C′becomes - smaller than the moment of inertia of a rigid Mercury C, and ˜εmbecomes +arcmin. For an inner core larger than ≈1000 km, ¯AcΩp cos I <¯AsΩoα3φs, so χ becomes negative, + C′becomes smaller than the moment of inertia of a rigid Mercury C, and ˜εm becomes smaller than the prediction based on a rigid planet. 
The larger the inner core is, the smaller are the misalignments of the fluid and solid cores with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone is not altered with the addition of EM coupling but further strengthened; the larger the inner core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the -obliquity of the gravity field ˜ εgwhich, for a large inner core, asymptotically approaches the obliquity +obliquity of the gravity field ˜εg which, for a large inner core, asymptotically approaches the obliquity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between - ˜εmand ˜εgcan be as large as 0.008 arcmin for a large inner core. + ˜εm and ˜εg can be as large as 0.008 arcmin for a large inner core. 3.5 Fixed inner core density versus fixed ICB density contrast Coupling models when viscous and EM stresses are both present have been presented in Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, –25– Confidential manuscript submitted to JGR-Planets -2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin) +2.032 +2.034 +2.036 +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)0.00.51.01.52.02.53.03.54.04.5Obliquity angle (arcmin) +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)Br at ICB: 1 mT 0.01 mT 0.03 mT 0.1 mT 0.3 mT +Inner core radius (km) +Br at ICB: 1 mT 0.01 mT0.03 mT0.1 mT0.3 mT εm εg mf -nsa b +ns +a b for a rigid planetεm -Figure 6. a) Obliquity of the mantle (˜ εm, solid lines) and gravity field (˜ εg, dashed lines) b) ˜ mf -(solid lines) and ˜ ns(dashed lines) as a function of inner core radius and for different choices of Br +Figure 6. 
a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf +(solid lines) and ˜ns (dashed lines) as a function of inner core radius and for different choices of Br (colour in legend). for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we consider - a model where Kcmbis purely from viscous coupling and Kicbpurely from EM coupling. -We choose an effective viscosity at the CMB of ν= 10−4m2s−1, which we believe to be a + a model where Kcmb is purely from viscous coupling and Kicb purely from EM coupling. +We choose an effective viscosity at the CMB of ν = 10 −4 m2 s−1, which we believe to be a representative value given the comparison with the Moon (see section 3.3). We take a radial field strength at the ICB of ⟨Br⟩= 0.3 mT, approximately the field strength expected under the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’ - coupling model, although the uncertainty on νand⟨Br⟩obviously remains high. -Figure 7 shows how ˜ εm, ˜mfand ˜nsvary with inner core radius for the ’representative’ + coupling model, although the uncertainty on ν and ⟨Br⟩obviously remains high. +Figure 7 shows how ˜εm, ˜mf and ˜ns vary with inner core radius for the ’representative’ coupling model (black lines) under the fixed inner core density scenario that we have used in sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative coupling model, we adopt instead a fixed density contrast between the fluid and solid -cores and for different choices of α3(coloured lines). For a relatively high density contrast ( α3= +cores and for different choices of α3 (coloured lines). For a relatively high density contrast ( α3 = 0.2), the results are qualitatively similar to the fixed inner core density scenario. 
For a smaller α3, the point at which the orientation of the co-precessing fluid and inner cores begins to be pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the -general behaviour of ˜ εm, ˜mfand ˜nsas functions of inner core radius is unchanged. Hence, all +general behaviour of ˜εm, ˜mf and ˜ns as functions of inner core radius is unchanged. Hence, all our results in the previous three sections would be qualitatively similar under a fixed density contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core is required in order to produce an equivalent change in the Cassini state equilibrium. @@ -1254,38 +1520,61 @@ model included the tangential viscous stress at the ICB and CMB, but not the EM Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as –26– Confidential manuscript submitted to JGR-Planets -2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050Obliquity angle (arcmin) +2.032 +2.034 +2.036 +2.038 +2.040 +2.042 +2.044 +2.046 +2.048 +2.050Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km)0.00.51.01.52.02.53.03.54.04.5Obliquity angle (arcmin) +Inner core radius (km) +0.0 +0.5 +1.0 +1.5 +2.0 +2.5 +3.0 +3.5 +4.0 +4.5Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 -Inner core radius (km) for a rigid planetεma bα3: 0.20 0.01 0.05 0.10 0.15 ρs = 8800 kg m -3 +Inner core radius (km) + for a rigid planetεm +a b +α3: 0.20 0.010.05 0.100.15ρs = 8800 kg m-3 mf -nsεm +ns +εm εg -Figure 7. a) Obliquity of the mantle (˜ εm, solid lines) and gravity field (˜ εg, dashed lines) b) ˜ mf -(solid lines) and ˜ ns(dashed lines) as a function of inner core radius, for a fixed inner core density of -8800 kg m−3(black lines) and for different choices of α3(coloured lines). +Figure 7. 
a) Obliquity of the mantle (˜εm, solid lines) and gravity field (˜εg, dashed lines) b) ˜mf +(solid lines) and ˜ns (dashed lines) as a function of inner core radius, for a fixed inner core density of +8800 kg m−3 (black lines) and for different choices of α3 (coloured lines). i′ -m,i′ -fandi′ +m, i′ +f and i′ s; these represent the obliquities with respect to the orbital plane and are connected to our variables by: i′ -m= ˜εm,i′ -f= ˜εm+ ˜m+ ˜mf≈˜εm+ ˜mfandi′ -s= ˜εm+ ˜ns. To summarize +m = ˜εm, i′ +f = ˜εm + ˜m+ ˜mf ≈˜εm + ˜mf and i′ +s = ˜εm + ˜ns. To summarize their results, i′ -fandi′ -svary substantially for different inner core sizes, are always of comparable +f and i′ +s vary substantially for different inner core sizes, are always of comparable amplitude, and i′ -sis always larger than i′ -f. Furthermore, they find that as the inner core +s is always larger than i′ +f . Furthermore, they find that as the inner core size is increased, the mantle obliquity i′ -mgets progressively larger and is displaced further away +m gets progressively larger and is displaced further away from its expected orientation based of a rigid planet (see their Figure 6). The change in i′ -mthey +m they obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary radius (≈1463 km, close to the maximum inner core size of 1500 km we have considered), -is approximately an increase of 5 ×10−5rad = 0.17 arcmin. This also corresponds approximately +is approximately an increase of 5 ×10−5 rad = 0.17 arcmin. This also corresponds approximately to the deviation of the obliquity with respect to that of a rigid planet. When only viscous stress is included in our model (section 3.3), our results are substantially different. 
As illustrated in Figure 4, we find instead that the obliquity of the fluid core @@ -1323,13 +1612,13 @@ amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than f alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016], and again, importantly, in the reverse direction. Our results suggest then that the presence and size of an inner core leads to only modest - changes of the mantle obliquity εmcompared to the obliquity predicted on the basis of an -entirely rigid planet ( εr -m). Let us denote this difference as ∆ εm=εm−εr + changes of the mantle obliquity εm compared to the obliquity predicted on the basis of an +entirely rigid planet (εr +m). Let us denote this difference as ∆ εm = εm−εr m. The largest ∆ εm -occurs for a small or no inner core, and is ∆ εm≈0.01 arcmin. This difference is decreased +occurs for a small or no inner core, and is ∆ εm ≈ 0.01 arcmin. This difference is decreased as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM -coupling and large density contrast at the ICB, ∆ εmcan be negative, but its absolute value +coupling and large density contrast at the ICB, ∆ εm can be negative, but its absolute value remains smaller than 0.01 arcmin. To put these results in perspective, the uncertainty in the measurement of the mantle obliquity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much @@ -1341,11 +1630,11 @@ the inner core size. Nevertheless, our results show that the presence of a fluid core and inner core affect the resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec ( ≈0.006 -arcmin) [ Baland et al. , 2017]. This is also of the same order as the amplitude of the nutation +arcmin) [Baland et al. , 2017]. 
This is also of the same order as the amplitude of the nutation motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which -is approximately 0.85 arcsec ( ≈0.014 arcmin) [ Baland et al. , 2017]. The precision on the obliquity +is approximately 0.85 arcsec ( ≈0.014 arcmin) [Baland et al. , 2017]. The precision on the obliquity from the upcoming BepiColombo satellite mission is expected to be ≤0.5 arcsec (≤0.008 -arcmin) [ Cical` o et al. , 2016]. Thus, in addition to including tidal deformation and the precession +arcmin) [Cical` o et al., 2016]. Thus, in addition to including tidal deformation and the precession of the pericenter, a Cassini state model that includes a fluid and solid core will then be necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens the possibility of further constraining the interior structure of Mercury on the basis of its obliquity. @@ -1357,19 +1646,19 @@ two orientations do not coincide when an inner core is present and is misaligned Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we –28– Confidential manuscript submitted to JGR-Planets -find that the misalignment ∆ εg=εg−εmis limited. The maximum offset that we obtain -is approximately ∆ εg≈0.007 arcmin. This limited magnitude of offset is important in the -light of the recent obliquity of the gravity field estimated in Genova et al. [2019],εg= 1.968± +find that the misalignment ∆ εg = εg −εm is limited. The maximum offset that we obtain +is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the +light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968± 0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the -spin-symmetry axis of the mantle: εm= 2.04±0.08 arcmin [ Margot et al. , 2012] and εm= -2.029±0.085 arcmin [ Stark et al. 
, 2015a], although all three measurements remain consistent +spin-symmetry axis of the mantle: εm = 2.04 ±0.08 arcmin [Margot et al. , 2012] and εm = +2.029±0.085 arcmin [Stark et al. , 2015a], although all three measurements remain consistent with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest - that the different central value of the obliquity that they obtain (smaller by ∼0.07 arcmin) - is perhaps explained by an offset ∆ εgdue to the presence of a (possibly large) solid inner + that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 arcmin) + is perhaps explained by an offset ∆ εg due to the presence of a (possibly large) solid inner core. However, this is one order of magnitude larger than the maximum magnitude of ∆ εg that we predict. Moreover, we predict that the obliquity of the gravity field should be larger than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision -of the measurements, εgandεmshould coincide, and their difference cannot be interpreted as +of the measurements, εg and εm should coincide, and their difference cannot be interpreted as reflecting the misalignment between the polar moment of inertia of the whole planet and the mantle spin axis. Lastly, we have concentrated our efforts on the mutual orientations of the different spin @@ -1412,186 +1701,186 @@ scripts and data files to reproduce all figures are freely accessible in Dumberr was supported by an NSERC/CRSNG Discovery Grant. References Alf` e, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core -conditions, Phys. Rev. ,B61, 132–142. +conditions, Phys. Rev., B61, 132–142. Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin, S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. 
Zurbuchen (2011), The global -magnetic field of Mercury from MESSENGER orbital observations, Science ,333, 1859– +magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859– 1862. Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Lowdegree - structure in mercury’s planetary magnetic field, J. Geophys. Res. ,117, E00L12, + structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12, doi:10.1029/2012JE004159. Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury: - Influence of the precession of the pericenter and of tides, Icarus ,291, 136–159. + Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159. Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession and polar motion of a synchronously rotating satellite: application to Titan, -Celestial Mechanics and Dynamical Astronomy ,131(11), 1–50. +Celestial Mechanics and Dynamical Astronomy , 131 (11), 1–50. Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the -forced nutations of the Earth, J. Geophys. Res. ,97, 19,581–19,597. +forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597. Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from -observations of nutations, Earth Planet. Sci. Lett. ,296, 367–372. +observations of nutations, Earth Planet. Sci. Lett. , 296, 367–372. Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: -effects of electromagnetic coupling, J. Geophys. Res. ,107, doi:10.1029/2001JB000056. -Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. ,33, +effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056. +Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. 
Fluid Mech. , 33, 739–751. Byrne, P. K., C. Klimczak, A. M. C. Seng¨ or, S. C. Solomon, T. R. Watters, and S. A. Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature -Geosci. ,7, 301–307. +Geosci., 7, 301–307. C´ ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: -flows, dissipation, dynamo and the lunar core, Geophys. J. Int. ,219(Supplement 1), +flows, dissipation, dynamo and the lunar core, Geophys. J. Int. , 219 (Supplement 1), S34–S57, doi:10.1093/gji/ggz037. -Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature , +Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature, 444, 1056–1058. Cical` o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software, -Month. N. Roy. Astr. Soc. ,457, 1507–1521. -Colombo, G. (1966), Cassini’s second and third laws, Astron. J. ,71, 891–896. +Month. N. Roy. Astr. Soc. , 457, 1507–1521. +Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896. Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second -Edition , vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford. +Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford. de Koker, N., G. Seinle-Neumann, and V. Vlˇ cek (2012), Electrical resistivity and thermal conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. -Nat. Acad. Sci. ,109, 4070–4073. +Nat. Acad. Sci., 109, 4070–4073. –30– Confidential manuscript submitted to JGR-Planets de Wijs, G. A., G. Kresse, L. Voˇ cadlo, D. Dobson, D. Alf´ e, M. J. Gillan, and G. D. 
Price -(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature , +(1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature, 392, 805–807. -Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics , +Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics, vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford. Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary, -Geophys. J. Int. ,167, 557–566. +Geophys. J. Int. , 167, 557–566. Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical -resistivity of iron and implications for planetary cores, Geophys. Res. Lett. ,40, 33–37, +resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37, doi:10.1029/2012GL054347. Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid inner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL Dataverse, V2. Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations, - Geophys. J. Int. ,191, 530–544. + Geophys. J. Int. , 191, 530–544. Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization -regime, Icarus ,248, 254–268. +regime, Icarus, 248, 254–268. Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner -core, J. Geophys. Res. Planets ,121, 1264–1292. +core, J. Geophys. Res. Planets , 121, 1264–1292. Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mercury’s - core density structure on its longitudinal librations, Icarus ,225, 62–74. -Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res. ,77, 360–366. + core density structure on its longitudinal librations, Icarus, 225, 62–74. +Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. 
Res., 77, 360–366. Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang, T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019), -Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett. ,46, +Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46, doi:10.1029/2018GL081135. Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at -the top of the core, Frontiers in Earth Science ,6, 171, doi:10.3389/feart.2018.00171. +the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171. Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global -contraction of Mercury, Earth Planet. Sci. Lett. ,307, 135–146. +contraction of Mercury, Earth Planet. Sci. Lett. , 307, 135–146. Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine, E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T. -Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res. ,118, +Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118, doi:10.1002/jgre.20091. Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H. Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon (2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. -Res.,117, E00L14, doi:10.1029/2012JE004217. +Res., 117, E00L14, doi:10.1029/2012JE004217. Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orientation, love number, and ephemeris from the MESSENGER radiometric tracking data, -Icarus ,335, 113,386. +Icarus, 335, 113,386. Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the -electromagnetic coupling for nutations, Geophys. J. Int. ,195, 200–210. +electromagnetic coupling for nutations, Geophys. J. Int. 
, 195, 200–210. Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth -Planet. Sci. Lett. ,193, 509–514. +Planet. Sci. Lett. , 193, 509–514. Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude - libration of Mercury reveals a molten core, Science ,316, 710–714. + libration of Mercury reveals a molten core, Science, 316, 710–714. Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens, M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s –31– Confidential manuscript submitted to JGR-Planets -moment of inertia from spin and gravity data, J. Geophys. Res. ,117, E00L09, +moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09, doi:10.1029/2012JE004161. Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s internal structure, in Mercury: The View after MESSENGER , edited by S. Solomon, L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi: 10.1017/9781316650684.005. Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation -theory, J. Geophys. Res. ,110(B02402), doi:10.1029/2003JB002915. +theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915. Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of -the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res. ,96, 8219–8242. +the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242. Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J. -Geophys. Res. ,107, doi:10.1029/2004JB000390. +Geophys. Res., 107, doi:10.1029/2004JB000390. Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber, D. E. Smith, and S. C. 
Solomon (2014), The gravity field, orientation, and ephemeris of Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res. -Planets ,119, 2417–2436. +Planets, 119, 2417–2436. Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon -and the phase lead of its Cassini state, Journal of Geophysical Research Planets ,125, +and the phase lead of its Cassini state, Journal of Geophysical Research Planets , 125, e2020JE006386. -Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J. ,74, 483–489. -Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J. ,79, 722–744. -Peale, S. J. (1976), Does Mercury have a molten core?, Nature ,262, 765–766. -Peale, S. J. (2005), The free precession and libration of Mercury, Icarus ,178, 4–18. +Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489. +Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744. +Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766. +Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18. Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance, - Icarus ,181, 338–347. + Icarus, 181, 338–347. Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle -and tidal torques on Mercury’s spin axis orientation, Icarus ,231, 206–220. +and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220. Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a -solid inner core on Mercury’s spin configuration, Icarus ,264, 443–455. +solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455. Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of -Mercury, Geophys. Res. Lett. ,42, 6951–6958. -Poincar´ e, H. (1910), Sur la pr´ ecession des corps d´ eformables, Bull. Astron. Ser. 
1 ,27, +Mercury, Geophys. Res. Lett., 42, 6951–6958. +Poincar´ e, H. (1910), Sur la pr´ ecession des corps d´ eformables,Bull. Astron. Ser. 1 , 27, 321–356. Pozzo, M., C. Davies, D. Gubbins, and D. Alf´ e (2012), Thermal and electrical conductivity -of iron at Earth’s core conditions, Nature ,485, 355–358. +of iron at Earth’s core conditions, Nature, 485, 355–358. Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s -rotation, Phil. Trans. R. Soc. Lond., A ,252, 531–555. -Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res. ,67, 4833– +rotation, Phil. Trans. R. Soc. Lond., A , 252, 531–555. +Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833– 4836. Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic coremantle - coupling, J. Geomag. Geoelectr. ,20, 387–402. + coupling, J. Geomag. Geoelectr., 20, 387–402. Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle -coupling, Geophys. J. R. Astron. Soc. ,46, 109–126. +coupling, Geophys. J. R. Astron. Soc. , 46, 109–126. Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), Towards evaluating the viscosity of the Earth’s outer core: an experimental high pressure -study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett. ,29, 080,000–1. +study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett., 29, 080,000–1. Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang -(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B ,66, 060,102, +(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B , 66, 060,102, –32– Confidential manuscript submitted to JGR-Planets doi:10.1029/2001GL014392. Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metalsilicate partitioning and its role in core formation and composition on Super-Earths, -Astrophys. J. ,835, 234. -Sori, M. M. 
(2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. ,489, 92–99. +Astrophys. J., 835, 234. +Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. , 489, 92–99. Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann, S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations - of Mercury’s librations, Geophys. Res. Lett. ,42, 7881–7889. + of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889. Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular -orbital elements, Celest. Mech. Dyn. Astr. ,123, 263–277. +orbital elements, Celest. Mech. Dyn. Astr. , 123, 263–277. Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity -of a precessing rigid body, J. Fluid Mech. ,17, 1–20. +of a precessing rigid body, J. Fluid Mech. , 17, 1–20. Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys. -Res. Planets ,123, 1–25, doi:10.1029/2018JE005607. -Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics , +Res. Planets, 123, 1–25, doi:10.1029/2018JE005607. +Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics, vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford. Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides -and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. ,333–334 , +and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. , 333–334, 83–90. Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER - radio science data, J. Geophys. Res. Planets ,121, 1627–1640. + radio science data, J. Geophys. Res. Planets , 121, 1627–1640. Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping -Tools: Improved version released, EOS Trans. 
AGU ,94, 409–410. +Tools: Improved version released, EOS Trans. AGU, 94, 409–410. Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of -dissipation, J. Geophys. Res. Planets ,120(4), 689–724, doi:10.1002/2014JE004755. +dissipation, J. Geophys. Res. Planets , 120 (4), 689–724, doi:10.1002/2014JE004755. Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar -rotational dissipation in solid body and molten core, J. Geophys. Res. ,106, 27,933– +rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933– 27,968. Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine, S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A. Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek, J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J. Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J. -Geophys. Res. Planets ,119(7), 1546–1578, doi:10.1002/2013JE004559. +Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559. Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond. -A,303, 327–338. -Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus ,181, +A, 303, 327–338. +Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181, 327–337. –33– \ No newline at end of file diff --git a/read/results/pypdf/2201.00069.txt b/read/results/pypdf/2201.00069.txt index aa3d4ce94a9708071f6d1e46458b63de5fe6d3be..719457551dc3a81a1d4781c6aba51d8139831fe9 100644 GIT binary patch delta 6110 zcmd5=YmgMzb!KK+9xVX@1R7q;MR+9^dWP@NFYU%<|?E1^epXQp?iwLRUV ze(W$C+p9OS?8u-iQIag;P?QZ;qKczVC6aR=+%9z|*tl+ac&V~|3D&RIYqnjKGZ#)| zFP!LLoe4IK^$9j=6w2Hwx^j~3#R%I#l3|(#uh1M84LV#@BdgN%45?lEjBgZmt8Duo z7T}8mm~?c{wlEw@u#p7Y>lnK0>NFw#CbFT0J{{&J^x+Me!$x6>n??!hVaYEdtNYm? 
zcIOY(cu|*=5){jE%P8_W9df>78T35Xu(ROsQVB!o0|nrm32sh7Sc-}M=$=+`Pp9EQ zS8J+_ePlJodJ_9FX_YRXv&boFCSHxQ9c&!Rb0=@R92z>FVY#xVkkBsf&DfKMC2K0X zhs-XNb;qy_S<^0@(30%ArnQ;SAGYDTuw83s16XF872EP;Ek!mLbxc@w`ai^{b*E(M zvWA_9n-Egy*yZ>Zf(N6zeAAecwJ@;V+$!l6Y?Pt^b#up_oV+%0xn)mtS?!RpTQ~Q( zi3wTDhSAX-$M;A>64_nmRuR!hK)cI`1?~;*- zvnRO&A?-~3Pt*D(WYu1M8jeUROtjA5tWcz^owpDEG4v+R< z{A(uwlgNQdxAd?2m7r$p3lc9RZszKBL;air$r{pAja;iBtOQ|wwyPj_mJ`}Qd z2obw@)`4o;oJWRqi__~A=`p1HP?eCEOi2jNkX?2G^w3p9K6lI024D)GL&u?dH4Jfz zTtCbUQ#!QQXiwx&-7V|0(^N~od{K^%Y<2s7CL*BsyuvJikwle_~4mFVsOPvjQz73b zQ;0Uy0tX@{lc!2Dd9SM4fj5BPp{(FTpe zobz6@F+K&|+_E9J!|T6X#0o^^W3K-YW>S=I)7)Y`W|4?0 zOOYH$wrW`-qHzP6-L60`1``}S2b)sl*$D$aER=22oeBYPkIyE2i%r;8iKK{!7jFuo zeV98X?$qHJ75u8U<6C@vxHw}VunqXp7%Utz)A_}*2oskV-?c^>9vSV-DT*R9^}|O5 zcHN>Wn%X9bgp>^OQcE<;z`U$bnWr&#o_g`gd8&MO+~IZ8s2CtS7E51vAstnfdD!*S zE17ux=Dy`De4|}|BNi^%j=j@M9{&RtOC_(IeC6msJ+Jf1JXKhav`CD5S56*3!*((8 z(UMuw`qHkOSRX)bF|VvM@!h5853;=PxSq=<9lJuauFiT#yIkfVFHt6o%(9uK*HAY+ zz2F(ZH7nXR2(TGvlcuj*1)V{2C81;o>J^uXv&&k8OUvY__|5XymP&Y6D+9kQhNKd& zuee?O%wYje^GNR@X@n>BASFWTGv$13v!9j76m2k78eoC|AX~tYV-$arFYq0z(fn=SoN# z)5jU8#dYB@7R1+Hh%9RaLb>pQ+M#8D>ff!ah^h74*CA+6UpaZ~9s18=s*>yo;|q8& z5v)Ikt-CjjZzmU{y22_4p{eQ|?Lmnmr73Dw=}^;MjfhxKJYQ$x{Dw8LxzJ-vB?-=5 zKo5$d!IllrtY!(80oDjoAaKGO?HD}MvMBQD4?8j)S3kttGaGM>K%uu;6~Bp1g!|r2 z_XVdn)uRhzX*Lfm8{GD#pG5Bp@n!O`oU@wFxar!A+~gNLV9GTrz6k=+ZQo^f-qoFH z7)02j0>Q-AJN|bEE25(MF2Vt*y0F&N=i&!Bu-^A8*Fg#26ftsu?IH^fZ^=ia5)~dwXHt~~Hcz5I4AiCX;!do@<(JxrAL^~b@ zvUF$Cb}G=!K#CzxT|XbslW|6cm!J?}prbOdtTx;~!-5~SZ;mVia-MZ4^RA>WT_5~i z`u5M?N<=b0YG=K1-73LG#vPp%c-8ZfBQktw@;V}g5=4=bSrt4Cbw~yAN#@tP8PN;k z6y|hI$HzXsN6vueeS};O{akgRuu~6LLuo0dLUak(`%;G{COV#jFaE3J`{cftc5D~# z?f4cZiaTZe9^1JNzkjy#&Raf*NLlckoj;3+f7`VlYks?Hujst%XLxGeU27teJGpyZ z`2Wq_nFtH6>|PgH6oNjTV!_JJuS6DOlmtw{L@##L7pobC%}Xm7t?7OqAH3GRAu@E; z)8rTKR^f*DHFtz=aQzAeC`2l-=}+69QNg6-*|y2H^!Ie;_V0T&kuiyOkWcr7Vj~AA^z;ecLDpAL~~m zk_h^1cf!X66x|FX7ODuf3w1>4WyAk%;2{efwrZZZS>fIYtS` 
zAc`i0p;4#;a9bSxVRR(KHr!*T&RvgrHTzRbU^jMegx%#}-x9A>I8`-<8e(~E4Q6t~ z%%c?Zi~?voq-NSWSUzwzx(VS^B_R1R3>V#T$=f?)bN=aIAQLs09{( zJaj*-KRBFkz4UxYnY*NHDox2iX;L$a;14`dh8mWKx;*@BP#XC`G-y5e^`>Tmlq8mY z`EaoO&|-4q+VNkDRTJ+olNw8ZOfM8vP|*?2v@HXbGj z?mT>WFqUeS!*mUcXUHUmClQU<;Qzc?XJw;Ayj}F+BzO*dX4+Iy3m}=0_s9)2E&k!~ z`9(36cpB~fx9@EcZ{M5TH%EIeY9(sz7Pn0G7bH(#6ws!yf`gq%gJ}P~^kDBYnSa5Xmc;t?dGSqA~ z1s?P*&%N*h-p}nIP3ZXp`-X>eI5F%gx2g8VO|4#XjmBvP& zY2~(LiG=8j8@jx}GcJdfYsg&R#I%%{G6xq@W>D|+me~%mADLV68>_4rsmiuYnAy21 zFX)}c3T_0D*bP9z*9`fK5z;wdCnt8}_5sBOr$>-%g+l({m4vulxg~ts{ke22_?x$i zaqE{&K$7^^n^(s)SUYEycCC+^R25HoSV6^Nj+uX2eAFl?u@L6p(jSxqDu`c z1NAujAMRgC&Fg38gEJ2wjI4|$8ORmD10PW*z%5BgZb4Y?2hjR$mI)sB{v{Id>9vt0 znn>=TBVuTYI+Tps-igx1IC|70EbrL7h)g8zgOk8YjYi{atxsI3J#bS-YNQU+65`$? zU!7%1YT{G$(-F4wTKG$~qJfDv>cWs}kQ5>mC&1iT=jdn-5I)hU*mku+bm0fOQ^k1~ zAES+>D=UxE6$D{~{Cu^RQ2i(U#80Ia(F79@*8lbk!xBxBs%u1kX7c}$y5!BJ9ap{C zo;WnVU%dLn{o?&E?+I@G$|pp*OJ^_P_n&7eNp5@O6^i2{(_QStC!SSP@ZQTQ`u)>{ znrdfp90!*ws-mJ*RTNzMbTE2A*BVZ=vurkzVM|#ylK@}ZkES|r6w^TkKY3jWj-1(0 zipHWiZ4uL|T0$c!3WPz*YVq--Z10VdaGP{hQagQ^QkfD;0;r=``wTUcNJEyYWD+TM z`ru=cy^`xsPU41%f)2b;$fpgjK36h6+u_N8lkdu5(@f2k6_T-d=J6|EjD%DoIQvA6 ze7o@}AHUB(^tZLMuWL*5k(bi^_;Xo=b9~}$g zPsf{Xx*aKm&WOGfg|Gj8Gv$jE?D)p3O$%~0W71>6&%e30>1I{E8s~IEtPr(@L{okG zxOh%z3vr}*144rzi(o$WWWnr#%SoC9><>lj=njtls3bX0KV+nEJ8#oh7U zwF?NS!KpvrL*svceP`q@x|rxp$gCe{I$V>$y}DwEDw$ zQq7^3AHTbLen!G&Rlznn4ouxd?3RJU(eHiN=lFC*QJk6LEF~i+;;NzqJKr;##o0^S JR@~T4{~wgY=-2=N delta 5734 zcmd5=i<1<`eZJk}xq<@=gpPL$3Ai(}b2GEIFVG3@$-!~Q>Gl8#A>rQL-ksrQrF(d-`@Y|=_g`LY{l_b<=SL#s!pPnDaGVQ&yKd94N_stWin>Z_ z_OhNHQc130Uc2gfHp7%?k8H^7Q^_!^*ElaK>DkMf*~{5pm5ky=l}uO#le$GuNep0s ztyFTvvTci2FeR#y^#Odj}30xynZ&SgG#l*9p*jHzV6wV3BIjKw!2 z8&6}|D7Bg6Qzapvt-=(wtuky%9LA8l%(i(#Au#`bD^PTO1n1KeHCkj!A_mKH)Uk>b zR!Bnw*Rk+83G1YPE-jap1k8dV*x(4Yr?7F7Iz?&85!WgU7ENwJEP5>;wmVZM4p;Q# zpn4EV?)Zw46yvRH#fjE+r(sB+8#qb|4)g`KHKiCb zQT@~@vkFc~!ry-C@=|I3*`p4hriz}8VaI;zQyUYCk-^dZ%yk1_+J)WvO*pOSNL#;& z*c}_~n8Pz)Boh-0O0u*flX>mSwpEfg18*aC@U1r;i%uztZt=r~?Wci-5$^ec 
zBf+A<`vb`0wU=GkYc!*CfH_otnS>PwSOD`S-V(`}bby;4FfOBZAfTQ(Z2_7_Hw-*b zuQrmNMEpl-VT!?U1BZb~bNbo_kf|!Uii&x!&?XdiyL#3pc%$7*k z#P2U&zimQ=SL4(LXMm^_mW^8w3d4YR8h(slyK34dFEt_v8}1J* z*JVzg0t9*zftX-WF>rAPZ^*D>T#+wlVqp8t0d|_;HOP+Ats}=1L9vlhSUC|`Gx5XB z1{pZQ03&kLq*Dm2A}%jkaT?&vQG2Elr9^T-#Su^^#2#>w=V9PWD%%_xLeO&5!)ib& zHmu5F!3=yKWCX_WTmvd){`ae6Dm>4bw92axr;OaG$_jki3Ou=3epzKs;WT_PvPfLc zXOJ>lsp4h%r4Ran!JZAH0jx(+8A21jVQp_)k;dc;Dw3nsTc9HD0)49Jnd zAxAiPXk^4cEWK+889qvJnp?+#Iy6-8?v;@{TAZj!pFDh^A=@(cgbU=t^EncWK?WDi$QKJvDnN71gyCz5ABeguZ-lTVwX-V)4xKF7d|lvn$$@i5pj5cxrz= z&uFDZ7o%-t5xH^Y!teD6zwLAus%rtgcHK9bZ(43~%HcI73mw83(>T_DM0|0^X zlBQ~z{q>3`4y@c7A>qlDN;E>mmD^v5@lwqM4cbAm2UcwpkFENn1U9df=(N;Z2dN8u zD6Hor`0BXJ%gjM!T)e|&<+hxH9TZu3V)fgRM3wt6)}nUx=6hbJn&TB%(6G1y39-GZ z%ZgUPcWKe$VQEcYG~IYCp!!)(5LBQIo(C`FxWc>w{PTE87Dp3xsLrG%vDo{cIVb+P zeW!TTX%BnWJ`?Gj+t#aCXhb{^!YQKbXvU0}CfVWneZ&7;HydedW<{I5q-TwELVSBe z1rh1q*tHg4yzummE9bu<|F-K|A}ezbc!|zed=~K-2@yId+yMX zZC(<&?(awBS z{NavG;q@KAiXd+9#HAf0XIB^G)K(qlcNKt^uIVl#Wu$`wdWe{~v+KKZS@=I)K~x0B zna1F@#D(bgDsxNRtpHdKFz5yOcwTa?Ldz0644R)YMu%s@_mX!aqf@`T-ED0J^kZ;7ED_&M8%BCM)6)u_*hCgrvLIU{Z~_ zsNAgRbLew`xP$1d>cSMaAqu<{YuAgcY>3@E4TSFG&fgIWcKv~PZC980VAl)c%)N?u z_1^X3oqO+vNUaXn^?V>B{OkKRi1oV%@N|6l2V!7PEz%>tzGpo?|7K4*T;01KZZ(jU zN`~pa$C{%=i>`^c`s+)Gp4M7YnkIG+TnxWAurV^+3=RCF9*Tz_zv&n}@`4H?IR_IG zOmpxMb1|QDn{<%j!QR}#p-GZbjT8|*L+93$Q?5UsTGH{+@BexTmyYn)L%Sn=RT&K$ z@X{EqSb2+f7>PDLC3&OeKvfeuooY*f4EQ%lJ-<_l0Xq(}*>pnCp-+(@KTPN&)?K{7!JbiG7K4--2W{qz(sJ zZp<^$Ke!(0nCptd>d3|Dt{y!G7=9A9;e2jzXyE;)Klg91pT*RL=d0X~;_sQ+H{@rF zKKo|$`b8;5a`r_4TnvmoEWR|B-%6VEJ+VwO1#}aM`I?155IjiDXuREd@Hu4QPY=EY z%@6&F-)fDq(?WzkIt@&DB}2o+rlRw zoRu&ferWTDjNuP$5YIkzw2!nWw<^SPNJ9dMBs{VF80gNSfO)%aT4ktZF~G$Suh<-w zssI_7ODd*>e(dcfq+OR%*e(9+*d`ox=fjEJ4JGnWi3+SkhSAB(%2_~fP;lkm&X83G zIXwgZ#uA;x@(6B29`i}QPBapVhlR9y+wr6LeB$^>_}=l>sQ37aUl$XFQ{vr%x=F)v1B3lNX{}Q|BsDV@ zlPqZ}B?;S$??e_hBwPFwbH&lpvz@YdF%#W?vk1r?;T6w&%xz-ZYi>lv+VbP=4bC;) zCJoX_E)q(_E9IT*2%^;8Xg^Z7G$pck2^W|bA{|W`&@-7NDe%DYJu-U<)k`H!OOLSp 
z_`cE6TvlAQI#>22Q2>o-OxFxec0uA3Q)@)q2?)iBPj~#E&Zi?YdnqAb{%AuK{`ZL= zL^s318QC15gFy}PT-n-6KlP-~jbu#xtTM6yn9{^vryEuK;~VD%-(f}7s?vfA+Mt0k zRaOj$xaRD@MXZ>^7p%8f!Ic^uZXgD;U@M+ku>psET+$)_xN7YK&n-XjQFD2TOkyvG z+q_(lWyK!ehyUtrj@ea9IHnq_+TzT`b>a8j-1qz5ctB zhv#1>vo8`@Pxi9Ivv1CR0ZB+VU!6~a*mLq&c;#eo+giTw19mNIp?IYcj;+wcv)?_Ob zI5clFVV=7olP2sZ*TU^{Pu3#hS5F1Pd-@Zti}1_O51#()W>f@Co1;L7hs%^rT_~9> z5F`;N;U`B#2(8(6>nWiwgHGOSSJQQ{Z|X)63XF*=f2Xqh~#Qk$q(PV zu&%X@==yx*Q;7V(U#u;iXC)EUOU7cr^!)2BM)>7R;etp{c>Hs#ZslWaj*n-bH&kkS zQbw9>mSSs`ttg4*ULX!D47YbpL=!BB5~KBtPvwuSA;+P z<24ao7G7l-_m?46>BCPB1Gqmc3_r8>Li=zbs>seFhsz<8hvKZ4hF_DrtqzZU?WO28 zxYCfz36NL*$fJzOi(32$^ZXBuRl*%#Pqv2io5^`Qe)?x?7U*MEf!j3j`ejTU3-^Ir jmh5!{dHpWVOwpv0Nyl|fi-o=K+6$pmuY6|*ELiow4n@~D diff --git a/read/results/pypdf/2201.00151.txt b/read/results/pypdf/2201.00151.txt index 9dc2432..6f747ec 100644 --- a/read/results/pypdf/2201.00151.txt +++ b/read/results/pypdf/2201.00151.txt @@ -1,28 +1,29 @@ -arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022Astronomy&Astrophysics manuscript no. Populations4 ©ESO 2022 +arXiv:2201.00151v1 [astro-ph.GA] 1 Jan 2022 +Astronomy &Astrophysics manuscript no. Populations4 ©ESO 2022 January 4, 2022 Multiple stellar populations in Schwarzschild modeling and the application to the Fornax dwarf Klaudia Kowalczyk and Ewa L. Łokas -Nicolaus Copernicus Astronomical Center, Polish Academy o f Sciences, Bartycka 18, 00-716 Warsaw, Poland -e-mail:klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl +Nicolaus Copernicus Astronomical Center, Polish Academy o f Sciences, Bartycka 18, 00-716 W arsaw , Poland +e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl January 4, 2022 ABSTRACT Dwarf spheroidal (dSph) galaxies are believed to be strongl y dark matter dominated and thus are considered perfect obje cts to study -dark matter distribution and test theories of structure for mation. They possess resolved, multiple stellar populatio ns that offer new +dark matter distribution and test theories of structure for mation. 
They possess resolved, multiple stellar populatio ns that o ffer new possibilities for modeling. A promising tool for the dynami cal modeling of these objects is the Schwarzschild orbit sup erposition method. In this work we extend our previous implementation o f the scheme to include more than one population of stars and a more -general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galax y formed in -the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations +general form of the mass-to-light ratio function. W e tested the improved approach on a nearly spherical, gas-free galax y formed in +the cosmological context from the Illustris simulation. W e modeled the binned velocity moments for stars split into two populations by metallicity and demonstrate that in spite of larger sampl ing errors the increased number of constraints leads to sign ificantly tighter -confidence regions on the recovered density and velocity ani sotropy profiles. We then applied the method to the Fornax dSp h galaxy +confidence regions on the recovered density and velocity ani sotropy profiles. W e then applied the method to the Fornax dSp h galaxy with stars similarly divided into two populations. In compa rison with our earlier work, we find the anisotropy parameter to be slightly -increasing, rather than decreasing, with radius and more st rongly constrained. We are also able to infer anisotropy for each stellar +increasing, rather than decreasing, with radius and more st rongly constrained. W e are also able to infer anisotropy for each stellar population separately and find them to be significantly di fferent. Key words. galaxies: kinematics and dynamics – galaxies: structure – g alaxies: fundamental parameters – galaxies: dwarf – galaxi es: star clusters: individual: Fornax 1. 
Introduction Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo -1998; Tolstoy et al. 2009) are considered to be a perfect tool to +1998; T olstoy et al. 2009) are considered to be a perfect toolto test our current theories of structure formation involving dark matter in the context of near-field cosmology. The objects ar e believed to be strongly dark matter dominated with mass-to- light @@ -38,7 +39,7 @@ As the samples of the stars with kinematic measurements grew , it became possible to estimate the profile of the velocity dis persion and model it using the Jeans equation (Binney & Tremaine 2008). Since the stars in the galaxy can move on a variety -of orbits, from circular to radial, the degeneracy between t he +of orbits, from circular to radial, the degeneracy between the anisotropy of the orbits and the mass distribution is inhere nt in this type of modeling. The reason for this lies in the fact tha t different combinations of these quantities can reproduce the ve locity @@ -48,20 +49,21 @@ higher order line-of-sight velocity moments, such as the ku rtosis, and use the corresponding Jeans equations. Since the ku rtosis is more sensitive to the velocity anisotropy than to the m ass distribution, useful constraints can be obtained on both. S till, the -method requires large kinematic samples to estimate the vel ocitymoments reliably and some assumption on the functional form +method requires large kinematic samples to estimate the vel ocity +moments reliably and some assumption on the functional form of the anisotropy (Łokas 2002; Łokas et al. 2005). The Schwarzschild modeling technique (Schwarzschild -1979) offers a different approach to estimate the properties of +1979) o ffers a di fferent approach to estimate the properties of dSph galaxies without prior assumptions on the type of orbit s. It relies on building a galaxy model out of a set of best-fittin g orbits probed in the range of energy and angular momenta. 
In -this method, the anisotropy of the stellar orbits comes out a s a +this method, the anisotropy of the stellar orbits comes out as a result of the modeling in the same way as the density profile. A lthough it has been originally developed for large elliptica l galaxies - (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt e t al. + (van der Marel et al. 1998; V alluri et al. 2004; Gebhardt e t al. 2015), it has recently been adopted for use on discrete data characteristic of dSph galaxies and applied to a number of -dwarfs, including Carina, Draco, Fornax, Sculptor, and Sex tans +dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans (Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi 2013; Breddels et al. 2013; Kowalczyk et al. 2019). Many dSph galaxies show signs of the presence of multiple @@ -70,29 +72,29 @@ stellar populations resulting from a few star formation epi sodes Pace et al. 2020). This observation o ffers a way to improve the modeling methods since, assuming dynamical equilibrium, a ll populations are supposed to be influenced by the same underlying - gravitational potential of the galaxy, but they have d ifferent + gravitational potential of the galaxy, but they have different distributions so more constraints can be imposed dur ing the modeling. This approach was first used by Battaglia et al. (2008) to model the mass distribution in the Sculptor dSph galaxy. A few attempts have also been made to constrain the inner slope of the dark matter profile in dSph galaxies using -this technique (Walker & Peñarrubia 2011; Amorisco & Evans -2012; Hayashi et al. 2018) in order to resolve the so-called c uspcore +this technique (W alker & Peñarrubia 2011; Amorisco & Evans +2012; Hayashi et al. 2018) in order to resolve the so-called cuspcore problem. It has been shown to be di fficult, however, due Article number, page 1 of 12 A&A proofs: manuscript no. Populations4 -Table 1. 
Properties of the Illustris galaxy used to create mock data. -Property Value +T able 1.Properties of the Illustris galaxy used to create mock data. +Property V alue Subhalo ID 16960 -Number of stellar particles ( N⋆) 70446 -Number of dark matter particles ( NDM) 78448 -Stellar mass ( M⋆) 5 .74×1010M⊙ -Dark matter mass ( MDM) 4 .91×1011M⊙ +Number of stellar particles (N⋆) 70446 +Number of dark matter particles ( NDM ) 78448 +Stellar mass ( M⋆) 5 .74 ×1010 M⊙ +Dark matter mass ( MDM ) 4 .91 ×1011 M⊙ Mean mass of stellar particles 815808 M ⊙ Stellar half-mass radius 9 .99 kpc -Stellar half-number radius ( r1/2) 9.6 kpc -Axis ratio c/awithin r1/2 0.907 -Axis ratio b/awithin r1/2 0.949 +Stellar half-number radius ( r1/2 ) 9.6 kpc +Axis ratio c/a within r1/2 0.907 +Axis ratio b/a within r1/2 0.949 Triaxiality 0.56 to the nonsphericity of the dwarfs that introduces biases in such measurements (Kowalczyk et al. 2013; Genina et al. 2018). @@ -100,7 +102,7 @@ In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we developed the Schwarzschild technique in the form applicab le to binned velocity moments of a single tracer and verified its ab ility to reproduce the mass distribution and velocity anisotr opy of -simulated galaxies. We have also studied biases resulting f rom +simulated galaxies. W e have also studied biases resulting f rom the nonsphericity of the modeled objects. Later, we applied the method to model the kinematics of the Fornax dSph galaxy esti mating its mass and anisotropy profiles with unprecedented p recision. @@ -108,14 +110,14 @@ method to model the kinematics of the Fornax dSph galaxy esti mating In this paper we extend our Schwarzschild modeling technique to include multiple stellar populations with the aim t o constrain the properties of dSph galaxies even more strongl y. 
-We test our approach on a realistic simulated galaxy formed i n +W e test our approach on a realistic simulated galaxy formed i n the cosmological context, originating from the Illustris p roject (V ogelsberger et al. 2014a). Although no precise analogues of dSph galaxies are available in this simulation because of th e resolution, we use a more massive galaxy but with properties oth erwise similar to dSphs. The reliability of the modeling doe s not depend on the particular value of the mass so we believe these -tests to be viable. We do not attempt to constrain the inner da rk +tests to be viable. W e do not attempt to constrain the inner dark matter density profile (which is poorly resolved anyway) but try to put tighter limits on the estimates of the mass and anisotr opy profiles. Finally, we apply the improved method to the availa ble @@ -128,45 +130,60 @@ Section 3 contains an overview of our modeling method, the ap plication of the method to all stars and to two populations, a nd a comparison of the results obtained with these two approach es. The results of the application of the method to the Fornax dSp h -galaxy are presented in Section 4. We discuss our findings and +galaxy are presented in Section 4. W e discuss our findings and summarize the paper in Section 5. 2. Mock data 2.1. Selection of the simulated galaxy In order to test our modeling method on realistic simulated data, we decided to use a galaxy from the Illustris project -(V ogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al . +(V ogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al. 2015), namely the Illustris-1 cosmological simulation. Th is simulation follows the formation and evolution of galaxies fro m the early Universe to the present by solving gravity and hydrody namics, as well as modeling of star formation, galactic wind s, -SFR [M⊙ yr-1] -t [Gyr] 0 4 8 12 16 - 0 2 4 6 8 10 12 +SFR [M ⊙ yr-1] +t [Gyr] + 0 + 4 + 8 + 12 + 16 + 0 2 4 6 8 10 12 Fig. 1. 
Star formation rate as a function of the age of the Universe in the simulated galaxy from the Illustris project used to crea te mock data. The black and gray vertical arrows indicate the last mergers which the -galaxy underwent, wet and dry, respectively. +galaxy underwent, wet and dry , respectively . t [Gyr] -Z [Z⊙] 0 2 4 6 8 10 - 0 1 2 3 4 5 0 2 4 6 +Z [Z⊙] + 0 + 2 + 4 + 6 + 8 + 10 + 0 1 2 3 4 5 + 0 + 2 + 4 + 6 N [102] Fig. 2. Number of stars as a function of their metallicity and time of -formation (the age of the Universe) in the simulated galaxy. The vertical +formation (the age of the Universe) in the simulated galaxy . The vertical line indicates the applied split into stellar populations. magnetic fields, and the feedback from black holes. Although -dwarf galaxies that are of our interest here are not resolved in the +dwarf galaxies that are of our interest here are not resolvedin the suite, this can be easily overcome with the appropriate choi ce of the object and the treatment of data. As the key properties of dSph galaxy equivalents we identified: the lack of gas, the lack of a black hole, a low spin, the stellar mass much smaller than the dark matter mass and a -nearly spherical shape. The last condition was adopted in an attempt +nearly spherical shape. The last condition was adopted in anattempt to avoid any strong bias introduced by the spherical mo deling of a nonspherical object. Moreover, we required the ga laxy to possess a significant number of both stellar and dark matter - particles (over 105), and a well resolved center. Due to the + particles (over 105 ), and a well resolved center. 
Due to the large softening scale for dark matter particles in the simul ation -(ǫDM=1.42 kpc), we looked for an object in which even the +(ǫDM = 1.42 kpc), we looked for an object in which even the more concentrated stellar population (see Section 2.2) ext ended over 43 kpc so that the region a ffected by the numerical artifacts was enclosed within 2-3 innermost data bins (we used 20 linea rly @@ -174,69 +191,176 @@ spaced spatial bins, see Section 3.1). Out of 27345 galaxies listed in the catalog of stellar circularities, angular momenta, and axis ratios published by the Illustris team (Genel et al. 2015) containing subhalos with the st ellar -mass larger than 109M⊙, only a few met our restrictive requireArticle +mass larger than 10 9 M⊙, only a few met our restrictive requireArticle number, page 2 of 1 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling --80-4004080POPULATION I[kpc]majorPOPULATION I -intermediatePOPULATION I +-80 +-40 +0 +40 +80 +POPULATION I +[kpc] +major +POPULATION I +intermediate +POPULATION I minor - 5.3 5.9 6.5 7.1 7.7 + 5.3 + 5.9 + 6.5 + 7.1 + 7.7 log(Σ) [M⊙/kpc2] --80-40040POPULATION II[kpc]POPULATION IIPOPULATION II --160-80 0 80 160 +-80 +-40 +0 +40 +POPULATION II +[kpc] +POPULATION IIPOPULATION II +-160 +-80 + 0 + 80 + 160 V [km/s] --80-40040 --80-40040POPULATION II[kpc] -[kpc]-80-40040POPULATION II -[kpc]-80-4004080POPULATION II -[kpc] 0 30 60 90 -σ [km/s]-80-4004080POPULATION II[kpc]majorPOPULATION II -intermediatePOPULATION II +-80 +-40 +0 +40 +-80 -40 0 40 +POPULATION II +[kpc] +[kpc] +-80 -40 0 40 +POPULATION II +[kpc] +-80 -40 0 40 80 +POPULATION II +[kpc] + 0 + 30 + 60 + 90 +σ [km/s] +-80 +-40 +0 +40 +80 +POPULATION II +[kpc] +major +POPULATION II +intermediate +POPULATION II minor - 5.3 5.9 6.5 7.1 7.7 + 5.3 + 5.9 + 6.5 + 7.1 + 7.7 log(Σ) [M⊙/kpc2] --80-40040POPULATION II[kpc]POPULATION IIPOPULATION II --160-80 0 80 160 +-80 +-40 +0 +40 +POPULATION II +[kpc] +POPULATION IIPOPULATION II +-160 +-80 
+ 0 + 80 + 160 V [km/s] --80-40040 --80-40040POPULATION II[kpc] -[kpc]-80-40040POPULATION II -[kpc]-80-4004080POPULATION II -[kpc] 0 30 60 90 +-80 +-40 +0 +40 +-80 -40 0 40 +POPULATION II +[kpc] +[kpc] +-80 -40 0 40 +POPULATION II +[kpc] +-80 -40 0 40 80 +POPULATION II +[kpc] + 0 + 30 + 60 + 90 σ [km/s] -Fig. 3. Maps of the projected stellar density, mean stellar velocit y, and stellar velocity dispersion (in rows) for two stellar populations: the metalrich +Fig. 3. Maps of the projected stellar density , mean stellar velocit y , and stellar velocity dispersion (in rows) for two stellar populations: the metalrich population I (left-hand side panels) and the metal-poo r population II (right-hand side), and observations along t he principal axes determined for all stars (in columns, along the major, the intermediate , and the minor axis, respectively). --1-0.5 0 0.5 1 - 1 10 100β(r) +-1 +-0.5 + 0 + 0.5 + 1 + 1 10 100 +β(r) +r [kpc] +-1 +-0.5 + 0 + 0.5 + 1 + 0 10 20 30 40 50 +β(r) r [kpc] --1-0.5 0 0.5 1 - 0 10 20 30 40 50β(r) -r [kpc]all stars +all stars pop I -pop II 40 60 80 100 120 - 1 10 100σr(r) +pop II + 40 + 60 + 80 + 100 + 120 + 1 10 100 +σr(r) +r [kpc] + 40 + 60 + 80 + 100 + 120 + 0 10 20 30 40 50 +σr(r) r [kpc] - 40 60 80 100 120 - 0 10 20 30 40 50σr(r) -r [kpc] 40 60 80 100 120 - 1 10 100σt(r) + 40 + 60 + 80 + 100 + 120 + 1 10 100 +σt(r) r [kpc] - 40 60 80 100 120 - 0 10 20 30 40 50σt(r) + 40 + 60 + 80 + 100 + 120 + 0 10 20 30 40 50 +σt(r) r [kpc] Fig. 4. Profiles of the velocity anisotropy parameter, radial veloc ity dispersion, and tangential velocity dispersion (in con secutive columns) calculated from all stars (in red), including only population I (i n orange), and only population II (in blue). The upper row sho ws the profiles using the logarithmic distance scale and reaching the outskirts of th e galaxy whereas the bottom row presents in the linear scale o nly the radial range used in the modeling. -ments. 
We decided to use a galaxy labeled as subhalo 16960. -All the relevant properties of the galaxy are given in Table 1 , +ments. W e decided to use a galaxy labeled as subhalo 16960. +All the relevant properties of the galaxy are given in T able 1, including numbers of particles and total masses for both com ponents, and details on the shape of the stellar component: the axis ratios minor to major (shortest to longest) c/a, intermediate to -major b/a, and the triaxiality parameter T=(a2−b2)/(a2−c2). -We distinguish between the half-mass radius provided in the Illustris - database and the half-number radius r1/2, which we usefor further calculations in this paper. The di fference between the +major b/a, and the triaxiality parameter T = (a2 −b2 )/(a2 −c2 ). +W e distinguish between the half-mass radius provided in the Illustris + database and the half-number radius r1/2 , which we use +for further calculations in this paper. The di fference between the two comes from a small gradient in the stellar mass-to-light ratio with the distance from the galactic center. Since in our appr oach we treat stars as equal-mass particles and refer to number de nsities @@ -245,29 +369,36 @@ needed), the application of the half-number radius is more s elfconsistent. Article number, page 3 of 12 A&A proofs: manuscript no. Populations4 -10-310-1101103 - 10 100n⋆(R) [kpc-2] -R [kpc]major +10-3 +10-1 +101 +103 10 100 -R [kpc]intermediate +n⋆(R) [kpc-2] +R [kpc] +major 10 100 -R [kpc]minor +R [kpc] +intermediate + 10 100 +R [kpc] +minor all stars pop I pop II Fig. 5. Surface number density profiles of the stellar data samples f or the simulated galaxy observed along di fferent lines of sight (from the left to -the right). Different lines show profiles for all available stars (in red), th e metal-rich population I (in orange), and the metal-poor po pulation II (in -blue). Thin vertical lines indicate r0(see text) and the outer boundary of the spectroscopic data. +the right). 
Di fferent lines show profiles for all available stars (in red), th e metal-rich population I (in orange), and the metal-poor po pulation II (in +blue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. 2.2. Splitting the stars into populations Our chosen galaxy shows a complex formation history undergoing - multiple mergers which result in extended star format ion + multiple mergers which result in extended star formation with a few star formation bursts. The last wet merger, that is a merger with an object containing gas, happens at 6.9 Gyr from -the beginning of the simulation, whereas the last dry merger (no +the beginning of the simulation, whereas the last dry merger(no gas transfer) at 12.1 Gyr, giving the galaxy enough time to re gain -dynamical equilibrium. We present the star formation rate ( SFR) +dynamical equilibrium. W e present the star formation rate ( SFR) as a function of time (the age of the Universe) in Fig. 1, where -these last mergers are indicated with black and gray vertica l arrows. +these last mergers are indicated with black and gray vertical arrows. In Fig. 2 we show the distribution of stars as a function of their metallicity (in solar units) and the time of formation . In order to divide the stellar sample into two populations we cut i t in @@ -276,7 +407,7 @@ split is indicated in Fig. 2 with the vertical line. With sati sfying accuracy it separates the stars born before and after 4 Gyr si nce the start of the simulation, which corresponds to the format ion time before and after the end of the second major star burst, a s -shown in Fig. 1. We refer to the metal-rich stars as populatio n I +shown in Fig. 1. W e refer to the metal-rich stars as populatio n I and to the metal-poor as population II, following the common ly used nomenclature in astronomy. In Fig. 3 we present maps of the projected stellar mass density, @@ -284,174 +415,274 @@ In Fig. 
3 we present maps of the projected stellar mass density, for both populations obtained by projecting the galaxy alon g its principal axes. The orientation was determined from the ine rtia tensor calculated from all stars within the half-number radius -r1/2and therefore is the same in both panels. The two populations - differ significantly in the spatial distribution and kinematics +r1/2 and therefore is the same in both panels. The two populations + di ffer significantly in the spatial distribution and kinematics with the metal-rich (considered to be younger) populati on I being more concentrated but having lower central velocity d ispersion. Both populations show a weak rotation signal at lar ge distances from the center. -The velocity anisotropy parameter β(r)=1−(σ2 -θ+ +The velocity anisotropy parameter β(r) = 1 − (σ2 +θ + σ2 φ)/(2σ2 -r), whereσiare velocity dispersions in spherical coordinates +r), where σi are velocity dispersions in spherical coordinates (Binney & Tremaine 2008), describes the orbital struc ture of galaxies. It is one of the most important dynamical proper ties of bound systems which cannot be inferred directly from observations - and has to be recovered by dynamical modeling. Th e -profiles of the anisotropy parameter βas well as the radial σr -and tangentialσt=[(σ2 + and has to be recovered by dynamical modeling. The +profiles of the anisotropy parameter β as well as the radial σr +and tangential σt = [(σ2 θ+σ2 -φ)/2]1/2velocity dispersions for our -simulated galaxy are presented in the consecutive columns o fFig. 4. Throughout the paper we use red, orange, and blue colo rs +φ)/2]1/2 velocity dispersions for our +simulated galaxy are presented in the consecutive columns o f +Fig. 4. Throughout the paper we use red, orange, and blue colo rs to indicate values calculated or recovered for all stars, po pulation I, and population II, respectively. The two rows of the fi gure show the behavior of the parameters at di fferent scales. 
The top row plots the profiles with the distance from the center of the -galaxy in the logarithmic scale and shows the drop of anisotr opy +galaxy in the logarithmic scale and shows the drop of anisotropy at the outer edges of the object. The bottom row uses the linea r distance scale and focuses on the main body of the galaxy. Figure 5 shows the surface number density profiles of the -stars as measured in di fferent directions. We can see that while -the different subsamples have quite distinguishable profiles, the +stars as measured in di fferent directions. W e can see that while +the di fferent subsamples have quite distinguishable profiles, the difference between the lines of sight is small because the galaxy is close to spherical. 2.3. Observables -We generated nine sets of mock data by observing all stars and -each population separately along the principal axes determ ined +W e generated nine sets of mock data by observing all stars and +each population separately along the principal axes determined from all stars. For the observables to be used in the modeling we divided the stars into 20 bins spaced linearly in distance fr om the center of the galaxy up to 50 kpc, measuring the fraction of the total number of stars and the 2nd, 3rd, and 4th proper moments of the line-of-sight velocity defined in Eq. 8 and 9 -of Kowalczyk et al. (2018). The profiles of these quantities a re +of Kowalczyk et al. (2018). The profiles of these quantities are shown in consecutive rows in Fig. 6. Columns correspond to di fferent lines of sight, from the left to the right: along the ma jor, intermediate, and minor axis of the galaxy. For clarity of th e figure, in each panel we indicate only the error bars for one of th e data sets. However, as the number of stars in a sample remains -roughly constant between the lines of sight, the error bars a re -very similar among the panels in a given row. 
+roughly constant between the lines of sight, the error bars are +very similar among the panels in a given row . Although in our previous studies of the reliability of the Schwarzschild modeling and its applications to real dat a (Kowalczyk et al. 2017, 2018, 2019) we approximated the density - profile of the tracer with the Sérsic formula, we found th at it + profile of the tracer with the Sérsic formula, we found that it does not provide a good approximation of the data for the simu lated - galaxy considered here. We therefore fit the projected density + galaxy considered here. W e therefore fit the projected density profile with the King formula (King 1962) -I(R)=I01√ -1+(R/Rc)2−1√ -1+(Rt/Rc)22 +I(R) = I0 + + + + + + + +1 +√ +1 +(R/Rc)2 +− 1√ +1 +(Rt /Rc)2 + + + + + + + +2 , (1) Article number, page 4 of 12 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling -10-310-210-1100 - 0 10 20 30 40M(R) -R [kpc]major - 0 10 20 30 40 -R [kpc]intermediate - 0 10 20 30 40 50 -R [kpc]minor -36912 - 0 10 20 30 40m2(R)[103(km s-1)2] -R [kpc] 0 10 20 30 40 -R [kpc] 0 10 20 30 40 50 +10-3 +10-2 +10-1 +100 + 0 10 20 30 40 +M(R) +R [kpc] +major + 0 10 20 30 40 +R [kpc] +intermediate + 0 10 20 30 40 50 +R [kpc] +minor +3 +6 +9 +12 + 0 10 20 30 40 +m 2(R)[103(km s-1)2] +R [kpc] + 0 10 20 30 40 +R [kpc] + 0 10 20 30 40 50 +R [kpc] +-10 +-5 +0 +5 +10 + 0 10 20 30 40 +m 3(R)[104(km s-1)3] +R [kpc] + 0 10 20 30 40 R [kpc] --10-50510 - 0 10 20 30 40m3(R)[104(km s-1)3] -R [kpc] 0 10 20 30 40 -R [kpc] 0 10 20 30 40 50 + 0 10 20 30 40 50 R [kpc] -01234 - 0 10 20 30 40m4(R)[108(km s-1)4] -R [kpc] 0 10 20 30 40 -R [kpc] 0 10 20 30 40 50 -R [kpc]all stars +0 +1 +2 +3 +4 + 0 10 20 30 40 +m 4(R)[108(km s-1)4] +R [kpc] + 0 10 20 30 40 +R [kpc] + 0 10 20 30 40 50 +R [kpc] +all stars pop I pop II -Fig. 6. Observables used in our Schwarzschild modeling scheme of th e simulated galaxy. 
In rows: the fraction of the total number of stars, 2nd, +Fig. 6. Observables used in our Schwarzschild modeling scheme of th e simulated galaxy . In rows: the fraction of the total number of stars, 2nd, 3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, interme diate, and minor axis. In red we present -the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively. For clarity of t he figure, in each panel we +the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively . For clarity of t he figure, in each panel we indicate only the error bars for one of the data sets. -where I0,Rc, and Rtare the model parameters. The profile can +whereI0 , Rc , and Rt are the model parameters. The profile can be analytically deprojected to obtain the 3D density -ρ(r)=ρ0 -z2[1 -zarccos( z)−√ -1−z2] +ρ(r) = ρ0 +z2 +[ 1 +z arccos(z) − +√ +1 −z2 +] , (2) where -ρ0=I0 -πRc[1+(Rt/Rc)2]3/2(3) +ρ0 = I0 +πRc [1 +(Rt /Rc)2 ]3/2 (3) and -z=√ -r2+R2c -R2c+R2 -t. (4)3. Schwarzschild modeling +z= +√ +r2 +R2 +c +R2 +c+R2 +t +. (4) +3. Schwarzschild modeling In this section we briefly present our modeling method and its -application to the data sets derived for all stars and the two populations +application to the data sets derived for all stars and the twopopulations of the simulated galaxy separately. In both cases o ur aim was to recover the profiles of the total mass and the veloci ty anisotropy. 3.1. Overview of the method -We follow the approach introduced in Kowalczyk et al. (2018) , +W e follow the approach introduced in Kowalczyk et al. (2018) , namely we model the total mass profile with the mass-to-light ratioΥvarying with radius: -logΥ(r)={ -log(Υ0) r≤r0 -a(logr−logr0)c+log(Υ0)r>r0(5) +log Υ(r) = +{ +log(Υ0 ) r ≤ r0 +a(log r −log r0 )c +log(Υ0) r > r0 +(5) Article number, page 5 of 12 A&A proofs: manuscript no. 
Populations4 - 1 2 3 - 0 0.5 1 1 2 3ALL -Υ0ac - 1 2 3 - 0 0.5 1 1 2 3POPULATIONS -Υ0ac - 10 100 + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +ALL +Υ0 +a +c + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +POPULATIONS +Υ0 +a +c + 10 + 100 χ2 - 1 2 3 - 0 0.5 1 1 2 3POP I -Υ0ac - 1 2 3 - 0 0.5 1 1 2 3POP II -Υ0ac - 10 100 + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +POP I +Υ0 +a +c + 1 + 2 + 3 + 0 + 0.5 + 1 + 1 + 2 + 3 +POP II +Υ0 +a +c + 10 + 100 χ2 -Fig. 7. Absolute values of χ2obtained from the fits of three data sets: all stars (top left p anel), population I (bottom left), and population II (botto m -right) for the observations along the major axis of the simul ated galaxy. The results for the modeling of two populations (top right) were obtained -as an algebraic sum of values for populations I and II. To avoi d large numbers in the figure, Υ0was divided by the mean mass of a stellar particle. -where ris the distance from the center of the galaxy, r0is a -constant, whileΥ0,a, and care the parameters of a model. We -have assumed log r0=0.33 which corresponds to three softening +Fig. 7. Absolute values of χ2 obtained from the fits of three data sets: all stars (top left p anel), population I (bottom left), and population II (botto m +right) for the observations along the major axis of the simul ated galaxy . The results for the modeling of two populations (top right) were obtained +as an algebraic sum of values for populations I and II. T o avoi d large numbers in the figure, Υ0 was divided by the mean mass of a stellar particle. +where r is the distance from the center of the galaxy, r0 is a +constant, while Υ0 , a, and c are the parameters of a model. W e +have assumed log r0 = 0.33 which corresponds to three softening scales for stellar particles in the Illustris simulation. 
-We probed the parameter a∈[0 : 1.3] with a step∆a=0.04 -andc∈[1.1 : 2.9] with a step∆c=0.2, imposing the requirement +W e probed the parameter a ∈ [0 : 1 .3] with a step ∆a = 0.04 +and c ∈ [1.1 : 2 .9] with a step ∆c = 0.2, imposing the requirement on the total density profile to be monotonically decreas ing with radius. For each set of parameters and for each line of si ght we generated 1200 orbits using 100 values of energy (express ed with the radius of a circular orbit) spaced logarithmically and 12 values of the relative angular momentum spaced linearly. The outer radius of the orbit library, that is the apocenter of th e most -extended orbit, was set to rout=165 kpc in order to cover over +extended orbit, was set to rout = 165 kpc in order to cover over 0.999 of the total stellar mass based on the fitted King profile parameters. -We fit the kinematics weighted with the fraction of mass with +W e fit the kinematics weighted with the fraction of mass with the constrained least squares algorithm where di fferent values -ofΥ0were obtained with a simple transformation of velocities +of Υ0 were obtained with a simple transformation of velocities given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In order - to smooth out the numerical artifacts, the three-dimens ional -χ2spaces were then interpolated with 12-order polynomials(∼a4c4Υ4 + to smooth out the numerical artifacts, the three-dimensional +χ2 spaces were then interpolated with 12-order polynomials +(∼ a4 c4 Υ4 0) that were further used to determine the global minimums - (identified as the best-fitting models) and 1, 2, 3 σconfidence - levels which for three parameters correspond to ∆χ2= -3.53,8.02,14.2 (Press et al. 1992). + (identified as the best-fitting models) and 1, 2, 3 σ confidence + levels which for three parameters correspond to ∆χ2 = +3.53, 8.02, 14.2 (Press et al. 1992). 3.2. 
Application to mock data In the following we present the direct and inferred results o f the Schwarzschild modeling of the data sets described in Sec tion 2.3. First, Fig. 7 shows the distribution of the absolute values o f -theχ2as a function of three parameters of the mass-to-light ratio. +the χ2 as a function of three parameters of the mass-to-light ratio. In order to avoid unnecessary repetitions, we include o nly the plot for the mock data obtained by observing the Illustri s galaxy along its major axis as the others are qualitatively s imilar. @@ -463,76 +694,149 @@ As our parametrization of the mass-to-light ratio is not int uitive we present its profiles explicitly in the first rows of th e leftArticle number, page 6 of 12 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling -1061071081091010 - 10 100ALLΥ(r) [M⊙/L⊙] -r [kpc]major - 10 100ALL -r [kpc]intermediate - 10 100ALL -r [kpc]minor +106 +107 +108 +109 +1010 + 10 100 +ALL +Υ(r) [M⊙/L⊙] +r [kpc] +major + 10 100 +ALL +r [kpc] +intermediate + 10 100 +ALL +r [kpc] +minor 3σ 2σ 1σ best model data -104106108 - 10 100ALLνtot(r) [M⊙ kpc-3] -r [kpc] 10 100ALL -r [kpc] 10 100ALL +104 +106 +108 + 10 100 +ALL +νtot(r) [M⊙ kpc-3] +r [kpc] + 10 100 +ALL +r [kpc] + 10 100 +ALL +r [kpc] +1010 +1011 +1012 + 10 100 +ALL +M tot(r) [M⊙] +r [kpc] + 10 100 +ALL +r [kpc] + 10 100 +ALL +r [kpc] +-2 +-1 +0 +1 + 0 10 20 30 40 +ALL +β(r) r [kpc] -101010111012 - 10 100ALLMtot(r) [M⊙] -r [kpc] 10 100ALL -r [kpc] 10 100ALL + 0 10 20 30 40 +ALL r [kpc] --2-101 - 0 10 20 30 40ALLβ(r) -r [kpc] 0 10 20 30 40ALL -r [kpc] 0 10 20 30 40 50ALL -r [kpc]1061071081091010 - 10 100POPULATIONSΥ(r) [M⊙/L⊙] -r [kpc]major - 10 100POPULATIONS -r [kpc]intermediate - 10 100POPULATIONS -r [kpc]minor + 0 10 20 30 40 50 +ALL +r [kpc] +106 +107 +108 +109 +1010 + 10 100 +POPULATIONS +Υ(r) [M⊙/L⊙] +r [kpc] +major + 10 100 +POPULATIONS +r [kpc] +intermediate + 10 100 +POPULATIONS +r [kpc] +minor 3σ 2σ 1σ best model data 
-104106108 - 10 100POPULATIONSνtot(r) [M⊙ kpc-3] -r [kpc] 10 100POPULATIONS -r [kpc] 10 100POPULATIONS +104 +106 +108 + 10 100 +POPULATIONS +νtot(r) [M⊙ kpc-3] +r [kpc] + 10 100 +POPULATIONS +r [kpc] + 10 100 +POPULATIONS +r [kpc] +1010 +1011 +1012 + 10 100 +POPULATIONS +M tot(r) [M⊙] +r [kpc] + 10 100 +POPULATIONS +r [kpc] + 10 100 +POPULATIONS +r [kpc] +-2 +-1 +0 +1 + 0 10 20 30 40 +POPULATIONS +β(r) r [kpc] -101010111012 - 10 100POPULATIONSMtot(r) [M⊙] -r [kpc] 10 100POPULATIONS -r [kpc] 10 100POPULATIONS + 0 10 20 30 40 +POPULATIONS r [kpc] --2-101 - 0 10 20 30 40POPULATIONSβ(r) -r [kpc] 0 10 20 30 40POPULATIONS -r [kpc] 0 10 20 30 40 50POPULATIONS + 0 10 20 30 40 50 +POPULATIONS r [kpc] Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy al ong the principal -axes. In rows: derived mass-to-light ratio, total density, total mass, an d anisotropy parameter. In columns: observations along the major, intermediate, - and minor axis, respectively. Green lines indicate v alues for the best-fit models whereas the colored areas of dec reasing intensity show the -1, 2, and 3σconfidence levels. The true values are presented as black lin es. Thin vertical lines mark the values of r0and the outer range of the +axes. In rows: derived mass-to-light ratio, total density , total mass, an d anisotropy parameter. In columns: observations along the major, intermediate, + and minor axis, respectively . Green lines indicate v alues for the best-fit models whereas the colored areas of dec reasing intensity show the +1, 2, and 3 σ confidence levels. The true values are presented as black lin es. Thin vertical lines mark the values of r0 and the outer range of the data sets, from left to right. Right-hand side: same as left b ut for the fit of two stellar populations. and right-hand side panels of Fig. 8 for the results obtained for -all stars and the populations, respectively. 
We further cal culate +all stars and the populations, respectively. W e further cal culate the total density (second rows) and the total mass content (t hird -rows). We include the obtained orbit anisotropy within the m odeled +rows). W e include the obtained orbit anisotropy within the m odeled range in the bottom rows. The consecutive columns prese nt the results for the observations along the major, intermedi ate, and minor axis. Green lines indicate values for the best-fit m odels whereas the colored areas of decreasing intensity corre spond -to 1, 2, and 3σconfidence regions obtained as extreme values allowed - by the models with χ2within a given region. In each panel +to 1, 2, and 3 σconfidence regions obtained as extreme values allowed + by the models with χ2 within a given region. In each panel the true values from the simulation are presented with black lines -while thin vertical lines mark the values of r0and the outer range +while thin vertical lines mark the values of r0 and the outer range of the data sets beyond which the reliability of results drop s significantly. The true mass-to-light ratio profile was obtaine d by dividing the total mass by the fitted King profiles, therefore the @@ -544,81 +848,101 @@ location of global minimum and confidence levels from two pop ulations (as in the top right panel of Fig. 7), in Fig. 9 we pres ent another method of calculating the anisotropy. In the second and third row we show the derived profiles for population I and II -separately and combine them as stellar mass weighted averag e -in the top row. As in previous figures, three columns refer to t he +separately and combine them as stellar mass weighted average +in the top row . As in previous figures, three columns refer to t he different lines of sight whereas the narrow fourth one shows the behavior of the true profiles outside the modeled range which , as we noticed in our previous studies, in a limited way influence s the results. 
Such an impact is understandable since the star s at larger distances from the center are still included in the li ne-ofsight - measurements.3.3. Comparison of fitting results + measurements. +3.3. Comparison of fitting results The main strength of the two populations method comes from -tracing the underlying gravitational potential at di fferent scales. +tracing the underlying gravitational potential at different scales. As can be seen in the bottom panels of Fig. 7, population I, whi ch -is more concentrated, is also more sensitive to Υ0, but gives -weaker constraints on aorc. On the other hand, population II +is more concentrated, is also more sensitive to Υ0 , but gives +weaker constraints on a or c. On the other hand, population II attempts to reproduce the total mass content at larger dista nces as well, therefore showing stronger coupling between the pa rameters. -The global minimums of the χ2distributions for both approaches, +The global minimums of the χ2 distributions for both approaches, that is modeling one and two populations, which we -identify as the best-fitting models, closely coincide showi ng that +identify as the best-fitting models, closely coincide showing that there is no internal bias in the improved method. However, si gnificant - differences can be observed when comparing the confidence + di fferences can be observed when comparing the confidence levels, mainly at 1 and 3 σ. Namely, we find that using two populations, the constraints we obtain on the density an d anisotropy profile are much stronger. Additionally, the more accurate method allows us to study -other effects and biases, for example the consequences of the +other e ffects and biases, for example the consequences of the nonsphericity of the modeled object. 
Whereas for the fit of al l stars the true values of the density, mass, and anisotropy pr ofiles -are contained within 1 σconfidence regions, the results for the +are contained within 1 σ confidence regions, the results for the populations are more or less biased depending on the axis. Th ey are well reproduced for the observation along the intermedi ate axis, for which the e ffects of nonsphericity seem to cancel out, -and more biased for the remaining lines of sight. We notice a -trend from under- to overestimation of the anisotropy when g oing +and more biased for the remaining lines of sight. W e notice a +trend from under- to overestimation of the anisotropy when going from the major to the minor axis. Article number, page 7 of 12 A&A proofs: manuscript no. Populations4 --101 - 0 10 20 30 40POP I + POP II +-1 +0 +1 + 0 10 20 30 40 +POP I + POP II β(r) -r [kpc]major - 0 10 20 30 40 -r [kpc]intermediate - 0 10 20 30 40 -r [kpc]minor - 50 60 70 80 --101 - 0 10 20 30 40POP I +r [kpc] +major + 0 10 20 30 40 +r [kpc] +intermediate + 0 10 20 30 40 +r [kpc] +minor + 50 60 70 80 +-1 +0 +1 + 0 10 20 30 40 +POP I β(r) -r [kpc] 0 10 20 30 40 -r [kpc] 0 10 20 30 40 -r [kpc] 50 60 70 80 --101 - 0 10 20 30 40POP II +r [kpc] + 0 10 20 30 40 +r [kpc] + 0 10 20 30 40 +r [kpc] + 50 60 70 80 +-1 +0 +1 + 0 10 20 30 40 +POP II β(r) -r [kpc] 0 10 20 30 40 -r [kpc] 0 10 20 30 40 -r [kpc] 50 60 70 80 +r [kpc] + 0 10 20 30 40 +r [kpc] + 0 10 20 30 40 +r [kpc] + 50 60 70 80 data best model - 1σ + +1σ 2σ 3σ -Fig. 9. Profiles of the anisotropy parameter obtained with the Schwa rzschild modeling of two stellar populations of the simulat ed galaxy. In rows: +Fig. 9. Profiles of the anisotropy parameter obtained with the Schwa rzschild modeling of two stellar populations of the simulat ed galaxy . In rows: results for all stars (calculated as the superposition of tw o populations), population I, and population II. Colors fol low the convention used in previous figures. 
In columns: observations along the major, intermediate, and minor axis . The last narrower column shows the data (black lines) outside the modeled radial range. Color lines indicate valu es for the best-fit models whereas the colored areas of decrea sing intensity show the 1, -2, and 3σconfidence regions. +2, and 3 σ confidence regions. 4. Modeling Fornax dSph -In this section we present the application of our Schwarzsch ild +In this section we present the application of our Schwarzschild modeling scheme to the observational data for the Fornax dSp h galaxy obtained by del Pino et al. (2015) and del Pino et al. -(2017). This study is a follow-up of the work of Kowalczyk et a l. +(2017). This study is a follow-up of the work of Kowalczyk et al. (2019) and can be directly compared to the results presented -there. Moreover, we refer the reader to these previous publi cations +there. Moreover, we refer the reader to these previous publications for details on the origin of data and our procedures use d for cleaning the spectroscopic sample. Similarly to the approach introduced in Section 2.2, we divided @@ -627,97 +951,133 @@ their metallicity and then cross-correlated the samples wi th the data used in Kowalczyk et al. (2019). The metallicity histog ram of the final spectroscopic sample is shown in Fig. 10. Additio nally, we color-coded each bin with the population it has been -assigned to, namely orange or blue for population I or II. Int erestingly, +assigned to, namely orange or blue for population I or II. Interestingly, the case of Fornax is similar to our simulated gala xy -as the split at [Fe/H]=−1 also captures an important feature +as the split at [Fe /H]= −1 also captures an important feature of the object’s star formation history, separating stars in to subsamples older and younger than 6 Gyr, as shown in Fig. 12 of del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). 
The -numbers of stars contained in the samples of all stars, popul ation - I, and population II are given in Table 2, where the indic es +numbers of stars contained in the samples of all stars, population + I, and population II are given in T able 2, where the indic es "phot" and "spec" refer to the photometric and kinematic sam ples. - The sum of stars in the populations is lower than in thesample of all stars since only stars with reliable measureme nts + The sum of stars in the populations is lower than in the +sample of all stars since only stars with reliable measureme nts of metallicity could be included. N -[Fe/H]pop I +[Fe/H] +pop I pop II - 0 20 40 60 80 100 --2.5-2-1.5-1-0.5 0 + 0 + 20 + 40 + 60 + 80 + 100 +-2.5 -2 -1.5 -1 -0.5 0 Fig. 10. Metallicity histogram of the final spectroscopic sample use d in the modeling of two stellar populations in the Fornax dSph. E ach bin is color-coded according to the population it has been assigne d to, orange -or blue for population I and II, respectively. +or blue for population I and II, respectively . As we have shown in our earlier work, the light profile of the Fornax dSph can be well reproduced with the three-parameter Article number, page 8 of 12 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling -Table 2. Properties of the data samples for the Fornax dSph. +T able 2.Properties of the data samples for the Fornax dSph. 
Property ALL POP I POP II -Number of stars ( Nphot) 65 797 14 882 49 205 -Number of stars ( Nspec) 3286 1136 1151 +Number of stars ( Nphot ) 65 797 14 882 49 205 +Number of stars ( Nspec ) 3286 1136 1151 Stars within 1.8 kpc 3268 1134 1130 -Fitted normalization ( N0) [×104] 6.95 1.81 5.45 -Sérsic radius ( RS) [kpc] 0.454 0.429 0.420 +Fitted normalization ( N0 ) [ ×104] 6.95 1.81 5.45 +Sérsic radius ( RS ) [kpc] 0.454 0.429 0.420 Sérsic parameter ( m) 0.808 0.807 0.898 -102103104105 - 0.2 0.5 2 0.1 1n⋆(R) [kpc-2] -R [kpc]all stars +102 +103 +104 +105 + 0.2 0.5 2 0.1 1 +n⋆(R) [kpc-2] +R [kpc] +all stars popI popII Fig. 11. Surface number density profiles of the photometric data samples for the Fornax dSph: all available stars (in red), the me tal-rich population I (in orange), and the metal-poor population II (in b lue). Thin -vertical lines indicate r0(see text) and the outer boundary of the spectroscopic +vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. -Sérsic formula (Sérsic 1968). The profiles of number density for +Sérsic formula (Sérsic 1968). The profiles of number densityfor all stars and both populations together with the best-fittin g Sérsic profiles are presented in Fig. 11. The colors follow the conve ntion introduced in previous sections. Thin vertical lines i ndicate the innermost data point for the light profile for all stars an d the outer boundary of the kinematic sample. The former, set a t -logr=−0.16, is also used as the minimum of the mass-to-light -ratio profile ( r0in Eq. 5). The fitted parameters of the profiles, -that is the normalization N0, the Sérsic radius RS, and the Sérsic -parameter m, are included in the second part of Table 2. +log r = −0.16, is also used as the minimum of the mass-to-light +ratio profile ( r0 in Eq. 5). The fitted parameters of the profiles, +that is the normalization N0 , the Sérsic radius RS , and the Sérsic +parameter m, are included in the second part of T able 2. 
Figure 12 presents the profiles of the observables used in the Schwarzschild modeling: the fraction of stars and the 2nd, 3 rd, and 4th velocity moments (top to bottom) for the three data sa mples: all stars, population I, and population II (in red, ora nge, and blue, respectively). The error bars indicate 1 σsampling errors. The parameter space for Υ(r) has been probed as follows: -a∈[0 : 1.85] with a step∆a=0.05 and c∈[1.2 : 6] with a -step∆c=0.2. We point out that in Kowalczyk et al. (2019) the -parameter cwas fixed at c=3 and now we fit it as a free parameter. +a ∈ [0 : 1 .85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a +step ∆c = 0.2. W e point out that in Kowalczyk et al. (2019) the +parameter c was fixed at c = 3 and now we fit it as a free parameter. As for the mock data in Section 3.2, di fferent values of -Υ0were obtained with the transformation of velocity moments -within theχ2fitting routine. The values of ∆χ2for all stars and +Υ0 were obtained with the transformation of velocity moments +within the χ2 fitting routine. The values of ∆χ2 for all stars and the populations are shown in the two panels of Fig. 13 (left an d right-hand side, respectively). Due to the dense coverage o f the -grid, we decided to include only the values within 3 σfrom the +grid, we decided to include only the values within 3 σ from the fitted minimums (see Section 3.1). The profiles of the mass-to-light ratio, total density, tota l -mass, and velocity anisotropy resulting from the χ2distributions +mass, and velocity anisotropy resulting from the χ2 distributions are presented in the consecutive rows of Fig. 14. 
The anisotr opy -profile for the populations is based on the fit of all stars but u sing 0 0.05 0.1 0.15 0.2 0.25 - 0 0.4 0.8 1.2 1.6M(R) -R [kpc]all stars +profile for the populations is based on the fit of all stars but u sing + 0 + 0.05 + 0.1 + 0.15 + 0.2 + 0.25 + 0 0.4 0.8 1.2 1.6 +M(R) +R [kpc] +all stars pop I pop II -04080120160200 - 0 0.4 0.8 1.2 1.6m2(R)[(km s-1)2] +0 +40 +80 +120 +160 +200 + 0 0.4 0.8 1.2 1.6 +m 2(R)[(km s-1)2] R [kpc] --16-80816 - 0 0.4 0.8 1.2 1.6m3(R)[102(km s-1)3] +-16 +-8 +0 +8 +16 + 0 0.4 0.8 1.2 1.6 +m 3(R)[102(km s-1)3] R [kpc] -0481216 - 0 0.4 0.8 1.2 1.6m4(R)[104(km s-1)4] +0 +4 +8 +12 +16 + 0 0.4 0.8 1.2 1.6 +m 4(R)[104(km s-1)4] R [kpc] Fig. 12. Observables of the Fornax dSph used in our Schwarzschild modeling scheme. In rows: the fraction of the total number of stars, the 2nd, 3rd, and 4th velocity moment. In red we present the value s obtained for all stars whereas in orange and blue those for population s I and II, -respectively. -the confidence levels on Υfrom the fit of two populations. Green +respectively . +the confidence levels onΥfrom the fit of two populations. Green lines indicate the values for the best-fitting models wherea s the colored areas of decreasing intensity show the 1, 2, and 3 σconfidence regions. Additionally, with black dashed lines we in clude @@ -726,46 +1086,70 @@ As a result of freeing the steepness of the mass-to-light ratio profile (parameter c) with respect to the previous study Article number, page 9 of 12 A&A proofs: manuscript no. Populations4 - 0 0.5 1 1.5 0 0.5 1 - 1.5 2 3 4 5 6ALL + 1.5 + 0 + 0.5 + 1 + 1.5 + 2 + 3 + 4 + 5 + 6 +ALL Υ0 -ac - 0 0.5 1 1.5 +a +c + 0 + 0.5 + 1 + 1.5 0 0.5 1 - 1.5 2 3 4 5 6POPULATIONS + 1.5 + 2 + 3 + 4 + 5 + 6 +POPULATIONS Υ0 -ac - 0 3 6 9 12 +a +c + 0 + 3 + 6 + 9 + 12 χ2-χ2 min -Fig. 13. Values ofχ2relative to the fitted minimum within the range of 3 σconfidence level for all stars (left panel) and for the popula tions (right +Fig. 13. 
V alues of χ2 relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the popula tions (right panel) for the Fornax dSph. -(Kowalczyk et al. 2019), we obtained higher estimates of the enclosed +(Kowalczyk et al. 2019), we obtained higher estimates of theenclosed total mass at larger radii. In particular, for the mas s enclosed - within 1.8 kpc we get Mall(<1.8 kpc)=3.87+1.48 -−1.56×108 -M⊙from the fit for all stars and Mpops(<1.8 kpc)=4.71+0.87 -−1.13× -108M⊙from the fit of populations, while previously we had -Mold(<1.8 kpc)=3.7+1.4 -−1.3×108M⊙. + within 1.8 kpc we get Mall (< 1.8 kpc) = 3.87+1.48 +−1.56 × 108 +M⊙ from the fit for all stars and Mpops (< 1.8 kpc) = 4.71+0.87 +−1.13 × +108 M⊙ from the fit of populations, while previously we had +Mold (< 1.8 kpc) = 3.7+1.4 +−1.3 ×108 M⊙. Interestingly, despite the significant shift of the positio n of χ2 -min(toc=4.2 for all stars and 3.6 for populations), the obtained +min (to c = 4.2 for all stars and 3.6 for populations), the obtained profile of the anisotropy parameter remains decreasi ng or flat for all stars but changes to increasing from 0 to 0.5 for th e populations. Nevertheless, even in the latter case the prev ious result agrees with the new finding within 1 σ. The detailed analysis of the anisotropy is shown in Fig. 15 where the middle and bottom panels present the profiles obtained - for each population separately. We notice that the pr ofile + for each population separately. W e notice that the profile for population I is decreasing or has a local minimum whereas -for population II is increasing (from −0.25 to 0.5 for the bestfitting +for population II is increasing (from−0.25 to 0.5 for the bestfitting model). Since population I is more concentrated, the last bins contain very few stars, which limits their credibility . The top panel of Fig. 15 presents the anisotropy of all stars calc ulated @@ -773,7 +1157,7 @@ top panel of Fig. 
15 presents the anisotropy of all stars calc ulated approach we still obtain the increasing profile (from 0 to 0.5 ) but the previous result agrees with it only within 2 σ. Since Fornax dSph is significantly elongated with the projected - ellipticity of ǫ=0.30±0.01 (Irwin & Hatzidimitriou + ellipticity of ǫ = 0.30 ± 0.01 (Irwin & Hatzidimitriou 1995), we anticipate some bias in the obtained results cause d by the spherically symmetric modeling. Kowalczyk et al. (20 18) studied such bias in an axisymmetric simulated object quali tatively @@ -786,27 +1170,28 @@ underestimated, further strengthening the likelihood of t he real anisotropy to be radial and its profile to be growing with radi us with respect to the results of Kowalczyk et al. (2019). Both constant (like for our population I) and growing (population - II) anisotropy profiles can arise from biased modeli ngof the real growing profile by observing an object along the + II) anisotropy profiles can arise from biased modeli ng +of the real growing profile by observing an object along the minor and major axis, respectively. However, for the bias to -occur in two populations presented here, their inner orient ations +occur in two populations presented here, their inner orientations would need to be opposite. Since such morphological fe atures are not supported by the photometric studies of Fornax -(del Pino et al. 2015; Wang et al. 2019) which rather find a good -spatial alignment between the stellar populations, we conc lude +(del Pino et al. 2015; W ang et al. 2019) which rather find a good +spatial alignment between the stellar populations, we conclude that the anisotropy profiles of the two populations modeled i n this work are indeed significantly distinct. Finally, it is worth noticing that the so-called mass-follo wslight model, that is the one following from the assumption th at the total density traces the stellar distribution, is no lon ger supported by the fit of the populations. 
With our parametrizatio n, -the mass-follows-light model corresponds to a=0 and whereas -it is enclosed within 3 σfor the fit of all stars, as was the case +the mass-follows-light model corresponds to a = 0 and whereas +it is enclosed within 3 σ for the fit of all stars, as was the case in Kowalczyk et al. (2019), the allowed values for the improv ed method are much larger, as demonstrated by the right panel of Fig. 13. 5. Summary and discussion Building on the previously created implementation of the -Schwarzschild orbit superposition method focused on model ing +Schwarzschild orbit superposition method focused on modeling dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018 , 2019), we improved our tool by introducing multiple stellar populations. Such an improvement is desirable and justified sin ce @@ -817,7 +1202,7 @@ expect a significant improvement in the estimates of not only the total mass content but also the orbit anisotropy since this r obust modeling technique reproduces the anisotropy as a by-produ ct of the modeling rather than taking it as an assumption. -We have tested our hypothesis by modeling mock data generated +W e have tested our hypothesis by modeling mock data generated from a galaxy formed in the Illustris simulation. Due to the limitations of the resolution, we chose a galaxy of mass a few orders of magnitude larger than the estimated masses of classi cal @@ -825,69 +1210,102 @@ dwarfs. Still, the galaxy possessed appropriate qualitati ve characteristics, such as the lack of gas and an almost spherical s hape, Article number, page 10 of 12 K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in S chwarzschild modeling -101103105 - 0.1 1Υ(r) [M⊙/L⊙] -r [kpc]ALL - 0.1 1r [kpc]POPULATIONS +101 +103 +105 + 0.1 1 +Υ(r) [M⊙/L⊙] +r [kpc] +ALL + 0.1 1r [kpc] +POPULATIONS 3σ 2σ 1σ best model K19 -104106108 - 0.1 1νtot(r) [M⊙ kpc-3] -r [kpc] 0.1 1 +104 +106 +108 + 0.1 1 +νtot(r) [M⊙ kpc-3] +r [kpc] + 0.1 1 +r [kpc] +105 +107 +109 + 0.1 1 +M tot(r) [M⊙] r [kpc] -105107109 - 0.1 1Mtot(r) [M⊙] -r [kpc] 0.1 1 + 0.1 1 r [kpc] --3-2-101 - 0 0.4 0.8 1.2 1.6β(r) -r [kpc] 0 0.4 0.8 1.2 1.6 +-3 +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +β(r) +r [kpc] + 0 0.4 0.8 1.2 1.6 r [kpc] Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. -In rows: derived mass-to-light ratio, total density, total mass, an d +In rows: derived mass-to-light ratio, total density , total mass, an d anisotropy parameter. In columns: results for all stars and the populations, - respectively. Green lines indicate the values for th e best-fit models -whereas the colored areas of decreasing intensity show the 1 , 2, and 3σ + respectively . Green lines indicate the values for th e best-fit models +whereas the colored areas of decreasing intensity show the 1 , 2, and 3 σ confidence regions. The best-fitting values obtained by Kowa lczyk et al. (2019) are shown with black dashed lines. that made it a good test bed for modeling techniques applicable - to dSph galaxies. We applied our approach to all data and -to two stellar populations separately, comparing the accur acy of + to dSph galaxies. W e applied our approach to all data and +to two stellar populations separately, comparing the accuracy of the obtained results. Although the addition of the second tr acer seemingly increases the number of constraints twice, the in crement is somewhat compromised by the sampling errors since th e number of stars in each sample is then reduced. Still, we foun d strong improvements in the accuracy of the method when using - two populations. 
The results of the modeling show that th e + two populations. The results of the modeling show that the density and velocity anisotropy profiles are more strongly c onstrained, - most importantly at the 3 σlevel, that is the range of + most importantly at the 3 σ level, that is the range of allowed values is much narrower. Similarly to the conclusions of Kowalczyk et al. (2018) who -explored the effects of nonsphericity using large and small +explored the e ffects of nonsphericity using large and small data samples, the comparison of results presented in the lef tand right-hand side panels of Fig. 8 suggests that the improv ed method using two stellar populations gives more precise but less accurate outcome. However, in both studies the apparent det erioration of the reliability is a consequence of modeling of a nonspherical object. In both cases, a simpler approach (muc h -smaller data samples or using one stellar population) resul ted-2-101 - 0 0.4 0.8 1.2 1.6POP I + POP II +smaller data samples or using one stellar population) resul ted +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +POP I + POP II β(r) r [kpc] --2-101 - 0 0.4 0.8 1.2 1.6POP I +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +POP I β(r) r [kpc] --2-101 - 0 0.4 0.8 1.2 1.6POP II +-2 +-1 +0 +1 + 0 0.4 0.8 1.2 1.6 +POP II β(r) r [kpc] best model 1σ -2σ3σ +2σ +3σ K19 Fig. 15. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations for the F ornax dSph. @@ -896,21 +1314,21 @@ In rows: results for all stars (calculated as the superposition of tw o populati for the best-fit models whereas the colored areas of decreasi ng intensity show the 1, 2, and 3 σconfidence regions. The dashed black line shows the result from Kowalczyk et al. (2019) for comparison. -in larger final uncertainties, usually containing the true v alues -within 1σconfidence region. On the other hand, the improved +in larger final uncertainties, usually containing the true values +within 1 σ confidence region. 
On the other hand, the improved methods exhibit substantially reduced uncertainties, hig hlighting the underlying bias. Our method parametrizes the total mass content with the mass-to-light ratio varying with radius as a power-law in th e loglog - scale. We made two main changes with respect to our previous - work: we added a third parameter ccontrolling the steepness + scale. W e made two main changes with respect to our previous + work: we added a third parameterc controlling the steepness of the mass-to-light ratio profile (previously fixed at the va lue of 3) and allowed for di fferent stellar density profiles (previously only Sérsic, now also King). These changes are of course coupled since different density profiles require di fferent exponents to reproduce the same mass profile. It is visible also in our resu lts since the King profile applied in the simulated galaxy gave us -values of clower than 3. Nevertheless, we decided to use di fferent +values ofc lower than 3. Nevertheless, we decided to use di fferent density profiles to make our method more general and appli cable to objects, such as our Illustris galaxy, for which the Sérsic formula does not provide a good approximation of the density @@ -922,7 +1340,7 @@ Fornax dSph galaxy. Due to the addition of another free param eter Article number, page 11 of 12 A&A proofs: manuscript no. Populations4 obtained in Kowalczyk et al. (2019). However, in terms of the -total density and mass distribution the estimates obtained here +total density and mass distribution the estimates obtainedhere agree very well with those earlier results in the range cover ed by the data. Therefore, the detailed comparison with other e stimates from the literature presented in Kowalczyk et al. (201 9) is @@ -932,32 +1350,32 @@ estimates is seen in the results of modeling two populations in Fornax. 
In this case we find the anisotropy to be slightly incr easing rather than decreasing with radius and, most importantl y, the confidence regions for this parameter, as well as for the density, - are much narrower. We were thus able to obtain tighter c onstraints + are much narrower. W e were thus able to obtain tighter constraints on the properties of Fornax, which means that the im proved method is successful. For the first time, we were also a ble to deduce the velocity anisotropy profiles for each of the pop ulations - separately. We found that the more concentrated, meta l-rich + separately. W e found that the more concentrated, meta l-rich population I has a decreasing anisotropy profile while the mo re extended, metal-poor population II has the anisotropy incr easing with radius. This finding may partially explain the large spr ead of the anisotropy values obtained in the literature and summ arized - in Table 2 and 3 of Kowalczyk et al. (2019), which were -often based on modeling subsamples of our spectroscopic dat a + in T able 2 and 3 of Kowalczyk et al. (2019), which were +often based on modeling subsamples of our spectroscopic data set. For both studied objects we split the stars into two populations - by dividing them in half based on their metallicity, Z(in + by dividing them in half based on their metallicity, Z (in solar units), for the Illustris galaxy and [Fe /H] for Fornax. Such a method is approximate but justified. Both galaxies have com plex star formation history with multiple star formation bu rsts, as demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. -(2013), producing multiple stellar populations which cann ot be +(2013), producing multiple stellar populations which cannot be easily tracked as the metallicity is a good but not perfect pr oxy for the stellar age. Moreover, the metallicity histograms f or both objects are approximately unimodal not allowing for a conve nient separation. 
More refined methods of division have been -suggested in the literature, for example in the form of the li kelihood +suggested in the literature, for example in the form of the likelihood function based on the position, velocity, and metallic ity index - (Walker & Peñarrubia 2011). However, the likelihood fun ction + (W alker & Peñarrubia 2011). However, the likelihood fun ction requires many assumptions which introduce additional uncertainties into the treatment of the data. On the other hand , our approach ensures the maximization of each sample (and there fore @@ -965,64 +1383,65 @@ approach ensures the maximization of each sample (and there fore features of the star formation history. Further improvements to the Schwarzschild modeling method are certainly possible. One way to proceed would be to -include the modeling of the proper motions of the stars. For n ow, +include the modeling of the proper motions of the stars. For now , measurements of transverse velocities are available only f or the brightest stars in dSph galaxies, but even small samples of t his type could provide further constraints on the models, as dem onstrated by Strigari et al. (2007) and Massari et al. (2020). -Acknowledgements. We are grateful to Andrés del Pino for providing the data for +Acknowledgements.W e are grateful to Andrés del Pino for providing the data for the Fornax dSph and to the Illustris team for making their sim ulations publicly available. Useful comments from the anonymous referee are k indly appreciated. This research was supported by the Polish National Science C enter under grant 2018/28/C/ST9/00529. References -Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184 -Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13 -Bellazzini, M., Ferraro, F. R., & Pancino, E. 2001, MNRAS, 32 7, L15 -Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University +Amorisco, N. C., & Evans, N. W . 
2012, MNRAS, 419, 184 +Battaglia, G., Helmi, A., T olstoy , E., et al. 2008, ApJ, 681,L13 +Bellazzini, M., Ferraro, F . R., & Pancino, E. 2001, MNRAS, 32 7, L15 +Binney , J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University Press, Princeton) Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 -Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia, -G. 2013, MNRAS, 433, 3173del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505 +Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de V en,G., & Battaglia, +G. 2013, MNRAS, 433, 3173 +del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505 del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996 del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017 , MNRAS, 465, 3708 Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126 -Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92 +Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ,583, 92 Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L 40 Genel, S., V ogelsberger, M., Springel, V ., et al. 2014, MNRA S, 445, 175 -Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, M NRAS, 474, 1398 +Genina, A., Benitez-Llambay , A., Frenk, C. S., et al. 2018, M NRAS, 474, 1398 Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 4 81, 250 Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354 Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89 -Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory, N., & Wi lliams, M. J. 2013, +Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory , N., & Williams, M. J. 2013, ApJ, 763, 91 King, I. 1962, AJ, 67, 471 -Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 201 3, MNRAS, 431, +Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431, 2796 -Kowalczyk, K., Łokas, E. L., & Valluri, M. 2017, MNRAS, 470, 3 959 -Kowalczyk, K., Łokas, E. L., & Valluri, M. 
2018, MNRAS, 476, 2 918 -Kowalczyk, K., del Pino, A., Łokas, E. L., & Valluri, M. 2019, MNRAS, 482, +Kowalczyk, K., Łokas, E. L., & V alluri, M. 2017, MNRAS, 470, 3 959 +Kowalczyk, K., Łokas, E. L., & V alluri, M. 2018, MNRAS, 476, 2 918 +Kowalczyk, K., del Pino, A., Łokas, E. L., & V alluri, M. 2019, MNRAS, 482, 5241 Łokas, E. L., 2002, MNRAS, 333, 697 -Łokas, E. L., Mamon, G. A., & Prada, F. 2005, MNRAS, 363, 918 -Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633 , A36 +Łokas, E. L., Mamon, G. A., & Prada, F . 2005, MNRAS, 363, 918 +Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36 Mateo, M. 1998, ARA&A, 36, 435 -Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, +Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomyand Computing, 13, 12 -Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 49 5, 3022 -Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery , B. P. 1992, Numerical +Pace, A. B., Kaplinghat, M., Kirby , E., et al. 2020, MNRAS, 49 5, 3022 +Press, W . H., T eukolsky , S. A., V etterling, W . T ., & Flannery , B. P . 1992, Numerical Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge) Schwarzschild, M. 1979, ApJ, 232, 236 -Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observator io Astronomico, Cordoba, +Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba, Argentina) Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 6 57, L1 -Tolstoy, E., Hill, V ., & Tosi, M. 2009, ARA&A, 47, 371 -Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 -van der Marel, R. P., Cretton, N., de Zeeuw, P. T., & Rix, H.-W. 1998, ApJ, 493, +T olstoy , E., Hill, V ., & T osi, M. 2009, ARA&A, 47, 371 +V alluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 +van der Marel, R. P ., Cretton, N., de Zeeuw , P . T ., & Rix, H.-W .1998, ApJ, 493, 613 V ogelsberger, M., Genel, S., Springel, V ., et al. 
2014a, Nat ure, 509, 177 V ogelsberger, M., Genel, S., Springel, V ., et al. 2014b, MNR AS, 444, 1518 -Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20 -Wang, M. Y ., de Boer, T., Pieres, A., et al. 2019, ApJ, 881, 118 +W alker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20 +W ang, M. Y ., de Boer, T ., Pieres, A., et al. 2019, ApJ, 881, 118 Article number, page 12 of \ No newline at end of file diff --git a/read/results/pypdf/2201.00178.txt b/read/results/pypdf/2201.00178.txt index 7cf5988..6423552 100644 --- a/read/results/pypdf/2201.00178.txt +++ b/read/results/pypdf/2201.00178.txt @@ -1,10 +1,10 @@ Draft version January 4, 2022 -Typeset using L ATEX default style in AASTeX631 +Typeset using LATEX default style in AASTeX631 Imaging the Sun’s near-surface flows using mode-coupling analysis Prasad Mani - ,1Chris S. Hanson - ,2and Shravan Hanasoge -1, 2 + ,1 Chris S. Hanson + ,2 and Shravan Hanasoge + 1, 2 1Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India 2Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE ABSTRACT @@ -15,9 +15,9 @@ Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on measurements to show that the resulting divergence and radial vorticity maps at supergranular length scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Correlation Tracking method. We find that the Pearson correlation coefficient is ≥0.9 for divergence flows, -while≥0.8 is obtained for the radial vorticity. +while ≥0.8 is obtained for the radial vorticity. Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) -1.INTRODUCTION +1. INTRODUCTION Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect on solar oscillations (see Christensen-Dalsgaard 2002, for a review). 
These are resonant normal modes of the Sun, behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber @@ -43,7 +43,8 @@ et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Man et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and comparing with previous time-distance studies (Langfellner et al. 2018). -prasad.subramanian@tifr.res.inarXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 +prasad.subramanian@tifr.res.in +arXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 Mani et al. Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete @@ -58,105 +59,122 @@ In this study, we extend the spectral analysis of H21 and develop the method to at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order -coupling (p 2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. +coupling (p2-p2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. 1.1. Forward problem In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a complete derivation of the forward problem. 
Working in the plane-parallel atmosphere (see also Woodard 2006), we -denote the horizontal unit vectors exandeyin our local Cartesian domain as pointing towards west and north on the -solar surface, respectively, and ezpoints outwards. This approximation is valid when observing patches of the surface +denote the horizontal unit vectors ex and ey in our local Cartesian domain as pointing towards west and north on the +solar surface, respectively, and ez points outwards. This approximation is valid when observing patches of the surface that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood of the supergranular scale ( ∼30 Mm), we expect the measured spectral cross-correlation signal to peak around the -horizontal wavenumber qR⊙≈120 (Rincon & Rieutord 2018), where q=|q|=|(qx,qy)|is the vector horizontal +horizontal wavenumber qR⊙ ≈120 (Rincon & Rieutord 2018), where q = |q|= |(qx,qy)|is the vector horizontal wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon & Rieutord 2018), permitting us to model the flow vector uuu= (ux,uy,uz) in the Cartesian domain like so (Unno et al. 1989; Woodard 2006) -uσ=∇×[∇×(Pez)] +∇×(Tez), (1) -whereP=Pσ(x) andT=Tσ(x) are poloidal and toroidal scalar functions, varying with position xand temporal -frequencyσ.∇is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying +uσ = ∇×[∇×(Pez)] + ∇×(Tez), (1) +where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal +frequency σ. ∇is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 
2020; Mandal & Hanasoge 2020, for -example), here we only consider the frequency bin σ= 0, denoting the temporally averaged flow over the period -of analysis. We therefore suppress σfrom all terms this point forward, remembering that temporal dynamics of +example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period +of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using vector calculus results in -u=−∇2Pez+∇(∂zP) +∇hT×ez, (2) -where∇hrefers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the +u= −∇2Pez + ∇(∂zP) + ∇hT×ez, (2) +where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a -function of horizontal wavenumber qand depthzez. Hence the poloidal and toroidal flows are described by Pq(z) and -Tq(z), respectively. Furthermore, we parametrize the flow along ezusing basis functions f(z) (Chebyshev, B-spline, +function of horizontal wavenumber qand depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and +Tq(z), respectively. Furthermore, we parametrize the flow along ez using basis functions f(z) (Chebyshev, B-spline, etc). This is expressed as -P≡Pq(z) =∑ -jfj(z)Pqj, T≡Tq(z) =∑ -jfj(z)Tqj. (3) -The flow coefficients PqjandTqj, represented by the discrete indices qandj, become ideal candidates for inversions, -where the flow for each wavenumber qcan be inverted for independently; parallelization in computation can thus be -exploited to expedite inversions. Note that Pqj=P∗ -−qjandTqj=T∗ -−qjfor the flow field to be real in the spatiotemporal +P ≡Pq(z) = +∑ +j +fj(z) Pqj, T ≡Tq(z) = +∑ +j +fj(z) Tqj. 
(3) +The flow coefficients Pqj and Tqj, represented by the discrete indices qand j, become ideal candidates for inversions, +where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be +exploited to expedite inversions. Note that Pqj = P∗ +−qj and Tqj = T∗ +−qj for the flow field to be real in the spatiotemporal domain. -To infer flows from wavefields φscattered by a perturbation of length scale q, cross-correlate them in the manner +To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner Imaging near-surface flows using mode-coupling analysis 3 φω∗ -kφω -k+q, wherekis the oscillation mode wavenumber ( kx,ky) andωis the temporal frequency. Relate φω∗ -kφω -k+qthus -to the flow coefficients PqjandTqj(see eq A7) +k φω +k+q, where kis the oscillation mode wavenumber (kx,ky) and ω is the temporal frequency. Relate φω∗ +k φω +k+q thus +to the flow coefficients Pqj and Tqj (see eq A7) ⟨φω∗ -kφω -k+q⟩=Hω -kk′nn′∑ -jCqj,kPqj+Dqj,kTqj. (4) -The weight factor Hω(see eq A8) is a function of frequency, capturing information about the extent of coupling between -the two modes [ n,k] and [n′,k′], wherenandn′are the radial orders of the modes, and k=|k|andk′=|k′|=|k+q|. +k φω +k+q⟩= Hω +kk′nn′ +∑ +j +Cqj,kPqj + Dqj,kTqj. (4) +The weight factorHω (see eq A8) is a function of frequency, capturing information about the extent of coupling between +the two modes [n,k] and [n′,k′], where nand n′are the radial orders of the modes, and k= |k|and k′= |k′|= |k+q|. The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the -Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cqj,kandDqj,kare poloidal +Lorentzians of the two modes overlap, the stronger the coupling. 
Finally, the real terms Cqj,k and Dqj,k are poloidal and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements -and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k=C−qj,−kand -Dqj,k=D−qj,−k(see eq A6). The kernels, as flows, are expressed on the basis fj(z). +and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj,k = C−qj,−k and +Dqj,k = D−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis fj(z). 1.2. Least-squares of cross-correlation Even though φω∗ -kφω -k+qisolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement, - known in mode-coupling literature as ’ B-coefficients’, is much better designed for inversion as it reduces the +k φω +k+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement, + known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the dimension of the problem. A least-squares fit to the cross-correlation φω∗ -kφω -k+q(see Woodard 2006, 2014, 2016) results -in theB-coefficients Bk,q, according to -Bk,q=∑ -ωHω∗ -kk′nn′φω∗ -kφω +k φω +k+q (see Woodard 2006, 2014, 2016) results +in the B-coefficients Bk,q, according to +Bk,q = +∑ +ω +Hω∗ +kk′nn′ φω∗ +k φω k+q ∑ -ω|Hω -kk′nn′|2. (5) +ω +|Hω +kk′nn′ |2 . (5) Multiplying eq 4 on both sides by Hω∗ -kk′nn′and substituting by eq 5 on the left-hand-side results in a concisely defined +kk′nn′ and substituting by eq 5 on the left-hand-side results in a concisely defined forward problem (compare with eq 4) -Bk,q=∑ -jCqj,kPqj+Dqj,kTqj. (6) +Bk,q = +∑ +j +Cqj,kPqj + Dqj,kTqj. (6) In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. -Here, we sum over both ±ωwithin a few mode linewidths Γ. 
Denoting the resonant frequency of a mode using ωnk, -|ω|∈( -ωnk−ϵΓnk/2,ωnk+ϵΓnk/2) +Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ωnk, +|ω|∈ +( +ωnk −ϵΓnk/2,ωnk + ϵΓnk/2 +) or -|ω|∈( -ωn′k′−ϵΓn′k′/2,ωn′k′+ϵΓn′k′/2) +|ω|∈ +( +ωn′k′ −ϵΓn′k′ /2,ωn′k′ + ϵΓn′k′ /2 +) . (7) -Summing over±ωguarantees that the parity Bk,q=B∗ -−k,−q(see Appendix A for derivation) is obeyed, thereby +Summing over ±ω guarantees that the parity Bk,q = B∗ +−k,−q (see Appendix A for derivation) is obeyed, thereby ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. -Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −qand +Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and −k, B∗ -−k,−q=∑ -jC−qj,−kP∗ -−qj+D−qj,−kT∗ +−k,−q = +∑ +j +C−qj,−kP∗ +−qj + D−qj,−kT∗ −qj. (8) -Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,qis constructed by a -least-squares fitting, it is noteworthy that summing over −ωwill also lead to improvement in its signal-to-noise as a +Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a +least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a by-product. 1.3. Noise model In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from @@ -167,25 +185,25 @@ Every independent realization of a mode can be understood as the output of a dam random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters Mani et al. -Figure 1. 
Dispersion relation for the radial orders used in this analysis; f (blue), p 1(orange) and p 2(green). The shaded +Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p 1 (orange) and p 2 (green). The shaded regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of -kR⊙andω/2πto which we have restricted ourselves in this analysis. Beyond kR⊙of 2000, it is seen that the theoretical fitting +kR⊙ and ω/2π to which we have restricted ourselves in this analysis. Beyond kR⊙ of 2000, it is seen that the theoretical fitting of mode frequencies start deviating from the observed dispersion relation for the f-mode. -such as its amplitude, frequency and linewidth, and consequently in Bk,qin our case. We use the same noise model +such as its amplitude, frequency and linewidth, and consequently in Bk,q in our case. We use the same noise model as in H21, which was motivated by the above discussion, -Gk,q≡⟨|Bk,q|2⟩, (9) -where, unlike H21, we again sum over ±ω.Gk,qis real, with the symmetry relation Gk,q=G−k,−q(see Appendix A +Gk,q ≡⟨|Bk,q|2⟩, (9) +where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A for explanation). -2.DATA ANALYSIS +2. DATA ANALYSIS In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image is Postel projected, with a spatial resolution of approximately 0 .48Mm, sperated in time by 45 seconds, and is tracked -at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194 .4×194.4 Mm2in size, tracked for 24 hours +at the (Snodgrass 1984) rotation rate. 
Here, we select a patch that is 194 .4 ×194.4 Mm2 in size, tracked for 24 hours and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number -2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x,y;t). The Fourier-space -wavefieldφω -k(and subsequently, the cross-correlation φω∗ -kφω +2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x,y; t). The Fourier-space +wavefield φω +k (and subsequently, the cross-correlation φω∗ +k φω k+q) is obtained by computing the 3D spatial and temporal Fourier transform of the Dopplercube. The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in @@ -193,21 +211,21 @@ Eq 6, while short enough that supergranules do not substantially evolve (lifetim & Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral -profiles of the two modes [ n,k] and [n′,k′] closely align in ωspace. This implies that their mode frequencies should be -sufficiently close ( |ωnk−ωn′k′|≤δ, the separation parameter). Since Lorentzians decay rapidly, the summation over -±ωis significant only over a few linewidths ( ϵ, the summation parameter; see eq 7). We have empirically found and -tabulatedδin Table 1 for the radial order couplings n-n′∈f-f, p 1-p1, and p 2-p2(the signal strength depends only -weakly onϵ; we set it to 3 line widths). -Figure 1 shows that for any two adjacent ridges (adjacent nandn′), mode frequencies ωnkandωn′kbecome spaced +profiles of the two modes [n,k] and [n′,k′] closely align in ω space. This implies that their mode frequencies should be +sufficiently close (|ωnk −ωn′k′ |≤ δ, the separation parameter). 
Since Lorentzians decay rapidly, the summation over +±ω is significant only over a few linewidths ( ϵ, the summation parameter; see eq 7). We have empirically found and +tabulated δ in Table 1 for the radial order couplings n-n′ ∈f-f, p1-p1, and p 2-p2 (the signal strength depends only +weakly on ϵ; we set it to 3 line widths). +Figure 1 shows that for any two adjacent ridges (adjacent nand n′), mode frequencies ωnk and ωn′k become spaced farther apart with increasing wavenumber kR⊙. It is also known that mode linewidth Γ grows with radial orders for -a givenkR⊙. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of -observation set the total number of modes within a range of kR⊙(andω/2π) that can be clearly observed, thereby +a given kR⊙. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +observation set the total number of modes within a range of kR⊙ (and ω/2π) that can be clearly observed, thereby affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR⊙at fixed -radial order are different. In wavenumber, we restrict our analysis to within 200 ≤kR⊙≤2000 andqR⊙≤300. Our +radial order are different. In wavenumber, we restrict our analysis to within 200 ≤kR⊙≤2000 and qR⊙≤300. Our frequency range is confined to span the range over which acoustic modes are observed (2 ≤ω/2π≤5 in mHz). Imaging near-surface flows using mode-coupling analysis 5 -Coupling kR⊙range # of δ +Coupling kR⊙ range # of δ modes f-f [400,1000] 5240 4 [1000,1500] 7784 1.1 @@ -217,103 +235,110 @@ p1-p1 [400,1000] 5240 4.5 p2-p2 [200,1000] 5886 3 [1000,1300] 4280 3 Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different -ranges ofkR⊙. -3.INVERSION +ranges of kR⊙. +3. 
INVERSION The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements -Bk,qfrom the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and +Bk,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas -SOLA gives better localization. For total number of modes M, RLS scales as MxJwhereJis the number of basis -functionsfj(z) (J≪M; see eq 3 and section 3.1), whereas SOLA scales as M2(see Appendix B). For M > 5000, +SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis +functions fj(z) (J ≪M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M >5000, computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is -present even in p 1-p1, and p 2-p2(see Figure 3), and possibly other higher order self- and cross-couplings. Since we are +present even in p1-p1, and p2-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are interested in only surface flows, we leave higher order coupling to future work. -It bears mentioning that the slopes of the ridges in the kR⊙-νspectrum (Figure 1) increase with radial order. This +It bears mentioning that the slopes of the ridges in the kR⊙-ν spectrum (Figure 1) increase with radial order. This limits us to low-to-intermediate kR⊙(<1000) for these higher radial orders if we are to remain under the acoustic cutoff frequency of 5.3mHz. 
It also becomes imperative to use a spatially larger observation patch to gain access to signals -from lowkR⊙- too large an observation region could possibly render invalid the Cartesian geometry approximation. +from low kR⊙- too large an observation region could possibly render invalid the Cartesian geometry approximation. Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich helioseismic technique. 3.1. RLS -For givenq, the forward problem may be stated as -KU=B, (10) -with the aim to minimize the misfit∑ -k||KU−B||2, with||||2denoting the L2norm. Here, Kis the matrix formed -by the sensitivity kernels: {Cqj,k,Dqj,k}.Uis a vector composed of the flow coefficients: {Pqj,Tqj}andBis a vector -composed of computed B-coefficients:{Bk,q}. The least-squares problem is solved simultaneously for poloidal and +For given q, the forward problem may be stated as +KU = B, (10) +with the aim to minimize the misfit ∑ +k +||KU −B||2, with ||||2 denoting the L2 norm. Here, K is the matrix formed +by the sensitivity kernels: {Cqj,k,Dqj,k}. U is a vector composed of the flow coefficients: {Pqj,Tqj}and B is a vector +composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and toroidal flow. We use B-spline basis functions as our fj(z), comprising 11 knots spaced uniformly in acoustic radius, -for both poloidal and toroidal coefficients. Hence, for Mmodes (total number of kfor a givenqisM) and 11 basis -functions for each poloidal and toroidal, the dimensions of K,UandBare thusM×22, 22×1, andM×1 respectively. -Normalizing both sides of eq 10 by the noise covariance Λ(a diagonal matrix with the entries Gk,q; see eq 9; dimension -M×M) and pre-multiplying by K⊺, -(K⊺Λ−1K)U=(K⊺Λ−1)B, (11) -U=(K⊺Λ−1K)−1K⊺Λ−1B. 
(12) +for both poloidal and toroidal coefficients. Hence, for M modes (total number of kfor a given q is M) and 11 basis +functions for each poloidal and toroidal, the dimensions of K, U and B are thus M×22, 22×1, and M×1 respectively. +Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension +M ×M) and pre-multiplying by K⊺, +(K⊺Λ−1K)U =(K⊺Λ−1)B, (11) +U =(K⊺Λ−1K)−1K⊺Λ−1B. (12) Mani et al. -Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙= [−112,−45], -at the depth zo=−0.41 Mm. Right : L-curve for the mode qR⊙= [−112,−45]; the knee ( λ= 2.48) is marked by a blue +Figure 2. Left: Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR⊙ = [−112,−45], +at the depth zo = −0.41 Mm. Right: L-curve for the mode qR⊙ = [ −112,−45]; the knee ( λ = 2 .48) is marked by a blue diamond. -Since the least-squares problem is typically ill-posed, we restate the minimization as∑ -k||KU−B||2+λ||U||2with -the regularization parameter λwhich this results in a trade-off between misfit reduction (first term) and solution -norm minimization (second term). Under-regularizing can lead to a solution Uthat is dominated by errors in the +Since the least-squares problem is typically ill-posed, we restate the minimization as ∑ +k +||KU −B||2 + λ||U||2 with +the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution +norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this regularization makes the problem better conditioned and is now defined as -U= (K⊺Λ−1K+λI)−1K⊺Λ−1B, (13) -where Iis the identity matrix for L1regularization. 
The knee-point of the L-curve (Hansen 1992), a curve formed -by plotting||U||2vs||KU−B||2for different values of λ(see right panel of Figure 2), is usually chosen as the +U = (K⊺Λ−1K + λI)−1K⊺Λ−1B, (13) +where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed +by plotting ||U||2 vs ||KU −B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal -flowPqare shown in Figure 3. -4.LCT +flow Pq are shown in Figure 3. +4. LCT To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by -examining the advection of convective granules (1.2 Mm, qR⊙≈3500; Hathaway et al. 2015) by underlying largerscale +examining the advection of convective granules (1.2 Mm, qR⊙ ≈3500; Hathaway et al. 2015) by underlying largerscale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules ( ≈35 Mm), LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between consecutive intensity images, which we denote as I1,I2. The LCT method selects a patch in two images each -(I1=I1e(x−xij)2/2sigma2,I2=I2e(x−xij)2/2sigma2) that observe the same granule at the grid point xij= (xi,yj). +(I1 = I1e(x−xij)2/2 sigma2 +,I2 = I2e(x−xij)2/2 sigma2 +) that observe the same granule at the grid point xij = ( xi,yj). 
A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance moved by granules are usually in sub-pixel regime. The convention for the direction of xis the same as described in -section 1.1. The two patches I1,I2are then cross correlated for different values of position shifts ∆ x, -Cij(∆x,∆y) =∫ +section 1.1. The two patches I1,I2 are then cross correlated for different values of position shifts ∆ x, +Cij(∆x,∆y) = +∫ dxI∗ -1(−x)I2(∆x−x). (14) -The shift ∆x= (∆x,∆y) that maximizes the cross-correlation Cijis taken to be the proper motion of the granule. +1 (−x)I2(∆x−x). (14) +The shift ∆ x= (∆x,∆y) that maximizes the cross-correlation Cij is taken to be the proper motion of the granule. Provided that the time difference ∆ t, here 45 seconds, between the images is less than the lifetime of granules ( <10 -min), the velocities are given by vx= ∆x/∆tandvy= ∆y/∆t. This exercise is repeated for all grid points in the -imagesI1,I2and for each consecutive pair of images in the cube. -In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vxandvy. FLCT -requires the input sigma , which we set to 4 pix, that captures the extent of localization desired, and depends on the +min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the +images I1,I2 and for each consecutive pair of images in the cube. +In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. FLCT +requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the Imaging near-surface flows using mode-coupling analysis 7 -Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p 1-p1, and p 2-p2as a function of qxR⊙and -qyR⊙.Bottom : Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σerror around the -mean. 
Total power appears to increase through the radial orders. Power is in units of m2/s4. +Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p 1-p1, and p2-p2 as a function of qxR⊙ and +qyR⊙. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the +mean. Total power appears to increase through the radial orders. Power is in units of m 2/s4. dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the -FLCT code. vxandvyare then computed for consecutive pairs of images and are averaged over the entire day. -5.MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY +FLCT code. vx and vy are then computed for consecutive pairs of images and are averaged over the entire day. +5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by -substituting PandTfrom eq 3 into eq 2 as below uuu(q,z) - =−∇2Pez+∇(∂zP) +∇hT×ez, -=−(0,0, ∂2 -xP+∂2 -yP+∂2 +substituting P and T from eq 3 into eq 2 as below uuu(q,z) + = −∇2Pez + ∇(∂zP) + ∇hT×ez, += −(0, 0, ∂2 +xP + ∂2 +yP + ∂2 zP) + (∂x∂zP, ∂y∂zP, ∂2 -zP) + (∂yT,−∂xT,0). (15) -Setting∂2 -x+∂2 -y=q2,divis given by, -∇h·uuu(q,z) =q2∂zP, (16) -andcurl is given by, +zP) + (∂yT, −∂xT, 0). (15) +Setting ∂2 +x + ∂2 +y = q2, div is given by, +∇h ·uuu(q,z) = q2∂zP, (16) +and curl is given by, [ -∇×uuu(q,z)] -z=q2T. (17) +∇×uuu(q,z) +] +z += q2T. (17) We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The -essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR⊙of +essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR⊙ of interest (see Figure 4), and subsequently convert it to real space. 
We seek to show comparisons (see Figures 5, 6, and 7) for qR⊙= 100, 150, 200 and 250. To sufficiently delineate flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The @@ -321,79 +346,99 @@ Gaussian is centered at the desired wavenumber with a half-width of 25. We then obtain a real-space steady-flow map. Mani et al. Figure 4. Left: Divergence-flow power spectrum |div|2, from eqn 16, obtained from inversion using all the couplings. The -power-spectrum is then filtered with a bandpass centered around qR⊙= 150 (middle panel). The resulting spectra is shown in -the right panel. The units of |div|2are in s−2. For illustration, we show the action of the filter on the power-spectrum |div|2 -since it is a real quantity, but recall that it is the Fourier-space flow div(a complex quantity) on which we apply the filter. -For LCT, we first apply a Gaussian smoothing to vxandvyto average over small-scale features; the extent of -smoothing depends on the length scale qR⊙to be compared with mode-coupling. divandcurl are then simply +power-spectrum is then filtered with a bandpass centered around qR⊙ = 150 (middle panel). The resulting spectra is shown in +the right panel. The units of |div|2 are in s −2. For illustration, we show the action of the filter on the power-spectrum |div|2 +since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. +For LCT, we first apply a Gaussian smoothing to vx and vy to average over small-scale features; the extent of +smoothing depends on the length scale qR⊙ to be compared with mode-coupling. div and curl are then simply computed by -div=∂xvx+∂yvy, (18) -curl=∂xvy−∂yvx. (19) +div= ∂xvx + ∂yvy, (18) +curl= ∂xvy −∂yvx. (19) We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, and transform back to real space. 
Condensing all of the above, the following sequence of operations to compare flows at desired length scales are performed for mode-coupling (M-C) and for LCT M-C - :φ(x,y;t)3D FFT= = = = =⇒φω -k,Bk,qinversion= = = = = =⇒P,T∇h·= = =⇒ -∇×eqns 16,17Filter,= = = = =⇒ -2D FFTdiv,curl -LCT :I1,I2FLCT= = = =⇒vx,vysmooth,= = = = = =⇒ -∇h·∇×eqns 18,192D FFT,= = = = = =⇒ -FilterFiltered, + : φ(x,y; t) +3D FFT += = = = =⇒φω +k,Bk,q +inversion += = = = = =⇒P,T +∇h· += = =⇒ +∇× +eqns 16, 17 +Filter, += = = = =⇒ +2D FFT +div,curl +LCT : I1,I2 +FLCT += = = =⇒vx,vy +smooth, += = = = = =⇒ +∇h· ∇× +eqns 18, 19 +2D FFT, += = = = = =⇒ +Filter +Filtered, Fourier-space -flows2D FFT= = = = =⇒div,curl -6.RESULTS +flows +2D FFT += = = = =⇒div,curl +6. RESULTS Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5, where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from -the two methods near supergranular scale ( qR⊙≈100). Near-surface flows are imaged most faithfully when all the +the two methods near supergranular scale ( qR⊙ ≈100). Near-surface flows are imaged most faithfully when all the couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to -insufficient modes for the p 2-p2case (see Table 1), we are unable to infer vortical flows with conviction other than near +insufficient modes for the p2-p2 case (see Table 1), we are unable to infer vortical flows with conviction other than near the supergranular scale, as can be seen from Table 2. 
Figure 6 also aligns with what we believe can be accomplished -through mode-coupling helioseismology - using f-f or p 1-p1alone to seismically infer near-surface divergence and vortical +through mode-coupling helioseismology - using f-f or p1-p1 alone to seismically infer near-surface divergence and vortical flows at different scales ( qR⊙= 100,150) can yield extremely good agreement with LCT. As the length scale of the inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases. An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to comment substantively on the flows at these scales. 6.1. Amplitudes of mode-coupling flows Imaging near-surface flows using mode-coupling analysis 9 -(a)qR⊙= 100 ,f-f + p 1-p1+ p 2-p2 -Figure 5. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) +(a) qR⊙ = 100, f-f + p1-p1 + p2-p2 +Figure 5.Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around -qR⊙= 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges +qR⊙ = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum values. 
For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward -a precise statement on them. H21 reported a 60% greater amplitude for p 1-p1over f-f coupling (Figure 3 reflects a +a precise statement on them. H21 reported a 60% greater amplitude for p 1-p1 over f-f coupling (Figure 3 reflects a similar conclusion), another element to consider when combining different radial orders. The choice of regularization (see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a radial grid along which kernels and flows tend to be described. Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors: -•Coupling(s) used, -•Regularization parameter in the inversion, -•Smoothing applied to LCT flows (indirectly; see below paragraph), -•The depth at which flows are inferred. +• Coupling(s) used, +• Regularization parameter in the inversion, +• Smoothing applied to LCT flows (indirectly; see below paragraph), +• The depth at which flows are inferred. Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR⊙, we first fix the coupling(s) and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and Mani et al. -(a)qR⊙= 100 ,f-f - (b)qR⊙= 150 ,p1-p1 -Figure 6. 
Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) +(a) qR⊙ = 100, f-f + (b) qR⊙ = 150, p1-p1 +Figure 6.Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around -qR⊙= 100, and using (b) p 1-p1coupling (bottom row), bandpass filtered around qR⊙= 150. We cut edges out from the flow +qR⊙ = 100, and using (b) p 1-p1 coupling (bottom row), bandpass filtered around qR⊙ = 150. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. -(a)qR⊙= 200 ,f-f + p 1-p1+ p 2-p2 - (b)qR⊙= 250 ,f-f + p 1-p1+ p 2-p2 -Figure 7. Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) +(a) qR⊙ = 200, f-f + p1-p1 + p2-p2 + (b) qR⊙ = 250, f-f + p1-p1 + p2-p2 +Figure 7.Real-space divergence flows (left column, in units of 10−5s−1) and radial vorticity (right column, in units of 10−6s−1) for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around -(a)qR⊙= 200, and (b) qR⊙= 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. +(a) qR⊙ = 200, and (b) qR⊙ = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired @@ -402,12 +447,12 @@ It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line and LCT agree closely in amplitudes. 
But, to recapitulate, a host of factors described above can skew the amplitudes for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history (see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨ oning et al. 2020; Korda -&ˇSvanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al. +& ˇSvanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al. Imaging near-surface flows using mode-coupling analysis 11 -Coupling qR⊙div curl +Coupling qR⊙ div curl f-f 100 0.97 0.87 -+ p 1-p1 150 0.95 0.76 -+ p 2-p2 200 0.92 0.76 ++ p1-p1 150 0.95 0.76 ++ p2-p2 200 0.92 0.76 250 0.85 0.65 f-f 100 0.96 0.85 150 0.93 0.76 @@ -421,7 +466,7 @@ p2-p2 100 0.94 0.7 150 0.91 0.39 200 0.79 0.3 250 0.55 0.3 -Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, +Table 2.Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, respectively. (2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see @@ -435,206 +480,261 @@ signal-to-noise through larger observation sizes, we suggest that Cartesian mode applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013; Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves. APPENDIX -A.DERIVATION OF THE FORWARD MODEL -As described in section 1.1, we seek to describe the flow uas a function of qalongez. To that end, substituting +A. DERIVATION OF THE FORWARD MODEL +As described in section 1.1, we seek to describe the flow uas a function of q along ez. 
To that end, substituting eq 3 into eq 2, uσ -q(z) =∑ -j{ -q2fjez+iqf′ -j} +q(z) = +∑ +j +{ +q2 fjez + iqf′ +j +} Pσ -jq+iq×ezfjTσ +jq + iq×ezfjTσ jq. (A1) -For flows in the anelastic limit ( u≪speed of sound), we can denote the flow perturbation operator as δLσ= -−2iωρuσ·∇(see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, +For flows in the anelastic limit ( u ≪speed of sound), we can denote the flow perturbation operator as δLσ = +−2iωρuσ ·∇(see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, δLσ -q=−2iωρ(iuσ -q·k+uσ -q·ez∂z), (A2) -=−2iωρ∑ -j{ +q = −2iωρ (iuσ +q ·k+ uσ +q ·ez∂z), (A2) += −2iωρ∑ +j +{ −k·qf′ jPσ -jq−k·(q×ez)fjTσ -jq+q2fjPσ -jq∂z} +jq −k·(q×ez) fjTσ +jq + q2 fjPσ +jq ∂z +} . (A3) Mani et al. Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) -ξk≡ξnk(z) =iˆkHnk(z)ez+ ˆzVnk(z), (A4) -whereHandVare real-valued functions; nandn′are dropped for compactness of notation. Then the coupling of -two modesξkandξk′(k′=k+q), by the flow perturbation operator δLσ -q, denoted by coupling integral Λk -k′(σ), is +ξk ≡ξnk(z) = iˆkHnk(z)ez + ˆzVnk(z), (A4) +where H and V are real-valued functions; n and n′ are dropped for compactness of notation. Then the coupling of +two modes ξk and ξk′ (k′= k+ q), by the flow perturbation operator δLσ +q, denoted by coupling integral Λ k +k′ (σ), is given by Λk -k′(σ)≡∫ +k′ (σ) ≡ +∫ dx(δLσ -qξk)·ξ∗ -k′=∫ -dx[ -−2iωρ∑ -j{ -q2fjPσ -jq(ˆk·ˆk′H′ +qξk) ·ξ∗ +k′ = +∫ +dx +[ +−2iωρ +∑ +j +{ +q2 fjPσ +jq (ˆk·ˆk +′ +H′ kH∗ -k′+V′ +k′ + V′ kV∗ -k′) -−[ +k′ ) +− +[ k·qf′ jPσ -jq+k·(q×ez)fjTσ -jq] -(ˆk·ˆk′HkH∗ -k′+VkV∗ -k′)}] +jq + k·(q×ez) fjTσ +jq +] +(ˆk·ˆk +′ +HkH∗ +k′ + VkV∗ +k′ ) +}] (A5) -We desire to linearly relate the coupling integral in the above equation to the flows PandT, through poloidal and -toroidal sensitivity kernels, Cqj,kandDqj,krespectively. 
Hence, they are given by -Cqj,k=∫ -dzρ[ -q2fj(ˆk·ˆk′H′ +We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and +toroidal sensitivity kernels, Cqj,k and Dqj,k respectively. Hence, they are given by +Cqj,k = +∫ +dzρ +[ +q2 fj(ˆk·ˆk +′ +H′ kH∗ -k′+V′ +k′ + V′ kV∗ -k′) +k′ ) −k·qf′ -j(ˆk·ˆk′HkH∗ -k′+VkV∗ -k′)] +j(ˆk·ˆk +′ +HkH∗ +k′ + VkV∗ +k′ ) +] , -Dqj,k=k·(q×ez)∫ -dzρfj(ˆk·ˆk′HkH∗ -k′+VkV∗ -k′). (A6) -Note the symmetry Cqj,k=C−qj,−kandDqj,k=D−qj,−k. This coupling integral contributes to the cross-spectral -measurement between modes kandk+qFrom eq 8 of Woodard (2014), we write the first-order effect of flow on +Dqj,k = k·(q×ez) +∫ +dzρf j(ˆk·ˆk +′ +HkH∗ +k′ + VkV∗ +k′ ). (A6) +Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. This coupling integral contributes to the cross-spectral +measurement between modes k and k+ q From eq 8 of Woodard (2014), we write the first-order effect of flow on wavefield cross-correlation as ⟨φω∗ -kφω+σ -k+q⟩=Hω +k φω+σ +k+q ⟩= Hω kk′σΛk -k′(σ), (A7) +k′ (σ), (A7) where the function His given by Hω -kk′σ=−2iω(Nk|Rω -k|2Rω+σ -k′+Nk′|Rω+σ -k′|2Rω∗ -k). (A8) -We absorb the factor −2iωinto the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. -The mode spectral profile Ris a Lorentzian, given by +kk′σ = −2iω(Nk|Rω +k|2 Rω+σ +k′ + Nk′ |Rω+σ +k′ |2 Rω∗ +k ). (A8) +We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. +The mode spectral profile R is a Lorentzian, given by Rω -k=1 +k = 1 ω2 -nk−ω2−iωγnk/2, (A9) -whereωnkis the resonant frequency of the mode, and γnkis the mode linewidth. Eq A9 can be derived by introducing -mode damping−iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq +nk −ω2 −iωγnk/2, (A9) +where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. 
Eq A9 can be derived by introducing +mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq 5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. -Also, the parityHω -kk′σ=H−ω∗ -kk′−σandRω -k=R−ω∗ -kare established. Mode normalization Nis given by -Nk=1 -QQ∑ -k∑ -ω|φω +Also, the parity Hω +kk′σ = H−ω∗ +kk′−σ and Rω +k = R−ω∗ +k are established. Mode normalization N is given by +Nk = 1 +Q +Q∑ +k +∑ +ω +|φω k|2 ∑ -ωRω -k, (A10) -where the1 -QQ∑ -kon the right-hand-side implies average over all [ kx,ky] (Q terms in all) such that k=|k|is constant. -This forces Nto be isotropic, i.e., to only depend on k, and notk. The sum over ωis within five linewidths of ωnk. +ω +Rω +k +, (A10) +where the 1 +Q +Q∑ +k +on the right-hand-side implies average over all [ kx,ky] (Q terms in all) such that k = |k|is constant. +This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ωnk. Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve to establish the parity Bσ -k,q=B∗−σ +k,q = B∗−σ −k,−q. This allows for obtaining Pσ -q=P∗−σ -−q, and subsequently, purely real flow in -the real domain. Setting σ= 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into -the noise model obtained in H21 and summing over ±ωestablishes the symmetry Gσ -k,q=G−σ +q = P∗−σ +−q , and subsequently, purely real flow in +the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into +the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ +k,q = G−σ −k,−q. Imaging near-surface flows using mode-coupling analysis 13 -B.SOLA INVERSIONS +B. 
SOLA INVERSIONS Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors -for the mode qand depthzo, which we will call αk,zo. A linear weighted sum of the measurements Bk,qin the fashion∑ -kαk,zoBk,qallows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients -αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode qcan be summed up coherently to give an -’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zoand a width ∆ is chosen +for the mode qand depth zo, which we will call αk,zo. A linear weighted sum of the measurements Bk,q in the fashion∑ +k +αk,zoBk,q allows for an average value of the flow Pq(z) to be estimated at the depth zo. To obtain the coefficients +αk,zo, it is assumed that a set of sensitivity kernels Kk,q(z) for the mode q can be summed up coherently to give an +’averaging kernel’ that is localized at the depth zo. Conventionally, a Gaussian centered at zo and a width ∆ is chosen which the averaging kernel should resemble after performing inversion. B.1. Kernels in the integral form Since the kernels in eq A6 are manifest as coefficients on a basis fj(z), we first derive kernels that can be expressed -as a function of depth z(see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: -P≡Pq(z),p≡Pqj,F≡fj(z),B≡Bk,qC≡Cqj,kandK≡Kk,q(z), we write (assume only poloidal flow for +as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: +P ≡Pq(z), p ≡Pqj, F ≡fj(z), B ≡Bk,q C ≡Cqj,k and K ≡Kk,q(z), we write (assume only poloidal flow for simplicity, the same derivations hold true for toroidal flow as well) -P=Fp (B11) -The size of Pis thus the same as the length of the radial grid z. 
-Now, pre-multiply by FTand integrate over zon both sides (drop the integral notation for compactness), -FTP= (FTF)p -p= (FTF)−1FTP (B12) +P = Fp (B11) +The size of P is thus the same as the length of the radial grid z. +Now, pre-multiply by FT and integrate over z on both sides (drop the integral notation for compactness), +FTP = (FTF)p +p= (FTF)−1 FTP (B12) Now, substituting eq B12 into the forward problem eq 6, -B=Cp +B = Cp = (FTF)−1FTCP -=KP (B13) += KP (B13) where -K= (FTF)−1FTC, -i.e.,Kk,q(z) =∑ -j,j′[∫ -dzfj(z)fj′(z)]−1 -fj′(z)Cqj′,k (B14) +K = (FTF)−1FTC, +i.e., Kk,q(z) = +∑ +j,j′ +[∫ +dzfj(z)fj′ (z) +]−1 +fj′ (z)Cqj′,k (B14) B.2. Obtaining the coefficients α Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at zo -T(z,zo) =1√ -2π∆2exp(z−zo -2∆2) +T(z,zo) = 1√ +2π∆2 exp +(z−zo +2∆2 +) . (B15) This can be achieved by solving the optimization problem -minimizeX=∫ -dz[ -T(z,zo)−Θq(z,zo)]2 +minimize X= +∫ +dz +[ +T(z,zo) −Θq(z,zo) +]2 , (B16) where we introduce the averaging kernel for mode qthus -Θq(z,zo) =∑ -kαk,zoKk,q(z). (B17) +Θq(z,zo) = +∑ +k +αk,zoKk,q(z). (B17) As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 and B14. Mani et al. -Figure 8. Left: KernelKk,q(z) (eq B14) shown vs depth zfor the three radial order couplings f-f, p 1-p1, and p 2-p2.qR⊙= -[−112,−45] and kR⊙= [−853,−157] is chosen for all the radial order couplings for comparison. Right : Averaging kernel -(eq B17) using SOLA, for qR⊙= [−112,−45] at depth z0=−0.48 Mm, and the corresponding target Gaussian (eq B15). -Integral of the averaging kernel over zis 0.89. -Setting∂X -∂α→0 gives us the matrix problem to be solved -A{α}=v, -{α}=[ -A+µI]−1 +Figure 8. Left: Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p 1-p1, and p 2-p2. qR⊙ = +[−112,−45] and kR⊙ = [ −853,−157] is chosen for all the radial order couplings for comparison. 
Right: Averaging kernel +(eq B17) using SOLA, for qR⊙ = [ −112,−45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). +Integral of the averaging kernel over z is 0.89. +Setting ∂X +∂α →0 gives us the matrix problem to be solved +A{α}= v, +{α}= +[ +A+ µI +]−1 v, (B18) -where the square matrix A=∫ -dzKk,q(z)Kk′,q(z) andv=∫ -dzKk,q(z)T(z,zo). Here,k′is just a dummy index for -denoting elements in the matrix A, (k′̸=k+q). In the last line of eq B18, we introduce regularization using an Identity -matrixI, with the regularization parameter µ- purpose being the same as that described in section 3.1. Obtaining -αthus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α -obtained from eq B18 into last line of eq B13, and∑ -kon both sides +where the square matrix A= +∫ +dzKk,q(z)Kk′,q(z) and v = +∫ +dzKk,q(z)T(z,zo). Here, k′is just a dummy index for +denoting elements in the matrixA, (k′̸= k+q). In the last line of eq B18, we introduce regularization using an Identity +matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining +α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α +obtained from eq B18 into last line of eq B13, and ∑ +k +on both sides +∑ +k +αk,zo Bσ +k,q = ∑ -kαk,zoBσ -k,q=∑ -kαk,zo∫ +k +αk,zo +∫ dzKk,q(z)Pσ -q(z), -=∫ +q (z), += +∫ dzΘq(z,zo)Pσ -q(z), +q (z), ≈⟨Pσ -q(zo)⟩ (B19) +q (zo)⟩ (B19) Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. REFERENCES @@ -647,7 +747,8 @@ Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, Science Advances, 2, e1600557, doi: 10.1126/sciadv.1600557 Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, -A&A, 628, A37, doi: 10.1051/0004-6361/201935591B¨ oning, V. G. A., Birch, A. 
C., Gizon, L., Duvall, T. L., & +A&A, 628, A37, doi: 10.1051/0004-6361/201935591 +B¨ oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & Schou, J. 2020, A&A, 635, A181, doi: 10.1051/0004-6361/201937331 Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, @@ -657,10 +758,10 @@ Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 —. 2021, Living Reviews in Solar Physics, 18, 2, doi: 10.1007/s41116-020-00028-3 Imaging near-surface flows using mode-coupling analysis 15 -Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR⊙andqyR⊙.Right : Corresponding power-spectrum -averaged over the azimuthal angle. Shaded region shows ±1−σerror around the mean. Power is in units of m2/s4. -Figure 10. Real-space divergence flows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling, -and LCT, bandpass filtered around qR⊙= 100. We cut edges out from the flow maps and compare a circular region of diameter +Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of qxR⊙ and qyR⊙. Right: Corresponding power-spectrum +averaged over the azimuthal angle. Shaded region shows ±1 −σ error around the mean. Power is in units of m 2/s4. +Figure 10. Real-space divergence flows (in units of 10 −5s−1) for mode-coupling inversion through SOLA using f-f coupling, +and LCT, bandpass filtered around qR⊙ = 100. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is 1.05. For demonstration, we show inversions only for poloidal flow using SOLA. De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, @@ -683,7 +784,8 @@ Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, doi: 10.1086/423367 Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, -Science, 368, 1469, doi: 10.1126/science.aaz7119Gizon, L., Cameron, R. H., Bekki, Y., et al. 
2021, A&A, +Science, 368, 1469, doi: 10.1126/science.aaz7119 +Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, 652, L6, doi: 10.1051/0004-6361/202141462 Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, 824, 128, doi: 10.3847/0004-637X/824/2/128 @@ -738,7 +840,8 @@ doi: 10.1086/166758 Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚A., & Stein, R. 2001, A&A, 377, L14, -doi: 10.1051/0004-6361:20011160Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar +doi: 10.1051/0004-6361:20011160 +Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar Physics, 15, 6, doi: 10.1007/s41116-018-0013-5 Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, M. 2017, A&A, 599, A69, diff --git a/read/results/pypdf/2201.00200.txt b/read/results/pypdf/2201.00200.txt index 0736717..02833d2 100644 --- a/read/results/pypdf/2201.00200.txt +++ b/read/results/pypdf/2201.00200.txt @@ -1,12 +1,12 @@ -Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected ©ESO 2022 +Astronomy & Astrophysicsmanuscript no. solar˙model˙v10˙corrected © ESO 2022 January 4, 2022 Local heating due to convective overshooting and the solar modelling problem -I. Bara ffe1,2, T. Constantino1, J. Clarke1, A. Le Saux1,2, T. Go ffrey4, T. Guillet1, J. Pratt3, D. G. Vlaykov1 -1University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk ) -2´Ecole Normale Sup ´erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´e de Lyon, France -3Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA -4Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK +I. Baraffe1,2, T. Constantino1, J. Clarke1, A. Le Saux1,2, T. Goffrey4, T. Guillet1, J. Pratt3, D. G. 
Vlaykov1 +1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail:i.baraffe@ex.ac.uk) +2 ´Ecole Normale Sup´erieure, Lyon, CRAL (UMR CNRS 5574), Universit´e de Lyon, France +3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA +4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK ABSTRACT Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in onedimensional @@ -22,7 +22,7 @@ Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution 1. Introduction Modelling the internal structure of the Sun is still a challenge. A recent review by Christensen-Dalsgaard (2021) describes in -detail the long-standing e fforts to improve solar models. The solar +detail the long-standing efforts to improve solar models. The solar modelling problem refers to the discrepancy between helioseismology and solar interior models that adopt low metallicities predicted by the three-dimensional (3D) atmosphere models @@ -49,7 +49,8 @@ a nearly adiabatic form to a radiative form is usually assumed, as suggested by the theoretical work of Zahn (1991). Models with a smoother transition have also been investigated. Based on the analysis of models with di fferent stratifications near the -Send o ffprint requests to : I. Bara ffebase of the convective zone, Christensen-Dalsgaard et al. (2011) +Send offprint requests to: I. Baraffe +base of the convective zone, Christensen-Dalsgaard et al. (2011) found that models that better fit the helioseismic data have a weakly sub-adiabatic temperature gradient in the lower part of the convective zone and a smooth transition to the radiative gradient @@ -71,8 +72,8 @@ Zhang et al. 
(2019) find that this model cannot solve the whole solar problem because such a flux worsens the sound-speed profile in the deep radiative interior of their solar model. Given the uncertainties regarding the temperature stratification of the overshooting - region, solar modellers have considered these e ffects as -secondary and have focused their e fforts on exploring the impact + region, solar modellers have considered these effects as +secondary and have focused their efforts on exploring the impact of solar abundances, microphysics (opacities, equations of state, nuclear reaction rates), and chemical mixing and di ffusion (see details and references in the review of Buldgen et al. 2019a). @@ -80,8 +81,9 @@ Additional, more exotic e ffects such as early disk accretion or solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot 2021) are also attracting increasing attention. To reinvigorate the debate, Buldgen et al. (2019b) recently -highlighted once again how the transition of the temperature gra1arXiv:2201.00200v1 - [astro-ph.SR] 1 Jan 2022 +highlighted once again how the transition of the temperature gra1 + +arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022 Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem dient just below the convective envelope can significantly impact the disagreement between solar models and helioseismic constraints. @@ -94,7 +96,7 @@ two extremes. Christensen-Dalsgaard et al. (2018) also note that an increase in the temperature at the transition would remove a remaining small sharp dip in the speed of sound immediately beneath the convective zone of the model. A major di fficulty is -to disentangle the e ffects of overshoot from the e ffects of opacities, +to disentangle the effects of overshoot from the effects of opacities, which can also alter the temperature gradient in these layers. 
Given the large number of parameters to deal with in order to improve solar models and the current lack of strong arguments in @@ -143,7 +145,8 @@ et al. 2002; Brun et al. 2011; Hotta 2017; K ¨apyl¨a 2019; Cai et al. 2019; Higl et al. 2021) have also reported a modification of the local thermal background in the overshooting region, but without providing a detailed description. The simulations of B21 -provide a physical explanation that links the convective penetra-tion process to the local heating and to the radiative bump in the +provide a physical explanation that links the convective penetration + process to the local heating and to the radiative bump in the overshooting layer. The solar-like star simulated in B21 is based on a model that is not thermally relaxed. It is reasonable to assume that the local heating seen in B21 is present in stars because @@ -153,18 +156,18 @@ These two features are also commonly observed in other hydrodynamical simulations, as mentioned above. An exploration of the impact of this heating on stellar evolution models may reveal that heating is a necessary aspect of models for overshooting. -Fig. 1. Radial profile of the temperature departure ∆T/T0from -the initial profile T0and of the sub-adiabaticity ( ∇−∇ ad) close to +Fig. 1.Radial profile of the temperature departure ∆T/T0 from +the initial profile T0 and of the sub-adiabaticity (∇−∇ad) close to the convective boundary predicted by 2D hydrodynamical simulations (B21) of solar-like models. The lower panel corresponds to the model with a realistic stellar luminosity and the upper panel to a model with luminosity enhanced by a factor of ten. -The dash-dotted red lines show ∆T/T0(in %), the relative difference +The dash-dotted red lines show ∆T/T0 (in %), the relative difference between the time and space averages of the temperature, T, and the initial temperature, T0. The solid blue lines show the time and space averages of the sub-adiabaticity ( ∇−∇ ad). 
The dashed black lines show the initial profile of the sub-adiabaticity, -(∇−∇ ad)init. The convective boundary is indicated by the vertical +(∇−∇ad)init. The convective boundary is indicated by the vertical solid line (see details in B21) The behaviour of the thermal profile below the convective boundary found in the simulations of B21 is illustrated in Fig. @@ -173,20 +176,20 @@ boundary found in the simulations of B21 is illustrated in Fig. enhancement in the luminosity by a factor of ten because the features are intensified in these ‘boosted’ models (upper panel). The figure shows the local heating in the overshooting layer and -its impact on the sub-adiabaticity ( ∇−∇ ad), with∇=d log T -d log Pthe +its impact on the sub-adiabaticity (∇−∇ad), with ∇= d logT +d logP the Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem -temperature gradient and ∇ad=d log T -d log P|Sthe adiabatic gradient. +temperature gradient and ∇ad = d logT +d logP |S the adiabatic gradient. The initial stratification below the convective boundary (located -atr=0.6734×Rstarfor this specific stellar model) is set by -the stable radiative gradient, ∇rad(see the dashed black line below +at r = 0.6734 ×Rstar for this specific stellar model) is set by +the stable radiative gradient, ∇rad (see the dashed black line below the convective boundary in Fig. 1). B21 show that, as a result of the local heating below the convective boundary characterised - by the bump in temperature di fference ∆T/T0displayed + by the bump in temperature di fference ∆T/T0 displayed in Fig. 1, the temperature gradient becomes less sub-adiabatic -immediately below the convective boundary1. The net result is +immediately below the convective boundary 1. The net result is a smoother transition just below the convective boundary with a temperature gradient that has an intermediate value between the radiative temperature gradient and the adiabatic one. 
In the @@ -203,25 +206,26 @@ qualitative impact of the local heating produced by overshooting. et al. (2020), who constructed a static structure of the Sun in agreement with seismic inversions of the Ledoux discriminant defined by -A=1 -Γ1dlnP -dlnr−dlnρ -dlnr, (1) -with Γ1=(∂lnP/∂lnρ)ad. Starting from a reference evolutionary +A = 1 +Γ1 +d ln P +d ln r −d ln ρ +d ln r , (1) +with Γ1 = (∂ln P/∂ln ρ)ad. Starting from a reference evolutionary model, Buldgen et al. (2020) used an inversion procedure to iteratively reconstruct a solar model. Successive inversions of the Ledoux discriminant allowed them to obtain a model-independent profile for this quantity. Their reconstruction method also gives solar structures that are in excellent agreement - with other structural inversions, namely the entropy, S, the + with other structural inversions, namely the entropy,S , the square of the speed of sound, c2 -s, and the density, ρ. To illustrate +s , and the density, ρ. To illustrate the convergence of their reconstruction procedure, they show (right panels of their Figs. 3-6) the successive iterations that converge to an excellent level of agreement for the four structural -inversions ( A,S,c2 -s,ρ) starting from the initial reference model -adopted in their work. The di fferences found between the reconstructed +inversions (A, S , c2 +s , ρ) starting from the initial reference model +adopted in their work. The differences found between the reconstructed model and the reference model are useful as they indicate the modifications of the reference model that are required to converge towards a solar model in agreement with helioseismic @@ -231,25 +235,26 @@ analysis in Sect. 3.2. The first concerns the Ledoux discriminant. The major discrepancy between the Sun and the reference model occurs just below the convective boundary, with a large positive bump for -the quantity ( ASun-Aref). +the quantity (ASun - Aref). The second concerns the speed of sound. 
The same positive bump at the same location as for the Ledoux discriminant, A, is observed for the quantity ( c2 -s,Sun−c2 +s,Sun −c2 s,ref)/c2 s,ref. The corrections -applied to Aduring the reconstruction procedure also reduce the +applied to A during the reconstruction procedure also reduce the discrepancy in the speed of sound in the radiative region. The third concerns the entropy. Large discrepancies are observed in both the radiative region and the convective zone. The -1Less sub-adiabatic means that |∇−∇ ad|decreases compared to the -initial profile.entropy discrepancy ( SSun−Sref)/Srefhas two positive peaks in +1 Less sub-adiabatic means that |∇−∇ ad|decreases compared to the +initial profile. +entropy discrepancy (S Sun −S ref)/S ref has two positive peaks in the radiative zone, one just below the overshooting region and a larger peak deeper at ∼40% of the stellar radius. This discrepancy is negative in the convective zone. The corrections applied -toAhelp reduce these entropy discrepancies in both regions. -The fourth concerns the density. The quantity ( ρSun− -ρref)/ρrefhas a negative peak in the radiative region, at ∼35% +to A help reduce these entropy discrepancies in both regions. +The fourth concerns the density. The quantity ( ρSun − +ρref)/ρref has a negative peak in the radiative region, at ∼35% of the stellar radius, and is positive in the convective zone. Importantly, Buldgen et al. (2020) mention that their reconstruction procedure gives similar Ledoux discriminant profiles @@ -271,7 +276,7 @@ modified to reproduce the temperature gradient in the overshooting the chemical abundances are not modified by nuclear reactions, mixing, or microscopic di ffusion during the relaxation process. For these tests, we used the 1D Lyon stellar evolution code -(Bara ffe et al. 1998). We repeated this experiment based on thermal +(Baraffe et al. 1998). We repeated this experiment based on thermal relaxation with the stellar evolution code MONSTAR (e.g. 
Constantino et al. 2014) and obtained the same qualitative results. @@ -280,35 +285,35 @@ modification of the temperature gradient in the overshooting layer from the zero age main sequence (ZAMS). The models are then evolved until they reach the solar radius and luminosity. With this approach, changes in the chemical abundances from -nuclear reactions, microscopic di ffusion, and overshooting mixing +nuclear reactions, microscopic diffusion, and overshooting mixing are also consistent with any modification of the structure induced by the forced local heating in the overshooting layer. These tests were performed with MONSTAR as it includes the -treatment of microscopic di ffusion. +treatment of microscopic diffusion. The first method allows the impact of local heating in the overshooting layer after thermal relaxation to be isolated. The second method provides evolutionary models that are selfconsistent - since the e ffect of the modification of the temperature + since the effect of the modification of the temperature gradient is accounted for during their evolution on the main sequence. In the following, we adopt a modification of the local temperature gradient in the overshooting layer that qualitatively reproduces the behaviour displayed in Fig. 1. We define an overshooting - length dov=αovHP,CB, with HP,CBthe pressure scale height -at the convective boundary and αova free parameter. We also define - two radial locations, rov=rCB−dovandrmid=rCB−dov/2, -with rCBthe radial location of the convective boundary. The temperature - gradient is modified as follows. For rmid≤rRbis reached +increases with radius (R) as a broken power law, whose exponent +decreases as soon as the “bending radius” Rb is exceeded, it and +becomes zero when the “saturation radius” Rs > Rb is reached (i.e., the period becomes independent of radius). 
The exact values - of RbandRs, as well as of the exponents, depend on the + of Rb and Rs, as well as of the exponents, depend on the current mass ( M). We assume that the FM is dominant if the stellar radius is larger than the critical value Rdom,0, which we computed from the current stellar mass using Eq. 4 of Trabucchi et al. (2021b). -1Hereinafter, whenever we discuss periods, it should be understood -that we refer to FM periods on which this work is focused.2.2. Data +1 Hereinafter, whenever we discuss periods, it should be understood +that we refer to FM periods on which this work is focused. +2.2. Data As a first set of data, we considered the cluster-LPV pairs used by Grady et al. (2019, see their tables 1 and 2). These consist of 19 clusters in the Large Magellanic Cloud, hosting a total of 20 @@ -148,7 +151,7 @@ potential LPV members, and eight Galactic clusters each hosting a potential LPV member. We expanded this list with data for LPVs in a few populous clusters, namely the Galactic clusters NGC 362, NGC 2808, 47 -Tuc (NGC 104), and ωCen (NGC 5139); the LMC clusters NGC +Tuc (NGC 104), andωCen (NGC 5139); the LMC clusters NGC 1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic Cloud (SMC). The source lists were taken from Lebzelter & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010), @@ -166,8 +169,8 @@ Cutri et al. 2013), the catalog of variable stars from the AllSky Automated Survey for SuperNovae (ASAS-SN Jayasinghe et al. 2020), the catalogs of LPVs in the Magellanic Clouds from the third phase of the Optical Gravitational Lensing Experiment -(OGLE-III, Soszy ´nski et al. 2009, 2011), the early third data release - from the Gaia mission ( Gaia EDR3, Gaia Collaboration +(OGLE-III, Soszy´nski et al. 2009, 2011), the early third data release + from the Gaia mission (Gaia EDR3, Gaia Collaboration et al. 2021), and the catalog of LPV candidates from Gaia DR2 (Mowlavi et al. 2018). Following Grady et al. 
(2019), we took ages from @@ -175,18 +178,18 @@ Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters in the Galaxy and LMC, respectively, thereby ensuring that ages would be homogeneously derived for clusters in both galaxies. Age uncertainties from Baumgardt et al. (2013), provided for -each cluster, are generally around σlog(τ)≃0.05. Kharchenko +each cluster, are generally around σlog(τ) ≃ 0.05. Kharchenko et al. (2016) do not provide age uncertainties, but a reasonable -upper limit for their method should be σlog(τ)=0.2 based on +upper limit for their method should be σlog(τ) = 0.2 based on the analysis of Kharchenko et al. (2005) (the same value was adopted by Grady et al. 2019, in their Fig. 7). As discussed by Kamath et al. (2010), the age of the SMC cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is -consistent with the value τ=1.45±0.05 Gyr from Goudfrooij -et al. (2014), while it is as young as τ≃0.89±0.015 Gyr according +consistent with the value τ = 1.45 ±0.05 Gyr from Goudfrooij +et al. (2014), while it is as young as τ ≃0.89 ±0.015 Gyr according to Perren et al. (2017). Since an accurate estimate is not necessary for our exploratory analysis, we took a rough average -and assumed log( τ/yr)=9.1±0.1. NGC 419 and NGC 1846 +and assumed log( τ/yr) = 9.1 ±0.1. NGC 419 and NGC 1846 likely exhibit TP-AGB boosting (Girardi et al. 2013). We note that some clusters show multiple stellar populations, whose age spread has been estimated in some cases (e.g., Mackey & Broby @@ -194,12 +197,12 @@ Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is consistent with the age uncertainties we adopted. Distances of Galactic clusters were also taken from Kharchenko et al. (2016), while for the Magellanic Clouds and -their clusters we adopted the distance moduli µLMC=18.49± -0.09 mag and µSMC=18.96±0.02 mag from de Grijs et al. 
+their clusters we adopted the distance moduli µLMC = 18.49 ± +0.09 mag and µSMC = 18.96 ±0.02 mag from de Grijs et al. (2017). We searched for data on interstellar extinction from several literature works (e.g., Nayak et al. 2016; Kharchenko et al. 2016; Perren et al. 2017), all of which suggest that extinction -in the Ksfilter is smaller than ∼0.1 mag for most of the clusters +in the Ks filter is smaller than ∼0.1 mag for most of the clusters we considered, and at most as large as ∼0.3 mag, which is negligible for our purposes. Article number, page 2 of 9 @@ -208,7 +211,7 @@ A detailed membership verification is beyond the scope of this work, and we relied on the checks performed by authors whose source lists we adopted. It should be kept in mind that some sources may not be real cluster members. -For sources without a spectral type, we used the Gaia 2MASS +For sources without a spectral type, we used the Gaia2MASS diagram (Lebzelter et al. 2018, 2019) to determine whether they are O- or C-rich. We used the near-infrared periodluminosity diagram to identify the most likely pulsation mode @@ -230,7 +233,7 @@ likely negligible compared with those associated with age. Panel (a) of Fig. 1 shows a comparison between model predictions and observations in the PFM–log(τ/yr) plane. The former are displayed by a density map showing the expected number -NFMof LPVs pulsating in the FM in each period-age bin, normalized +NFM of LPVs pulsating in the FM in each period-age bin, normalized to maximum. Model predictions are in good agreement with data derived from observations (i.e., individual LPVs in clusters, represented by symbols), and they show that the period @@ -238,17 +241,17 @@ clusters, represented by symbols), and they show that the period Crosses mark the average properties of the three groups of Crich LPVs from Feast et al. 
(2006, their table 4), which fit the general pattern with the exception of their group 3, estimated to -be older than what our models predict at P≃650. +be older than what our models predict at P ≃650. We also show a linear best-fit to the models distribution (weighted by NFM), which shows a fairly good agreement with the best-fit to observations by Grady et al. (2019, also shown). However, the best-fit line does not fully capture the properties of the predictions, nor of the observed trend. Indeed, models are indicative of a substantial dispersion around the relation. For instance, - at 1 Gyr, the FM period ranges from ∼200 days to∼550 + at 1 Gyr, the FM period ranges from∼200 days to ∼550 days. Conversely, LPVs pulsating in the FM with a period of 350 -days are predicted to be at least ∼200 Myr old, but they can be as -old as∼3 Gyr. Observed data are consistent with the predicted +days are predicted to be at least∼200 Myr old, but they can be as +old as ∼3 Gyr. Observed data are consistent with the predicted spread, although the agreement cannot be considered as the observed sample adopted is not complete. Nonetheless, it is relevant that some clusters host multiple @@ -261,20 +264,21 @@ with the age uncertainties we adopted. This means that longerperiod opposite is true at shorter periods. This tends to strengthen the agreement between models and observations. Our data set samples the intermediate-age range (NGC 419 -and NGC 1846) relatively well as well as old ages ( ωCen, 47 +and NGC 1846) relatively well as well as old ages ( ω Cen, 47 Tuc, NGC 362, and NGC 2808). This provides us with the opportunity to study the period distribution at these ages, and for -a more detailed comparison between models and observations.On the basis of the average age of these two groups of clusters +a more detailed comparison between models and observations. 
+On the basis of the average age of these two groups of clusters and the associated uncertainty, and taking the discrete age sampling of the isochrones into account, we considered the age -ranges log(τ/yr)=9.15±0.10 and log(τ/yr)=10.10±0.20. Period +ranges log(τ/yr) = 9.15 ±0.10 and log(τ/yr) = 10.10 ±0.20. Period distributions at those ages are displayed in panels (b) and (c) of Fig. 1, respectively, showing good agreement between model predictions and observations. We note that in both cases, the distribution is skewed toward short periods, which seems to be true at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2, which is a version of the PA plane limited to an O-rich composition2. - Indeed, although at τ≲5 Gyr the observed sample is + Indeed, although at τ ≲ 5 Gyr the observed sample is very scarce, it appears to be consistent with models predicting a more densely populated region in the shorter-period half of the PA distribution. @@ -312,7 +316,7 @@ both patterns emerging because of the prominent role of mass in shaping stellar structure and evolution. Indeed, stellar mass determines the lifetimes of the main evolutionary stages, and thus the age of stars in the AGB phase. Pulsation models (Trabucchi - et al. 2021b) show that the radius Rdom,0(and corresponding + et al. 2021b) show that the radius Rdom,0 (and corresponding luminosity) at the onset of dominant FM pulsation (DFMP) increases with mass, so that the most massive FM-dominated LPVs are brighter. They also have longer periods, as this increases @@ -323,18 +327,18 @@ We note that this would not be the case if the FM were dominant along the entire AGB, as the large change in radius during this phase would result in a wide range of periods at a given age. It is the very fact that DFMP occurs only during the final portion -2A further version of the PA plane highlighting both chemical types +2 A further version of the PA plane highlighting both chemical types can be found in Fig. 
A.2 of appendix A.1. Article number, page 3 of 9 -A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on +A&A proofs:manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Fig. 1.Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked -in panel (a) by the blue and red shaded areas (at log( τ/yr)∼9.15 and∼10.10, respectively). For clarity, the e ffect of the TP-AGB boosting is +in panel (a) by the blue and red shaded areas (at log( τ/yr) ∼9.15 and ∼10.10, respectively). For clarity, the e ffect of the TP-AGB boosting is suppressed in panel (a). -Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while +Fig. 2.Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while dashed lines are best fits to the edges of the model distribution (see the text for more details). of the AGB that limits the range of periods a FM-pulsating LPV can have at a given age. 
Yet, the DFMP part of the AGB is long @@ -346,19 +350,20 @@ At a given initial metallicity Zi, the shape of the period distribution envelope expansion accelerates, while the period becomes progressively less sensitive to changes in radius (see Appendix C). In particular, the slope of the period-radius relation decreases -sharply at Pb=P(Rb). The FM period distribution is roughly +sharply at Pb = P(Rb). The FM period distribution is roughly symmetric around that value, but at its short-period side, the FM is not dominant. Therefore, when only FM-dominated LPVs are considered, as is done here, the observed period distribution appears - skewed toward short periods.This feature is strengthened when a set of isochrones is considered + skewed toward short periods. +This feature is strengthened when a set of isochrones is considered which spans a range of initial metallicities because the adopted criterion for the onset of DFMP does not depend on metallicity, but the FM period does as metal-poor LPVs are warmer and have smaller radii compared with metal-rich ones. As a consequence, the bulk of the period distribution of metalpoor LPVs is at periods shorter than Pb, so they only contribute -to the global distribution (i.e., at all Ziat a given age) over a -small period range at P≳Pb. In contrast, metal-rich LPVs have +to the global distribution (i.e., at all Zi at a given age) over a +small period range at P ≳ Pb. In contrast, metal-rich LPVs have periods well beyond Pb, so they contribute both at that value and at longer periods. The result is an excess of FM-dominated LPVs near Pb, that is to say on the short side of the overall period distribution. @@ -377,20 +382,20 @@ environment-dependent, and it is not necessarily universal. 
A further point of uncertainty stems from the fact that the prescription we adopted assumes that the FM period only depends upon the mass and radius, and that it is a ffected by a -change in composition only through the e ffect that such a variation +change in composition only through the effect that such a variation has on the radius. While this is true to a good approximation, linear models show a small dependence of periods on metallicity at a fixed mass and radius, but the quantitative impact in the nonlinear case is unknown. We can only estimate, based on the -results of Trabucchi et al. (2019), an uncertainty of ±10% at most +results of Trabucchi et al. (2019), an uncertainty of±10% at most with respect to the prescriptions adopted here. Qualitatively, a realistic age-metallicity relation and the metallicity dependence of the period and of the onset of DFMP are all expected to result in a steeper PA relation than the one we predict, but it is di fficult to assess the relative importance of -these e ffects. In this sense, the composition probably a ffects the +these effects. In this sense, the composition probably a ffects the shape of the PA relation more than its dispersion. The latter is -likely a ffected by the composition indirectly through mass loss, +likely affected by the composition indirectly through mass loss, the analysis of which is beyond the scope of this study. However, we point out that mass loss represents a source of scatter in combination with the occurrence of thermal pulses, because it reduces @@ -410,7 +415,7 @@ as is customarily done for classical Cepheids with a color term (e.g., Bono et al. 2005), but with unsatisfactory results. A correction dependent on the photometric amplitude of variability represents a promising alternative, but it cannot be pursued at the -moment. Indeed, for computational e fficiency, current pulsation +moment. 
Indeed, for computational efficiency, current pulsation models include only a crude treatment of the atmospheric layers as they do not a ffect pulsation periods. On the other hand, the atmosphere is crucial in determining the spectral energy distribution @@ -420,7 +425,7 @@ sample adopted here is too heterogeneous for a self-consistent investigation of amplitude, but this kind of study could be made possible by the upcoming data release 3 of the Gaia mission (Gaia Collaboration et al. 2021) and the future Legacy Survey -of Space and Time (LSST, Ivezi ´c et al. 2019) of the Vera Rubin +of Space and Time (LSST, Ivezi´c et al. 2019) of the Vera Rubin Observatory. It is worth noting that our analysis applies to Miras as well as SRVs, provided that they predominantly pulsate in the FM. @@ -430,7 +435,8 @@ been done in literature so far, undoubtedly has some advantages: detect than SRVs, and their light curves are easier to process as they tend to be more regular. Moreover, Miras represent the end-point of AGB evolution, so in principle they correspond to a -smaller range of stellar parameters compared to the full extent ofthe DFMP regime, and they display a smaller range of periods +smaller range of stellar parameters compared to the full extent of +the DFMP regime, and they display a smaller range of periods at a given age (cf. Feast & Whitelock 2000b). In other words, they should exhibit a relatively narrow PA relation (even though, based on the observational data set we adopted, there is no conclusive @@ -492,7 +498,7 @@ scatter. We suggest that corrective terms, involving the amplitude this possibility. A study of the impact of metallicity on nonlinear pulsation is highly desirable to pursue this line of investigation, Article number, page 5 of 9 -A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +A&A proofs:manuscript no. 
trabucchi_etal_2022_period_age_relation_of_lpvs as would be a theoretical investigation of the dependence of photometric amplitudes upon global stellar parameters. Acknowledgements. M.T. and N.M. acknowledge the support provided by the @@ -502,29 +508,29 @@ this paper, and to Léo Girardi for helping with the computation and interpretat of isochrones. This research has made use of: data from the OGLE-III Catalog of Variable Stars; data products from the Two Micron All Sky Survey, which is a joint project of the University of Massachusetts and the Infrared - Processing and Analysis Center /California Institute of Technology, funded + Processing and Analysis Center/California Institute of Technology, funded by the National Aeronautics and Space Administration and the National Science Foundation; data from the European Space Agency (ESA) mission Gaia -(https://www.cosmos.esa.int/gaia ), processed by the Gaia Data Processing +(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Processing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ -gaia/dpac/consortium ). Funding for the DPAC has been provided by national +gaia/dpac/consortium). Funding for the DPAC has been provided by national institutions, in particular the institutions participating in the Gaia Multilateral - Agreement. This research has made use of the following free /open source -software and /or libraries: the Starlink Tables Infrastructure Library (STILTS and + Agreement. This research has made use of the following free/open source +software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver -et al. 2016) notebooks; the P ython libraries N umPy(Harris et al. 2020), S ciPy -(Virtanen et al. 2020), matplotlib (a Python library for publication quality graphics, - Hunter 2007), and A stropy (a community-developed core P ython package +et al. 
2016) notebooks; the P ython libraries NumPy (Harris et al. 2020), S ciPy +(Virtanen et al. 2020),matplotlib(a Python library for publication quality graphics, + Hunter 2007), and A stropy (a community-developed core Python package for Astronomy, Astropy Collaboration et al. 2018). This research has made use of NASA’s Astrophysics Data System Bibliographic Services, and of the following services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue -access tool (DOI: 10.26093 /cds/vizier, Ochsenbein et al. 2000), the “Aladin sky +access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012; Pineau et al. 2020). References Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A, 591, A8 -Astropy Collaboration, Price-Whelan, A. M., Sip ˝ocz, B. M., et al. 2018, AJ, 156, +Astropy Collaboration, Price-Whelan, A. M., Sip˝ocz, B. M., et al. 2018, AJ, 156, 123 Battinelli, P. & Demers, S. 2012, A&A, 544, A10 Battinelli, P. & Demers, S. 2013, A&A, 553, A93 @@ -565,10 +571,11 @@ Grady, J., Belokurov, V ., & Evans, N. W. 2019, MNRAS, 483, 3022 Grady, J., Belokurov, V ., & Evans, N. W. 2020, MNRAS, 492, 3128 Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357 Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90 -Ivezi ´c, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 -Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13Joo, S.-J. & Lee, Y .-W. 2013, ApJ, 762, 36 +Ivezi´c, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 +Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13 +Joo, S.-J. & Lee, Y .-W. 2013, ApJ, 762, 36 Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105 -Kamath, D., Wood, P. R., Soszy ´nski, I., & Lebzelter, T. 2010, MNRAS, 408, 522 +Kamath, D., Wood, P. R., Soszy´nski, I., & Lebzelter, T. 2010, MNRAS, 408, 522 Kharchenko, N. 
V ., Piskunov, A. E., Röser, S., Schilbach, E., & Scholz, R. D. 2005, A&A, 438, 1163 Kharchenko, N. V ., Piskunov, A. E., Schilbach, E., Röser, S., & Scholz, R. D. @@ -614,9 +621,9 @@ Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Shortridg 125 Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y . 2012, ApJ, 761, L10 Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163 -Soszy ´nski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22 -Soszy ´nski, I., Udalski, A., Szyma ´nski, M. K., et al. 2009, Acta Astron., 59, 239 -Soszy ´nski, I., Udalski, A., Szyma ´nski, M. K., et al. 2011, Acta Astron., 61, 217 +Soszy´nski, I., Olechowska, A., Ratajczak, M., et al. 2021, ApJ, 911, L22 +Soszy´nski, I., Udalski, A., Szyma´nski, M. K., et al. 2009, Acta Astron., 59, 239 +Soszy´nski, I., Udalski, A., Szyma´nski, M. K., et al. 2011, Acta Astron., 61, 217 Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Series, V ol. 351, Astronomical Data Analysis Software and Systems XV , ed. C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666 @@ -639,14 +646,14 @@ Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225 Ya’Ari, A. & Tuchman, Y . 1996, ApJ, 456, 350 Article number, page 6 of 9 Trabucchi et al.: The period-age relation of LPVs -Fig. A.1. Absolute- KsGaia -2MASS diagram for the stars with or without +Fig. A.1.Absolute-Ks Gaia-2MASS diagram for the stars with or without a spectral type (left and right panels, respectively) in the selected sample. Symbol colors and shapes indicate the spectral type and host cluster described in the legend, respectively, which also reports the number of sources displayed (i.e., having both optical and NIR photometry). The dashed line marks the separation between O- and C-rich sources according to Lebzelter et al. (2018). 
An arrow marks the source MSX -LMC 124 in NGC 1830 that, having WBP,RP−WJ,Ks=9.73 mag, lies outside +LMC 124 in NGC 1830 that, havingWBP,RP −WJ,Ks = 9.73 mag, lies outside the plot area. Background dots are LPVs in the LMC from OGLEIII (light gray) and Mowlavi et al. (2018) (darker gray). Appendix A: Classification of observed LPVs @@ -658,24 +665,25 @@ is the star 5-3 in NGC 419, for which we adopted the S-type as reported by Lloyd Evans (1983a). We also searched the SIMBAD astronomical database (Wenger et al. 2000) for spectral type information, which we -found for 26 more stars. We used the Gaia -2MASS diagram of +found for 26 more stars. We used the Gaia-2MASS diagram of Lebzelter et al. (2018) to confirm the chemical type classification taken from literature and to characterize the surface chemistry of sources of an unknown spectral type (see Fig. A.1). Among the latter, we identified 13 C-rich stars and 106 O-rich sources. Three of the sources without a spectral type lack Gaia photometry, - so they cannot be classified with the Gaia -2MASS. Two + so they cannot be classified with theGaia-2MASS. Two of them (LW5 and LW22 in 47 Tuc) have no match in Gaia EDR3, but they have NIR data and are probably O-rich based on -their position in the J−Ksversus Kscolor-magnitude diagram. +their position in the J −Ks versus Ks color-magnitude diagram. The third source is one of the two stars in NGC 1903 from the list of Grady et al. (2019), which we identified with the 2MASS source J05171633-6920298. It is likely C-rich according to the NIR color-magnitude diagram. -Finally, the sources V138 in ωCen, LW15 in NGC 2808, +Finally, the sources V138 in ω Cen, LW15 in NGC 2808, and LW4 in NGC 362 lack NIR data. They cannot be placed in the NIR PL diagram, upon which we relied to assign pulsation -modes to periods, so we excluded them from the sample. Thedistribution of O- and C-rich sources in the period-age diagram +modes to periods, so we excluded them from the sample. 
The +distribution of O- and C-rich sources in the period-age diagram is shown in Fig. A.2. Appendix A.2: Variability For variability information, we complemented the data from @@ -709,12 +717,12 @@ When available, the variability type was taken from OGLEIII or ASAS-SN. We note that we are only interested in whether a star is classified as a Mira or semi-regular variable. In many cases, this type is not given or the star is simply considered, for -instance, as an LPV or AGB in SIMBAD , in which case we considered +instance, as an LPV or AGB in SIMBAD, in which case we considered the variability type as undetermined. Appendix B: Fitting relations We obtained analytic expressions for the PA relations separately for O- and C-rich stars, proceeding as follows. For each bin of -log(τ/yr),we modeled the period distribution with a Gaussian +log(τ/yr), we modeled the period distribution with a Gaussian kernel density estimator (KDE) and identified the peak of the distribution. To describe the boundaries of the PA relation, we adopted, at each age, the values of the period at which the distribution @@ -722,35 +730,37 @@ adopted, at each age, the values of the period at which the distribution value upon visual inspection of the PA plane. We modeled the central trend of the PA relation, as well as its short- and longperiod edges, with linear or quadratic functions in the form -log(τ/yr)=a0+a1(P/˜P)+a2(P/˜P)2, (B.1) -(where ˜P=350 days) and employed a Lenvenberg-Marquardt -nonlinear regression algorithm3to derive the best-fit coe fficients, +log(τ/yr) = a0 + a1 (P/˜P) + a2 (P/˜P)2 , (B.1) +(where ˜P = 350 days) and employed a Lenvenberg-Marquardt +nonlinear regression algorithm3 to derive the best-fit coefficients, which are listed in Table B.1. We remark that these best-fit expressions - are only valid in the intervals 8 .0≤log(τ/yr)≤10.3 -and 20

6) of active regions especially the -ones associated with small (or micro-) flares ( Wang et al. (2021 )). The loops of our flaring active +kind of oscillations often occur in hot coronal loops (log (T) > 6) of active regions especially the +ones associated with small (or micro-) flares ( W ang et al. (2021)). The loops of our flaring active region are also hot loops with the mean temperature above thi s range. They also show intensity oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring -loops. The temperature of the non-flaring loops are lower (lo g(T)<6) and as discussed above, +loops. The temperature of the non-flaring loops are lower (lo g(T) < 6) and as discussed above, we believe that the observed oscillation-like periods in no n-flaring loops should be more probably related to the high amplitude fluctuations. Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes -of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the -FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively. - And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for -nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T)Min(log - T)) for these non-flaring loops show a difference from the flari ng ones and are lower. +of the fluctuations show a discrepancy . Mean of the parameter (Max(log T)-Min(log T)) in the +FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively + . And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for +nonfloopA, B, and C respectively . Therefore the values of the quantity mean of (Max(log T)Min(log + T)) for these non-flaring loops show a difference from the flari ng ones and are lower . 
Loops of the non-flaring active region 12194 have a relativel y uniform temperature at the beginning of the time interval, which rises slightly at its e nd. As the Solar Monitor reports in the neighborhood of this region, the flaring active region 12192 exists of which between its multiple -flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a p ossible suggestion +flares, there is a c4.6 class flare occurring at 9:44UT . Therefore, it could be a p ossible suggestion that the abovementioned slight temperature rise in the loop s of AR 12194 (in the time interval 8:00 to 9:00) originated from the influence of an increase in t he energy at the pre-flare conditions exist in the AR 12192. Hence as our study shows, the temperature of coronal loops of flaring AR changes in an -oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher temperatures +oscillatory manner . Compared with these non-flaring loops, the flaring loops show higher temperatures on average and higher oscillation periods with hi gher peaks and deeper valleys. More accurate commentary in this respect requires more extensiv e statistical research and broader observations. -arcsecarcsec -79154229304379454−6825118211304397 +arcsec +arcsec +79 154 229 304 379 454 +−68 +25 +118 +211 +304 +397 a -arcsecarcsec +arcsec +arcsec -114.6 171.2 227.8 284.4 341171.4206.3241.2276.1311 -Loop B1 Loop ALoop C2 -Loop C1b + +114.6 171.2 227.8 284.4 341 +171.4 +206.3 +241.2 +276.1 +311 +Loop B1 Loop A +Loop C2 +Loop C1 +b Loop B2 -Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as se en in the 171 filter. (b) Zoom-in view +Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as se en in the 171 filter . (b) Zoom-in view of the area marked by a box in the left. The selected loops are d istinguished in red. The loops A and B are -the same loops studied by Jain et al. (2015 ) (see Fig.3a in Jain et al. (2015 )). 
-arcsecarcsec -−154 0 154 308−572−418−264−11044 +the same loops studied by Jain et al. (2015) (see Fig.3a in Jain et al. (2015)). +arcsec +arcsec +−154 0 154 308 +−572 +−418 +−264 +−110 +44 a -arcsecarcsec -−202 −134 −66 2 70−396−338−280−221−162 -nonf−LoopAnonf−LoopB -nonf−LoopCb +arcsec +arcsec +−202 −134 −66 2 70 +−396 +−338 +−280 +−221 +−162 +nonf−LoopA +nonf−LoopB +nonf−LoopC +b Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 r ecorded by AIA/SDO. (b) Zoom-in view of the area, marked by a box in the left, the loops are dist inguished in red. -5.866.26.46.66.8LogTF−LoopA -5.866.26.46.66.8LogT -22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 -timeLogT - 5.866.26.46.66.8LogTF−LoopB1 -22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 -timeLogT +5.8 +6 +6.2 +6.4 +6.6 +6.8LogT +F−LoopA +5.8 +6 +6.2 +6.4 +6.6 +6.8LogT +22:10 22:20 22:30 22:40 22:50 23:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT + +5.8 +6 +6.2 +6.4 +6.6 +6.8LogT +F−LoopB1 +22:10 22:20 22:30 22:40 22:50 23:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT Figure 3: From up to down: The time-series of the temperature oscillat ions for the first 3 strips of Loop A (strip 1 to 3 from top to down), and the first 2 strips of LoopB1. Horizonta l axis is the time and the vertical axis is the logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1. -22:10 22:20 22:30 22:40 22:50 23:000 11213242 F−loopA -Time Loop Length(Mm) -5.866.26.46.66.8 -22:10 22:20 22:30 22:40 22:50 23:000 5 101520 F−loopB1 -Time Loop Length(Mm) -66.056.16.156.26.256.36.356.46.456.5 -22:10 22:20 22:30 22:40 22:50 23:000 4 8 1216 F−loopB2 -Time Loop Length(Mm) -5.866.26.46.66.8 -22:10 22:20 22:30 22:40 22:50 23:000 6 111722 F−loopC1 -Time Loop Length(Mm) -5.65.866.26.46.66.8 -22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11 F−loopC2 -Time Loop Length(Mm) -5.866.26.46.66.8 -Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. 
The vertical +22:10 22:20 22:30 22:40 22:50 23:00 +0 +11 +21 +32 +42 +F−loopA +Time + +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +5 +10 +15 +20 +F−loopB1 +Time + +Loop Length(Mm) +6 +6.05 +6.1 +6.15 +6.2 +6.25 +6.3 +6.35 +6.4 +6.45 +6.5 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +4 +8 +12 +16 +F−loopB2 +Time + +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +6 +11 +17 +22 +F−loopC1 +Time + +Loop Length(Mm) +5.6 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +22:10 22:20 22:30 22:40 22:50 23:00 +0 +3 +6 +8 +11 +F−loopC2 +Time + +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +Figure 4: T emperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical axis is the distance along the loop in Mm, and the horizontal a xis is the time. The colorbar in the left shows the colors considered for the temperature range. -Table 1: The properties observed for the loop segments of the flaring A R. +T able 1:The properties observed for the loop segments of the flaring A R. FLoopA -(Strip Number)The highest -Temp.’s period -observedMax(log(T))Min(log(T))FLoopB2 +(Strip Number) +The highest +T emp.’s period +observed +Max(log(T))Min(log(T)) -(Strip Number)The highest -Temp.’s period -observedMax(log(T))Min(log(T)) +FLoopB2 +(Strip Number) +The highest +T emp.’s period +observed +Max(log(T))Min(log(T)) 1 9.94 1.09 1 18.07 0.68 2 16.57 0.79 2 24.85 0.83 @@ -552,11 +702,13 @@ FLoopB1 - - 4 16.57 0.93 9 11.04 1.6 10 18.07 1.6 11 18.07 1.6 -Table 2: The properties observed for the loop segments of the non flari ng AR. +T able 2:The properties observed for the loop segments of the non flari ng AR. 
Nonf-LoopA -(Strip Number)The highest -Temp.’s period -observedMax(log(T))Min(log(T)) +(Strip Number) +The highest +T emp.’s period +observed +Max(log(T))Min(log(T)) 1 24 0.61 2 30 0.95 @@ -570,9 +722,11 @@ observedMax(log(T))Min(log(T)) 10 30 0.77 11 30 0.61 Nonf-LoopB -(Strip Number)The highest -Temp.’s period -observedMax(log(T))Min(log(T)) +(Strip Number) +The highest +T emp.’s period +observed +Max(log(T))Min(log(T)) 1 26.66 0.36 2 26.66 0.64 @@ -581,126 +735,242 @@ observedMax(log(T))Min(log(T)) 5 30 0.98 6 8.57 0.67 Nonf-LoopC -(Strip Number)The highest -Temp.’s period -observedMax(log(T))Min(log(T)) +(Strip Number) +The highest +T emp.’s period +observed +Max(log(T))Min(log(T)) 1 26.66 0.76 2 26.66 0.75 3 26.66 0.26 4 30 0.27 5 30 0.8 -22:10 22:20 22:30 22:40 22:50 23:000 11223243 Int−Fe−LoopA -Time Loop Length(Mm) -00.020.040.060.080.10.120.140.160.180.2 -22:10 22:20 22:30 22:40 22:50 23:0000.10.20.30.40.50.60.70.80.91Int−Fe−LoopA -TimeNormalized Intensity Fe XVIII -Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XVIII, and mean intensity of Fe -XVIII (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and +22:10 22:20 22:30 22:40 22:50 23:000 +11 +22 +32 +43 +Int−Fe−LoopA +Time + +Loop Length(Mm) +0 +0.02 +0.04 +0.06 +0.08 +0.1 +0.12 +0.14 +0.16 +0.18 +0.2 +22:10 22:20 22:30 22:40 22:50 23:000 +0.1 +0.2 +0.3 +0.4 +0.5 +0.6 +0.7 +0.8 +0.9 +1 +Int−Fe−LoopA +Time +Normalized Intensity Fe XVIII +Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XV I I I, and mean intensity of Fe +XV I I I (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors considered for the Intensity range. VI. 
acknowledgements The author Narges Fathalian wishes to also express her thank s for the technical support and -comments which has received from Dr.Farhad Daii and Dr.Mohs en Javaherian regarding to this +comments which has received from Dr .Farhad Daii and Dr .Mohs en Javaherian regarding to this work. - 5.866.26.46.66.8LogTNonF−LoopA -8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 -timeLogT - 5.866.26.46.66.8LogTNonF−LoopB -8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 -timeLogT + +5.8 +6 +6.2 +6.4 +6.6 +6.8LogT +NonF−LoopA +8:00 8:10 8:20 8:30 8:40 8:50 9:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT + +5.8 +6 +6.2 +6.4 +6.6 +6.8LogT +NonF−LoopB +8:00 8:10 8:20 8:30 8:40 8:50 9:00 +5.8 +6 +6.2 +6.4 +6.6 +6.8 +time +LogT Figure 6: from top to down: The time-series of the temperature for the fi rst 2 strips (from top to down) of the nonflaring Loops A and B. Horizontal axis is the time and the verti cal axis is the logarithm of the temperature. -8:10 8:20 8:30 8:40 8:50 9:000 5 101520 NonF−loopA -Time Loop Length(Mm) -5.866.26.46.66.8 -8:10 8:20 8:30 8:40 8:50 9:000 5 9 1418 NonF−loopB -Time Loop Length(Mm) -5.866.26.46.66.8 -8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10 NonF−loopC -Time Loop Length(Mm) -5.866.26.46.66.8 -Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis +8:10 8:20 8:30 8:40 8:50 9:00 +0 +5 +10 +15 +20 +NonF−loopA +Time + +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +8:10 8:20 8:30 8:40 8:50 9:00 +0 +5 +9 +14 +18 +NonF−loopB +Time + +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +8:10 8:20 8:30 8:40 8:50 9:00 +0 +3 +5 +8 +10 +NonF−loopC +Time + +Loop Length(Mm) +5.8 +6 +6.2 +6.4 +6.6 +6.8 +Figure 7: from top to down: T emperature map of the non-flaring loops A, B and C as a time-series. The vertical axis is the distance along the loop in Mm, and the horizontal axis i s the time. The color-bar in the left shows the colors considered for the temperature range. 
-678910111213141516171819202122232425262728293000.050.10.150.20.250.30.350.4 -Temp. Period (min)Percentage of Temp. Periods +6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 +0 +0.05 +0.1 +0.15 +0.2 +0.25 +0.3 +0.35 +0.4 +Temp. Period (min) +Percentage of Temp. Periods Figure 8: Hisogram of the temperature periods percentages for the loo ps’ strips of the flaring (blue bars) and nonflaring (red bars) ARs. The horizontal axis shows the tempera ture periods in minute. -0.20.30.40.50.60.70.80.911.11.21.31.41.51.61.7024681012 -max(log(T))−min(log(T))Number +0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7 +0 +2 +4 +6 +8 +10 +12 +max(log(T))−min(log(T)) +Number Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) fo r each strip of the loops of the flaring (blue bars) and non-flaring (red bars) ARs. References Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 28 0 -Anfinogentov, S., Nakariakov, V . M., Mathioudakis, M., Van D oorsselaere, T., & Kowalski, A. F. +Anfinogentov, S., Nakariakov, V . M., Mathioudakis, M., V an D oorsselaere, T ., & Kowalski, A. F . 2013, ApJ, 773, 156 Aschwanden, M., B. P . S. C. M. A. 2013, Solar Physics, 283, 5 -Aschwanden, M. J. 2006, Philosophical Transactions of the R oyal Society of London Series A, 364, +Aschwanden, M. J. 2006, Philosophical T ransactions of the Royal Society of London Series A, 364, 417 -Aschwanden, M. J., & Boerner, P . 2011, The Astrophysical Jou rnal, 732, 81 -Aschwanden, M. J., Boerner, P ., Ryan, D., et al. 2015, The Ast rophysical Journal, 802, 53 +Aschwanden, M. J., & Boerner , P . 2011, The Astrophysical Jou rnal, 732, 81 +Aschwanden, M. J., Boerner , P ., Ryan, D., et al. 2015, The Ast rophysical Journal, 802, 53 Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexand er, D. 1999, ApJ, 520, 880 Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13 -Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 
2007, Sol ar Physics, 246, 3 -Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207 -Boerner, P ., Edwards, C., Lemen, J., et al. 2012, Solar Physi cs, 275, 41 -Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116 -De Moortel, I. 2005, Philosophical Transactions of the Roya l Society of London Series A, 363, 2743 +Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3 +Berghmans, D., & Clette, F . 1999, Solar Physics, 186, 207 +Boerner, P ., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41 +Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F ., & V elli, M. 2018, ApJ, 868, 116 +De Moortel, I. 2005, Philosophical T ransactions of the Roya l Society of London Series A, 363, 2743 De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210 -De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23 -De Moortel, I., & Nakariakov, V . M. 2012, Philosophical Tran sactions of the Royal Society of +De Moortel, I., Ireland, J., & W alsh, R. W . 2000, A&A, 355, L23 +De Moortel, I., & Nakariakov, V . M. 2012, Philosophical T ransactions of the Royal Society of London Series A, 370, 3193 Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369 Fathalian, N., & Safari, H. 2010, ApJ, 724, 411 -Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403 -Goossens, M., Hollweg, J. V ., & Sakurai, T. 1992, Solar Physi cs, 138, 233 +Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy ,15, 403 +Goossens, M., Hollweg, J. V ., & Sakurai, T . 1992, Solar Physi cs, 138, 233 Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A , 460, 887 -Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203 , 25 -Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203 , 26 +Guennou, C., Auchère, F ., Soubrié, E., et al. 2012a, ApJ, 203 , 25 +Guennou, C., Auchère, F ., Soubrié, E., et al. 2012b, ApJ, 203 , 26 Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113 -Hindman, B. 
W., & Jain, R. 2014, ApJ, 784, 103 -Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19 -Jess, D. B., Reznikova, V . E., Ryans, R. S. I., et al. 2016, Nat ure Physics, 12, 179 -Kolotkov, D. Y., Nakariakov, V . M., & Zavershinskii, D. I. 20 19, A&A, 628, A133 -Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space +Hindman, B. W ., & Jain, R. 2014, ApJ, 784, 103 +Jain, R., Maurya, R. A., & Hindman, B. W . 2015, ApJ, 804, L19 +Jess, D. B., Reznikova, V . E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179 +Kolotkov, D. Y ., Nakariakov, V . M., & Zavershinskii, D. I. 20 19, A&A, 628, A133 +Krishna Prasad, S., Jess, D. B., & V an Doorsselaere, T . 2019, Frontiers in Astronomy and Space Sciences, 6, 57 -Li, L. P ., Peter, H., Chen, F., & Zhang, J. 2015, A&A, 583, A109 -Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 -Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, A pJ, 716, 1371 +Li, L. P ., Peter, H., Chen, F ., & Zhang, J. 2015, A&A, 583, A109 +Liu, W ., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 +Luna, M., T erradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371 McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834 -McLaughlin, J. A., Nakariakov, V . M., Dominique, M., Jelíne k, P ., & Takasao, S. 2018, Space +McLaughlin, J. A., Nakariakov, V . M., Dominique, M., Jelínek, P ., & T akasao, S. 2018, Space Science Reviews volume, 214, 45 -Nakariakov, V . M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2 017, ApJ, 849, 62 +Nakariakov, V . M., Afanasyev, A. N., Kumar, S., & Moon, Y . J. 2 017, ApJ, 849, 62 Nakariakov, V . M., Inglis, A. R., Zimovets, I. V ., et al. 2010 , Plasma Physics and Controlled Fusion, 52, 124009 Nakariakov, V . M., Ofman, L., Deluca, E. E., Roberts, B., & Da vila, J. M. 1999, Science, 285, 862 -Nakariakov, V . M., & Verwichte, E. 2005, Living Reviews in So lar Physics, 2, 3 -Nisticò, G., Nakariakov, V . M., & Verwichte, E. 
2013, A&A, 55 2, A57 +Nakariakov, V . M., & V erwichte, E. 2005, Living Reviews in So lar Physics, 2, 3 +Nisticò, G., Nakariakov, V . M., & V erwichte, E. 2013, A&A, 55 2, A57 Nisticò, G., Polito, V ., Nakariakov, V . M., & Del Zanna, G. 20 17, A&A, 600, A37 -Ofman, L., & Wang, T. 2002, ApJ, 580, L85 -Pant, V ., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5 -Pascoe, D. J., Nakariakov, V . M., & Arber, T. D. 2007, Solar Ph ysics, 246, 165 -Reale, F., Testa, P ., Petralia, A., & Kolotkov, D. Y. 2019, Ap J, 884, 131 +Ofman, L., & W ang, T . 2002, ApJ, 580, L85 +Pant, V ., Tiwari, A., Y uan, D., & Banerjee, D. 2017, ApJ, 847,L5 +Pascoe, D. J., Nakariakov, V . M., & Arber, T . D. 2007, Solar Ph ysics, 246, 165 +Reale, F ., T esta, P ., Petralia, A., & Kolotkov, D. Y . 2019, Ap J, 884, 131 Roberts, B., Edwin, P . M., & Benz, A. O. 1984, ApJ, 279, 857 -Romano, P ., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55 +Romano, P ., Zuccarello, F ., Guglielmino, S. L., et al. 2015,A&A, 582, A55 Russell, A. J. B., Simões, P . J. A., & Fletcher, L. 2015, A&A, 5 81, A8 Scargle, J. D. 1982, ApJ, 263, 835 -Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ , 731, 49 -Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34 -Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 -Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 201 3, ApJ, 764, 53 -Ugarte-Urra, I., & Warren, H. P . 2014, ApJ, 783, 12 -Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, So lar Physics, 291, 3143 -Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32 -VanderPlas, J. T. 2018, ApJ, 236, 16 -Verwichte, E., Nakariakov, V . M., Ofman, L., & Deluca, E. E. 2 004, Solar Physics, 223, 77 -Wang, T. 2011, Space Science Reviews, 158, 397–419 -Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 -Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33 -Wang, T. J., Solanki, S. 
K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17 -Wang, T., & Ofman, L. 2019, ApJ, 886, 2 -Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13 -Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Revi ews, 217 -Warren, H. P ., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141 -Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467 \ No newline at end of file +Schmelz, J. T ., Jenkins, B. S., W orley, B. T ., et al. 2011, ApJ, 731, 49 +Schmelz, J. T ., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34 +Schmelz, J. T ., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 +Schmelz, J. T ., Pathak, S., Jenkins, B. S., & W orley, B. T . 201 3, ApJ, 764, 53 +Ugarte-Urra, I., & W arren, H. P . 2014, ApJ, 783, 12 +V an Doorsselaere, T ., Kupriyanova, E. G., & Y uan, D. 2016, Solar Physics, 291, 3143 +V an Doorsselaere, T ., W ardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32 +V anderPlas, J. T . 2018, ApJ, 236, 16 +V erwichte, E., Nakariakov, V . M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77 +W ang, T . 2011, Space Science Reviews, 158, 397–419 +W ang, T ., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 +W ang, T . J., & Solanki, S. K. 2004, A&A, 421, L33 +W ang, T . J., Solanki, S. K., Innes, D. E., Curdt, W ., & Marsch,E. 2003, A&A, 402, L17 +W ang, T ., & Ofman, L. 2019, ApJ, 886, 2 +W ang, T ., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13 +W ang, T ., Ofman, L., Y uan, D., et al. 2021, Space Science Revi ews, 217 +W arren, H. P ., W inebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141 +W ills-Davey, M. J., & Thompson, B. J. 
1999, Solar Physics, 19 0, 467 \ No newline at end of file diff --git a/read/results/pypdf/GeoTopo-book.txt b/read/results/pypdf/GeoTopo-book.txt index 62ca6ac..d04416c 100644 --- a/read/results/pypdf/GeoTopo-book.txt +++ b/read/results/pypdf/GeoTopo-book.txt @@ -11,7 +11,7 @@ Danksagungen An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze -Abschnitte konnten direkt mit L ATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre +Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre Inhalte in diesem Skript einbauen zu dürfen! Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsaufgaben und Lösungen zu benutzen. @@ -19,25 +19,25 @@ Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per E-Mail und nach dem Tutorium beantwortet. Danke! Was ist Topologie? -Die Kugeloberfläche S2lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche -oder der Oberfläche einer Pyramide verformen, aber nicht zum R2oder zu einem Torus T2. Für -denR2müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein +Die KugeloberflächeS2 lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche +oder der Oberfläche einer Pyramide verformen, aber nicht zumR2 oder zu einem TorusT2. Für +den R2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein Loch machen. 
Erforderliche Vorkenntnisse -Es wird ein sicherer Umgang mit den Quantoren ( ∀,∃), Mengenschreibweisen ( ∪,∩,\,∅,R,P(M)) +Es wird ein sicherer Umgang mit den Quantoren (∀,∃), Mengenschreibweisen (∪,∩,\,∅,R,P(M)) und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Widerspruchsbeweisen - sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag, + sollte bekannt sein und der Umgang mit komplexen ZahlenC, deren Betrag, Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem in „Analysis I“ vermittelt. Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, -der Spektralsatz und der projektive Raum P(R)aus „Lineare Algebra I“ bekannt sind. In „Lineare +der Spektralsatz und der projektive RaumP(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. -(a)S2 -(b) Würfel (c) Pyramide +(a) S2 + (b) Würfel (c) Pyramide y x -(d)R2(e)T2 +(d) R2 (e) T2 Abbildung 0.1: Beispiele für verschiedene Formen Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und Zahlentheorie“ gehört zu haben. @@ -81,622 +81,735 @@ Stichwortverzeichnis 111 1 Topologische Grundbegriffe 1.1 Topologische Räume Definition 1 -Eintopologischer Raum ist ein Paar (X,T)bestehend aus einer Menge XundT⊆P(X) +Ein topologischer Raumist ein Paar(X,T) bestehend aus einer MengeX und T ⊆P(X) mit folgenden Eigenschaften -(i)∅,X∈T -(ii) SindU1,U2∈T, so istU1∩U2∈T -(iii) IstIeine Menge und Ui∈Tfür jedesi∈I, so ist⋃ -i∈IUi∈T -Die Elemente von Theißenoffene Teilmengen vonX. -A⊆Xheißtabgeschlossen , wennX\Aoffen ist. -Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0,1). Auch gibt es +(i) ∅,X ∈T +(ii) Sind U1,U2 ∈T, so istU1 ∩U2 ∈T +(iii) Ist I eine Menge undUi ∈T für jedesi∈I, so ist +⋃ +i∈I +Ui ∈T +Die Elemente vonT heißenoffene Teilmengenvon X. 
+A⊆X heißtabgeschlossen, wennX\A offen ist. +Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B.[0,1). Auch gibt es Mengen, die sowohl abgeschlossen als auch offen sind. Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) -Betrachte∅undXmit dertrivialen Topologie Ttriv={∅,X}. -Es gilt:X∈Tund∅∈T, d. h.Xund∅sind offen. Außerdem XC=X\X=∅∈Tund -X\∅=X∈T, d. h.Xund∅sind als Komplement offener Mengen abgeschlossen. ■ +Betrachte ∅und X mit dertrivialen Topologie Ttriv = {∅,X }. +Es gilt:X ∈T und ∅∈ T, d. h.X und ∅sind offen. AußerdemXC = X\X = ∅∈ T und +X\∅ = X ∈T, d. h.X und ∅sind als Komplement offener Mengen abgeschlossen. ■ Beispiel 1 (Topologien) -1)X=Rnmit der von der euklidischen Metrik erzeugten Topologie TEuklid: -U⊆Rnoffen⇔für jedesx∈Ugibt esr>0, -sodass Br(x) ={y∈Rn|d(x,y) 0, +sodass Br(x) = {y∈Rn |d(x,y) 0,x∈Qn} -ist eine abzählbare Basis von T. -3)Sei(X,T)eintopologischerRaummit X={0,1,2}undT={∅,{0},{0,1},{0,2},X}. -Dann istS={∅,{0,1},{0,2}}eine Subbasis von T, da gilt: +2) Gegeben seiX = Rn mit euklidischer TopologieT. Dann ist +B = {Br(x) |r∈Q>0,x ∈Qn } +ist eine abzählbare Basis vonT. +3) Sei(X,T) eintopologischerRaummit X = {0,1,2 }undT = {∅,{0 },{0,1 },{0,2 },X }. +Dann istS= {∅,{0,1 },{0,2 }} eine Subbasis vonT, da gilt: 1.1. TOPOLOGISCHE RÄUME -• S⊆ T -• ∅,{0,1}und{0,2}∈S -• {0}={0,1}∩{ 0,2} -•X={0,1}∪{ 0,2} -Allerings istSkeine Basis von (X,T), da{0}nicht als Vereinigung von Elementen -ausSerzeugt werden kann. +• S⊆T +• ∅,{0,1 }und {0,2 }∈S +• {0 }= {0,1 }∩{ 0,2 } +•X = {0,1 }∪{ 0,2 } +Allerings istSkeine Basis von(X,T), da{0 }nicht als Vereinigung von Elementen +aus Serzeugt werden kann. Bemerkung 2 -SeiXeine Menge undS⊆P (X). Dann gibt es genau eine Topologie TaufX, für dieS +Sei X eine Menge undS⊆P (X). Dann gibt es genau eine TopologieT auf X, für dieS Subbasis ist. Definition 5 -Sei(X,T)ein topologischer Raum und Y⊆X. -TY:={U∩Y|U∈T}ist eine Topologie auf Y. -TYheißtTeilraumtopologie und(Y,TY)heißt einTeilraum von(X,T). 
-Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. +Sei (X,T) ein topologischer Raum undY ⊆X. +TY := {U ∩Y |U ∈T }ist eine Topologie aufY. +TY heißtTeilraumtopologieund (Y,TY) heißt einTeilraumvon (X,T). +Die Teilraumtopologie wird auchSpurtopologie oder Unterraumtopologie genannt. Definition 6 -SeienX1,X2topologische Räume. -U⊆X1×X2sei offen, wenn es zu jedem x= (x1,x2)∈UUmgebungen Uiumximit -i= 1,2gibt, sodass U1×U2⊆Ugilt. -T={U⊆X1×X2|Uoffen}ist eine Topologie auf X1×X2. Sie heißt Produkttopologie . -B={U1×U2|Uioffen inXi,i= 1,2}ist eine Basis von T. +Seien X1,X2 topologische Räume. +U ⊆X1 ×X2 sei offen, wenn es zu jedemx = (x1,x2) ∈U Umgebungen Ui um xi mit +i= 1,2 gibt, sodassU1 ×U2 ⊆U gilt. +T = {U ⊆X1 ×X2 |U offen}ist eine Topologie aufX1×X2. Sie heißtProdukttopologie. +B = {U1 ×U2 |Ui offen inXi,i = 1,2 }ist eine Basis vonT. U xx2 -x1U2 -U1X1X2 -Abbildung 1.1: Zu x= (x1,x2)gibt es Umgebungen U1,U2mitU1×U2⊆U +x1 +U2 +U1 +X1 +X2 +Abbildung 1.1: Zux= (x1,x2) gibt es UmgebungenU1,U2 mit U1 ×U2 ⊆U Beispiel 4 (Produkttopologien) -1)X1=X2=Rmit euklidischer Topologie. -⇒Die Produkttopologie auf R×R=R2stimmt mit der euklidischen Topologie auf -R2überein. -2)X1=X2=Rmit Zariski-Topologie. TProdukttopologie auf R2:U1×U2 +1) X1 = X2 = R mit euklidischer Topologie. +⇒Die Produkttopologie aufR ×R = R2 stimmt mit der euklidischen Topologie auf +R2 überein. +2) X1 = X2 = R mit Zariski-Topologie.T Produkttopologie aufR2: U1 ×U2 (Siehe Abbildung 1.2) 1.1. TOPOLOGISCHE RÄUME -U1=R\NU2=R\N -Abbildung 1.2: Zariski-Topologie auf R2 +U1 = R \N +U2=R\N +Abbildung 1.2: Zariski-Topologie aufR2 Definition 7 -SeiXein topologischer Raum, ∼eine Äquivalenzrelation auf X,X=X/∼sei die Menge -der Äquivalenzklassen, π:X→X, x↦→[x]∼. -TX:={ -U⊆X⏐⏐π−1(U)∈TX} -(X,TX)heißtQuotiententopologie . +Sei X ein topologischer Raum,∼eine Äquivalenzrelation aufX, X = X/∼sei die Menge +der Äquivalenzklassen,π: X →X, x ↦→[x]∼. 
+TX := +{ +U ⊆X +⏐⏐π−1(U) ∈TX +} +(X,TX) heißtQuotiententopologie. Beispiel 5 -X=R,a∼b:⇔a−b∈Z -R -1012345 -0a -Ua π−1(u) -0∼1, d. h. [0] = [1] +X = R,a ∼b:⇔a−b∈Z +R-1 0 1 2 3 4 5 +0 +a +U +aπ−1(u) +0 ∼1, d. h.[0] = [1] Beispiel 6 -SeiX=R2und(x1,y1)∼(x2,y2)⇔x1−x2∈Zundy1−y2∈Z. Dann istX/∼ein Torus. +Sei X = R2 und (x1,y1) ∼(x2,y2) ⇔x1 −x2 ∈Z und y1 −y2 ∈Z. Dann istX/∼ein Torus. Beispiel 7 (Projektiver Raum) -X=Rn+1\{0}, x∼y⇔∃λ∈R×mity=λx -⇔xundyliegen auf der gleichen +X = Rn+1 \{0 }, x ∼y⇔∃λ∈R×mit y= λx +⇔x und y liegen auf der gleichen Ursprungsgerade -X=Pn(R) +X = Pn(R) 1.2. METRISCHE RÄUME Also fürn= 1: -−4−2 2 4 6 8 -−4−224 +−4 −2 2 4 6 8 +−4 +−2 +2 +4 1.2 Metrische Räume Definition 8 -SeiXeine Menge. Eine Abbildung d:X×X→R+ -0heißtMetrik, wenn gilt: -(i) Definitheit: d(x,y) = 0⇔x=y∀x,y∈X -(ii) Symmetrie: d(x,y) =d(y,x)∀x,y∈X -(iii) Dreiecksungleichung: d(x,z)≤d(x,y) +d(y,z)∀x,y,z∈X -Das Paar (X,d)heißt einmetrischer Raum . +Sei X eine Menge. Eine Abbildungd: X×X →R+ +0 heißtMetrik, wenn gilt: +(i) Definitheit: d(x,y) = 0 ⇔x= y ∀x,y ∈X +(ii) Symmetrie: d(x,y) = d(y,x) ∀x,y ∈X +(iii) Dreiecksungleichung: d(x,z) ≤d(x,y) + d(y,z) ∀x,y,z ∈X +Das Paar(X,d) heißt einmetrischer Raum. Bemerkung 3 -Sei(X,d)ein metrischer Raum und -Br(x) :={y∈X|d(x,y)0⇒∃ε>0 :Bε(x)∩Bε(y) =∅ +d(x,y) >0 ⇒∃ε> 0 : Bε(x) ∩Bε(y) = ∅ Beispiel 12 (Topologische Räume und Hausdorff-Räume) -1)(R,TZ)ist ein topologischer Raum, der nicht hausdorffsch ist. -2)(R,TEuklid )ist ein topologischer Hausdorff-Raum. +1) (R,TZ) ist ein topologischer Raum, der nicht hausdorffsch ist. +2) (R,TEuklid) ist ein topologischer Hausdorff-Raum. Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) -SeienX,X 1,X2Hausdorff-Räume. -a) Jeder Teilraum von Xist hausdorffsch. -b)X1×X2ist hausdorffsch (vgl. Abbildung 1.4). +Seien X,X1,X2 Hausdorff-Räume. +a) Jeder Teilraum vonX ist hausdorffsch. +b) X1 ×X2 ist hausdorffsch (vgl. Abbildung 1.4). 
Definition 11 -SeiXein topologischer Raum und (x)n∈Neine Folge in X.x∈XheißtGrenzwert oder -Limesvon(xn), wenn es für jede Umgebung Uvonxeinn0gibt, sodass xn∈Ufür alle +Sei X ein topologischer Raum und(x)n∈N eine Folge inX. x∈X heißtGrenzwert oder +Limes von (xn), wenn es für jede UmgebungU von x ein n0 gibt, sodassxn ∈U für alle n≥n0. Bemerkung 6 -IstXhausdorffsch, so hat jede Folge in Xhöchstens einen Grenzwert. -Beweis: Sei(xn)eine konvergierende Folge und xundyGrenzwerte der Folge. -DaXhausdorffsch ist, gibt es Umgebungen UxvonxundUyvonymitUx∩Uy=∅falls -x̸=y. Da (xn)gegenxundykonvergiert, existiert ein n0mitxn∈Ux∩Uyfür allen≥n0 -⇒x=y ■ +Ist X hausdorffsch, so hat jede Folge inX höchstens einen Grenzwert. +Beweis: Sei (xn) eine konvergierende Folge undx und y Grenzwerte der Folge. +Da X hausdorffsch ist, gibt es UmgebungenUx von x und Uy von y mit Ux ∩Uy = ∅falls +x̸= y. Da(xn) gegen x und y konvergiert, existiert einn0 mit xn ∈Ux ∩Uy für allen≥n0 +⇒x= y ■ 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. 1.3. STETIGKEIT (x1,y1) (x2,y2) x1 x2 -U1×X2 U2×X2X1X2 -Abbildung 1.4: Wenn X1,X2hausdorffsch sind, dann auch X1×X2 +U1 ×X2 U2 ×X2 +X1 +X2 +Abbildung 1.4: WennX1,X2 hausdorffsch sind, dann auchX1 ×X2 1.3 Stetigkeit Definition 12 -Seien (X,TX),(Y,TY)topologische Räume und f:X→Yeine Abbildung. -a)fheißtstetig :⇔∀U∈TY:f−1(U)∈TX. -b)fheißtHomöomorphismus , wennfstetig ist und es eine stetige Abbildung g: -Y→Xgibt, sodass g◦f=idXundf◦g=idY. +Seien (X,TX),(Y,TY) topologische Räume undf : X →Y eine Abbildung. +a) f heißtstetig :⇔∀U ∈TY : f−1(U) ∈TX. +b) f heißtHomöomorphismus, wenn f stetig ist und es eine stetige Abbildungg : +Y →X gibt, sodassg◦f = idX und f ◦g= idY. Bemerkung 72 -SeienX,Ymetrische Räume und f:X→Yeine Abbildung. -Dann gilt:fist stetig⇔zu jedemx∈Xund jedem ε>0gibt esδ(x,ε)>0, sodass für -alley∈Xmitd(x,y)<δgiltdY(f(x),f(y))<ε. -Beweis: „⇒“: Seix∈X,ε> 0gegeben und U:=Bε(f(x)). -Dann istUoffen inY. -Def. 12.a= = = = =⇒f−1(U)ist offen in X. 
Dann istx∈f−1(U). -⇒∃δ>0, sodass Bδ(x)⊆f−1(U) -⇒f(Bδ(x))⊆U -⇒{y∈X|dX(x,y)<δ}⇒Beh. -„⇐“: SeiU⊆Yoffen,X∈f−1(U). -Dann gibt es ε>0, sodass Bε(f(x))⊆U -Vor.= =⇒Es gibtδ>0, sodassf(Bδ(x))⊆Bε(f(x))) -⇒Bδ(x)⊆f−1(Bε(f(x)))⊆f−1(U) ■ +Seien X,Y metrische Räume undf: X →Y eine Abbildung. +Dann gilt:f ist stetig⇔zu jedemx∈X und jedemε> 0 gibt esδ(x,ε) >0, sodass für +alle y∈X mit d(x,y) <δ gilt dY(f(x),f(y)) <ε. +Beweis: „⇒“: Seix∈X,ε> 0 gegeben undU := Bε(f(x)). +Dann istU offen inY. +Def. 12.a= = = = =⇒f−1(U) ist offen inX. Dann istx∈f−1(U). +⇒∃δ >0, sodassBδ(x) ⊆f−1(U) +⇒f(Bδ(x)) ⊆U +⇒{y∈X |dX(x,y) <δ }⇒ Beh. +„⇐“: SeiU ⊆Y offen, X ∈f−1(U). +Dann gibt esε> 0, sodassBε(f(x)) ⊆U +Vor.= =⇒Es gibtδ >0, sodassf(Bδ(x)) ⊆Bε(f(x))) +⇒Bδ(x) ⊆f−1(Bε(f(x))) ⊆f−1(U) ■ Bemerkung 8 -SeienX,Ytopologische Räume und f:X→Yeine Abbildung. Dann gilt: -fist stetig -⇔für jede abgeschlossene Teilmenge A⊆Ygilt:f−1(A)⊆Xist abgeschlossen. +Seien X,Y topologische Räume undf : X →Y eine Abbildung. Dann gilt: +f ist stetig +⇔für jede abgeschlossene TeilmengeA⊆Y gilt : f−1(A) ⊆X ist abgeschlossen. Beispiel 13 (Stetige Abbildungen und Homöomorphismen) -1) Für jeden topologischen Raum Xgilt: idX:X→Xist Homöomorphismus. +1) Für jeden topologischen RaumX gilt: idX : X →X ist Homöomorphismus. 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. 1.3. STETIGKEIT -2)Ist(Y,TY)trivialer topologischer Raum, d. h. TY=Ttriv, so ist jede Abbildung -f:X→Ystetig. -3)IstXdiskreter topologischer Raum, so ist f:X→Ystetig für jeden topologischen -RaumYund jede Abbildung f. -4) SeiX= [0,1),Y=S1={z∈C|∥z∥= 1}undf(t) =e2πit. -R 010f +2) Ist (Y,TY) trivialer topologischer Raum, d. h.TY = Ttriv, so ist jede Abbildung +f : X →Y stetig. +3) Ist X diskreter topologischer Raum, so istf : X →Y stetig für jeden topologischen +Raum Y und jede Abbildungf. +4) Sei X = [0,1),Y = S1 = {z∈C |∥z∥= 1 }und f(t) = e2πit. 
+R0 1 +0 +f g -Abbildung 1.5: Beispiel einer stetigen Funktion f, deren Umkehrabbildung gnicht stetig ist. -Die Umkehrabbildung gist nicht stetig, da g−1(U)nicht offen ist (vgl. Abbildung 1.5). +Abbildung 1.5: Beispiel einer stetigen Funktionf, deren Umkehrabbildungg nicht stetig ist. +Die Umkehrabbildungg ist nicht stetig, dag−1(U) nicht offen ist (vgl. Abbildung 1.5). Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) -SeienX,Y,Ztopologische Räume, f:X→Yundg:Y→Zstetige Abbildungen. -Dann istg◦f:X→Zstetig. -Xf→→ -g◦f↘↘Y -g↙↙ +Seien X,Y,Z topologische Räume,f : X →Y und g: Y →Z stetige Abbildungen. +Dann istg◦f : X →Z stetig. +X +f → → +g◦f ↘ ↘ +Y +g↙ ↙ Z -Beweis: SeiU⊆Zoffen⇒(g◦f)−1(U) =f−1(g−1(U)).g−1(U)ist offen in Yweilgstetig -ist,f−1(g−1(U))ist offen in X, weilfstetig ist. ■ +Beweis: Sei U ⊆Z offen ⇒(g◦f)−1(U) = f−1(g−1(U)). g−1(U) ist offen inY weil g stetig +ist, f−1(g−1(U)) ist offen inX, weilf stetig ist. ■ Bemerkung 10 -a) Für jeden topologischen Raum Xist -Homöo (X) :={f:X→X|fist Homöomorphismus } +a) Für jeden topologischen RaumX ist +Homöo(X) := {f : X →X |f ist Homöomorphismus} eine Gruppe. -b) Jede Isometrie f:X→Yzwischen metrischen Räumen ist ein Homöomorphismus. -c)Iso(X) :={f:X→X|fist Isometrie}ist eine Untergruppe von Homöo (X)für -jeden metrischen Raum X. +b) Jede Isometrief : X →Y zwischen metrischen Räumen ist ein Homöomorphismus. +c) Iso(X) := {f : X →X |f ist Isometrie}ist eine Untergruppe vonHomöo(X) für +jeden metrischen RaumX. Bemerkung 11 (Projektionen sind stetig) -SeienX,Ytopologische Räume. πX:X×Y→XundπY:X×Y→Ydie Projektionen -πX: (x,y)↦→xundπY: (x,y)↦→y -WirdX×Ymit der Produkttopologie versehen, so sind πXundπYstetig. -Beweis: SeiU⊆Xoffen +Seien X,Y topologische Räume.πX : X×Y →X und πY : X×Y →Y die Projektionen +πX : (x,y) ↦→x und πY : (x,y) ↦→y +Wird X×Y mit der Produkttopologie versehen, so sindπX und πY stetig. +Beweis: Sei U ⊆X offen ⇒π−1 -X(U) =U×Yist offen in X×Y. ■ +X (U) = U ×Y ist offen inX×Y. 
■ Bemerkung 12 -SeiXein topologischer Raum, ∼eine Äquivalenzrelation auf X,X=X/∼der Bahnenraum -versehen mit der Quotiententopologie, π:X→X,x↦→[x]∼. -Dann istπstetig. +Sei X ein topologischer Raum,∼eine Äquivalenzrelation aufX, X = X/∼der Bahnenraum +versehen mit der Quotiententopologie,π: X →X, x↦→[x]∼. +Dann istπ stetig. 1.4. ZUSAMMENHANG -Beweis: Nach Definition ist U⊆Xoffen⇔π−1(U)⊆Xoffen. ■ -Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass πstetig wird. +Beweis: Nach Definition istU ⊆X offen ⇔π−1(U) ⊆X offen. ■ +Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodassπ stetig wird. Beispiel 14 (Stereographische Projektion) -RnundSn\{N}sind homöomorph für beliebiges N∈Sn. Es gilt: -Sn={ -x∈Rn+1⏐⏐∥x∥= 1} -={ -x∈Rn+1⏐⏐⏐⏐⏐n+1∑ -i=1x2 -i= 1} -O. B. d. A. sei N= -0 +Rn und Sn \{N }sind homöomorph für beliebigesN ∈Sn. Es gilt: +Sn = +{ +x∈Rn+1 ⏐⏐∥x∥= 1 +} += +{ +x∈Rn+1 +⏐⏐⏐⏐⏐ +n+1∑ +i=1 +x2 +i = 1 +} +O. B. d. A. seiN = + + +0 ... 0 -1 -. Die Gerade durch NundPschneidet die Ebene Hin genau -einem Punkt ˆP.Pwird auf ˆPabgebildet. -f:Sn\{N}→Rn -P↦→genau ein Punkt -LP∩H -wobeiRn=H= +1 + +. Die Gerade durchN und P schneidet die EbeneH in genau +einem Punkt ˆP. P wird auf ˆP abgebildet. +f :Sn \{N }→ Rn +P ↦→ +genau ein Punkt +   +LP ∩H +wobei Rn = H = +  - -x1 + + + +x1 ... -xn+1 -∈Rn+1⏐⏐⏐⏐⏐⏐⏐xn+1= 0 +xn+1 + +∈Rn+1 +⏐⏐⏐⏐⏐⏐⏐ +xn+1 = 0 +  -undLPdie Gerade in Rn+1durchN -undPist. -SeiP= -x1 + +und LP die Gerade inRn+1 durch N +und P ist. +Sei P = + + +x1 ... -xn+1 -, so istxn+1<1, also istLPnicht parallel zu H. Also schneiden sich LP -undHin genau einem Punkt ˆP. -Es gilt:fist bijektiv und die Umkehrabbildung ist ebenfalls stetig. +xn+1 + +, so istxn+1 <1, also istLP nicht parallel zuH. Also schneiden sichLP +und H in genau einem PunktˆP. +Es gilt:f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. 
1.4 Zusammenhang Definition 13 -a)EinRaumXheißtzusammenhängend ,wenneskeineoffenen,nichtleerenTeilmengen -U1,U2vonXgibt mitU1∩U2=∅undU1∪U2=X. -b)Eine Teilmenge Y⊆Xheißt zusammenhängend, wenn Yals topologischer Raum mit +a) EinRaum Xheißtzusammenhängend,wenneskeineoffenen,nichtleerenTeilmengen +U1,U2 von X gibt mitU1 ∩U2 = ∅und U1 ∪U2 = X. +b) Eine TeilmengeY ⊆X heißt zusammenhängend, wennY als topologischer Raum mit der Teilraumtopologie zusammenhängend ist. 1.4. ZUSAMMENHANG -xyz +x +y +z N -ˆP0P +ˆP +0 +P Abbildung 1.6: Visualisierung der stereographischen Projektion Bemerkung 13 -Xist zusammenhängend ⇔Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1,A2 -mitA1∩A2=∅undA1∪A2=X. +X ist zusammenhängend⇔Es gibt keine abgeschlossenen, nichtleeren TeilmengenA1,A2 +mit A1 ∩A2 = ∅und A1 ∪A2 = X. Beispiel 15 (Zusammenhang von Räumen) -1)(Rn,TEuklid )ist zusammenhängend, denn: -Annahme :Rn=U1˙∪U2mit∅̸=U1,U2∈TEuklidexistieren. -Seix∈U1,y∈U2und [x,y]die Strecke zwischen xundy. SeiV= [x,y]. Nun -betrachten wir V⊊ Rnals (metrischen) Teilraum mit der Teilraumtopologie TV. -Somit giltU1∩[x,y]∈TVwegen der Definition der Teilraumtopologie. -Dann gibt es z∈[x,y]mitz∈∂(U1∩[x,y]), aberz /∈U1⇒z∈U2. In jeder -Umgebung von zliegt ein Punkt von U1⇒Widerspruch zu U2offen. -2)R\{0}ist nicht zusammenhängend, denn R\{0}=R<0∪R>0 -3)R2\{0}ist zusammenhängend. -4)Q ⊊ Rist nicht zusammenhängend, da (Q∩R<√ -2)∪(Q∩R>√ -2) =Q -5){x}ist zusammenhängend für jedes x∈X, wobeiXein topologischer Raum ist. -6)Rmit Zariski-Topologie ist zusammenhängend. +1) (Rn,TEuklid) ist zusammenhängend, denn: +Annahme: Rn = U1 ˙∪U2 mit ∅̸= U1,U2 ∈TEuklid existieren. +Sei x ∈U1,y ∈U2 und [x,y] die Strecke zwischenx und y. Sei V = [ x,y]. Nun +betrachten wir V ⊊ Rn als (metrischen) Teilraum mit der TeilraumtopologieTV. +Somit giltU1 ∩[x,y] ∈TV wegen der Definition der Teilraumtopologie. +Dann gibt es z ∈[x,y] mit z ∈∂(U1 ∩[x,y]), aber z /∈U1 ⇒z ∈U2. 
In jeder +Umgebung vonz liegt ein Punkt vonU1 ⇒Widerspruch zuU2 offen. +2) R \{0 }ist nicht zusammenhängend, dennR \{0 }= R<0 ∪R>0 +3) R2 \{0 }ist zusammenhängend. +4) Q ⊊ Rist nicht zusammenhängend, da(Q ∩R< +√ +2) ∪(Q ∩R> +√ +2) = Q +5) {x}ist zusammenhängend für jedesx∈X, wobeiX ein topologischer Raum ist. +6) R mit Zariski-Topologie ist zusammenhängend. Bemerkung 14 -SeiXein topologischer Raum und A⊆Xzusammenhängend. Dann ist auch Azusammenhängend. +Sei X ein topologischer Raum undA⊆X zusammenhängend. Dann ist auchA zusammenhängend. 1.4. ZUSAMMENHANG Beweis: durch Widerspruch -Annahme :A=A1∪A2, Aiabgeschlossen, Ai̸=∅,A1∩A2=∅ -⇒A= (A∩A1) -abgeschlossen˙∪(A∩A2) -abgeschlossen +Annahme: A= A1 ∪A2, Ai abgeschlossen, Ai ̸= ∅, A1 ∩A2 = ∅ +⇒A= ( A∩A1)   +abgeschlossen +˙∪ (A∩A2)   +abgeschlossen +   disjunkt -WäreA∩A1=∅ -⇒A⊆A=A1˙∪A2 -⇒A⊆A2⇒A⊆A2 -⇒A1=∅ -⇒Widerspruch zu A1̸=∅ -⇒A∩A1̸=∅und analog A∩A2̸=∅ -⇒Widerspruch zu Aist zusammenhängend. ■ +Wäre A∩A1 = ∅ +⇒A⊆A= A1 ˙∪A2 +⇒A⊆A2 ⇒A⊆A2 +⇒A1 = ∅ +⇒Widerspruch zuA1 ̸= ∅ +⇒A∩A1 ̸= ∅und analogA∩A2 ̸= ∅ +⇒Widerspruch zuA ist zusammenhängend. ■ Bemerkung 15 -SeiXein topologischer Raum und A,B⊆Xzusammenhängend. -IstA∩B̸=∅, dann istA∪Bzusammenhängend. -Beweis: SeiA∪B=U1˙∪U2,Ui̸=∅offen -o. B. d. A.= = = = = =⇒A= (A∩U1)˙∪(A∩U2)offen -Azhgd.= = = =⇒A∩U1=∅ -A∩B̸=∅= = = =⇒U1⊆B -B= (B∩U1) -=U1∪(B∩U2) -=∅ist unerlaubte Zerlegung. +Sei X ein topologischer Raum undA,B ⊆X zusammenhängend. +Ist A∩B ̸= ∅, dann istA∪B zusammenhängend. +Beweis: Sei A∪B = U1 ˙∪U2,Ui ̸= ∅offen +o. B. d. A.= = = = = =⇒A= (A∩U1) ˙∪(A∩U2) offen +A zhgd. += = = =⇒A∩U1 = ∅ +A∩B̸=∅ += = = =⇒U1 ⊆B +B = (B∩U1)   +=U1 +∪(B∩U2)   +=∅ +ist unerlaubte Zerlegung. ■ Definition 14 -SeiXein topologischer Raum. -Fürx∈XseiZ(x)⊆Xdefiniert durch -Z(x) :=⋃ +Sei X ein topologischer Raum. +Für x∈X sei Z(x) ⊆X definiert durch +Z(x) := +⋃ A⊆Xzhgd. -x∈AA -Z(x)heißtZusammenhangskomponente . +x∈A +A +Z(x) heißtZusammenhangskomponente. 
Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) -SeiXein topologischer Raum. Dann gilt: -a)Z(x)ist die größte zusammenhängende Teilmenge von X, diexenthält. -b)Z(x)ist abgeschlossen. -c)Xist disjunkte Vereinigung von Zusammenhangskomponenten. +Sei X ein topologischer Raum. Dann gilt: +a) Z(x) ist die größte zusammenhängende Teilmenge vonX, diex enthält. +b) Z(x) ist abgeschlossen. +c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. Beweis: 1.5. KOMPAKTHEIT -a) SeiZ(x) =A1˙∪A2mitAi̸=∅abgeschlossen. -O. B. d. A. sei x∈A1undy∈A2.yliegt in einer zusammehängenden Teilmenge A, -die auchxenthält.⇒A= (A∩A1) -∋x∪(A∩A2) -∋yist unerlaubte Zerlegung. -b) Nach Bemerkung 14 ist Z(x)zusammenhängend ⇒Z(x)⊆Z(x)⇒Z(x) =Z(x) -c) IstZ(y)∩Z(x)̸=∅Bem. 15= = = = =⇒Z(y)∪Z(x)ist zusammenhängend. -⇒Z(x)∪Z(y)⊆Z(x)⇒Z(y)⊆Z(x) -⊆Z(y)⇒Z(x)⊆Z(y) +a) Sei Z(x) = A1 ˙∪A2 mit Ai ̸= ∅abgeschlossen. +O. B. d. A. seix∈A1 und y∈A2. y liegt in einer zusammehängenden TeilmengeA, +die auchx enthält. ⇒A= (A∩A1)   +∋x +∪(A∩A2)   +∋y +ist unerlaubte Zerlegung. +b) Nach Bemerkung 14 istZ(x) zusammenhängend ⇒Z(x) ⊆Z(x) ⇒Z(x) = Z(x) +c) Ist Z(y) ∩Z(x) ̸= ∅Bem. 15= = = = =⇒Z(y) ∪Z(x) ist zusammenhängend. +⇒Z(x) ∪Z(y) ⊆Z(x) ⇒Z(y) ⊆Z(x) +⊆Z(y) ⇒Z(x) ⊆Z(y) ■ Bemerkung 17 -Seif:X→Ystetig. IstA⊆Xzusammenhängend, so ist f(A)⊆Yzusammenhängend. -Beweis: Seif(A) =U1∪U2,Ui̸=∅,offen, disjunkt. -⇒f−1(f(A)) =f−1(U1)∪f−1(U2) -⇒A= (A∩f−1(U1)) -̸=∅∪(A∩f−1(U2)) -̸=∅■ +Sei f : X →Y stetig. IstA⊆X zusammenhängend, so istf(A) ⊆Y zusammenhängend. +Beweis: Sei f(A) = U1 ∪U2,Ui ̸= ∅, offen, disjunkt. +⇒f−1(f(A)) = f−1(U1) ∪f−1(U2) +⇒A= (A∩f−1(U1))   +̸=∅ +∪(A∩f−1(U2))   +̸=∅ +■ 1.5 Kompaktheit Definition 15 -SeiXeine Menge und U⊆P(X). -Uheißt eine Überdeckung vonX, wenn gilt: -∀x∈X:∃M∈U:x∈M +Sei X eine Menge undU ⊆P(X). 
+U heißt eineÜberdeckung von X, wenn gilt: +∀x∈X : ∃M ∈U : x∈M Definition 16 -Ein topologischer Raum Xheißtkompakt , wenn jede offene Überdeckung von X -U={Ui}i∈ImitUioffen inX +Ein topologischer RaumX heißtkompakt, wenn jede offene Überdeckung vonX +U = {Ui }i∈I mit Ui offen inX eine endliche Teilüberdeckung ⋃ -i∈J⊆IUi=Xmit|J|∈N +i∈J⊆I +Ui = X mit |J|∈ N besitzt. Bemerkung 18 -Das Einheitsintervall I:= [0,1]ist kompakt bezüglich der euklidischen Topologie. -Beweis: Sei(Ui)i∈Jeine offene Überdeckung von I. -Es genügt zu zeigen, dass es ein δ>0gibt, sodass jedes Teilintervall der Länge δvonIin -einem derUienthalten ist. Wenn es ein solches δgibt, kann man Iin endlich viele Intervalle +Das EinheitsintervallI := [0,1] ist kompakt bezüglich der euklidischen Topologie. +Beweis: Sei (Ui)i∈J eine offene Überdeckung vonI. +Es genügt zu zeigen, dass es einδ >0 gibt, sodass jedes Teilintervall der Längeδ von I in +einem derUi enthalten ist. Wenn es ein solchesδ gibt, kann manI in endlich viele Intervalle 1.5. KOMPAKTHEIT -der Längeδunterteilen und alle Uiin die endliche Überdeckung aufnehmen, die Teilintervalle +der Längeδunterteilen und alleUi in die endliche Überdeckung aufnehmen, die Teilintervalle enthalten. -Angenommen, es gibt kein solches δ. Dann gibt es für jedes n∈Nein Intervall In⊆[0,1] -der Länge 1/nsodassIn⊊Uifür allei∈J. -Seixnder Mittelpunkt von In. Die Folge (xn)hat einen Häufungspunkt x∈[0,1]. Dann -gibt esi∈Jmitx∈Ui. DaUioffen ist, gibt es ein ε >0, sodass (x−ε,x+ε)⊆Ui. -Dann gibt es n0, sodass gilt: 1/n0<ε/2und für unendlich viele3n≥n0:|x−xn|<ε/2, also -In⊆(x−ε,x+ε)⊆Uifür mindestens ein n∈N.4 +Angenommen, es gibt kein solchesδ. Dann gibt es für jedesn∈N ein IntervallIn ⊆[0,1] +der Länge1/n sodass In ⊊ Ui für allei∈J. +Sei xn der Mittelpunkt vonIn. Die Folge(xn) hat einen Häufungspunktx∈[0,1]. Dann +gibt es i ∈J mit x ∈Ui. Da Ui offen ist, gibt es einε >0, sodass (x−ε,x + ε) ⊆Ui. 
+Dann gibt esn0, sodass gilt:1/n0 <ε/2 und für unendlich viele3 n≥n0 : |x−xn|<ε/2, also +In ⊆(x−ε,x + ε) ⊆Ui für mindestens einn∈N.4 ⇒Widerspruch -Dann überdecke [0,1]mit endlich vielen Intervallen I1,...,Idder Längeδ. JedesIjist in -Uijenthalten. -⇒Uj1,...,Ujdist endliche Teilüberdeckung von U. ■ +Dann überdecke[0,1] mit endlich vielen IntervallenI1,...,I d der Längeδ. JedesIj ist in +Uij enthalten. +⇒Uj1 ,...,U jd ist endliche Teilüberdeckung vonU. ■ Beispiel 16 (Kompakte Räume) -1)Rist nicht kompakt. -2)(0,1)ist nicht kompakt. -Un= (1/n,1−1/n)⇒⋃ -n∈NUn= (0,1) -3)Rmit der Zariski-Topologie ist kompakt und jede Teilmenge von Rist es auch. +1) R ist nicht kompakt. +2) (0,1) ist nicht kompakt. +Un = (1/n,1 −1/n) ⇒⋃ +n∈N Un = (0,1) +3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge vonR ist es auch. Bemerkung 19 -SeiXkompakter Raum, A⊆Xabgeschlossen. Dann ist Akompakt. -Beweis: Sei(Vi)i∈Ioffene Überdeckung von A. -Dann gibt es für jedes i∈Ieine offene Teilmenge Ui⊆XmitVi=Ui∩A. -⇒A⊆⋃ -i∈IUi -⇒U={Ui|i∈I}∪{X\A}ist offene Überdeckung von X -Xkompakt= = = = = = =⇒es gibti1,...,in∈I, sodassn⋃ -j=1Uij∪(X\A) =X -⇒ -n⋃ -j=1Uij∪(X\A) -∩A=A -⇒n⋃ -j=1(Uij∩A) -=Vij∪((X\A)∩A) -=∅=A -⇒Vi1,...,Vinüberdecken A. +Sei X kompakter Raum,A⊆X abgeschlossen. Dann istA kompakt. +Beweis: Sei (Vi)i∈I offene Überdeckung von A. +Dann gibt es für jedesi∈I eine offene TeilmengeUi ⊆X mit Vi = Ui ∩A. +⇒A⊆ +⋃ +i∈I +Ui +⇒U = {Ui |i∈I}∪{ X\A}ist offene Überdeckung vonX +X kompakt += = = = = = =⇒ es gibti1,...,i n ∈I, sodass +n⋃ +j=1 +Uij ∪(X\A) = X +⇒ + + +n⋃ +j=1 +Uij ∪(X\A) + +∩A= A +⇒ +n⋃ +j=1 +(Uij ∩A)   +=Vij +∪((X\A) ∩A)   +=∅ += A +⇒Vi1 ,...,V in überdecken A. ■ Bemerkung 20 -SeienX,Ykompakte topologische Räume. Dann ist X×Ymit der Produkttopologie +Seien X,Y kompakte topologische Räume. Dann istX ×Y mit der Produkttopologie kompakt. -Beweis: Sei(Wi)i∈Ieine offene Überdeckung von X×Y. 
Für jedes (x,y)∈X×Ygibt es -offene Teilmengen Ux,yvonXundVx,yvonYsowie eini∈I, sodassUx,y×Vx,y⊆Wi. -3Dies gilt nicht für alle n≥n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. +Beweis: Sei (Wi)i∈I eine offene Überdeckung vonX ×Y. Für jedes(x,y) ∈X ×Y gibt es +offene TeilmengenUx,y von X und Vx,y von Y sowie eini∈I, sodassUx,y ×Vx,y ⊆Wi. +3Dies gilt nicht für allen≥n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4Sogar für unendlich viele. 1.5. KOMPAKTHEIT Wi xy -xVx,y -Ux,yYX +x +Vx,y +Ux,y +Y +X Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen -Die offenen Mengen Ux0,y×Vx0,yfür festesx0und alley∈Yüberdecken{x0}×y. DaY -kompakt ist, ist auch {x0}×Ykompakt. Also gibt es y1,...,ym(x0)mit⋃m(x0) -i=1Ux0,yi× -Vx0,yi⊇{x0}×Y. -SeiUx0:=⋂m(x) -i=1Ux0,yi. DaXkompakt ist, gibt es x1,...,xn∈Xmit⋃n -j=1Uxj=X +Die offenen MengenUx0,y ×Vx0,y für festesx0 und alley∈Y überdecken {x0 }×y. DaY +kompakt ist, ist auch{x0 }×Y kompakt. Also gibt esy1,...,y m(x0) mit ⋃m(x0) +i=1 Ux0,yi × +Vx0,yi ⊇{x0 }×Y. +Sei Ux0 := ⋂m(x) +i=1 Ux0,yi. DaX kompakt ist, gibt esx1,...,x n ∈X mit ⋃n +j=1 Uxj = X ⇒⋃k -j=1⋃m(xj) -i=1( -Uxj,yi×Vxj,yi) - -Ein grün-oranges Kästchen⊇X×Y +j=1 +⋃m(xj) +i=1 +( +Uxj,yi ×Vxj,yi +) +   +Ein grün-oranges Kästchen +⊇X×Y ⇒⋃ -j⋃ -iWi(xj,yi) =X×Y ■ +j +⋃ +iWi(xj,yi) = X×Y ■ Bemerkung 21 -SeiXein Hausdorffraum und K⊆Xkompakt. Dann ist Kabgeschlossen. +Sei X ein Hausdorffraum undK ⊆X kompakt. Dann istK abgeschlossen. Beweis: z. Z.:Komplement ist offen -IstX=K, so istKabgeschlossen in X. Andernfalls sei y∈X\K. Für jedes x∈Kseien -Uxbzw.VyUmgebungen von xbzw. vony, sodassUx∩Vy=∅. +Ist X = K, so istK abgeschlossen inX. Andernfalls seiy∈X\K. Für jedesx∈K seien +Ux bzw. Vy Umgebungen vonx bzw. vony, sodassUx ∩Vy = ∅. Xi -Kx +K +x y -DaKkompakt ist, gibt es endlich viele x1,...,xn∈K, sodass⋃m -i=1Uxi⊇K. -SeiV:=n⋂ -i=1Vxi +Da K kompakt ist, gibt es endlich vielex1,...,x n ∈K, sodass⋃m +i=1 Uxi ⊇K. 
+Sei V := +n⋂ +i=1 +Vxi 1.6. WEGE UND KNOTEN -⇒V∩(n⋃ -i=1Uxi) -=∅ -⇒V∩K=∅ -⇒Vist Überdeckung von y, die ganz in X\Kenthalten ist . -⇒X\Kist offen -Damit istKabgeschlossen. ■ +⇒V ∩ +(n⋃ +i=1 +Uxi +) += ∅ +⇒V ∩K = ∅ +⇒V ist Überdeckung vony, die ganz inX\K enthalten ist. +⇒X\K ist offen +Damit istK abgeschlossen. ■ Bemerkung 22 -SeienX,Ytopologische Räume, f:X→Ystetig. -IstK⊆Xkompakt, so ist f(K)⊆Ykompakt. -Beweis: Sei(Vi)i∈Ioffene Überdeckung von f(K) -fstetig= = = =⇒(f−1(Vi))i∈Iist offene Überdeckung von K -Kompakt= = = = =⇒es gibti1,...,in, sodassf−1(Vi1),...,f−1(Vin)Überdeckung von Kist. -⇒f(f−1(Vi1)),...,f (f−1(Vin))überdecken f(K). -Es gilt:f(f−1(V)) =V∩f(X) ■ +Seien X,Y topologische Räume,f : X →Y stetig. +Ist K ⊆X kompakt, so istf(K) ⊆Y kompakt. +Beweis: Sei (Vi)i∈I offene Überdeckung vonf(K) +f stetig += = = =⇒(f−1(Vi))i∈I ist offene Überdeckung vonK +Kompakt += = = = =⇒es gibti1,...,i n, sodassf−1(Vi1 ),...,f −1(Vin) Überdeckung vonK ist. +⇒f(f−1(Vi1 )),...,f (f−1(Vin)) überdecken f(K). +Es gilt:f(f−1(V)) = V ∩f(X) ■ Satz 1.1 (Heine-Borel) -Eine Teilmenge von RnoderCnist genau dann kompakt, wenn sie beschränkt und +Eine Teilmenge vonRn oder Cn ist genau dann kompakt, wenn sie beschränkt und abgeschlossen ist. -Beweis: „⇒“: SeiK⊆Rn(oderCn) kompakt. -DaRnundCnhausdorffsch sind, ist Knach Bemerkung 21 abgeschlossen. Nach Voraussetzung - kannKmit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒Kist +Beweis: „⇒“: SeiK ⊆Rn (oder Cn) kompakt. +Da Rn und Cn hausdorffsch sind, istK nach Bemerkung 21 abgeschlossen. Nach Voraussetzung + kannK mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden⇒K ist beschränkt. -„⇐“ SeiA⊆Rn(oderCn) beschränkt und abgeschlossen. -Dann gibt es einen Würfel W= [−N,N ]×···× [−N,N ] -nmalmitA⊆Wbzw. „Polyzylinder“ -Z={(z1,...,zn)∈Cn|zi≤Nfüri= 1,...,n} -Nach Bemerkung 20 und Bemerkung 18 ist Wkompakt, also ist Anach Bemerkung 19 auch -kompakt. 
Genauso ist Zkompakt, weil -{z∈C∥z|≤1} -homöomorph zu{ -(x,y)∈R2⏐⏐∥(x,y)∥≤1} +„⇐“ SeiA⊆Rn (oder Cn) beschränkt und abgeschlossen. +Dann gibt es einen WürfelW = [−N,N ] ×···× [−N,N ]   +n mal +mit A⊆W bzw. „Polyzylinder“ +Z = {(z1,...,z n) ∈Cn |zi ≤N für i= 1,...,n } +Nach Bemerkung 20 und Bemerkung 18 istW kompakt, also istAnach Bemerkung 19 auch +kompakt. Genauso istZ kompakt, weil +{z∈C ∥z|≤ 1 } +homöomorph zu { +(x,y) ∈R2 ⏐⏐∥(x,y)∥≤ 1 +} ist. ■ 1.6 Wege und Knoten Definition 17 -SeiXein topologischer Raum. +Sei X ein topologischer Raum. 1.6. WEGE UND KNOTEN -a) EinWeginXist eine stetige Abbildung γ: [0,1]→X. -b)γheißtgeschlossen , wennγ(1) =γ(0)gilt. -c)γheißteinfach, wennγ|[0,1)injektiv ist. +a) Ein Wegin X ist eine stetige Abbildungγ : [0,1] →X. +b) γ heißtgeschlossen, wennγ(1) = γ(0) gilt. +c) γ heißteinfach, wennγ|[0,1) injektiv ist. Beispiel 17 -IstXdiskret, so ist jeder Weg konstant, d. h. von der Form -∀x∈[0,1] :γ(x) =c, c∈X -Dennγ([0,1])ist zusammenhängend für jeden Weg γ. +Ist X diskret, so ist jeder Weg konstant, d. h. von der Form +∀x∈[0,1] : γ(x) = c, c ∈X +Denn γ([0,1]) ist zusammenhängend für jeden Wegγ. Definition 18 -Ein topologischer Raum Xheißtwegzusammenhängend , wenn es zu je zwei Punkten -x,y∈Xeinen Wegγ: [0,1]→Xgibt mitγ(0) =xundγ(1) =y. +Ein topologischer RaumX heißtwegzusammenhängend, wenn es zu je zwei Punkten +x,y ∈X einen Wegγ : [0,1] →X gibt mitγ(0) = x und γ(1) = y. Bemerkung 23 -SeiXein topologischer Raum. -a)Xist wegzusammenhängend ⇒Xist zusammenhängend -b)Xist wegzusammenhängend ̸⇐Xist zusammenhängend +Sei X ein topologischer Raum. +a) X ist wegzusammenhängend⇒X ist zusammenhängend +b) X ist wegzusammenhängend̸⇐X ist zusammenhängend Beweis: -a)SeiXein wegzusammenhängender topologischer Raum, A1,A2nichtleere, disjunkte, -abgeschlossene Teilmengen von XmitA1∪A2=X. Seix∈A1,y∈A2,γ: [0,1]→X -ein Weg von xnachy. -Dann istC:=γ([0,1])⊆Xzusammenhängend, weil γstetig ist. 
-C= (C∩A1) -∋x∪(C∩A2) +a) Sei X ein wegzusammenhängender topologischer Raum,A1,A2 nichtleere, disjunkte, +abgeschlossene Teilmengen vonX mit A1 ∪A2 = X. Seix∈A1,y ∈A2,γ : [0,1] →X +ein Weg vonx nach y. +Dann istC := γ([0,1]) ⊆X zusammenhängend, weilγ stetig ist. +C = (C∩A1)   +∋x +∪(C∩A2)   ∋y -ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒Widerspruch -b) SeiX={ -(x,y)∈R2⏐⏐⏐x2+y2= 1∨y= 1 + 2·e−1 -10x} +ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen⇒Widerspruch +b) Sei X = +{ +(x,y) ∈R2 +⏐⏐⏐x2 + y2 = 1 ∨y= 1 + 2·e−1 +10 x +} . Abbildung 1.8a veranschaulicht diesen Raum. -SeiU1∪U2=X,U 1̸=U2=∅,Uioffen.X=C∪S. Dann istC⊆U1oderC⊆U2, -weilCundSzusammenhängend sind. -Also istC=U1undS=U2(oder umgekehrt). -Seiy∈C=U1,ε> 0undBε(y)⊆U1eine Umgebung von y, die inU1enthalten ist. -Aber: Bε(y)∩S̸=∅⇒Widerspruch⇒X∪Sist zusammenhängend, aber nicht +Sei U1 ∪U2 = X,U1 ̸= U2 = ∅,Ui offen. X = C∪S. Dann istC ⊆U1 oder C ⊆U2, +weil C und S zusammenhängend sind. +Also istC = U1 und S = U2 (oder umgekehrt). +Sei y∈C = U1,ε> 0 und Bε(y) ⊆U1 eine Umgebung vony, die inU1 enthalten ist. +Aber: Bε(y) ∩S ̸= ∅⇒ Widerspruch ⇒X ∪S ist zusammenhängend, aber nicht wegzusammenhängend. ■ Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0,1]→[0,1]×[0,1]. Ein Beispiel ist die in Abbildung +Es gibt stetige, surjektive Abbildungen[0,1] →[0,1] ×[0,1]. Ein Beispiel ist die in Abbildung 1.9 dargestellte Hilbert-Kurve. Definition 19 -SeiXein topologischer Raum. Eine Jordankurve inXist ein Homöomorphismus γ: -[0,1]→C⊆Xbzw.γ:S1→C⊆X, wobeiC:= Bildγ. +Sei X ein topologischer Raum. EineJordankurve in X ist ein Homöomorphismus γ : +[0,1] →C ⊆X bzw. γ : S1 →C ⊆X, wobeiC := Bild γ. 1.6. 
WEGE UND KNOTEN -(a) Spirale Smit KreisC0.1 1 -−101 -XY{(x,sin(1 -x))∈X×Y} -(−1,1)⊆Y +(a) SpiraleS mit KreisC +0.1 1 +−1 +0 +1 +X +Y +{(x,sin( 1 +x)) ∈X ×Y} +(−1,1) ⊆Y (b) Sinus -Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend +Abbildung 1.8:Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend sind. -(a)n= 1 (b)n= 2 (c)n= 3 (d)n= 4 (e)n= 5 +(a) n= 1 (b) n= 2 (c) n= 3 (d) n= 4 (e) n= 5 Abbildung 1.9: Hilbert-Kurve Jede Jordankurve ist also ein einfacher Weg. Satz 1.2 (Jordanscher Kurvensatz) -IstC=γ([0,1])eine geschlossene Jordankurve in R2, so hat R2\Cgenau zwei +Ist C = γ([0,1]) eine geschlossene Jordankurve in R2, so hat R2 \C genau zwei Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. außen innen Jordankurve -Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die beschränkte +Abbildung 1.10:Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die beschränkte äußeres genannt. Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie: Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265) nachgelesen werden. -Idee: Ersetze Weg Cdurch Polygonzug. +Idee: Ersetze WegC durch Polygonzug. 1.6. WEGE UND KNOTEN Definition 20 -Eine geschlossene Jordankurve in R3heißtKnoten. +Eine geschlossene Jordankurve inR3 heißtKnoten. Beispiel 19 (Knoten) (a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten - (d)62-Knoten + (d) 62-Knoten Abbildung 1.11: Beispiele für verschiedene Knoten Definition 21 -Zwei Knoten γ1,γ2:S1→R3heißenäquivalent , wenn es eine stetige Abbildung -H:S1×[0,1]→R3 +Zwei Knotenγ1,γ2 : S1 →R3 heißenäquivalent, wenn es eine stetige Abbildung +H : S1 ×[0,1] →R3 gibt mit -H(z,0) =γ1(z)∀z∈S1 -H(z,1) =γ2(z)∀z∈S1 -und für jedes feste t∈[0,1]ist -Hz:S1→R3,z↦→H(z,t) -ein Knoten. Die Abbildung HheißtIsotopie zwischenγ1undγ2. 
+H(z,0) = γ1(z) ∀z∈S1 +H(z,1) = γ2(z) ∀z∈S1 +und für jedes festet∈[0,1] ist +Hz : S1 →R3,z ↦→H(z,t) +ein Knoten. Die AbbildungH heißtIsotopie zwischen γ1 und γ2. Definition 22 -Seiγ: [0,1]→R3ein Knoten, Eeine Ebene und π:R3→Eeine Projektion auf E. -πheißtKnotendiagramm vonγ, wenn gilt: -⏐⏐π−1(x)⏐⏐≤2∀x∈π(γ) -Ist(π|γ([0,1]))−1(x) ={y1,y2}, soliegty1übery2, wenn gilt: -∃λ>1 : (y1−x) =λ(y2−x) +Sei γ : [0,1] →R3 ein Knoten,E eine Ebene undπ: R3 →E eine Projektion aufE. +π heißtKnotendiagramm von γ, wenn gilt: +⏐⏐π−1(x) +⏐⏐≤2 ∀x∈π(γ) +Ist (π|γ([0,1]))−1(x) = {y1,y2 }, soliegt y1 über y2, wenn gilt: +∃λ> 1 : (y1 −x) = λ(y2 −x) Satz 1.3 (Satz von Reidemeister) Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. 1.6. WEGE UND KNOTEN -(a)Ω1 - (b)Ω2 -(c)Ω3 +(a) Ω1 + (b) Ω2 +(c) Ω3 Abbildung 1.12: Reidemeister-Züge Beweis: Durch sorgfältige Fallunterscheidung.5 Definition 23 -Ein Knotendiagramm heißt 3-färbbar , wenn jeder Bogen von Dso mit einer Farbe gefärbt +Ein Knotendiagramm heißt3-färbbar, wenn jeder Bogen vonD so mit einer Farbe gefärbt werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben auftreten. Abbildung 1.13: Ein 3-gefärber Kleeblattknoten @@ -704,26 +817,26 @@ Abbildung 1.13: Ein 3-gefärber Kleeblattknoten 1.6. WEGE UND KNOTEN Übungsaufgaben Aufgabe 1 (Sierpińskiraum) -Es seiX:={0,1}undTX:={∅,{0},X}. Dies ist der sogenannte Sierpińskiraum. -(a) Beweisen Sie, dass (X,TX)ein topologischer Raum ist. -(b) Ist (X,TX)hausdorffsch? -(c) Ist TXvon einer Metrik erzeugt? +Es seiX := {0,1 }und TX := {∅,{0 },X }. Dies ist der sogenannte Sierpińskiraum. +(a) Beweisen Sie, dass(X,TX) ein topologischer Raum ist. +(b) Ist (X,TX) hausdorffsch? +(c) Ist TX von einer Metrik erzeugt? 
Aufgabe 2 -Es seiZmit der von den Mengen Ua,b:=a+bZ(a∈Z,b∈Z\{0})erzeugten Topologie +Es seiZ mit der von den MengenUa,b := a+ bZ(a∈Z,b ∈Z \{0 }) erzeugten Topologie versehen. Zeigen Sie: -(a) JedesUa,bund jede einelementige Teilmenge von Zist abgeschlossen. -(b){−1,1}ist nicht offen. +(a) Jedes Ua,b und jede einelementige Teilmenge vonZ ist abgeschlossen. +(b) {−1,1 }ist nicht offen. (c) Es gibt unendlich viele Primzahlen. Aufgabe 3 (Cantorsches Diskontinuum) -Für jedesi∈NseiPi:={0,1}mit der diskreten Topologie. Weiter Sei P:=∏ -i∈NPi. -(a) Wie sehen die offenen Mengen von Paus? -(b) Was können Sie über den Zusammenhang von Psagen? +Für jedesi∈N sei Pi := {0,1 }mit der diskreten Topologie. Weiter SeiP := ∏ +i∈N Pi. +(a) Wie sehen die offenen Mengen vonP aus? +(b) Was können Sie über den Zusammenhang vonP sagen? Aufgabe 4 (Kompaktheit) -(a) Ist GLn(R) ={A∈Rn×n|det(A)̸= 0}kompakt? -(b) Ist SLn(R) ={A∈Rn×n|det(A) = 1}kompakt? -(c) IstP(R)kompakt? +(a) Ist GLn(R) = {A∈Rn×n |det(A) ̸= 0 }kompakt? +(b) Ist SLn(R) = {A∈Rn×n |det(A) = 1 }kompakt? +(c) Ist P(R) kompakt? Aufgabe 5 (Begriffe) Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, @@ -738,1345 +851,1596 @@ Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie Simplizialkomplexe 2.1 Topologische Mannigfaltigkeiten Definition 24 -Sei(X,T)ein topologischer Raum und n∈N. -a)Einen-dimensionale KarteaufXist ein Paar (U,ϕ), wobeiU∈Tundϕ:U→V -Homöomorphismus von Uauf eine offene Teilmenge V⊆Rn. -b)Einn-dimensionaler AtlasAaufXist eine Familie (Ui,ϕi)i∈Ivon Karten auf X, -sodass⋃ -i∈IUi=X. -c)Xheißt (topologische) n-dimensionale Mannigfaltigkeit , wennXhausdorffsch ist, -eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. -Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rnähnlich. +Sei (X,T) ein topologischer Raum undn∈N. 
+a) Eine n-dimensionale Karte auf X ist ein Paar(U,ϕ), wobeiU ∈T und ϕ: U →V +Homöomorphismus vonU auf eine offene TeilmengeV ⊆Rn. +b) Ein n-dimensionaler Atlas Aauf X ist eine Familie(Ui,ϕi)i∈I von Karten aufX, +sodass ⋃ +i∈I Ui = X. +c) X heißt (topologische)n-dimensionale Mannigfaltigkeit, wennX hausdorffsch ist, +eine abzählbare Basis der Topologie hat und einenn-dimensionalen Atlas besitzt. +Anschaulich ist also einn-dimensionale Mannigfaltigkeit lokal demRn ähnlich. Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) -Jeden-dimensionale Mannigfaltigkeit mit n≥1ist mindestens so mächtig wie R. -Beweis: Sei(X,T)ein topologischer Raum und (U,ϕ)mitU∈Tundϕ:U→V⊆Rn, wobei -Voffen undϕein Homöomorphismus ist, eine Karte auf X. -Da jede offene Teilmenge des Rngenauso mächtig ist wie der Rn,ϕals Homöomorphismus +Jede n-dimensionale Mannigfaltigkeit mitn≥1 ist mindestens so mächtig wieR. +Beweis: Sei (X,T) ein topologischer Raum und(U,ϕ) mit U ∈T und ϕ: U →V ⊆Rn, wobei +V offen undϕ ein Homöomorphismus ist, eine Karte aufX. +Da jede offene Teilmenge desRn genauso mächtig ist wie derRn, ϕ als Homöomorphismus insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig -sind, istUgenauso mächtig wie der Rn. Da jede Mannigfaltigkeit mindestens eine Karte -hat, muss jede Mannigfaltigkeit Xmindestens so mächtig sein wie der Rn.■ -Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können +sind, istU genauso mächtig wie derRn. Da jede Mannigfaltigkeit mindestens eine Karte +hat, muss jede MannigfaltigkeitX mindestens so mächtig sein wie derRn. ■ +Hinweis: Es gibt auch noch0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können beliebig viele Elemente haben. Bemerkung 25 -a) Es gibt surjektive, stetige Abbildungen [0,1]→[0,1]×[0,1] -b)Fürn̸=msindRnundRmnicht homöomorph. 
Zum Beweis benutzt man den „Satz +a) Es gibt surjektive, stetige Abbildungen[0,1] →[0,1] ×[0,1] +b) Für n̸= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz von der Gebietstreue“ (Brouwer): -IstU⊆Rnoffen undf:U→Rnstetig und injektiv, so ist f(U)offen. -Istn0}→B1(0,..., 0 -∈Rn) -Ci:={(x1,...,xn+1)∈Sn|xi<0}→B1(0,..., 0) -(x1,...,xn+1)↦→(x1,..., xi,...,xn+1)1 -(x1,...,xn)↦→(x1,...,xi−1,√ -1−∑n -k=1x2 -k,xi,...,xn), oder−√ -1−∑n -k=1x2 -kfürCi -Sn=⋃n+1 -i=1(Ci∪Di) -Als kompakte Mannigfaltigkeit wird Snauch „geschlossene Mannigfaltigkeit“ genannt. -5)[0,1]ist keine Mannigfaltigkeit, denn: -Es gibt keine Umgebung von 0in[0,1], die homöomorph zu einem offenem Intervall +Di := {(x1,...,x n+1) ∈Sn|xi >0}→ B1(0,..., 0   +∈Rn +) +Ci := {(x1,...,x n+1) ∈Sn|xi <0}→ B1(0,..., 0) +(x1,...,x n+1) ↦→(x1,..., xi,...,x n+1)1 +(x1,...,x n) ↦→(x1,...,x i−1, +√ +1 −∑n +k=1 x2 +k,xi,...,x n), oder− +√ +1 −∑n +k=1 x2 +k für Ci +Sn = ⋃n+1 +i=1 (Ci ∪Di) +Als kompakte Mannigfaltigkeit wirdSn auch „geschlossene Mannigfaltigkeit“ genannt. +5) [0,1] ist keine Mannigfaltigkeit, denn: +Es gibt keine Umgebung von0 in [0,1], die homöomorph zu einem offenem Intervall ist. -1xiwird rausgenommen +1xi wird rausgenommen 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN -6)V1={ -(x,y)∈R2⏐⏐x·y= 0} +6) V1 = +{ +(x,y) ∈R2 ⏐⏐x·y= 0 +} ist keine Mannigfaltigkeit. Das Problem ist (0,0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 -Zusammenhangskomponenten. Jeder Rnzerfällt jedoch in höchstens zwei Zusammenhangskomponenten, +Zusammenhangskomponenten. JederRn zerfällt jedoch in höchstens zwei Zusammenhangskomponenten, wenn man einen Punkt entfernt. -7)V2={ -(x,y)∈R2⏐⏐x3=y2} +7) V2 = +{ +(x,y) ∈R2 ⏐⏐x3 = y2 } ist eine Mannigfaltigkeit. -8)X= (R\{0})∪(01,02) -U⊆Xoffen⇔{ -Uoffen in R\{0},falls01/∈U,02∈U -∃ε>0 : (−ε,ε)⊆Ufalls01∈U,02∈U -Insbesondere sind (R\{0})∪{01}und(R\{0})∪{02}offen und homöomorph -zuR. -Aber:Xist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01 -und02. 
-9)GLn(R)ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 +8) X = (R \{0 }) ∪(01,02) +U ⊆X offen ⇔ +{ +U offen inR \{0 }, falls 01 /∈U,02 ∈U +∃ε> 0 : (−ε,ε) ⊆U falls 01 ∈U,02 ∈U +Insbesondere sind(R \{0 }) ∪{01 }und (R \{0 }) ∪{02 }offen und homöomorph +zu R. +Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von01 +und 02. +9) GLn(R) ist eine Mannigfaltigkeit der Dimensionn2, weil offene Teilmengen vonRn2 eine Mannigfaltigkeit bilden. Definition 25 -SeienX,Y n-dimensionale Mannigfaltigkeiten, U⊆XundV⊆Yoffen, Φ :U→Vein Homöomorphismus - Z= (X˙∪Y)/∼mit der von u∼Φ(u)∀u∈Uerzeugten Äquivalenzrelation +Seien X,Y n-dimensionale Mannigfaltigkeiten,U ⊆X und V ⊆Y offen, Φ : U →V ein Homöomorphismus + Z = (X ˙∪Y)/∼mit der vonu∼Φ(u) ∀u∈U erzeugten Äquivalenzrelation und der von∼induzierten Quotiententopologie. -ZheißtVerklebung vonXundYlängsUundV.ZbesitzteinenAtlasaus n-dimensionalen -Karten. Falls Zhausdorffsch ist, ist Zeinen-dimensionale Mannigfaltigkeit. +ZheißtVerklebungvonXundY längsU undV.Zbesitzt einen Atlas ausn-dimensionalen +Karten. FallsZ hausdorffsch ist, istZ eine n-dimensionale Mannigfaltigkeit. Bemerkung 26 -SindX,YMannigfaltigkeiten der Dimension nbzw.m, so istX×Yeine Mannigfaltigkeit -der Dimension n+m. +Sind X,Y Mannigfaltigkeiten der Dimensionn bzw. m, so istX×Y eine Mannigfaltigkeit +der Dimensionn+ m. Beweis: Produkte von Karten sind Karten. ■ Beispiel 21 Mannigfaltigkeiten mit Dimension 1: -1) Offene Intervalle, R,(0,1)sind alle homöomorph -2)S1 +1) Offene Intervalle,R, (0,1) sind alle homöomorph +2) S1 Mannigfaltigkeiten mit Dimension 2: -1)R2 -2)S2(0 Henkel) -3)T2(1 Henkel) +1) R2 +2) S2 (0 Henkel) +3) T2 (1 Henkel) 4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1 Bemerkung 27 -Sein∈N,F:Rn→Rstetig differenzierbar und X=V(F) :={x∈Rn|F(x) = 0}das +Sei n∈N,F : Rn →R stetig differenzierbar undX = V(F) := {x∈Rn |F(x) = 0 }das „vanishing set“. Dann gilt: 2.1. 
TOPOLOGISCHE MANNIGFALTIGKEITEN Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. -a)Xist abgeschlossen in Rn -b) Ist grad(F)(X)̸= 0∀x∈X, so istXeine Mannigfaltigkeit der Dimension n−1. +a) X ist abgeschlossen inRn +b) Ist grad(F)(X) ̸= 0 ∀x∈X, so istX eine Mannigfaltigkeit der Dimensionn−1. Beweis: -a)Seiy∈Rn\V(F). WeilFstetig ist, gibt es δ>0, sodassF(Bδ(y))⊆Bε(F(y))mit -ε=1 -2∥F(y)∥. Folgt Bδ(y)∩V(F) =∅⇒Rn\V(F)ist offen. -b)Seix∈Xmitgrad(F)(x)̸= 0, also o. B. d. A.∂F -∂X1(x)̸= 0,x= (x1,...,xn), -x′:= (x2,...,xn)∈Rn−1. Der Satz von der impliziten Funktion liefert nun: Es -gibt Umgebungen Uvonx′und differenzierbare Funktionen g:U→R, sodass -G:U→Rn, u↦→(g(u),u)eine stetige Abbildung auf eine offene Umgebung Vvonx -inXist. +a) Sei y∈Rn \V(F). WeilF stetig ist, gibt esδ >0, sodassF(Bδ(y)) ⊆Bε(F(y)) mit +ε= 1 +2 ∥F(y)∥. FolgtBδ(y) ∩V(F) = ∅⇒ Rn \V(F) ist offen. +b) Sei x ∈ X mit grad(F)(x) ̸= 0 , also o. B. d. A. ∂F +∂X1 +(x) ̸= 0 , x = ( x1,...,x n), +x′ := ( x2,...,x n) ∈Rn−1. Der Satz von der impliziten Funktion liefert nun: Es +gibt Umgebungen U von x′ und differenzierbare Funktionen g : U → R, sodass +G: U →Rn, u↦→(g(u),u) eine stetige Abbildung auf eine offene UmgebungV von x +in X ist. ■ Beispiel 22 -1)F:R3→R,(x,y,z )↦→x2+y2+z2−1,V(F) =S2,grad(F) = (2x,2y,2z)Bem. 27.b= = = = = =⇒ -Snistn-dimensionale Mannigfaltigkeit in Rn+1 -2)F:R2→R,(x,y)↦→y2−x3Es gilt: grad(F) = (−3x2,2y). Also: grad(0,0) = (0,0). -−5−4−3−2−1012345−4 +1) F : R3 →R, (x,y,z ) ↦→x2 +y2 +z2 −1, V(F) = S2, grad(F) = (2x,2y,2z) Bem. 27.b= = = = = =⇒ +Sn ist n-dimensionale Mannigfaltigkeit inRn+1 +2) F : R2 →R, (x,y) ↦→y2 −x3 Es gilt:grad(F) = (−3x2,2y). Also:grad(0,0) = (0,0). 
+−5−4−3−2−1012345 +−4 −2 0 2 -4−1000100 -xyz -−1000100f(x,y) -(a)F(x,y) =y2−x32 4 6 8 10 12 -−10−5510 -xy -a=1 +4 +−100 +0 +100 +x +y +z +−100 +0 +100 +f(x,y) +(a) F(x,y) = y2 −x3 +2 4 6 8 10 12 +−10 +−5 +5 +10 +x +y +a= 1 3 a= 1 a= 2 -(b)y2−ax3= 0 -Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. -DaheristBemerkung 27.bnichtanwendbar,aber V(F)isttrotzdemeine1-dimensionale +(b) y2 −ax3 = 0 +Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parametera. +Daherist Bemerkung 27.bnicht anwendbar, aberV(F) isttrotzdemeine 1-dimensionale topologische Mannigfaltigkeit. 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN Definition 26 -SeiXein Hausdorffraum mit abzählbarer Basis der Topologie. Xheißtn-dimensionale -Mannigfaltigkeit mit Rand , wenn es einen Atlas (Ui,ϕi)gibt, wobei Ui⊆Xioffen und -ϕiein Homöomorphismus auf eine offene Teilmenge von +Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie.X heißtn-dimensionale +Mannigfaltigkeit mit Rand, wenn es einen Atlas(Ui,ϕi) gibt, wobeiUi ⊆Xi offen und +ϕi ein Homöomorphismus auf eine offene Teilmenge von Rn -+,0:={(x1,...,xn)∈Rn|xn≥0} ++,0 := {(x1,...,x n) ∈Rn |xn ≥0 } ist. Rn -+,0ist ein „Halbraum“. ++,0 ist ein „Halbraum“. Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. ∼= (a) Halbraum +∼ += +(b) Pair of pants ∼= -(b) Pair of pants∼= (c) Sphäre mit einem Loch Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand Definition 27 -SeiXeinen-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt -∂X:=⋃ -(U,ϕ)∈A{x∈U|ϕ(x) = 0} -RandvonX. -∂Xist eine Mannigfaltigkeit der Dimension n−1. +Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und AtlasA. Dann heißt +∂X := +⋃ +(U,ϕ)∈A +{x∈U |ϕ(x) = 0 } +Rand von X. +∂X ist eine Mannigfaltigkeit der Dimensionn−1. 
Definition 28 -SeiXeinen-dimensionale Mannigfaltigkeit mit Atlas (Ui,ϕi)i∈I -Füri,j∈ImitUi∩Uj̸=∅heißt -ϕij:=ϕj◦ϕ−1 +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas(Ui,ϕi)i∈I +Für i,j ∈I mit Ui ∩Uj ̸= ∅heißt +ϕij := ϕj ◦ϕ−1 i -ϕi(Ui∩Uj)→ϕj(Ui∩Uj) -Kartenwechsel oderÜbergangsfunktion . +ϕi(Ui ∩Uj) →ϕj(Ui ∩Uj) +Kartenwechseloder Übergangsfunktion. 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN -RnRnUiUj -Vi VjX +Rn Rn +Ui Uj +Vi Vj +X ϕi ϕj Abbildung 2.4: Kartenwechsel 2.2 Differenzierbare Mannigfaltigkeiten Definition 29 -SeiXeinen-dimensionale Mannigfaltigkeit mit Atlas (Ui,ϕi)i∈I. -a)Xheißtdifferenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Kartenwechselabbildung +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas(Ui,ϕi)i∈I. +a) X heißtdifferenzierbare Mannigfaltigkeit der KlasseCk, wenn jede Kartenwechselabbildung ϕij, i,j∈I k-mal stetig differenzierbar ist. -b)Xheißtdifferenzierbare Mannigfaltigkeit , wennXeine differenzierbare Mannigfaltigkeit - der Klasse C∞ist. -Differenzierbare Mannigfaltigkeiten der Klasse C∞werden auch glattgenannt. +b) X heißtdifferenzierbare Mannigfaltigkeit, wennX eine differenzierbare Mannigfaltigkeit + der KlasseC∞ist. +Differenzierbare Mannigfaltigkeiten der KlasseC∞werden auchglatt genannt. Definition 30 -SeiXeine differenzierbare Mannigfaltigkeit der Klasse Ck(k∈N∪{∞}) mit Atlas +Sei X eine differenzierbare Mannigfaltigkeit der KlasseCk (k ∈N ∪{∞} ) mit Atlas A= (Ui,ϕi)i∈I. -a)Eine Karte (U,ϕ)aufXheißtverträglich mitA, wenn alle Kartenwechsel ϕ◦ϕ−1 +a) Eine Karte(U,ϕ) auf X heißtverträglichmit A, wenn alle Kartenwechselϕ◦ϕ−1 i -undϕi◦ϕ−1(i∈ImitUi∩U̸=∅) differenzierbar von Klasse Cksind. -b)Die Menge aller mit Averträglichen Karten auf Xbildet einen maximalen Atlas der -KlasseCk. Er heißtCk-Struktur aufX. -EineC∞-Struktur heißt auch differenzierbare Struktur aufX. +und ϕi ◦ϕ−1 (i∈I mit Ui ∩U ̸= ∅) differenzierbar von KlasseCk sind. +b) Die Menge aller mitAverträglichen Karten aufX bildet einen maximalen Atlas der +Klasse Ck. 
Er heißtCk-Struktur auf X. +Eine C∞-Struktur heißt auchdifferenzierbare Strukturauf X. Bemerkung 28 -Fürn≥4gibt es aufSnmehrere verschiedene differenzierbare Strukturen, die sogenannten +Für n≥4 gibt es aufSn mehrere verschiedene differenzierbare Strukturen, die sogenannten „exotische Sphären“. Definition 31 -SeienX,Ydifferenzierbare Mannigfaltigkeiten der Dimension nbzw.m,x∈X. -a)Eine stetige Abbildung f:X→Yheißtdifferenzierbar inx(von Klasse Ck), wenn -es Karten (U,ϕ)vonXmitx∈Uund (V,ψ)vonYmitf(U)⊆Vgibt, sodass -ψ◦f◦ϕ−1stetig differenzierbar von Klasse Ckinϕ(x)ist. -b)fheißtdifferenzierbar (von Klasse Ck), wennfin jedemx∈Xdifferenzierbar ist. -c)fheißtDiffeomorphismus , wennfdifferenzierbar von Klasse C∞ist und es eine -differenzierbare Abbildung g:Y→Xvon Klasse C∞gibt mitg◦f=idXund -f◦g=idY. +Seien X,Y differenzierbare Mannigfaltigkeiten der Dimensionn bzw. m, x∈X. +a) Eine stetige Abbildungf : X →Y heißtdifferenzierbar in x(von KlasseCk), wenn +es Karten (U,ϕ) von X mit x ∈U und (V,ψ) von Y mit f(U) ⊆V gibt, sodass +ψ◦f ◦ϕ−1 stetig differenzierbar von KlasseCk in ϕ(x) ist. +b) f heißtdifferenzierbar (von KlasseCk), wennf in jedemx∈X differenzierbar ist. +c) f heißtDiffeomorphismus, wennf differenzierbar von KlasseC∞ ist und es eine +differenzierbare Abbildung g : Y →X von Klasse C∞ gibt mit g◦f = idX und +f ◦g= idY. 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Bemerkung 29 Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. -Beweis: Seien (U′,ϕ′)und(V′,ψ′)Karten von Xbzw.Yumxbzw.f(x)mitf(U′)⊆V′. -⇒ψ′◦f◦(ϕ′)−1 -=ψ′◦(ψ−1◦ψ)◦f◦(ϕ−1◦ϕ)◦(ϕ′)−1 -ist genau dann differenzierbar, wenn ψ◦f◦ϕ−1differenzierbar ist. +Beweis: Seien (U′,ϕ′) und (V′,ψ′) Karten vonX bzw. Y um x bzw. f(x) mit f(U′) ⊆V′. +⇒ψ′◦f ◦(ϕ′)−1 += ψ′◦(ψ−1 ◦ψ) ◦f ◦(ϕ−1 ◦ϕ) ◦(ϕ′)−1 +ist genau dann differenzierbar, wennψ◦f ◦ϕ−1 differenzierbar ist. 
Beispiel 23 -f:R→R, x↦→x3ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) :=3√x -gilt:f◦g=idR, g◦f=idR +f : R →R, x ↦→x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mitg(x) := 3√x +gilt: f ◦g= idR, g ◦f = idR Bemerkung 30 -SeiXeine glatte Mannigfaltigkeit. Dann ist -Diffeo(X) :={f:X→X|fist Diffeomorphismus } -eine Untergruppe von Homöo (X). +Sei X eine glatte Mannigfaltigkeit. Dann ist +Diffeo(X) := {f : X →X |f ist Diffeomorphismus} +eine Untergruppe von Homöo(X). Definition 32 -S⊆R3heißtreguläre Fläche :⇔∀s∈S∃Umgebung V(s)⊆R3∃U⊆R2offen: -∃differenzierbare Abbildung F:U→V∩S: Rg(JF(u)) = 2∀u∈U. -Fheißt (lokale) reguläre Parametrisierung vonS. +S ⊆ R3 heißtreguläre Fläche :⇔∀s ∈ S ∃Umgebung V(s) ⊆ R3 ∃U ⊆ R2 offen: +∃differenzierbare AbbildungF : U →V ∩S: Rg(JF(u)) = 2 ∀u∈U. +F heißt (lokale)reguläre Parametrisierungvon S. F(u,v) = (x(u,v),y(u,v),z(u,v)) -JF(u,v) = -∂x -∂u(p)∂x +JF(u,v) = + + +∂x +∂u(p) ∂x ∂v(p) ∂y -∂u(p)∂y +∂u(p) ∂y ∂v(p) ∂z -∂u(p)∂z -∂v(p) +∂u(p) ∂z +∂v(p) +  Beispiel 24 -1) Rotationsflächen: Sei r:R→R>0eine differenzierbare Funktion. -F:R2→R3(u,v)↦→(r(u) cos(u),r(v) sin(u),v) -JF(u,v) = -−r(v) sinu r′(v) cosu -r(v) cosu r′(v) sinu -0 1 +1) Rotationsflächen: Seir: R →R>0 eine differenzierbare Funktion. +F : R2 →R3 (u,v) ↦→(r(u) cos(u),r(v) sin(u),v) +JF(u,v) = + + +−r(v) sinu r′(v) cosu +r(v) cosu r ′(v) sinu +0 1 +  -hat Rang 2 für alle (u,v)∈R2. -2) Kugelkoordinaten: F:R2→R3, -(u,v)↦→(Rcosvcosu,Rcosvsinu,Rsinv) -Es gilt:F(u,v)∈S2 +hat Rang 2 für alle(u,v) ∈R2. +2) Kugelkoordinaten: F : R2 →R3, +(u,v) ↦→(Rcos vcos u,R cos vsin u,R sin v) +Es gilt:F(u,v) ∈S2 R, denn -R2cos2(v) cos2(u) +R2cos2(v) sin2(u) +R2sin2(v) +R2 cos2(v) cos2(u) + R2 cos2(v) sin2(u) + R2 sin2(v) =R2(cos2(v) cos2(u) + cos2(v) sin2(u) + sin2(v)) -=R2( -cos2(v)(cos2(u) + sin2(u)) + sin2(v)) -=R2( -cos2(v) + sin2(v)) +=R2 ( +cos2(v)(cos2(u) + sin2(u)) + sin2(v) +) +=R2 ( +cos2(v) + sin2(v) +) =R2 2.2. 
DIFFERENZIERBARE MANNIGFALTIGKEITEN N -Svu -(a) Kugelkoordinaten−1 -0 +S +vu +(a) Kugelkoordinaten +−1 0 +1 +2−2 −1 0 1 2 +0.6 +0.8 1 -2−2−10120.60.81 (b) Rotationskörper -π -2π 3π -22π -−1−0.50.51 -xy -sinx -cosx +π2 π 3π2 2π +−1 +−0.5 +0.5 +1 +x +y +sinxcosx (c) Sinus und Kosinus haben keine gemeinsame Nullstelle 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Die Jacobi-Matrix -JF(u,v) = -−Rcosvsinu−Rsinvcosu -Rcosvcosu−Rsinvsinu -0 Rcosv +JF(u,v) = + + +−Rcos vsin u −Rsin vcos u +Rcos vcos u −Rsin vsin u +0 Rcos v +  -hat Rang 2 für cosv̸= 0. InNundSistcosv= 0. +hat Rang 2 fürcos v̸= 0. InN und S ist cos v= 0. Bemerkung 31 -Jede reguläre Fläche S⊆R3ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. +Jede reguläre FlächeS ⊆R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. Beweis: -S⊆R3ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -regulären Flächen folgt direkt, dass Karten (Ui,Fi)und(Uj⊆R2,Fj:R2→R3)vonSmit -Ui∩Uj̸=∅existieren, wobei FiundFjnach Definition differenzierbare Abbildungen sind. -z.Z.:F−1 -j◦Fiist ein Diffeomorphismus. -Ui UjS +S ⊆R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von +regulären Flächen folgt direkt, dass Karten(Ui,Fi) und (Uj ⊆R2,Fj : R2 →R3) von S mit +Ui ∩Uj ̸= ∅existieren, wobeiFi und Fj nach Definition differenzierbare Abbildungen sind. +z.Z.: F−1 +j ◦Fi ist ein Diffeomorphismus. +Ui Uj +S s Fi Fj F−1 -j◦Fi -Abbildung 2.5: Reguläre Fläche Szum Beweis von Bemerkung 31 -Idee:Finde differenzierbare Funktion˜F−1 -jin Umgebung Wvons, sodass˜F−1 -j|S∩W=F−1 -j. -Ausführung: Seiu0∈Ui,v0∈UjmitFi(u0) =s=Fj(v0). -DaRg(JFj(v0)) = 2ist, ist o. B. d. A. -det(∂x -∂u∂x +j ◦Fi +Abbildung 2.5: Reguläre FlächeS zum Beweis von Bemerkung 31 +Idee: Finde differenzierbare Funktion˜F−1 +j in UmgebungW von s, sodass ˜F−1 +j |S∩W = F−1 +j . +Ausführung: Sei u0 ∈Ui, v0 ∈Uj mit Fi(u0) = s= Fj(v0). +Da Rg(JFj(v0)) = 2 ist, ist o. B. d. A. 
+det +(∂x +∂u +∂x ∂v∂y -∂u∂y -∂v) -(v0)̸= 0 -undFj(u,v) = (x(u,v),y(u,v),z(u,v)). -Definiere˜Fj:Uj×R→R3durch -˜Fj(u,v,t ) := (x(u,v),y(u,v),z(u,v) +t) -Offensichtlich: ˜Fj|Uj×{0}=Fj -J˜Fj= -∂x -∂u∂x -∂v0 +∂u +∂y +∂v +) +(v0) ̸= 0 +und Fj(u,v) = (x(u,v),y(u,v),z(u,v)). +Definiere ˜Fj : Uj ×R →R3 durch +˜Fj(u,v,t ) := (x(u,v),y(u,v),z(u,v) + t) +Offensichtlich: ˜Fj|Uj×{0 }= Fj +J˜Fj += + + +∂x +∂u +∂x +∂v 0 ∂y -∂u∂y -∂v0 +∂u +∂y +∂v 0 +∂z +∂u ∂z -∂u∂z -∂v1 -⇒detJ˜Fj(v0,0)̸= 0 -Analysis II= = = = = =⇒Es gibt Umgebungen WvonFjvon˜Fj(v0,0) =Fj(v0) =s, sodass˜FjaufWeine -differenzierbar Inverse F−1 -jhat. +∂v 1 + +⇒det J˜Fj +(v0,0) ̸= 0 +Analysis II += = = = = =⇒Es gibt UmgebungenW von Fj von ˜Fj(v0,0) = Fj(v0) = s, sodass˜Fj auf W eine +differenzierbar InverseF−1 +j hat. 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Weiter gilt: -˜Fj−1|W∩S=F−1 -j|W∩S +˜Fj +−1 +|W∩S = F−1 +j |W∩S ⇒F−1 -j◦Fi|F−1 -i(W∩S)=F−1 -j◦Fi|F−1 -i(W∩S) +j ◦Fi|F−1 +i (W∩S) = F−1 +j ◦Fi|F−1 +i (W∩S) ist differenzierbar. Definition 33 -SeiGeine Mannigfaltigkeit und (G,◦)eine Gruppe. -a)Gheißttopologische Gruppe , wenn die Abbildungen ◦:G×G→Gundι:G→G +Sei G eine Mannigfaltigkeit und(G,◦) eine Gruppe. +a) Gheißttopologische Gruppe, wenn die Abbildungen◦: G×G→Gund ι: G→G definiert durch -g◦h:=g·hundι(g) :=g−1 +g◦h:= g·h und ι(g) := g−1 stetig sind. -b)IstGeine differenzierbare Mannigfaltigkeit, so heißt GLie-Gruppe , wenn (G,◦)und -(G,ι)differenzierbar sind. +b) Ist Geine differenzierbare Mannigfaltigkeit, so heißtGLie-Gruppe, wenn(G,◦) und +(G,ι) differenzierbar sind. Beispiel 25 (Lie-Gruppen) 1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. -2)GLn(R) -3)(R×,·) -4)(R>0,·) -5)(Rn,+), dennA·B(i,j) =∑n -k=1aikbkjist nach allen Variablen differenzierbar -(A−1)(i,j) =det(Aij) -detA -Aij= -ai1... ain -......... -an1... ann +2) GLn(R) +3) (R×,·) +4) (R>0,·) +5) (Rn,+), dennA·B(i,j) = ∑n +k=1 aikbkj ist nach allen Variablen differenzierbar +(A−1)(i,j) = det(Aij) +det A +Aij = + + +ai1 ... 
a in +... ... ... +an1 ... a nn + ∈R(n−1)×(n−1) ist differenzierbar. -detAijkann 0werden, da:(1 1 -−1 0) -6)SLn(R) ={A∈GLn(R)|det(A) = 1} +det Aij kann 0 werden, da: (1 1 +−1 0 +) +6) SLn(R) = {A∈GLn(R) |det(A) = 1 } Bemerkung 32 -IstGeine Lie-Gruppe und g∈G, so ist die Abbildung -lg:G→G +Ist G eine Lie-Gruppe undg∈G, so ist die Abbildung +lg : G→G h↦→g·h ein Diffeomorphismus. 2.3. SIMPLIZIALKOMPLEX 2.3 Simplizialkomplex Definition 34 -Seienv0,...,vk∈RnPunkte. -a)v0,...,vksindin allgemeiner Lage -⇔es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0,...,vkenthält -⇔v1−v0,...,vk−v0sind linear unabhängig. -b)conv(v0,...,vk) :={∑k -i=0λivi⏐⏐⏐λi≥0,∑k -i=0λi= 1} -heißt diekonvexe Hülle von -v0,...,vk. +Seien v0,...,v k ∈Rn Punkte. +a) v0,...,v k sind in allgemeiner Lage +⇔es gibt keinen(k−1)-dimensionalen affinen Untervektorraum, derv0,...,v k enthält +⇔v1 −v0,...,v k −v0 sind linear unabhängig. +b) conv(v0,...,v k) := +{∑k +i=0 λivi +⏐⏐⏐λi ≥0,∑k +i=0 λi = 1 +} +heißt diekonvexe Hüllevon +v0,...,v k. Definition 35 -a)Sei∆n=conv(e0,...,en)⊆Rn+1die konvexe Hülle der Standard-Basisvektoren -e0,...,en. -Dann heißt ∆nStandard-Simplex undndie Dimension des Simplex. -b)Für Punkte v0,...,vkimRnin allgemeiner Lage heißt ∆(v0,...,vk) =conv(v0,...,vk) -eink-Simplex inRn. -c)Ist∆(v0,...,vk)eink-Simplex und I={i0,...,ir}⊆{ 0,...,k}, so istsi0,...,ir:= -conv(vi0,...,vir)einr-Simplex und heißt Teilsimplex oderSeitevon∆. -(a) 0-Simplex ∆0 -1 2 3123 -e0e1 -(b) 1-Simplex ∆11 2 3123 -e0e1 +a) Sei ∆n = conv(e0,...,e n) ⊆Rn+1 die konvexe Hülle der Standard-Basisvektoren +e0,...,e n. +Dann heißt∆n Standard-Simplex und n die Dimension des Simplex. +b) Für Punktev0,...,v k im Rn in allgemeiner Lage heißt∆(v0,...,v k) = conv(v0,...,v k) +ein k-Simplex in Rn. +c) Ist ∆(v0,...,v k) ein k-Simplex undI = {i0,...,i r }⊆{ 0,...,k }, so istsi0,...,ir := +conv(vi0 ,...,v ir) ein r-Simplex und heißtTeilsimplexoder Seite von ∆. 
+(a) 0-Simplex∆0 +1 2 3 +1 +2 +3 +e0 +e1 +(b) 1-Simplex∆1 +1 2 3 +1 +2 +3 +e0 +e1 +e2 +(c) 2-Simplex∆2 +e0 e1 e2 -(c) 2-Simplex ∆2e0 e1e2 e3 -(d) 3-Simplex ∆3 -Abbildung 2.6: Beispiele für k-Simplexe +(d) 3-Simplex∆3 +Abbildung 2.6: Beispiele fürk-Simplexe Definition 36 -a)Eine endliche Menge Kvon Simplizes im Rnheißt (endlicher) Simplizialkomplex , +a) Eine endliche MengeK von Simplizes imRn heißt (endlicher)Simplizialkomplex, wenn gilt: -(i) Für ∆∈KundS⊆∆Teilsimplex ist S∈K. -(ii) Für ∆1,∆2∈Kist∆1∩∆2leer oder ein Teilsimplex von ∆1und von ∆2. -b)|K|:=⋃ -∆∈K∆(mit Teilraumtopologie) heißt geometrische Realisierung vonK. -c) Istd= max{k∈N0|Kenthältk-Simplex}, so heißtddieDimension vonK. +(i) Für ∆ ∈K und S ⊆∆ Teilsimplex istS ∈K. +(ii) Für ∆1,∆2 ∈K ist ∆1 ∩∆2 leer oder ein Teilsimplex von∆1 und von∆2. +b) |K|:= ⋃ +∆∈K ∆ (mit Teilraumtopologie) heißtgeometrische Realisierungvon K. +c) Ist d= max {k∈N0 |K enthält k-Simplex}, so heißtd die Dimension von K. 2.3. SIMPLIZIALKOMPLEX -(a) 1D Simplizialkomplex (b)2D Simplizialkomplex -(ohne untere Fläche!)(c) 2D Simplizialkomplex +(a) 1D Simplizialkomplex(b) 2D Simplizialkomplex +(ohne untere Fläche!) +(c) 2D Simplizialkomplex (d) 1D Simplizialkomplex (e) 2D Simplizialkomplex P -(f)Pist kein Teilsimplex, da Eigenschaft - Punkt b.ii verletzt istP +(f) P ist kein Teilsimplex, da Eigenschaft + Punkt b.ii verletzt ist +P (g) Simplizialkomplex Abbildung 2.7: Beispiele für Simplizialkomplexe Definition 37 -SeienK,LSimplizialkomplexe. Eine stetige Abbildung -f:|K|→|L| -heißtsimplizial , wenn für jedes ∆∈Kgilt: -a)f(∆)∈L -b)f|∆: ∆→f(∆)ist eine affine Abbildung. +Seien K,L Simplizialkomplexe. Eine stetige Abbildung +f : |K|→| L| +heißtsimplizial, wenn für jedes∆ ∈K gilt: +a) f(∆) ∈L +b) f|∆ : ∆ →f(∆) ist eine affine Abbildung. Beispiel 26 (Simpliziale Abbildungen) -1)ϕ(e1) :=b1,ϕ(e2) :=b2 -ϕist eine eindeutig bestimmte lineare Abbildung +1) ϕ(e1) := b1, ϕ(e2) := b2 +ϕ ist eine eindeutig bestimmte lineare Abbildung 2.3. 
SIMPLIZIALKOMPLEX -0 e2e1 -0 b1b2 +0 e2 +e1 +0 b1 +b2 ϕ -2) Folgende Abbildung ϕ: ∆n→∆n−1ist simplizial: +2) Folgende Abbildungϕ: ∆n →∆n−1 ist simplizial: ϕ 3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) -M Ma -aab -bbc -cc -dd -dMa -bc +M M +a +a +a +b +b +b +c +c +c +d +d +d +M +a +b +c d -/Bullet /Bullet /Bullet/Bullet /Bullet /Bullet/Bullet /Bullet /Bullet +/Bullet /Bullet /Bullet +/Bullet /Bullet /Bullet +/Bullet /Bullet /Bullet +/Bullet +/Bullet +/Bullet +/Bullet +/Bullet +/Bullet +/Bullet /Bullet +/Bullet +/Bullet /Bullet +/Bullet /Bullet +/Bullet /Bullet +/Bullet /Bullet -/Bullet/Bullet/Bullet -/Bullet/Bullet -/Bullet /Bullet/Bullet -/Bullet /Bullet/Bullet /Bullet/Bullet /Bullet /Bullet -/Bullet/Bullet /Bullet Abbildung 2.8: Abbildung eines Torus auf eine Sphäre Definition 38 -SeiKein endlicher Simplizialkomplex. Für n≥0seian(K)die Anzahl der n-Simplizes in +Sei K ein endlicher Simplizialkomplex. Fürn≥0 sei an(K) die Anzahl dern-Simplizes in K. Dann heißt -χ(K) :=dimK∑ -n=0(−1)nan(K) -Eulerzahl (oder Euler-Charakteristik) von K. +χ(K) := +dim K∑ +n=0 +(−1)nan(K) +Eulerzahl (oder Euler-Charakteristik) vonK. Beispiel 27 -1)χ(∆1) = 2−1 = 1 -χ(∆2) = 3−3 + 1 = 1 -χ(∆3) = 4−6 + 4−1 = 1 -2)χ(Oktaeder-Oberfläche ) = 6−12 + 8 = 2 -χ(Rand des Tetraeders ) = 2 -χ(Ikosaeder ) = 12−30 + 20 = 2 -3)χ(Würfel ) = 8−12 + 6 = 2 -χ(Würfel, unterteilt in Dreiecksflächen ) = 8−(12 + 6) + (6·2) = 2 +1) χ(∆1) = 2 −1 = 1 +χ(∆2) = 3 −3 + 1 = 1 +χ(∆3) = 4 −6 + 4−1 = 1 +2) χ(Oktaeder-Oberfläche) = 6 −12 + 8 = 2 +χ(Rand des Tetraeders) = 2 +χ(Ikosaeder) = 12 −30 + 20 = 2 +3) χ(Würfel) = 8 −12 + 6 = 2 +χ(Würfel, unterteilt in Dreiecksflächen) = 8 −(12 + 6) + (6·2) = 2 Bemerkung 33 -χ(∆n) = 1für jedesn∈N0 +χ(∆n) = 1 für jedesn∈N0 2.3. SIMPLIZIALKOMPLEX -Beweis: ∆nist die konvexe Hülle von (e0,...,en)inRn+1. Jede (k+ 1)-elementige Teilmenge -von{e0,...,en}definiert ein k-Simplex. -⇒ak(∆n) =(n+1 -k+1) +Beweis: ∆n ist die konvexe Hülle von(e0,...,e n) in Rn+1. 
Jede(k+ 1)-elementige Teilmenge +von {e0,...,e n }definiert eink-Simplex. +⇒ak(∆n) = +(n+1 +k+1 +) , k = 0,...,n -⇒χ(∆n) =∑n +⇒χ(∆n) = ∑n k=0(−1)k(n+1 -k+1) -f(x) = (x+ 1)n+1Binomischer -Lehrsatz=∑n+1 -k=0(n+1 -k) +k+1 +) +f(x) = (x+ 1)n+1 +Binomischer +Lehrsatz += ∑n+1 +k=0 +(n+1 +k +) xk -⇒0 =∑n+1 -k=0(n+1 -k) -(−1)k=χ(∆n)−1 +⇒0 = ∑n+1 +k=0 +(n+1 +k +) +(−1)k = χ(∆n) −1 ⇒χ(∆n) = 1 ■ Definition 39 -a) Ein 1D-Simplizialkomplex heißt Graph. -b) Ein Graph, der homöomorph zu S1ist, heißtKreis. -c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. -(a)Dies wird häufig auch als -Multigraph bezeichnet.(b)Planare Einbettung des Tetraeders +a) Ein 1D-Simplizialkomplex heißtGraph. +b) Ein Graph, der homöomorph zuS1 ist, heißtKreis. +c) Ein zusammenhängender Graph heißtBaum, wenn er keinen Kreis enthält. +(a) Dies wird häufig auch als +Multigraph bezeichnet. +(b) Planare Einbettung des Tetraeders -(c)K5 (d)K3,3 +(c) K5 (d) K3,3 Abbildung 2.9: Beispiele für Graphen Bemerkung 34 -Für jeden Baum Tgiltχ(T) = 1. +Für jeden BaumT gilt χ(T) = 1. Beweis: Induktion über die Anzahl der Ecken. Bemerkung 35 -a)Jeder zusammenhängende Graph Γenthält einen Teilbaum T, der alle Ecken von Γ +a) Jeder zusammenhängende GraphΓ enthält einen TeilbaumT, der alle Ecken vonΓ enthält.2 -b) Istn=a1(Γ)−a1(T), so istχ(Γ) = 1−n. +b) Ist n= a1(Γ) −a1(T), so istχ(Γ) = 1 −n. Beweis: a) Siehe „Algorithmus von Kruskal“. -2Twird „Spannbaum“ genannt. +2T wird „Spannbaum“ genannt. 2.3. SIMPLIZIALKOMPLEX -b)χ(Γ) =a0(Γ)−a1(Γ) -=a0(Γ)−(n+a1(T)) -=a0(T)−a1(T)−n -=χ(T)−n -= 1−n +b) χ(Γ) = a0(Γ) −a1(Γ) += a0(Γ) −(n+ a1(T)) += a0(T) −a1(T) −n += χ(T) −n += 1 −n Bemerkung 36 -Sei∆einn-Simplex und x∈∆◦⊆Rn. SeiKder Simplizialkomplex, der aus ∆durch -„Unterteilung“ in xentsteht. Dann ist χ(K) =χ(∆) = 1. -(a)K (b)∆, das ausKdurch Unterteilung +Sei ∆ ein n-Simplex und x ∈∆◦ ⊆Rn. Sei K der Simplizialkomplex, der aus∆ durch +„Unterteilung“ inx entsteht. Dann istχ(K) = χ(∆) = 1. 
+(a) K (b) ∆, das ausK durch Unterteilung entsteht Abbildung 2.10: Beispiel für Bemerkung 36. -Beweis:χ(K) =χ(∆)−(−1)n +Beweis: χ(K) = χ(∆) − (−1)n  -n-Simplex+n∑ -k=0(−1)k(n+ 1 -k) -  -(1+(−1))n+1=χ(∆) ■ +n-Simplex ++ +n∑ +k=0 +(−1)k +(n+ 1 +k +) +   +(1+(−1))n+1 += χ(∆) ■ Definition 40 -SeiXein topologischer Raum, Kein Simplizialkomplex und -h:|K|→X -ein Homöomorphismus von der geometrischen Realisierung |K|aufX. Dann heißt heine -Triangulierung vonX. +Sei X ein topologischer Raum,K ein Simplizialkomplex und +h: |K|→ X +ein Homöomorphismus von der geometrischen Realisierung|K|auf X. Dann heißth eine +Triangulierungvon X. Beispiel 28 (Triangulierung des Torus) Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für fehlerhafte „Triangulierungen“ sind in Beispiel 28 zu sehen. Korrekte Triangulierungen sind in Beispiel 28. Satz 2.1 (Eulersche Polyederformel) -SeiPein konvexes Polyeder in R3, d. h.∂Pist ein 2-dimensionaler Simplizialkomplex, +Sei P ein konvexes Polyeder inR3, d. h.∂P ist ein 2-dimensionaler Simplizialkomplex, sodass gilt: -∀x,y∈∂P: [x,y]⊆P +∀x,y ∈∂P : [x,y] ⊆P Dann istχ(∂P) = 2. Beweis: 1) Die Aussage ist richtig für den Tetraeder. -2)O. B. d. A. sei 0∈PundP⊆B1(0). Projeziere ∂Pvon0aus auf∂B1(0) =S2. -Erhalte Triangulierung von S2. +2) O. B. d. A. sei0 ∈P und P ⊆B1(0). Projeziere ∂P von 0 aus auf ∂B1(0) = S2. +Erhalte Triangulierung vonS2. 2.3. SIMPLIZIALKOMPLEX -(a)Die beiden markierten Dreiecke schneiden sich im -Mittelpunkt und in einer Seite.(b)Die beiden markierten Dreiecke schneiden sich im +(a) Die beiden markierten Dreiecke schneiden sich im +Mittelpunkt und in einer Seite. +(b) Die beiden markierten Dreiecke schneiden sich im Mittelpunkt und außen. Abbildung 2.11: Fehlerhafte Triangulierungen (a) Einfache Triangulierung (b) Minimale Triangulierung Abbildung 2.12: Triangulierungen des Torus 2.3. 
SIMPLIZIALKOMPLEX -3)SindP1undP2konvexe Polygone und T1,T2die zugehörigen Triangulierungen von -S2, so gibt es eine Triangulierung T, die sowohl um T1als auch um T2Verfeinerung +3) Sind P1 und P2 konvexe Polygone undT1,T2 die zugehörigen Triangulierungen von +S2, so gibt es eine TriangulierungT, die sowohl umT1 als auch umT2 Verfeinerung ist (vgl. Abbildung 2.13). T1 T2 T -Abbildung 2.13: Tist eine Triangulierung, die für T1undT2eine Verfeinerung ist. -Nach Bemerkung 36 ist χ(∂P1) =χ(T1) =χ(T) =χ(T2) =χ(∂P2) = 2, weil o. B. d. A. -P2ein Tetraeder ist. +Abbildung 2.13:T ist eine Triangulierung, die fürT1 und T2 eine Verfeinerung ist. +Nach Bemerkung 36 istχ(∂P1) = χ(T1) = χ(T) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A. +P2 ein Tetraeder ist. Bemerkung 37 (Der Rand vom Rand ist 0) -SeiKein endlicher Simplizialkomplex mit Knotenmenge Vund d(P,C) =d(P,B) +d(B,C) =d(P,A) +d(B,C)⇒ -d(A,C)>d(B,C)⇒Widerspruch zu Punkt (i) -b)Cliegt zwischen PundB -d(P,C) +d(C,A)>d(P,A) =d(P,B) =d(P,C) +d(C,B) -⇒d(C,A)>d(C,B) +(ii) a) B liegt zwischenP und C. +d(P,A) + d(A,C) > d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C) ⇒ +d(A,C) >d(B,C) ⇒Widerspruch zu Punkt (i) +b) C liegt zwischenP und B +d(P,C) + d(C,A) >d(P,A) = d(P,B) = d(P,C) + d(C,B) +⇒d(C,A) >d(C,B) ⇒Widerspruch zu Punkt (i) -2. Fall:QundBliegen auf verschieden Halbebenen bzgl. PA. -Dann liegen AundQin derselben Halbebene bzgl. PB. -TauscheAundB⇒Fall 1 ■ +2. Fall: Q und B liegen auf verschieden Halbebenen bzgl.PA. +Dann liegenA und Q in derselben Halbebene bzgl.PB. +TauscheA und B ⇒Fall 1 ■ Bemerkung 63 -Sei(X,d,G )eine Geometrie, die §1 - §3 erfüllt, P,Q∈XmitP̸=Qundϕeine Isometrie -mitϕ(P) =Pundϕ(Q) =Q. -Dann giltϕ(S) =S∀S∈PQ. +Sei (X,d,G ) eine Geometrie, die §1 - §3 erfüllt,P,Q ∈X mit P ̸= Q und ϕ eine Isometrie +mit ϕ(P) = P und ϕ(Q) = Q. +Dann giltϕ(S) = S ∀S ∈PQ. Beweis: -O. B. d. A. 
sei S∈PQ2⇔d(P,Q) =d(P,S) +d(S,Q) -ϕ∈Iso(X)⇒d(ϕ(P),ϕ(Q)) =d(ϕ(P),ϕ(S)) +d(ϕ(S),ϕ(Q)) -P,Q∈Fix(ϕ)⇒d(P,Q) =d(P,ϕ(S)) +d(ϕ(S),Q) -⇒ϕ(S)liegt zwischen PundQ -⇒d(P,S) =d(ϕ(P),ϕ(S)) =d(P,ϕ(S)) -3(i)⇒ϕ(S) =S +O. B. d. A. seiS ∈PQ 2⇔d(P,Q) = d(P,S) + d(S,Q) +ϕ∈Iso(X) +⇒ d(ϕ(P),ϕ(Q)) = d(ϕ(P),ϕ(S)) + d(ϕ(S),ϕ(Q)) +P,Q∈Fix(ϕ) +⇒ d(P,Q) = d(P,ϕ(S)) + d(ϕ(S),Q) +⇒ϕ(S) liegt zwischenP und Q +⇒d(P,S) = d(ϕ(P),ϕ(S)) = d(P,ϕ(S)) +3(i) +⇒ϕ(S) = S ■ Proposition 4.2 -In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P,P′,Q,Q′mitd(P,Q) =d(P′,Q′) -höchstens zwei Isometrien mit ϕ(P) =P′undϕ(Q) =Q′ +In einer Geometrie, die §1 - §3 erfüllt, gibt es zuP,P ′,Q,Q ′ mit d(P,Q) = d(P′,Q′) +höchstens zwei Isometrien mitϕ(P) = P′und ϕ(Q) = Q′ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit -ϕi(P) =P′undϕi(Q) =Q′gibt. -Beweis: Seienϕ1,ϕ2,ϕ3Isometrien mit ϕi(P) =P′,ϕi(Q) =Q′miti= 1,2,3. +ϕi(P) = P′und ϕi(Q) = Q′gibt. +Beweis: Seien ϕ1,ϕ2,ϕ3 Isometrien mitϕi(P) = P′, ϕi(Q) = Q′mit i= 1,2,3. Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: -(Teil i)∃R∈X\PQmitϕ1(R) =ϕ2(R). -(Teil ii) Hat ϕ3 Fixpunkte, die nicht kollinear sind, so ist ϕ=idX. -Aus (Teil i) und (Teil ii) folgt, dass ϕ−1 -2◦ϕ1=idX, alsoϕ2=ϕ1, daP,QundRin diesem +(Teil i) ∃R∈X\PQ mit ϕ1(R) = ϕ2(R). +(Teil ii) Hatϕ 3 Fixpunkte, die nicht kollinear sind, so istϕ= idX. +Aus (Teil i) und (Teil ii) folgt, dassϕ−1 +2 ◦ϕ1 = idX, alsoϕ2 = ϕ1, daP, Qund R in diesem Fall Fixpunkte sind. Nun zu den Beweisen der Teilaussagen: -(Teil i)SeiR∈X\PQ. Von den drei Punkten ϕ1(R),ϕ2(R),ϕ3(R)liegen zwei in der selben -Halbebene bzgl. P′Q′=ϕi(PQ). -O. B. d. A. seien ϕ1(R)undϕ2(R)in der selben Halbebene. -Es gilt:d(P′,ϕ1(R)) =d(ϕ1(P),ϕ1(R)) -=d(P,R) -=d(ϕ2(P),ϕ2(R)) -=d(P′,ϕ2(R)) -und analog d(Q′,ϕ1(R)) =d(Q′,ϕ2(R)) -(Teil ii) SeienP,QundRFixpunkte von ϕ,R /∈PQundA /∈PQ∪PR∪QR. SeiB∈ -PQ\{P,Q}. Dann istϕ(B) =Bwegen Bemerkung 63. -IstR∈AB, so enthält AB2 Fixpunkte von ϕBem. 
63= = = = =⇒ϕ(A) =A. -P BQCRA -Abbildung 4.5: P,Q,Rsind Fixpunkte, B∈PQ\{P,Q},A /∈PQ∪PR∪QR -IstR /∈AB, so istAB∩PR̸=∅oderAB∈RQ̸=∅nach Satz 4.1. Der Schnittpunkt -Cist dann Fixpunkt von ϕ′nach Bemerkung 63 ⇒ϕ(A) =A. +(Teil i) Sei R∈X\PQ. Von den drei Punktenϕ1(R),ϕ2(R),ϕ3(R) liegen zwei in der selben +Halbebene bzgl.P′Q′= ϕi(PQ). +O. B. d. A. seienϕ1(R) und ϕ2(R) in der selben Halbebene. +Es gilt: d(P′,ϕ1(R)) = d(ϕ1(P),ϕ1(R)) += d(P,R) += d(ϕ2(P),ϕ2(R)) += d(P′,ϕ2(R)) +und analogd(Q′,ϕ1(R)) = d(Q′,ϕ2(R)) +(Teil ii) Seien P, Q und R Fixpunkte von ϕ, R /∈PQ und A /∈PQ ∪PR ∪QR. Sei B ∈ +PQ \{P,Q }. Dann istϕ(B) = B wegen Bemerkung 63. +Ist R∈AB, so enthältAB 2 Fixpunkte vonϕ Bem. 63= = = = =⇒ϕ(A) = A. +P B Q +C +RA +Abbildung 4.5:P,Q,R sind Fixpunkte,B ∈PQ \{P,Q }, A /∈PQ ∪PR ∪QR +Ist R /∈AB, so istAB∩PR ̸= ∅oder AB ∈RQ̸= ∅nach Satz 4.1. Der Schnittpunkt +C ist dann Fixpunkt vonϕ′nach Bemerkung 63⇒ϕ(A) = A. Bemerkung 64 (SWS-Kongruenzsatz) -Sei(X,d,G )eine Geometrie, die §1 - §4 erfüllt. Seien außerdem △ABCund△A′B′C′ +Sei (X,d,G ) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem△ABC und △A′B′C′ Dreiecke, für die gilt: -(i)d(A,B) =d(A′,B′) -(ii)∠CAB∼=∠C′A′B′ +(i) d(A,B) = d(A′,B′) +(ii) ∠CAB ∼= ∠C′A′B′ 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -(iii)d(A,C) =d(A′,C′) -Dann ist△ABCkongruent zu△A′B′C′. -Beweis: Seiϕdie Isometrie mit ϕ(A′) =A,ϕ(A′C′+) =AC+undϕ(A′B′+) =AB+. Diese +(iii) d(A,C) = d(A′,C′) +Dann ist△ABC kongruent zu△A′B′C′. +Beweis: Sei ϕ die Isometrie mitϕ(A′) = A, ϕ(A′C′+) = AC+ und ϕ(A′B′+) = AB+. Diese Isometrie existiert wegen Punkt §4. -⇒C∈ϕ(A′C′+)undB∈ϕ(A′B′+). -d(A′,C′) =d(ϕ(A′),ϕ(C′)) =d(A,ϕ(C′))3(i)= =⇒ϕ(C′) =C -d(A′,B′) =d(ϕ(A′),ϕ(B′)) =d(A,ϕ(B′))3(i)= =⇒ϕ(B′) =B -Also gilt insbesondere ϕ(△A′B′C′) =△ABC. ■ +⇒C ∈ϕ(A′C′+) und B ∈ϕ(A′B′+). +d(A′,C′) = d(ϕ(A′),ϕ(C′)) = d(A,ϕ(C′)) +3(i) += =⇒ϕ(C′) = C +d(A′,B′) = d(ϕ(A′),ϕ(B′)) = d(A,ϕ(B′)) +3(i) += =⇒ϕ(B′) = B +Also gilt insbesondereϕ(△A′B′C′) = △ABC. 
■ Bemerkung 65 (WSW-Kongruenzsatz) -Sei(X,d,G )eine Geometrie, die §1 - §4 erfüllt. Seien außerdem △ABCund△A′B′C′ +Sei (X,d,G ) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem△ABC und △A′B′C′ Dreiecke, für die gilt: -(i)d(A,B) =d(A′,B′) -(ii)∠CAB∼=∠C′A′B′ -(iii)∠ABC∼=∠A′B′C′ -Dann ist△ABCkongruent zu△A′B′C′. -Beweis: Seiϕdie Isometrie mit ϕ(A′) =A,ϕ(B′) =Bundϕ(C′)liegt in der selben Halbebene -bzgl.ABwieC. Diese Isometrie existiert wegen §4. -Aus∠CAB =∠C′A′B′=∠ϕ(C′)ϕ(A′)ϕ(B′) =∠ϕ(C′)ABfolgt, dassϕ(C′)∈AC+. -Analog folgt aus ∠ABC =∠A′B′C′=∠ϕ(A′)ϕ(B′)ϕ(C′) =∠ABϕ (C′), dassϕ(C′)∈ +(i) d(A,B) = d(A′,B′) +(ii) ∠CAB ∼= ∠C′A′B′ +(iii) ∠ABC ∼= ∠A′B′C′ +Dann ist△ABC kongruent zu△A′B′C′. +Beweis: Sei ϕdie Isometrie mitϕ(A′) = A, ϕ(B′) = B und ϕ(C′) liegt in der selben Halbebene +bzgl. AB wie C. Diese Isometrie existiert wegen §4. +Aus ∠CAB = ∠C′A′B′= ∠ϕ(C′)ϕ(A′)ϕ(B′) = ∠ϕ(C′)AB folgt, dassϕ(C′) ∈AC+. +Analog folgt aus ∠ABC = ∠A′B′C′ = ∠ϕ(A′)ϕ(B′)ϕ(C′) = ∠ABϕ(C′), dass ϕ(C′) ∈ BC+. -Dann giltϕ(C′)∈AC∩BC={C}⇒ϕ(C′) =C. -Es gilt also ϕ(△A′B′C′) =△ABC. ■ +Dann giltϕ(C′) ∈AC∩BC = {C}⇒ ϕ(C′) = C. +Es gilt alsoϕ(△A′B′C′) = △ABC. ■ Definition 61 -a)EinWinkel ist ein Punkt P∈Xzusammen mit 2Halbgeraden mit Anfangspunkt P. -Man schreibt:∠R1PR2bzw.∠R2PR12 -b)Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den +a) Ein Winkelist ein PunktP ∈X zusammen mit2 Halbgeraden mit AnfangspunktP. +Man schreibt:∠R1PR2 bzw. ∠R2PR12 +b) Zwei Winkel sindgleich, wenn es eine Isometrie gibt, die den einen Winkel auf den anderen abbildet. -c)∠R′ +c) ∠R′ 1P′R′ -2heißtkleinerals∠R1PR2, wenn es eine Isometrie ϕgibt, mitϕ(P′) =P, +2 heißtkleiner als ∠R1PR2, wenn es eine Isometrieϕ gibt, mitϕ(P′) = P, ϕ(P′R′+ -1) =PR+ -1undϕ(R′ -2)liegt in der gleichen Halbebene bzgl. PR1wieR2und in -der gleichen Halbebene bzgl. PR2wieR1 -d) Im Dreieck△PQRgibt esInnenwinkel undAußenwinkel . 
+1 ) = PR+ +1 und ϕ(R′ +2) liegt in der gleichen Halbebene bzgl.PR1 wie R2 und in +der gleichen Halbebene bzgl.PR2 wie R1 +d) Im Dreieck△PQR gibt esInnenwinkelund Außenwinkel. Bemerkung 66 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. -Beweis: Zeige∠PRQ<∠RQP′. -SeiMder Mittelpunkt der Strecke QRundP′∈PQ+\PQ. SeiA∈MP−mitd(P,M ) = -d(M,A ). -2Für dieses Skript gilt: ∠R1PR 2=∠R2PR 1. Also sind insbesondere alle Winkel ≤180◦. +Beweis: Zeige ∠PRQ< ∠RQP′. +Sei M der Mittelpunkt der StreckeQR und P′∈PQ+ \PQ. SeiA∈MP−mit d(P,M) = +d(M,A). +2Für dieses Skript gilt:∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel≤180◦. 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -PR′ -1R1R′ -2R2 -(a)∠R′ +P R′ +1 R1 +R′ +2 +R2 +(a) ∠R′ 1P′R′ -2ist kleiner als∠R1PR 2, -vgl. Definition 61.cP +2 ist kleiner als∠R1PR2, +vgl. Definition 61.c +P Q R -(b)Innenwinkel und Außenwinkel +(b) Innenwinkel und Außenwinkel in△PQR, vgl. Definition 61.d Abbildung 4.6: Situation aus Definition 61 -QM -AP +Q M +A +P +R +(a) Parallelogramm AQPR +α +β R -(a)Parallelogramm AQPR -αβR Q P -(b)Innen- und Außenwinkel +(b) Innen- und Außenwinkel von△PQR Abbildung 4.7: Situation aus Bemerkung 66 -Es gilt:d(Q,M ) =d(M,R )undd(P,M ) =d(M,A )sowie∠PMR =∠AMQ⇒△MRQ -ist kongruent zu△AMQ, denn eine der beiden Isometrien, die ∠PMRauf∠AMQabbildet, -bildetRaufQundPaufAab. -⇒∠MQA =∠MRP =∠QRP =∠PRQ. -Noch zu zeigen:∠MQA<∠RQP′, dennAliegt in der selben Halbebene bzgl. PQwieM. +Es gilt:d(Q,M) = d(M,R) und d(P,M) = d(M,A) sowie ∠PMR = ∠AMQ ⇒△MRQ +ist kongruent zu△AMQ, denn eine der beiden Isometrien, die∠PMR auf ∠AMQ abbildet, +bildet R auf Q und P auf A ab. +⇒∠MQA = ∠MRP = ∠QRP = ∠PRQ. +Noch zu zeigen:∠MQA< ∠RQP′, dennA liegt in der selben Halbebene bzgl.PQ wie M. Proposition 4.3 (Existenz der Parallelen) -Sei(X,d,G )eine Geometrie mit den Axiomen §1 - §4. -Dann gibt es zu jeder Geraden g∈Gund jedem Punkt P∈X\gmindestens eine -Paralleleh∈GmitP∈hundg∩h=∅. 
-Beweis: SeienP,Q∈f∈Gundϕdie Isometrie, die QaufPundPaufP′∈fmit -d(P,P′) =d(P,Q)abbildet und die Halbebenen bzgl. ferhält. +Sei (X,d,G ) eine Geometrie mit den Axiomen §1 - §4. +Dann gibt es zu jeder Geradeng ∈G und jedem PunktP ∈X \g mindestens eine +Parallele h∈G mit P ∈h und g∩h= ∅. +Beweis: Seien P,Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P′ ∈ f mit +d(P,P ′) = d(P,Q) abbildet und die Halbebenen bzgl.f erhält. 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -Qhf -gP +Q +h +f +g +P Abbildung 4.8: Situation aus Proposition 4.3 -Annahme:ϕ(g)∩g̸=∅ -⇒Es gibt einen Schnittpunkt {R}=ϕ(g)∩g. -Dann ist∠RQP =∠RQP′<∠RPP′nach Bemerkung 66 und ∠RQP =∠RPP′, weil -ϕ(∠RQP ) =∠RPP′. +Annahme: ϕ(g) ∩g̸= ∅ +⇒Es gibt einen Schnittpunkt{R}= ϕ(g) ∩g. +Dann ist ∠RQP = ∠RQP′ < ∠RPP′ nach Bemerkung 66 und∠RQP = ∠RPP′, weil +ϕ(∠RQP) = ∠RPP′. ⇒Widerspruch -⇒ϕ(g)∩g=∅ ■ +⇒ϕ(g) ∩g= ∅ ■ Folgerung 4.4 -Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. -D. h. es gibt eine Isometrie ϕmitϕ(Q) =Pundϕ(QP+) =PR+, sodassϕ(R)in der gleichen -Halbebene bzgl. PQliegt wieR. -Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die +Die Summe zweier Innenwinkel in einem Dreieck ist kleiner alsπ. +D. h. es gibt eine Isometrieϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodassϕ(R) in der gleichen +Halbebene bzgl.PQ liegt wieR. +Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln istπ, d. h. die beiden Halbgeraden bilden eine Gerade. -Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, -Dreiecke mit drei 90◦-Winkeln. +Abbildung 4.9:In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, +Dreiecke mit drei90◦-Winkeln. Proposition 4.5 In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der -Innenwinkel≤π. +Innenwinkel ≤π. 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -Sei im Folgenden „ IWS“ die „Innenwinkelsumme“. 
-Beweis: Sei△ein Dreieck mit IWS(△) =π+ε -αβγ +Sei im Folgenden „IWS“ die „Innenwinkelsumme“. +Beweis: Sei △ein Dreieck mitIWS(△) = π+ ε +α +βγ P -(a) Summe der Winkel α,βundγα1α2 βγ +(a) Summe der Winkelα, β und γ +α1 +α2 β +γ M -A BC A′ +A B +C A′ α (b) Situation aus Proposition 4.5 Abbildung 4.10: Situation aus Proposition 4.5 -Seiαein Innenwinkel von △. -Beh.:Es gibt ein Dreieck △′mitIWS(△′) = IWS(△)und einem Innenwinkel α′≤α -2. -Dann gibt es für jedes nein△nmitIWS(△n) =IWS(△)und Innenwinkel α′≤α +Sei α ein Innenwinkel von△. +Beh.: Es gibt ein Dreieck△′mit IWS(△′) = IWS(△) und einem Innenwinkelα′≤α +2 . +Dann gibt es für jedesnein △n mit IWS(△n) = IWS(△) und Innenwinkelα′≤ α 2n. Für α -2n<εist dann die Summe der beiden Innenwinkel um △ngrößer alsπ⇒Widerspruch +2n <ε ist dann die Summe der beiden Innenwinkel um△n größer alsπ⇒Widerspruch zu Folgerung 4.4. -Beweis: Es seienA,B,C∈Xund△das Dreieck mit den Eckpunkten A,B,Cundαsei -der Innenwinkel bei A,βder Innenwinkel bei Bundγder Innenwinkel bei C. -SeiMder Mittelpunkt der Strecke BC. Sei außerdem α1=∠CAMundα2=∠BAM. -Sei weiterA′∈MA−mitd(A′,M) =d(A,M ). +Beweis: Es seienA,B,C ∈X und △das Dreieck mit den EckpunktenA,B,C und α sei +der Innenwinkel beiA, β der Innenwinkel beiB und γ der Innenwinkel beiC. +Sei M der Mittelpunkt der StreckeBC. Sei außerdemα1 = ∠CAM und α2 = ∠BAM. +Sei weiterA′∈MA−mit d(A′,M) = d(A,M). Die Situation ist in Abbildung 4.10b skizziert. -⇒△ (MA′C)und△(MAB )sind kongruent.⇒∠ABM =∠A′CMund∠MA′C= -∠MAB.⇒α+β+γ=IWS(△ABC ) =IWS(△AA′C)undα1+α2=α, also o. B. d. A. -α1≤α +⇒△(MA′C) und △(MAB) sind kongruent.⇒∠ABM = ∠A′CM und ∠MA′C = +∠MAB. ⇒α+β+γ = IWS(△ABC) = IWS(△AA′C) und α1 +α2 = α, also o. B. d. A. +α1 ≤α 2 Bemerkung 67 -In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. +In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleichπ. 
α′ α′′ -α ββ′ +α β +β′ γ -A BC +A B +C g Abbildung 4.11: Situation aus Bemerkung 67 -Beweis: Seigeine Parallele von ABdurchC. -•Es giltα′=αwegen Proposition 4.3. -•Es giltβ′=βwegen Proposition 4.3. -•Es giltα′′=α′wegen Aufgabe 8. +Beweis: Sei g eine Parallele vonAB durch C. +•Es giltα′= α wegen Proposition 4.3. +•Es giltβ′= β wegen Proposition 4.3. +•Es giltα′′= α′wegen Aufgabe 8. 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -⇒IWS(△ABC ) =γ+α′′+β′=π +⇒IWS(△ABC) = γ+ α′′+ β′= π Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich -πist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. +π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. 4.2 Weitere Eigenschaften einer euklidischen Ebene Satz 4.6 (Strahlensatz) In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. -xy -−1 0 1 2 3 40123 +x +y +−1 0 1 2 3 4 +0 +1 +2 +3 z -xλ2z +x +λ2z λ2x Abbildung 4.12: Strahlensatz Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. -A B′C′ -BC -cba -c′b′ +A B′ +C′ +B +C +c +b a +c′ +b′ a′ -Abbildung 4.13: Die Dreiecke △ABCund△AB′C′sind ähnlich. +Abbildung 4.13: Die Dreiecke△ABC und △AB′C′sind ähnlich. 4.2.1 Flächeninhalt Definition 62 -„Simplizialkomplexe“ in euklidischer Ebene (X,d)heißenflächengleich , wenn sie sich in +„Simplizialkomplexe“ in euklidischer Ebene(X,d) heißenflächengleich, wenn sie sich in kongruente Dreiecke zerlegen lassen. 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -(a) Zwei kongruente Dreiecke (b)ZweiweiterekongruenteDreiecke +(a) Zwei kongruente Dreiecke(b) ZweiweiterekongruenteDreiecke Abbildung 4.14: Flächengleichheit -Der Flächeninhalt eines Dreiecks ist 1/2·Grundseite·Höhe. -A BC -LChc +Der Flächeninhalt eines Dreiecks ist1/2 ·Grundseite·Höhe. 
+A B +C +LC +hc c -(a)1/2·|AB|·|hc|· -A BC +(a) 1/2 ·|AB|·|hc| +· +A B +C LA -hac -(b)1/2·|BC|·|ha| +ha +c +(b) 1/2 ·|BC|·|ha| Abbildung 4.15: Flächenberechnung im Dreieck -Zu zeigen: Unabhängigkeit von der gewählten Grundseite. -αα -γγ -A BC +Zu zeigen:Unabhängigkeit von der gewählten Grundseite. +α +α +γ +γ +A B +C LA LC -Abbildung 4.16:△ABLaund△CLCBsind ähnlich, weil IWS =π -Strahlensatz= = = = = = =⇒a -hc=c -ha→a·ha=c·hc +Abbildung 4.16:△ABLa und △CLCB sind ähnlich, weilIWS = π +Strahlensatz= = = = = = =⇒ a +hc = c +ha →a·ha = c·hc Satz 4.7 (Satz des Pythagoras) -Im rechtwinkligen Dreieck gilt a2+b2=c2, wobeicdie Hypotenuse und a,bdie beiden +Im rechtwinkligen Dreieck gilta2 + b2 = c2, wobeic die Hypotenuse unda,b die beiden Katheten sind. -Beweis: (a+b)·(a+b) =a2+ 2ab+b2=c2+ 4·(1 -2·a·b) +Beweis: (a+ b) ·(a+ b) = a2 + 2ab+ b2 = c2 + 4 ·(1 +2 ·a·b) 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -cb a -A BC +c +b a +A B +C · -(a)a,bsind Katheten und cist die Hypotenuseb - abab a +(a) a,b sind Katheten undc ist die Hypotenuse + +b a +b +a +ba b -a· -· ·· +a +· +·· +· γ (b) Beweisskizze Abbildung 4.17: Satz des Pythagoras Satz 4.8 -Bis auf Isometrie gibt es genau eine euklidische Ebene (X,d,G ), nämlichX=R2, -d=euklidischer Abstand, G=Menge der üblichen Geraden. +Bis auf Isometrie gibt es genau eine euklidische Ebene(X,d,G ), nämlich X = R2, +d= euklidischer Abstand,G= Menge der üblichen Geraden. Beweis: -(i)(R2,dEuklid )ist offensichtlich eine euklidische Ebene. -(ii)Sei(X,d)eine euklidische Ebene und g1,g2Geraden in X, die sich in einem Punkt 0 +(i) (R2,dEuklid) ist offensichtlich eine euklidische Ebene. +(ii) Sei (X,d) eine euklidische Ebene undg1,g2 Geraden inX, die sich in einem Punkt0 im rechten Winkel schneiden. -SeiP∈X\(g1∪g2)ein Punkt und PXder Fußpunkt des Lots von Paufg1(vgl. -Aufgabe 9 (c)) und PYder Fußpunkt des Lots von Paufg2. -SeixP:=d(PX,0)undyP:=d(PY,0). +Sei P ∈X\(g1 ∪g2) ein Punkt undPX der Fußpunkt des Lots vonP auf g1 (vgl. 
+Aufgabe 9 (c)) undPY der Fußpunkt des Lots vonP auf g2. +Sei xP := d(PX,0) und yP := d(PY,0). In Abbildung 4.19 wurde die Situation skizziert. -Seih:X→R2eine Abbildung mit h(P) := (xP,yP)Dadurch wird hauf dem -Quadranten definiert, in dem Pliegt, d. h. -∀Q∈XmitPQ∩g1=∅=PQ∩g2 -Fortsetzung auf ganz Xdurch konsistente Vorzeichenwahl. +Sei h : X →R2 eine Abbildung mit h(P) := ( xP,yP) Dadurch wird h auf dem +Quadranten definiert, in demP liegt, d. h. +∀Q∈X mit PQ ∩g1 = ∅= PQ ∩g2 +Fortsetzung auf ganzX durch konsistente Vorzeichenwahl. Im Folgenden werden zwei Aussagen gezeigt: -(i)hist surjektiv -(ii)hist eine Isometrie -Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass hbijektiv ist. +(i) h ist surjektiv +(ii) h ist eine Isometrie +Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dassh bijektiv ist. Nun zu den Beweisen der Teilaussagen: 4.3. HYPERBOLISCHE GEOMETRIE · -g1g2 -PX -(a) Schritt 1· -g1g2 -xPyPP -0 PXPYX +g1 +g2 +P +X +(a) Schritt 1 +· +g1 +g2 +xP +yP +P +0 PX +PY +X (b) Schritt 2 Abbildung 4.18: Beweis zu Satz 4.8 -(i)Sei(x,y)∈R2, z. B.x≥0,y≥0. SeiP′∈g1mitd(0,P′) =xundP′auf der -gleichen Seite von g2wieP. -g1g2 -xPyPPQ -0R +(i) Sei (x,y) ∈R2, z. B.x≥0,y ≥0. SeiP′∈g1 mit d(0,P′) = x und P′ auf der +gleichen Seite vong2 wie P. +g1 +g2 +xP +yP +P +Q +0 +R X Abbildung 4.19: Beweis zu Satz 4.8 -(ii) Zu Zeigen: d(P,Q) =d(h(P),h(Q)) -d(P,Q)2Pythagoras=d(P,R)2+d(R,Q)2= (yQ−yP)2+ (xQ−xP)2. +(ii) Zu Zeigen:d(P,Q) = d(h(P),h(Q)) +d(P,Q)2 Pythagoras += d(P,R)2 + d(R,Q)2 = (yQ −yP)2 + (xQ −xP)2. h(Q) = (xQ,yQ) 4.3 Hyperbolische Geometrie Definition 63 Sei -H:={z∈C|ℑ(z)>0}={ -(x,y)∈R2⏐⏐y>0} +H := {z∈C |ℑ(z) >0 }= +{ +(x,y) ∈R2 ⏐⏐y >0 +} 4.3. HYPERBOLISCHE GEOMETRIE -die obere Halbebene bzw. Poincaré-Halbebene und G=G1∪G2mit -G1={g1⊆H|∃m∈R,r∈R>0:g1={z∈H:|z−m|=r}} -G2={g2⊆H|∃x∈R:g2={z∈H:ℜ(z) =x}} -Die Elemente aus Gheißenhyperbolische Geraden . +die obere Halbebene bzw. 
Poincaré-Halbebene undG= G1 ∪G2 mit +G1 = {g1 ⊆H |∃m∈R,r ∈R>0 : g1 = {z∈H : |z−m|= r}} +G2 = {g2 ⊆H |∃x∈R : g2 = {z∈H : ℜ(z) = x}} +Die Elemente ausG heißenhyperbolische Geraden. Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) Die hyperbolischen Geraden erfüllen... a) ...die Inzidenzaxiome §1 @@ -2518,1017 +2965,1289 @@ b) ...das Anordnungsaxiom §3 (ii) c) ...nicht das Parallelenaxiom §5 Beweis: a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt: -Gegebenz1,z2∈H +Gegeben z1,z2 ∈H Existenz: -Fall 1ℜ(z1) =ℜ(z2) -⇒z1undz2liegen auf -g={z∈C|ℜ(z) =ℜ(z1)∧H} +Fall 1 ℜ(z1) = ℜ(z2) +⇒z1 und z2 liegen auf +g= {z∈C |ℜ(z) = ℜ(z1) ∧H } Siehe Abbildung 4.20a. -Fall 2ℜ(z1)̸=ℜ(z2) -Betrachte nun z1undz2als Punkte in der euklidischen Ebene. Die Mittelsenkrechte - zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten -zuz1undz2sindgleichweitvon z1undz2entfernt.DaheristderSchnittpunktmit -derx-Achse der Mittelpunkt eines Kreises durch z1undz2(vgl. Abbildung 4.20b) -xy -−1 0 1 2 3 4 501234 -Z1Z2 +Fall 2 ℜ(z1) ̸= ℜ(z2) +Betrachte nunz1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte + zu diesen Punkten schneidet diex-Achse. Alle Punkte auf der Mittelsenkrechten +zuz1 undz2 sind gleich weit vonz1 undz2 entfernt. Daher ist der Schnittpunkt mit +der x-Achse der Mittelpunkt eines Kreises durchz1 und z2 (vgl. Abbildung 4.20b) +x +y +−1 0 1 2 3 4 5 +0 +1 +2 +3 +4 +Z1 +Z2 ℜ(Z1) -(a) Fall 1xy -−1 0 1 2 3 4 501234 -Z1Z2 +(a) Fall 1 +x +y +−1 0 1 2 3 4 5 +0 +1 +2 +3 +4 +Z1 +Z2 (b) Fall 2 -Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer +Abbildung 4.20:Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer Geraden -b) Seig∈G1˙∪G2eine hyperbolische Gerade. +b) Sei g∈G1 ˙∪G2 eine hyperbolische Gerade. 4.3. 
HYPERBOLISCHE GEOMETRIE -Es existieren disjunkte Zerlegungen von H\g: -Fall 1:g={z∈H∥z−m|=r}∈G1 +Es existieren disjunkte Zerlegungen vonH \g: +Fall 1:g= {z∈H ∥z−m|= r}∈ G1 Dann gilt: -H={z∈H∥z−m|r} -=:H2(Kreisäußeres) -Dar>0istH1nicht leer, da r∈RistH2nicht leer. -Fall 2:g={z∈H|ℜz=x}∈G2 +H = {z∈H ∥z−m|r }   +=:H2 (Kreisäußeres) +Da r> 0 ist H1 nicht leer, dar∈R ist H2 nicht leer. +Fall 2:g= {z∈H |ℜz= x}∈ G2 Die disjunkte Zerlegung ist: -H={z∈H|ℜ(z)x} -=:H2(Rechts) -Zu zeigen:∀A∈Hi,B∈Hjmiti,j∈{1,2}gilt:AB∩g̸=∅⇔i̸=j -„⇐“:A∈H1,B∈H2:AB∩g̸=∅ -DadHstetig ist, folgt diese Richtung direkt. Alle Punkte in H1haben einen Abstand -vonmder kleiner ist als rund alle Punkte in H2haben einen Abstand von mder -größer ist als r. Da man jede Strecke von AnachBinsbesondere auch als stetige -Abbildung f:R→R>0auffassen kann, greift der Zwischenwertsatz ⇒AB∩g̸=∅ -„⇒“:A∈Hi,B∈Hjmiti,j∈{1,2}:AB∩g̸=∅⇒i̸=j -Seihdie Gerade, die durch AundBgeht. -DaA,B /∈g, aberA,B∈hgilt, haben gundhinsbesondere mindestens einen -unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich gundhin höchstens einen Punkt -schneiden. Sei Cdieser Punkt. -AusA,B /∈gfolgt:C̸=AundC̸=B. Also liegt CzwischenAundB. Daraus folgt, -dassAundBbzgl.gin verschiedenen Halbebenen liegen. +H = {z∈H |ℜ(z) x }   +=:H2 (Rechts) +Zu zeigen:∀A∈Hi, B ∈Hj mit i,j ∈{1,2 }gilt: AB∩g̸= ∅⇔ i̸= j +„⇐“:A∈H1,B ∈H2 : AB∩g̸= ∅ +Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte inH1 haben einen Abstand +von m der kleiner ist alsr und alle Punkte inH2 haben einen Abstand vonm der +größer ist alsr. Da man jede Strecke vonA nach B insbesondere auch als stetige +Abbildung f : R →R>0 auffassen kann, greift der Zwischenwertsatz⇒AB∩g̸= ∅ +„⇒“:A∈Hi,B ∈Hj mit i,j ∈{1,2 }: AB∩g̸= ∅⇒ i̸= j +Sei h die Gerade, die durchA und B geht. +Da A,B /∈g, aber A,B ∈h gilt, haben g und h insbesondere mindestens einen +unterschiedlichen Punkt. Aus §1 (i) folgt, dass sichg und h in höchstens einen Punkt +schneiden. SeiC dieser Punkt. 
+Aus A,B /∈g folgt: C ̸= A und C ̸= B. Also liegtC zwischen A und B. Daraus folgt, +dass A und B bzgl. g in verschiedenen Halbebenen liegen. c) Siehe Abbildung 4.21. -xy -−5−4−3−2−1 0 1 2 3 4 5 6012345 +x +y +−5 −4 −3 −2 −1 0 1 2 3 4 5 6 +0 +1 +2 +3 +4 +5 Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 4.3. HYPERBOLISCHE GEOMETRIE Definition 64 -Es seiena,b,c,d∈Rmitad−bc̸= 0undσ:C→Ceine Abbildung definiert durch -σ(z) :=az+b -cz+d -σheißtMöbiustransformation . +Es seiena,b,c,d ∈R mit ad−bc̸= 0 und σ: C →C eine Abbildung definiert durch +σ(z) := az+ b +cz+ d +σ heißtMöbiustransformation. Proposition 4.9 -a) Die Gruppe SL2(R)operiert auf Hdurch die Möbiustransformation -σ(z) :=(a b -c d) -◦z:=az+b -cz+d -b) Die Gruppe PSL 2(R) = SL 2(R)/(±I)operiert durch σaufH. -c)PSL 2(R)operiert auf R∪{∞}. Diese Gruppenoperation ist 3-fach transitiv, d. h. -zux00undσ=(a b -c d) +a) Sei z= x+ iy ∈H, d. h.y >0 und σ= +(a b +c d +) ∈SL2(R) -⇒σ(z) =a(x+iy) +b -c(x+iy) +d -=(ax+b) +iay -(cx+d) +icy·(cx+d)−icy -(cx+d)−icy -=(ax+b)(cx+d) +aycy -(cx+d)2+ (cy)2+iay(cx+d)−(ax+b)cy -(cx+d)2+ (cy)2 -=axcx +axd+bcx+bd+aycy -(cx+d)2+ (cy)2+i(ad−bc)y -(cx+d)2+ (cy)2 -SL2(R)=ac(x2+y2) +adx+bcx+bd -(cx+d)2+ (cy)2+iy -(cx+d)2+ (cy)2 -⇒ℑ(σ(z)) =y -(cx+d)2+(cy)2>0 -Die Abbildung bildet also nach Hab. Außerdem gilt: +⇒σ(z) = a(x+ iy) + b +c(x+ iy) + d += (ax+ b) + iay +(cx+ d) + icy ·(cx+ d) −icy +(cx+ d) −icy += (ax+ b)(cx+ d) + aycy +(cx+ d)2 + (cy)2 + iay(cx+ d) −(ax+ b)cy +(cx+ d)2 + (cy)2 += axcx+ axd+ bcx+ bd+ aycy +(cx+ d)2 + (cy)2 + i (ad−bc)y +(cx+ d)2 + (cy)2 +SL2(R) += ac(x2 + y2) + adx+ bcx+ bd +(cx+ d)2 + (cy)2 + i y +(cx+ d)2 + (cy)2 +⇒ℑ(σ(z)) = y +(cx+d)2+(cy)2 >0 +Die Abbildung bildet also nachH ab. Außerdem gilt: (1 0 -0 1) -◦z=x+iy -1=x+iy=z +0 1 +) +◦z= x+ iy +1 = x+ iy = z 4.3. 
HYPERBOLISCHE GEOMETRIE und (a b -c d) -◦((a′b′ -c′d′) -◦z) -=(a b -c d) -◦a′z+b′ -c′z+d′ -=aa′z+b′ -c′z+d′+b +c d +) +◦ +((a′ b′ +c′ d′ +) +◦z +) += +(a b +c d +) +◦a′z+ b′ +c′z+ d′ += +aa′z+b′ +c′z+d′ + b ca′z+b′ -c′z+d′+d -=a(a′z+b′)+b(c′z+d′) +c′z+d′ + d += +a(a′z+b′)+b(c′z+d′) c′z+d′ c(a′z+b′)+d(c′z+d′) c′z+d′ -=a(a′z+b′) +b(c′z+d′) -c(a′z+b′) +d(c′z+d′) -=(aa′+bc′)z+ab′+bd′ -(ca′+db′)z+cb′+dd′ -=(aa′+bc′ab′+bd′ -ca′+db′cb′+dd′) += a(a′z+ b′) + b(c′z+ d′) +c(a′z+ b′) + d(c′z+ d′) += (aa′+ bc′)z+ ab′+ bd′ +(ca′+ db′)z+ cb′+ dd′ += +(aa′+ bc′ ab′+ bd′ +ca′+ db′ cb′+ dd′ +) ◦z -=((a b -c d) -·(a′b′ -c′d′)) += +((a b +c d +) +· +(a′ b′ +c′ d′ +)) ◦z -b) Es giltσ(z) = (−σ)(z)für alleσ∈SL2(R)undz∈H. -c) Ansatz: σ=(a b -c d) -σ(x0) =ax0+b -cx0+d!= 0⇒ax0+b= 0⇒b=−ax0 -σ(x∞) =∞⇒cx∞+d= 0⇒d=−cx∞ -σ(x1) = 1⇒ax1+b=cx1+d -a(x1−x0) =c(x1−x∞)⇒c=ax1−x0 +b) Es giltσ(z) = (−σ)(z) für alleσ∈SL2(R) und z∈H. +c) Ansatz: σ= +(a b +c d +) +σ(x0) = ax0+b +cx0+d +!= 0 ⇒ax0 + b= 0 ⇒b= −ax0 +σ(x∞) = ∞⇒ cx∞+ d= 0 ⇒d= −cx∞ +σ(x1) = 1 ⇒ax1 + b= cx1 + d +a(x1 −x0) = c(x1 −x∞) ⇒c= ax1−x0 x1−x∞ -⇒−a2·x∞x1−x0 -x1−x∞+a2x0x1−x0 -x1−x∞= 1 -⇒a2x1−x0 -x0−x∞(x0−x∞) = 1⇒a2=x1−x∞ +⇒−a2 ·x∞x1−x0 +x1−x∞ + a2x0 x1−x0 +x1−x∞ = 1 +⇒a2 x1−x0 +x0−x∞(x0 −x∞) = 1 ⇒a2 = x1−x∞ (x1−x∞)(x1−x0) d) Es gilt: A−1 -λ=A1 +λ = A1 λ B−1 -t=B−t -C−1=C3 -Daher genügt es zu zeigen, dass man mit Aλ,BtundCalle Matrizen aus SL2(R) +t = B−t +C−1 = C3 +Daher genügt es zu zeigen, dass man mitAλ, Bt und C alle Matrizen ausSL2(R) erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit -Matrizen der Form Aλ,BtundCdie Einheitsmatrix zu generieren. +Matrizen der FormAλ, Bt und C die Einheitsmatrix zu generieren. Sei also -M=(a b -c d) +M = +(a b +c d +) ∈SL2(R) beliebig. Fall 1:a= 0 -DaM∈SL2(R)ist, gilt detM= 1 =ad−bc=−bc. Daher ist insbesondere c̸= 0. Es +Da M ∈SL2(R) ist, giltdet M = 1 = ad−bc= −bc. Daher ist insbesonderec̸= 0. 
Es folgt: (0 1 -−1 0) -·(a b -c d) -=(c d -−a−b) +−1 0 +) +· +(a b +c d +) += +( c d +−a −b +) 4.3. HYPERBOLISCHE GEOMETRIE Gehe zu Fall 2. Fall 2:a̸= 0 -Nun wird in MdurchM·A1 -aan der Stelle von aeine1erzeugt: +Nun wird inM durch M ·A1 +a +an der Stelle vona eine 1 erzeugt: (a b -c d) -·(1 -a0 -0a) -=(1ab +c d +) +· +(1 +a 0 +0 a +) += +(1 ab c -aad) +a ad +) Gehe zu Fall 3. Fall 3:a= 1 -(1b -c d) -·(1−b -0 1) -=(1 0 -c d−bc) -Da wir detM= 1 =ad−bc=d−bcwissen, gilt sogar M2,2= 1. +(1 b +c d +) +· +(1 −b +0 1 +) += +(1 0 +c d −bc +) +Da wirdet M = 1 = ad−bc= d−bc wissen, gilt sogarM2,2 = 1. Gehe zu Fall 4. -Fall 4:a= 1,b= 0,d= 1 -A−1CBcC(1 0 -c1) -=(1 0 -0 1) -Daher erzeugen Matrizen der Form Aλ,BtundCdie Gruppe SL2R.■ +Fall 4:a= 1, b= 0, d= 1 +A−1CBcC +(1 0 +c 1 +) += +(1 0 +0 1 +) +Daher erzeugen Matrizen der FormAλ, Bt und C die GruppeSL2R. ■ e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. -•σ=(λ0 -0λ−1) -, alsoσ(z) =λ2z. Daraus ergeben sich die Situationen, die in +•σ = +(λ 0 +0 λ−1 +) +, also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in Abbildung 4.22a und Abbildung 4.22b dargestellt sind. -xy -−1 0 1 2 3 4 5 6 70123 -mλ2mm+irλ2m+iλ2r +x +y +−1 0 1 2 3 4 5 6 70 +1 +2 +3 +m λ2m +m+ir +λ2m+iλ2r m+ 1 -(a) Fall 1xy -−1 0 1 2 3 40123 +(a) Fall 1 +x +y +−1 0 1 2 3 4 +0 +1 +2 +3 z -xλ2z +x +λ2z λ2x (b) Fall 2 (Strahlensatz) Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix -•Offensichtlich gilt die Aussage für σ=(1a -0 1) -•Sei nunσ=(0 1 -−1 0) -, alsoσ(z) =−1 +•Offensichtlich gilt die Aussage fürσ= +(1 a +0 1 +) +•Sei nunσ= +(0 1 +−1 0 +) +, alsoσ(z) = −1 z Bemerkung 69 -Zu hyperbolischen Geraden g1,g2gibt esσ∈PSL 2(R)mitσ(g1) =g2. +Zu hyperbolischen Geradeng1,g2 gibt esσ∈PSL2(R) mit σ(g1) = g2. 4.3. HYPERBOLISCHE GEOMETRIE · -xy -−1 0 101z=r·eiϕ +x +y +−1 0 1 +0 +1 +z= r·eiϕ 1 -z=1 -r·eiϕ +z = 1 +r ·eiϕ Abbildung 4.23: Inversion am Kreis -Beweis: Nach Proposition 4.9 (c) gibt es σmitσ(a1) =b1undσ(a2) =b2. 
Dann existiert -σ(g1) :=g2wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. +Beweis: Nach Proposition 4.9 (c) gibt esσ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert +σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. Definition 65 -Seienz1,z2,z3,z4∈Cpaarweise verschieden. +Seien z1,z2,z3,z4 ∈C paarweise verschieden. Dann heißt -DV(z1,z2,z3,z4) :=z1−z4 +DV(z1,z2,z3,z4) := +z1−z4 z1−z2 z3−z4 -z3−z2=(z1−z4)·(z3−z2) -(z1−z2)·(z3−z4) -Doppelverhältnis vonz1,...,z 4. +z3−z2 += (z1 −z4) ·(z3 −z2) +(z1 −z2) ·(z3 −z4) +Doppelverhältnis von z1,...,z 4. Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a)DV(z1,...,z 4)∈C\{0,1} -b)DV(z1,z4,z3,z2) =1 +a) DV(z1,...,z 4) ∈C \{0,1 } +b) DV(z1,z4,z3,z2) = 1 DV(z1,z2,z3,z4) -c)DV(z3,z2,z1,z4) =1 +c) DV(z3,z2,z1,z4) = 1 DV(z1,z2,z3,z4) -d)DVist auch wohldefiniert, wenn eines der zi=∞oder wenn zwei der zigleich sind. -e)DV(0,1,∞,z4) =z4(Der Fallz4∈{0,1,∞}ist zugelassen). -f) Fürσ∈PSL 2(C)undz1,...,z 4∈C∪{∞}ist +d) DV ist auch wohldefiniert, wenn eines derzi = ∞oder wenn zwei derzi gleich sind. +e) DV(0,1,∞,z4) = z4 (Der Fallz4 ∈{0,1,∞} ist zugelassen). +f) Für σ∈PSL2(C) und z1,...,z 4 ∈C ∪{∞} ist DV(σ(z1),σ(z2),σ(z3),σ(z4)) = DV(z1,z2,z3,z4) -und fürσ(z) =1 -zgilt +und fürσ(z) = 1 +z gilt DV(σ(z1),σ(z2),σ(z3),σ(z4)) = DV(z1,z2,z3,z4) -g)DV(z1,z2,z3,z4)∈R∪{∞}⇔ z1,...,z 4liegen auf einer hyperbolischen Geraden. +g) DV(z1,z2,z3,z4) ∈R ∪{∞}⇔ z1,...,z 4 liegen auf einer hyperbolischen Geraden. Beweis: -a)DV(z1,...,z 4)̸= 0, dazipaarweise verschieden -DV(z1,...,z 4)̸= 1, da: +a) DV(z1,...,z 4) ̸= 0, dazi paarweise verschieden +DV(z1,...,z 4) ̸= 1, da: Annahme: DV(z1,...,z 4) = 1 -⇔(z1−z2)(z3−z4) = (z1−z4)(z3−z2) +⇔(z1 −z2)(z3 −z4) = (z1 −z4)(z3 −z2) 4.3. 
HYPERBOLISCHE GEOMETRIE -⇔z1z3−z2z3−z1z4+z2z4=z1z3−z3z4−z1z2+z2z4 -⇔z2z3+z1z4=z3z4+z1z2 -⇔z2z3−z3z4=z1z2−z1z4 -⇔z3(z2−z4) =z1(z2−z4) -⇔z3=z1oderz2=z4 -Allezisind paarweise verschieden ⇒Widerspruch ■ -b)DV(z1,z4,z3,z2) =(z1−z2)·(z3−z4) -(z1−z4)·(z3−z2)=1 +⇔z1z3 −z2z3 −z1z4 + z2z4 = z1z3 −z3z4 −z1z2 + z2z4 +⇔z2z3 + z1z4 = z3z4 + z1z2 +⇔z2z3 −z3z4 = z1z2 −z1z4 +⇔z3(z2 −z4) = z1(z2 −z4) +⇔z3 = z1 oder z2 = z4 +Alle zi sind paarweise verschieden⇒Widerspruch ■ +b) DV(z1,z4,z3,z2) = (z1−z2)·(z3−z4) +(z1−z4)·(z3−z2) = 1 DV(z1,z2,z3,z4) -c)DV(z3,z2,z1,z4) =(z3−z4)·(z1−z2) -(z3−z2)·(z1−z4)=1 +c) DV(z3,z2,z1,z4) = (z3−z4)·(z1−z2) +(z3−z2)·(z1−z4) = 1 DV(z1,z2,z3,z4) -d) Zwei der zidürfen gleich sein, da: -Fall 1z1=z4oderz3=z2 -In diesem Fall ist DV(z1,...,z 4) = 0 -Fall 2z1=z2oderz3=z4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1,...,z 4) =∞gilt. -Fall 3z1=z3oderz2=z4 -Durch Einsetzen ergibt sich DV(z1,...,z 4) = 1. -Im Fall, dass ein zi=∞ist, ist entweder DV(0,1,∞,z4) = 0oder DV(0,1,∞,z4)±∞ -e)DV(0,1,∞,z4) =(0−z4)·(∞−1) -(0−1)·(∞−z4)=z4·(∞−1) -∞−z4=z4 +d) Zwei derzi dürfen gleich sein, da: +Fall 1 z1 = z4 oder z3 = z2 +In diesem Fall istDV(z1,...,z 4) = 0 +Fall 2 z1 = z2 oder z3 = z4 +Mit der Regel von L’Hospital folgt, dass in diesem FallDV(z1,...,z 4) = ∞gilt. +Fall 3 z1 = z3 oder z2 = z4 +Durch Einsetzen ergibt sichDV(z1,...,z 4) = 1. +Im Fall, dass einzi = ∞ist, ist entwederDV(0,1,∞,z4) = 0 oder DV(0,1,∞,z4) ±∞ +e) DV(0,1,∞,z4) = (0−z4)·(∞−1) +(0−1)·(∞−z4) = z4·(∞−1) +∞−z4 += z4 f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -g)Seiσ∈PSL 2(C)mitσ(z1) = 0,σ(z2) = 1,σ(z3) =∞. Ein solches σexistiert, da man -drei Parameter von σwählen darf. +g) Sei σ∈PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) = ∞. Ein solchesσ existiert, da man +drei Parameter vonσ wählen darf. Bem. 70.f⇒ DV(z1,...,z 4) = DV(0,1,∞,σ(z4)) -⇒ DV(z1,...,z 4)∈R∪{∞} -⇔σ(z4)∈R∪{∞} -Behauptung folgt, weil σ−1(R∪∞)ein Kreis oder eine Gerade in Cist. 
+⇒ DV(z1,...,z 4) ∈R ∪{∞} +⇔σ(z4) ∈R ∪{∞} +Behauptung folgt, weilσ−1(R ∪∞) ein Kreis oder eine Gerade inC ist. Definition 66 -Fürz1,z2∈Hseigz1,z2die eindeutige hyperbolische Gerade durch z1undz2unda1,a2die -„Schnittpunkte“ von gz1,z2mitR∪{∞}. -Dann seidH(z1,z2) :=1 -2|ln DV(a1,z1,a2,z2)|und heiße hyperbolische Metrik . -Beh.:Fürz1,z2∈Hseigz1,z2die eindeutige hyperbolische Gerade durch z1undz2unda1,a2 -die „Schnittpunkte“ von gz1,z2mitR∪{∞}. +Für z1,z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durchz1 und z2 und a1,a2 die +„Schnittpunkte“ vongz1,z2 mit R ∪{∞} . +Dann seidH(z1,z2) := 1 +2 |ln DV(a1,z1,a2,z2)|und heißehyperbolische Metrik. +Beh.: Für z1,z2 ∈H sei gz1,z2 die eindeutige hyperbolische Gerade durchz1 und z2 und a1,a2 +die „Schnittpunkte“ vongz1,z2 mit R ∪{∞} . Dann gilt: 1 -2|ln DV(a1,z1,a2,z2)|=1 +2|ln DV(a1,z1,a2,z2)|= 1 2|ln DV(a2,z1,a1,z2)| Beweis: Wegen Bemerkung 70.c gilt: -DV(a1,z1,a2,z2) =1 +DV(a1,z1,a2,z2) = 1 DV(a2,z1,a1,z2) Außerdem gilt: -ln1 -x= lnx−1= (−1)·lnx=−lnx +ln 1 +x = ln x−1 = (−1) ·ln x= −ln x 4.3. HYPERBOLISCHE GEOMETRIE -Da der lnim Betrag steht, folgt direkt: +Da derln im Betrag steht, folgt direkt: 1 -2|ln DV(a1,z1,a2,z2)|=1 +2|ln DV(a1,z1,a2,z2)|= 1 2|ln DV(a2,z1,a1,z2)| -Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelverhältnis +Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit derx-Achse im Doppelverhältnis genutzt werden. ■ -Beh.:Die hyperbolische Metrik ist eine Metrik auf H. +Beh.: Die hyperbolische Metrik ist eine Metrik aufH. Beweis: Wegen Bemerkung 70.f ist -d(z1,z2) :=d(σ(z1),σ(z2))mitσ(a1) = 0, σ(a2) =∞ -d. h.σ(gz1,z2) =iR(imaginäre Achse). -also gilt o. B. d. A. z1=iaundz2=ibmita,b∈Runda0mitγ(0) = -sundγ′(0) =x} +c) TsS = {x ∈R3|∃parametrisierte Kurveγ : [−ε,+ε] →S für einε >0 mit γ(0) = +s und γ′(0) = x} Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. 
-d)Seix∈TsS,γ: [−ε,+ε]→Seine parametrisierte Kurve mit ε >0undγ′(0) =s, -sodassγ′(0) =xgilt. Daγ(t)∈Sfür allet∈[−ε,ε], istf◦γ= 0 -⇒0 = (f◦γ)′(0) =⟨grad(f)(γ(0)),γ′(0)⟩ -⇒TsS⊆grad(f)(s)⊥ -dim=2= = = =⇒TsS= (grad(f)(s))⊥ +d) Sei x ∈TsS,γ : [−ε,+ε] →S eine parametrisierte Kurve mitε >0 und γ′(0) = s, +sodass γ′(0) = x gilt. Daγ(t) ∈S für allet∈[−ε,ε], istf ◦γ = 0 +⇒0 = (f ◦γ)′(0) = ⟨grad(f)(γ(0)),γ′(0)⟩ +⇒TsS ⊆grad(f)(s)⊥ +dim=2= = = =⇒TsS = (grad(f)(s))⊥ Definition 72 -a)EinNormalenfeld auf der regulären Fläche S⊆R3ist eine Abbildung n:S→S2⊆ -R3mitn(s)∈TsS⊥für jedess∈S. -b)Sheißtorientierbar , wenn es ein stetiges Normalenfeld auf Sgibt. -Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. +a) Ein Normalenfeld auf der regulären FlächeS ⊆R3 ist eine Abbildungn: S →S2 ⊆ +R3 mit n(s) ∈TsS⊥für jedess∈S. +b) S heißtorientierbar, wenn es ein stetiges Normalenfeld aufS gibt. +Manchmal wird zwischen einemNormalenfeld und einemEinheitsnormalenfeld unterschieden. Im Folgenden werden diese Begriffe jedoch synonym benutzt. Bemerkung 74 (Eigenschaften von Normalenfeldern) -a) Ein Normalenfeld auf Sist genau dann stetig, wenn es glatt ist (also C∞). -b)Zu jedems∈Sgibt es eine Umgebung V⊆R3vonsund eine lokale Parametrisierung -F:U→VvonSums, sodass auf F(U) =V∩Sein stetiges Normalenfeld existiert. -c)Sist genau dann orientierbar, wenn es einen differenzierbaren Atlas von Saus lokalen -Parametrisierungen Fi:Ui→Vi, i∈Igibt, sodass für alle i,j∈Fund alle -s∈Vi∩Vj∩Sgilt: -det(DsVi→Vj -Fj◦F−1 -i -∈R3×3)>0 +a) Ein Normalenfeld aufS ist genau dann stetig, wenn es glatt ist (alsoC∞). +b) Zu jedems∈S gibt es eine UmgebungV ⊆R3 von sund eine lokale Parametrisierung +F : U →V von S um s, sodass aufF(U) = V ∩S ein stetiges Normalenfeld existiert. 
+c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas vonS aus lokalen +Parametrisierungen Fi : Ui → Vi, i ∈ I gibt, sodass für alle i,j ∈ F und alle +s∈Vi ∩Vj ∩S gilt: +det(Ds +Vi→Vj +   +Fj ◦F−1 +i   +∈R3×3 +) >0 Beweis: Wird hier nicht geführt. Beispiel 46 (Normalenfelder) -1)S=S2,n1=idS2ist ein stetiges Normalenfeld. -Auchn2=−idS2ist ein stetiges Normalenfeld. -2)S=Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, +1) S = S2, n1 = idS2 ist ein stetiges Normalenfeld. +Auch n2 = −idS2 ist ein stetiges Normalenfeld. +2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, aber kein stetiges Normalenfeld. 5.3. GAUSS-KRÜMMUNG Abbildung 5.1: Möbiusband 5.3 Gauß-Krümmung Bemerkung 75 -SeiSeine reguläre Fläche, s∈S,n(s)ist ein Normalenvektor in s,x∈TsS,∥x∥= 1. -SeiEder vonxundn(s)aufgespannte 2-dimensionale Untervektorraum von R3. -Dann gibt es eine Umgebung V⊆R3vons, sodass -C:= (s+E)∩S∩V -das Bild einer durch Bogenlänge parametrisierten Kurve γ: [−ε,ε]→Senthält mit γ(0) =s -undγ′(0) =x. +Sei S eine reguläre Fläche,s∈S, n(s) ist ein Normalenvektor ins, x∈TsS, ∥x∥= 1. +Sei E der vonx und n(s) aufgespannte 2-dimensionale Untervektorraum vonR3. +Dann gibt es eine UmgebungV ⊆R3 von s, sodass +C := (s+ E) ∩S∩V +das Bild einer durch Bogenlänge parametrisierten Kurveγ : [−ε,ε] →S enthält mitγ(0) = s +und γ′(0) = x. Beweis: „Satz über implizite Funktionen“1 Definition 73 -In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0)der Kurveγin der Ebene -(s+E)im PunktsdieNormalkrümmung vonSinsin Richtung x=γ′(0). -Man schreibt: κNor(s,x) :=κγ(0) +In der Situation aus Bemerkung 75 heißt die Krümmungκγ(0) der Kurveγ in der Ebene +(s+ E) im Punkts die Normalkrümmung von S in s in Richtungx= γ′(0). +Man schreibt:κNor(s,x) := κγ(0) Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. 
Beispiel 47 (Gauß-Krümmung) -1)S=S2=V(X2+Y2+Z2−1)ist die Kugel um den Ursprung mit Radius 1, n=id, -s= (0,0,1),x= (1,0,0) -⇒E=R·x+R·n(s)(x,z-Ebene) -C=E∩Sist Kreislinie -κNor(s,x) =1 -r= 1 -2)S=V(X2+Z2−1)⊆R3ist ein Zylinder (siehe Abbildung 5.2a). s= (1,0,0) -x1= (0,1,0)⇒E1=R·e1+R·e2(x,y-Ebene) -S∩E1=V(X2+Y2−1)∩E, Kreislinie in E -⇒κNor(s,x1) =±1 -x2= (0,0,1),E2=R·e1+R·e3(x,z-Ebene) -1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II +1) S = S2 = V(X2 + Y2 + Z2 −1) ist die Kugel um den Ursprung mit Radius 1,n= id, +s= (0,0,1), x= (1,0,0) +⇒E = R ·x+ R ·n(s) (x,z-Ebene) +C = E∩S ist Kreislinie +κNor(s,x) = 1 +r = 1 +2) S = V(X2 + Z2 −1) ⊆R3 ist ein Zylinder (siehe Abbildung 5.2a).s= (1,0,0) +x1 = (0,1,0) ⇒E1 = R ·e1 + R ·e2 (x,y-Ebene) +S∩E1 = V(X2 + Y2 −1) ∩E, Kreislinie inE +⇒κNor(s,x1) = ±1 +x2 = (0,0,1),E2 = R ·e1 + R ·e3 (x,z-Ebene) +1Siehe z. B.https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II 5.3. GAUSS-KRÜMMUNG -V∩E2∩S={ -(1,0,z)∈R3⏐⏐z∈R} +V ∩E2 ∩S = +{ +(1,0,z) ∈R3 ⏐⏐z∈R +} ist eine Gerade ⇒κNor(s,x2) = 0 -3)S=V(X2−Y2−Z),s= (0,0,0)(Hyperbolisches Paraboloid, siehe Abbildung 5.2b) -x1= (1,0,0),n(s) = (0,0,1) -x2= (0,1,0) +3) S = V(X2 −Y2 −Z), s= (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) +x1 = (1,0,0), n(s) = (0,0,1) +x2 = (0,1,0) κNor(s,x1) = 2 -κNor(s,x2) =−2 -−1.5−1−0.500.511.5 -−101012345 -xyz -(a)S=V(X2+Z2−1)−2−1.5−1−0.500.511.52 -−2−1012−202 -xyz +κNor(s,x2) = −2 +−1.5 −1 −0.5 0 0.5 1 1.5−1 +0 +1 +0 +1 +2 +3 +4 +5 +x +y +z +(a) S = V(X2 + Z2 −1) +−2 −1.5 −1 −0.5 0 0.5 1 1.5 2 +−2 +−1 +0 +1 +2 +−2 +0 +2 +x +y +z −4−2024f(x,y) -(b)S=V(X2−Y2−Z) +(b) S = V(X2 −Y2 −Z) Abbildung 5.2: Beispiele für reguläre Flächen Definition 74 -SeiS⊆R3eine reguläre Fläche, s∈Sundnein stetiges Normalenfeld auf S. -γ: [−ε,ε]→Seine nach Bogenlänge parametrisierte Kurve ( ε >0) mitγ(0) =sund -γ′′(0)̸= 0. 
-Sein(0) :=γ′′(0) +Sei S ⊆R3 eine reguläre Fläche,s∈S und n ein stetiges Normalenfeld aufS. +γ : [−ε,ε] →S eine nach Bogenlänge parametrisierte Kurve (ε >0) mit γ(0) = s und +γ′′(0) ̸= 0. +Sei n(0) := γ′′(0) ∥γ′′(0)∥. Zerlege -n(0) =n(0)t+n(0)⊥mitn(0)t∈TsSundn(0)⊥∈(TsS)⊥ -Dann istn(0)⊥=⟨n(0),n(s)⟩·n(s) -κNor(s,γ) :=⟨γ′′(0),n(s)⟩dieNormalkrümmung . +n(0) = n(0)t + n(0)⊥mit n(0)t ∈TsS und n(0)⊥∈(TsS)⊥ +Dann istn(0)⊥= ⟨n(0),n(s)⟩·n(s) +κNor(s,γ) := ⟨γ′′(0),n(s)⟩die Normalkrümmung. Bemerkung 76 -Seiγ(t) =γ(−t),t∈[−ε,ε]. Dann istκNor(s,γ) =κNor(s,γ). -Beweis:γ′′(0) =γ′′(0), daγ′(0) =−γ′(0). -Es gilt:κNor(s,γ)hängt nur von|γ′(0)|ab und ist gleich κNor(s,γ′(0)). +Sei γ(t) = γ(−t), t∈[−ε,ε]. Dann istκNor(s,γ) = κNor(s,γ). +Beweis: γ′′(0) = γ′′(0), daγ′(0) = −γ′(0). +Es gilt:κNor(s,γ) hängt nur von|γ′(0)|ab und ist gleichκNor(s,γ′(0)). Bemerkung 77 -SeiSeine reguläre Fläche und n=n(s)ein Normalenvektor an Sins. -SeiT1 -sS={x∈TsS|∥x∥= 1}∼=S1. Dann ist +Sei S eine reguläre Fläche undn= n(s) ein Normalenvektor anS in s. +Sei T1 +sS = {x∈TsS |∥x∥= 1 }∼= S1. Dann ist κn -Nor(s) :T1 -sS→R, x↦→κNor(s,x) -eine glatte Funktion und Bildκn -Nor(s)ist ein abgeschlossenes Intervall. +Nor(s) : T1 +sS →R, x ↦→κNor(s,x) +eine glatte Funktion undBild κn +Nor(s) ist ein abgeschlossenes Intervall. Definition 75 -SeiSeine reguläre Fläche und n=n(s)ein Normalenvektor an Sins. +Sei S eine reguläre Fläche undn= n(s) ein Normalenvektor anS in s. 5.3. GAUSS-KRÜMMUNG -a)κn -1(s) : = min{ +a) κn +1 (s) : = min +{ κn -Nor(s,x)⏐⏐x∈T1 -sS} +Nor(s,x) +⏐⏐x∈T1 +sS +} und κn -2(s) : = max{ +2 (s) : = max +{ κn -Nor(s,x)⏐⏐x∈T1 -sS}heißenHauptkrümmungen vonSins. -b)K(s) :=κn -1(s)·κn -2(s)heißtGauß-Krümmung vonSins. +Nor(s,x) +⏐⏐x∈T1 +sS +} +heißenHauptkrümmungenvon S in s. +b) K(s) := κn +1 (s) ·κn +2 (s) heißtGauß-Krümmungvon S in s. 
Bemerkung 78 -Ersetzt man ndurch−n, so gilt: +Ersetzt mann durch −n, so gilt: κ−n -Nor(s,x) =−κn -Nor(x)∀x∈T1 +Nor(s,x) = −κn +Nor(x) ∀x∈T1 sS ⇒κ−n -1(s) =−κn -2(s) +1 (s) = −κn +2 (s) κ−n -2(s) =−κn -1(s) -undK−n(s) =Kn(s) =:K(s) +2 (s) = −κn +1 (s) +und K−n(s) = Kn(s) =: K(s) Beispiel 48 -1)S=S2. Dann istκ1(s) =κ2(s) =±1∀s∈S2 +1) S = S2. Dann istκ1(s) = κ2(s) = ±1 ∀s∈S2 ⇒K(s) = 1 2) Zylinder: -κ1(s) = 0,κ2(s) = 1⇒K(s) = 0 +κ1(s) = 0,κ2(s) = 1 ⇒K(s) = 0 3) Sattelpunkt auf hyperbolischem Paraboloid: -κ1(s)<0,κ2(s) = 0→K(s)<0 -4)S=Torus. Siehe Abbildung 5.3 -s1s2 +κ1(s) <0,κ2(s) = 0 →K(s) <0 +4) S = Torus. Siehe Abbildung 5.3 +s1 +s2 s3 -Abbildung 5.3: K(s1)>0,K(s2) = 0,K(s3)<0 +Abbildung 5.3:K(s1) >0, K(s2) = 0, K(s3) <0 Bemerkung 79 -SeiSeine reguläre Fläche, s∈Sein Punkt. +Sei S eine reguläre Fläche,s∈S ein Punkt. 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -a) IstK(s)>0, so liegtSin einer Umgebung von sganz auf einer Seite von TsS+s. -b) IstK(s)<0, so schneidet jede Umgebung von sinSbeide Seiten von TsS+s. +a) Ist K(s) >0, so liegtS in einer Umgebung vons ganz auf einer Seite vonTsS+ s. +b) Ist K(s) <0, so schneidet jede Umgebung vons in S beide Seiten vonTsS+ s. 5.4 Erste und zweite Fundamentalform -SeiS⊆R3eine reguläre Fläche, s∈S,TsSdie Tangentialebene an SinsundF:U→Veine -lokale Parametrisierung von Sums. Weiter sei p:=F−1(s). +Sei S ⊆R3 eine reguläre Fläche,s∈S, TsS die Tangentialebene anS in s und F : U →V eine +lokale Parametrisierung vonS um s. Weiter seip:= F−1(s). Definition 76 -SeiIS∈R2×2definiert als -IS: =(g1,1(s)g1,2(s) -g1,2(s)g2,2(s)) -=(E(s)F(s) -F(s)G(s)) -mitgi,j=gs(DpF(ei),DpF(ej)) -=⟨∂F -∂ui(p),∂F -∂uj(p)⟩i,j∈{1,2} -Die Matrix ISheißterste Fundamentalform vonSbzgl. der Parametrisierung F. +Sei IS ∈R2×2 definiert als +IS : = +(g1,1(s) g1,2(s) +g1,2(s) g2,2(s) +) += +(E(s) F(s) +F(s) G(s) +) +mit gi,j = gs(DpF(ei),DpF(ej)) += ⟨∂F +∂ui +(p), ∂F +∂uj +(p)⟩ i,j ∈{1,2 } +Die MatrixIS heißterste Fundamentalformvon S bzgl. der ParametrisierungF. 
Bemerkung 80 -a)Die Einschränkung des Standardskalarproduktes des R3aufTsSmachtTsSzu einem +a) Die Einschränkung des Standardskalarproduktes desR3 auf TsS macht TsS zu einem euklidischen Vektorraum. -b){DpF(e1),DpF(e2)}ist eine Basis von TsS. -c)Bzgl. der Basis{DpF(e1),DpF(e2)}hat das Standardskalarprodukt aus Bemerkung - 80.a die Darstellungsmatrix IS. -d)gi,j(s)ist eine differenzierbare Funktion von s. +b) {DpF(e1),DpF(e2) }ist eine Basis vonTsS. +c) Bzgl. der Basis {DpF(e1),DpF(e2) }hat das Standardskalarprodukt aus Bemerkung + 80.a die DarstellungsmatrixIS. +d) gi,j(s) ist eine differenzierbare Funktion vons. Bemerkung 81 -det(IS) =∂F -∂u1(p)×∂F -∂u2(p)2 -Beweis: Sei∂F -∂u1(p) = -x1 +det(IS) = + +∂F +∂u1 +(p) ×∂F +∂u2 +(p) + +2 +Beweis: Sei ∂F +∂u1 +(p) = + + +x1 x2 -x3 -,∂F -∂u2(p) = -y1 +x3 + +, ∂F +∂u2 +(p) = + + +y1 y2 -y3 +y3 +  -Dann ist∂F -∂u1(p)×∂F -∂u2(p) = -z1 +Dann ist ∂F +∂u1 +(p) ×∂F +∂u2 +(p) = + + +z1 z2 -z3 +z3 + mit -z1=x2y3−x3y2 -z2=x3y1−x1y3 -z3=x1y2−x2y1 +z1 = x2y3 −x3y2 +z2 = x3y1 −x1y3 +z3 = x1y2 −x2y1 ⇒∥∂F -∂u1(p)×∂F -∂u2(p)∥=z2 -1+z2 -2+z2 +∂u1 +(p) ×∂F +∂u2 +(p)∥= z2 +1 + z2 +2 + z2 3 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -det(IS) =g1,1g2,2−g2 +det(IS) = g1,1g2,2 −g2 1,2 -=⟨ -x1 += +⟨ + +x1 x2 -x3 -, -x1 +x3 + +, + + +x1 x2 -x3 -⟩⟨ -y1 +x3 + + +⟩⟨ + +y1 y2 -y3 -, -y1 +y3 + +, + + +y1 y2 -y3 -⟩ -−⟨ -x1 +y3 + + +⟩ +− +⟨ + +x1 x2 -x3 -, -y1 +x3 + +, + + +y1 y2 -y3 -⟩2 +y3 + + +⟩2 = (x2 -1+x2 -2+x2 +1 + x2 +2 + x2 3)(y2 -1+y2 -2+y2 -3)−(x1y1+x2y2+x3y3)2 +1 + y2 +2 + y2 +3) −(x1y1 + x2y2 + x3y3)2 Definition 77 -a)Das Differential dA=√ -det(I)du1du2heißtFlächenelement vonSbzgl. der Parametrisierung +a) Das DifferentialdA= +√ +det(I)du1du2 heißtFlächenelementvon S bzgl. der Parametrisierung F. 
-b) Für eine Funktion f:V→Rheißt +b) Für eine Funktionf : V →R heißt +∫ +V +fdA:= ∫ -VfdA:=∫ -Uf(F(u1,u2) -=:s)√ -detI(s)du1du2 -derWert des Integrals vonfüberV, falls das Integral rechts existiert. +U +f(F(u1,u2)   +=:s +) +√ +det I(s)du1du2 +der Wert des Integralsvon f über V, falls das Integral rechts existiert. Bemerkung 82 -a)∫ -VfdAist unabhängig von der gewählten Parametrisierung. -b) Seif:S→Reine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. -Dann ist∫ -SfdAwohldefiniert, falls (z. B.) Skompakt ist. +a) +∫ +V fdA ist unabhängig von der gewählten Parametrisierung. +b) Sei f : S →R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. +Dann ist +∫ +SfdA wohldefiniert, falls (z. B.)S kompakt ist. Etwa: ∫ -SfdA=n∑ -i=1∫ -VifdA -−∑ -i̸=j∫ -Vi∩VjfdA -+∑ -i,j,k∫ -Vi∩Vj∩VkfdA +S +fdA= +n∑ +i=1 +∫ +Vi +fdA +− +∑ +i̸=j +∫ +Vi∩Vj +fdA ++ +∑ +i,j,k +∫ +Vi∩Vj∩Vk +fdA −... Beweis: a) Mit Transformationsformel. b) Ist dem Leser überlassen. Proposition 5.1 -SeiS⊆R3eine reguläre, orientierbare Fläche mit glatten Normalenfeld n:S→S2. +Sei S ⊆R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeldn : S →S2. Dann gilt: -a)ninduziert für jedes s∈Seine lineare Abbildung dsn:TsS→Tn(s)S2durch -dsn(x) =d -dtn(s„+“tx -Soll auf Fläche Sbleiben)⏐⏐⏐ +a) n induziert für jedess∈S eine lineare Abbildungdsn: TsS →Tn(s)S2 durch +dsn(x) = d +dtn(s„+“tx   +Soll auf FlächeS bleiben +) +⏐⏐⏐ t=0 -Die Abbildung dsnheißtWeingarten-Abbildung +Die Abbildungdsn heißtWeingarten-Abbildung 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -b)Tn(s)S2=TsS. -c)dsnist ein Endomorphismus von TsS. -d)dsnist selbstadjungiert bzgl. des Skalarproduktes IS. -Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. +b) Tn(s)S2 = TsS. +c) dsn ist ein Endomorphismus vonTsS. +d) dsn ist selbstadjungiert bzgl. des SkalarproduktesIS. +Hinweis: Die Weingarten-Abbildung wird auchFormoperator genannt. 5.4. 
ERSTE UND ZWEITE FUNDAMENTALFORM Beweis: a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -b)Tn(S)S2=⟨n(s)⟩⊥=TsS -c) Wegen Proposition 5.1 (a) ist dsnein Homomorphismus. -d) Zu zeigen:∀x,y∈IsS:⟨x,dsn(y)⟩=⟨dsn(x),y⟩ +b) Tn(S)S2 = ⟨n(s)⟩⊥= TsS +c) Wegen Proposition 5.1 (a) istdsn ein Homomorphismus. +d) Zu zeigen:∀x,y ∈IsS : ⟨x,dsn(y)⟩= ⟨dsn(x),y⟩ Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. -Seixi=DpF(ei) =∂F -∂ui(p)i= 1,2 -Beh.:⟨xi,dsn(xj)⟩=⟨∂2F -∂ui∂uj(p),dsn(xi)⟩ -⇒⟨∂2F -∂ui∂uj(p),dsn(xi)⟩=⟨xj,dsn(xi)⟩ -Bew.: 0 =⟨∂F -∂u(p+tej),n(p+tej)⟩ -⇒0 =d -dt( +Sei xi = DpF(ei) = ∂F +∂ui +(p) i= 1,2 +Beh.: ⟨xi,dsn(xj)⟩= ⟨ ∂2F +∂ui∂uj +(p),dsn(xi)⟩ +⇒⟨ ∂2F +∂ui∂uj +(p),dsn(xi)⟩= ⟨xj,dsn(xi)⟩ +Bew.: 0 = ⟨∂F +∂u(p+ tej),n(p+ tej)⟩ +⇒0 = d +dt +( ⟨∂F -∂u(p+tej),n(p+tej)⟩)⏐⏐⏐ +∂u(p+ tej),n(p+ tej)⟩ +)⏐⏐⏐ t=0 -=⟨d -dt∂F -∂ui(p+tej) - += ⟨d +dt +∂F +∂ui +(p+ tej) +   ∂2F -∂uj∂ui(p)⏐⏐⏐ -t=0,n(s)⟩+⟨xi,dsnDpF(ej) -xj⟩ +∂uj∂ui +(p) +⏐⏐⏐ +t=0 +,n(s)⟩+ ⟨xi,dsnDpF(ej)   +xj +⟩ Definition 78 -Die durch−dsndefinierte symmetrische Bilinearform auf TsSheißtzweite FundamentalformvonSinsbzgl.F. - -Man schreibt: IIs(x,y) =⟨−dsn(x),y⟩=Is(−dsn(x),y) +Die durch−dsndefinierte symmetrische Bilinearform aufTsS heißtzweite Fundamentalform + von S in s bzgl. F. +Man schreibt:IIs(x,y) = ⟨−dsn(x),y⟩= Is(−dsn(x),y) Bemerkung 83 -Bezüglich der Basis {x1,x2}vonTsShatIIsdie Darstellungsmatrix +Bezüglich der Basis{x1,x2 }von TsS hat IIs die Darstellungsmatrix (h(s) -i,j)i,j=1,2mithi,j(s) =⟨∂2F -∂ui∂uj(p),n(s)⟩ +i,j)i,j=1,2 mit hi,j(s) = ⟨ ∂2F +∂ui∂uj +(p),n(s)⟩ Proposition 5.2 -Seiγ: [−ε,ε]→Seine nach Bogenlänge parametrisierte Kurve mit γ(0) =s. Dann gilt: -κNor(s,γ) =IIs(γ′(0),γ′(0)) -Beweis: Nach Definition 74 ist κNor(s,γ) =⟨γ′′(0),n(s)⟩. Nach Voraussetzung gilt -n(γ(t))⊥γ′(t)⇔⟨γ′′(0),n(s)⟩= 0 -Die Ableitung nach tergibt -0 =d +Sei γ : [−ε,ε] →S eine nach Bogenlänge parametrisierte Kurve mitγ(0) = s. 
Dann gilt: +κNor(s,γ) = IIs(γ′(0),γ′(0)) +Beweis: Nach Definition 74 istκNor(s,γ) = ⟨γ′′(0),n(s)⟩. Nach Voraussetzung gilt +n(γ(t)) ⊥γ′(t) ⇔⟨γ′′(0),n(s)⟩= 0 +Die Ableitung nacht ergibt +0 = d dt(⟨n(γ(t)),γ′(t)) -=⟨d -dtn(γ(t))⏐⏐⏐ -t=0,γ′(0)⟩ -+⟨n(s),γ′′(0)⟩ += +⟨d +dtn(γ(t)) +⏐⏐⏐ +t=0 +,γ′(0) +⟩ ++ ⟨n(s),γ′′(0)⟩ 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -=⟨dsn(γ′(0)),γ′(0)⟩+κNor(s,γ) -=−IIs(γ′(0),γ′(0)) +κNor(s,γ) += ⟨dsn(γ′(0)),γ′(0)⟩+ κNor(s,γ) += −IIs(γ′(0),γ′(0)) + κNor(s,γ) Folgerung 5.3 Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: -κNor(s,γ) =κNor(s,γ′(0)) +κNor(s,γ) = κNor(s,γ′(0)) Satz 5.4 -SeiS⊆R3eine reguläre, orientierbare Fläche und s∈S. -a) Die Hauptkrümmungen κ1(s),κ2(s)sind die Eigenwerte von IIs. -b) Für die Gauß-Krümmung gilt: K(s) = det(IIs) +Sei S ⊆R3 eine reguläre, orientierbare Fläche unds∈S. +a) Die Hauptkrümmungenκ1(s),κ2(s) sind die Eigenwerte vonIIs. +b) Für die Gauß-Krümmung gilt:K(s) = det(IIs) Beweis: -a)IIsist symmetrisch, IsShat also eine Orthonormalbasis aus Eigenvektoren y1,y2von -IIs. Istx∈TsS,∥x∥= 1, so gibt es ϕ∈[0,2π)mitx= cosϕ·y1+ sinϕ·y2. -Seienλ1,λ2die Eigenwerte von IIs, alsoIIs(yi,yi) =λi. Dann gilt: -IIs(x,x) = cos2ϕλ1+ sin2ϕλ2 -= (1−sin2ϕ)λ1+ sin2ϕλ2 -=λ1+ sin2ϕ(λ2−λ1)≥λ1 -= cos2ϕ+ (1−cos2ϕ)λ2 -=λ2−cos2ϕ(λ2−λ1)≤λ2 -Prop. 5.2= = = = =⇒λ1= min{ -κNor(s,x)⏐⏐x∈T1 -sS} -λ2= max{ -κNor(s,x)⏐⏐x∈T1 -sS} +a) IIs ist symmetrisch,IsS hat also eine Orthonormalbasis aus Eigenvektoreny1,y2 von +IIs. Istx∈TsS, ∥x∥= 1, so gibt esϕ∈[0,2π) mit x= cos ϕ·y1 + sinϕ·y2. +Seien λ1,λ2 die Eigenwerte vonIIs, alsoIIs(yi,yi) = λi. Dann gilt: +IIs(x,x) = cos2 ϕλ1 + sin2 ϕλ2 += (1 −sin2 ϕ)λ1 + sin2 ϕλ2 += λ1 + sin2 ϕ(λ2 −λ1) ≥λ1 += cos2 ϕ+ (1 −cos2 ϕ)λ2 += λ2 −cos2 ϕ(λ2 −λ1) ≤λ2 +Prop. 5.2 += = = = =⇒λ1 = min +{ +κNor(s,x) +⏐⏐x∈T1 +sS +} +λ2 = max +{ +κNor(s,x) +⏐⏐x∈T1 +sS +} Satz 5.5 (Satz von Gauß-Bonnet) -SeiS⊆R3eine kompakte orientierbare reguläre Fläche. 
Dann gilt: +Sei S ⊆R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: ∫ -SK(s)dA= 2πχ(S) -Dabei istχ(S)die Euler-Charakteristik von S. +S +K(s)dA= 2πχ(S) +Dabei istχ(S) die Euler-Charakteristik vonS. Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. Lösungen der Übungsaufgaben Lösung zu Aufgabe 1 -Teilaufgabe a) Es gilt: -(i)∅,X∈TX. -(ii)TXist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1,U2∈ -TX:U1∩U2∈TX. -(iii)Auch unter beliebigen Vereinigungen ist TXabgeschlossen, d. h. es gilt für eine -beliebige Indexmenge Iund alleUi∈TXfür allei∈I:⋃ -i∈IUi∈TX -Also ist (X,TX)ein topologischer Raum. -Teilaufgabe b) Wählex= 1,y= 0. Dann gilt x̸=yund die einzige Umgebung von x -istX. Day= 0∈Xkönnen also xundynicht durch offene Mengen getrennt werden. -(X,TX)ist also nicht hausdorffsch. -Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,TX)nach -(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X,TX) +Teilaufgabe a)Es gilt: +(i) ∅,X ∈TX. +(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alleU1,U2 ∈ +TX : U1 ∩U2 ∈TX. +(iii) Auch unter beliebigen Vereinigungen istTX abgeschlossen, d. h. es gilt für eine +beliebige IndexmengeI und alleUi ∈TX für allei∈I : ⋃ +i∈I Ui ∈TX +Also ist(X,TX) ein topologischer Raum. +Teilaufgabe b)Wähle x= 1,y = 0. Dann giltx̸= y und die einzige Umgebung vonx +ist X. Day = 0 ∈X können alsox und y nicht durch offene Mengen getrennt werden. +(X,TX) ist also nicht hausdorffsch. +Teilaufgabe c)Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da(X,TX) nach +(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass(X,TX) kein metrischer Raum sein kann. Lösung zu Aufgabe 2 Teilaufgabe a) -Beh.:∀a∈Z:{a}ist abgeschlossen. -Seia∈Zbeliebig. 
Dann gilt: +Beh.: ∀a∈Z : {a}ist abgeschlossen. +Sei a∈Z beliebig. Dann gilt: Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. Teilaufgabe b) -Beh.:{−1,1}ist nicht offen -Bew.:durch Widerspruch -Annahme:{−1,1}ist offen. -Dann gibt es T⊆B, sodass⋃ -M∈TM={−1,1}. Aber alleU∈Bhaben unendlich viele -Elemente. Auch endlich viele Schnitte von Elementen in Bhaben unendlich viele Elemente -⇒keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒{− 1,1}ist +Beh.: {−1,1 }ist nicht offen +Bew.: durch Widerspruch +Annahme: {−1,1 }ist offen. +Dann gibt esT ⊆B, sodass⋃ +M∈T M = {−1,1 }. Aber alleU ∈B haben unendlich viele +Elemente. Auch endlich viele Schnitte von Elementen inB haben unendlich viele Elemente +⇒keine endliche nicht-leere Menge kann in dieser Topologie offen sein⇒{− 1,1 }ist nicht offen. ■ Teilaufgabe c) -Beh.:Es gibt unendlich viele Primzahlen. +Beh.: Es gibt unendlich viele Primzahlen. Lösungen der Übungsaufgaben -Bew.:durch Widerspruch -Annahme: Es gibt nur endlich viele Primzahlen p∈P +Bew.: durch Widerspruch +Annahme: Es gibt nur endlich viele Primzahlenp∈P Dann ist -Z\{− 1,+1}FS d. Arithmetik=⋃ -p∈PU0,p -endlich. Das ist ein Widerspruch zu |Z|ist unendlich und |{−1,1}|ist endlich. ■ +Z \{−1,+1 }FS d. Arithmetik= +⋃ +p∈P +U0,p +endlich. Das ist ein Widerspruch zu|Z|ist unendlich und|{−1,1 }|ist endlich. ■ Lösung zu Aufgabe 3 -(a)Beh.:Die offenen Mengen von Psind Vereinigungen von Mengen der Form +(a) Beh.: Die offenen Mengen vonP sind Vereinigungen von Mengen der Form +∏ +j∈J +Uj × ∏ -j∈JUj×∏ -i∈N,i̸=jPi -wobeiJ⊆Nendlich und Uj⊆Pjoffen ist. +i∈N,i̸=j +Pi +wobei J ⊆N endlich undUj ⊆Pj offen ist. Beweis: Nach Definition der Produkttopologie bilden Mengen der Form ∏ -i∈JUj×∏ -i∈N\JPi -wobeiJ⊆Nendlich und Uj⊆Pjoffen∀j∈Jeine Basis der Topologie. -Damit sind die offenen Mengen von PVereinigungen von Mengen der obigen +i∈J +Uj × +∏ +i∈N\J +Pi +wobei J ⊆N endlich undUj ⊆Pj offen ∀j ∈J eine Basis der Topologie. 
+Damit sind die offenen Mengen vonP Vereinigungen von Mengen der obigen Form. ■ -(b)Beh.:Die Zusammenhangskomponenten von Psind alle einpunktig. -Beweis: Es seinenx,y∈Pundxsowieyliegen in der gleichen Zusammenhangskomponente - Z⊆P. DaZzusammenhängend ist und ∀i∈I:pi:P→Piist -stetig, istpi(Z)⊆Pizusammenhängend für alle i∈N. Die zusammenhängenden -Mengen von Pisind genau{0}und{1}, d. h. für alle i∈Ngilt entweder -pi(Z)⊆{0}oderpi(Z)⊆{1}. Es seizi∈{0,1}so, dasspi(Z)⊆{zi}für -allei∈N. Dann gilt also: +(b) Beh.: Die Zusammenhangskomponenten vonP sind alle einpunktig. +Beweis: Es seinenx,y ∈P und x sowie y liegen in der gleichen Zusammenhangskomponente + Z ⊆P. DaZ zusammenhängend ist und∀i∈I : pi : P →Pi ist +stetig, istpi(Z) ⊆Pi zusammenhängend für allei∈N. Die zusammenhängenden +Mengen von Pi sind genau {0 }und {1 }, d. h. für allei ∈N gilt entweder +pi(Z) ⊆{ 0 }oder pi(Z) ⊆{ 1 }. Es seizi ∈{ 0,1 }so, dasspi(Z) ⊆{ zi }für +alle i∈N. Dann gilt also: pi(x) -=xi=zi=pi(y) -=yi∀i∈N -Somit folgt: x=y ■ +=xi += zi = pi(y) +=yi +∀i∈N +Somit folgt:x= y ■ Lösung zu Aufgabe 4 -(a)Beh.: GLn(R)ist nicht kompakt. -Bew.: det:GLn(R)→R\{0}ist stetig. Außerdem ist det(GLn(R)) =R\{0} -nicht kompakt.22⇒GLn(R)ist nicht kompakt. ■ -(b)Beh.: SL1(R)ist nicht kompakt, für n>1istSLn(R)kompakt. -Bew.:FürSL1(R)gilt:SL1(R) ={ -A∈R1×1⏐⏐detA= 1} -=( -1)∼={1}.22⇒SL1(R) +(a) Beh.: GLn(R) ist nicht kompakt. +Bew.: det : GLn(R) →R \{0 }ist stetig. Außerdem istdet(GLn(R)) = R \{0 } +nicht kompakt.22⇒GLn(R) ist nicht kompakt. ■ +(b) Beh.: SL1(R) ist nicht kompakt, fürn> 1 ist SLn(R) kompakt. +Bew.: Für SL1(R) gilt: SL1(R) = +{ +A∈R1×1 ⏐⏐det A= 1 +} += +( +1 +)∼= {1 }. 22⇒SL1(R) ist kompakt. Lösungen der Übungsaufgaben -SLn(R)⊆GLn(R)lässt sich mit einer Teilmenge des Rn2identifizieren. Nach Satz 1.1 +SLn(R) ⊆GLn(R) lässt sich mit einer Teilmenge desRn2 +identifizieren. Nach Satz 1.1 sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. 
Definiere -nun für für n∈N≥2,m∈N: -Am=diagn(m,1 +nun für fürn∈N≥2,m ∈N: +Am = diagn(m, 1 m,..., 1) -Dann gilt: detAm= 1, d. h.Am∈SLn(R), undAmist unbeschränkt, da ∥Am∥∞= +Dann gilt:det Am = 1, d. h.Am ∈SLn(R), undAm ist unbeschränkt, da∥Am∥∞= m−−−−→ -m→∞∞. ■ -(c)Beh.:P(R)ist kompakt. -Bew.:P(R)∼=Sn/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung - stetig. Da Snals abgeschlossene und beschränkte Teilmenge des Rn+1kompakt -ist22⇒P (R)ist kompakt. ■ +m→∞ +∞. ■ +(c) Beh.: P(R) ist kompakt. +Bew.: P(R) ∼= Sn/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung + stetig. DaSn als abgeschlossene und beschränkte Teilmenge desRn+1 kompakt +ist 22⇒P(R) ist kompakt. ■ Lösung zu Aufgabe 5 Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. Definition 79 -Seien (G,∗)und(H,◦)Gruppen und ϕ:G→Heine Abbildung. -ϕheißtHomomorphismus , wenn -∀g1,g2∈G:ϕ(g1∗g2) =ϕ(g1)◦ϕ(g2) +Seien (G,∗) und (H,◦) Gruppen undϕ: G→H eine Abbildung. +ϕ heißtHomomorphismus, wenn +∀g1,g2 ∈G: ϕ(g1 ∗g2) = ϕ(g1) ◦ϕ(g2) gilt. Es folgt direkt: -1)SeiX=Rmit der Standarttopologie und ϕ1:idRundR= (R,+). Dann istϕ1ein +1) Sei X = R mit der Standarttopologie undϕ1 : idR und R = (R,+). Dann istϕ1 ein Gruppenhomomorphismus und ein Homöomorphismus. -2)SeiG= (Z,+)undH= (Z/3Z,+). Dann ist ϕ2:G→H,x↦→xmod 3ein -Gruppenhomomorphismus. Jedoch ist ϕ2nicht injektiv, also sicher kein Homöomorphismus. +2) Sei G = ( Z,+) und H = ( Z/3Z,+). Dann ist ϕ2 : G →H,x ↦→x mod 3 ein +Gruppenhomomorphismus. Jedoch istϕ2 nicht injektiv, also sicher kein Homöomorphismus. -3)SeiXein topologischer Raum. Dann ist idXein Homöomorphismus. Da keine -Verknüpfung auf Xdefiniert wurde, ist Xkeine Gruppe und daher auch kein Gruppenhomomorphismus. +3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine +Verknüpfung aufX definiert wurde, istX keine Gruppe und daher auch kein Gruppenhomomorphismus. 
Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten verwendet. @@ -3536,300 +4255,325 @@ Lösung zu Aufgabe 6 Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf Seite 6. Definition 80 -Seien (G,∗)und(H,◦)Gruppen und ϕ:G→Heine Abbildung. -ϕheißtIsomorphismus , wennϕein bijektiver Homomorphismus ist. +Seien (G,∗) und (H,◦) Gruppen undϕ: G→H eine Abbildung. +ϕ heißtIsomorphismus, wennϕ ein bijektiver Homomorphismus ist. Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. Lösungen der Übungsaufgaben Lösung zu Aufgabe 7 -(a)Vor.:SeiMeine topologische Mannigfaltigkeit. -Beh.:Mist wegzusammehängend ⇔Mist zusammenhängend -Beweis: „⇒“: DaMinsbesondere ein topologischer Raum ist folgt diese Richtung +(a) Vor.:Sei M eine topologische Mannigfaltigkeit. +Beh.: M ist wegzusammehängend⇔M ist zusammenhängend +Beweis: „⇒“: DaM insbesondere ein topologischer Raum ist folgt diese Richtung direkt aus Bemerkung 23. -„⇐“: Seienx,y∈Mund -Z:={z∈M|∃Weg vonxnachz} +„⇐“: Seienx,y ∈M und +Z := {z∈M |∃Weg vonx nach z} Es gilt: -(i)Z̸=∅, daMlokal wegzusammenhängend ist -(ii)Zist offen, da Mlokal wegzusammenhängend ist -(iii)ZC:={˜z∈M|∄Weg vonxnach ˜z}ist offen -DaMeine Mannigfaltigkeit ist, existiert zu jedem ˜z∈ZCeine offene und -wegzusammenhängende Umgebung U˜z⊆M. -Es gilt sogar U˜z⊆ZC, denn gäbe es ein U˜z∋z∈Z, so gäbe es Wege γ2: -[0,1]→M,γ 2(0) =z,γ2(1) =xundγ1: [0,1]→M,γ 1(0) = ˜z,γ1(1) =z. +(i) Z ̸= ∅, daM lokal wegzusammenhängend ist +(ii) Z ist offen, daM lokal wegzusammenhängend ist +(iii) ZC := {˜z∈M |∄Weg vonx nach ˜z}ist offen +Da M eine Mannigfaltigkeit ist, existiert zu jedem˜z∈ZC eine offene und +wegzusammenhängende UmgebungU˜z ⊆M. +Es gilt sogarU˜z ⊆ZC, denn gäbe es einU˜z ∋z ∈Z, so gäbe es Wegeγ2 : +[0,1] →M,γ2(0) = z,γ2(1) = x und γ1 : [0,1] →M,γ1(0) = ˜z,γ1(1) = z. 
Dann wäre aber -γ: [0,1]→M, -γ(x) ={ -γ1(2x)falls0≤x≤1 +γ : [0,1] →M, +γ(x) = +{ +γ1(2x) falls 0 ≤x≤1 2 -γ2(2x−1)falls1 -2d(A,B). Nach §3 (i) gibt es C′∈AC+mitd(A,C′) =d(A,B) -⇒C′liegt zwischen AundC. -Es gilt∡ABC′<∡ABCund aus Aufgabe 9 (a) folgt: ∡ABC′=∡AC′B. -∠BC′Aist ein nicht anliegender Außenwinkel zu ∠BCABem. 66= = = = =⇒∡BC′A>∡BCA -⇒∡BCA <∡BC′A=∡ABC′<∡ABCSei umgekehrt∡ABC >∡BCA, kann -wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B)>d(A,C)gelten. -Wegen Aufgabe 9 (a) kann nicht d(A,B) =d(A,C)gelten. -⇒d(A,B)d(A,B). Nach §3 (i) gibt esC′∈AC+ mit d(A,C′) = d(A,B) +⇒C′liegt zwischenA und C. +Es gilt∡ABC′<∡ABC und aus Aufgabe 9 (a) folgt:∡ABC′= ∡AC′B. +∠BC′A ist ein nicht anliegender Außenwinkel zu∠BCA Bem. 66= = = = =⇒∡BC′A> ∡BCA +⇒∡BCA <∡BC′A = ∡ABC′ < ∡ABC Sei umgekehrt∡ABC >∡BCA, kann +wegen 1. Teil von Aufgabe 9 (b) nichtd(A,B) >d(A,C) gelten. +Wegen Aufgabe 9 (a) kann nichtd(A,B) = d(A,C) gelten. +⇒d(A,B) 0}obere Halbebene -I= [0,1]⊊ REinheitsintervall -f:S1↪→R2Einbettung der Kreislinie in die Ebene -π1(X,x)Fundamentalgruppe im topologischen Raum Xumx∈X -Fix(f)Menge der Fixpunkte der Abbildung f -∥·∥ 2 2-Norm; Euklidische Norm ++,0 := {(x1,...,x n) ∈Rn |xn ≥0 } Halbraum +R×= R \{0 } Einheitengruppe vonR +C = {a+ ib|a,b ∈R } Komplexe Zahlen +P = {2,3,5,7,... 
} Primzahlen +H = {z∈C |ℑz >0 } obere Halbebene +I = [0,1] ⊊ R Einheitsintervall +f : S1 ↪→R2 Einbettung der Kreislinie in die Ebene +π1(X,x) Fundamentalgruppe im topologischen RaumX um x∈X +Fix(f) Menge der Fixpunkte der Abbildungf +∥·∥2 2-Norm; Euklidische Norm κ Krümmung κNor Normalenkrümmung -V(f) Nullstellenmenge von f2 +V(f) Nullstellenmenge vonf2 Krümmung -DpF:R2→R3Lineare Abbildung mit Jacobi-Matrix in p(siehe Seite 89) -TsS Tangentialebene an S⊆R3durchs∈S +DpF : R2 →R3 Lineare Abbildung mit Jacobi-Matrix inp (siehe Seite 89) +TsS Tangentialebene anS ⊆R3 durch s∈S dsn(x) Weingarten-Abbildung -2vonVanishing Set +2von Vanishing Set Stichwortverzeichnis Abbildung affine, 107 @@ -3842,7 +4586,7 @@ Abschluss, 3 Abstand, 86 Abstandsaxiom, 65 Achterknoten, 20 -Aktion, sieheGruppenoperation +Aktion, siehe Gruppenoperation Anordnungsaxiome, 66 Atlas, 24 Außenwinkel, 70 @@ -3868,18 +4612,19 @@ begleitendes, 89 Ebene euklidische, 64 Eigenvektor, 107 -Eigenwert, 107einfach zusammenhängend, 49 +Eigenwert, 107 +einfach zusammenhängend, 49 Einheitsnormalenfeld, 90 -Euler-Charakteristik, sieheEulerzahl +Euler-Charakteristik, siehe Eulerzahl Eulersche Polyederformel, 38 Eulerzahl, 36 Färbbarkeit, 21 -Faser, sieheUrbild +Faser, siehe Urbild Fläche orientierbare, 90 reguläre, 30 Flächenelement, 95 -Formoperator, sieheWeingarten-Abbildung +Formoperator, siehe Weingarten-Abbildung Fundamentalform erste, 94 zweite, 97 @@ -3896,7 +4641,7 @@ spezielle lineare, 22 topologische, 33 Gruppe operiert durch Homöomorphismen, 61 -Gruppenaktion, sieheGruppenoperation +Gruppenaktion, siehe Gruppenoperation Gruppenoperation, 60, 60–63 stetige, 61 Häufungspunkt, 107 @@ -3929,14 +4674,14 @@ Kartenwechsel, 28 Kern offener, 3 Kleeblattknoten, 20 -Klumpentopologie, siehetriviale Topologie +Klumpentopologie, siehe triviale Topologie Knoten, 20, 17–21 äquivalente, 20 trivialer, 20 Knotendiagramm, 20 kollinear, 65 -kongruent, sieheisometrisch -Kongruenz, sieheIsometrie +kongruent, siehe isometrisch 
+Kongruenz, siehe Isometrie Kongruenzsatz SSS, 104 SWS, 69 @@ -3954,7 +4699,8 @@ Binomischer, 107 Lie-Gruppe, 33 liegt zwischen, 65 Liftung, 54 -Limes, 8lokal, 3 +Limes, 8 +lokal, 3 Lot, 86 Lotfußpunkt, 86 Möbiusband, 91 @@ -4038,15 +4784,16 @@ Torus, iii, 5, 38, 51, 93 Total Unzusammenhängend, 100 Triangulierung, 38 Überdeckung, 14 -Übergangsfunktion, sieheKartenwechsel +Übergangsfunktion, siehe Kartenwechsel Überlagerung, 51, 51–60 reguläre, 59 universelle, 57 Umgebung, 3 Umgebungsbasis, 58 vanishing set, 26 -Vektorprodukt, sieheKreuzprodukt -Verklebung, 26verträglich, 29 +Vektorprodukt, siehe Kreuzprodukt +Verklebung, 26 +verträglich, 29 Würfel, 34 Weg, 17 einfacher, 17 diff --git a/read/results/tika/1601.03642.txt b/read/results/tika/1601.03642.txt index 182436f..711312f 100644 --- a/read/results/tika/1601.03642.txt +++ b/read/results/tika/1601.03642.txt @@ -25,19 +25,6 @@ - - - - - - - - - - - - - @@ -119,14 +106,19 @@ in T , as measured by P , improves with experience E. Σ ϕ x0 + x1 + x2 + x3 xn w0 + w1 + w2 w3 @@ -186,7 +178,9 @@ functions called artificial neurons which take n ∈ N num- bers x1, . . . , xn ∈ R as input, multiply them with weights w1, . . . , wn ∈ R, add them and apply a so called activation function ϕ as visualized in Figure 1(a). One example of such -an activation function is the sigmoid function ϕ(x) = 11+e−x . +an activation function is the sigmoid function ϕ(x) = 1 + +1+e−x . Those functions act as building blocks for more complex systems as they can be chained and grouped in layers as visualized in Figure 1(b). The interesting question is how @@ -529,6 +523,7 @@ music. Instead of taking notes directly or MIDI files, Nayebi and Vitelli took raw audio waveforms as input. Those audio waveforms are feature vectors given for time steps 0, 1, . . . , t− 1, t. The network is given those feature vectors X1, . . . , Xt + and has to predict the following feature vector Xt+1. This means it continues the music. 
As the input is continuous, the problem was modeled as a regression task. Discrete Fourier diff --git a/read/results/tika/1602.06541.txt b/read/results/tika/1602.06541.txt index 3cf5654..387aa43 100644 --- a/read/results/tika/1602.06541.txt +++ b/read/results/tika/1602.06541.txt @@ -25,19 +25,6 @@ - - - - - - - - - - - - - @@ -337,8 +324,9 @@ car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: -• mean accuracy: 1k · +• mean accuracy: 1 +k · ∑k i=1 @@ -360,8 +348,8 @@ ti−nii+ j=1 nji ∈ [0, 1] -• frequency weighted intersection over union: +• frequency weighted intersection over union: ( ∑k i=1 ti) @@ -376,8 +364,8 @@ ti−nii+ j=1 nji ∈ [0, 1] -Another problem might be pixels which cannot be +Another problem might be pixels which cannot be assigned to one of the known classes. For this reason, [SWRC06] makes use of a void class. This class gets completely ignored for all quality measures. Hence the @@ -395,10 +383,11 @@ benchmark [FKG13] or crypt segmentation as done by [CRSS14]. It is calculated as “the harmonic mean of the precision and recall” [PH05]: -Fβ = (1 + β) -2 tp +Fβ = (1 + β)2 +tp (1 + β2) · tp + β2 · fn + fp + where β = 1 is chosen in most cases and tp means true positive, fn means false negative and fp means false positive. @@ -949,7 +938,9 @@ duction of slack variables to relax the requirement of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter -C ∈ R+0 . The bigger C, the more errors are +C ∈ R+ + +0 . The bigger C, the more errors are accepted. The new optimization problem is: minimize @@ -1022,6 +1013,132 @@ i=1 αiyi = 0 +C. Random Decision Forests + +Random Decision Forests were first proposed +in [Ho95]. This type of classifier applies techniques +called ensemble learning, where multiple classifiers +are trained and a combination of their hypotheses is +used. 
One ensemble learning technique is the random +subspaces method where each classifier is trained +on a random subspace of the feature space. Another +ensemble learning technique is bagging, which is +training the trees on random subsets of the training set. +In the case of Random Decision Forests, the classifiers +are decision trees. A decision tree is a tree where each +inner node uses one or more features to decide in which +branch to descend. Each leaf is a class. + +One strength of Random Decision Forests compared +to many other classifiers like[SVMs|and neural networks +is that the scale of measure of the features (nominal, +ordinal, interval, ratio) can be arbitrary. Another advan- +tage of Random Decision Forests compared to +for example, is the speed of training and classification. + +Decision trees were extensively studied in the past +20 years and a multitude of training algorithms have +been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). +Possible training hyperparameters are the measure to +evaluate the “goodness of split” [Min89], the number of +decision trees being used, and if the depth of the trees +is restricted. Typically in the context of classification, +decision trees are trained by adding new nodes until +each leaf contains only nodes of a single class or until it +is not possible to split further. This is called a stopping +criterion. + +There are two typical training modes: Central axis +projection and perceptron training. In training, for +each node a hyperplane is searched which is optimal +according to an error function. + +Random Decision Forests with texton features (see +are applied in [SJCO8] for segmentation. +In the [MSC] dataset, they report a per-pixel accuracy +rate of 66.9% for their best system. This system +requires 415 ms for the segmentation of 320 px x 213 px +images on a single 2.7GHz core. On the Pascal +VOC 2007 dataset, they report an average per-pixel +accuracy for their best segmentation system of 42 %. 
+ +An excellent introduction to Random Decision +Forests for semantic segmentation is given by [SCZ08]. + +D. SVMs + +are well-studied binary classifiers which can +be described by five central ideas. For those ideas, the +training data is represented as (x;, y;) where x; is the +feature vector and y; € { —1,1} the binary label for +training example i € { 1,...,m }. + +1) If data is linearly separable, it can be separated +by a hyperplane. There is one hyperplane which +maximizes the distance to the next datapoints +(support vectors). This hyperplane should be taken: + +minimize : || w ||? +w,b 2 +s.t. Vi yi: ((w,x;) +b) > 1 +sgn applied to this gives the classification + +2) Even if the underlying process which generates the +features for the two classes is linearly separable, +noise can make the data not separable. The intro- +duction of slack variables to relax the requirement +of linear separability solves this problem. The +trade-off between accepting some errors and a +more complex model is weighted by a parameter +C € Rg. The bigger C, the more errors are +accepted. The new optimization problem is: + +1 m + +minimize 5llwil? +C- SO& +i=1 + +st. Vit iyi: ((w, xi) +6) 2 1- & + +Note that 0 < €; < 1 means that the data point +is within the margin, whereas €; > 1 means it is +misclassified. An with C > 0 is also called +a soft-margin + +3) The primal problem is to find the normal vector +w and the bias b. The dual problem is to express +w as a linear combination of the training data x;: + +m +w= y OGYi Xi +i=1 + +where y; € { —1,1} represents the class of the +training example and a; are Lagrange multipliers. +The usage of Lagrange multipliers is explained +with some examples in [[Sm104]. The usage of the +Lagrange multipliers a; changes the optimization +problem depend on the a; which are weights for +the feature vectors. It turns out that most a; will +be zero. The non-zero weighted vectors are called +support vectors. 
+ +The optimization problem is now, according +to [Bur98]: + +m mom + +os 1 +maximize S> ai 3 S> S> Oj YiYy (Xi, Xj) + +i=l i=1 j=1 +st. Vi O0< a, Il + +Kauss (Xi; x;) =e 2Qo2 + +and the sigmoid kernel +Kanh(Xi, xj) = tanh(y(xi, Xj) ~~ r) + +where the parameter y determines how much +influence single training examples have. + +5) The described can only distinguish between +two classes. Common strategies to expand those +binary classifiers to multi-class classification is +the one-vs-all and the one-vs-one strategy. In the +one-vs-all strategy n classifiers have to be trained +which can distinguish one of the n classes against +all other classes. In the one-vs-one strategy “.- +classifiers are trained; one classifier for each pair +of classes. + +A detailed description of can be found +in [Bur98]. +are used by [YHRF12] on the 2009 and 2010 +PASCAL segmentation challenge [EVGW* 10]. They +did not hand their classifier in to the challenge itself, +but calculated an average rank of 7 among the different +categories. +[FGMRT0] also used an [SVM] based method with +features and achieved the 7 rank in the 2010 +PASCAL segmentation challenge by mean accuracy. It +needs about 2s on a 2.8 GHz 8-core Intel processor. + + + +E. Markov Random Fields + +are undirected probabilistic graphical models +which are wide-spread model in computer vision. The +overall idea of is to assign a random variable for +each feature and a random variable for each pixel which + + + +Figure 3: with 4-neighborhood. Each node 2; +represents a pixel and each node y; represents +a label. + +gets labeled as shown in For example, a MRF + +which is trained on images of the size 224 px x 224 pixel +and gets the raw RGB values as features has + +224 - 224-34 224 - 224 = 200 704 +—S=_—Sr Ss ~~ + +input output + +random variables. Those random variables are condi- +tionally independent, given their local neighborhood. +These (in)dependencies can be expressed with a graph. 
+ +Let G = (V,€) be the associated undirected graph +of an and C be the set of all maximal cliques in +that graph. Nodes represent random variables x, y and +edges represent conditional dependencies. Just like in +he 4-neighborhood [SWRCO06] and the 8-neighborhood +are reasonable choices for constructing the graph. + +Typically, random variables y represent the class of a +single pixel, random variables x represent a pixel values +and edges represent pixel neighborhood in computer +vision problems segmentation problems where +are used. Accordingly, the random variables y live +on 1,...,nr of classes and the random variables x +typically live on 0,...,255 or [0, 1]. + +The probability of x, y can be expressed as + +1 +Plx,y) = pe PO) + +where Z = Vy © UY) is a normalization term +called the partition function and E is called the energy +function. A common choice for the energy function is + +E(x,y) =o ve(xy) +cEC +where 7 is called a clique potential. One choice for + +cliques of size two x, y = (x1, 22) is [KP06] + ++w + +—W + +if XY x LQ +if v1, = 7% + +We(21, £2) = wd(ax1, 22) = + +According to [Mur12], the most common way of +inference over the posterior MRF in computer vision + +problems is [Maximum A Posteriori (MAP) estimation. + + 9 @@ -1221,7 +1465,9 @@ P (x,y) and joint probability distribution -P (y|x) = 1 +P (y|x) = +1 + Z(x) ∏ diff --git a/read/results/tika/1707.09725.txt b/read/results/tika/1707.09725.txt index 123c7a6..4c9298a 100644 --- a/read/results/tika/1707.09725.txt +++ b/read/results/tika/1707.09725.txt @@ -25,19 +25,6 @@ - - - - - - - - - - - - - @@ -114,6 +101,8 @@ l 2 + + Analysis and Optimization of Convolutional Neural Network Architectures @@ -149,6 +138,8 @@ v + + Abstract Convolutional Neural Networks (CNNs) dominate various computer vision tasks since @@ -362,6 +353,8 @@ I Glossary 119 + + 1. 
Introduction Computer vision is the academic field which aims to gain a high-level understanding of the @@ -456,14 +449,15 @@ b kw 2 c∑ -ix=1−d kw2 e +ix=1−d kw +2 +e b kh 2 c∑ -iy=1−d -kh +iy=1−d kh 2 e @@ -673,6 +667,49 @@ output image, k2 multiplications and k2 additions of the products have to be cal 3 +2. Convolutional Neural Networks + + + +In the following, it is assumed that the reader knows what a|multilayer perceptron (MLP)} + + + +is and how they are designed for classification problems, what activation functions are and + +how gradient descent works. In case the reader needs a refresher on any of those topics, I +recommend chapter 4.3 and 4.4 of [Thol4a] as well as [EBH15}. + +This chapter introduces linear image filters in then standard layer types of + +are explained in The layer block pattern is described in +transition layers in and nine ways to analyze are described in + +2.1. Linear Image Filters + +A linear image filter (also called a filter bank or a kernel) is an element F € Rew *knxd, +where k,, represents the filter’s width, ky, the filter’s height and d the number of input +channels. The filter F is convolved with the image J € R”*’*¢ to produce a new image I’. +The output image I’ has only one channel. Each pixel I'(z,y) of the output image gets +calculated by point-wise multiplication of one filter element with one element of the original + +image I: + +[Ae | [Pld +I'(a,y) = S- (a + ix, y + ty, tc) - Flix; iy, ie) +=1 + +io=1—[ 99] iy=1— [4 + +Filter kernel Result of point-wise +Fe RS multiplication + + + +Figure 2.1.: Visualization of the application of a linear k x k x 1 image filter. For each pixel of the +output image, k? multiplications and k? additions of the products have to be calculated. + + 2. Convolutional Neural Networks @@ -696,7 +733,9 @@ smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five Please note that the result of a filtering operation is again an image. 
This means filters can be applied successively. While each pixel after one filtering operation with a 3 × 3 + filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3× 3 + filters increase the area of the original image which influenced the output. The output is then influenced by 25 pixel. This is called the receptive field. The kind of pattern which is detected by a filter is called a feature. The bigger the receptive field is, the more complex @@ -795,14 +834,15 @@ b kw 2 c∑ -ix=1−d kw2 e +ix=1−d kw +2 +e b kh 2 c∑ -iy=1−d -kh +iy=1−d kh 2 e @@ -874,6 +914,59 @@ s = 1 to input data of size width× height with three channels. 6 +2. Convolutional Neural Networks + +This is easier to see when the filtering operation is denoted formally: + +k +o%(x)=b+S ow x) with ie {1,....w} x {1,...,h} x {1...,d} [2] +j=l + +[SJ [Bld +ofH)(D) = b+ » » S > Filins ty, te) T(x + ix, y + ty; te) [2.2] +ig=1—-[ M2] iy=1—p Ab] t=! + +with abiasbe R,xe{l,...,.w},ye{l,...,h} andze{l,...,d} + +One can see that most weights of the equivalent [MLP] are zero and many weights are +equivalent. Hence the advantage of compared to is the reduction of parameters. +The effect of fewer parameters is that less training data is necessary to get suitable +estimations for those. This means a[MLP] which is able to compute the same functions as a +[CNN] will likely have worse results on the same dataset, if a architecture is suitable +for the dataset. + +See for a visualization of the application of a convolutional layer. + +n filters of +sizekxkx3 + +Be + +neural +network +apply + +re re + +» a) + +data “eb “eb + +oO o + +a GS + +Vo NEGA Sas +3 feature maps + +(e.g. RGB) + +n feature maps + +Figure 2.2.: Application of a single convolutional layer with n filters of size k x k x 3 with stride +5 = 1 to input data of size width x height with three channels. + + 2.2. CNN Layer Types @@ -926,12 +1019,12 @@ functions as introduced in [LGT16]. 
Name Definition Used by Max pooling max { a ∈ A } [BPL10, KSH12] -Average / mean pooling 1|A| +Average / mean pooling 1 +|A| ∑ -a∈A a LeNet-5 [LBBH98] and [KSlB -+10] +a∈A a LeNet-5 [LBBH98] and [KSlB+10] `2 pooling √∑ @@ -945,8 +1038,7 @@ Table 2.1.: Pooling types for a set A of activations a ∈ R. (*) For stochastic pooling, each of the p×p activation values ai in the pooling region gets picked with probability pi = ai∑ -aj∈A -aj +aj∈A aj . This assumes the activations ai are non-negative. Pooling is applied for three reasons: To get local translational invariance, to get invariance @@ -1038,7 +1130,9 @@ where � is the Hadamard product Hence every value of the input gets set to zero with a dropout probability of p. Typically, Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob- ability than later layers. In order to keep the expected output at the same value, the -output of a dropout layer is multiplied with 11−p when dropout is enabled [Las17, tf-16b]. +output of a dropout layer is multiplied with 1 + +1−p when dropout is enabled [Las17, tf-16b]. At inference time, dropout is disabled. Dropout is usually only applied after fully connected layers, but not after convolutional @@ -1076,22 +1170,28 @@ x̂(k) = x(k) − x̄(k)√ s′[x(k)]2 + ε -with x̄(k) = 1m -∑m +with x̄(k) = 1 +m +∑m i=1 x + (k) -i being the sample mean and s +i being the sample mean and s′[x(k)]2 = 1 -′[x(k)]2 = 1m -∑m +m +∑m i=1(x + (k) i − x̄(k)) the sample variance where m ∈ N≥1 is the number of training samples per mini-batch, ε > 0 -being a small constant to prevent division by zero and x(k)i is the activation of neuron k for + +being a small constant to prevent division by zero and x(k) +i is the activation of neuron k for + training sample i. Additionally, for each activation x(k) two parameters γ(k), β(k) are introduced which scale @@ -1236,7 +1336,9 @@ aspect to using the group network without an aggregation block. 
Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The idea is to connect each convolutional layer directly to subsequent convolutional layers. Traditional CNNs with L layers and one input layer have L connections between layers, -but dense blocks have L(L+1)2 connections between layers. The input feature maps are +but dense blocks have L(L+1) + +2 connections between layers. The input feature maps are concatenated in depth. According to the authors, this prevents features from being re- learned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors @@ -1488,10 +1590,7 @@ x∈X K∑ k=1 -[txk log(o -x -k) + (1− txk) log(1− oxk)]︸ ︷︷ ︸ - +[txk log(oxk) + (1− txk) log(1− oxk)]︸ ︷︷ ︸ cross-entropy data loss +λ1 · @@ -1504,22 +1603,92 @@ w∈W `2︷ ︸︸ ︷∑ w∈W -w2︸ ︷︷ ︸ +w2 + +︸ ︷︷ ︸ model complexity loss where W are the weights, X is the training data set, K ∈ N≥0 is the number of classes and -txk indicates if the training example x is of class k. o +txk indicates if the training example x is of class k. oxk is the output of the classification +algorithm which depends on the weights. λ1, λ2 ∈ [0,∞) weights the regularization and is +typically smaller than 0.1. -x -k is the output of the classification +17 + +2.5. Analysis Techniques + + + +plotting the error on the training set as well as the error on a validation set, one can also +estimate if overfitting might become a problem. See for an example. + + + + + + + + + + + + + + + +Error | . --- Training set +08. . — Validation set +0.6 + +0.4 4 +0.2 4 ws 1 overfitting > + +' Epochs +t + + + +10 20 30 40 50 60 70 80 90 100 + +Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs +and the quality metric is the error (1 — accuracy). The longer the network is trained, +the better it gets on the training set. 
At some point the network is fit too well to the +training data and loses its capability to generalize. At this point the quality curve of +the training set and the validation set diverge. While the classifier is still improving on +the training set, it gets worse on the validation and the test set. + +When the epoch-loss validation curve has plateaus as in this means the opti- +mization process did not improve for several epochs. Three possible ways to reduce the +problem of plateaus are (i) to change weight initialization if the plateau was at the beginning, + +(ii) regularizing the model or (iii) changing the optimization algorithm. + +Loss functions + +The loss function (also called error function or cost function) is a function which assigns a +real value to a complex event like the predicted class of a feature vector. It is used to define +the objective function. For classification problems the loss function is typically cross-entropy + +with ¢; or @2 regularization, as it was described in [NH92]: + + + +£ £ +K — — +Eor(W) = — >> Sef log(of) + (1 = #8) loa(t = of +1» S7 Jw) Ar» SO +cEX k=1 wew wEew +cross-entropy data loss model complexity loss + +where W are the weights, X is the training data set, kK € N>0 is the number of classes and +;, indicates if the training example z is of class k. of is the output of the classification +algorithm which depends on the weights. A1, A2 € [0,0o) weights the regularization and is -algorithm which depends on the weights. λ1, λ2 ∈ [0,∞) weights the regularization and is typically smaller than 0.1. 17 + 2. Convolutional Neural Networks Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange @@ -1568,8 +1737,8 @@ j=1 cij be the number of training samples for class i. The most common quality criterion is accuracy: accuracy(c) = -∑k +∑k i=1 cii∑k i=1 ti @@ -1980,15 +2149,97 @@ Meiosis Networks are introduced in [Han89]. 
In contrast to most MLPs and CNNs, w weights are deterministic and fixed at prediction time, each weight wij in Meiosis networks follows a normal distribution: -wij ∼ N (µij , σ2ij) +wij ∼ N (µij , σ +2 +ij) + +28 + +3. Topology Learning + + + +4. Correlation Maximization: Train the weights of the candidates by maximizing S, + +the correlation between candidates output value V with the networks residual error: + +$= 30 |N (W-P) BoB) + +o€O |pEeT + +where O is the set of output nodes, T is the training set, V, is the candidate neurons +activation for a training pattern p. E, is the residual output error at node o for +pattern p. V and E, are averaged values over all elements of T’. This step is finished + +when the correlation no longer increases. + +5. Candidate selection: Keep the candidate node with the highest correlation, freeze + +its incoming weights and add connections to the output nodes. +6. Continue: If the error is higher than desired, continue with step 2. + +One network with three hidden nodes trained by Cascade-Correlation is shown in + +O +© +© +O +© + +Figure 3.1.: A Cascade-Correlation network with three input nodes (red) and one bias node (gray) +to the left, three hidden nodes (green) in the middle and two output nodes in the upper +right corner. The black squares represent frozen weights which are found by correlation +maximization whereas the white squares are trainable weights. + + + +Vv + +© + + + +Vv + + + +Vv + +Vv + + + +Vv + + + +Vv + + + +PH) +PHI) + +» +> + +3.1.2. Meiosis Networks + +Meiosis Networks are introduced in [Han89]. In contrast to most and where +weights are deterministic and fixed at prediction time, each weight w;; in Meiosis networks +follows a normal distribution: + +Wij ~ N (Miz, 074) 28 + 3.2. Pruning approaches -Hence every connection has two learned parameters: µij and σ2ij . +Hence every connection has two learned parameters: µij and σ2 +ij . 
The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled @@ -2015,6 +2266,7 @@ layers or add skip connections. Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on- line handwriting recognition. It makes use of the confusion matrix C = (cij) ∈ Nk×k≥0 + (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix S with sij = sji = cij · cji. The maximum of S defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. @@ -2059,7 +2311,10 @@ A much simpler and computationally cheaper pruning criterion is the weight magni w ← -w if w ≥ θ0 otherwise +w if w ≥ θ + +0 otherwise + 3.3. Genetic approaches The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which @@ -2152,6 +2407,7 @@ Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualiz The root classifier C0 has to distinguish six coarse classes (pedestrian, four+-wheelers, traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C0 predicts a pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C0 + predicts traffic sign, then another classifier has to predict if it is a speed limit, a sign indicating danger or something else. If C0, however, predicts road, then no other classifier will become active. @@ -2261,6 +2517,7 @@ but besides that it is also chosen uniformly random. Simple row-swapping can exploit local improvements. For example, in the context of ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire terrier + and both dog classes Dalmatian and Greyhound next to each other. Both the two clusters of dog breeds could be separated by car and bus due to random chance. 
Moving any single class increases the score, but moving either one of the dog breed clusters or the vehicle @@ -2284,11 +2541,7 @@ Those will be moved to the corners of the confusion matrix by optimizing Equatio Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. With such a permutation, only n− 1 binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions C ′i,i+1 + C - -′ -i+1,i for - +classes has to be read. Alternatively, one can calculate the confusions C ′i,i+1 + C ′i+1,i for each pair of classes which are neighbors in the confusion matrix. The higher this value, the more similar are the classes according to the classifier. Hence a threshold θ can be applied. θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) @@ -2453,6 +2706,7 @@ each power of two there are two Convolution + BN + ELU blocks and one Max poolin block added. This is the framed part in the table. 32× 32 + Input C 32@3× 3/1 @@ -2464,6 +2718,7 @@ C 32@3× 3/1 BN + ELU 16× 16 + max pooling 2× 2/2 C 64@3× 3/1 @@ -2475,6 +2730,7 @@ C 64@3× 3/1 BN + ELU 8× 8 + max pooling 2× 2/2 C 64@3× 3/1 @@ -2482,6 +2738,7 @@ C 64@3× 3/1 BN + ELU 4× 4 + max pooling 2× 2/2 C 512@4× 4/1 (V) @@ -2491,6 +2748,7 @@ BN + ELU Dropout, p = 0.5 1× 1 + C 512@1× 1/1 BN + ELU @@ -2517,7 +2775,8 @@ of kernel size 3× 3 with stride 1. The results for the baseline model evaluated on eight datasets are given in Table 5.2. The speed for inference for different GPUs is given in Table 5.3. 
-Dataset Single Model Accuracy Ensemble of 10Training Set Test Set Training Set Test Set +Dataset Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 % CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 % @@ -2534,7 +2793,8 @@ CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For HASYv2 no test time transformations are used. -Network GPU Tensorflow Inference per Training1 Image 128 images time / epoch +Network GPU Tensorflow Inference per Training +1 Image 128 images time / epoch Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s @@ -2660,9 +2920,8 @@ training. The image might lead to the wrong conclusion that models which are bet the start are also better at the end. In order to check this hypothesis, the relative order of validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering stays approximately the same, then it can be considered to run the first few epochs many -times and only train the best models to the end. For 10 models, there can be 10 +times and only train the best models to the end. For 10 models, there can be 102−10 -2−10 2 = 45 pair-wise changes in the ordering at maximum if the relative order of validation accuracies @@ -2938,6 +3197,139 @@ Table 5.5.: Differences in spectral clustering and CMO. 52 +5. 
Experimental Evaluation + + + + + + + + + + + + + + + +Cluster Spectral clustering Errors |CMO Errors + +fish aquarium fish, orchid + flatfish 5 aquarium fish, orchid + flatfish 4 ++ ray, shark + trout, lion + ray + shark, trout + +flowers orchid, aquarium fish + sun- 5 orchid, aquarium fish + sun- 2 +flower + poppy, tulip + rose, flower, poppy, tulip, rose +train + +people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 + +reptiles crocodile, plain, road, table, 9 crocodile, lizard, lobster, cater- 6 +wardrobe + dinosaur + lizard pillar + dinosaur + snake + tur- ++ snake, worm + turtle tle, crab + +trees maple, oak, pine + willow, forest 3 palm, willow, pine, maple, oak 0 ++ palm + +Total 24 12 + + + +Table 5.4.: + +Differences in spectral clustering and Classes in a cluster are separated by , +whereas clusters are separated by +. + + + + + + + + + + + +Cluster Spectral clustering Errors |CMO Errors +A, A, A, A, &, A +B,B B,B +C,c, C and @, €, €& andC C,c, C,C and @ +D,D, 9, > D,D,G +E and €,é€ FE and €, €, €, € +F and ¥, F Fand ¥, F +A and #, x and H A andH, # +K,k K,k +L,| andl, & L,| andl, & + +NK MSs GCGHnDOVOAZZOrO A eRe eva + +M and M and IN + +N and N, N and NV + +O, O, 0, 0, °, O and o +P,P and p, p and Y and o +Q, Q, Q, 4, U, 2, €, S, #, 1 +R,R and R, R, & and R +S,s,S + +T, T and 7,7 + +U, U and u, U, 2 + +Vi,vu,V + +W,w,w + +X, x, X, xX, X + +Y andy + +Z,2,ZandZ,Z + +FPrRFooorRrrRFOoOWN wWworRnnroewrnrF & OO + +M and p, M and I + +N and N, N and N, 8 +O, O, 0, 0, ° and O and o +PandP, FY, 9 and p, p +Q and Q, Q +Rand k, R, R, R +S,s,S + +T, T and 7,7 + +U, u, U, A and U +Vi,vu,V + +W,w andw + +X, x, X, xX, X + +Yyy + +Z,2,Z, 2,2 + +GO co OF CO NWMrFR OF FN NHN WWrR DWF FP FOF OD FE + + + +Total + +oo +neg + +No +oO + + + +52 + +Table 5.5.: Differences in spectral clustering and + + 5.4. 
Hierarchy of Classifiers @@ -2968,19 +3360,31 @@ root classifier leaf classifier cluster identified class identified | cluster class identified | cluster 1 3 69.67 % 84.27 % 72.98 % + 2 5 46.60 % 58.54 % 43.47 % + 3 2 58.50 % 92.13 % 83.46 % + 4 2 50.50 % 87.83 % 81.74 % + 5 3 44.67 % 79.29 % 71.01 % + 6 2 29.50 % 78.67 % 72.00 % + 7 2 52.50 % 92.11 % 87.72 % + 8 2 59.50 % 86.23 % 81.88 % + 9 2 59.00 % 90.08 % 87.79 % 10 2 62.00 % 85.52 % 73.10 % + 11 2 67.00 % 87.01 % 75.32 % + 12 2 72.50 % 94.77 % 76.77 % + 13 2 64.00 % 82.58 % 86.27 % + 14 2 79.67 % 89.85 % 89.10 % Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on @@ -3005,7 +3409,8 @@ trained number of epochs. As more filters can lead to different results dependin layer where they are added, five models are trained. The details about those models are given in Table 5.7 -Name Layer Filter count TotalBaseline New parameters +Name Layer Filter count Total +Baseline New parameters m9 9 64 638 5 978 566 m′9 9 64 974 8 925 622 @@ -3034,13 +3439,19 @@ Single Model Ensemble Mean Epochs Mean Time Mean std baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s + m9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s + m′9 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s + m11 5 982 698 65.73% 0.77 67.38% 149.2 5450 s + m′11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s + m13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m9, m11, m13 + as well as their accuracies. 54 @@ -3166,10 +3577,12 @@ removed. The first convolutional layer was increased from 32 filters to 59 filte convolutional layer was increased from 32 filter s to 58 filters in order to keep the amount of parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 % + test accuracy (-0.66). 
Even more extreme, if both convolutional layers are removed from the 16× 16 feature map scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51 + (-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important to have at least one convolutional layer at this feature map scale. @@ -3187,6 +3600,7 @@ Hence the effect of removing Batch Normalization from the baseline is investigat experiment. As before, 10 models are trained on CIFAR-100. The training setup and the model mno-bn + are identical to the baseline model m, except that in mno-bn the Batch Normalization layers are removed. @@ -3253,11 +3667,20 @@ Mean total Single model Ensemble time training time Accuracy std Accuracy -8 118 sepoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % -16 62 sepoch 103 – 173 8349 s 64.16% σ = 0.81 66.98% -32 35 sepoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % -64 25 sepoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % -128 18 sepoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % +8 118 s +epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % + +16 62 s +epoch 103 – 173 8349 s 64.16% σ = 0.81 66.98% + +32 35 s +epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % + +64 25 s +epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % + +128 18 s +epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on @@ -3271,6 +3694,7 @@ layers 11, 13 and 15 is removed. The mean test accuracy of 10 trained mno-bias is 63.74 % which is an improvement of 0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 % + which is 0.43 percentage points better than the baseline. Hence the bias can safely be removed. @@ -3287,6 +3711,7 @@ of the Batch Normalization layers did not noticeably change. 5.11. 
Learned Color Space Transformation In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1× 1 + directly after the input and then another convolutional layer with 3 filters of size 1× 1 acts as a learned transformation in another color space and boosts the accuracy. @@ -3310,6 +3735,7 @@ stride 2. This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the 3× 3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 % + (−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test accuracy (+0.45). @@ -3405,9 +3831,12 @@ x + 1) =  -−x2 + 1 if x ≤ −2 +−x + +2 + 1 if x ≤ −2 x if − 2 ≤ x ≤ 2 + x 2 + 1 if x > −2 @@ -3452,17 +3881,29 @@ Single model Ensemble of 10 Training set Test set Training set Test set Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 % + Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 % + Logistic− 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 % + Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 % + Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 % + Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 % + ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 % + ReLU− 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 % + Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 % + S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 % + LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 % + PReLU 80.01% σ = 2.03 62.16 % σ = 0.73 83.50% 64.79% + ELU 76.64 % σ = 1.48 63.38% σ = 0.55 78.30 % 64.70 % Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation @@ -3476,22 +3917,43 @@ Mean total 1 Image 128 time training time -Identity 8 ms 42 ms 31 sepoch 108 – 148 3629 s -Logistic 6 ms 31ms 24 sepoch 101 – 167 2234 s -Logistic− 6 ms 31ms 22 s +Identity 8 ms 42 ms 31 s +epoch 108 – 148 3629 s + +Logistic 6 ms 31ms 24 s +epoch 101 – 167 2234 s +Logistic− 6 ms 
31ms 22 s epoch + 133 – 255 3421 s -Softmax 7 ms 37 ms 33 sepoch 127 – 248 5250 s -Tanh 6 ms 31ms 23 sepoch 125 – 211 3141 s -Softsign 6 ms 31ms 23 sepoch 122 – 205 3505 s -ReLU 6 ms 31ms 23 sepoch 118 – 192 3449 s -Softplus 6 ms 31ms 24 sepoch 101 – 165 2718 s -S2ReLU 5ms 32 ms 26 sepoch 108 – 209 3231 s -LReLU 7 ms 34 ms 25 sepoch 109 – 198 3388 s -PReLU 7 ms 34 ms 28 sepoch 131 – 215 3970 s -ELU 6 ms 31ms 23 sepoch 146 – 232 3692 s +Softmax 7 ms 37 ms 33 s +epoch 127 – 248 5250 s + +Tanh 6 ms 31ms 23 s +epoch 125 – 211 3141 s + +Softsign 6 ms 31ms 23 s +epoch 122 – 205 3505 s + +ReLU 6 ms 31ms 23 s +epoch 118 – 192 3449 s + +Softplus 6 ms 31ms 24 s +epoch 101 – 165 2718 s + +S2ReLU 5ms 32 ms 26 s +epoch 108 – 209 3231 s + +LReLU 7 ms 34 ms 25 s +epoch 109 – 198 3388 s + +PReLU 7 ms 34 ms 28 s +epoch 131 – 215 3970 s + +ELU 6 ms 31ms 23 s +epoch 146 – 232 3692 s Table 5.12.: Training time and inference time of adjusted baseline models trained with different activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the @@ -3566,6 +4028,7 @@ could be relaxed. 10 models msmooth are trained with the α = 0.5 smoothed labels from the prediction of an ensemble of 10 baseline models. The mean accuracy of the models trained on the smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72 + (+0.17 %). The ensemble of 10 msmooth models achieved 64.79 % accuracy (+0.09 %). Hence the effect of this kind of label smoothing on the final accuracy is questionable. @@ -3653,6 +4116,7 @@ block added. This is the framed part in the table. 5.15. 
Optimized Classifier 32× 32 + Input C 69@3× 3/1 @@ -3664,6 +4128,7 @@ C 69@3× 3/1 BN + ELU 16× 16 + max pooling 3× 3/2 C 64@3× 3/1 @@ -3675,6 +4140,7 @@ C 64@3× 3/1 BN + ELU 8× 8 + max pooling 3× 3/2 C 64@3× 3/1 @@ -3682,6 +4148,7 @@ C 64@3× 3/1 BN + ELU 4× 4 + max pooling 3× 3/2 C* 512@4× 4/1 (V) @@ -3691,6 +4158,7 @@ BN + ELU Dropout, p = 0.5 1× 1 + C* 512@1× 1/1 BN + ELU @@ -3706,7 +4174,8 @@ BN + Softmax Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with 32 filters of kernel size 3× 3 with stride 1. The * indicates that no bias is used. -Dataset Single Model Accuracy Ensemble of 10Training Set Test Set Training Set Test Set +Dataset Single Model Accuracy Ensemble of 10 +Training Set Test Set Training Set Test Set Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 % CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 % @@ -3723,7 +4192,8 @@ CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN and HASY, no test time transformations are used. -Network GPU Tensorflow Inference per Training1 Image 128 images time / epoch +Network GPU Tensorflow Inference per Training +1 Image 128 images time / epoch Optimized Default Intel i7-4930K 5 ms 432 ms 386 s Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s @@ -3796,7 +4266,8 @@ pattern, the number of epochs increases with lower model regularization (see Tab 5.17. Regularization -Dataset Early Stopping Fixed epochsval. acc train loss +Dataset Early Stopping Fixed epochs +val. acc train loss Asirra 93.09 % 96.01 %3 96.01 % CIFAR-10 89.86 % 91.75 % 88.88 % @@ -3926,6 +4397,7 @@ from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved fr This was mainly achieved by the combination of ELU, Dropout, ensembles, training data augmentation and test-time transformations. 
The removal of the bias of layers close to the output and re-usage of those parameters in layers close to the input as well as using 3× 3 + pooling instead of 2× 2 pooling improved the baseline. While writing this masters thesis, several related questions could not be answered: @@ -3965,6 +4437,9 @@ might be crucial for the models quality. 74 +74 + + A. Figures, Tables and Algorithms @@ -3975,7 +4450,8 @@ A. Figures, Tables and Algorithms Figure A.1.: Examples of image filters. Best viewed in electronic form. -Layer 99-percentile intervalfilter bias +Layer 99-percentile interval +filter bias 1 [-0.50, 0.48] [-0.06, 0.07] 3 [-0.21, 0.19] [-0.07, 0.07] @@ -3998,26 +4474,33 @@ CIFAR-100. Algorithm 1 Simulated Annealing for minimizing Equation (4.1). Require: C ∈ Nn×n, steps ∈ N, T ∈ R+, c ∈ (0, 1) -procedure SimulatedAnnealing(C, steps, T , c) +procedure SimulatedAnnealing(C, steps, T , c) bestScore← accuracy(C) + bestC← C -for i = 0; i < steps; i← i+ 1 do +for i = 0; i < steps; i← i+ 1 do p← randomFloat(0, 1) -if p < 0.5 then . Swap rows +if p < 0.5 then . Swap rows i← randomInteger(1, . . . , n) + j ← randomInteger(1, . . . 
, n) \ { i } p← randomUniform(0, 1) + C ′ ← swap(C, i, j) + s← accuracy(C ′) -if p < exp( s−bestScoreT ) then + +if p < exp( s−bestScore +T ) then C ← C ′ if s > bestScore then bestScore← s + bestC← C T ← T · c @@ -4190,12 +4673,13 @@ same idea, that unit-variance is desired for each layer as the training converge Name α β γ Reference Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] -Xavier/Glorot uniform α = +Xavier/Glorot uniform α = √ -6 +6 nin+nout + β = 0 γ = 0 [GB10] Xavier/Glorot normal α = 0 β = @@ -4207,7 +4691,10 @@ Xavier/Glorot normal α = 0 β = )2 γ = 0 [GB10] -He α = 0 β = 2nin γ = 0 [HZRS15b] +He α = 0 β = 2 +nin + +γ = 0 [HZRS15b] Orthogonal — — γ = 0 [SMG13] LSUV — — γ = 0 [MM15] @@ -4229,9 +4716,7 @@ x∈X K∑ k=1 -[txk log(o -x -k) + (1− txk) log(1− oxk)] +[txk log(oxk) + (1− txk) log(1− oxk)] is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, X is the set of training examples, K is the number of classes, txk ∈ { 0, 1 } indicates if the @@ -4255,8 +4740,7 @@ B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule -wji ← wji + ∆wji with ∆wji = −η -∂Ex +wji ← wji + ∆wji with ∆wji = −η ∂Ex ∂wji where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. @@ -4267,8 +4751,7 @@ lead to sharp minima and thus poor generalization [KMN+16]. Smaller mini-batch s lead to longer training times due to computational overhead and to more training steps due to gradient noise. -wji ← wji + ∆wji with ∆wji = −η -∂EB +wji ← wji + ∆wji with ∆wji = −η∂EB ∂wji Nine variations which adjust the learning rate during training are: @@ -4286,12 +4769,11 @@ ji + ∆w ji with ∆w (t+1) -ji = −η +ji = −η∂EB -∂EB ∂wji - + α∆w + (t) ji @@ -4314,7 +4796,9 @@ t k where t ∈ N0 is the training step, η(0) is the initial learning rate, k ∈ N≥1 is the number of training steps -until the learning rate is decreased by 110th. 
+until the learning rate is decreased by 1 + +10th. • Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential Decay Scheduling. @@ -4372,65 +4856,85 @@ Name Function ϕ(x) Range of Values ϕ′(x) Used by Sign function† -+1 if x ≥ 0−1 if x < 0 { −1, 1 } 0 [KS02] ++1 if x ≥ 0 + +−1 if x < 0 +{ −1, 1 } 0 [KS02] + Heaviside step function† -+1 if x > 00 if x < 0 { 0, 1 } 0 [MP43] -Logistic function 1 ++1 if x > 0 -1+e−x [0, 1] -ex +0 if x < 0 +{ 0, 1 } 0 [MP43] + +Logistic function 1 +1+e−x [0, 1] ex (ex+1)2 [DJ99] -Tanh e -x−e−x -ex+e−x = tanh(x) [−1, 1] sech +Tanh ex−e−x -2(x) [LBBH98, Tho14a] +ex+e−x = tanh(x) [−1, 1] sech2(x) [LBBH98, Tho14a] ReLU† max(0, x) [0,+∞) -1 if x > 00 if x < 0 [KSH12] +1 if x > 0 + +0 if x < 0 +[KSH12] + LReLU†2 (PReLU) ϕ(x) = max(αx, x) (−∞,+∞) -1 if x > 0α if x < 0 [MHN13, HZRS15b] -Softplus log(ex + 1) (0,+∞) exex+1 [DBB +1 if x > 0 -+01, GBB11] +α if x < 0 +[MHN13, HZRS15b] + +Softplus log(ex + 1) (0,+∞) ex + +ex+1 [DBB+01, GBB11] ELU -x if x > 0α(ex − 1) if x ≤ 0 (−∞,+∞) -1 if x > 0αex otherwise [CUH15] +x if x > 0 + +α(ex − 1) if x ≤ 0 +(−∞,+∞) -Softmax‡ o(x)j = e -xj∑K +1 if x > 0 +αex otherwise +[CUH15] + +Softmax‡ o(x)j = exj∑K k=1 e -xk +xk [0, 1]K o(x)j · -∑K +∑K k=1 e -xk−exj∑K +xk−exj∑K k=1 e -xk +xk [KSH12, Tho14a] Maxout‡ o(x) = maxx∈x x (−∞,+∞) -1 if xi = maxx0 otherwise [GWFM+13] -Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 +1 if xi = maxx + +0 otherwise +[GWFM+13] +Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 and functions marked with ‡ operate on all elements of a layer simultaneously. The hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other activation function like randomized leaky ReLUs exist [XWCL15], but are far less @@ -4463,20 +4967,17 @@ as it produces a probability distribution. 
See Figure B.1 for a plot of some of x y -ϕ1(x) = +ϕ1(x) = 1 -1 1+e−x ϕ2(x) = tanh(x) ϕ3(x) = max(0, x) -ϕ4(x) = log(e -x + 1) +ϕ4(x) = log(ex + 1) -ϕ5(x) = max(x, e -x − 1) +ϕ5(x) = max(x, ex − 1) Figure B.1.: Activation functions plotted in [−2,+2]. tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, @@ -4510,6 +5011,9 @@ Regularization techniques are: 86 +86 + + C. Calculating Network Characteristics @@ -4526,12 +5030,11 @@ has ki · ki−1(n ·m+ 1) parameters. The +1 is due to the bias. n · (k ·m1 ·m2 + 1) parameters. • A dense block with a depth of L, a growth rate of n and 3× 3 filters has L+ n · 32 + -32 · n2 +32 · n2 ∑L -i=0(L− i) = L+ 9n+ 9n2 -L2−L +i=0(L− i) = L+ 9n+ 9n2L2−L 2 parameters. According to [HPTD15], AlexNet has 60 million parameters which is roughly the number @@ -4547,6 +5050,7 @@ simplicity, nϕ = 5 was chosen. • A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x+ b) with W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n · (k + (k − 1) + 1) = 2nk + additions / multiplications before the non-linearity ϕ is calculated. The total number of FLOPs is 2 · n · k + n · nϕ. @@ -4554,6 +5058,7 @@ of FLOPs is 2 · n · k + n · nϕ. being applied to ki−1 filter maps of size w× h results in ki filter maps of size w× h if padding is applied. For each element of each filter map, n ·m ·ki−1 multiplications and (n ·m · ki−1 − 1) additions have to be made. This results in (2nmki−1 − 1) · (ki ·w · h) + operations. The total number of FLOPs is (2 ·n ·m ·ki−1−1) · (ki ·w ·h)+ki ·w ·h ·nϕ. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. @@ -4563,8 +5068,8 @@ calculating convolutions [LG16]. • A fully connected layer with n nodes after k feature maps of size w×h needs 2n(k ·w ·h) -FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ. +FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · nϕ. 
• As Dropout is only calculated during training, the number of FLOPs was set to 0. • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. For a feature map of size w × h a max pooling @@ -4677,7 +5182,9 @@ D.2. AlexNet The first CNN which achieved major improvements on the ImageNet dataset was AlexNet [KSH12]. Its architecture is shown in Figure D.2 and described in Table D.2. It has about 60·106 param- eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/g̃uerzhoy/tf_alexnet. -Note that the uncompressed size is at least 60 965 224 floats · 32 bitfloat ≈ 244 MB. +Note that the uncompressed size is at least 60 965 224 floats · 32 bit + +float ≈ 244 MB. Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. @@ -4726,11 +5233,14 @@ D.3. VGG-16 D Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual Geometry Group in Oxford which developed this architecture. It has 16 layers which can learn parameters. A major difference compared to AlexNet is that VGG-16 uses only 3× 3 + filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a detailed textual description is given in Table D.3. A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/ -tensorflow-vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bitfloat ≈ +tensorflow-vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bit + +float ≈ 520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and 514 MB with compression. @@ -4913,6 +5423,9 @@ Table D.4.: Inception-v4 network. 96 +96 + + E. 
Datasets @@ -4949,21 +5462,38 @@ CIFAR-10 32 px× 32 px 60 000 10 3 [Kri, KH09] CIFAR-100 32 px× 32 px 60 000 100 3 [Kri, KH09] STL-10 96 px× 96 px 13 000 10 3 [CLN11, CLN10] -Caltech-101 (80 px− 3481 px)×(92 px− 3999 px) 9144 102 3 [FFP03, FFFP06] +Caltech-101 (80 px− 3481 px) +×(92 px− 3999 px) -Caltech-256 (75 px− 7913 px)×(75 px− 7913 px) 30 607 257 3 [Gri06, GG07] +9144 102 3 [FFP03, FFFP06] -ILSVRC 20121 (8 px− 9331 px)×(10 px− 6530 px) 1.2 · 10 -6 1000 3 [Ima12, RDS+14] +Caltech-256 (75 px− 7913 px) +×(75 px− 7913 px) -Places3652 (290px− 3158px)×(225px− 2630px) 1.8 · 10 -6 365 3 [Zho16, ZKL+16] +30 607 257 3 [Gri06, GG07] -GTSRB (25 px− 266 px)×(25 px− 232 px) 51 839 43 3 [SSSI, SSSI12] +ILSVRC 20121 (8 px− 9331 px) +×(10 px− 6530 px) -Asirra3 (4 px− 500 px)×(4 px− 500 px) 25 000 2 3 [Asi17, EDHS07] +1.2 · 106 1000 3 [Ima12, RDS+14] -Graz-02 480 px× 640 pxand 640 px× 480 px 1096 3 3 [Mar08, MS07] +Places3652 (290px− 3158px) +×(225px− 2630px) + +1.8 · 106 365 3 [Zho16, ZKL+16] + +GTSRB (25 px− 266 px) +×(25 px− 232 px) + +51 839 43 3 [SSSI, SSSI12] + +Asirra3 (4 px− 500 px) +×(4 px− 500 px) + +25 000 2 3 [Asi17, EDHS07] + +Graz-02 480 px× 640 px +and 640 px× 480 px 1096 3 3 [Mar08, MS07] Table E.1.: An overview over publicly available image databases for classification. The number of images row gives the sum of the training and the test images. 
Some datasets, like @@ -5001,16 +5531,20 @@ Require: Semantic segmentation dataset (DS) procedure CreateDataset(Annotated dataset DS) DC ← List + w ← desired image width h← desired image height for Image and associated label (x, y) in DS do i← randint(0, L.width− w) + j ← randint(0, L.height− h) + cL ← crop(y, (i, j), (i+ w, j + h)) -if at least 50% of s are of one class then +if at least 50% of s are of one class then cI ← crop(x, (i, j), (i+ w, j + h)) + D.append((cI , cL)) return (DC) diff --git a/read/results/tika/2201.00021.txt b/read/results/tika/2201.00021.txt index 7fd9f6b..d538039 100644 --- a/read/results/tika/2201.00021.txt +++ b/read/results/tika/2201.00021.txt @@ -18,27 +18,6 @@ - - - - - - - - - - - - - - - - - - - - - @@ -254,6 +233,7 @@ position switching mode, and the off position was 10′ in azimuth away from the source. For observations made before 2021 Au- gust, we used a spectrometer that covered 2 GHz wide backends with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1 + at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar 1975). A high spectral resolution backend with 65536 channels and a bandwidth of 300 MHz was employed in 2021 August, @@ -320,6 +300,147 @@ sociated Universities, Inc. Article number, page 2 of 10 +A&A proofs: manuscript no. mainArxiv + +tions (e.g., Mauersberger et al.|/1987) |1988; Walsh et al.|/2007; +Henkel et al.|/2013; (Mei et al.|2020). Except for the NH3 (3,3) +masers proposed to be associated with four supernova remnants +(McEwen et al.|/2016), almost all the other ammonia masers are +detected in high-mass star-forming regions (HMSFRs). How- +ever, while many HMSFRs host water (H2O), hydroxyl (OH), +or methanol (CH3;OH) masers, ammonia masers are quite rare +in these sources, and the role that the environment of a young +high-mass star plays in their excitation remains unclear. 
There- +fore, dedicated searches for ammonia masers in HMSFRs are +indispensable in regard to their overall incidence and associa- +tion with different environments, which can provide additional +constraints on the pumping mechanism of ammonia masers. + +So far, a total of 32 NH3 inversion transitions (AK = 0 +and AJ = 0) have been identified as masers. Among these, and +despite arising from energy levels as high as 1090 K above +the ground state, the NH3 (9,6) maser stands out as being the +strongest and most variable one in W51-IRS2 (e.g., Henkel et al. +2013). Maser emission in this line has only been detected in five +HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. +1986), and Sgr B2(N) (Mei et al.|/2020). The NH3 (3,3) masers +are thought to be collisionally excited (e.g., [Flower et al.||1990; +Mangum & Wootten} |1994); in contrast, the pumping mecha- +nism of NH3 (9,6) masers is less well constrained (Madden et al. +1986). [Brown & Cragg) (1991) have studied ortho-ammonia and +found that it could possibly pump the (6,3) inversion line, but +they did not extend their model to the (9,6) transition due to the +fact that collision rates are only known for inversion levels up to +J = 6(e.g.,/Danby et al.1988). + +NH; (9,6) masers are found to be strongly variable, similar to +H2O masers (Madden et al. 1986; Pratap et al. 1991; {Henkel et al. +2013). In W51-IRS2, |Henkel et al.| (2013) found that the (9,6) +line showed significant variation in line shape within a time in- +terval of only two days. Mapping of the (9,6) maser toward W51 +with very long baseline interferometry (VLBI) suggests that the +masers are closer to the H.O masers than to the OH masers or +to ultracompact (UC) Hum regions (Pratap et al.||1991). While +Henkel et al.) (2013) and|Godd1 et al.) (2015) showed that the SiO +and NH3 masers in W51-IRS2 are very close to each other, their +positions, differing by 0’/065 (~0.015 pc), do not fully coincide. 
+ +In this paper we report the discovery of NH3 (9,6) masers +in two HMSFRs, Cepheus A and G34.26+0.15. This increases +the number of (9,6) maser detections in our Galaxy from five +to seven. In Sect. 2] observations with the Effelsberg 100-meter +telescope and the Karl G. Jansky Very Large Array (JVLA) are +described. Results are presented in Sect. B| The morphology of +Cep A and G34.26+0.15 as well as a comparison of the emission +distributions of different tracers with the NH3 (9,6) masers are +presented in Sect. | Our main results are summarized in Sect. 5} + +2. Observations and data reduction +2.1. Effelsberg observations and data reduction + +The NH3 (9,6) line was observed toward Cep A and +G34.26+0.15 with the 100-meter Effelsberg telescopd!| in 2020 +January and 2021 February, July, and August. The S14mm dou- +ble beam secondary focus receiver was employed. The full width +at half maximum (FWHM) beam size is 49” at 18.5 GHz, the +frequency of the target line. The observations were performed in +position switching mode, and the off position was 10’ in azimuth + +' Based on observations with the 100-meter telescope of the MPIfR +(Max-Planck-Institut fiir Radioastronomie) at Effelsberg. + +Article number, page 2 of 10 + +away from the source. For observations made before 2021 Au- +gust, we used a spectrometer that covered 2 GHz wide backends +with a channel width of 38.1 kHz, corresponding to ~0.62 km s~! +at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar +1975). A high spectral resolution backend with 65536 channels +and a bandwidth of 300 MHz was employed in 2021 August, +providing a channel width of 0.07 km s7! at 18.5 GHz. Point- +ing was checked every 2 hours using 3C 286 or NGC 7027. +Focus calibrations were done at the beginning of the observa- +tions and during sunset and sunrise toward the abovementioned +pointing sources. The system temperatures were 100-130 K on +a main-beam brightness temperature, T)yp, scale. 
This flux den- +sity was calibrated assuming a Typ/S ratio of 1.95 K/Jy, derived +from continuum cross scans of NGC 7027 (the flux density was +adopted from |Ott et al.||1994)). Calibration uncertainties are esti- +mated to be ~ 10%. + +We used the GILDAS/CLASS}| package (Pety|/2005) to re- +duce the spectral line data. A first-order polynomial was sub- +tracted from each spectrum for baseline removal. + +2.2. JVLA observations and data reduction + +Observations of the NH3 (9,6) line toward Cep A and +G34.26+0.15 were obtained on 2021 July 13 with the JVLA +of the National Radio Astronomy Observatoryp] (NRAO) in the +C configuration (project ID: 21A-157, PI: Yaoting Yan). We +employed 27 antennas for the observations. The primary beam +of the JVLA antennas is 150’ (FWHM) at 18.5 GHz. A mix- +ture of mixed three-bit and eight-bit samplers were used to per- +form the observations. For the NH3 (9,6) line observations, we +used one subband with the eight-bit sampler covering a band- +width of 16 MHz with full polarization, eight recirculations, and +four baseline board pairs (BIBPs) to provide a velocity range +of 260 km s~! with a channel spacing of 0.13 km s~!. Two +additional subbands of bandwidth 16 MHz were used to cover +the NH3 (8,5) and (10,7) lines. The three-bit sampler with 32 +subbands, each with a bandwidth of 128 MHz to cover a to- +tal range of 4 GHz between 20—24 GHz, was used to mea- +sure the continuum emission. 3C 286 with a flux density of +2.89 Jy at 18.5 GHz (Perley & Butler) |2013) was used as a +calibrator for pointing, flux density, bandpass, and polarization. +J2230+6946 and J1851+0035 served as gain calibrators for Cep +A and G34.26+0.15, respectively. The on-source times were +430° and 4’"50° toward Cep A and G34.26+0.15, respectively. + +Data from two antennas were lost due to technical is- +sues. 
The data from the remaining 25 antennas were reduced +through the Common Astronomy Software Applications pack- +age (CASAFt McMullin et al.|2007)). We calibrated the data with +the JVLA CASA calibration pipeline using CASA 6.1.2. The +results were obtained after flagging data that contain artifacts. +We inspected the phase, amplitude, and bandpass variations of +the calibrated visibility data to search for additional artifacts be- +fore imaging. Then, the uvcontsub task in CASA was used to +separate the calibrated visibilities into two parts, one with line- +only data and the other with the continuum data. The tclean task +with a cell size of 02 and Briggs weighting with robust=0 was +used to produce the images of spectral line and continuum emis- +sion. The synthesized beams for NH3 (9,6) are 1’’47 x 0°99 at + +> https://www.iram.fr/IRAMFR/GILDAS/ + +3 The National Radio Astronomy Observatory is a facility of the Na- +tional Science Foundation operated under cooperative agreement by As- +sociated Universities, Inc. + +4 https://casa.nrao.edu/ + + Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions @@ -414,6 +535,217 @@ tainties, as unresolved. Article number, page 3 of 10 +Y. T. Yan (12) #2 #€) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions + +PA. = 58°79 and 17733 x 106 at PA. = 5°36 toward Cep A +and G34.26+0.15, respectively. For the 1.36cm (20-24 GHz) +continuum emission, the synthesized beams are 1’’08 x 0/’67 at +P.A. = 60°64 and 095 x 071 at P.A. = 5°91 toward Cep A and +G34.26+0.15. The typical absolute astrometric accuracy of the +JVLA is ~10% of the synthesized beanp} The flux density scale +calibration accuracy is estimated to be within 15%. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.6 fil a 7 0.3 +F | CepA 04-Jan-2020 J +0.4 FI 4 O.2 6 +rE | Effelsberg 4 c +0.2 Fl 4 O.1E +| q c +Of 4 OE +E J o2k +L J oo4k +E | of +E 4 o2F +~ E J O15= +> rt 4 E +2 FE 4 OWE +> E 7 0.056 +o G | OF +c E' "| FE +o E J 026 +x E 4 016 +i E 0 +—0.9 BH 0-1 +Fi fF 0.1E +0.5 Fi 4 0.05 F Ht) +E | f O; rh +Of c) — Et ty +7 BRE EEE 0.09 HAH EH +t | Effelsberg 12-Aug-2021]. E | 12-Aug-2021 +0.5 FI + E Ln) dl 5 +E | {| 0 ! +Ok 1 +—10 0 10 40 50 60 70 + +Velocity (km/s) + +Velocity (km/s) + +Fig. 1. Spectra from NH; (9,6) transition lines. Left: Top to bottom: +Time sequence of NH3 (9,6) profiles observed toward Cep A with the +Effelsberg 100-meter telescope (after subtracting a first-order polyno- +mial baseline). A JVLA spectrum is interspersed. The systemic veloc- +ity from CO and HCO? lines is indicated by a dashed blue line. The +two dashed red lines at LSR velocities, Visp, of —0.90 km s7! and +—0.28 km s7! indicate the central velocities of the two major compo- +nents. Right: NH; (9,6) spectra from G34.26+0.15. The systemic ve- +locity from C!70 is indicated by a dashed blue line. The three dashed +red lines at Visp = 54.1 kms7!, 55.8 km s7!, and 62.5 kms“! show the +central velocities of the main ammonia emission components. + +3. Results + +The spectra from different epochs are shown in Figs. [I] and 2} +Toward Cep A, the NH3 (9,6) line profile from the JVLA is ex- +tracted from an Effelsberg-beam-sized region (FWHM, 49’). In +the case of G34.26+0.15, the NH3 spectrum is below the noise +level if a similarly large beam size is used. Therefore, we de- +rived the JVLA NHs3 (9,6) spectrum from a smaller region, with +radius 3’’5, that contains all the detected NH3 (9,6) emission. In +Table [A.1] the observed NH; (9,6) line parameters obtained by +Gaussian fits are listed. NH3 (8,5) and (10,7) emission is not de- +tected by our JVLA observations. 
The 3a upper limits for the +NH; (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam™! + +> https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance- +/positional-accuracy + + + + + + + + + + + + +0.04 F q (0.2 E-634.2640.15 | 13-Jul-2021 4 +0.02 F 401 BN Wid 4 +k i WL A +Ss ° 4 0 EF ol 5 +LS Ei itt 4 O.1F ! 1 +E lEtfelsberg th—Aug—2021 4 E ertelcber ee +S in f E +S oR Po i +3 O1F | EFfelsber 12-Au tin OTE ane 4 +tL F | 8 | 9 q EF Effelsberg ii 12-Aug~2021 4 +0.05 5 ii! J 0.05 i 3 +i | 4 A ei a +0 i rl tH 0 i i +E ll H E roid +C —0.05 & | +—10 0 10 45 50 55 60 65 + +Velocity (km/s) Velocity (km/s) + +Fig. 2. NH; (9,6) line profiles emphasizing, in contrast to the spectra +in Fig. [I] weaker features. Cep A spectra are presented on the left, +G34.26+0.15 spectra on the right. The two dashed red lines in the left +panels indicate Vi sp = 1.48 km s7! and 2.89 km s“!. In the right panels, +the two dashed red lines refer to 54.1 kms"! and 55.8 kms"!. + +and 27.2 mJy beam™', respectively. In G34.26+0.15, the corre- +sponding 3c upper limits for the NH; (8,5) and (10,7) lines are +22.1 mJy beam™! and 30.4 mJy beam™!. For both sources, sen- +sitivity levels refer to emission from a single channel of width +0.13 km s~!. Taking the larger measured line widths of the (9,6) +maser features (see Table[A.1), these limits could be further low- +ered by factors of two to four. + +3.1. Centimeter-continuum emission + +The 1.36cm continuum, derived from our JVLA observations, +toward Cep A is presented in Fig. 8] Six published compact +sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are de- +tected in our observations. Figure 4] shows the 1.36cm contin- +uum in G34.26+0.15. Three main continuum objects, A, B, and +C, are detected. By using the imfit task in CASA, we measured +the continuum flux at 1.36 cm toward individual compact source +components in Cep A and G34.26+0.15. Details are given in Ta- +ble + +3.2. 
NH; (9,6) emission in Cep A + +In 2020 January, NH3 (9,6) emission with a peak flux density of +0.67 + 0.07 Jy was first detected with the Effelsberg 100-meter +telescope in Cep A. Emission with similar strength was also de- +tected in 2021 February and August with the same telescope. +Higher velocity resolution data, which were obtained in 2021 +August, again with the Effelsberg 100-meter telescope, show +that the (9,6) emission contains two main velocity components. +Overall, the flux densities of the NH3 (9,6) emission line mea- +sured with the Effelsberg 100-meter telescope are, within the cal- +ibration uncertainties, unchanged. This is valid for the time inter- +val between 2020 January and August 2021, when we smoothed +the obtained spectra to the same velocity resolution. We also +see another two weaker components. Figure 2] emphasizes these +weak components with an expanded flux density scale. + +Higher angular resolution data from the JVLA pinpoint the +position of the NH3 (9,6) emission with an offset of (—0/28, +0’’02) relative to the 1.36cm continuum peak of Cep A HW2 +(Fig. Bh). The deconvolved NH3 (9,6) component size is (0729 + +0715) x (019 + 014) at PA. = 174°, derived with the imfit task +in CASA, and can thus be considered, accounting for the uncer- +tainties, as unresolved. + +Article number, page 3 of 10 + + A&A proofs: manuscript no. mainArxiv @@ -454,6 +786,126 @@ component sizes are (1′′.42±0′′.43)× (0′′.54±0′′.62) at P.A. Article number, page 4 of 10 +A&A proofs: manuscript no. mainArxiv + +ke NH3 (9,6) - + + + + +Velocity (km s~}) + +DEC_offset (arcsec) +DEC_offset (arcsec) + + + + + +RA _offset (arcsec) + +RA_offset (arcsec) + +Fig. 3. Cepheus A. White contours mark the 1.36cm JVLA continuum map of Cep A; levels are —5, 5, 10, 20, 30, 40, 50, 70, 90, +and 110 x 0.125 mJy beam™!. The background image is the Spitzer 4.5m emission, taken from the Galactic Legacy Infrared Mid-Plane +Survey Extraordinaire (GLIMPSE; [Benjamin et al. 
|2003} |Churchwell et al.|{2009). The reference position is @y2999 = 22"56"173972, and +62000 = 62°01'49”587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black +ellipse denoting the position of the NH; (9,6) emission with a purple star at its center. OH (Bartkiewicz et al.|/2005), HO (Sobolev et al.|/2018), +and CH;0H masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates +the LSR velocity range of the maser spots. + +1 1. . 1. 1 ’ +Ke NH: (9.6) of | om Nh 8.6) +3 OH ia 3 OH +H:0 -2.0 S 62 +44M ct.0H 0H + + + + + +7 H20 + +bw) +2.4 M2 + +w +© + +w +8 +Velocity (km s~?) + +M3 + +DEC_offset (arcsec) + +DEC_offset (arcsec) +° + +3.0 + + + + + +10 + +4 2 + + + + + +see + + + + + + + + + +2.4 2.2 2.0 18 16 14 1.2 1.0 +RA_offset (arcsec) + + + +RA_offset (arcsec) + +Fig. 4. 1.36cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of —5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, +150, 180, and 200 x 5.0 mJy beam™!. The background image is the Spitzer 4.5 ym emission, taken from GLIMPSE. The reference position is +@y000 = 18"53™188560, and dy2999 = 01°14’58”201, the peak position, is marked by a black cross. The black ellipses show the positions of NH; +(9,6) emissions with stars at their center (i.e, M1, M2, and M3). OH (Zheng et al/2000), H.O (Imai et al/2011), and CH;0H + +[2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (Vis) of maser spots. + +velocity resolution data from 2021 August show the NH; (9,6) + +In view of the constancy of the flux densities obtained at Ef- +emission to be composed of two different components. The spec- + +felsberg and the similar JVLA flux density, measured in 2021 + +July, there is no missing interferometric flux density in the JVULA +data. +3.3. 
NH; (9,6) emission in G34.26+0.15 + +The NH; (9,6) emission was first detected toward G34.26+0.15 +in 2020 January with the Effelsberg 100-meter telescope. Higher + +Article number, page 4 of 10 + +tra of weak components on a smaller flux density scale are pre- +sented in Fig. + +Three different locations showing NH3 (9,6) emission are +found toward G34.26+0.15 (Fig. A). The deconvolved NH; (9,6) +component sizes are (1/742 + 0’'43) x (054 + 062) at PA. = 97° +(M1), (0/742 + 0°'27) x (015 + 0’’27) at P.A. = 150° (M2), and + + Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions @@ -560,6 +1012,7 @@ NH3 column density of ∼5×1015 cm−2 was estimated for a region of 3′′ around HW2 (Torrelles et al. 1999). This high NH3 abun- dance could provide a suitable environment for maser species. Large line widths (∆V1/2 '7.0 km s−1) with VLSR ∼ −10 km s−1 + in both (1,1) and (2,2) lines were found toward HW2 (Torrelles et al. 1993). The velocity is similar to the cloud’s systemic lo- cal standard of rest (LSR) velocity of −11.2 km s−1, which @@ -581,6 +1034,7 @@ lines with the kinetic temperature reveals the size of the hot, ammonia-emitting core to be only ∼2.5′′. All those measured NH3 lines were quasi-thermal and had LSR velocities of ∼ 58.5 km s−1, close to the systemic velocity of ∼ 58.1 km s−1 + obtained from C17O observations (Wyrowski et al. 2012). Their line widths (∆V1/2 ≥3.6 km s−1) are larger than what we find (0.35 km s−1 ≤ ∆V1/2 ≤ 0.94 km s−1) for each (9,6) @@ -595,6 +1049,143 @@ against continuum source C (∼ 7′′ resolution; Keto et al. 1987) Article number, page 5 of 10 +Y. T. Yan (12) #2 #€) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions + +(1717 + 034) x (0/27 + 0/46) at P.A. = 53° (M3) and are thus +comparable to or smaller than the beam size. + +Overall, the NH3 (9,6) line from G34.26+0.15 weakened +during the time interval from 2020 January to 2021 August by +about 70%. 
A comparison between the JVLA spectrum and the +Effelsberg data, assuming a linear decrease in the integrated in- +tensity as a function of time between different epochs of the +100-meter observations, suggests there is no missing flux in the +JVLA data. This is similar to the situation in Cep A. + +4. Discussion +4.1. Morphology of Cep A and G34.26+0.15 + +Cep A, at a trigonometric parallax distance of 0.7040.04 kpc +(Moscadelli et al.|/2009} |Dzib et al.|/2011)), is the second closest +HMSFR (after Orion) and by far the closest NH3 (9,6) maser +known. About 16 compact (~1’’) radio sources (e.g., Hughes & +Wouterloot |1984; |Hughes||1991; |Garay et al.||1996) have been +identified in Cep A. [Hughes & Wouterloot| (1984) discovered +these targets at radio wavelengths, which are UC and hypercom- +pact (HC) Hm regions and/or stellar wind sources, subsequently +named as HW sources. The HW2 object is one of the best known +examples of a protostellar jet or disk system driving a powerful +outflow (e.g.,/Rodriguez et al.|1980;|Giisten et al.|1984; /Torrelles +et al.|/1986; \Curiel et al.|/2006; \Carrasco-Gonzalez et al.|2021)). +The observed NH3 (9,6) emission is slightly offset (—0’28, 0702) +from the center of HW2 (see Fig. B). + +G34.26+0.15 is an HMSER located at a distance of 3.3 kpc +(Kuchar & Bania|1994). It hosts four radio continuum compo- +nents named A, B, C, and D. Component C is a prototypical +cometary UC Hr region containing a compact head and a diffuse +tail that extends from east to west (e.g., [Reid & Ho} 1985} |Garay +et al.|/19865 |Sewilo et al.|/2004} |Sewito et al.201 1). Components +A and B are HC Hn regions, located to the east of component +C. An extended ring-like Hm region, called component D, is lo- +cated southeast of components A-C. One of the three observed +NHz (9,6) emission line sources, M1, is close to the head of com- +ponent C, whereas M2 and M3 originate from another compact +region in the west of the HC H1 component A (see Fig. 4). 
+ +4.2. NH; (9,6) emission possibly caused by maser action + +As shown in Fig. [I} the NH3 (9,6) profiles in Cep A and +G34.26+0.15 are narrow (AVj;2 <2.0 km s7!), much narrower +than the expected line widths (>4 km s~') of thermal lines ob- +served at a similar angular resolution (e.g., /Torrelles et al.|/1985) +1986, |1993)|1999; Henkel et al.|1987; Comito et al.2007; |Mook- +erjea et al.2007; (Wyrowski et al.|/2012; |Beuther et al.2018). Ve- +locity shifts with respect to the systemic velocities of the two +sources are both observed, that is, V ~10 km s~! in Cep A and +V ~4km s"! in G34.26+0.15 (see details in Sect. 4.3). Further- +more, time variability is observed in the case of G34.26+0.15, +which is also a characteristic feature of maser emission. +Additional evidence of their maser nature is the high bright- +ness temperatures of the (9,6) emission spots toward Cep A and +G34.26+0.15. The spectral parameters are listed in Table [A.3} +Because at least a significant part of the NH3 (9,6) emission +is not resolved by our JVLA observations, the derived bright- +ness temperatures are only lower limits. Nevertheless, the lower +limits on the brightness temperature are >800 K in Cep A (see +Table [A.3), which is much higher than the expected thermal +gas temperature of ~250 K (e.g., [Patel et al.| (2005; (Comito +et al. (2007; |Beuther et al. |2018). This strongly suggests that + +the NH3 (9,6) emission in Cep A is due to maser action. Be- +cause G34.26+0.15 is located at about five times the distance to +Cep A, beam dilution effects reduce the lower main beam bright- +ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta- +ble[A.3). We also note that the luminosity of the NH3 (9,6) emis- +sion in G34.26+0.15 is higher than or comparable to that in Cep +A, depending on the epoch of our observations. + +Finally, the non-detections of the (8,5) and (10,7) lines also +indicate that the (9,6) line is special. 
This allows us to derive +lower 3c limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity +ratios. The (9,6) line arises from ortho-NH3 (K = 3n), whereas +the NH3 (8,5) and (10,7) lines are para-NH3 (K # 3n) lines. +The minimum ortho-to-para ratios are in the range 12-42 and 1|- +8 toward Cep A and G34.26+0.15, respectively. The statistical +weights for the ortho states are twice as large as those for the +para states (e.g.,,; Umemoto et al.}1999;|Goddi et al./201 1; Henkel +et al.2013). In Cep A, the line intensity ratios are far higher than +this factor of two. Thus, at least in Cep A the higher main beam +brightness peak temperature of the (9,6) emission is caused by +maser action, perhaps involving exponential amplification, and +the case of G34.26+0.15 is likely similar. + +4.3. Comparison of NH; (9,6) masers with previously +published (quasi-)thermal NH; emission + +The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines +show thermal emission toward Cep A over a velocity range of +-13kms"! < Visp < -4kms7! (Brown et al.|/1981; |Giisten +et al.|/1984} |Torrelles et al.) 1985) |1986) |1993] |1999). An average +NH; column density of ~5x10!° cm~? was estimated for a region +of 3” around HW? (Torrelles et al.||1999). This high NH3 abun- +dance could provide a suitable environment for maser species. +Large line widths (AV, /2 ~7.0 km s~') with Visp ~ —10 kms“! +in both (1,1) and (2,2) lines were found toward HW2 (Torrelles +et al.|1993). The velocity is similar to the cloud’s systemic lo- +cal standard of rest (LSR) velocity of —11.2 km s7!, which +is based on CO (Narayanan & Walker |/1996) and HCO* ob- +servations (Gomez et al.|/1999). Our (9,6) maser is redshifted +(-0.9 kms"! < Vise <2.9 km s7!) and shares positions with +the outflowing gas seen in CO and HCO? with similarly red- +shifted velocities. Therefore, we argue that the (9,6) masers are +related to outflowing gas. 
+In G34.26+0.15, a large NH; column density, +10!85#02 cm-2, and a kinetic temperature of 225475 K +were derived by |Henkel et al.| (1987) based on measurements +of 15 NH3 inversion transitions in the frequency range of +22.0-26.0 GHz. These did not include the (9,6) transition. +While these lines were measured with a beam size of about +40”, a comparison of the peak intensities of the optically thick +lines with the kinetic temperature reveals the size of the hot, +ammonia-emitting core to be only ~2.5’’. All those measured +NH3 lines were quasi-thermal and had LSR velocities of +~ 58.5 km s~!, close to the systemic velocity of ~ 58.1 km sg! +obtained from C!’O observations (Wyrowski et al.) |2012). +Their line widths (AV;;2 >3.6 km s~') are larger than what +we find (0.35 km s7! < AV\/2 < 0.94 km s!) for each (9,6) +maser component (see details in Table [A.3). In all, we may +have observed four different (9,6) velocity features. Three +are blueshifted at Visp ~ 53.8 km s7!, 55.8 km s7!, and +56.8 km s~!, and a fourth, tentatively detected, at 62.5 km s7!. +This tentative redshifted feature was only potentially detected +with Effelsberg in 2020 January. The velocity is similar to that +of the JVLA measurements on the NH; (1,1) absorption line +against continuum source C (~ 7” resolution; |Keto et al.||1987) + +Article number, page 5 of 10 + + A&A proofs: manuscript no. mainArxiv @@ -639,6 +1230,7 @@ is powered by continuum source C or by an outflow. Near com- ponent B, there are some OH and CH3OH masers but no H2O or NH3 masers. A group of H2O masers, well-known tracers of outflows, with a large velocity distribution of 43 km s−1 ≤ + VLSR ≤54 km s−1, was found to the west of the centimeter- continuum source A and close to the peak of the millimeter- continuum emission (see details in our Fig. A.2 and also in Fig. 5 @@ -948,9 +1540,10 @@ detected NH3 (9,6) emissions. Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. Source R.A. Dec. Size P.A. 
S ν -(h m s) (◦ ′ ′′) (arcsec) (deg) (mJy) +(h m s) (◦ ′ ′′) (arcsec) (deg) (mJy) Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 + HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36 HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7 @@ -963,6 +1556,75 @@ C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) Article number, page 8 of 10 +A&A proofs: manuscript no. mainArxiv + +Appendix A: + +Table A.1. Summary of NH; (9, 6) maser observations. + + + + + + + +Source Telescope Beam Epoch Channel Sy rms f S,dv Visr AVi/2 +size spacing +(kms!) Gy) (mJy) Gy kms“) (km s7!) + +Cep A Effelsberg 49” 2020, Jan. 04 0.62 0.67 3.41 1.19+0.02 -1.11+002 1.67+0.04 +Effelsberg 49” 2021, Feb. 11 0.62 0.59 5.97 1.08+0.02 -0.74+0.02 1.70+0.04 +Effelsberg 49” 2021, Feb. 15 0.62 0.65 10.98 1.11+40.03 -0.75+0.02 1.60+0.05 +JVLA‘ 1”47 x 0799 =. 2021, Jul. 13 0.13 1.13 144 0.89+0.09 -0.86+0.03 0.74+0.12 +Effelsberg 49” 2021, Aug. 11 0.07 0.98 13.36 049+0.02 -0.90+0.01 0.47+0.01 +0.35 0.26+0.02 -0.28+0.02 0.69+0.05 +Effelsberg 49” 2021, Aug. 12 0.07 0.98 13.35 0.50+0.01 -0.89+0.07 0.48 + 0.07 +0.35 0.20+0.01 -0.29+0.07 0.54 + 0.07 +0.06 0.07 + 0.01 0.51+0.07 1.09 +0.07 +0.02 0.02 + 0.01 2.15+0.07 0.80+0.07 +0.07 0.06 + 0.01 2.89 +0.07 0.92 + 0.07 +G34.26+0.15 Effelsberg 49” 2020, Jan. 03 0.62 0.30 1.26 0.65+0.03 62.50+0.05 2.05+0.13 +Effelsberg 49” 2021, Feb. 11 0.62 0.24 2.42 0.40+0.02 55.76+0.04 1.60+0.12 +Effelsberg 49” 2021, Feb. 15 0.62 0.20 4.86 0.38+0.02 55.71+0.05 1.80+0.14 +JVLA? 1733 x 1706 2021, Jul. 13 0.13 0.23 37.1 0.09+0.02 54.41+0.03 0.38 + 0.09 +0.22 0.22+0.02 55.82+0.05 0.95+0.12 +0.15 0.06+0.01 57.214+0.04 0.35+0.08 +Effelsberg 49” 2021, Aug. 
11 0.07 0.08 13.92 0.06+0.007 54.10+0.05 0.68 +0.12 +0.07 0.02+0.006 54.82+0.03 0.31 40.09 +0.12 0.10+0.006 55.85+0.02 0.75 + 0.06 +Effelsberg 49” 2021, Aug. 12 0.07 0.16 27.40 0.09+0.008 55.83+0.02 0.56 + 0.05 + + + +Notes. The spectral parameters are obtained from Gaussian fitting. “ The JVLA spectrum toward Cep A is extracted from the Effelsberg-beam- +sized region (FWHM 49”). © For G34.26+0.15, the JVLA beam samples the NH3 (9,6) spectrum over a region of radius 35, which contains all +detected NH; (9,6) emissions. + +Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. + + + + + + + +Source R.A. Dec. Size PA. Sy +(h m= s) ce’ ") (arcsec) (deg) (mJy) +Cep A HW2 = 225617.972 + 0.003 +6201 49.587+0.015 (0.45 + 0.19) x (0.22 + 0.10) 50.0 20.2 + 1.4 +HW3a_ 2256 17.420+0.022 +620144.576+0.076 (2.35+40.45)x(0.55+0.14) 666 4.75 +0.74 +HW3b = 22 56.17.578 + 0.009 +6201 45.041 +0.043 (1.43 +0.24)x(0.45+0.10) 59.9 3.19+0.36 +HW3c 225617.956+0.016 +6201 46.224+0.038 (1.44 +0.37) x (0.36+0.19) 86.0 9.90 + 1.7 +HW3d 225618.195+0.005 +6201 46.325+0.014 (1.26+0.12) x (0.30+0.19) 102.5 13.75 +0.92 +HW9 2256 18.626+0.014 +6201 47.851+0.137 (1.53 +0.51) x (0.29+0.30) 28.0 3.26+0.78 +G34.26+0.15 A 18 53 18.774 +0.005 +01 1456.208+0.125 (0.66 + 0.49) x (0.50 + 0.33) 10.0 94 + 33 +B 18 53 18.649+ 0.005 +01 1500.071+0.180 (2.31 +0.49) x (0.85+0.21) 17.4 597 + 110 +C 18 53 18.560 + 0.004 +01 1458.201+0.112 (2.03 + 0.30) x (1.34+0.20) 178.0 5070 +660 + + + +Article number, page 8 of 10 + + Y. T. 
Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions diff --git a/read/results/tika/2201.00022.txt b/read/results/tika/2201.00022.txt index f41aedf..99832bf 100644 --- a/read/results/tika/2201.00022.txt +++ b/read/results/tika/2201.00022.txt @@ -18,27 +18,6 @@ - - - - - - - - - - - - - - - - - - - - - @@ -72,6 +51,7 @@ ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately 50− 70 M�, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections + indicate the existence of BHs with masses at and above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions @@ -116,6 +96,7 @@ approximately 142 M�. This event may have also had a mass gap that limits stellar black holes (BHs) to no more than ∼< 50 M� (e.g., Heger et al. 2003; Woosley + 2017)1. Similarly, the merger products of GW150914, GW170104, and GW170814 fall within the mass gap @@ -284,6 +265,7 @@ back towards the cluster center over a dynamical fric- tion timescale. Using this approach, they showed that 103 − 104 M� IMBHs can form efficiently over the life- + time of a cluster. However, as discussed in Section 2.2, direct BH-star @@ -383,6 +365,7 @@ sume that the BH mass distribution follows that of the stars from which they originate, a Kroupa initial mass function dN/dm ∝ m−2.35. While this choice represents + a gross oversimplification, it has very little bearing on our final results. Future work may address the particu- @@ -448,7 +431,8 @@ BHs in the GN can undergo direct collisions with other objects. The timescale for this process, tcoll, can be es- -timated using a simple rate calculation: t−1coll = nσA, +timated using a simple rate calculation: t−1 +coll = nσA, where n is the number density of objects, σ is the ve- @@ -456,15 +440,14 @@ locity dispersion, and A is the cross-section. 
We use the collision timescale from Rose et al. (2020): -t−1coll =πn(a•)σ(a•) +t−1 +coll =πn(a•)σ(a•) × ( -f1(e•)r +f1(e•)r2 -2 c + f2(e•)rc - 2G(mBH +m?) σ(a•)2 @@ -511,16 +494,19 @@ r• r0 )−α + , (2) where r• denotes the distance from the SMBH. We adopt a SMBH mass of 4× 106 M� such that our fiducial GN + matches our own galactic center (e.g., Ghez et al. 2005; Genzel et al. 2003). In this case, the normalization in Eq. (2) is ρ0 = 1.35× 106M�/pc3 at r0 = 0.25 pc (Gen- + zel et al. 2010). Additionally, in Eq. (2), α gives the slope of the power law. We assume that a uniform pop- @@ -562,6 +548,7 @@ and a star, and the former has a much smaller physi- cal cross-section. For example, the Schwarzschild radius of a 10M� BH is only 30 km, or 4.31 × 10−5R�. For + this reason, direct collisions between compact objects are very rare and not included in our model. @@ -595,6 +582,137 @@ GN. 2 We note that the eccentricity has a very minor effect on the collision timescale (Rose et al. 2020). +IMBH ForMATION IN GALACTIC NUCLEI 3 + + + +1015 4 + +1013 4 + +101! 4 + +Timescale [yr] + +10? 4 + +107 4 + + + + + + + + + +107 10-2 10-1 10° +Distance from SMBH [pc] + +Figure 1. We plot the relevant timescales, including col- +lision (green), relaxation (gold), and BH-BH GW capture +(purple), for a single BH in the GN as a function of distance +from the SMBH. For the collision timescale, we assume the +BH is on a circular orbit. The timescales depend on the +density, so we adopt a range of density profiles, bounded by +a = 1 (dashed curve) to a = 2 (dark, solid curve). The dark +blue line represents the time for a 10° Mo BH to merge with +the SMBH through GW emission. + +observationally motivated distributions in Section 2.9, +but reserve a more detailed examination of the distribu- +tion’s impact for future work. + +2.2. Direct Collisions + +BHs in the GN can undergo direct collisions with other +objects. 
The timescale for this process, tgo, can be es- +timated using a simple rate calculation: t{j, = noA, +where n is the number density of objects, o is the ve- +locity dispersion, and A is the cross-section. We use the +collision timescale from Rose et al. (2020): + +t— | =1n(de)o(de) + +coll +x (salca)r2 + falee)ra7 pues) ) (1) + +o(ae)* + +where G is the gravitational constant and r, is the sum +of the radii of the interacting objects, a black hole with +mass mpy and a star with mass m,. Detailed in Rose +et al. (2020), fi(e.) and fo(e.) account for the effect of +the eccentricity of the BH’s orbit about the SMBH on +the collision rate, while n and o are simply evaluated +at the semimajor axis of the orbit (see below). Note +that this timescale equation includes the effects of grav- +itational focusing, which enhances the cross-section of +interaction. + +Assuming a circular orbit for simplicity, we plot the +timescale for a BH orbiting in the GN to collide with +a1 Mo star as a function of distance from the SMBH + +in Figure 1.2 As this timescale depends on the density +of surrounding stars, we adopt a density profile of the +form: + +plre) = pa (=) | (2) + +To + +where r, denotes the distance from the SMBH. We adopt +a SMBH mass of 4 x 10° Mo such that our fiducial GN +matches our own galactic center (e.g., Ghez et al. 2005; +Genzel et al. 2003). In this case, the normalization in +Eq. (2) is pp = 1.35 x 10° Me /pc3 at ro = 0.25 pe (Gen- +zel et al. 2010). Additionally, in Eq. (2), @ gives the +slope of the power law. We assume that a uniform pop- +ulation of solar mass stars account for most of the mass +in the GN, making the stellar number density: + +_ PlTe) +n(re) = Mo (3) + + + +The collision timescale also depends on the velocity dis- +persion, which we express as: + +GM, +o(re) = pay’ (4) + +where a is the slope of the density profile and M, de- +notes the mass of the SMBH (Alexander 1999; Alexan- +der & Pfuhl 2014). As mentioned above, Eq. 
(1) depends +on the sum of the radii of the colliding objects, r.. We +take re = 1 Re because these interactions involve a BH +and a star, and the former has a much smaller physi- +cal cross-section. For example, the Schwarzschild radius +of a 10 Ms BH is only 30 km, or 4.31 x 107° Ro. For +this reason, direct collisions between compact objects +are very rare and not included in our model. + +We note that direct collisions between BHs, via GW +emission, were shown to be efficient in nuclear star clus- +ters without SMBHs (e.g., Portegies Zwart & McMil- +lan 2000; O’Leary et al. 2006; Rodriguez et al. 2016). +However, in the GN, star-BH collisions are much more +frequent than direct BH-BH collisions. As depicted in +Figure 1, the star-BH collision timescale for a range +of density profiles is many orders of magnitude shorter +than the BH-BH GW collision timescale (for the rele- +vant equations, see O’Leary et al. 2009; Gondan et al. +2018, for example). Thus, we expect that star-BH col- +lisions will be the main driver of IMBH growth in the +GN. + +2 We note that the eccentricity has a very minor effect on the + +collision timescale (Rose et al. 2020). + + 4 Rose et al. @@ -656,6 +774,7 @@ maximum impact parameter 1 R�. Qualitatively, one might expect that the BH could capture the entire star (i.e., ∆m ∼ 1 M�) if the relative velocity is smaller than + the escape velocity from the BH at this point. 
However, in the vicinity of the SMBH, the dispersion velocity of @@ -683,12 +802,13 @@ To estimate ∆m, we begin with the Bondi-Hoyle ac- cretion rate, ṁ, given by: ṁ = -4πG2m2BHρstar +4πG2m2 -(c2s + σ -2) +BHρstar +(c2s + σ2) 3/2 + , (5) 3 Closer to the SMBH, ∆t may exceed the collision timescale by @@ -720,8 +840,7 @@ approximate the density as 1M�/(4πR 3 �/3) and take -the conservative value of cs = 500 km s -−1, which is +the conservative value of cs = 500 km s−1, which is consistent with the sound speed inside a 1 M� star @@ -734,7 +853,9 @@ have: ∆m = min(ṁ× t?,cross, 1 M�) , (6) where t?,cross ∼ R�/σ is the crossing time of the BH in + the star. We take the minimum between ṁ× t?,cross and + 1 M� because the BH cannot accrete more mass than one star at each collision. @@ -765,6 +886,144 @@ tions assume α = 1 for the stellar density profile, ensur- ing the collision timescale is long compared to the sim- +ROSE ET AL. + +2.3. Statistical Approach to Collisions + +We simulate the mass growth of a population of BHs + +with initial conditions detailed in Section 2.1. Over an + +increment At of 10° yr, we calculate the probability of +a collision occurring, given by At/tcon. This choice of +At is motivated by our galactic center’s star formation +timescale (e.g., Lu et al. 2009), allowing for regular re- +plenishment of the stellar population in the GN. We have + +checked that the results are not sensitive to this choice + +of At, omitted here to avoid clutter. We draw a number +between 0 and 1 using a random number generator. If +that number is less than or equal to the probability, we +increase the BH’s mass by Am, the mass that the BH is +expected to accrete in a single collision (see Section 2.4 +for details). We recalculate the collision timescale using +the updated BH mass and repeat this process until the +time elapsed equals the simulation time of 10 Gyr’. + +2.4. Mass Growth + +When a BH collides with a star, it may accrete ma- +terial and grow in mass. 
The details of the accretion +depend on the relative velocity between the BH and +star. For simplicity, this calculation assumes that the +two objects experience a head on collision, with the BH +passing through the star’s center. We begin by con- +sidering the escape velocity from the BH at the star’s +outermost point, its surface, which corresponds to the +maximum impact parameter 1 Ro. Qualitatively, one +might expect that the BH could capture the entire star +(i.e., Am ~ 1 Mo) if the relative velocity is smaller than +the escape velocity from the BH at this point. However, +in the vicinity of the SMBH, the dispersion velocity of +the stars may be much larger than the escape velocity +from the BH at the star’s surface. In this case, the BH +captures a “tunnel” of material through the star. This +tunnel has radius equal to the Bondi radius and length +approximately 1 Re. For the purposes of this study, we + +assume that the BH accretes all of the material that + +it captures. The details of the accretion are uncertain, +however, and it may be much less efficient than our re- +sults imply. We discuss accretion in Section 2.5. + +To estimate Am, we begin with the Bondi-Hoyle ac- +cretion rate, m, given by: + +Q 2 +. 4nG MBH Pstar + +e422? (5) + +3 Closer to the SMBH, At may exceed the collision timescale by +a factor of a few for steep density profiles. We include a safe- +guard in our code which takes the ratio teo/At and rounds it +to the nearest integer. We take this integer to be the number of +collisions and increase the BH mass accordingly. + + + +% * Initial +4x10 e e@ AM=1M. +¢ Bondi-Hoyle-Lyttleton + +3x10 ¥ + +3 ate ° +es += es +1 ° +n 2x10 on oe, += os +103 4 + + + + + + + + + +10-3 10-7 107? 10° +Distance from SMBH [pc] + +Figure 2. We consider an example that highlights the mass +growth as a function of distance from the SMBH. Grey dots +represent the initial masses and distances from the SMBH +of the BHs involved in the simulation. 
For simplicity, we set +the inital mass equal to 10 Mo for all of the BHs. Assuming +the density profile of stars has a = 1, we consider two cases: +BHs accrete all of the star’s mass during a collision (red) and +only a portion of the star’s mass is accreted during a collision +given by Eq. 6 (blue). The latter case results in less growth +closer to the SMBH where the velocity dispersion becomes +high. The shaded regions and dashed lines represent the +analytical predictions detailed in Section 2.4. + +where c, is the speed of sound in the star and Pgtar is its +density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima +et al. 1985; Edgar 2004, see latter for a review). We +approximate the density as 1Mo/(4rR3,/3) and take +the conservative value of c, = 500 km s~', which is +consistent with the sound speed inside a 1 Mo star +(Christensen-Dalsgaard et al. 1996) and allows us to set +a lower limit on Am. To find Am, at each collision, we +have: + +Am = min(7m x tx,cross) 1 Mo) ’ (6) + +where ty cross ~ Ro/o is the crossing time of the BH in +the star. We take the minimum between 7m ty cross and +1 Mo because the BH cannot accrete more mass than +one star at each collision. + +Figure 2 juxtaposes the expected growth using Bondi- +Hoyle-Lyttleton accretion (blue small points) with a +much simpler model in which the BH accretes the star’s +entire mass, 1 Mo (red large points). Both examples +start with identical populations of 10 Ms BHs (grey) +and simulate growth through collisions using a statisti- +cal approach. As the BHs grow, the collision timescale, +which depends on mgy, decreases. Simultaneously, +Am, which also depends on mgy, increases. The re- +sult is exponential growth (see discussion and details +surrounding Eq. (8)). 
In Figure 2, however, the simula- +tions assume a = 1 for the stellar density profile, ensur- +ing the collision timescale is long compared to the sim- + + IMBH Formation in Galactic Nuclei 5 @@ -950,9 +1209,10 @@ characteristic timescale to merge a BH with an SMBH is given by: tGW ≈2.9× 1012 yr -( +( M• + 106 M� )−1( @@ -961,9 +1221,11 @@ mBH 106 M� )−1 + × ( M• +mBH + 2× 106 M� )−1( @@ -972,17 +1234,136 @@ a• 10−2 pc )4 -× f(e•)(1− e2•)7/2 , (9) + +× f(e•)(1− e2 +•)7/2 , (9) where f(e•) is a function of e•. For all values of e•, f(e•) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 × 105M� BH in Figure 1 in + +blue. + +IMBH ForMATION IN GALACTIC NUCLEI 5 + +ulation time, 10 Gyr. Therefore, the BHs grow slowly, +and their final masses can be approximated using the +following equation: + + + +Ménal(tcon — Const.) = Minitial + Am — , (7) +co +in which T represents the simulation time and Am and +teoll Temain constant, approximated as their initial val- +ues. + +This equation is plotted in Figure 2 for both cases, +Am = 1 Mo (red) and Am from Bondi-Hoyle-Lyttleton +accretion (blue), and the curves coincide with the cor- +responding simulated results. The shaded regions rep- +resent one standard deviation from Eq. (7), calculated +using the square root of the number of collisions, T/teon- +As indicated by the results in red, in the absence of +Bondi-Hoyle-Lyttleton accretion, the BHs closest to the +SMBH experience the most growth because they have +shorter collision timescales. However, Bondi-Hoyle- +Lyttleton accretion becomes important closer to the +SMBH, where the velocity dispersion is large compared +with the stars’ escape velocity, and curtails the mass +growth for BHs in this region. Outside of 10~? pc, a BH +consumes the star’s entire mass: the accretion-limited +Am governed by Eq. (7) is greater than or equal to the +star’s mass. + +Eq. 7 does not apply for other values of a. 
When the +collision timescale is shorter, corresponding to a larger +index a in the density profile (see Figure 1), the growth +is very efficient and Am quickly approaches 1 Mg. Con- +sequently, while we can now assume Am = 1Mo, we +can no longer assume the collision timescale is constant. +The final mass grows exponentially as a result. For +Am = 1Mo, the general solution is reached by solving +the differential equation dm/dt = 1Mo/tcon(m), which +gives: + +Méinal(Am + 1M) =—A + (minitiat + A) eC? (8) + +where A = 07 Retar/G and C = 27GngtarRstar/7- As an +example, we plot this curve in purple for the a = 2 case, +in Figure 3, which agrees with the simulated masses. + +2.5. Uncertainties in Accretion + +We note that the AM calculated in this proof-of- +concept study assumes that the BH accretes all of the +material that it captures. Estimating the true fraction +of the material accreted by the BH is very challeng- +ing; this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heat- +ing, and radiative transfer, etc. and remains an active +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev + +et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). +Heuristically, if a collision between a BH and a star re- +sults in an accretion disk, the disk’s viscous timescale +may be as low as days. The resultant luminosity can +unbind most of the captured material, though details +such as the amount accreted and peak luminosity re- +main uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The +question becomes whether or not a BH can still accu- +mulate significant amounts of mass over many collisions +even if it accretes very little in a single one. 
We ex- +plore the viability of our channel using a physically mo- +tivated inefficient accretion model. Several studies have +invoked momentum-driven winds in BH accretion (e.g., +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. +2018). We thus estimate the fraction of captured mass +accreted to be approximately vese/(cn), where Vesc is +the escape velocity from the BH at 1 Reo and 7 is the +accretion efficiency at the ISCO. We take 7 to be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fraction accreted is consistent with Kremer et al. (2022) +equation 19 for s = 0.5, which is a reasonable value for +s, a free parameter between 0.2 and 0.8. We discuss +the results of the momentum-driven winds estimate in +Section 3. We note that the accretion process may be +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radi- +ation away from the captured material (e.g., Blandford +& Znajek 1977; Begelman 1979; De Villiers et al. 2005; +McKinney & Gammie 2004; McKinney 2006; Igumen- +shchev 2008; Begelman 2012a,b; McKinney et al. 2014). + +2.6. GW Inspiral + +When a BH is close to the SMBH, GW emission can +circularize and shrink its orbit. We implement the ef- +fects of GW emission on the BH’s semimajor axis and +eccentricity following Peters & Mathews (1963a). The +characteristic timescale to merge a BH with an SMBH +is given by: + +12 MM, 7 MBH - +tew 2.9 x 10°° yr (mic) (eet) +« (Meee) Ge ) +2x 10° Mo 10-2 pe +x f(ee)\(1— es)? , (9) + +where f(e.) is a function of e,. For all values of eé., +f (ee) is between 0.979 and 1.81 (Blaes et al. 2002). We +plot this timescale for a 1 x 10° Mo BH in Figure 1 in blue. + 6 Rose et al. Figure 3. 
On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to @@ -1087,6 +1468,7 @@ G2ρ〈M∗〉 ln Λrlx , (10) where ln Λrlx is the Coulomb logarithm and 〈M∗〉 is the + average mass of the surrounding objects, here assumed to be 1M� (Spitzer 1987; Binney & Tremaine 2008, @@ -1148,6 +1530,7 @@ Miralda-Escudé & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, tseg ≈ 〈M∗〉/mBH × trelax (e.g., Spitzer 1987; + Fregeau et al. 2002; Merritt 2006), which is typically an order of magnitude smaller than the relaxation timescale @@ -1210,6 +1593,7 @@ BHs will then settle onto a Bahcall-Wolf profile, while the stars may follow a shallower profile, with approx- imately n? ∝ r−1.5, inwards of the transition radius + (Linial & Sari in prep.). Therefore, after the initial mass segregation, we allow @@ -1301,6 +1685,7 @@ distributions were drawn from the same sample for the IMF with an average of 10 M� leads to a final distri- bution with an average of ∼ 200 M� and a median of + ∼ 45 M�, which lies within the mass gap. 3. DISCUSSION AND PREDICTIONS @@ -1381,6 +1766,7 @@ profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller (∼ 104 M�). + Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). @@ -1420,6 +1806,7 @@ to 100 M�. Furthermore, if we increase this ∆M esti- mate by a factor of 2 (i.e., use η = 0.05), the simula- tion produces a 3.5× 103 M� IMBH for the same initial + conditions. Our proof-of-concept demonstrates that col- lisions between BH and stars are an important process @@ -1497,9 +1884,11 @@ Center as close as 0.04 pc from the SMBH (e.g., Levin & Beloborodov 2003; Paumard et al. 2006), and star formation episodes can occur as often as every ∼ 5 Myr + (e.g. Lu et al. 2009). 
Therefore, we expect that after the first Gyr, stars within . 0.01 pc will be replenished + at intervals consistent with the star formation episodes; the infalling populations of stars are separated by ∼ @@ -1508,9 +1897,11 @@ the infalling populations of stars are separated by ∼ However, star-star collisions may complicate this pic- ture within ∼ 0.01 pc. As discussed above, regular star + formation ensures the BHs always have a stellar popula- tion to interact with outside of ∼ 0.01 pc.5 At 0.01 pc, + however, the kinetic energy during a collision between two 1 M� stars is larger than their binding energies. @@ -1520,6 +1911,7 @@ Collisions can therefore thin out the stellar populations during the time it takes them to diffuse to these small radii, . 0.01 pc, and may reduce the BH growth in the + innermost region. We reserve the inclusion of star-star collisions for future work. We also note that the disrup- diff --git a/read/results/tika/2201.00029.txt b/read/results/tika/2201.00029.txt index 1ae811b..efed233 100644 --- a/read/results/tika/2201.00029.txt +++ b/read/results/tika/2201.00029.txt @@ -22,12 +22,6 @@ - - - - - - diff --git a/read/results/tika/2201.00037.txt b/read/results/tika/2201.00037.txt index ff2f257..df3a299 100644 --- a/read/results/tika/2201.00037.txt +++ b/read/results/tika/2201.00037.txt @@ -26,13 +26,6 @@ - - - - - - - @@ -209,12 +202,17 @@ descending node of orbit Ωp -ê3I +ê3 + +I I -ê3Lεm -I ê3p +ê3 +Lεm + +I ê3 +p ascending node of orbit @@ -231,7 +229,10 @@ direction S -ê3Iê3L +ê3 +Iê3 + +L M @@ -241,19 +242,28 @@ orbital plane Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded -rectangle) and the Cassini state of Mercury. The normal to the orbital plane (êI3) is offset from the nor- +rectangle) and the Cassini state of Mercury. The normal to the orbital plane (êI +3) is offset from the nor- + +mal to the Laplace plane (êL +3 ) by an angle I = 8.5330◦. 
The symmetry axis of the mantle êp + +3 is offset + +from êI +3 by εm ≈ 2 arcmin. êI -mal to the Laplace plane (êL3 ) by an angle I = 8.5330 -◦. The symmetry axis of the mantle êp3 is offset +3 and êp +3 are coplanar with, and precess about, êL -from êI3 by εm ≈ 2 arcmin. êI3 and êp3 are coplanar with, and precess about, êL3 in a retrograde direction +3 in a retrograde direction -at frequency Ωp = 2π/325, 513 yr -−1. The blue (orange) shaded region indicates the portion of the orbit +at frequency Ωp = 2π/325, 513 yr−1. The blue (orange) shaded region indicates the portion of the orbit when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Gen- + ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. The prediction of Mercury’s obliquity is based on the assumption that the whole planet @@ -279,6 +289,7 @@ its outermost part must be. A solid inner core may have nucleated at the centre size is not well constrained. Inner core growth leads to planetary contraction, and the inferred radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an + approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. @@ -434,6 +445,7 @@ strategy facilitates comparisons between our results. We build our interior model as detailed in Peale et al. [2016]. We first specify rs, ρs (or a density contrast at the ICB), the crustal density ρc and crustal thickness h = R−rm. 
The + three unknowns rf , ρf and ρm are then solved such that the interior model is consistent with the known mass M and chosen values of the moments of inertia of the whole planet C and that @@ -478,9 +490,15 @@ MR2 1 MR2 + [ -(ρs − ρf )r5sεs + (ρf − ρm)r5f εf + (ρm − ρc)r5mεm + ρcR5εr +(ρs − ρf )r5 +sεs + (ρf − ρm)r5 +f εf + (ρm − ρc)r5 + +mεm + ρcR +5εr ] , (1a) @@ -496,9 +514,15 @@ B −A 1 4MR2 + [ -(ρs − ρf )r5sξs + (ρf − ρm)r5fξf + (ρm − ρc)r5mξm + ρcR5ξr +(ρs − ρf )r5 +sξs + (ρf − ρm)r5 +fξf + (ρm − ρc)r5 + +mξm + ρcR +5ξr ] . (1b) @@ -516,13 +540,12 @@ Mercury Parameter Numerical value Reference mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] -rotation rate, Ωo = 1.5n 2π/58.64623 day -−1 Stark et al. [2015b] +rotation rate, Ωo = 1.5n 2π/58.64623 day−1 Stark et al. [2015b] -orbit precession rate, Ωp 2π/325, 513 yr -−1 Baland et al. [2017] +orbit precession rate, Ωp 2π/325, 513 yr−1 Baland et al. [2017] Poincaré number, δω = Ωp/Ωo 4.9327× 10−7 + orbital eccentricity, ec 0.20563 Baland et al. [2017] orbital inclination, I 8.5330◦ Baland et al. [2017] @@ -530,11 +553,15 @@ orbital inclination, I 8.5330◦ Baland et al. [2017] mean planetary radius, R 2439.360 km Perry et al. [2015] mass, M 3.3012× 1023 kg Genova et al. [2019] + mean density, ρ̄ 5429.5 kg m−3 J2 5.0291× 10−5 Genova et al. [2019] + C22 8.0415× 10−6 Genova et al. [2019] + polar surface flattening, εr 6.7436× 10−4 Perry et al. [2015] + equatorial surface flattening, ξr 5.1243× 10−4 Perry et al. [2015] Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 @@ -543,7 +570,9 @@ m3/s2 taken from Genova et al. [2019]. The mean density is calculated from 4π 3 ρ̄R3 = M . 
The numerical -values of εr and ξr are calculated from εr = (ā− c)/R and ξr = (a− b)/R, where ā = 12 (a+ b) and where +values of εr and ξr are calculated from εr = (ā− c)/R and ξr = (a− b)/R, where ā = 1 +2 +(a+ b) and where a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor @@ -556,6 +585,7 @@ and Wieczorek [2016] who adopted the same strategy in their interior modelling o Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog- raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ + and an equatorial offset of ∼ 15◦ [Perry et al., 2015]. Once the densities and flattenings of all interior regions are known, we can specify the mo- @@ -592,12 +622,13 @@ Ā ef = Cf − Āf -Āf +Āf es = + Cs − Ās -Ās +Ās , (3a) γ = @@ -606,8 +637,8 @@ Ā γs = Bs −As -Ās +Ās . (3b) We further note that e and γ are connected to J2 and C22 by @@ -642,9 +673,12 @@ Confidential manuscript submitted to JGR-Planets Ωf -ê3p +ê3 +p + +ê3 +sê3 -ê3sê3 I I @@ -653,39 +687,50 @@ I θp -ê3L +ê3 +L + +ê1 +p -ê1p ê2p +ê2 +p Cassini plane ωΩot -ê3I +ê3 +I I εm -ê3p +ê3 +p ê1 -ê2p +ê2 +p -ê3L +ê3 +L a) b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) -in a frame attached to the rotating mantle. The orbit normal (êI3) is tilted by an angle I = 8.533 -◦ from +in a frame attached to the rotating mantle. The orbit normal (êI +3) is tilted by an angle I = 8.533◦ from + +the Laplace normal (êL +3 ) and the symmetry axis of Mercury’s mantle (êp -the Laplace normal (êL3 ) and the symmetry axis of Mercury’s mantle (ê -p 3) is tilted by an obliquity εm -with respect to êI3. Shown in (a) are the orientations of the symmetry axis of the inner core (ê -s +with respect to êI +3. 
Shown in (a) are the orientations of the symmetry axis of the inner core (ês + 3), the rotation rate vectors of the mantle (Ω), fluid core (Ωf ) and inner core (Ωf ) and angles θp, θn, θm, θf @@ -698,8 +743,9 @@ the mantle, fluid core and inner core, respectively; blue shaded parts show an e The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial -mantle axes êp1 and ê -p +mantle axes êp +1 and êp + 2 with respect to the Cassini plane. Viewed in the frame attached to the rotating mantle (b), the Cassini plane is rotating at frequency ωΩo = −Ωo − Ωp cos I in the longitudinal direc- @@ -726,37 +772,17 @@ fine the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ωo = day−1, with Ωo = 1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby -the orientations of the orbit normal (êI3) and of the mantle symmetry axis (ê -p -3) are both copla- +the orientations of the orbit normal (êI3) and of the mantle symmetry axis (êp3) are both copla- nar with, and precess about, the normal to the Laplace plane (êL3 ). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present -purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between êL3 and ê -I -3 - -is the orbital inclination I = 8.5330◦ [Baland et al., 2017], the angle between êI3 and ê -p -3 is the - -obliquity εm and the angle between ê -L -3 and ê - -p -3 is θp = I + εm. The precession of ê - -I -3 and ê - -p -3 +purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between êL3 and êI3 +is the orbital inclination I = 8.5330◦ [Baland et al., 2017], the angle between êI3 and êp3 is the -about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr -−1 [Baland et al., 2017]. +obliquity εm and the angle between êL3 and êp3 is θp = I + εm. 
The precession of êI3 and êp3 +about the Laplace pole is retrograde with frequency Ωp = 2π/325, 513 yr−1 [Baland et al., 2017]. The mantle and crust are welded together and form a single rotating region which we re- @@ -767,10 +793,8 @@ of the mantle are expected to remain in close alignment, but they do not coincid define the rotation rate vector of the mantle by Ω, and its misalignment from êp3 by an angle θm. Note that θm � εm and it is often the spin axis of Mercury which is used to define the -obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ê -p -3 and Ω would +obliquity εm [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, êp3 and Ω would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and @@ -778,9 +802,7 @@ the angles I, εm and θm would completely describe the Cassini state. The prese outer core and solid inner core require three additional orientation vectors and angles. The sym- -metry axis of the inner core is defined by unit vector ês3 and its misalignment from ê -p -3 by an +metry axis of the inner core is defined by unit vector ês3 and its misalignment from êp3 by an angle θn. The rotation vectors of the fluid core and inner core are defined as Ωf and Ωs, re- @@ -789,20 +811,14 @@ spectively, and their misalignment from the rotation vector of the mantle Ω ar gles θf and θs (see Figure 2a). The rotation and symmetry axes of the inner core remain in close alignment, so θn ≈ θs. To be formal in our definition of the different angles of misalignment, + for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. -At equilibrium in the Cassini state, the three orientation vectors (êI3, ê -p -3, ê - -s -3) and three +At equilibrium in the Cassini state, the three orientation vectors (êI3, êp3, ês3) and three -rotation vectors (Ω, Ωf , Ωs) are forced to precess about ê -L -3 at the same frequency. 
If we ne- +rotation vectors (Ω, Ωf , Ωs) are forced to precess about êL3 at the same frequency. If we ne- glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed @@ -817,6 +833,7 @@ per Mercury day, is equal to ω = −1− δω cos(θp) . (5) The factor δω = Ωp/Ωo = 4.933 × 10−7 is the Poincaré number, expressing the ratio of the + forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal as seen in the mantle frame is expressed as @@ -876,12 +893,7 @@ ing in the same direction as the vector connecting the Sun to the descending nod orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque -is periodic, rotating at frequency ωΩo. Setting the equatorial directions ê -p -1 and ê - -p -2 to correspond +is periodic, rotating at frequency ωΩo. Setting the equatorial directions êp1 and êp2 to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial @@ -935,6 +947,7 @@ corresponds to the observed obliquity of the mantle symmetry axis. It is thus eq though we keep the tilde notation in the presentation of our results to emphasize that it rep- resents the real part of the solution from our system. Furthermore, since m̃ � ε̃m, we often + refer to ε̃m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. 
@@ -955,14 +968,17 @@ equations are [ Āf + Ā m̃f + Ās + Ā m̃s + α3es Ās + Ā ñs @@ -971,7 +987,8 @@ ñs 1 -iΩ2oĀ +iΩ2 +oĀ ( Γ̃sun @@ -981,12 +998,14 @@ iΩ2oĀ ωm̃+ (1 + ω + ef ) m̃f − ωα1es Ās -Āf +Āf ñs = + 1 -iΩ2oĀf +iΩ2 +oĀf ( − Γ̃cmb − Γ̃icb @@ -997,10 +1016,12 @@ iΩ2oĀf (ω − α3es)m̃+ α1esm̃f + (1 + ω) m̃s + (1 + ω − α2) esñs = 1 -iΩ2oĀs +iΩ2 +oĀs ( -Γ̃ssun + Γ̃icb +Γ̃s +sun + Γ̃icb ) , (12c) @@ -1027,7 +1048,9 @@ on the inner core, αg = 8πG -5Ω2o +5Ω2 +o + [ρc(εr − εm) + ρm(εm − εf ) + ρf εf ] , (13b) where G is the gravitational constant. @@ -1036,11 +1059,14 @@ where G is the gravitational constant. a small mantle obliquity ε̃m and a small inner core tilt ñs, it is given by -Γ̃sun = −iΩ2oĀ +Γ̃sun = −iΩ2 +oĀ + ( φmε̃m + Ās + Ā α3φsñs @@ -1051,6 +1077,67 @@ where –10– +at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely +real. We concentrate our analysis in this work on the real part of the solutions, which corre- +sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, é, +corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to em, +though we keep the tilde notation in the presentation of our results to emphasize that it rep- +resents the real part of the solution from our system. Furthermore, since m < &m, we often +refer to €m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is +more customarily described in terms of the latter in the literature. + +The model of Mathews et al. [1991] is developed under the assumption of small angles as +appropriate for the nutations on Earth. The details on how the equations of the model are de- +rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equa- +tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer- +cury, the fluid core, and the inner core in the reference frame of the rotating mantle. 
These three +equations are + + + + + + + +- As. Ag. . 1 / +(w—e)m+ (1+w) spiny + qs + A3es Ms | = i@A (Foun) , (12a) +i+ (L+w+ep) i As . (= DPomo — Tico) (12b) +wm Ww e Mf — WALEs =—Ns = = = —~+Lemb~— tic ’ +PPINP COLES TS GORA, pee +1 ~ ~ +(w —ages)ia + ayestiny + (1+) tas + (L+w— a9) esits = 55 (ee + Pe) , (12¢) + +and a fourth equation consists of a kinematic relation that expresses the change in the orien- +tation of the inner core figure as a result of its own rotation, + +Ms + wits =0. (12d) + +In these equations, the parameters a1, @2 and a3 involve the density contrast at the ICB +and are given by + +a, — Pf. a3=l-ay, a,=a,—aszd,, (13a) +Ps : +where the parameter a, is a measure of the ratio of the gravitational to inertial torque applied + +on the inner core, + +_ 81G +52 + +where G is the gravitational constant. + +Qg [Pcl€r — Em) + Pm(€m — Ef) + Pres] » (13b) + +Tsun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. For +a small mantle obliquity €,, and a small inner core tilt ,, it is given by + +Peun = —i02A (¢nén + Tobutis) ’ (14) + +where + +—10— + + Confidential manuscript submitted to JGR-Planets @@ -1062,7 +1149,8 @@ Confidential manuscript submitted to JGR-Planets n2 -Ω2o +Ω2 +o [ G210 e+ @@ -1082,7 +1170,8 @@ G201 γ n2 -Ω2o +Ω2 +o [ G210 es + @@ -1100,7 +1189,9 @@ and where G210 and G201 are functions of the orbital eccentricity ec, G210 = 1 -(1− e2c)3/2 +(1− e2 +c)3/2 + , (16a) G201 = @@ -1112,16 +1203,22 @@ ec − 123 16 -e3c + +e3 +c + 489 128 -e5c . (16b) +e5 +c . (16b) + +The gravitational torque by the Sun acting on the inner core alone, Γ̃s +sun, is -The gravitational torque by the Sun acting on the inner core alone, Γ̃ssun, is +Γ̃s +sun = −iΩ2 -Γ̃ssun = −iΩ2oĀsα3φs(ε̃m + ñs) . (17) +oĀsα3φs(ε̃m + ñs) . 
(17) Γ̃cmb and Γ̃icb are the torques from tangential stresses by the fluid core on the mantle at the @@ -1131,12 +1228,10 @@ terms of dimensionless complex coupling constants Kicb and Kcmb and the differen lar velocities at each boundary [e.g Buffett , 1992; Buffett et al., 2002], -Γ̃icb = iΩ -2 +Γ̃icb = iΩ2 oĀsKicb(m̃f − m̃s) , (18a) -Γ̃cmb = iΩ -2 +Γ̃cmb = iΩ2 oĀfKcmb m̃f . (18b) Specific expressions for Kicb and Kcmb are delayed to sections 4 and 5 when we consider the @@ -1154,6 +1249,7 @@ m̃+ (1 + ω)p̃ = 0 . (19) For Mercury, it is more convenient to connect the internal model with ε̃m instead of p̃. This is because θp ≈ 8.567◦ whereas ε̃m ≈ 2 arcmin and thus the latter obeys more strictly the + condition of small angles assumed in our framework. Furthermore, the external torques act- ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ε̃m. Writ- @@ -1181,6 +1277,7 @@ quency ω. The system can be written in a matrix form as Confidential manuscript submitted to JGR-Planets M · x = y , (22a) + where the solution (x) and forcing (y) vectors are xT = [m̃, m̃f , m̃s, ñs, ε̃m] , (22b) @@ -1192,29 +1289,36 @@ and the elements of matrix M are M =  -ω − e (1 + ω) Āf +ω − e (1 + ω) + +Āf Ā (1 + ω) Ās Ā Ās + Ā α3 + ( (1 + ω)es + φs ) φm -ω 1 + ω + ef +Kcmb + -Ās +ω 1 + ω + ef +Kcmb + Ās + Āf -Kicb − ĀsĀf Kicb −ωesα1 +Kicb − Ās -Ās Āf +Kicb −ωesα1 + +Ās +Āf 0 ω − α3es α1es −Kicb 1 + ω +Kicb (1 + ω − α2)es + α3φs α3φs @@ -1304,6 +1408,7 @@ For a rigid planet with no fluid and solid cores, our system of equations reduce tions (12a) and (20), (ω − e)m̃+ φm ε̃m = 0 , (23a) + m̃+ (1 + ω)ε̃m = −(1 + ω) tan I . (23b) Using Equation (21), δω � 1, and the approximation Ā(1 + e+ δω cos I) = C+ Āδω cos I ≈ @@ -1318,20 +1423,20 @@ sin I + cos I ε̃m ) . (24b) -Equation (24b) gives a direct relationship between m̃ and ε̃m. For I = 8.5330 -◦, δω = +Equation (24b) gives a direct relationship between m̃ and ε̃m. 
For I = 8.5330◦, δω = 4.9327×10−7 and taking ε̃m = 2.04 arcmin, this gives m̃ = 2.52×10−4 arcmin, much smaller + than ε̃m: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. Substituting Equation (24b) in Equation (24a) gives CΩp -( +( sin I + cos I ε̃m -) +) = ĀΩoφmε̃m , (25) and isolating for ε̃m, @@ -1342,8 +1447,7 @@ CΩp sin I −CΩp cos I + ĀΩoφm . (26) -Upon using Equations (4), (15a), and Ωo = -3 +Upon using Equations (4), (15a), and Ωo = 3 2n, we can write ε̃m = @@ -1355,6 +1459,7 @@ CΩp sin I This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 [see for instance Equation (1) of Baland et al., 2017, where their definition of Ω̇ is equal to −Ωp]. + Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo- @@ -1372,8 +1477,8 @@ n Ωp G210J2 + 2G201C22 -cos I + sin I/ε̃m +cos I + sin I/ε̃m . (28) which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation @@ -1410,11 +1515,13 @@ which is equivalent to the prediction by Peale [2005] when neglecting its small ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid- body precession and hence C was replaced by Cm. Using C = 0.346 · MR2 [Margot et al., + 2012] and the numerical values for n, J2, C22 and ec given in Table 1, we obtain a free preces- sion period of Tfp = 2π/ωfp = 1298 yr. If we use Cm instead of C in Equation (29), and take Cm = 0.431 ·C = 0.431 ·0.346 ·MR2 [Margot et al., 2012], we obtain Tfp = 2π/ωfp = 560 yr. + These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid @@ -1440,11 +1547,13 @@ the free precession frequency ωfp. Because ωfp > Ωp, Mercury occupies Cassin 1974]. 
Furthermore, Equation (30) shows that a large obliquity can be generated by resonant amplification if Ωp ≈ ωfp. Since ωfp � Ωp, resonant amplification is minimal and the re- + sulting obliquity, ε̃m ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5◦. 2.3.2 The misalignment of the fluid and solid cores With ω = −1− δω cos I and δω � 1, Equation (12d) gives ñs ≈ m̃s; as for the mantle, + the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between m̃ and ε̃m of Equation (24b) is independent of the interior structure, @@ -1454,11 +1563,11 @@ so it remains unchanged when a fluid and a solid cores are present. Substituting tion (12a), and setting ñs = m̃s, the angular momentum equation of the whole planet becomes CΩp -( +( sin I + cos I ε̃m -) +) + (Āf cos I Ωp)m̃f + Ās(cos I Ωp − Ωoα3φs)ñs = ĀΩoφmε̃m . (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- @@ -1467,12 +1576,14 @@ cation of the mantle obliquity ε̃m. Approximate analytical solutions of ñs a ñs ≈ Ωp + κλs ( 1 + Ωo(Kicb − α1es) + λf )( @@ -1486,6 +1597,7 @@ sin I + cos I ε̃m m̃f ≈ Ωp + λf ( @@ -1495,9 +1607,11 @@ sin I + cos I ε̃m + Ωo + λf Ās + Āf ( @@ -1509,9 +1623,12 @@ ñs , (32b) where κ = 1− Ās + Āf -Ω2o +Ω2 +o + ( Kicb − α1es @@ -1521,10 +1638,76 @@ Kicb − α1es , (33a) λf = σ̄f − Ωp cos I , (33b) + λs = σ̄s − Ωp cos I , (33c) –14– +MR? +C + +which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com- + + + +Wfp =n (GaroJe + 2G201C22) (29) + +ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid- +body precession and hence C was replaced by C,,. Using C = 0.346 - MR? [Margot et al., +2012] and the numerical values for n, Jo, C22 and e, given in Table 1, we obtain a free preces- +sion period of Ts, = 27/w yp, = 1298 yr. If we use C,, instead of C' in Equation (29), and take +Cm = 0.431-C = 0.431-0.346-M R? 
[Margot et al., 2012], we obtain Typ = 27/wy, = 560 yr. +These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, +the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid +core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The +true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, +the free precession period is much shorter than the forcing period of 325 kyr. Using Equation +(29), Equation (27) can be written as [e.g. Baland et al., 2017] + +Q, sin I +—Q, cosI + wep + +Em = + +(30) + +The obliquity of Mercury is thus determined by how the forcing frequency 2, compares with +the free precession frequency wy). Because wr, > Qp, Mercury occupies Cassini state 1 [Peale, +1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant +amplification if Q, % wp. Since wrp “> Q,, resonant amplification is minimal and the re- +sulting obliquity, €,, * 2 arcmin, is much smaller than the inclination angle I * 8.5°. + +2.3.2 The misalignment of the fluid and solid cores + +With w = —1—dwcosT and dw <« 1, Equation (12d) gives i, ~ ms; as for the mantle, +the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. +The relationship between m and é,, of Equation (24b) is independent of the interior structure, +so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa- +tion (12a), and setting i, = ms, the angular momentum equation of the whole planet becomes + +CQ, (sin I + cos I En) + (Af cos IQ,) mz + As(cos I QW — Q503¢s)s = AQOmEn - (31) + +This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- +cation of the mantle obliquity €,,. 
Approximate analytical solutions of i, and my are given by + + + + + +Q Q, Kic _ Ss ‘ ~ Q, S~ +fig & = (1 + Regents) (sin + cosI Em) — me Em, (32a) +Q Q, As . +mp & xy ooind + cos I Em) + Np Ay (Mit — Q1€s) is , (32b) +where +Ag 93 (Kieb — ares)” +Ka] — Seek OS) (33a) +Ay de Ap +Ap = of —Qpcosl, (33b) +As = Gs; —Q,cosl, (33c) + +—|4— + + Confidential manuscript submitted to JGR-Planets @@ -1537,8 +1720,8 @@ and where we have introduced the frequencies ef +Kcmb + Ās -Āf +Āf Kicb ) @@ -1560,6 +1743,7 @@ vide useful predictions of ñs and m̃f . In the limit of a very strong coupling between the fluid core, solid core and mantle, σ̄s � Ωp and σ̄f � Ωp, so that ñs → 0, m̃f → 0 and Equation (31) reverts back to Equation (25) + for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and mantle (i.e. for spherical internal boundaries, ef = es = γs = 0 and no viscous or EM cou- @@ -1573,19 +1757,21 @@ Inserting these in Equation (31), and with the moment of inertia of the mantle e C − Āf − Ās, we obtain Cm Ωp -( +( sin I + cos I ε̃m -) +) = ĀΩoφmε̃m . (35) which describes, as expected, a forced precession of the mantle alone. If this was the case for Mercury, taking Cm/C = 0.431, the obliquity should be ε̃m ≈ 0.88 arcmin, substantially smaller + than the observed obliquity of ε̃m ≈ 2 arcmin. If σ̄f ≈ Ωp (and thus λf → 0) and/or σ̄s ≈ Ωp (and thus λs → 0) resonant amplifica- + tion leads to large amplitudes for m̃f , ñs and the mantle obliquity ε̃m. The frequencies σ̄f and σ̄s are closely related to the FCN and FICN frequencies ωfcn and ωficn, respectively. Hence, @@ -1603,7 +1789,9 @@ that for reasonable interior models of Mercury, the FCN and FICN periods are in a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- pect an important amplification effect. Furthermore, since ωfcn, ωficn � Ωp, then σ̄f � Ωp + and σ̄s � Ωp, and we are in the strong coupling limit. 
The mantle obliquity should be close + to that expected for a rigid planet, as observations suggest. Therefore, we expect that m̃f and ñs should be of the order of ε̃m or smaller. This further justifies the assumption of small an- @@ -1626,6 +1814,63 @@ Obviously, this reflects a Cassini state equilibrium in which the fluid core and –15– +and where we have introduced the frequencies + +As + +of= OQ («; + Kemp + Ke) ; (33d) +Af + +G5 =o (casa, — €,a4 + aghs + Kies) . (33e) + +These solutions are good approximations for all the results that we present in section 3. For +an observed mantle obliquity €,, and for a chosen set of interior model parameters, they pro- +vide useful predictions of n, and mf. + +In the limit of a very strong coupling between the fluid core, solid core and mantle, o, > +Q, and of > Dp, so that rs > 0, mz > 0 and Equation (31) reverts back to Equation (25) +for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and +mantle (i.e. for spherical internal boundaries, e f = €s = Ys = 0 and no viscous or EM cou- +pling, Kemp = Kich = 0), then + +d,=0, K=1, Ap=HAs=—Hpcosl, My =As = —(tanI+é,,). (34) + +Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C,, = +C — Ay — Ag, we obtain + +Cm Q, (sin I + cos I En) = ANodmém - (35) + +which describes, as expected, a forced precession of the mantle alone. If this was the case for +Mercury, taking C,,/C = 0.431, the obliquity should be é,, ~ 0.88 arcmin, substantially smaller +than the observed obliquity of €,, * 2 arcmin. + +If of © Q, (and thus A~ — 0) and/or a, & Q, (and thus A, — 0) resonant amplifica- +tion leads to large amplitudes for mf, %, and the mantle obliquity €,,. The frequencies a, and +a, are closely related to the FCN and FICN frequencies wyen and wricn, respectively. 
Hence, +just as a large mantle obliquity can result from resonant amplification when the forcing frequency +approaches the free precession frequency, a large mantle obliquity can likewise result from res- +onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These +frequencies depend on the interior density structure and are not known. However, we will show +that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of +a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- +pect an important amplification effect. Furthermore, since wWfen,Wficen > Qp, then Tf > OQp +and @, > Qp»), and we are in the strong coupling limit. The mantle obliquity should be close +to that expected for a rigid planet, as observations suggest. Therefore, we expect that my and +ns should be of the order of €,, or smaller. This further justifies the assumption of small an- +gles that we have adopted. + +3 Results +3.1 Geodetic constraints and interior density structure + +All our interior models are constrained to match the mass M of Mercury and specific choices +of C = C/MR? and C,,/C. The choice of C is determined from Equation (28). For the pa- +rameters listed in Table 1, and an observed obliquity of ¢,, = 2.04 arcmin [Margot et al., 2012], +this gives C = C /M R? = 0.3455 and all our interior models are consistent with this choice. +Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are + +—15-— + + Confidential manuscript submitted to JGR-Planets @@ -1654,11 +1899,14 @@ Cm where -f(ec) = 1− 11e2c + +f(ec) = 1− 11e2 +c + + 959 48 -e4c , (37) +e4 +c , (37) and where ζ is a correction that takes into account the entrainment of the inner core in the li- @@ -1668,11 +1916,9 @@ rection is small and, to simplify, we neglect it here. 
Taking the observed libra to be 38.5 arcsec [Margot et al., 2012], Ĉ = C/MR2 = 0.3455 and C22 and ec from Table 1, -this corresponds to a ratio Cm/C = 0.4269, or equivalently Ĉm = Cm/MR -2 = 0.1475. +this corresponds to a ratio Cm/C = 0.4269, or equivalently Ĉm = Cm/MR2 = 0.1475. -For all results presented in our study, the crustal density is set at ρc = 2974 kg m -−3 [Sori , +For all results presented in our study, the crustal density is set at ρc = 2974 kg m−3 [Sori , 2018]. Our standard choice for the crustal thickness is h = 26 km [Sori , 2018], although in @@ -1680,8 +1926,7 @@ section 3.2 we also present some results with other choices of h. We have consid sible prescriptions connected to the density of the inner core. First, for all the results presented -in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m -−3 ap- +in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρs = 8800 kg m−3 ap- proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure @@ -1715,18 +1960,11 @@ ner core radius rs for each of the two inner core density scenarios: a fixed ρs the inner core is small, its presence has a limited influence on the resulting density structure, -and we find ρm = 3197 kg m -−3, ρf = 7263 kg m - -−3 and rf = 2000 km in each of the two +and we find ρm = 3197 kg m−3, ρf = 7263 kg m−3 and rf = 2000 km in each of the two -scenarios. When ρs is fixed to 8800 kg m -−3, as the inner core reaches 1500 km in size, rf in- +scenarios. When ρs is fixed to 8800 kg m−3, as the inner core reaches 1500 km in size, rf in- -creases to above 2100 km, ρm approaches 4000 kg m -−3 and ρf is reduced to below 5000 kg m - -−3. +creases to above 2100 km, ρm approaches 4000 kg m−3 and ρf is reduced to below 5000 kg m−3. 
Figure 3a illustrates that when adopting a fixed ρs, there is a limit in the possible inner core @@ -1834,10 +2072,7 @@ m fluid core density -CM -B - -ra +CMB ra diu s @@ -1885,8 +2120,8 @@ imation, the FCN and FICN frequencies (as seen in an inertial frame) for Kcmb = are given by ωfcn ≈ −Ωo -( +( Ā Ām + Ās @@ -1903,8 +2138,10 @@ efφm , (38a) ωficn ≈ Ωo + ( Ā+ Ās + Ā− Ās )( @@ -1917,10 +2154,10 @@ The expression of the FICN frequency involves the inertial torque (term esα1) a itational torque from the rest of Mercury (esα3αg) and the Sun (α3φs) acting on the inner core. -For both of our inner core density scenarios (and our choices of ρs = 8800 kg m -−3 and α3 = +For both of our inner core density scenarios (and our choices of ρs = 8800 kg m−3 and α3 = 0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α3αg � α1; + the gravitational torque dominates the inertial torque, in large part because of the slow rota- tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion @@ -1938,6 +2175,7 @@ and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approxi Confidential manuscript submitted to JGR-Planets sion for the FICN differs by a factor (Ā+Ās)/(Ā−Ās) compared to that given in Dumberry + and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. First, it @@ -1947,8 +2185,8 @@ involves the external torque from the Sun captured by the parameter φm. If we s we obtain the FCN frequency for a decoupled model in which only interior torques contribute, ωfcn,int ≈ −Ωo -( +( Ā Ām + Ās @@ -2009,6 +2247,7 @@ at the largest rs. Recall that m̃f is measured with respect to the mantle rotat coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with respect to the orbit normal is ε̃m+m̃f ≈ 6 arcmin. 
The reason why the obliquity of the spin + axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), which shows that m̃f is determined by the resonant amplification of the FCN mode at the forc- @@ -2174,10 +2413,12 @@ Āf + Ās + (σ̄s − Ωp cos I) ) − Ās + Āc Ωoα3φs @@ -2194,7 +2435,9 @@ how the core is entrained to precess with the mantle, with the coupling between pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit of σ̄f , σ̄s → 0, then χ = −1, C ′ = Cm, the core is fully decoupled from the mantle and we + retrieve Equation (35). If instead σ̄f , σ̄s → ∞, then χ = 0, C ′ = C and we retrieve the pre- + diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ωp, as is the case here, resonant amplification is weak, χ is small and positive, C ′ > C and this @@ -2212,6 +2455,7 @@ misalignment of the fluid core. In Equation (41), σ̄s � σ̄f , so to a good Confidential manuscript submitted to JGR-Planets χ ≈ Āf + Āc Ωo cos I @@ -2220,9 +2464,8 @@ Āc . (42) For a small inner core, χ ≈ 7.55×10−3. As the inner core grows, Āf decreases, and the com- -bination Ācχ also decreases. This implies that C -′ decreases with inner core size and, consequently, +bination Ācχ also decreases. This implies that C ′ decreases with inner core size and, consequently, ε̃m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the @@ -2243,6 +2486,7 @@ terior boundary in order to match J2 and C22, and thus different predictions for ñs. To illustrate this, we show on Figure 4 two additional predictions computed with crustal thicknesses changed to h = 16 and 36 km. The change in ε̃m remains modest, ∼ 0.025%, but + the changes in m̃f and ñs are more substantial, ∼ 5% and ∼ 10%, respectively. We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment @@ -2254,6 +2498,7 @@ if the inner core is misaligned with the mantle. 
As seen in the mantle frame, a (with ñs assumed small) leads to an off-diagonal component of the moment of inertia tensor of (Cs−Ās)α3ñs = Āsesα3ñs. The angle by which the mantle frame must be rotated so that + the moment of inertia of the whole planet is purely diagonal is (Āsesα3ñs)/(Āe), and hence a good approximation of ε̃g is @@ -2345,7 +2590,9 @@ ary layer remains laminar. Whether this is reasonable can be assessed by evaluat number Re = rf∆uf/ν, associated with the differential velocity ∆uf = rfΩom̃f at the CMB. For rf = 2000 km, and taking m̃f = 4 arcmin ≈ 0.001 rad from the results in the previous + section, we get ∆uf ∼ 2 mm/s and Re ∼ 6 × 109. Such a large Reynolds number indicates + that the viscous friction between the fluid core and mantle should induce turbulent flows, as is the case for the Cassini state of the Moon [Yoder , 1981; Williams et al., 2001; Cébron et al., @@ -2357,7 +2604,12 @@ pendent of the fluid viscosity and proportional to the square of the differentia coupling constant Kcmb should be in the form Kcmb = fcmb -∣∣m̃f ∣∣(0.195− 1.976i) , (45) + +∣∣m̃f + +∣∣(0.195− 1.976i +) +, (45) where fcmb is a numerical factor that depends among other things on surface roughness. In- @@ -2380,13 +2632,18 @@ by fitting a rotation model to the librations of the Moon observed by Lunar Lase [Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms of a coupling parameter K and a recent estimate is K/CL = (1.41±0.34)×10−8 day−1 [Williams + and Boggs, 2015], where CL is the lunar polar moment of inertia. The connection between K and Kcmb is -∣∣∣Im[Kcmb]∣∣∣ = K +∣∣∣Im[Kcmb] +∣∣∣ = + +K CL CL + CfL 1 @@ -2395,17 +2652,22 @@ CfL , (46) where CfL is the moment of inertia of the lunar core and ΩL = 2.66 × 10−6 s−1 the lunar + rotation rate. With CfL/CL ∼ 7× 10−4 [e.g. Williams et al., 2014], this gives |Im[Kcmb]| ∼ 9×10−5. 
In order to match this amplitude in Equation (44a), with lunar parameters and as- + suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 + s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Williams et al., 2001], more than 10 times larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kcmb + is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- cury should be smaller. Thus, ν ≈ 5×10−4 m2 s−1 gives a conservative upper bound for the + possible effective turbulent viscosity that can be expected for Mercury. Figure 5 shows how ε̃m, m̃f and ñs vary as functions of inner core radius for different choices @@ -2429,11 +2691,74 @@ cosity that we have identified above (i.e ν ≈ 5 × 10−4 m2 s−1), the infl –21– +The above parameterizations are valid only under the assumption that the flow in the bound- +ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds +number Re = rpAuy/v, associated with the differential velocity Aur = rpQommy at the CMB. + +For rs = 2000 km, and taking mys = 4 arcmin ~ 0.001 rad from the results in the previous +section, we get Aus ~ 2 mm/s and Re ~ 6 x 10°. Such a large Reynolds number indicates +that the viscous friction between the fluid core and mantle should induce turbulent flows, as +is the case for the Cassini state of the Moon [ Yoder, 1981; Williams et al., 2001; Cébron et al., +2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde- +pendent of the fluid viscosity and proportional to the square of the differential velocity. The +coupling constant Kemp should be in the form + +Kemb = femo|tirs| (0.195 — 1.976i) . (45) + +where femp is a numerical factor that depends among other things on surface roughness. 
In- +corporating a viscous coupling of this form in our rotational model is more challenging not only +because femp is not known but also because the viscous torque is no longer linear in my. One +strategy is to find solutions through an iterative process. The simpler alternative strategy that +we adopt is to use the laminar formulas of Equation (44) but with the understanding that v +represents an effective turbulent viscosity. + +To give an estimate of an appropriate turbulent value for v, we turn to the Cassini state +of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained +by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) +[Williams et al., 2001, 2014; Williams and Boggs, 2015]. Viscous dissipation is reported in terms +of a coupling parameter K and a recent estimate is K/C, = (1.4140.34) x10~° day~! [ Williams +and Boggs, 2015], where Cz is the lunar polar moment of inertia. The connection between KC +and Kemp is + +K Cy, 1 +Im[Ken | =A SE 46 +|Tra[Kems) Gt (46) +where C'ry is the moment of inertia of the lunar core and Q;, = 2.66 x 10-® s—! the lunar + +rotation rate. With Cr,/Cr ~ 7 x 10~4 [e.g. Williams et al., 2014], this gives |Im[Kems]| ~ +9x10~°. In order to match this amplitude in Equation (44a), with lunar parameters and as- +suming a lunar core radius of 400 km, the required turbulent viscosity is v =~ 5 x 1074+ m? + +s-', about 500 times larger than the laminar viscosity. Note that the differential velocity at the +CMB of the Moon is closer to 3 cm/s [ Yoder, 1981; Williams et al., 2001], more than 10 times +larger than our estimate for Mercury above. Since the effective turbulent coupling constant Kemp +is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- +cury should be smaller. Thus, v & 5x 1074 m? s~! gives a conservative upper bound for the +possible effective turbulent viscosity that can be expected for Mercury. 
+ +Figure 5 shows how €,,, my and m, vary as functions of inner core radius for different choices + +of effective viscosities. For vy = 107° m? s~!, viscous coupling is too weak to affect €,, and + +my and they are essentially unchanged from the solutions shown in Figure 4. With increasing +v, the stronger viscous coupling between the core and the mantle reduces their differential ve- +locity, and my is reduced. With the reduced differential velocity at the CMB, the prediction + +of Em gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB +viscous coupling model is different than the one used by Peale et al. [2014], our results for é,, +and my are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the +fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent vis- +cosity that we have identified above (i.e v © 5 x 107+ m? s~'), the influence of viscous cou- + +—21-— + + Confidential manuscript submitted to JGR-Planets εm + εg mf @@ -2543,6 +2868,7 @@ ner core with the fluid core spin axis. The viscous coupling strength is inverse to rs, so a larger viscosity results in a larger inner core radius at which viscous coupling is of a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4 + m2 s−1, Figure 5 indicates that ñs may be 1 arcmin or larger only if the inner core radius is smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravi- @@ -2556,6 +2882,7 @@ The larger inner core tilt observed with increasing effective viscosity results offset between the obliquity of the principal moment of inertia ε̃g and that of the mantle ε̃m, though it remains limited. For the upper bound of ν = 5 × 10−4 m2 s−1, and for rs = 1500 + km, the difference between ε̃g and ε̃m is limited to 0.0013 arcmin. 
The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller @@ -2567,6 +2894,7 @@ implies that the larger the inner core is, the more we approach a planet precess body, although the misalignment of the spin axis of the fluid core remains important, approx- imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ε̃m, m̃f + and ñs change with inner core size would certainly be different for a turbulent model of viscous coupling. But the general conclusion remains that the addition of viscous coupling at the CMB @@ -2600,6 +2928,7 @@ of the radial magnetic field Br and the electrical conductivity σ on either sid ary [Rochester , 1960, 1962, 1968]. The parametrization of EM coupling in terms of the coupling constants Kcmb and Kicb + has been developed in a few studies [e.g. Buffett , 1992; Buffett et al., 2002; Dumberry and Koot , 2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given @@ -2609,21 +2938,30 @@ by Br = 3 〈 -Bdr -〉 +Bd +r + +〉 cos θ, where + 〈 -Bdr -〉 +Bd +r + +〉 is the r.m.s. strength of the field, the coupling constant Kcmb can be written is the form Kcmb = 3(1− i)Fcmb + 〈 -Bdr +Bd + +r + 〉2 , (47) @@ -2645,6 +2983,7 @@ Fcmb = σfδf )−1 + , (48) and where σm, δm = @@ -2656,21 +2995,27 @@ and where σm, δm = 2/(σfµΩo) are the electrical conductivi- ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π×10−7 -N A−2 the magnetic permeability of free space. The r.m.s. field strength +N A−2 the magnetic permeability of free space. The r.m.s. field strength 〈 -Bdr -〉 +Bd + +r +〉 is connected to -the Gauss coefficient g01 of the surface magnetic field by +the Gauss coefficient g0 +1 of the surface magnetic field by 〈 -Bdr -〉 +Bd +r + +〉 = + 2√ 3 @@ -2679,27 +3024,34 @@ R rf -)3 ∣∣g01∣∣ . (49) +)3 ∣∣g0 +1 + +∣∣ . (49) + We can readily build an estimate of the amplitude of Kcmb. 
The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding to the CMB of Mercury is in the range of σm ∼ 0.01 − 1 S m−1 [Constable, 2015]. In con- + trast, the electrical conductivity of Fe in planetary cores is expected to be close σf ∼ 106 S -m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm) -−1 � (σfδf )−1. Tak- -ing σm = 1 S m +m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σmδm)−1 � (σfδf )−1. Tak- -−1, -∣∣g01∣∣ = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = +ing σm = 1 S m−1, +∣∣g0 -2000 km, ρf = 7000 kg m -−3, this gives Kcmb ≈ (3.1× 10−11) · (1− i). To put this amplitude +1 + +∣∣ = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rf = + +2000 km, ρf = 7000 kg m−3, this gives Kcmb ≈ (3.1× 10−11) · (1− i). To put this amplitude in perspective, taking a molecular viscosity of ν = 10−6 m2 s−1 in Equation (44a) gives a vis- cous coupling constant of Kcmb ≈ (6.0× 10−7) · (0.195− 1.976i). Hence, EM coupling at the + CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- ponents of the radial magnetic field. @@ -2714,16 +3066,76 @@ at the bottom of Mercury’s mantle, for instance by the upward sedimentation an of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even -in the extreme case of σm = σf = 10 -6 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains +in the extreme case of σm = σf = 106 S m−1, Kcmb ≈ (1.6 × 10−8) · (1 − i), which remains –23– +3.4 Electromagnetic coupling + +Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium +Cassini state, we set the viscous coupling back to zero. 
Because magnetic field lines tend to re- +main attached to electrically conducting materials, a differential tangential motion between two +electrically conducting regions stretches existing magnetic field lines that thread their interface. +This induces a secondary magnetic field (or equivalently, an electrical current) and an associ- +ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB +acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength +of the radial magnetic field B, and the electrical conductivity o on either side of the bound- +ary [Rochester, 1960, 1962, 1968). + +The parametrization of EM coupling in terms of the coupling constants Kemp and Kicp +has been developed in a few studies [e.g. Buffett, 1992; Buffett et al., 2002; Dumberry and Koot, +2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given +by B, = V3 (Be) cos @, where (Be) is the r.m.s. strength of the field, the coupling constant +Kemp can be written is the form + +Kemb = 3(1 _ t)Femb (Be)” ’ (47) + +where + + + +1 1 1 \7 +Femb = ( + ) 5 (48) + +Qoprre OmOm oof + +and where Om, Om = \/2/(OmpQo) and of, df = v/2/(ofHQo) are the electrical conductivi- +ties and magnetic skin depths in the mantle and fluid core, respectively, with pp = 40 x 1077 + +N A~? the magnetic permeability of free space. The r.m.s. field strength (Be) is connected to +the Gauss coefficient g? of the surface magnetic field by + +ay 2 (RY) +(at) = = (F) 9? | - (49) + +We can readily build an estimate of the amplitude of Kemp. The electrical conductivity +of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding +to the CMB of Mercury is in the range of om ~ 0.01 — 1S m~! [Constable, 2015]. In con- +trast, the electrical conductivity of Fe in planetary cores is expected to be close af ~ 10° S +m7! [Pozzo et al., 2012; de Koker et al., 2012]. 
This implies that (0m6m)~' >> (ofdf)~. Tak- +ing om = 1S mt, |g?| = 190 nT for Mercury’s dipole field [Anderson et al., 2012], rp = +2000 km, p¢ = 7000 kg m~?, this gives Kemp © (3.1 x 107!) -(1—1). To put this amplitude +in perspective, taking a molecular viscosity of y = 10~° m? s~! in Equation (44a) gives a vis- +cous coupling constant of Kemp © (6.0 x 1077) - (0.195 — 1.9767). Hence, EM coupling at the +CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- +ponents of the radial magnetic field. + +EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by +CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective o,, could be +closer to af. Likewise, a, can be increased if a more electrically conducting layer has formed +at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction +of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even +in the extreme case of om = of = 10° S m7!, Kemp © (1.6 x 1078) - (1 — 7), which remains + +—23-— + + Confidential manuscript submitted to JGR-Planets smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces + dominate the tangential stress on the CMB of Mercury. At the ICB, because we can expect the electrical conductivity in both the solid inner core @@ -2737,6 +3149,7 @@ morphology at the ICB is dominantly comprised of small spatial scales for exampl by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in terms of an equivalent uniform radial magnetic field 〈Br〉 capturing its r.m.s. strength [Buf- + fett et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the fluid and solid core, the coupling constant Kicb can be written in the form @@ -2759,9 +3172,11 @@ and where δ = √ 2/(σµΩo) is the magnetic skin depth. 
As Ficb is inversely proportional to + rs, Kicb is inversely proportional to inner core size. Note that computing the EM coupling based on the r.m.s. strength 〈Br〉 rather than a true field morphology tends to overestimate the strength + of the coupling [Koot and Dumberry , 2013]. However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are @@ -2773,32 +3188,44 @@ The parametrization of Equation (50) is only valid in a ’weak field’ regime 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. When 〈Br〉 is sufficiently large, this is no longer the case. EM coupling then enters a ’strong + field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which Kicb increases linearly with 〈Br〉 instead of quadratically. A good approximation of Kicb cal- + culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], -KEicb = (0.175− i0.138) 〈Br〉 , (52) +KE +icb = (0.175− i0.138) 〈Br〉 , (52) where 〈Br〉 is in units of Tesla. The superscript E emphasizes that the numerical factors are + appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. To adapt these numerical factors to Mercury, we write, Kicb = (0.175− i0.138) Ficb -FEicb + +FE +icb 〈Br〉 , (53) -where FEicb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- +where FE +icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- + erry and Koot [2012]. These are Ωo = 7.292 × 10−5 s−1, ρs = 12846 kg m−3, rs = 1221.5 -km, σ = 5× 105 S m−1, which gives FEicb = 90.36 T−2. + +km, σ = 5× 105 S m−1, which gives FE +icb = 90.36 T−2. To compute Ficb, we assume an electrical conductivity of σ = 106 S m−1 in the core of + Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. 
The transition between the weak and strong field regime occurs when 〈Br〉 ≈ 1.53 mT for the real part of Kicb. 〈Br〉 at the ICB + of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom- etry inside the core could be dominated by small length scales, yet only the weaker lower har- @@ -2807,6 +3234,74 @@ monics of the field would penetrate through a thermally stratified layer in the –24– +smaller by a factor ~ 60 than the smallest possible viscous coupling constant. Viscous forces +dominate the tangential stress on the CMB of Mercury. + +At the ICB, because we can expect the electrical conductivity in both the solid inner core +and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM +coupling can be much larger and dominate viscous coupling. We assume that the magnetic field +morphology at the ICB is dominantly comprised of small spatial scales for example as predicted +by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in +terms of an equivalent uniform radial magnetic field (B,) capturing its r.m.s. strength [Buf- +fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity o equal in the +fluid and solid core, the coupling constant K;., can be written in the form + + + +5 . +Kies = 71 i) Fien (Br)” (50) +where +exe) +F,,= 51 +icb Q6Psrs ’ ( ) +and where 6 = \/2/(o~Q,) is the magnetic skin depth. As F;.» is inversely proportional to + +rs, Kicp is inversely proportional to inner core size. Note that computing the EM coupling based +on the r.m.s. strength (B,) rather than a true field morphology tends to overestimate the strength +of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic +field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are +absorbed in the range of possible (B,.) values. 
+ +The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., +2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. +When (B,) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which +Kip increases linearly with (B,.) instead of quadratically. A good approximation of K;j,y cal- +culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], + +KE, = (0.175 — i0.138) (B,) , (52) + +ich —_ + +where (B,.) is in units of Tesla. The superscript E emphasizes that the numerical factors are +appropriate for the parameter values adopted for Earth in the computation of Dumberry and +Koot [2012]. To adapt these numerical factors to Mercury, we write, + + + +Fic + +Kies = (0.175 — 10.138) 2 (B,) , (53) +Fic + +where FE, is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- + +erry and Koot [2012]. These are Q, = 7.292 x 107° s-!, ps = 12846 kg m~3, rz = 1221.5 + +km, o = 5 x 10° S m~!, which gives F%, = 90.36 T~?. + +icb + +To compute F;.p, we assume an electrical conductivity of ¢ = 10° S m7! in the core of +Mercury |e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and +strong field regime occurs when (B,) ~ 1.53 mT for the real part of K;.5. (B,) at the ICB +of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom- +etry inside the core could be dominated by small length scales, yet only the weaker lower har- +monics of the field would penetrate through a thermally stratified layer in the upper region of + +—24— + + Confidential manuscript submitted to JGR-Planets @@ -2814,7 +3309,9 @@ Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. 
If so, the field strength inside the core can exceed the sur- face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g An- + derson et al., 2012], 〈Br〉 at the ICB could be as large as 0.3 mT, corresponding to approxi- + mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of @@ -2824,6 +3321,7 @@ Mercury remains in the weak field regime. Figure 6 shows how ε̃m, m̃f and ñs vary as functions of inner core radius for different choices of 〈Br〉. The larger 〈Br〉 is, the stronger is the EM coupling at the ICB, and the smaller is the + differential rotation between the fluid core and inner core. The inner core and fluid core are vir- tually locked into a common precession motion when 〈Br〉 > 0.3 mT. Further increasing 〈Br〉 @@ -2831,10 +3329,13 @@ above 1 mT does not change the solution as EM coupling already dominates all oth on the inner core. This is the case even when EM coupling transitions into the strong field regime. -EM coupling at the CMB is included in these calculations, with σm = 1 S m -−1 and +EM coupling at the CMB is included in these calculations, with σm = 1 S m−1 and +∣∣g0 + +1 + +∣∣ = -∣∣g01∣∣ = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core we retrieved the solutions of ε̃m and m̃f shown in Figure 4. @@ -2876,6 +3377,7 @@ with inner core size, χ gets smaller, and so do C ′ and ε̃m. The mantle obl arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 arcmin. For an inner core larger than ≈ 1000 km, ĀcΩp cos I < ĀsΩoα3φs, so χ becomes neg- + ative, C ′ becomes smaller than the moment of inertia of a rigid Mercury C, and ε̃m becomes smaller than the prediction based on a rigid planet. @@ -2902,6 +3404,67 @@ Mathews and Guo [2005] and Deleplace and Cardin [2006]. 
However, in the light of –25– +the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- +face field strength by a factor 1000. Taking a surface field strength equal to ~ 300 nT [e.g An- +derson et al., 2012], (B,) at the ICB could be as large as 0.3 mT, corresponding to approxi- +mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- +cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of +Mercury remains in the weak field regime. + +Figure 6 shows how ém, my and mr. vary as functions of inner core radius for different choices +of (B,). The larger (B,) is, the stronger is the EM coupling at the ICB, and the smaller is the +differential rotation between the fluid core and inner core. The inner core and fluid core are vir- +tually locked into a common precession motion when (B,) > 0.3 mT. Further increasing (B,-) +above 1 mT does not change the solution as EM coupling already dominates all other torques +on the inner core. This is the case even when EM coupling transitions into the strong field regime. +EM coupling at the CMB is included in these calculations, with ao, = 1S m7! and | 9) | = +190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core +we retrieved the solutions of €,, and my shown in Figure 4. + +As the inner core radius is increased, both €,, and my get smaller, as it was the case with +viscous coupling alone, although the addition of EM coupling lead to more substantial changes. +The inner core needs to be larger than approximately 500 km for changes in the Cassini state +equilibrium to be noticeable. 
It is important to point out that my is reduced not because of +EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which +pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the +inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the +greater is the reduction in €,, and mf. + +When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are +locked into a common precession motion, a good approximation of €,, is given by the same pre- + +diction as Equations (39-40) involving the effective moment of inertia C’, except y is now given +by + +_ AQy cos I — A,Q.a3¢s + ApQoler + Kem) + AsQo€s03Aq — AcQ» cos I + + + +x (54) +For a small inner core, AQ, cosI > A,Q,a3¢, and x is positive. Because A,Q,03¢, increases +with inner core size, y gets smaller, and so do C’ and €,,. The mantle obliquity drops from 2.049 +arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 +arcmin. For an inner core larger than ~ 1000 km, AQy cosI < A,Q,a3¢s5, so x becomes neg- +ative, C’ becomes smaller than the moment of inertia of a rigid Mercury C, and é,, becomes +smaller than the prediction based on a rigid planet. + +The larger the inner core is, the smaller are the misalignments of the fluid and solid cores +with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone +is not altered with the addition of EM coupling but further strengthened; the larger the inner +core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the +obliquity of the gravity field €, which, for a large inner core, asymptotically approaches the oblig- +uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be- +tween €,, and &, can be as large as 0.008 arcmin for a large inner core. 
+ +3.5 Fixed inner core density versus fixed ICB density contrast + +Coupling models when viscous and EM stresses are both present have been presented in +Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, + +—25— + + Confidential manuscript submitted to JGR-Planets @@ -3020,6 +3583,7 @@ We choose an effective viscosity at the CMB of ν = 10−4 m2 s−1, which we be representative value given the comparison with the Moon (see section 3.3). We take a radial field strength at the ICB of 〈Br〉 = 0.3 mT, approximately the field strength expected under + the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa- tive’ coupling model, although the uncertainty on ν and 〈Br〉 obviously remains high. @@ -3166,24 +3730,13 @@ Figure 7. a) Obliquity of the mantle (ε̃m, solid lines) and gravity field (ε 8800 kg m−3 (black lines) and for different choices of α3 (coloured lines). -i′m, i -′ -f and i - -′ -s; these represent the obliquities with respect to the orbital plane and are connected +i′m, i′f and i′s; these represent the obliquities with respect to the orbital plane and are connected -to our variables by: i′m = ε̃m, i -′ -f = ε̃m+m̃+m̃f ≈ ε̃m+m̃f and i′s = ε̃m+ ñs. To summarize +to our variables by: i′m = ε̃m, i′f = ε̃m+m̃+m̃f ≈ ε̃m+m̃f and i′s = ε̃m+ ñs. To summarize -their results, i′f and i -′ -s vary substantially for different inner core sizes, are always of compara- +their results, i′f and i′s vary substantially for different inner core sizes, are always of compara- -ble amplitude, and i′s is always larger than i -′ -f . Furthermore, they find that as the inner core +ble amplitude, and i′s is always larger than i′f . Furthermore, they find that as the inner core size is increased, the mantle obliquity i′m gets progressively larger and is displaced further away @@ -3192,7 +3745,9 @@ from its expected orientation based of a rigid planet (see their Figure 6). 
The obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan- etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), + is approximately an increase of 5 × 10−5 rad = 0.17 arcmin. This also corresponds approxi- + mately to the deviation of the obliquity with respect to that of a rigid planet. When only viscous stress is included in our model (section 3.3), our results are substan- @@ -3231,6 +3786,104 @@ be more strongly aligned with the mantle. The more strongly the inner core and m –27– + + +— Ps=8800kgm3 G3: —— 0.20 0.15 —010 —005 —0.01 + +164 +2.050 4] A L + +4.04 + + + + + + + + + += Em 354 +& 2.046 4 __¢ L + +5 g 3.04 +& 2.044 4 L + +© J +& 2.042 4 L 25 + +Em for a rigid planet + + + +Obliquity angle (arcmin +nm +° + +Obliquity a +ie) np +oa oOo +& 8 +3 + + + + + + + + + + + + + + + +2.036 + +2.034 4 L 0.54 +2.032 , , , t 0.0 + t 1 +0 200 400 600 800 1000 1200 1400 0) 200 400 600 800 1000 1200 1400 +Inner core radius (km) Inner core radius (km) + +Figure 7. a) Obliquity of the mantle (€, solid lines) and gravity field (€,, dashed lines) b) my +(solid lines) and 7s (dashed lines) as a function of inner core radius, for a fixed inner core density of + +8800 kg m~® (black lines) and for different choices of a3 (coloured lines). + +vas ap and i; these represent the obliquities with respect to the orbital plane and are connected +to our variables by: 7), = Em, ue =EmtM+Ms & Em +My and i, =Em+n,. To summarize +their results, ue and i, vary substantially for different inner core sizes, are always of compara- +ble amplitude, and 7, is always larger than i. Furthermore, they find that as the inner core +size is increased, the mantle obliquity 7/,, gets progressively larger and is displaced further away +from its expected orientation based of a rigid planet (see their Figure 6). 
The change in i/,, they +obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan- +etary radius (* 1463 km, close to the maximum inner core size of 1500 km we have considered), +is approximately an increase of 5 x 107° rad = 0.17 arcmin. This also corresponds approxi- +mately to the deviation of the obliquity with respect to that of a rigid planet. + +When only viscous stress is included in our model (section 3.3), our results are substan- +tially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core +gets smaller with inner core size and that the change is very modest. In contrast with the re- +sults of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that +of the fluid core, except when the inner core is very small or when the effective viscosity is un- +reasonably large. We also find that as the inner core size is increased, the mantle obliquity gets +smaller, opposite to the results of Peale et al. [2016], and that the changes remain small, at most +of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling +model that we use. But even when we adopt their model parameters and use their viscosity model, +we were not able to reproduce their results. + +In the absence of viscous and EM coupling, the strong gravitational torque exerted on the +inner core by the mantle should prevent any large misalignment between the two. This is cap- +tured by the period of the FICN, which is of the order of 100 yr, much shorter than the forc- +ing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational +torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous- +EM torque to the gravitational torque decreases with inner core size, so a large inner core should +be more strongly aligned with the mantle. 
The more strongly the inner core and mantle are + +—27-— + + Confidential manuscript submitted to JGR-Planets @@ -3273,6 +3926,7 @@ est changes of the mantle obliquity εm compared to the obliquity predicted on t entirely rigid planet (εrm). Let us denote this difference as ∆εm = εm−εrm. The largest ∆εm occurs for a small or no inner core, and is ∆εm ≈ 0.01 arcmin. This difference is decreased + as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM coupling and large density contrast at the ICB, ∆εm can be negative, but its absolute value @@ -3298,12 +3952,15 @@ Nevertheless, our results show that the presence of a fluid core and inner core resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006 + arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliq- + uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 + arcmin) [Cicalò et al., 2016]. Thus, in addition to including tidal deformation and the preces- sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be @@ -3331,15 +3988,20 @@ tle. Since gravitational coupling prevents a large inner core tilt with respect Confidential manuscript submitted to JGR-Planets find that the misalignment ∆εg = εg − εm is limited. The maximum offset that we obtain + is approximately ∆εg ≈ 0.007 arcmin. This limited magnitude of offset is important in the + light of the recent obliquity of the gravity field estimated in Genova et al. [2019], εg = 1.968± 0.027 arcmin. 
This is substantially smaller than the two mesurements of the obliquity of the spin-symmetry axis of the mantle: εm = 2.04 ± 0.08 arcmin [Margot et al., 2012] and εm = + 2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent + with one another within their error estimates. In their interpretation, Genova et al. [2019] sug- gest that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 ar- + cmin) is perhaps explained by an offset ∆εg due to the presence of a (possibly large) solid in- ner core. However, this is one order of magnitude larger than the maximum magnitude of ∆εg @@ -3364,6 +4026,7 @@ lar to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). + Although this offset is smaller than the measurement errors, so that the observed obliquity is still consistent with no deviation away from the Cassini plane, some amount of dissipation in- diff --git a/read/results/tika/2201.00069.txt b/read/results/tika/2201.00069.txt index edb4cfb..1a05855 100644 --- a/read/results/tika/2201.00069.txt +++ b/read/results/tika/2201.00069.txt @@ -18,27 +18,6 @@ - - - - - - - - - - - - - - - - - - - - - @@ -518,6 +497,138 @@ uum sources in the MeerKAT observations with the FRB loca- MNRAS 000, 1–15 (2021) https://github.com/e-merlin/eMERLIN_CASA_pipeline +4 = Chibueze et al. + +2.2 e-MERLIN Observations + +To constrain the position of the persistent continuum emission +associated with FRB 20190714A, we conducted L-band (centre +frequency of 1.51 GHz) observations of the target with the en- +hanced Multi-Element Remote-Linked Interferometer Network, e- +MERLIN array in the United Kingdom (project code: CY 10003) +on 13 January, 2021 (see Section 3.1.2). 
Six antennas were used +including the 75-m Lovell telescope and the target pointing cen- +tre was R.A. = 12/15'55%.12, Dec. = —13°01/15!’7. 1407+2827 +was used as the bandpass calibrator, 1331+3030 as the flux cal- +ibrator and 1216-1033 as the phase calibrator. The angular sep- +aration between the target and the phase calibrator is 2.47°. The +data reduction was done following standard e-MERLIN calibra- +tion procedures with additional flagging of bad visibilities fol- +lowed by imaging. We found two confusing sources in the field, +at R.A. = 12715'445 669, Dec. = —12°57/59/’56 and R.A. = +12" 15378 216, Dec. = —13°09/33/'44 at 4.1’ and 9.4’ from the +pointing centre, respectively. They had apparent flux densities of 4 +and 1.3 mJy without primary beam correction. We used these for +self-calibration of the field and then subtracted them before final +imaging. The final image synthesized beam is 0°65 x 0/15, posi- +tion angle 15° elongated in the Declination direction due to the low +target elevation from the UK. + +2.3 The Swift satellite. UVOT and XRT observations + +Neil Gehrels Swift Observatory (Swift) is a multi-wavelength NASA +space mission operating in soft-X-rays and optical/UV. Here we +use data from the X-ray Telescope (XRT) (Burrows et al. 2005) +which operates in the soft X-ray domain of 0.3 — 10 keV as well as +data taken by the UV/Optical Telescope (UVOT) (Roming et al. +2005) operating in the UV to optical domain (170 — 600 nm). +During the FRB 20171019A multi-wavelength (MWL) observing +campaign, two 2 ks target-of-opportunity (ToO) observations were +performed with Swift from 2019-09-28 18:37:02 to 2019-09-28 +21:52:54 and 2019-10-18 18:03:00 to 2019-10-18 20:03:00 on the +FRB 20171019A localisation region. Simultaneously with Swift- +XRT, five UVOT images were taken with the UVM2 filter (central +wavelengh = 2246 A) over the 2 epochs with a total exposure of 4 ks. 
+The images are aspect-corrected and summed with the uvotimsum +tool (HEASOFT 6.26). Observations were performed with Swift- +XRT in the standard Photon Counting observing mode (PC). The +XRT PC data are processed with xrtpipeline (HEASOFT 6.26). +A summed image is extracted with xselect. + +2.4 Very-high energy gamma-ray observations with H.E.S.S. + +Observations of FRB 20171019A were also obtained in the very- +high energy gamma-ray domain with the H.E.S.S. imaging atmo- +spheric Cherenkov telescope array, sensitive in the range between a +few tens of GeVs and 100 TeV. H.E.S.S. is located on the Khomas +Highland plateau of Namibia (23°16’18’ South, 16°30’00” East), +at an elevation of ~1800 m above sea level. Observations took place +contemporaneously to the first epoch of MeerKAT observations of +FRB 20171019A described above. The data set was obtained with +the H.E.S.S. phase II array, including the upgraded 12 m-diameter +CT 1-4 telescopes (Ashton et al. 2020) and the large 28 m-diameter + +6 https://github.com/e-merlin/eMERLIN_CASA_pipeline + +CT5 telescope (Bolmont et al. 2014). A standard data quality selec- +tion was applied to the data (Aharonian et al. 2006). The events have +then been selected and their direction and energy reconstructed us- +ing a log-likelihood minimization comparing the recorded shower +images of all triggered telescopes (requiring at least two telescopes +to see the same gamma-ray event) to a semi-analytical model of air +showers (de Naurois & Rolland 2009). + +We define a circular region-of-interest centered on the position +of FRB 20171019A with a radius of 0.12°, optimal for a point-like +source of emission as expected from FRB 20171019A. The back- +ground level in this ON region was determined using the standard +“ring background” technique (Berge et al. 2007) based on a radially +symmetric ring around the source position. 
This technique allows us +to derive the background level from the same field of view and as- +sures that the gamma-ray signal and background are estimated with +the same acceptance and under the same observation conditions. + +3 RESULTS +3.1 MeerKAT + +The theoretical thermal noise of the MeerKAT can be calculated as + +1 SEFD +SS (1) + +Stms = : +Me [ropot x N(N - 1) xX Av X tint + +The system equivalent flux density (SEFD) of MeerKAT at the +1.28 GHz is 443 Jy and 77- is the correlator efficiency. We used nyo} += 2 polarisation products (XX and YY), N = 64 telescopes, Av = +856 MHz bandwidth and fy, = 21600 sec observing time for one +epoch. This gives the theoretical rms of ~ 2 Jy beam~!. The typical +image rms obtained from our residual images is ~5 Jy beam@!, +which is 2.5 times the expected theoretical rms. The wideband MFS +image does not allow primary beam correction procedure as this can +only be done on the sub-band images with limited rms for detection +of the sources. However, our sources are the phase centres of our +fields and thus unaffected by the effect of the primary beam. + +Due to the lack of MeerKAT primary beam correction, we +did not compare the flux densities of the discrete sources with +their NRAO (National Radio Astronomy Observatory) VLA (Very +Large Array) Sky Survey (NVSS) counterparts. However, Chibueze +et al. (2021, submitted) confirmed that the overall flux densities +obtained with MeerKAT and NVSS are in good agreement with +each other within errors of ~ 5%. We compared the astrometry of +the discrete radio sources obtained with MeerKAT and NVSS in +Figure 1. The position uncertainty of the MeerKAT ranges from +0/’2 (close to the centre of the primary beam) to a few arcseconds +towards the edge of the primary beam. 
The scatter observed in +Figure 1 is mostly due to the probability of the centroids of emission +in the ~45’” NVSS resolution being different from the centroids at +MeerKAT’s resolution and partly due to higher position uncertainty +of the fainter sources. Therefore, we conclude that our MeerKAT +data are well calibrated and the flux density and astrometry are as +accurate as the errors indicate. + +3.1.1 Looking for persistent continuum emission associated with +the FRB fields + +Considering the results of the astrometric comparison with NVSS +(see Figure 1), we considered potential associations of contin- +uum sources in the MeerKAT observations with the FRB loca- + +MNRAS 000, 1-15 (2021) + + MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs 5 @@ -656,6 +767,140 @@ https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/w3pimms.pl https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/w3pimms.pl https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.pl https://heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh.pl +MeerKAT, e-MERLIN, Swift and H.E.S.S., observations of three localised FRBs — 5 + +tion to sources within 5’’. Using this spatial coincidence criterion, +we identified a persistent 1283 MHz continuum source near FRB +20190714A, detected in both the 14 September 2019 and the 28 +September 2019 epoch. The peak of the MeerKAT radio emission +is offset by ~ 2’’.1 from the peak of the i-band magnitude of the op- +tical galaxy identified in the Panoramic Survey Telescope and Rapid +Response System (PanSTARRS, located at Haleakala Observatory) +image (shown as contours in Figures 2 and 3). The MeerKAT ra- +dio source is offset by 1/’68 from the localisation region of FRB +20190714 (cyan circle in Figures 2 and 3). + +3.1.2. e-MERLIN detection of compact emission towards +FRB 20190714 + +Compact persistent emission was detected in the 1.51 GHz e- +MERLIN image at R.A. = 12"15’"55.116, Dec. = —13°01/14/’48 +at 86 uJy beam! by e-MERLIN. 
The stochastic position uncer- +tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa- +ration between phase-calibrator and target, and antenna position +uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric +uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively. +The offset from the FRB position is negligible in R.A. and 1.2 +arcsec in Dec. The rms in this region (of full primary beam sen- +sitivity) is 20 wy beam!, making this a 4.30;ms detection. It is +~1.50;ms higher than that of the MeerKAT detection. Although the +e-MERLIN fiux scale nominal uncertainty is ~5%, in these data it +is possibly higher due to the low declination of the phase-reference +source and to the strong RFI which were removed from the data +but may have affected the linearity of the receiver response. The +peak of the e- MERLIN radio emission is offset by ~ 1°’4 from the +peak of the PanSTARRS i-band emission in Figures 2 and 3. The +e-MERLIN radio source (shown by the cyan cross in Figures 2 and +3) is offset by 0’’53 from the localised position of FRB 20190714. + +We estimate the probability of a chance alignment of a back- +ground persistent radio source and the host galaxy, following the +procedure of Eftekhari et al. (2018). Instead of using the FRB lo- +calisation region, we use the area of the galaxy, which is taken as +2” x 2’’, twice the half light radius from Heintz et al. (2020). Given +the source has a flux density of ~ 90uJy we estimate the chance +alignment probability of 0.0008, which corresponds to 3.40. The +flux density threshold, assuming 3c, for an unresolved radio source +is ~ 15 wJy. If instead we consider the probability of detecting any +radio source above our flux density threshold of 15 Jy, the probabil- +ity of a chance alignment is, therefore, approximately 0.8%, making +the statistical significance of our detection 2.60. 
This represents the +first detection of radio continuum emission associated with the host +(galaxy) of FRB 20190714A (see Figure 2 and 3). + +3.1.3. MeerKAT non-detections + +No continuum emission was detected near FRBs 20171019A and +20190711A. As each of the images of these sources has an rms +of ~5uJy beam™!, the 3 intensity upper limit of any emission +associated with FRBs 20171019A and 20190711A will be ~ 15 wJy +beam! (see Table 1). + +Candidate pulses above a signal-to-noise (S/N) of 10 from the +single pulse search with MeerTRAP were visually inspected offline. +No new FRBs or repeat bursts from the known FRBs were detected +above a fluence threshold of 0.08 Jy ms assuming a 1 ms duration +burst. + +MNRAS 000, 1-15 (2021) + +3.2 Swift + +The UVOT summed image is presented in Figure 4. The UVOT +field of view corresponds roughly to the uncertainty’ of the locali- +sation region of FRB 20171019A (RA = 7.5’and DEC = 7’). Using +uvotdetect, we find 30 sources above the 5c level and within the +FRB 20171019A uncertainty region. Using a 3 arcsec maximum +separation, which is slightly larger than the UVOT PSF (Breeveld +et al. 2010), these sources are cross-matched with known catalogue +sources. We find that out of the 30 sources detected by UVOT, 28 +are spatially coincident with stars catalogued in the SDSS catalogue +(DR12; Alam et al. 2015), and one source is coincident with a galaxy +(AGN broadline SDSS ID: 1237652599570890948 at z ~ 0.156). +This galaxy is also detected by the MeerKAT radio observations. We +use the NASA/IPAC Extragalactic Database (NED)® to search for +known galaxies in the FRB 20171019A uncertainty regions. We find +multiple galaxies with unknown redshifts, therefore we cannot draw +conclusions on the host galaxy from our observations. 
Using a 50’” +circular ON region centred on the position of FRB 20171019A and +a50” OFF region that does not contain any of the detected sources, +we run the uvotsource tool with a 5a background threshold and +obtain a flux upper limit of 1.4 x 10-16 erg cm? s~!A-! without +applying a Calactic extinction correction. + +The XRT summed image is shown in Figure 5. At the edge +of the field-of-view, we detect a source spatially coincident with +the Wolf 1561 star. As we consider this source unrelated to the +FRB, we use the online Swift-XRT data products generator (Evans +et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3- +10 keV range on the count rate of 0.001885 counts.s~!. Using +WebPIMMS? (v4.11a) and assuming a weighted average Ny = 5.12 +1029 cm-? from the direction of the source estimated from the +NASA’s HEASARC !° online tools (HI4PI Collaboration et al. +2016) and a power law model with a photon index = 2, this upper +limit translates to an energy flux of 6.6 x 10-14 erg em? s~! (8.3 x +10714 erg cm~ s7! unabsorbed). + +3.3. H.E.S.S. + +No significant gamma-ray excess above the expected background +is detected from the direction of FRB 20171019A, with 52 gamma +candidate events from the source region and 524 background event. +A second analysis using an independent event calibration and recon- +struction (Parsons & Hinton 2014) confirms this result. A search for +variable emission on timescales ranging from milliseconds to sev- +eral minutes with tools provided in (Brun et al. 2020) does not reveal +any variability above 2.2 o-. For the total data set of 1.8 h, 95% confi- +dence level (C. L.) upper limits on the photon flux are derived using +the method described by Rolke et al. (2005). The energy threshold +of the data is highly dependent on the zenith angle of the observa- +tions. For these observations, the zenith angles range from 15 to 25 +deg, which leads to an energy threshold for the stacked data set of +Ew, = 120 GeV. 
The upper limit on the Very High Energy (VHE) + +7 https://www.wis-tns.org/object/20171019a + +8 https://ned.ipac.caltech.edu; NED is funded by the National +Aeronautics and Space Administration and operated by the California Insti- +tute of Technology + +9 https: //heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3pimms/ +w3pimms.pl + +10 https: //heasarc.gsfc.nasa.gov/cgi-bin/Tools/w3nh/w3nh. +pl + + 6 Chibueze et al. @@ -708,6 +953,101 @@ MeerKAT flux and is unresolved on the e-MERLIN baselines. The MNRAS 000, 1–15 (2021) +6 = Chibueze et al. + + + +10.0 T T T T T T +7.54 5 4 +9 +O +5.0+ : +O +° +2.5 ° += . ho oO _ +% " eo +UO DOD @& Oo +Oo Oo +© oF 0 0% | +O +@ +2 os ° " +” o 00 +O +O +-5.0+ O 4 + +—7.5 + +T + + + +—10 | | + + + + + +1 +-10.0 —-7.5 —5.0 -2.5 + +| +2.5 5.0 7.5 10.0 + +AR.A. (arcsec) + +Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the + +MeerKAT and NVSS sources. + +gamma-ray flux above that threshold and assuming an energy depen- +dence following E~? is ®(E > 120GeV) < 2.10x 107!2 cm=? s7! +or ®(E > 120GeV) < 1.7 x 107! ergem~?s7!. A variation of ++ 0.5 of the assumed spectral index leads to a variation in the upper +limit of less than + 19%. A map of energy flux upper limits covering +the full region accessible within the H.E.S.S. field of view above +120 GeV is given in Figure 6. + +4 DISCUSSION + +Of the targeted FRB fields reported here, only FRB 20190714A +is observed to be spatially coincident with a persistent radio con- +tinuum source. We obtain an upper limit of ~ 15 uJy beam™! for +FRBs 20190711A and 20171019A, respectively, and a peak inten- +sity of ~ S53 wy beam7! for the emission coincident with FRB +20190714A. This source is detected at both epochs with similar +intensities within the measured rms of the images (see Tables 1 and +2 for details). 
The values in the Table 2 are derived by carrying +out 2D Gaussian fit using similar ellipses enclosing the detected +persistent emission. The average flux density is ~ 3 times less than + +that of the persistent source associated with FRBs 20121102A, one +of the most prolific repeaters, located at z = 0.19273(8). Persistent +radio emission from FRB 20201124A was detected by the UGMRT +(Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular +scales of a few arcseconds. However, it is resolved out at scales of +~ 0.1 arcseconds with the European VLBI Network (Marcote et al. +2021) suggesting that it is not a compact source directly associated +with the FRB. In contrast, the other localised, prolific repeating +FRB 20180916A has no persistent radio counterpart. + +In the image in Figure 3 one can see that the persistent radio +source lies at the edge of the optical extent of the host galaxy +as seen in PanSTARRS observations (Heintz et al. 2020). Our +derived 1283 MHz peak position with MeerKAT places it just +1’’68 away from the position of FRB 20190714A (@;2000, 6.72000 += 12/15558 12, -13°01'15’’70; Heintz et al. 2020). The posi- +tional uncertainty on the FRB position is 0’’283. Similarly, the peak +1.51 GHz e-MERLIN position of the persistent radio source is sepa- +rated from the position of FRB 20190714A by 0°’53. The persistent +source near FRB 20190714 r0 +a(log r − log r0)c + log(Υ0) r > r0 (5) Article number, page 5 of 12 @@ -1350,6 +1331,7 @@ der to smooth out the numerical artifacts, the three-dimensional imums (identified as the best-fitting models) and 1, 2, 3σ con- fidence levels which for three parameters correspond to ∆χ2 = + 3.53, 8.02, 14.2 (Press et al. 1992). 3.2. 
Application to mock data @@ -1465,7 +1447,9 @@ ALL r [kpc] 1010 + 1011 + 1012 10 100 @@ -1610,7 +1594,9 @@ POPULATIONS r [kpc] 1010 + 1011 + 1012 10 100 @@ -1921,9 +1907,7 @@ Number of stars (Nphot) 65 797 14 882 49 205 Number of stars (Nspec) 3286 1136 1151 Stars within 1.8 kpc 3268 1134 1130 -Fitted normalization (N0) [×10 -4] 6.95 1.81 5.45 - +Fitted normalization (N0) [×104] 6.95 1.81 5.45 Sérsic radius (RS) [kpc] 0.454 0.429 0.420 Sérsic parameter (m) 0.808 0.807 0.898 @@ -2043,9 +2027,7 @@ m s -1 -)2 -] - +)2 ] R [kpc] -16 @@ -2067,15 +2049,13 @@ R )[ 10 -2 ( +2 (k -km +m s -1 -)3 - -] +)3 ] R [kpc] @@ -2098,15 +2078,13 @@ R )[ 10 -4 ( +4 (k -km +m s -1 -)4 - -] +)4 ] R [kpc] @@ -2207,10 +2185,9 @@ c 12 -χ2 --χ - +χ2 -χ 2 m + in Fig. 13. Values of χ2 relative to the fitted minimum within the range of 3σ confidence level for all stars (left panel) and for the populations (right @@ -2218,21 +2195,18 @@ panel) for the Fornax dSph. (Kowalczyk et al. 2019), we obtained higher estimates of the en- closed total mass at larger radii. In particular, for the mass en- -closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87 +closed within 1.8 kpc we get Mall(< 1.8 kpc) = 3.87+1.48 -+1.48 −1.56 × 108 -M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71 -+0.87 +M⊙ from the fit for all stars and Mpops(< 1.8 kpc) = 4.71+0.87 −1.13 × 108 M⊙ from the fit of populations, while previously we had -Mold(< 1.8 kpc) = 3.7 +Mold(< 1.8 kpc) = 3.7+1.4 -+1.4 −1.3 × 108 M⊙. 
diff --git a/read/results/tika/2201.00178.txt b/read/results/tika/2201.00178.txt index 1aed9f1..811110c 100644 --- a/read/results/tika/2201.00178.txt +++ b/read/results/tika/2201.00178.txt @@ -18,27 +18,6 @@ - - - - - - - - - - - - - - - - - - - - - @@ -78,7 +57,9 @@ Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on measurements to show that the resulting divergence and radial vorticity maps at supergranular length scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Corre- + lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, + while ≥ 0.8 is obtained for the radial vorticity. Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) @@ -224,7 +205,9 @@ solar surface, respectively, and ez points outwards. This approximation is valid that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the + horizontal wavenumber qR� ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(qx, qy)| is the vector horizontal + wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon @@ -238,6 +221,7 @@ uσ =∇×[∇×(P ez)] +∇×(T ez), (1) where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying + perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 
2020; Mandal & Hanasoge 2020, for example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period @@ -251,6 +235,7 @@ vector calculus results in u = −∇2Pez +∇(∂zP ) +∇hT×ez, (2) where ∇h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the + Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a function of horizontal wavenumber q and depth zez. Hence the poloidal and toroidal flows are described by Pq(z) and @@ -273,12 +258,7 @@ The flow coefficients Pqj and Tqj , represented by the discrete indices q and j, where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be -exploited to expedite inversions. Note that Pqj = P -∗ -−qj and Tqj = T - -∗ -−qj for the flow field to be real in the spatio- +exploited to expedite inversions. Note that Pqj = P ∗−qj and Tqj = T ∗−qj for the flow field to be real in the spatio- temporal domain. @@ -288,25 +268,19 @@ To infer flows from wavefields φ scattered by a perturbation of length scale q, Imaging near-surface flows using mode-coupling analysis 3 -φω∗k φ -ω -k+q, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φ - -ω∗ -k φ - -ω -k+q thus +φω∗k φωk+q, where k is the oscillation mode wavenumber (kx, ky) and ω is the temporal frequency. Relate φω∗k φωk+q thus to the flow coefficients Pqj and Tqj (see eq A7) 〈φω∗k φωk+q〉 = Hωkk′nn′ + ∑ j Cqj,kPqj +Dqj,kTqj . (4) The weight factorHω (see eq A8) is a function of frequency, capturing information about the extent of coupling between + the two modes [n, k] and [n′, k′], where n and n′ are the radial orders of the modes, and k = |k| and k′ = |k′| = |k+q|. The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the @@ -320,15 +294,11 @@ Dqj,k = D−qj,−k (see eq A6). 
The kernels, as flows, are expressed on the bas 1.2. Least-squares of cross-correlation -Even though φω∗k φ -ω -k+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- +Even though φω∗k φωk+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact mea- surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the -dimension of the problem. A least-squares fit to the cross-correlation φω∗k φ -ω -k+q (see Woodard 2006, 2014, 2016) results +dimension of the problem. A least-squares fit to the cross-correlation φω∗k φωk+q (see Woodard 2006, 2014, 2016) results in the B-coefficients Bk,q, according to @@ -343,6 +313,7 @@ Hω∗kk′nn′φω∗k φωk+q∑ . (5) Multiplying eq 4 on both sides by Hω∗kk′nn′ and substituting by eq 5 on the left-hand-side results in a concisely defined + forward problem (compare with eq 4) Bk,q = @@ -370,9 +341,11 @@ or . (7) Summing over ±ω guarantees that the parity Bk,q = B∗−k,−q (see Appendix A for derivation) is obeyed, thereby + ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and + −k, B∗−k,−q = @@ -389,6 +362,7 @@ C−qj,−kP Substituting parity and symmetry relations for all terms in the above results in eq 6. As Bk,q is constructed by a least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a + by-product. 1.3. Noise model @@ -407,6 +381,74 @@ random forcing function (see Duvall & Harvey 1986). Modes are thus generated wit and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters +IMAGING NEAR-SURFACE FLOWS USING MODE-COUPLING ANALYSIS 3 + +Pe Petq: Where k is the oscillation mode wavenumber (ka, ky) and w is the temporal frequency. 
Relate d%* d% +q thus + +to the flow coefficients Pj; and Ty; (see eq A7) + +(oe k-+q) = Hkkinn! S- Cain Pag + Daj,kTq;: (4) +j + +The weight factor H” (see eq A8) is a function of frequency, capturing information about the extent of coupling between +the two modes [n, k] and [n’, k’], where n and n’ are the radial orders of the modes, and k = |k| and k’ = |k’| = |k+q]. +The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the +Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms Cg;,, and Dgj,~ are poloidal +and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements +and are derived from the solar model see Appendix A. They possess the symmetry relation: Cqj;~% = C—qj,-z and +Daj,k = D—qj,-k (see eq AG). The kernels, as flows, are expressed on the basis f;(z). + +1.2. Least-squares of cross-correlation + +Even though $;* dg 44 isolates the effect of flow perturbations at individual wavenumbers qg, a more compact mea- +surement, known in mode-coupling literature as ’B-coefficients’, is much better designed for inversion as it reduces the +dimension of the problem. A least-squares fit to the cross-correlation $2" O44 (see Woodard 2006, 2014, 2016) results +in the B-coefficients By,q, according to + +»— Heenan! PR k-+q + +w +» He krnan |? , +w + +Multiplying eq 4 on both sides by H¥¥,,,,,, and substituting by eq 5 on the left-hand-side results in a concisely defined +forward problem (compare with eq 4) + +Brig = (5) + +Brg = S> Cqj,kPaj + Daj,eTaj- (6) +J + +In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over w. +Here, we sum over both +w within a few mode linewidths I’. 
Denoting the resonant frequency of a mode using $\omega_{nk}$, + +$|\omega| \in (\omega_{nk} - \epsilon\Gamma_{nk}/2,\ \omega_{nk} + \epsilon\Gamma_{nk}/2)$ or +$|\omega| \in (\omega_{n'k'} - \epsilon\Gamma_{n'k'}/2,\ \omega_{n'k'} + \epsilon\Gamma_{n'k'}/2)$. (7) + +Summing over $\pm\omega$ guarantees that the parity $B_{k,q} = B^{*}_{-k,-q}$ (see Appendix A for derivation) is obeyed, thereby +ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. +Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components $-q$ and +$-k$, + +$B^{*}_{-k,-q} = \sum_{j} C_{-qj,-k} P^{*}_{-qj} + D_{-qj,-k} T^{*}_{-qj}$. (8) + +Substituting parity and symmetry relations for all terms in the above results in eq 6. As $B_{k,q}$ is constructed by a +least-squares fitting, it is noteworthy that summing over $-\omega$ will also lead to improvement in its signal-to-noise as a +by-product. + +1.3. Noise model + +In addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from +the observed B-coefficients. For estimating the contribution from realization noise to the measurements, we make the +following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian +random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. +Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a +random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes +and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters + + 4 Mani et al. @@ -423,6 +465,7 @@ as in H21, which was motivated by the above discussion, Gk,q ≡ 〈|Bk,q|2〉, (9) where, unlike H21, we again sum over ±ω. Gk,q is real, with the symmetry relation Gk,q = G−k,−q (see Appendix A + for explanation). 2.
DATA ANALYSIS @@ -434,16 +477,12 @@ Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Sch is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4× 194.4 Mm2 in size, tracked for 24 hours + and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number 2197, Carrington longitude 90◦). This Dopplercube is considered as the physical wavefield φ(x, y; t). The Fourier-space -wavefield φωk (and subsequently, the cross-correlation φ -ω∗ -k φ - -ω -k+q) is obtained by computing the 3D spatial and temporal +wavefield φωk (and subsequently, the cross-correlation φω∗k φωk+q) is obtained by computing the 3D spatial and temporal Fourier transform of the Dopplercube. @@ -460,8 +499,11 @@ Maximum signal can be extracted from the weighted summation of the cross correla profiles of the two modes [n, k] and [n′, k′] closely align in ω space. This implies that their mode frequencies should be sufficiently close (|ωnk − ωn′k′ | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over + ±ω is significant only over a few linewidths (ε, the summation parameter; see eq 7). We have empirically found and + tabulated δ in Table 1 for the radial order couplings n-n′ ∈ f-f, p1-p1, and p2-p2 (the signal strength depends only + weakly on ε; we set it to 3 line widths). Figure 1 shows that for any two adjacent ridges (adjacent n and n′), mode frequencies ωnk and ωn′k become spaced @@ -477,8 +519,83 @@ affecting the quality of the seismic measurements. Owing to these factors, to ma inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR� at fixed radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR� ≤ 2000 and qR� ≤ 300. 
Our + frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). +4 MANI ET AL. + + + + + + + + + +5 a +P27 6 ese +—_ +44 a — +N 7 wee +E 7 ae +237 A A ane +k 7 a +Q i“ +324 “+ +4“ +1; +sere Theoretical +—--— Observed +0 T T T 1 +0 500 1000 1500 2000 2500 +kRo + +Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p1 (orange) and pz (green). The shaded +regions of the same colours indicate 1-linewidth [ about the mode frequency. The yellow shaded region indicates the range of +kRo and w/27 to which we have restricted ourselves in this analysis. Beyond kRo of 2000, it is seen that the theoretical fitting +of mode frequencies start deviating from the observed dispersion relation for the f-mode. + +such as its amplitude, frequency and linewidth, and consequently in Bz,q in our case. We use the same noise model +as in H21, which was motivated by the above discussion, + +Gq = (|Bral”): (9) + +where, unlike H21, we again sum over tw. Gx,q is real, with the symmetry relation Gxq = G_kr,—q (see Appendix A +for explanation). + +2. DATA ANALYSIS + +In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the +Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image +is Postel projected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked +at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 x 194.4 Mm? in size, tracked for 24 hours +and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number +2197, Carrington longitude 90°). This Dopplercube is considered as the physical wavefield ¢(z, y;t). The Fourier-space +wavefield ¢g (and subsequently, the cross-correlation ¢%* ¢¢ @ is obtained by computing the 3D spatial and temporal +Fourier transform of the Dopplercube. 
+ +The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in +Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon +& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination +from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). + +Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral +profiles of the two modes [n, k] and [n’, k’] closely align in w space. This implies that their mode frequencies should be +sufficiently close (|wnx — Wnn’| < 6, the separation parameter). Since Lorentzians decay rapidly, the summation over ++w is significant only over a few linewidths (e, the summation parameter; see eq 7). We have empirically found and +tabulated 6 in Table 1 for the radial order couplings n-n’ € f-f, pi-p1, and p2-p2 (the signal strength depends only +weakly on €; we set it to 3 line widths). + +Figure 1 shows that for any two adjacent ridges (adjacent n and n’), mode frequencies wnz, and wy, become spaced +farther apart with increasing wavenumber /Ro. It is also known that mode linewidth [ grows with radial orders for +a given kRo. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +observation set the total number of modes within a range of kRe (and w/27) that can be clearly observed, thereby +affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually +inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kRo at fixed +radial order are different. In wavenumber, we restrict our analysis to within 200 < kKRe < 2000 and qRo < 300. Our +frequency range is confined to span the range over which acoustic modes are observed (2 < w/2a <5 in mHz). 
+ + Imaging near-surface flows using mode-coupling analysis 5 @@ -517,6 +634,7 @@ complement each other (see Sekii 1997), where RLS tries to minimize the misfit b SOLA gives better localization. For total number of modes M , RLS scales as MxJ where J is the number of basis functions fj(z) (J � M ; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000, + computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While @@ -554,12 +672,15 @@ k ||KU−B||2, with || ||2 denoting the L2 norm. Here, K is the matrix formed by the sensitivity kernels: {Cqj,k,Dqj,k}. U is a vector composed of the flow coefficients: {Pqj , Tqj} and B is a vector + composed of computed B-coefficients: {Bk,q}. The least-squares problem is solved simultaneously for poloidal and + toroidal flow. We use B-spline basis functions as our fj(z), comprising 11 knots spaced uniformly in acoustic radius, for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis functions for each poloidal and toroidal, the dimensions of K, U and B are thus M×22, 22×1, and M×1 respectively. + Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries Gk,q; see eq 9; dimension M ×M) and pre-multiplying by Kᵀ, @@ -568,6 +689,83 @@ M ×M) and pre-multiplying by Kᵀ, U =(KᵀΛ−1K)−1KᵀΛ−1B. (12) +IMAGING NEAR-SURFACE FLOWS USING MODE-COUPLING ANALYSIS 5 + + + + + +Coupling kRo range # of 6 + +modes + +f-f [400,1000] 5240 4 + +[1000,1500] 7784 1.1 +[1500,2000] 10940 0.4 + + + + + + + +P1-P1 (400, 1000] 5240 4.5 +[1000,1750] 12852 2 +P2-Pp2 [200,1000] 5886 3 + + + +[1000,1300] 4280 3 + + + +Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different +ranges of kRo. + +3. 
INVERSION + +The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements +Bx,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and +leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods +complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas +SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis +functions f;(z) (J < M; see eq 3 and section 3.1), whereas SOLA scales as M? (see Appendix B). For M > 5000, +computation starts to quickly become expensive for SOLA. + +Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While +f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is +present even in pj-p1, and p9-p2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are +interested in only surface flows, we leave higher order coupling to future work. + +It bears mentioning that the slopes of the ridges in the kRo-v spectrum (Figure 1) increase with radial order. This +limits us to low-to-intermediate kRe (< 1000) for these higher radial orders if we are to remain under the acoustic cut- +off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals +from low kRe - too large an observation region could possibly render invalid the Cartesian geometry approximation. +Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions +separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich +helioseismic technique. + +3.1. 
RLS + +For given q, the forward problem may be stated as +KU =B, (10) + +with the aim to minimize the misfit }* ||KU — B||2, with |] ||) denoting the Lz norm. Here, K is the matrix formed +k + +by the sensitivity kernels: {Cqj,n,Pqj,n}. U is a vector composed of the flow coefficients: {P;,Tq;} and B is a vector +composed of computed B-coefficients: {B,,q}. The least-squares problem is solved simultaneously for poloidal and +toroidal flow. We use B-spline basis functions as our f;(z), comprising 11 knots spaced uniformly in acoustic radius, +for both poloidal and toroidal coefficients. Hence, for M modes (total number of k& for a given q is M) and 11 basis +functions for each poloidal and toroidal, the dimensions of K, U and B are thus M x 22, 22x 1, and M x 1 respectively. +Normalizing both sides of eq 10 by the noise covariance A (a diagonal matrix with the entries G,,_; see eq 9; dimension +M x M) and pre-multiplying by KT, + +(KTA~'K)U =(KTA“!)B, (11) +U =(KTA7!K)~'KTA“!B. (12) + + 6 Mani et al. @@ -595,6 +793,7 @@ U = (KᵀΛ−1K + λI)−1KᵀΛ−1B, (13) where I is the identity matrix for L1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed by plotting ||U||2 vs ||KU − B||2 for different values of λ (see right panel of Figure 2), is usually chosen as the + regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal flow Pq are shown in Figure 3. @@ -606,7 +805,9 @@ To improve confidence in the imaged near-surface flows through mode-coupling, we from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by examining the advection of convective granules (1.2 Mm, qR� ≈ 3500; Hathaway et al. 2015) by underlying larger- + scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm), + LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. 
Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 @@ -616,9 +817,10 @@ Time series of intensity images from HMI, with the same properties of the Dopple tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between consecutive intensity images, which we denote as I1, I2. The LCT method selects a patch in two images each -(I1 = I1e(x−xij) +(I1 = I1e(x−xij) 2/2 sigma2 , I2 = I2e(x−xij) + 2/2 sigma2) that observe the same granule at the grid point xij = (xi, yj). A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance @@ -639,6 +841,7 @@ Provided that the time difference ∆t, here 45 seconds, between the images is l min), the velocities are given by vx = ∆x/∆t and vy = ∆y/∆t. This exercise is repeated for all grid points in the images I1, I2 and for each consecutive pair of images in the cube. + In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing vx and vy. FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the @@ -662,13 +865,12 @@ For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (h substituting P and T from eq 3 into eq 2 as below - uuu(q, z) = −∇2Pez +∇(∂zP ) +∇hT×ez, -= −(0, 0, ∂2xP + ∂2yP + ∂2zP ) + (∂x∂zP, ∂y∂zP, ∂2zP ) + (∂yT, −∂xT, 0). (15) += −(0, 0, ∂2xP + ∂2yP + ∂2zP ) + (∂x∂zP, ∂y∂zP, ∂ -Setting ∂2x + ∂ 2 -y = q +zP ) + (∂yT, −∂xT, 0). 
(15) -2, div is given by, +Setting ∂2x + ∂2y = q2, div is given by, ∇h · uuu(q, z) = q2∂zP, (16) @@ -726,12 +928,14 @@ M-C : φ(x, y; t) =====⇒ φωk , Bk,q inversion -======⇒ P, T ∇h·===⇒ +======⇒ P, T +∇h·===⇒ ∇× -eqns 16, 17 +eqns 16, 17 Filter, + =====⇒ 2D FFT @@ -765,6 +969,7 @@ Table 2 summarizes the results of the comparison between flows obtained from mod where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from the two methods near supergranular scale (qR� ≈ 100). Near-surface flows are imaged most faithfully when all the + couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between @@ -831,6 +1036,7 @@ Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR�, we first fix the coupling(s) + and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and @@ -944,25 +1150,26 @@ uσq(z) = j { -q2 fjez + iq f - -′ -j +q2 fjez + iq f ′j } Pσjq + iq×ez fjTσjq. (A1) For flows in the anelastic limit (u � speed of sound), we can denote the flow perturbation operator as δLσ = + −2iωρuσ ·∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, δLσq = −2iω ρ (iuσq · k + uσq · ez∂z), (A2) -=−2iωρ +=−2iωρ ∑ j { -−k · q f ′jPσjq − k · (q×ez) fjTσjq + q2 fjPσjq ∂z +−k · q f ′jPσjq − k · (q×ez) fjTσjq + q2 fjP + +σ +jq ∂z } . (A3) @@ -977,17 +1184,16 @@ Express the mode eigenfunction describing oscillations in the Cartesian domain b where H and V are real-valued functions; n and n′ are dropped for compactness of notation. 
Then the coupling of -two modes ξk and ξk′ (k -′ = k + q), by the flow perturbation operator δLσq , denoted by coupling integral Λkk′(σ), is +two modes ξk and ξk′ (k′ = k + q), by the flow perturbation operator δLσq , denoted by coupling integral Λk +k′(σ), is given by -Λkk′(σ) ≡ -∫ +Λk +k′(σ) ≡ -dx (δLσqξk) · ξ -∗ -k′ = +∫ +dx (δLσqξk) · ξ∗k′ = ∫ dx @@ -1008,10 +1214,8 @@ jq (k̂ · k̂ H ′kH ∗ -k′ + V +k′ + V ′kV -′ -kV ∗ k′) @@ -1050,10 +1254,8 @@ q2 fj (k̂ · k̂ H ′kH ∗ -k′ + V +k′ + V ′kV -′ -kV ∗ k′) @@ -1083,48 +1285,40 @@ k′ + VkV k′). (A6) Note the symmetry Cqj,k = C−qj,−k and Dqj,k = D−qj,−k. This coupling integral contributes to the cross-spectral + measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on wavefield cross-correlation as -〈φω∗k φω+σk+q 〉 = H -ω -kk′σΛ +〈φω∗k φω+σk+q 〉 = Hω +kk′σΛk -k k′(σ), (A7) where the function H is given by -Hωkk′σ = −2iω(Nk|Rωk |2Rω+σk′ +Nk′ |R -ω+σ -k′ | - +Hωkk′σ = −2iω(Nk|Rωk |2Rω+σk′ +Nk′ |Rω+σk′ | 2Rω∗k ). (A8) We absorb the factor −2iω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. + The mode spectral profile R is a Lorentzian, given by Rωk = 1 -ω2nk − ω2 − iωγnk/2 +ω2 +nk − ω2 − iωγnk/2 + , (A9) where ωnk is the resonant frequency of the mode, and γnk is the mode linewidth. Eq A9 can be derived by introducing mode damping −iωγρ as an operator in the differential equation that governs undamped, driven oscillations (see eq -5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. - -Also, the parity Hωkk′σ = H -−ω∗ -kk′−σ and R -ω -k = R +5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. -−ω∗ -k are established. Mode normalization N is given by +Also, the parity Hωkk′σ = H−ω∗kk′−σ and Rωk = R−ω∗k are established. 
Mode normalization N is given by Nk = 1 @@ -1143,7 +1337,8 @@ Rωk , (A10) -where the 1Q +where the 1 +Q Q∑ k @@ -1155,22 +1350,79 @@ This forces N to be isotropic, i.e., to only depend on k, and not k. The sum ove Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve -to establish the parity Bσk,q = B -∗−σ -−k,−q. This allows for obtaining P +to establish the parity Bσk,q = B∗−σ−k,−q. This allows for obtaining Pσq = P ∗−σ−q , and subsequently, purely real flow in -σ -q = P +the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into -∗−σ -−q , and subsequently, purely real flow in +the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσk,q = G−σ−k,−q. -the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into +12 MANI ET AL. +Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) +Ex = En (2) = ikHyx(2)ez + 2Vnx(2), (A4) + +where H and V are real-valued functions; n and n’ are dropped for compactness of notation. Then the coupling of +two modes €, and €,, (k’ = k + q), by the flow perturbation operator 6£%, denoted by coupling integral AR,(c), is +given by + +a al +AR (co) = [x (LG &x) «ke = [x - 2iwp >~ {@ fj Poy (kok Hy Ag + Vi Vp) +J + +a al +— [kg fP%y +h: (axez) fiT%] (bk Hy His + Veve)} (A5) + + + +We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and +toroidal sensitivity kernels, Cg;,~ and Dgj,~ respectively. Hence, they are given by + +Caik = / dzp l@ fi (kk HLS +ViVe) + +—k-q fi (le- ke’ Hy Hy, + VaVi)| + +Daj.w =k (qxez) [eon (kk Hy His + VeVi). 
(A6) +Note the symmetry Cqj,4 = C_—qj,-k and Dgj,x = D—qj,-k- This coupling integral contributes to the cross-spectral + +measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on +wavefield cross-correlation as + +(O8* Opig) = AiwoAe (2), (A7) +where the function H is given by +fog = —2iw( NelREI? REM? + Ne |GET? RE) (A8) + +We absorb the factor —2iw into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. +The mode spectral profile R is a Lorentzian, given by + +1 + +_ twnk /2 , (A9) + +a = +k w?, — Ww +where wpx is the resonant frequency of the mode, and 7ynz is the mode linewidth. Eq AY can be derived by introducing +mode damping —iwyp as an operator in the differential equation that governs undamped, driven oscillations (see eq +5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. + +Also, the parity H%,, = Hy, and RZ = R,** are established. Mode normalization N is given by + +1 & & lee? +N, = — eo A10 +k QX Re (A10) + +Ww + +Q +where the o >> on the right-hand-side implies average over all [k,z,k,] (Q terms in all) such that k = |k| is constant. + +This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over w is within five linewidths of wy x. +Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. + +The three equations A8 through A10, along with the symmetry relation for kernels, and summation over tw, serve +to establish the parity Bg = Bry ¢ This allows for obtaining Py = Pry, and subsequently, purely real flow in +the real domain. Setting o = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into +the noise model obtained in H21 and summing over tw establishes the symmetry GZ 4 = Gh ¢ -the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσk,q = G -−σ -−k,−q. 
@@ -1198,6 +1450,7 @@ Since the kernels in eq A6 are manifest as coefficients on a basis fj(z), we fir as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: P ≡ Pq(z), p ≡ Pqj , F ≡ fj(z), B ≡ Bk,q C ≡ Cqj,k and K ≡ Kk,q(z), we write (assume only poloidal flow for + simplicity, the same derivations hold true for toroidal flow as well) P = Fp (B11) @@ -1251,8 +1504,8 @@ exp This can be achieved by solving the optimization problem minimize X = -∫ +∫ dz [ T (z, zo)−Θq(z, zo) @@ -1272,6 +1525,65 @@ As an aside, we note that averaging kernels can similarly be constructed for RLS and B14. +IMAGING NEAR-SURFACE FLOWS USING MODE-COUPLING ANALYSIS 13 + +B. SOLA INVERSIONS + +Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors +for the mode q and depth z,, which we will call az,z.. A linear weighted sum of the measurements Bx,q in the fashion +>) ak,z0Br,q allows for an average value of the flow P,(z) to be estimated at the depth z,. To obtain the coefficients +k + +Qk,zo, it is assumed that a set of sensitivity kernels Ky,,q(z) for the mode q can be summed up coherently to give an +’averaging kernel’ that is localized at the depth z,. Conventionally, a Gaussian centered at z, and a width A is chosen +which the averaging kernel should resemble after performing inversion. + +B.1. Kernels in the integral form + +Since the kernels in eq A6 are manifest as coefficients on a basis f;(z), we first derive kernels that can be expressed +as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: +P = Py(z), p = Pay, F = f(z), B = Beg C = Cajx and K = Kx q(z), we write (assume only poloidal flow for +simplicity, the same derivations hold true for toroidal flow as well) + +P=Fp (B11) + +The size of P is thus the same as the length of the radial grid z. 
+Now, pre-multiply by F7 and integrate over z on both sides (drop the integral notation for compactness), + +F’P=(F'F)p +p=(F'F)'F'P (B12) +Now, substituting eq B12 into the forward problem eq 6, +B=Cp +=(F'F)'F'CP +=KP (B13) +where +K =(F'F)"'F'C, +-1 +ie, Keg(2)=>.| / dz fi(2)Fir(2)| Sir(2)Casre (B14) + +i5' + +B.2. Obtaining the coefficients a + +Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z, + + + + + +1 Z— Zo +T (2,20) = Tani exp( A? ). (B15) +This can be achieved by solving the optimization problem +2 +minimize ¥ = je [7 (2,20) = Oq(2,0)| ; (B16) +where we introduce the averaging kernel for mode q thus +Oq(z, 20) = > On,2oKR,g(2): (B17) +k + +As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 +and B14. + + 14 Mani et al. @@ -1281,7 +1593,8 @@ Figure 8. Left : Kernel Kk,q(z) (eq B14) shown vs depth z for the three radial o (eq B17) using SOLA, for qR� = [−112,−45] at depth z0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). Integral of the averaging kernel over z is 0.89. -Setting ∂X∂α → 0 gives us the matrix problem to be solved +Setting ∂X +∂α → 0 gives us the matrix problem to be solved A{α} = v, @@ -1299,7 +1612,9 @@ dz Kk,q(z)Kk′,q(z) and v = ∫ dz Kk,q(z)T (z, zo). Here, k′ is just a dummy index for + denoting elements in the matrix A, (k′ 6= k+q). In the last line of eq B18, we introduce regularization using an Identity + matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α @@ -1323,10 +1638,7 @@ k αk,zo ∫ -dz Kk,q(z)P - -σ -q (z), +dz Kk,q(z)Pσq (z), = @@ -1390,6 +1702,102 @@ http://doi.org/10.1051/0004-6361/201937331 http://doi.org/10.1086/324323 http://doi.org/10.1103/RevModPhys.74.1073 http://doi.org/10.1007/s41116-020-00028-3 +14 MANI ET AL. + +x1071? 
x10-’ + +6.0 4 — Avg. kernel +sees Target + + + + + + + + +aR =[-112, — 45] +kRo =[-853,-157] + +4.55 2=—-0.48 Mm + + + + +——o—or + + + + + + + + + + + + + + + + + +y OS 5 +Z N 3.07 +x < +1.55 +0.0 -fasmeererrstrccrssssesee et +—-4 —2 0 -5 -4 3 —2 -1 0 +z, Mm z,Mm + +Figure 8. Left: Kernel Kx,q(z) (eq B14) shown vs depth z for the three radial order couplings ff, pi-p1, and p2-p2e. qRo = +[—112, —45] and kRo = [-—853,—157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel +(eq B17) using SOLA, for qRo = [—112, —45] at depth zo = —0.48 Mm, and the corresponding target Gaussian (eq B15). +Integral of the averaging kernel over z is 0.89. + +Setting ox — 0 gives us the matrix problem to be solved +A{a} =v, + +{a} = |4 + y| “'y, (B18) + +where the square matrix A = [dz Keq(z) Kp g(z) and vu = [ dz Ke.q(z)T(z, 20). Here, k’ is just a dummy index for +denoting elements in the matrix A, (k’ 4 k+q). In the last line of eq B18, we introduce regularization using an Identity +matrix I, with the regularization parameter jz - purpose being the same as that described in section 3.1. Obtaining +a thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute a + +obtained from eq B18 into last line of eq B13, and S> on both sides +k + +So on,2. Bq = So oR,2. ju Kxq(2) PG (2); +k k += f a2 0q(2.20)P§(2). +~ (Pq (Zo) (B19) + +Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di- +vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. + +REFERENCES +Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. Boning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & +1990, ApJ, 364, 699, doi: 10.1086/169452 Schou, J. 2020, A&A, 635, A181, +Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of doi: 10.1051/0004-6361/201937331 + +Modern Physics, 64, 885, +doi: 10.1103/RevModPhys.64.885 + +Birch, A. 
C., Schunker, H., Braun, D. C., et al. 2016, +Science Advances, 2, e1600557, +doi: 10.1126/sciadv.1600557 + +Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, —. 2021, Living Reviews in Solar Physics, 18, 2, +A&A, 628, A37, doi: 10.1051/0004-6361/201935591 doi: 10.1007/s41116-020-00028-3 + +Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, +doi: 10.1086 /324323 + +Christensen-Dalsgaard, J. 2002, Reviews of Modern +Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 + + Imaging near-surface flows using mode-coupling analysis 15 diff --git a/read/results/tika/2201.00200.txt b/read/results/tika/2201.00200.txt index cfe723b..83244e5 100644 --- a/read/results/tika/2201.00200.txt +++ b/read/results/tika/2201.00200.txt @@ -18,27 +18,6 @@ - - - - - - - - - - - - - - - - - - - - - @@ -353,11 +332,15 @@ versions of the Ledoux discriminant allowed them to obtain a model-independent profile for this quantity. Their reconstruction method also gives solar structures that are in excellent agree- ment with other structural inversions, namely the entropy, S , the -square of the speed of sound, c2s , and the density, ρ. To illustrate +square of the speed of sound, c2 + +s , and the density, ρ. To illustrate the convergence of their reconstruction procedure, they show (right panels of their Figs. 3-6) the successive iterations that con- verge to an excellent level of agreement for the four structural -inversions (A, S , c2s , ρ) starting from the initial reference model +inversions (A, S , c2 + +s , ρ) starting from the initial reference model adopted in their work. The differences found between the recon- structed model and the reference model are useful as they indi- cate the modifications of the reference model that are required to @@ -373,9 +356,9 @@ the quantity (ASun - Aref). The second concerns the speed of sound. 
The same positive bump at the same location as for the Ledoux discriminant, A, is -observed for the quantity (c2s,Sun − c +observed for the quantity (c2 -2 +s,Sun − c2 s,ref)/c 2 @@ -397,6 +380,7 @@ ancy is negative in the convective zone. The corrections applied to A help reduce these entropy discrepancies in both regions. The fourth concerns the density. The quantity (ρSun − + ρref)/ρref has a negative peak in the radiative region, at ∼ 35% of the stellar radius, and is positive in the convective zone. @@ -553,11 +537,15 @@ solid line. The vertical dashed line in each panel is located at a distance dov below the convective boundary. The impact on the whole stellar structure was quantified by -comparing the four structural quantities (A, S , c2s , ρ) between the +comparing the four structural quantities (A, S , c2 + +s , ρ) between the modified and the reference model. The results are displayed in Fig. 3, with ∆X defined as (X−Xref) for any structural quantity X. The forced local heating in the overshooting layer produces sim- -ilar positive peaks for ∆A, ∆S , and ∆c2s , as found for the temper- +ilar positive peaks for ∆A, ∆S , and ∆c2 + +s , as found for the temper- ature. The modification thus provides the correction required to improve the discrepancy for the Ledoux discriminant described in the first of the trends outlined in Sect. 3.1. Unsurprisingly, @@ -592,20 +580,21 @@ These trends are insensitive to the depth over which the tem- perature gradient is modified. Increasing the depth increases the magnitude of the differences but has no impact on their sign. We find that the maximum variation in the model properties, such as -the speed of sound, ∆c2s/c +the speed of sound, ∆c2 +s/c 2 -s,ref , roughly scales with d +s,ref , roughly scales with d2 -2 ov. This scal- - ing is linked to the integrated area between the modified temper- ature gradient curve and the one for the reference (non-modified) temperature gradient, which roughly decreases linearly with r. 
This area is proportional to the square of the overshooting depth, and consequently, the maximum variation in the model proper- -ties is also proportional to d2ov. The qualitative trends also remain +ties is also proportional to d2 + +ov. The qualitative trends also remain the same whether overshooting mixing in the reference model is ignored or included using a step function (with instantaneous mixing) or an exponential decay for the diffusion coefficient (e.g. @@ -640,7 +629,9 @@ mixing and whether microscopic diffusion is included or not. In the convective zone, all models give a positive difference for the density between the model with a modified temperature gra- dient and the relevant reference model. For the other quantities -(S , c2s ), the differences in the convective zone are very sensitive +(S , c2 + +s ), the differences in the convective zone are very sensitive Fig. 3. Difference of various structural quantities between a model with a modified temperature gradient in the overshoot- @@ -694,7 +685,9 @@ this problem, as mentioned in Sect. 1. However, the details of the physical process responsible for this local heating have been lacking, whereas we can now suggest an explanation based on the B21 results. The trends that we find for the four structural -quantities (A, S , c2s , ρ) are robust below the convective bound- +quantities (A, S , c2 + +s , ρ) are robust below the convective bound- ary and in a large fraction of the radiative core, independently of the treatment of mixing and diffusion and of the method for con- structing the models in Sects. 3.2.1 and 3.2.2. 
Our experiments diff --git a/read/results/tika/2201.00201.txt b/read/results/tika/2201.00201.txt index 3da09c0..fc0a70e 100644 --- a/read/results/tika/2201.00201.txt +++ b/read/results/tika/2201.00201.txt @@ -18,27 +18,6 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/read/results/tika/2201.00214.txt b/read/results/tika/2201.00214.txt index 95a891e..759766b 100644 --- a/read/results/tika/2201.00214.txt +++ b/read/results/tika/2201.00214.txt @@ -18,24 +18,6 @@ - - - - - - - - - - - - - - - - - - @@ -97,10 +79,17 @@ Temperature Analysis of Flaring Coronal Loops -N. Fathalian1, S. S. Hosseini Rad2, N. Alipour2, H. Safari2 -1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran. +N. Fathalian +1, S. S. Hosseini Rad + +2, N. Alipour +2, H. Safari + +2 +1Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran. 2Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran. + e-mail: narges_fathalian@alum.sharif.edu January 4, 2022 @@ -304,6 +293,7 @@ has ten different wavelength channels, three in white light and UV, and the othe channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper- atures (in order of T = 104.7K), not the corona, is not taken into account (Aschwanden et al. 2015). + Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 ). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16MK. @@ -441,7 +431,9 @@ dT [log (T)− log (Tp,i) -2σ2T,i +2σ2 +T,i + ). (1) In which, Tp,i is the DEM peak temperature, EMp,i is the peak EM function, and σT,i is the @@ -462,6 +454,7 @@ k EM(Tk)Rλ(Tk). (2) Here, Rλ(T) is the instrumental temperature response function of each wavelength filter λ, which + is obtained by the code aia_get_response.pro in the SSW package. As time has passed, the AIA response functions calibration has partly changed. 
Here, we use the updated calibration of the @@ -472,7 +465,8 @@ DEM to the background-subtracted observed fluxes in multiple wavelengths, the th rameters, temperature width (σT,i), peak of temperature (Tp,i), and peak emission measure (EMp,i) -are found by minimizing χ2i . +are found by minimizing χ2 +i . Our data sample is uneven because of omitting some damaged images in between. There- fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is @@ -748,6 +742,7 @@ with the non-flaring ones. And figure 9 shows that the increasing and decreasing range, or the difference between maximum and minimum of the temperature value (max(log(T))- min(log(T))), is much higher on average for the loops’ strips of the flaring AR in comparison with + the loops’ strips of the non-flaring one. V. Summery @@ -854,6 +849,7 @@ region are also hot loops with the mean temperature above this range. They also oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring loops. The temperature of the non-flaring loops are lower (log(T) < 6) and as discussed above, + we believe that the observed oscillation-like periods in non-flaring loops should be more probably related to the high amplitude fluctuations. 
@@ -1006,8 +1002,11 @@ view of the area, marked by a box in the left, the loops are distinguished in re 6.6 6.8 -Lo -gT +L +o + +g +T F−LoopA @@ -1019,8 +1018,11 @@ F−LoopA 6.6 6.8 -Lo -gT +L +o + +g +T 22:10 22:20 22:30 22:40 22:50 23:00 5.8 @@ -1033,8 +1035,11 @@ gT time -Lo -gT +L +o + +g +T @@ -1050,8 +1055,11 @@ gT 6.8 -Lo -gT +L +o + +g +T F−LoopB1 @@ -1071,8 +1079,11 @@ F−LoopB1 time -Lo -gT +L +o + +g +T Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to @@ -1104,10 +1115,10 @@ op Le ng -th( -Mm +th +(M -) +m) 5.8 @@ -1195,10 +1206,10 @@ op Le ng -th( -Mm +th +(M -) +m) 5.8 @@ -1234,10 +1245,10 @@ op Le ng -th( -Mm +th +(M -) +m) 5.6 @@ -1550,13 +1561,13 @@ ali ze d I -nte +nt -ns -ity +en +sit - Fe - X +y F +e X VI II @@ -1592,8 +1603,11 @@ work. 6.8 -Lo -gT +L +o + +g +T NonF−LoopA @@ -1613,8 +1627,11 @@ NonF−LoopA time -Lo -gT +L +o + +g +T @@ -1630,8 +1647,11 @@ gT 6.8 -Lo -gT +L +o + +g +T NonF−LoopB @@ -1651,8 +1671,11 @@ NonF−LoopB time -Lo -gT +L +o + +g +T Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non- @@ -1676,16 +1699,19 @@ Time -Lo -op +L +o + +o +p L en -gt -h( +g +th -M +(M m ) @@ -1718,16 +1744,19 @@ Time -Lo -op +L +o + +o +p L en -gt -h( +g +th -M +(M m ) @@ -1760,16 +1789,19 @@ Time -Lo -op +L +o + +o +p L en -gt -h( +g +th -M +(M m ) @@ -1819,22 +1851,27 @@ P er ce -nt +n + +ta +g -ag e +o -of - T +f +T em -p. +p - P +. P er io -ds +d + +s Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non- @@ -1858,10 +1895,12 @@ flaring (red bars) ARs. 
The horizontal axis shows the temperature periods in min max(log(T))−min(log(T)) N -um +u -be -r +m +b + +er Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) diff --git a/read/results/tika/GeoTopo-book.txt b/read/results/tika/GeoTopo-book.txt index 4f88346..f1a29e5 100644 --- a/read/results/tika/GeoTopo-book.txt +++ b/read/results/tika/GeoTopo-book.txt @@ -1,7753 +1 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Geometrie und Topologie - - -Einführung in die - -Geometrie und Topologie - -0. Auflage, 31. Dezember 2016 Martin Thoma - - - -Vorwort - -Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet -die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen -und Tutorien. - -Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (Schwarz- -Weiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). - -Danksagungen - -An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen -gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich -basiert die Struktur dieses Skripts auf der Vorlesung von Herrn Prof. Dr. Herrlich und ganze -Abschnitte konnten direkt mit LATEX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre -Inhalte in diesem Skript einbauen zu dürfen! - -Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsauf- -gaben und Lösungen zu benutzen. - -Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen -Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per -E-Mail und nach dem Tutorium beantwortet. Danke! - -Was ist Topologie? 
- -Die Kugeloberfläche S2 lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche -oder der Oberfläche einer Pyramide verformen, aber nicht zum R2 oder zu einem Torus T 2. Für -den R2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein -Loch machen. - -Erforderliche Vorkenntnisse - -Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪,∩, \, ∅,R,P(M)) -und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider- -spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag, -Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem -in „Analysis I“ vermittelt. - -Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, -der Spektralsatz und der projektive Raum P(R) aus „Lineare Algebra I“ bekannt sind. In „Lineare -Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. - -http://martin-thoma.com/geotopo/ - - -iii - -(a) S2 (b) Würfel (c) Pyramide - -y - -x - -(d) R2 (e) T 2 - -Abbildung 0.1: Beispiele für verschiedene Formen - -Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und -Zahlentheorie“ gehört zu haben. - - - -Inhaltsverzeichnis - -1 Topologische Grundbegriffe 2 -1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 -1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 -1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 -1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 -1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 -1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
22 - -2 Mannigfaltigkeiten und Simplizialkomplexe 24 -2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24 -2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29 -2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43 - -3 Fundamentalgruppe und Überlagerungen 44 -3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44 -3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47 -3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 -3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61 - -4 Euklidische und nichteuklidische Geometrie 64 -4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 -4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74 - -4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74 -4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77 -Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86 - -5 Krümmung 87 -5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 -5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89 -5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 -5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 
94 - -Lösungen der Übungsaufgaben 99 - -Bildquellen 105 - -Abkürzungsverzeichnis 106 - -Ergänzende Definitionen und Sätze 107 - -Symbolverzeichnis 108 - - - -2 Inhaltsverzeichnis - -Stichwortverzeichnis 111 - - - -1 Topologische Grundbegriffe - -1.1 Topologische Räume - -Definition 1 -Ein topologischer Raum ist ein Paar (X,T) bestehend aus einer Menge X und T ⊆ P(X) -mit folgenden Eigenschaften - -(i) ∅, X ∈ T -(ii) Sind U1, U2 ∈ T, so ist U1 ∩ U2 ∈ T - -(iii) Ist I eine Menge und Ui ∈ T für jedes i ∈ I, so ist -⋃ -i∈I - -Ui ∈ T - -Die Elemente von T heißen offene Teilmengen von X. - -A ⊆ X heißt abgeschlossen, wenn X \A offen ist. - -Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es -Mengen, die sowohl abgeschlossen als auch offen sind. - -Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) -Betrachte ∅ und X mit der trivialen Topologie Ttriv = { ∅, X }. -Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem XC = X \X = ∅ ∈ T und -X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. � - -Beispiel 1 (Topologien) -1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie TEuklid: - -U ⊆ Rn offen⇔ für jedes x ∈ U gibt es r > 0, -sodass Br(x) = { y ∈ Rn | d(x, y) < r } ⊆ U - -Diese Topologie wird auch „Standardtopologie des Rn“ genannt. Sie beinhaltet unter -anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli- -chem Mittelpunkt (vgl. Definition 1.ii). - -2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum. - -3) Für eine Menge X heißt TDiskret = P(X) diskrete Topologie. - -4) X := R,TZ := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie -Beobachtungen: - -• U ∈ TZ ⇔ ∃f ∈ R[X], sodass R \ U = V (f) = { x ∈ R | f(x) = 0 } -• Es gibt keine disjunkten offenen Mengen in TZ . - - - -4 1.1. TOPOLOGISCHE RÄUME - -5) X := Rn,TZ = {U ⊆ Rn|Es gibt Polynome f1, . . . , fr ∈ R[X1, . . . , Xn] sodass -Rn \ U = V (f1, . . . 
, fr)} - -6) X := { 0, 1 } ,T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum. -∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. - -Definition 2 -Sei (X,T) ein topologischer Raum und x ∈ X. -Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U0 ∈ T gibt mit x ∈ U0 und -U0 ⊆ U . - -Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. - -Definition 3 -Sei (X,T) ein topologischer Raum und M ⊆ X eine Teilmenge. - -a) $M^\circ := \{ x \in M \mid M \text{ ist Umgebung von } x \} = \bigcup_{U \subseteq M,\, U \in T} U$ heißt Inneres oder offener -Kern von M . - -b) $\overline{M} := \bigcap_{M \subseteq A,\, A \text{ abgeschlossen}} A$ heißt abgeschlossene Hülle oder Abschluss von M . - -c) $\partial M := \overline{M} \setminus M^\circ$ heißt Rand von M . - -d) M heißt dicht in X, wenn $\overline{M} = X$ ist. - -Beispiel 2 -1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: $\overline{M} = \mathbb{R}$ und $M^\circ = \emptyset$ - -2) Sei X = R und M = (a, b). Dann gilt: $\overline{M} = [a, b]$ - -3) Sei X = R,T = TZ und M = (a, b). Dann gilt: $\overline{M} = \mathbb{R}$ - -Definition 4 -Sei (X,T) ein topologischer Raum. - -a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen -aus B ist. - -b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen -Durchschnitten von Elementen aus S ist. - -Beispiel 3 (Basis und Subbasis) -1) Jede Basis ist auch eine Subbasis, z.B. - -S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als -auch Subbasis. - -2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist - -B = {Br(x) | r ∈ Q>0, x ∈ Qn } - -eine abzählbare Basis von T. - -3) Sei (X,T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }. -Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt: - - - -5 1.1. TOPOLOGISCHE RÄUME - -• S ⊆ T -• ∅, { 0, 1 } und { 0, 2 } ∈ S -• { 0 } = { 0, 1 } ∩ { 0, 2 } -• X = { 0, 1 } ∪ { 0, 2 } - -Allerdings ist S keine Basis von (X,T), da { 0 } nicht als Vereinigung von Elementen -aus S erzeugt werden kann. 
- -Bemerkung 2 -Sei X eine Menge und S ⊆ P(X). Dann gibt es genau eine Topologie T auf X, für die S -Subbasis ist. - -Definition 5 -Sei (X,T) ein topologischer Raum und Y ⊆ X. -TY := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . -TY heißt Teilraumtopologie und (Y,TY ) heißt ein Teilraum von (X,T). - -Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. - -Definition 6 -Seien X1, X2 topologische Räume. -U ⊆ X1 × X2 sei offen, wenn es zu jedem x = (x1, x2) ∈ U Umgebungen Ui um xi mit -i = 1, 2 gibt, sodass U1 × U2 ⊆ U gilt. -T = { U ⊆ X1 ×X2 | U offen } ist eine Topologie auf X1×X2. Sie heißt Produkttopologie. -B = { U1 × U2 | Ui offen in Xi, i = 1, 2 } ist eine Basis von T. - -U - -xx2 - -x1 - -U2 - -U1 - -X1 - -X2 - -Abbildung 1.1: Zu x = (x1, x2) gibt es Umgebungen U1, U2 mit U1 × U2 ⊆ U - -Beispiel 4 (Produkttopologien) -1) X1 = X2 = R mit euklidischer Topologie. -⇒ Die Produkttopologie auf R× R = R2 stimmt mit der euklidischen Topologie auf -R2 überein. - -2) X1 = X2 = R mit Zariski-Topologie. T Produkttopologie auf R2: U1 × U2 -(Siehe Abbildung 1.2) - - - -6 1.1. TOPOLOGISCHE RÄUME - -U1 = R \ N - -U -2 - -= -R -\ -N - -Abbildung 1.2: Zariski-Topologie auf R2 - -Definition 7 -Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ sei die Menge -der Äquivalenzklassen, π : X → X, x 7→ [x]∼. - -TX := -{ -U ⊆ X - -∣∣ π−1(U) ∈ TX } -(X,TX) heißt Quotiententopologie. - -Beispiel 5 -X = R, a ∼ b :⇔ a− b ∈ Z - -R-1 0 1 2 3 4 5 - -0 - -a - -U - -aπ -−1(u) - -0 ∼ 1, d. h. [0] = [1] -Beispiel 6 - -Sei X = R2 und (x1, y1) ∼ (x2, y2)⇔ x1− x2 ∈ Z und y1− y2 ∈ Z. Dann ist X/∼ ein Torus. - -Beispiel 7 (Projektiver Raum) - -X = Rn+1 \ { 0 } , x ∼ y ⇔ ∃λ ∈ R× mit y = λx -⇔ x und y liegen auf der gleichen -Ursprungsgerade - -X = Pn(R) - - - -7 1.2. METRISCHE RÄUME - -Also für n = 1: - -−4 −2 2 4 6 8 - -−4 - -−2 - -2 - -4 - -1.2 Metrische Räume - -Definition 8 -Sei X eine Menge. 
Eine Abbildung d : X ×X → R+0 heißt Metrik, wenn gilt: -(i) Definitheit: d(x, y) = 0⇔ x = y ∀x, y ∈ X -(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X -(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X -Das Paar (X, d) heißt ein metrischer Raum. - -Bemerkung 3 -Sei (X, d) ein metrischer Raum und - -Br(x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+ - -B = {Br(x) ⊆ P(X) | x ∈ X, r ∈ R+ } ist Basis einer Topologie auf X. -Definition 9 - -Seien (X, dX) und (Y, dY ) metrische Räume und ϕ : X → Y eine Abbildung mit - -∀x1, x2 ∈ X : dX(x1, x2) = dY (ϕ(x1), ϕ(x2)) - -Dann heißt ϕ eine Isometrie von X nach Y . - -Beispiel 8 (Skalarprodukt erzeugt Metrik) -Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt 〈·, ·〉. Dann wird V -durch d(x, y) := - -√ -〈x− y, x− y〉 zum metrischen Raum. - -Beispiel 9 (diskrete Metrik) -Sei X eine Menge. Dann heißt - -d(x, y) = - -{ -0 falls x = y -1 falls x 6= y - -die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. - - - -8 1.2. METRISCHE RÄUME - -Beispiel 10 -X = R2 und d ((x1, y1), (x2, y2)) := max(‖x1 − x2‖, ‖y1 − y2‖) ist Metrik. -Beobachtung: d erzeugt die euklidische Topologie. - -Br(0) = - -r r - -r - -r - -(a) Br(0) (b) Euklidische Topologie - -Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10 - - - -9 1.2. METRISCHE RÄUME - -Beispiel 11 (SNCF-Metrik1) -X = R2 - -−4 −2 2 4 6 8 - -−4 - -−2 - -2 - -4 - -Definition 10 -Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x 6= y in X -Umgebungen Ux um x und Uy um y gibt, sodass Ux ∩ Uy = ∅. - -Bemerkung 4 (Trennungseigenschaft) -Metrische Räume sind hausdorffsch, wegen - -d(x, y) > 0⇒ ∃ε > 0 : Bε(x) ∩Bε(y) = ∅ - -Beispiel 12 (Topologische Räume und Hausdorff-Räume) -1) (R,TZ) ist ein topologischer Raum, der nicht hausdorffsch ist. - -2) (R,TEuklid) ist ein topologischer Hausdorff-Raum. - -Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) -Seien X,X1, X2 Hausdorff-Räume. 
- -a) Jeder Teilraum von X ist hausdorffsch. - -b) X1 ×X2 ist hausdorffsch (vgl. Abbildung 1.4). -Definition 11 - -Sei X ein topologischer Raum und (x)n∈N eine Folge in X. x ∈ X heißt Grenzwert oder -Limes von (xn), wenn es für jede Umgebung U von x ein n0 gibt, sodass xn ∈ U für alle -n ≥ n0. - -Bemerkung 6 -Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. - -Beweis: Sei (xn) eine konvergierende Folge und x und y Grenzwerte der Folge. - -Da X hausdorffsch ist, gibt es Umgebungen Ux von x und Uy von y mit Ux ∩ Uy = ∅ falls -x 6= y. Da (xn) gegen x und y konvergiert, existiert ein n0 mit xn ∈ Ux ∩ Uy für alle n ≥ n0 -⇒ x = y � - -1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. - -https://de.wikipedia.org/wiki/Franz%C3%B6sische_Eisenbahnmetrik - - -10 1.3. STETIGKEIT - -(x1, y1) (x2, y2) - -x1 x2 -U1 ×X2 U2 ×X2 - -X1 - -X2 - -Abbildung 1.4: Wenn X1, X2 hausdorffsch sind, dann auch X1 ×X2 - -1.3 Stetigkeit - -Definition 12 -Seien (X,TX), (Y,TY ) topologische Räume und f : X → Y eine Abbildung. - -a) f heißt stetig :⇔ ∀U ∈ TY : f−1(U) ∈ TX . - -b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : -Y → X gibt, sodass g ◦ f = idX und f ◦ g = idY . - -Bemerkung 72 - -Seien X,Y metrische Räume und f : X → Y eine Abbildung. -Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für -alle y ∈ X mit d(x, y) < δ gilt dY (f(x), f(y)) < ε. - -Beweis: „⇒“: Sei x ∈ X, ε > 0 gegeben und U := Bε(f(x)). -Dann ist U offen in Y . -Def. 12.a -=====⇒ f−1(U) ist offen in X. Dann ist x ∈ f−1(U). -⇒ ∃δ > 0, sodass Bδ(x) ⊆ f−1(U) -⇒ f(Bδ(x)) ⊆ U -⇒ { y ∈ X | dX(x, y) < δ } ⇒ Beh. -„⇐“: Sei U ⊆ Y offen, X ∈ f−1(U). -Dann gibt es ε > 0, sodass Bε(f(x)) ⊆ U -Vor. -==⇒ Es gibt δ > 0, sodass f(Bδ(x)) ⊆ Bε(f(x))) -⇒ Bδ(x) ⊆ f−1(Bε(f(x))) ⊆ f−1(U) � - -Bemerkung 8 -Seien X,Y topologische Räume und f : X → Y eine Abbildung. 
Dann gilt: -f ist stetig -⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f−1(A) ⊆ X ist abgeschlossen. - -Beispiel 13 (Stetige Abbildungen und Homöomorphismen) -1) Für jeden topologischen Raum X gilt: idX : X → X ist Homöomorphismus. - -2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. - - - -11 1.3. STETIGKEIT - -2) Ist (Y,TY ) trivialer topologischer Raum, d. h. TY = Ttriv, so ist jede Abbildung -f : X → Y stetig. - -3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen -Raum Y und jede Abbildung f . - -4) Sei X = [0, 1), Y = S1 = { z ∈ C | ‖z‖ = 1 } und f(t) = e2πit. - -R0 1 -0 - -f - -g - -Abbildung 1.5: Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist. - -Die Umkehrabbildung g ist nicht stetig, da g−1(U) nicht offen ist (vgl. Abbildung 1.5). - -Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) -Seien X,Y, Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. -Dann ist g ◦ f : X → Z stetig. - -X -f // - -g◦f - -Y - -g�� -Z - -Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig -ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. � - -Bemerkung 10 -a) Für jeden topologischen Raum X ist - -Homöo(X) := { f : X → X | f ist Homöomorphismus } - -eine Gruppe. - -b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. - -c) Iso(X) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X) für -jeden metrischen Raum X. - -Bemerkung 11 (Projektionen sind stetig) -Seien X,Y topologische Räume. πX : X × Y → X und πY : X × Y → Y die Projektionen - -πX : (x, y) 7→ x und πY : (x, y) 7→ y - -Wird X × Y mit der Produkttopologie versehen, so sind πX und πY stetig. - -Beweis: Sei U ⊆ X offen -⇒ π−1X (U) = U × Y ist offen in X × Y . 
� - -Bemerkung 12 -Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X, X = X/∼ der Bahnenraum -versehen mit der Quotiententopologie, π : X → X, x 7→ [x]∼. -Dann ist π stetig. - - - -12 1.4. ZUSAMMENHANG - -Beweis: Nach Definition ist U ⊆ X offen ⇔ π−1(U) ⊆ X offen. � - -Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. - -Beispiel 14 (Stereographische Projektion) -Rn und Sn \ {N } sind homöomorph für beliebiges N ∈ Sn. Es gilt: - -Sn = -{ -x ∈ Rn+1 - -∣∣ ‖x‖ = 1 } -= - -{ -x ∈ Rn+1 - -∣∣∣∣∣ -n+1∑ -i=1 - -x2i = 1 - -} - -O. B. d. A. sei N = - - -0 -... -0 -1 - -. Die Gerade durch N und P schneidet die Ebene H in genau -einem Punkt P̂ . P wird auf P̂ abgebildet. - -f :Sn \ {N } → Rn - -P 7→ -genau ein Punkt︷ ︸︸ ︷ -LP ∩H - -wobei Rn = H = - - - x1... -xn+1 - - ∈ Rn+1 -∣∣∣∣∣∣∣ xn+1 = 0 - - und LP die Gerade in Rn+1 durch N -und P ist. - -Sei P = - - x1... -xn+1 - -, so ist xn+1 < 1, also ist LP nicht parallel zu H. Also schneiden sich LP -und H in genau einem Punkt P̂ . - -Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. - -1.4 Zusammenhang - -Definition 13 - -a) Ein RaumX heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen -U1, U2 von X gibt mit U1 ∩ U2 = ∅ und U1 ∪ U2 = X. - -b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit -der Teilraumtopologie zusammenhängend ist. - - - -13 1.4. ZUSAMMENHANG - -x - -y - -z - -N - -P̂ - -0 - -P - -Abbildung 1.6: Visualisierung der stereographischen Projektion - -Bemerkung 13 -X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A1, A2 -mit A1 ∩A2 = ∅ und A1 ∪A2 = X. - -Beispiel 15 (Zusammenhang von Räumen) -1) (Rn,TEuklid) ist zusammenhängend, denn: - -Annahme: Rn = U1 ∪̇ U2 mit ∅ 6= U1, U2 ∈ TEuklid existieren. -Sei x ∈ U1, y ∈ U2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. 
Nun -betrachten wir V ( Rn als (metrischen) Teilraum mit der Teilraumtopologie TV . -Somit gilt U1 ∩ [x, y] ∈ TV wegen der Definition der Teilraumtopologie. -Dann gibt es z ∈ [x, y] mit z ∈ ∂(U1 ∩ [x, y]), aber z /∈ U1 ⇒ z ∈ U2. In jeder -Umgebung von z liegt ein Punkt von U1 ⇒ Widerspruch zu U2 offen. - -2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R<0 ∪ R>0 -3) R2 \ { 0 } ist zusammenhängend. -4) Q ( R ist nicht zusammenhängend, da (Q ∩ R<√2) ∪ (Q ∩ R>√2) = Q -5) { x } ist zusammenhängend für jedes x ∈ X, wobei X ein topologischer Raum ist. - -6) R mit Zariski-Topologie ist zusammenhängend. - -Bemerkung 14 -Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen- -hängend. - - - -14 1.4. ZUSAMMENHANG - -Beweis: durch Widerspruch -Annahme: A = A1 ∪A2, Ai abgeschlossen, Ai 6= ∅, A1 ∩A2 = ∅ - -⇒ A = (A ∩A1)︸ ︷︷ ︸ -abgeschlossen - -∪̇ (A ∩A2)︸ ︷︷ ︸ -abgeschlossen︸ ︷︷ ︸ - -disjunkt - -Wäre A ∩A1 = ∅ -⇒ A ⊆ A = A1 ∪̇A2 -⇒ A ⊆ A2 ⇒ A ⊆ A2 -⇒ A1 = ∅ -⇒ Widerspruch zu A1 6= ∅ -⇒ A ∩A1 6= ∅ und analog A ∩A2 6= ∅ -⇒ Widerspruch zu A ist zusammenhängend. � - -Bemerkung 15 -Sei X ein topologischer Raum und A,B ⊆ X zusammenhängend. -Ist A ∩B 6= ∅, dann ist A ∪B zusammenhängend. - -Beweis: Sei A ∪B = U1 ∪̇ U2, Ui 6= ∅ offen -o. B. d. A. -======⇒ A = (A ∩ U1) ∪̇ (A ∩ U2) offen -A zhgd. -====⇒ A ∩ U1 = ∅ -A∩B 6=∅ -====⇒ U1 ⊆ B -B = (B ∩ U1)︸ ︷︷ ︸ - -=U1 - -∪ (B ∩ U2)︸ ︷︷ ︸ -=∅ - -ist unerlaubte Zerlegung. - -� - -Definition 14 -Sei X ein topologischer Raum. - -Für x ∈ X sei Z(x) ⊆ X definiert durch - -Z(x) := -⋃ - -A⊆Xzhgd. -x∈A - -A - -Z(x) heißt Zusammenhangskomponente. - -Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) -Sei X ein topologischer Raum. Dann gilt: - -a) Z(x) ist die größte zusammenhängende Teilmenge von X, die x enthält. - -b) Z(x) ist abgeschlossen. - -c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. - -Beweis: - - - -15 1.5. KOMPAKTHEIT - -a) Sei Z(x) = A1 ∪̇A2 mit Ai 6= ∅ abgeschlossen. -O. B. d. A. 
sei x ∈ A1 und y ∈ A2. y liegt in einer zusammehängenden Teilmenge A, -die auch x enthält. ⇒ A = (A ∩A1)︸ ︷︷ ︸ - -3x - -∪ (A ∩A2)︸ ︷︷ ︸ -3y - -ist unerlaubte Zerlegung. - -b) Nach Bemerkung 14 ist Z(x) zusammenhängend ⇒ Z(x) ⊆ Z(x) ⇒ Z(x) = Z(x) - -c) Ist Z(y) ∩ Z(x) 6= ∅ Bem. 15=====⇒ Z(y) ∪ Z(x) ist zusammenhängend. - -⇒ Z(x) ∪ Z(y) ⊆ Z(x)⇒ Z(y) ⊆ Z(x) -⊆ Z(y)⇒ Z(x) ⊆ Z(y) - -� - -Bemerkung 17 -Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f(A) ⊆ Y zusammenhängend. - -Beweis: Sei f(A) = U1 ∪ U2, Ui 6= ∅, offen, disjunkt. -⇒ f−1(f(A)) = f−1(U1) ∪ f−1(U2) -⇒ A = (A ∩ f−1(U1))︸ ︷︷ ︸ - -6=∅ - -∪ (A ∩ f−1(U2))︸ ︷︷ ︸ -6=∅ - -� - -1.5 Kompaktheit - -Definition 15 -Sei X eine Menge und U ⊆ P(X). - -U heißt eine Überdeckung von X, wenn gilt: - -∀x ∈ X : ∃M ∈ U : x ∈M -Definition 16 - -Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X - -U = { Ui }i∈I mit Ui offen in X - -eine endliche Teilüberdeckung ⋃ -i∈J⊆I - -Ui = X mit |J | ∈ N - -besitzt. - -Bemerkung 18 -Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. - -Beweis: Sei (Ui)i∈J eine offene Überdeckung von I. - -Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in -einem der Ui enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle - - - -16 1.5. KOMPAKTHEIT - -der Länge δ unterteilen und alle Ui in die endliche Überdeckung aufnehmen, die Teilintervalle -enthalten. - -Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall In ⊆ [0, 1] -der Länge 1/n sodass In ( Ui für alle i ∈ J . -Sei xn der Mittelpunkt von In. Die Folge (xn) hat einen Häufungspunkt x ∈ [0, 1]. Dann -gibt es i ∈ J mit x ∈ Ui. Da Ui offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ Ui. 
-Dann gibt es n0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n0 : |x− xn| < ε/2, also -In ⊆ (x− ε, x+ ε) ⊆ Ui für mindestens ein n ∈ N.4 - -⇒ Widerspruch -Dann überdecke [0, 1] mit endlich vielen Intervallen I1, . . . , Id der Länge δ. Jedes Ij ist in -Uij enthalten. - -⇒ Uj1 , . . . , Ujd ist endliche Teilüberdeckung von U . � -Beispiel 16 (Kompakte Räume) - -1) R ist nicht kompakt. - -2) (0, 1) ist nicht kompakt. -Un = (1/n, 1− 1/n)⇒ - -⋃ -n∈N Un = (0, 1) - -3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. -Bemerkung 19 - -Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. - -Beweis: Sei (Vi)i∈I offene Überdeckung von A. -Dann gibt es für jedes i ∈ I eine offene Teilmenge Ui ⊆ X mit Vi = Ui ∩A. - -⇒ A ⊆ -⋃ -i∈I - -Ui - -⇒ U = { Ui | i ∈ I } ∪ {X \A } ist offene Überdeckung von X -X kompakt -=======⇒ es gibt i1, . . . , in ∈ I, sodass - -n⋃ -j=1 - -Uij ∪ (X \A) = X - -⇒ - - n⋃ -j=1 - -Uij ∪ (X \A) - - ∩A = A -⇒ - -n⋃ -j=1 - -(Uij ∩A)︸ ︷︷ ︸ -=Vij - -∪ ((X \A) ∩A)︸ ︷︷ ︸ -=∅ - -= A - -⇒ Vi1 , . . . , Vin überdecken A. - -� - -Bemerkung 20 -Seien X,Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie -kompakt. - -Beweis: Sei (Wi)i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es -offene Teilmengen Ux,y von X und Vx,y von Y sowie ein i ∈ I, sodass Ux,y × Vx,y ⊆Wi. - -3Dies gilt nicht für alle n ≥ n0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. -4Sogar für unendlich viele. - - - -17 1.5. KOMPAKTHEIT - -Wi - -xy - -x - -Vx,y - -Ux,y - -Y - -X - -Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen - -Die offenen Mengen Ux0,y × Vx0,y für festes x0 und alle y ∈ Y überdecken { x0 } × y. Da Y -kompakt ist, ist auch { x0 } × Y kompakt. Also gibt es y1, . . . , ym(x0) mit - -⋃m(x0) -i=1 Ux0,yi × - -Vx0,yi ⊇ { x0 } × Y . - -Sei Ux0 := -⋂m(x) -i=1 Ux0,yi . Da X kompakt ist, gibt es x1, . . . 
, xn ∈ X mit - -⋃n -j=1 Uxj = X - -⇒ $\bigcup_{j=1}^{n} \bigcup_{i=1}^{m(x_j)} \underbrace{(U_{x_j,y_i} \times V_{x_j,y_i})}_{\text{Ein grün-oranges Kästchen}} \supseteq X \times Y$ - -⇒ $\bigcup_j \bigcup_i W_i(x_j, y_i) = X \times Y$ □ -Bemerkung 21 - -Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. - -Beweis: z. Z.: Komplement ist offen - -Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ X \K. Für jedes x ∈ K seien -Ux bzw. Vx Umgebungen von x bzw. von y, sodass Ux ∩ Vx = ∅. - -Xi - -K - -x - -y - -Da K kompakt ist, gibt es endlich viele x1, . . . , xn ∈ K, sodass $\bigcup_{i=1}^{n} U_{x_i} \supseteq K$. - -Sei $V := \bigcap_{i=1}^{n} V_{x_i}$ - - - -18 1.6. WEGE UND KNOTEN - -⇒ $V \cap \left( \bigcup_{i=1}^{n} U_{x_i} \right) = \emptyset$ - -⇒ V ∩K = ∅ -⇒ V ist Umgebung von y, die ganz in X \K enthalten ist. -⇒ X \K ist offen - -Damit ist K abgeschlossen. □ - -Bemerkung 22 -Seien X,Y topologische Räume, f : X → Y stetig. -Ist K ⊆ X kompakt, so ist f(K) ⊆ Y kompakt. - -Beweis: Sei (Vi)i∈I offene Überdeckung von f(K) -$\xRightarrow{f \text{ stetig}}$ (f−1(Vi))i∈I ist offene Überdeckung von K -$\xRightarrow{K \text{ kompakt}}$ es gibt i1, . . . , in, sodass f−1(Vi1), . . . , f−1(Vin) Überdeckung von K ist. -⇒ f(f−1(Vi1)), . . . , f(f−1(Vin)) überdecken f(K). -Es gilt: f(f−1(V )) = V ∩ f(X) □ - -Satz 1.1 (Heine-Borel) -Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und -abgeschlossen ist. - -Beweis: „⇒“: Sei K ⊆ Rn (oder Cn) kompakt. -Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset- -zung kann K mit endlich vielen offenen Kugeln vom Radius 1 überdeckt werden ⇒ K ist -beschränkt. - -„⇐“ Sei A ⊆ Rn (oder Cn) beschränkt und abgeschlossen. - -Dann gibt es einen Würfel $W = \underbrace{[-N, N] \times \dots \times [-N, N]}_{n \text{ mal}}$ - -mit A ⊆W bzw. „Polyzylinder“ - -Z = { (z1, . . . , zn) ∈ Cn | |zi| ≤ N für i = 1, . . . , n } - -Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch -kompakt. Genauso ist Z kompakt, weil - -$\{ z \in \mathbb{C} \mid |z| \leq 1 \}$ - -homöomorph zu $\{ (x, y) \in \mathbb{R}^2 \mid \|(x, y)\| \leq 1 \}$ -ist. 
□ - -1.6 Wege und Knoten - -Definition 17 -Sei X ein topologischer Raum. - - - -19 1.6. WEGE UND KNOTEN - -a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1]→ X. -b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. - -c) γ heißt einfach, wenn γ|[0,1) injektiv ist. - -Beispiel 17 -Ist X diskret, so ist jeder Weg konstant, d. h. von der Form - -∀x ∈ [0, 1] : γ(x) = c, c ∈ X - -Denn γ([0, 1]) ist zusammenhängend für jeden Weg γ. - -Definition 18 -Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten -x, y ∈ X einen Weg γ : [0, 1]→ X gibt mit γ(0) = x und γ(1) = y. - -Bemerkung 23 -Sei X ein topologischer Raum. - -a) X ist wegzusammenhängend ⇒ X ist zusammenhängend -b) X ist wegzusammenhängend ⇍ X ist zusammenhängend - -Beweis: - -a) Sei X ein wegzusammenhängender topologischer Raum, A1, A2 nichtleere, disjunkte, -abgeschlossene Teilmengen von X mit A1 ∪A2 = X. Sei x ∈ A1, y ∈ A2, γ : [0, 1]→ X -ein Weg von x nach y. - -Dann ist C := γ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. - -$C = \underbrace{(C \cap A_1)}_{\ni x} \cup \underbrace{(C \cap A_2)}_{\ni y}$ - -ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch - -b) Sei $X = \{ (x, y) \in \mathbb{R}^2 \mid x^2 + y^2 = 1 \lor y = 1 + 2 \cdot e^{-\frac{1}{10} x} \}$. - -Abbildung 1.8a veranschaulicht diesen Raum. - -Sei U1 ∪ U2 = X, U1 ∩ U2 = ∅, Ui ≠ ∅ offen. X = C ∪ S, wobei C der Kreis und S die Spirale aus Abbildung 1.8a ist. Dann ist C ⊆ U1 oder C ⊆ U2, -weil C und S zusammenhängend sind. - -Also ist C = U1 und S = U2 (oder umgekehrt). - -Sei y ∈ C = U1, ε > 0 und Bε(y) ⊆ U1 eine Umgebung von y, die in U1 enthalten ist. -Aber: Bε(y) ∩ S ≠ ∅ ⇒ Widerspruch ⇒ X = C ∪ S ist zusammenhängend, aber nicht -wegzusammenhängend. □ - -Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0, 1]→ [0, 1]× [0, 1]. Ein Beispiel ist die in Abbil- -dung 1.9 dargestellte Hilbert-Kurve. - -Definition 19 -Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : -[0, 1]→ C ⊆ X bzw. γ : S1 → C ⊆ X, wobei C := Bild γ. - - - -20 1.6. 
WEGE UND KNOTEN - -(a) Spirale S mit Kreis C - -0.1 1 - -−1 - -0 - -1 - -X - -Y - -$\{ (x, \sin(\tfrac{1}{x})) \in X \times Y \}$ -(−1, 1) ⊆ Y - -(b) Sinus - -Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend -sind. - -(a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5 - -Abbildung 1.9: Hilbert-Kurve - -Jede Jordankurve ist also ein einfacher Weg. - -Satz 1.2 (Jordanscher Kurvensatz) -Ist C = γ([0, 1]) eine geschlossene Jordankurve in R2, so hat R2 \ C genau zwei -Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. - -außen -innen - -Jordankurve - -Abbildung 1.10: Die beschränkte Zusammenhangskomponente wird häufig Inneres, die -unbeschränkte Äußeres genannt. - -Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie: -Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265) -nachgelesen werden. - -Idee: Ersetze Weg C durch Polygonzug. - - - -21 1.6. WEGE UND KNOTEN - -Definition 20 -Eine geschlossene Jordankurve in R3 heißt Knoten. - -Beispiel 19 (Knoten) - -(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) $6_2$-Knoten - -Abbildung 1.11: Beispiele für verschiedene Knoten - -Definition 21 -Zwei Knoten γ1, γ2 : S1 → R3 heißen äquivalent, wenn es eine stetige Abbildung - -H : S1 × [0, 1]→ R3 - -gibt mit - -H(z, 0) = γ1(z) ∀z ∈ S1 - -H(z, 1) = γ2(z) ∀z ∈ S1 - -und für jedes feste t ∈ [0, 1] ist - -$H_t : S^1 \to \mathbb{R}^3,\ z \mapsto H(z, t)$ - -ein Knoten. Die Abbildung H heißt Isotopie zwischen γ1 und γ2. - -Definition 22 -Sei γ : [0, 1]→ R3 ein Knoten, E eine Ebene und π : R3 → E eine Projektion auf E. -π heißt Knotendiagramm von γ, wenn gilt: $\left| (\pi|_{\gamma([0,1])})^{-1}(x) \right| \leq 2 \quad \forall x \in \pi(\gamma([0,1]))$ -Ist (π|γ([0,1]))−1(x) = { y1, y2 }, so liegt y1 über y2, wenn gilt: - -∃λ > 1 : (y1 − x) = λ(y2 − x) - -Satz 1.3 (Satz von Reidemeister) -Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie -durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. 
- - - -22 1.6. WEGE UND KNOTEN - -(a) Ω1 (b) Ω2 - -(c) Ω3 - -Abbildung 1.12: Reidemeister-Züge - -Beweis: Durch sorgfältige Fallunterscheidung.5 - -Definition 23 -Ein Knotendiagramm D heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt -werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben -auftreten. - -Abbildung 1.13: Ein 3-gefärbter Kleeblattknoten - -5Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177. - - - -23 1.6. WEGE UND KNOTEN - -Übungsaufgaben - -Aufgabe 1 (Sierpińskiraum) - -Es sei X := { 0, 1 } und TX := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. -(a) Beweisen Sie, dass (X,TX) ein topologischer Raum ist. - -(b) Ist (X,TX) hausdorffsch? - -(c) Ist TX von einer Metrik erzeugt? - -Aufgabe 2 - -Es sei Z mit der von den Mengen Ua,b := a + bZ (a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie -versehen. - -Zeigen Sie: - -(a) Jedes Ua,b und jede einelementige Teilmenge von Z ist abgeschlossen. - -(b) { −1, 1 } ist nicht offen. -(c) Es gibt unendlich viele Primzahlen. - -Aufgabe 3 (Cantorsches Diskontinuum) - -Für jedes i ∈ N sei Pi := { 0, 1 } mit der diskreten Topologie. Weiter sei $P := \prod_{i \in \mathbb{N}} P_i$. - -(a) Wie sehen die offenen Mengen von P aus? - -(b) Was können Sie über den Zusammenhang von P sagen? - -Aufgabe 4 (Kompaktheit) - -(a) Ist GLn(R) = {A ∈ Rn×n | det(A) ≠ 0 } kompakt? - -(b) Ist SLn(R) = {A ∈ Rn×n | det(A) = 1 } kompakt? - -(c) Ist P(R) kompakt? - -Aufgabe 5 (Begriffe) - -Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. - -Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, -begründen Sie, warum. - -1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist, - -2) ein Homomorphismus, der kein Homöomorphismus ist, - - - -24 1.6. WEGE UND KNOTEN - -3) ein Homöomorphismus, der kein Homomorphismus ist - -Aufgabe 6 (Begriffe) - -Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“. 
- - - -2 Mannigfaltigkeiten und -Simplizialkomplexe - -2.1 Topologische Mannigfaltigkeiten - -Definition 24 -Sei (X,T) ein topologischer Raum und n ∈ N. - -a) Eine n-dimensionale Karte auf X ist ein Paar (U,ϕ), wobei U ∈ T und ϕ : U → V -Homöomorphismus von U auf eine offene Teilmenge V ⊆ Rn. - -b) Ein n-dimensionaler Atlas A auf X ist eine Familie (Ui, ϕi)i∈I von Karten auf X, -sodass - -⋃ -i∈I Ui = X. - -c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, -eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. - -Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn ähnlich. - -Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) -Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R. - -Beweis: Sei (X,T) ein topologischer Raum und (U,ϕ) mit U ∈ T und ϕ : U → V ⊆ Rn, wobei -V offen und ϕ ein Homöomorphismus ist, eine Karte auf X. - -Da jede offene Teilmenge des Rn genauso mächtig ist wie der Rn, ϕ als Homöomorphismus -insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig -sind, ist U genauso mächtig wie der Rn. Da jede Mannigfaltigkeit mindestens eine Karte -hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn. � - -Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können -beliebig viele Elemente haben. - -Bemerkung 25 -a) Es gibt surjektive, stetige Abbildungen [0, 1]→ [0, 1]× [0, 1] -b) Für n 6= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz - -von der Gebietstreue“ (Brouwer): - -Ist U ⊆ Rn offen und f : U → Rn stetig und injektiv, so ist f(U) offen. -Ist n < m und Rm homöomorph zu Rn, so wäre - -f : Rn → Rm → Rn, (x1, . . . , xn) 7→ (x1, x2, . . . , xn, 0, . . . , 0) - -eine stetige injektive Abbildung. Also müsste f(Rn) offen sein ⇒ Widerspruch - - - -26 2.1. 
TOPOLOGISCHE MANNIGFALTIGKEITEN - -Beispiel 20 (Mannigfaltigkeiten) -1) Jede offene Teilmenge U ⊆ Rn ist eine n-dimensionale Mannigfaltigkeit mit einem - -Atlas aus einer Karte. - -2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: - -(z1, . . . , zn) 7→ (<(z1),=(z1), . . . ,<(zn),=(zn)) - -3) Pn(R) = (Rn+1 \ { 0 })/∼ = Sn/∼ und Pn(C) sind Mannigfaltigkeiten der Dimension -n bzw. 2n, da gilt: - -Sei Ui := { (x0 : · · · : xn) ∈ Pn(R) | xi 6= 0 } ∀i ∈ 0, . . . , n. Dann ist Pn(R) = -⋃n -i=0 Ui - -und die Abbildung - -Ui → Rn - -(x0 : · · · : xn) 7→ -( -x0 -xi -, . . . , - -� -�� -xi -xi -, . . . , - -xn -xi - -) -(y1 : · · · : yi−1 : 1 : yi : · · · : yn) 7→(y1, . . . , yn) - -ist bijektiv. - -Die Ui mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: - -x = (1 : 0 : 0) ∈ U0 → R2 x 7→ (0, 0) -y = (0 : 1 : 1) ∈ U2 → R2 y 7→ (0, 1) - -Umgebung: B1(0, 1)→ { (1 : u : v) | ‖(u, v)‖ < 1 } = V1 -Umgebung: B1(0, 1)→ - -{ -(w : z : 1) - -∣∣ w2 + z2 < 1 } = V2 -V1 ∩ V2 = ∅? -(a : b : c) ∈ V1 ∩ V2 -⇒ a 6= 0 und ( ba)2 + ( ca)2 < 1⇒ ca < 1 -⇒ c 6= 0 und (ac )2 + ( bc)2 < 1⇒ ac < 1 -⇒ Widerspruch - -4) Sn = -{ -x ∈ Rn+1 - -∣∣ ‖x‖ = 1 } ist n-dimensionale Mannigfaltigkeit. -Karten: -Di := {(x1, . . . , xn+1) ∈ Sn|xi > 0} → B1(0, . . . , 0︸ ︷︷ ︸ - -∈Rn - -) - -Ci := {(x1, . . . , xn+1) ∈ Sn|xi < 0} → B1(0, . . . , 0) -(x1, . . . , xn+1) 7→ (x1, . . . ,��xi, . . . , xn+1)1 -(x1, . . . , xn) 7→ (x1, . . . , xi−1, - -√ -1−∑nk=1 x2k, xi, . . . , xn), oder −√1−∑nk=1 x2k für Ci - -Sn = -⋃n+1 -i=1 (Ci ∪Di) - -Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt. - -5) [0, 1] ist keine Mannigfaltigkeit, denn: -Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall -ist. - -1xi wird rausgenommen - - - -27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN - -6) V1 = -{ - -(x, y) ∈ R2 -∣∣ x · y = 0 } ist keine Mannigfaltigkeit. - -Das Problem ist (0, 0). 
Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 -Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen- -hangskomponenten, wenn man einen Punkt entfernt. - -7) $V_2 = \{ (x, y) \in \mathbb{R}^2 \mid x^3 = y^2 \}$ ist eine Mannigfaltigkeit. - -8) X = (R \ { 0 }) ∪ { 01, 02 } - -$U \subseteq X \text{ offen} \Leftrightarrow \begin{cases} U \text{ offen in } \mathbb{R} \setminus \{ 0 \}, & \text{falls } 0_1 \notin U,\ 0_2 \notin U \\ U \setminus \{ 0_1, 0_2 \} \text{ offen in } \mathbb{R} \setminus \{ 0 \} \text{ und } \exists \varepsilon > 0 : (-\varepsilon, \varepsilon) \setminus \{ 0 \} \subseteq U, & \text{falls } 0_1 \in U \text{ oder } 0_2 \in U \end{cases}$ - -Insbesondere sind (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } offen und homöomorph -zu R. - -Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 01 -und 02. - -9) GLn(R) ist eine Mannigfaltigkeit der Dimension $n^2$, weil offene Teilmengen von $\mathbb{R}^{n^2}$ -eine Mannigfaltigkeit bilden. - -Definition 25 -Seien X,Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho- -möomorphismus, Z = (X ∪̇Y )/∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation -und der von ∼ induzierten Quotiententopologie. -Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen -Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. - -Bemerkung 26 -Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit -der Dimension n+m. - -Beweis: Produkte von Karten sind Karten. □ - -Beispiel 21 -Mannigfaltigkeiten mit Dimension 1: - -1) Offene Intervalle, R, (0, 1) sind alle homöomorph - -2) S1 - -Mannigfaltigkeiten mit Dimension 2: - -1) R2 - -2) S2 (0 Henkel) - -3) T 2 (1 Henkel) - -4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1 - -Bemerkung 27 -Sei n ∈ N, F : Rn → R stetig differenzierbar und X = V (F ) := { x ∈ Rn | F (x) = 0 } das -„vanishing set“. - -Dann gilt: - - - -28 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN - -Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. - -a) X ist abgeschlossen in Rn - -b) Ist grad(F )(x) ≠ 0 ∀x ∈ X, so ist X eine Mannigfaltigkeit der Dimension n− 1. - -Beweis: - -a) Sei y ∈ Rn \ V (F ). 
Weil F stetig ist, gibt es δ > 0, sodass F (Bδ(y)) ⊆ Bε(F (y)) mit -ε = 12‖F (y)‖. Folgt Bδ(y) ∩ V (F ) = ∅ ⇒ Rn \ V (F ) ist offen. - -b) Sei x ∈ X mit grad(F )(x) 6= 0, also o. B. d. A. ∂F∂X1 (x) 6= 0, x = (x1, . . . , xn), -x′ := (x2, . . . , xn) ∈ Rn−1. Der Satz von der impliziten Funktion liefert nun: Es -gibt Umgebungen U von x′ und differenzierbare Funktionen g : U → R, sodass -G : U → Rn, u 7→ (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x -in X ist. - -� - -Beispiel 22 - -1) F : R3 → R, (x, y, z) 7→ x2+y2+z2−1, V (F ) = S2, grad(F ) = (2x, 2y, 2z) Bem. 27.b======⇒ -Sn ist n-dimensionale Mannigfaltigkeit in Rn+1 - -2) F : R2 → R, (x, y) 7→ y2−x3 Es gilt: grad(F ) = (−3x2, 2y). Also: grad(0, 0) = (0, 0). - -−5−4−3−2−10 -1 - -2 -3 - -4 -5 - -−4 - -−2 - -0 - -2 - -4 - -−100 - -0 - -100 - -x - -y - -z - -−100 - -0 - -100 - -f(x, y) - -(a) F (x, y) = y2 − x3 - -2 4 6 8 10 12 - -−10 - -−5 - -5 - -10 - -x - -y - -a = 13 -a = 1 -a = 2 - -(b) y2 − ax3 = 0 - -Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. - -Daher ist Bemerkung 27.b nicht anwendbar, aber V (F ) ist trotzdem eine 1-dimensionale -topologische Mannigfaltigkeit. - - - -29 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN - -Definition 26 -Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale -Mannigfaltigkeit mit Rand, wenn es einen Atlas (Ui, ϕi) gibt, wobei Ui ⊆ Xi offen und -ϕi ein Homöomorphismus auf eine offene Teilmenge von - -Rn+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 } - -ist. - -Rn+,0 ist ein „Halbraum“. - -Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. - -∼ -= - -(a) Halbraum - -∼ -= - -(b) Pair of pants - -∼ -= - -(c) Sphäre mit einem Loch - -Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand - -Definition 27 -Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt - -∂X := -⋃ - -(U,ϕ)∈A - -{ x ∈ U | ϕ(x) = 0 } - -Rand von X. - -∂X ist eine Mannigfaltigkeit der Dimension n− 1. 
-Definition 28 - -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I - -Für i, j ∈ I mit Ui ∩ Uj 6= ∅ heißt - -ϕij := ϕj ◦ ϕ−1i -ϕi(Ui ∩ Uj)→ ϕj(Ui ∩ Uj) - -Kartenwechsel oder Übergangsfunktion. - - - -30 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN - -Rn Rn - -Ui Uj - -Vi Vj - -X - -ϕi ϕj - -Abbildung 2.4: Kartenwechsel - -2.2 Differenzierbare Mannigfaltigkeiten - -Definition 29 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (Ui, ϕi)i∈I . - -a) X heißt differenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten- -wechselabbildung ϕij , i, j ∈ I k-mal stetig differenzierbar ist. - -b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig- -faltigkeit der Klasse C∞ ist. - -Differenzierbare Mannigfaltigkeiten der Klasse C∞ werden auch glatt genannt. - -Definition 30 -Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k ∈ N ∪ {∞ }) mit Atlas -A = (Ui, ϕi)i∈I . - -a) Eine Karte (U,ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ−1i -und ϕi ◦ ϕ−1 (i ∈ I mit Ui ∩ U 6= ∅) differenzierbar von Klasse Ck sind. - -b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der -Klasse Ck. Er heißt Ck-Struktur auf X. - -Eine C∞-Struktur heißt auch differenzierbare Struktur auf X. - -Bemerkung 28 -Für n ≥ 4 gibt es auf Sn mehrere verschiedene differenzierbare Strukturen, die sogenannten -„exotische Sphären“. - -Definition 31 -Seien X,Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X. - -a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse Ck), wenn -es Karten (U,ϕ) von X mit x ∈ U und (V, ψ) von Y mit f(U) ⊆ V gibt, sodass -ψ ◦ f ◦ ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. - -b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x ∈ X differenzierbar ist. - -c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C∞ ist und es eine -differenzierbare Abbildung g : Y → X von Klasse C∞ gibt mit g ◦ f = idX und -f ◦ g = idY . 
- - - -31 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN - -Bemerkung 29 -Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. - -Beweis: Seien (U ′, ϕ′) und (V ′, ψ′) Karten von X bzw. Y um x bzw. f(x) mit f(U ′) ⊆ V ′. -⇒ ψ′ ◦ f ◦ (ϕ′)−1 -= ψ′ ◦ (ψ−1 ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ′)−1 - -ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 differenzierbar ist. -Beispiel 23 - -f : R→ R, x 7→ x3 ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := 3√x -gilt: f ◦ g = idR, g ◦ f = idR - -Bemerkung 30 -Sei X eine glatte Mannigfaltigkeit. Dann ist - -Diffeo(X) := { f : X → X | f ist Diffeomorphismus } - -eine Untergruppe von Homöo(X). - -Definition 32 -S ⊆ R3 heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 ∃U ⊆ R2 offen: -∃ differenzierbare Abbildung F : U → V ∩ S: Rg(JF (u)) = 2 ∀u ∈ U . -F heißt (lokale) reguläre Parametrisierung von S. - -F (u, v) = (x(u, v), y(u, v), z(u, v)) - -$J_F(u, v) = \begin{pmatrix} \frac{\partial x}{\partial u}(p) & \frac{\partial x}{\partial v}(p) \\ \frac{\partial y}{\partial u}(p) & \frac{\partial y}{\partial v}(p) \\ \frac{\partial z}{\partial u}(p) & \frac{\partial z}{\partial v}(p) \end{pmatrix}$ mit $p = (u, v)$ - - -Beispiel 24 - -1) Rotationsflächen: Sei r : R→ R>0 eine differenzierbare Funktion. -F : R2 → R3 (u, v) 7→ (r(v) cos(u), r(v) sin(u), v) - -$J_F(u, v) = \begin{pmatrix} -r(v) \sin u & r'(v) \cos u \\ r(v) \cos u & r'(v) \sin u \\ 0 & 1 \end{pmatrix}$ - - -hat Rang 2 für alle (u, v) ∈ R2. - -2) Kugelkoordinaten: F : R2 → R3, -(u, v) 7→ (R cos v cosu,R cos v sinu,R sin v) -Es gilt: F (u, v) ∈ S2R, denn - -R2 cos2(v) cos2(u) +R2 cos2(v) sin2(u) +R2 sin2(v) - -=R2(cos2(v) cos2(u) + cos2(v) sin2(u) + sin2(v)) - -=R2 -( -cos2(v)(cos2(u) + sin2(u)) + sin2(v) - -) -=R2 - -( -cos2(v) + sin2(v) - -) -=R2 - - - -32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN - -N - -S - -vu - -(a) Kugelkoordinaten - -−1 -0 - -1 -2−2 −1 - -0 -1 - -2 - -0.6 - -0.8 - -1 - -(b) Rotationskörper - -π -2 - -π 3π -2 - -2π - -−1 - -−0.5 - -0.5 - -1 - -x - -y - -sinx -cosx - -(c) Sinus und Kosinus haben keine gemeinsame Nullstelle - - - -33 2.2.
DIFFERENZIERBARE MANNIGFALTIGKEITEN - -Die Jacobi-Matrix - -JF (u, v) = - -−R cos v sinu −R sin v cosuR cos v cosu −R sin v sinu -0 R cos v - - -hat Rang 2 für cos v 6= 0. In N und S ist cos v = 0. - -Bemerkung 31 -Jede reguläre Fläche S ⊆ R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. - -Beweis: - -S ⊆ R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -regulären Flächen folgt direkt, dass Karten (Ui, Fi) und (Uj ⊆ R2, Fj : R2 → R3) von S mit -Ui ∩ Uj 6= ∅ existieren, wobei Fi und Fj nach Definition differenzierbare Abbildungen sind. -z.Z.: F−1j ◦ Fi ist ein Diffeomorphismus. - -Ui Uj - -S - -s - -Fi Fj - -F−1j ◦Fi - -Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 - -Idee: Finde differenzierbare Funktion F̃−1j in Umgebung W von s, sodass F̃ -−1 -j |S∩W = F−1j . - -Ausführung: Sei u0 ∈ Ui, v0 ∈ Uj mit Fi(u0) = s = Fj(v0). -Da Rg(JFj (v0)) = 2 ist, ist o. B. d. A. - -det - -(∂x -∂u - -∂x -∂v - -∂y -∂u - -∂y -∂v - -) -(v0) 6= 0 - -und Fj(u, v) = (x(u, v), y(u, v), z(u, v)). - -Definiere F̃j : Uj × R→ R3 durch - -F̃j(u, v, t) := (x(u, v), y(u, v), z(u, v) + t) - -Offensichtlich: F̃j |Uj×{ 0 } = Fj - -J -F̃j - -= - -∂x∂u ∂x∂v 0∂y -∂u - -∂y -∂v 0 - -∂z -∂u - -∂z -∂v 1 - -⇒ det J -F̃j - -(v0, 0) 6= 0 - -Analysis II -======⇒ Es gibt Umgebungen W von Fj von F̃j(v0, 0) = Fj(v0) = s, sodass F̃j auf W eine -differenzierbar Inverse F−1j hat. - - - -34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN - -Weiter gilt: - -F̃j -−1|W∩S = F−1j |W∩S - -⇒ F−1j ◦ Fi|F−1i (W∩S) = F -−1 -j ◦ Fi|F−1i (W∩S) - -ist differenzierbar. - -Definition 33 -Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. - -a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G×G→ G und ι : G→ G -definiert durch - -g ◦ h := g · h und ι(g) := g−1 - -stetig sind. - -b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und -(G, ι) differenzierbar sind. 
- -Beispiel 25 (Lie-Gruppen) -1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. - -2) GLn(R) - -3) (R×, ·) -4) (R>0, ·) -5) (Rn,+), denn A ·B(i, j) = ∑nk=1 aikbkj ist nach allen Variablen differenzierbar - -(A−1)(i, j) = -det(Aij) - -detA - -Aij = - -ai1 . . . ain... . . . ... -an1 . . . ann - - ∈ R(n−1)×(n−1) - -ist differenzierbar. - -detAij kann 0 werden, da: ( -1 1 -−1 0 - -) -6) SLn(R) = {A ∈ GLn(R) | det(A) = 1 } - -Bemerkung 32 -Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung - -lg : G→ G -h 7→ g · h - -ein Diffeomorphismus. - - - -35 2.3. SIMPLIZIALKOMPLEX - -2.3 Simplizialkomplex - -Definition 34 -Seien v0, . . . , vk ∈ Rn Punkte. - -a) v0, . . . , vk sind in allgemeiner Lage -⇔ es gibt keinen (k−1)-dimensionalen affinen Untervektorraum, der v0, . . . , vk enthält -⇔ v1 − v0, . . . , vk − v0 sind linear unabhängig. - -b) conv(v0, . . . , vk) := -{∑k - -i=0 λivi - -∣∣∣ λi ≥ 0,∑ki=0 λi = 1 } heißt die konvexe Hülle von -v0, . . . , vk. - -Definition 35 -a) Sei ∆n = conv(e0, . . . , en) ⊆ Rn+1 die konvexe Hülle der Standard-Basisvektoren - -e0, . . . , en. - -Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. - -b) Für Punkte v0, . . . , vk im Rn in allgemeiner Lage heißt ∆(v0, . . . , vk) = conv(v0, . . . , vk) -ein k-Simplex in Rn. - -c) Ist ∆(v0, . . . , vk) ein k-Simplex und I = { i0, . . . , ir } ⊆ { 0, . . . , k }, so ist si0,...,ir := -conv(vi0 , . . . , vir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. - -(a) 0-Simplex ∆0 - -1 2 3 - -1 - -2 - -3 - -e0 - -e1 - -(b) 1-Simplex ∆1 -1 2 3 - -1 - -2 - -3 - -e0 - -e1 - -e2 - -(c) 2-Simplex ∆2 -e0 e1 - -e2 - -e3 - -(d) 3-Simplex ∆3 - -Abbildung 2.6: Beispiele für k-Simplexe - -Definition 36 -a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex, - -wenn gilt: - -(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K. -(ii) Für ∆1,∆2 ∈ K ist ∆1 ∩∆2 leer oder ein Teilsimplex von ∆1 und von ∆2. 
- -b) |K| := ⋃∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. -c) Ist d = max { k ∈ N0 | K enthält k-Simplex }, so heißt d die Dimension von K. - - - -36 2.3. SIMPLIZIALKOMPLEX - -(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex -(ohne untere Fläche!) - -(c) 2D Simplizialkomplex - -(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex - -P - -(f) P ist kein Teilsimplex, da Eigen- -schaft Punkt b.ii verletzt ist - -P - -(g) Simplizialkomplex - -Abbildung 2.7: Beispiele für Simplizialkomplexe - -Definition 37 -Seien K,L Simplizialkomplexe. Eine stetige Abbildung - -f : |K| → |L| - -heißt simplizial, wenn für jedes ∆ ∈ K gilt: -a) f(∆) ∈ L -b) f |∆ : ∆→ f(∆) ist eine affine Abbildung. - -Beispiel 26 (Simpliziale Abbildungen) -1) ϕ(e1) := b1, ϕ(e2) := b2 - -ϕ ist eine eindeutig bestimmte lineare Abbildung - - - -37 2.3. SIMPLIZIALKOMPLEX - -0 e2 - -e1 - -0 b1 - -b2 - -ϕ - -2) Folgende Abbildung ϕ : ∆n → ∆n−1 ist simplizial: -ϕ - -3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) - -M M - -a - -a - -a - -b - -b - -b - -c - -c - -c - -d - -d - -d - -M - -a - -b - -c - -d -b b b - -b b b - -b b b - -b - -b - -b - -b - -b - -b - -bb - -b - -b b - -b b - -b b -b - -b - -b - -b - -Abbildung 2.8: Abbildung eines Torus auf eine Sphäre - -Definition 38 -Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei an(K) die Anzahl der n-Simplizes in -K. - -Dann heißt - -χ(K) := -dimK∑ -n=0 - -(−1)nan(K) - -Eulerzahl (oder Euler-Charakteristik) von K. - -Beispiel 27 -1) χ(∆1) = 2− 1 = 1 - -χ(∆2) = 3− 3 + 1 = 1 -χ(∆3) = 4− 6 + 4− 1 = 1 - -2) χ(Oktaeder-Oberfläche) = 6− 12 + 8 = 2 -χ(Rand des Tetraeders) = 2 -χ(Ikosaeder) = 12− 30 + 20 = 2 - -3) χ(Würfel) = 8− 12 + 6 = 2 -χ(Würfel, unterteilt in Dreiecksflächen) = 8− (12 + 6) + (6 · 2) = 2 - -Bemerkung 33 -χ(∆n) = 1 für jedes n ∈ N0 - - - -38 2.3. SIMPLIZIALKOMPLEX - -Beweis: ∆n ist die konvexe Hülle von (e0, . . . , en) in Rn+1. Jede (k + 1)-elementige Teilmenge -von { e0, . . . 
, en } definiert ein k-Simplex. -⇒ $a_k(\Delta^n) = \binom{n+1}{k+1}$, k = 0, . . . , n - -⇒ $\chi(\Delta^n) = \sum_{k=0}^{n} (-1)^k \binom{n+1}{k+1}$ -$f(x) = (x+1)^{n+1} \overset{\text{Binomischer Lehrsatz}}{=} \sum_{k=0}^{n+1} \binom{n+1}{k} x^k$ - -⇒ $0 = f(-1) = \sum_{k=0}^{n+1} \binom{n+1}{k} (-1)^k = 1 - \chi(\Delta^n)$ -⇒ χ(∆n) = 1 □ - -Definition 39 -a) Ein 1D-Simplizialkomplex heißt Graph. - -b) Ein Graph, der homöomorph zu S1 ist, heißt Kreis. - -c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. - -(a) Dies wird häufig auch als -Multigraph bezeichnet. - -(b) Planare Einbettung des Te- -traeders - -(c) K5 (d) K3,3 - -Abbildung 2.9: Beispiele für Graphen - -Bemerkung 34 -Für jeden Baum T gilt χ(T ) = 1. - -Beweis: Induktion über die Anzahl der Ecken. - -Bemerkung 35 -a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T , der alle Ecken von Γ - -enthält.2 - -b) Ist n = a1(Γ)− a1(T ), so ist χ(Γ) = 1− n. - -Beweis: - -a) Siehe „Algorithmus von Kruskal“. - -2T wird „Spannbaum“ genannt. - - - -39 2.3. SIMPLIZIALKOMPLEX - -b) χ(Γ) = a0(Γ)− a1(Γ) -= a0(Γ)− (n+ a1(T )) -= a0(T )− a1(T )− n -= χ(T )− n -= 1− n - -Bemerkung 36 -Sei ∆ ein n-Simplex und x ∈ ∆◦ ⊆ Rn. Sei K der Simplizialkomplex, der aus ∆ durch -„Unterteilung“ in x entsteht. Dann ist χ(K) = χ(∆) = 1. - -(a) K (b) ∆, das aus K durch Unter- -teilung entsteht - -Abbildung 2.10: Beispiel für Bemerkung 36. - -Beweis: $\chi(K) = \chi(\Delta) - \underbrace{(-1)^n}_{n\text{-Simplex}} + \underbrace{\sum_{k=0}^{n} (-1)^k \binom{n+1}{k}}_{= (1+(-1))^{n+1} - (-1)^{n+1} = (-1)^n} = \chi(\Delta)$ □ - -Definition 40 -Sei X ein topologischer Raum, K ein Simplizialkomplex und - -h : |K| → X - -ein Homöomorphismus von der geometrischen Realisierung |K| auf X. Dann heißt h eine -Triangulierung von X. - -Beispiel 28 (Triangulierung des Torus) -Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für -fehlerhafte „Triangulierungen“ sind in Abbildung 2.11 zu sehen. Korrekte Triangulierungen sind -in Abbildung 2.12 dargestellt. - -Satz 2.1 (Eulersche Polyederformel) -Sei P ein konvexes Polyeder in R3, d. h.
∂P ist ein 2-dimensionaler Simplizialkomplex, -sodass gilt: - -∀x, y ∈ ∂P : [x, y] ⊆ P - -Dann ist χ(∂P ) = 2. - -Beweis: - -1) Die Aussage ist richtig für den Tetraeder. - -2) O. B. d. A. sei 0 ∈ P und P ⊆ B1(0). Projiziere ∂P von 0 aus auf ∂B1(0) = S2. -Erhalte Triangulierung von S2. - - - -40 2.3. SIMPLIZIALKOMPLEX - -(a) Die beiden markierten Dreiecke schneiden sich im -Mittelpunkt und in einer Seite. - -(b) Die beiden markierten Dreiecke schneiden sich im -Mittelpunkt und außen. - -Abbildung 2.11: Fehlerhafte Triangulierungen - -(a) Einfache Triangulierung (b) Minimale Triangulierung - -Abbildung 2.12: Triangulierungen des Torus - - - -41 2.3. SIMPLIZIALKOMPLEX - -3) Sind P1 und P2 konvexe Polyeder und T1, T2 die zugehörigen Triangulierungen von -S2, so gibt es eine Triangulierung T , die sowohl von T1 als auch von T2 eine Verfeinerung -ist (vgl. Abbildung 2.13). - -T1 -T2 -T - -Abbildung 2.13: T ist eine Triangulierung, die für T1 und T2 eine Verfeinerung ist. - -Nach Bemerkung 36 ist χ(∂P1) = χ(T1) = χ(T ) = χ(T2) = χ(∂P2) = 2, weil o. B. d. A. -P2 ein Tetraeder ist. - -Bemerkung 37 (Der Rand vom Rand ist 0) -Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V . - -Sei An die Menge der n-Simplizes in K, d. h. - -An(K) := { σ ∈ K | dim(σ) = n } für n = 0, . . . , d = dim(K) - -und Cn(K) der R-Vektorraum mit Basis An(K), d. h. - -$C_n(K) = \left\{ \sum_{\sigma \in A_n(K)} c_\sigma \cdot \sigma \;\middle|\; c_\sigma \in \mathbb{R} \right\}$ - - -Sei σ = ∆(x0, . . . , xn) ∈ An(K), sodass x0 < x1 < · · · < xn. -Für i = 0, . . . , n sei ∂iσ := ∆(x0, . . . , x̂i, . . . , xn) die i-te Seite von σ und $d\sigma = d_n\sigma := \sum_{i=0}^{n} (-1)^i \partial_i \sigma \in C_{n-1}(K)$ und dn : Cn(K) → Cn−1(K) die dadurch definierte lineare -Abbildung. - -Dann gilt: dn−1 ◦ dn = 0 - -a b - -c - -σ - -e3 - -e1e2 - -Abbildung 2.14: Simplizialkomplex mit Totalordnung - -Beispiel 29 -Sei a < b < c. Dann gilt: - -d2σ = e1 − e2 + e3 -d1(e1 − e2 + e3) = (c− b)− (c− a) + (b− a) - - - -42 2.3. SIMPLIZIALKOMPLEX - -= 0 - -Sei a < b < c < d.
Dann gilt für Tetraeder: - -d3(∆(a, b, c, d)) = ∆(b, c, d)−∆(a, c, d) + ∆(a, b, d)−∆(a, b, c),wobei: -d2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c) -d2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c) -d2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b) -d2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b) - -⇒ d2(d3(∆(a, b, c, d))) = 0 - -Beweis: Sei σ ∈ An. Dann gilt: - -dn−1(dnσ) = dn−1( - -n∑ -i=0 - -(−1)i∂iσ) - -= - -n∑ -i=0 - -(−1)idn−1(∂iσ) - -= - -n∑ -i=0 - -(−1)i -n−1∑ -j=0 - -∂i(∂jσ)(−1)j - -= -∑ - -0≤i≤j≤n−1 -(−1)i+j∂j(∂i(σ)) + - -∑ -0≤j d(P,C) = d(P,B) + d(B,C) = d(P,A) + d(B,C) ⇒ -d(A,C) > d(B,C)⇒ Widerspruch zu Punkt (i) - -b) C liegt zwischen P und B - -d(P,C) + d(C,A) > d(P,A) = d(P,B) = d(P,C) + d(C,B) -⇒ d(C,A) > d(C,B) -⇒ Widerspruch zu Punkt (i) - -2. Fall: Q und B liegen auf verschieden Halbebenen bzgl. PA. - -Dann liegen A und Q in derselben Halbebene bzgl. PB. - -Tausche A und B ⇒ Fall 1 � -Bemerkung 63 - -Sei (X, d,G) eine Geometrie, die §1 - §3 erfüllt, P,Q ∈ X mit P 6= Q und ϕ eine Isometrie -mit ϕ(P ) = P und ϕ(Q) = Q. - -Dann gilt ϕ(S) = S ∀S ∈ PQ. -Beweis: - -O. B. d. A. sei S ∈ PQ 2⇔ d(P,Q) = d(P, S) + d(S,Q) -ϕ∈Iso(X)⇒ d(ϕ(P ), ϕ(Q)) = d(ϕ(P ), ϕ(S)) + d(ϕ(S), ϕ(Q)) -P,Q∈Fix(ϕ)⇒ d(P,Q) = d(P,ϕ(S)) + d(ϕ(S), Q) -⇒ ϕ(S) liegt zwischen P und Q -⇒ d(P, S) = d(ϕ(P ), ϕ(S)) = d(P,ϕ(S)) -3(i)⇒ ϕ(S) = S - -� - -Proposition 4.2 -In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P ′, Q,Q′ mit d(P,Q) = d(P ′, Q′) -höchstens zwei Isometrien mit ϕ(P ) = P ′ und ϕ(Q) = Q′ - - - -70 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE - -Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit -ϕi(P ) = P - -′ und ϕi(Q) = Q′ gibt. - -Beweis: Seien ϕ1, ϕ2, ϕ3 Isometrien mit ϕi(P ) = P ′, ϕi(Q) = Q′ mit i = 1, 2, 3. - -Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: - -(Teil i) ∃R ∈ X \ PQ mit ϕ1(R) = ϕ2(R). -(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = idX . 
- -Aus (Teil i) und (Teil ii) folgt, dass $\varphi_2^{-1} \circ \varphi_1 = \mathrm{id}_X$, also ϕ2 = ϕ1, da P , Q und R in diesem -Fall Fixpunkte sind. - -Nun zu den Beweisen der Teilaussagen: - -(Teil i) Sei R ∈ X \ PQ. Von den drei Punkten ϕ1(R), ϕ2(R), ϕ3(R) liegen zwei in der selben -Halbebene bzgl. P ′Q′ = ϕi(PQ). - -O. B. d. A. seien ϕ1(R) und ϕ2(R) in der selben Halbebene. - -Es gilt: d(P ′, ϕ1(R)) = d(ϕ1(P ), ϕ1(R)) -= d(P,R) - -= d(ϕ2(P ), ϕ2(R)) - -= d(P ′, ϕ2(R)) -und analog d(Q′, ϕ1(R)) = d(Q′, ϕ2(R)) - -(Teil ii) Seien P , Q und R Fixpunkte von ϕ, R /∈ PQ und A /∈ PQ ∪ PR ∪ QR. Sei B ∈ -PQ \ { P,Q }. Dann ist ϕ(B) = B wegen Bemerkung 63. - -Ist R ∈ AB, so enthält AB zwei Fixpunkte von ϕ (nämlich B und R) ⇒ ϕ(A) = A nach Bemerkung 63. - -P B Q - -C - -RA - -Abbildung 4.5: P,Q,R sind Fixpunkte, B ∈ PQ \ { P,Q }, A /∈ PQ ∪ PR ∪QR - -Ist R ∉ AB, so ist AB ∩ PR ≠ ∅ oder AB ∩ RQ ≠ ∅ nach Satz 4.1. Der Schnittpunkt -C ist dann Fixpunkt von ϕ nach Bemerkung 63. Damit enthält AB die zwei Fixpunkte B und C ⇒ ϕ(A) = A nach Bemerkung 63. - -Bemerkung 64 (SWS-Kongruenzsatz) -Sei (X, d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A′B′C ′ -Dreiecke, für die gilt: - -(i) d(A,B) = d(A′, B′) - -(ii) ∠CAB ∼= ∠C ′A′B′ - - - -71 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE - -(iii) d(A,C) = d(A′, C ′) - -Dann ist 4ABC kongruent zu 4A′B′C ′ . - -Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(A′C ′+) = AC+ und ϕ(A′B′+) = AB+. Diese -Isometrie existiert wegen Punkt §4. - -⇒ C ∈ ϕ(A′C ′+) und B ∈ ϕ(A′B′+). - -d(A′, C ′) = d(ϕ(A′), ϕ(C ′)) = d(A,ϕ(C ′)) ⇒ (nach §3 (i)) ϕ(C ′) = C - -d(A′, B′) = d(ϕ(A′), ϕ(B′)) = d(A,ϕ(B′)) ⇒ (nach §3 (i)) ϕ(B′) = B - -Also gilt insbesondere ϕ(4A′B′C ′) = 4ABC. □ -Bemerkung 65 (WSW-Kongruenzsatz) - -Sei (X, d,G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem 4ABC und 4A′B′C ′ -Dreiecke, für die gilt: - -(i) d(A,B) = d(A′, B′) - -(ii) ∠CAB ∼= ∠C ′A′B′ - -(iii) ∠ABC ∼= ∠A′B′C ′ - -Dann ist 4ABC kongruent zu 4A′B′C ′ . - -Beweis: Sei ϕ die Isometrie mit ϕ(A′) = A, ϕ(B′) = B und ϕ(C ′) liegt in der selben Halbebene -bzgl. AB wie C.
Diese Isometrie existiert wegen §4. - -Aus ∠CAB = ∠C ′A′B′ = ∠ϕ(C ′)ϕ(A′)ϕ(B′) = ∠ϕ(C ′)AB folgt, dass ϕ(C ′) ∈ AC+. -Analog folgt aus ∠ABC = ∠A′B′C ′ = ∠ϕ(A′)ϕ(B′)ϕ(C ′) = ∠ABϕ(C ′), dass ϕ(C ′) ∈ -BC+. - -Dann gilt ϕ(C ′) ∈ AC ∩BC = { C } ⇒ ϕ(C ′) = C. -Es gilt also ϕ(4A′B′C ′) = 4ABC. � - -Definition 61 -a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P . - -Man schreibt: ∠R1PR2 bzw. ∠R2PR12 - -b) Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den -anderen abbildet. - -c) ∠R′1P ′R′2 heißt kleiner als ∠R1PR2, wenn es eine Isometrie ϕ gibt, mit ϕ(P ′) = P , -ϕ(P ′R′+1 ) = PR - -+ -1 und ϕ(R - -′ -2) liegt in der gleichen Halbebene bzgl. PR1 wie R2 und in - -der gleichen Halbebene bzgl. PR2 wie R1 - -d) Im Dreieck 4PQR gibt es Innenwinkel und Außenwinkel. - -Bemerkung 66 -In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. - -Beweis: Zeige ∠PRQ < ∠RQP ′. - -Sei M der Mittelpunkt der Strecke QR und P ′ ∈ PQ+ \ PQ. Sei A ∈MP− mit d(P,M) = -d(M,A). - -2Für dieses Skript gilt: ∠R1PR2 = ∠R2PR1. Also sind insbesondere alle Winkel ≤ 180◦. - - - -72 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE - -P R′1 R1 - -R′2 - -R2 - -(a) ∠R′1P ′R′2 ist kleiner als ∠R1PR2, -vgl. Definition 61.c - -P - -Q R - -(b) Innenwinkel und Außenwin- -kel in 4PQR, vgl. Definiti- -on 61.d - -Abbildung 4.6: Situation aus Definition 61 - -Q M - -A - -P - -R - -(a) Parallelogramm AQPR - -α - -β - -R - -Q P - -(b) Innen- und Außenwin- -kel von 4PQR - -Abbildung 4.7: Situation aus Bemerkung 66 - -Es gilt: d(Q,M) = d(M,R) und d(P,M) = d(M,A) sowie ∠PMR = ∠AMQ ⇒ 4MRQ -ist kongruent zu 4AMQ, denn eine der beiden Isometrien, die ∠PMR auf ∠AMQ abbildet, -bildet R auf Q und P auf A ab. - -⇒ ∠MQA = ∠MRP = ∠QRP = ∠PRQ. -Noch zu zeigen: ∠MQA < ∠RQP ′, denn A liegt in der selben Halbebene bzgl. PQ wie M . - -Proposition 4.3 (Existenz der Parallelen) -Sei (X, d,G) eine Geometrie mit den Axiomen §1 - §4. 
- -Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine -Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. - -Beweis: Seien P,Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P ′ ∈ f mit -d(P, P ′) = d(P,Q) abbildet und die Halbebenen bzgl. f erhält. - - - -73 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE - -Q - -h - -f - -g - -P - -Abbildung 4.8: Situation aus Proposition 4.3 - -Annahme: ϕ(g) ∩ g 6= ∅ -⇒ Es gibt einen Schnittpunkt {R } = ϕ(g) ∩ g. -Dann ist ∠RQP = ∠RQP ′ < ∠RPP ′ nach Bemerkung 66 und ∠RQP = ∠RPP ′, weil -ϕ(∠RQP ) = ∠RPP ′. -⇒ Widerspruch -⇒ ϕ(g) ∩ g = ∅ � - -Folgerung 4.4 -Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. - -D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP+) = PR+, sodass ϕ(R) in der gleichen -Halbebene bzgl. PQ liegt wie R. - -Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die -beiden Halbgeraden bilden eine Gerade. - -Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, -Dreiecke mit drei 90◦-Winkeln. - -Proposition 4.5 -In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der -Innenwinkel ≤ π. - - - -74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE - -Sei im Folgenden „IWS“ die „Innenwinkelsumme“. - -Beweis: Sei 4 ein Dreieck mit IWS(4) = π + ε - -α -β - -γ - -P - -(a) Summe der Winkel α, β und γ - -α1 -α2 β - -γ -M - -A B - -C A′ - -α - -(b) Situation aus Proposition 4.5 - -Abbildung 4.10: Situation aus Proposition 4.5 - -Sei α ein Innenwinkel von 4. - -Beh.: Es gibt ein Dreieck 4′ mit IWS(4′) = IWS(4) und einem Innenwinkel α′ ≤ α2 . -Dann gibt es für jedes n ein 4n mit IWS(4n) = IWS(4) und Innenwinkel α′ ≤ α2n . Für -α -2n < ε ist dann die Summe der beiden Innenwinkel um 4n größer als π ⇒ Widerspruch -zu Folgerung 4.4. - -Beweis: Es seien A,B,C ∈ X und 4 das Dreieck mit den Eckpunkten A,B,C und α sei -der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C. 
- -Sei M der Mittelpunkt der Strecke BC. Sei außerdem α1 = ∠CAM und α2 = ∠BAM . - -Sei weiter A′ ∈MA− mit d(A′,M) = d(A,M). -Die Situation ist in Abbildung 4.10b skizziert. - -⇒ 4(MA′C) und 4(MAB) sind kongruent. ⇒ ∠ABM = ∠A′CM und ∠MA′C = -∠MAB.⇒ α+β+γ = IWS(4ABC) = IWS(4AA′C) und α1 +α2 = α, also o. B. d. A. -α1 ≤ α2 - -Bemerkung 67 -In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. - -α′ -α′′ - -α β - -β′ - -γ - -A B - -C -g - -Abbildung 4.11: Situation aus Bemerkung 67 - -Beweis: Sei g eine Parallele von AB durch C. - -• Es gilt α′ = α wegen Proposition 4.3. -• Es gilt β′ = β wegen Proposition 4.3. -• Es gilt α′′ = α′ wegen Aufgabe 8. - - - -75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE - -⇒ IWS(4ABC) = γ + α′′ + β′ = π - -Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich -π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. - -4.2 Weitere Eigenschaften einer euklidischen Ebene - -Satz 4.6 (Strahlensatz) -In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. - -x - -y - -−1 0 1 2 3 4 -0 - -1 - -2 - -3 - -z - -x - -λ2z - -λ2x - -Abbildung 4.12: Strahlensatz - -Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. - -A B′ - -C ′ - -B - -C - -c - -b a - -c′ - -b′ - -a′ - -Abbildung 4.13: Die Dreiecke 4ABC und 4AB′C ′ sind ähnlich. - -4.2.1 Flächeninhalt - -Definition 62 -„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in -kongruente Dreiecke zerlegen lassen. - - - -76 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE - -(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Drei- -ecke - -Abbildung 4.14: Flächengleichheit - -Der Flächeninhalt eines Dreiecks ist 1/2 ·Grundseite ·Höhe. 
- -A B - -C - -LC - -hc - -c - -(a) 1/2 · |AB| · |hc| - -· - -A B - -C - -LA - -ha - -c - -(b) 1/2 · |BC| · |ha| - -Abbildung 4.15: Flächenberechnung im Dreieck - -Zu zeigen: Unabhängigkeit von der gewählten Grundseite. - -α - -α - -γ - -γ - -A B - -C - -LA - -LC - -Abbildung 4.16: 4ABLa und 4CLCB sind ähnlich, weil IWS = π - -Strahlensatz -=======⇒ ahc = - -c -ha -→ a · ha = c · hc - -Satz 4.7 (Satz des Pythagoras) -Im rechtwinkligen Dreieck gilt a2 + b2 = c2, wobei c die Hypotenuse und a, b die beiden -Katheten sind. - -Beweis: (a+ b) · (a+ b) = a2 + 2ab+ b2 = c2 + 4 · (12 · a · b) - - - -77 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE - -c - -b a - -A B - -C -· - -(a) a, b sind Katheten und c ist die Hypo- -tenuse - -b a - -b - -a - -ba - -b - -a - -· - -·· - -· - -γ - -(b) Beweisskizze - -Abbildung 4.17: Satz des Pythagoras - -Satz 4.8 -Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d,G), nämlich X = R2, -d = euklidischer Abstand, G = Menge der üblichen Geraden. - -Beweis: - -(i) (R2, dEuklid) ist offensichtlich eine euklidische Ebene. - -(ii) Sei (X, d) eine euklidische Ebene und g1, g2 Geraden in X, die sich in einem Punkt 0 -im rechten Winkel schneiden. - -Sei P ∈ X \ (g1 ∪ g2) ein Punkt und PX der Fußpunkt des Lots von P auf g1 (vgl. -Aufgabe 9 (c)) und PY der Fußpunkt des Lots von P auf g2. - -Sei xP := d(PX , 0) und yP := d(PY , 0). - -In Abbildung 4.19 wurde die Situation skizziert. - -Sei h : X → R2 eine Abbildung mit h(P ) := (xP , yP ) Dadurch wird h auf dem -Quadranten definiert, in dem P liegt, d. h. - -∀Q ∈ X mit PQ ∩ g1 = ∅ = PQ ∩ g2 - -Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. - -Im Folgenden werden zwei Aussagen gezeigt: - -(i) h ist surjektiv - -(ii) h ist eine Isometrie - -Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist. - -Nun zu den Beweisen der Teilaussagen: - - - -78 4.3. 
HYPERBOLISCHE GEOMETRIE - -· -g1 - -g2 - -P - -X - -(a) Schritt 1 - -· -g1 - -g2 - -xP - -yP - -P - -0 PX - -PY - -X - -(b) Schritt 2 - -Abbildung 4.18: Beweis zu Satz 4.8 - -(i) Sei (x, y) ∈ R2, z. B. x ≥ 0, y ≥ 0. Sei P ′ ∈ g1 mit d(0, P ′) = x und P ′ auf der -gleichen Seite von g2 wie P . - -g1 - -g2 - -xP - -yP - -P - -Q - -0 - -R - -X - -Abbildung 4.19: Beweis zu Satz 4.8 - -(ii) Zu Zeigen: d(P,Q) = d(h(P ), h(Q)) - -d(P,Q)2 -Pythagoras - -= d(P,R)2 + d(R,Q)2 = (yQ − yP )2 + (xQ − xP )2. -h(Q) = (xQ, yQ) - -4.3 Hyperbolische Geometrie - -Definition 63 -Sei - -H := { z ∈ C | =(z) > 0 } = -{ - -(x, y) ∈ R2 -∣∣ y > 0 } - - - -79 4.3. HYPERBOLISCHE GEOMETRIE - -die obere Halbebene bzw. Poincaré-Halbebene und G = G1 ∪G2 mit - -G1 = { g1 ⊆ H | ∃m ∈ R, r ∈ R>0 : g1 = { z ∈ H : | z −m| = r } } -G2 = { g2 ⊆ H | ∃x ∈ R : g2 = { z ∈ H : <(z) = x } } - -Die Elemente aus G heißen hyperbolische Geraden. - -Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) -Die hyperbolischen Geraden erfüllen. . . - -a) . . . die Inzidenzaxiome §1 - -b) . . . das Anordnungsaxiom §3 (ii) - -c) . . . nicht das Parallelenaxiom §5 - -Beweis: - -a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt: -Gegeben z1, z2 ∈ H -Existenz: - -Fall 1 <(z1) = <(z2) -⇒ z1 und z2 liegen auf - -g = { z ∈ C | <(z) = <(z1) ∧H } - -Siehe Abbildung 4.20a. - -Fall 2 <(z1) 6= <(z2) -Betrachte nun z1 und z2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech- -te zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten -zu z1 und z2 sind gleich weit von z1 und z2 entfernt. Daher ist der Schnittpunkt mit -der x-Achse der Mittelpunkt eines Kreises durch z1 und z2 (vgl. 
Abbildung 4.20b) - -x - -y - -−1 0 1 2 3 4 5 -0 - -1 - -2 - -3 - -4 - -Z1 - -Z2 - -<(Z1) - -(a) Fall 1 - -x - -y - -−1 0 1 2 3 4 5 -0 - -1 - -2 - -3 - -4 - -Z1 - -Z2 - -(b) Fall 2 - -Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer -Geraden - -b) Sei g ∈ G1 ∪̇G2 eine hyperbolische Gerade. - - - -80 4.3. HYPERBOLISCHE GEOMETRIE - -Es existieren disjunkte Zerlegungen von H \ g: -Fall 1: g = { z ∈ H ‖ z −m| = r } ∈ G1 -Dann gilt: - -H = { z ∈ H ‖ z −m| < r }︸ ︷︷ ︸ -=:H1 (Kreisinneres) - -∪̇ { z ∈ H ‖ z −m| > r }︸ ︷︷ ︸ -=:H2 (Kreisäußeres) - -Da r > 0 ist H1 nicht leer, da r ∈ R ist H2 nicht leer. -Fall 2: g = { z ∈ H | x }︸ ︷︷ ︸ -=:H2 (Rechts) - -Zu zeigen: ∀A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } gilt: AB ∩ g 6= ∅ ⇔ i 6= j -„⇐“: A ∈ H1, B ∈ H2 : AB ∩ g 6= ∅ -Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H1 haben einen Abstand -von m der kleiner ist als r und alle Punkte in H2 haben einen Abstand von m der -größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige -Abbildung f : R→ R>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g 6= ∅ -„⇒“: A ∈ Hi, B ∈ Hj mit i, j ∈ { 1, 2 } : AB ∩ g 6= ∅ ⇒ i 6= j -Sei h die Gerade, die durch A und B geht. - -Da A,B /∈ g, aber A,B ∈ h gilt, haben g und h insbesondere mindestens einen -unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt -schneiden. Sei C dieser Punkt. - -Aus A,B /∈ g folgt: C 6= A und C 6= B. Also liegt C zwischen A und B. Daraus folgt, -dass A und B bzgl. g in verschiedenen Halbebenen liegen. - -c) Siehe Abbildung 4.21. - -x - -y - -−5 −4 −3 −2 −1 0 1 2 3 4 5 6 -0 - -1 - -2 - -3 - -4 - -5 - -Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. - - - -81 4.3. HYPERBOLISCHE GEOMETRIE - -Definition 64 -Es seien a, b, c, d ∈ R mit ad− bc 6= 0 und σ : C→ C eine Abbildung definiert durch - -σ(z) := -az + b - -cz + d - -σ heißt Möbiustransformation. 
- -Proposition 4.9 -a) Die Gruppe SL2(R) operiert auf H durch die Möbiustransformation - -σ(z) := - -( -a b -c d - -) -◦ z := az + b - -cz + d - -b) Die Gruppe PSL2(R) = SL2(R)/(±I) operiert durch σ auf H. - -c) PSL2(R) operiert auf R ∪ {∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h. -zu x0 < x1 < x∞ ∈ R gibt es genau ein σ ∈ PSL2(R) mit σ(x0) = 0, σ(x1) = 1, -σ(x∞) =∞. - -d) SL2(R) wird von den Matrizen( -λ 0 -0 λ−1 - -) -︸ ︷︷ ︸ - -=:Aλ - -, - -( -1 t -0 1 - -) -︸ ︷︷ ︸ - -=:Bt - -und -( - -0 1 -−1 0 - -) -︸ ︷︷ ︸ - -=:C - -mit t, λ ∈ R× - -erzeugt. - -e) PSL2(R) operiert auf G. - -Beweis: - -a) Sei z = x+ iy ∈ H, d. h. y > 0 und σ = -( -a b -c d - -) -∈ SL2(R) - -⇒ σ(z) = a(x+ iy) + b -c(x+ iy) + d - -= -(ax+ b) + iay - -(cx+ d) + icy -· (cx+ d)− icy - -(cx+ d)− icy - -= -(ax+ b)(cx+ d) + aycy - -(cx+ d)2 + (cy)2 -+ i - -ay(cx+ d)− (ax+ b)cy -(cx+ d)2 + (cy)2 - -= -axcx+ axd+ bcx+ bd+ aycy - -(cx+ d)2 + (cy)2 -+ i - -(ad− bc)y -(cx+ d)2 + (cy)2 - -SL2(R) -= - -ac(x2 + y2) + adx+ bcx+ bd - -(cx+ d)2 + (cy)2 -+ i - -y - -(cx+ d)2 + (cy)2 - -⇒ =(σ(z)) = y -(cx+d)2+(cy)2 - -> 0 - -Die Abbildung bildet also nach H ab. Außerdem gilt:( -1 0 -0 1 - -) -◦ z = x+ iy - -1 -= x+ iy = z - - - -82 4.3. HYPERBOLISCHE GEOMETRIE - -und ( -a b -c d - -) -◦ -(( - -a′ b′ - -c′ d′ - -) -◦ z -) - -= - -( -a b -c d - -) -◦ a -′z + b′ - -c′z + d′ - -= -aa -′z+b′ - -c′z+d′ + b - -ca -′z+b′ - -c′z+d′ + d - -= - -a(a′z+b′)+b(c′z+d′) -c′z+d′ - -c(a′z+b′)+d(c′z+d′) -c′z+d′ - -= -a(a′z + b′) + b(c′z + d′) - -c(a′z + b′) + d(c′z + d′) - -= -(aa′ + bc′)z + ab′ + bd′ - -(ca′ + db′)z + cb′ + dd′ - -= - -( -aa′ + bc′ ab′ + bd′ - -ca′ + db′ cb′ + dd′ - -) -◦ z - -= - -(( -a b -c d - -) -· -( -a′ b′ - -c′ d′ - -)) -◦ z - -b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL2(R) und z ∈ H. - -c) Ansatz: σ = -( -a b -c d - -) -σ(x0) = - -ax0+b -cx0+d - -! 
-= 0 ⇒ ax0 + b = 0⇒ b = −ax0 - -σ(x∞) =∞⇒ cx∞ + d = 0⇒ d = −cx∞ -σ(x1) = 1⇒ ax1 + b = cx1 + d -a(x1 − x0) = c(x1 − x∞)⇒ c = a x1−x0x1−x∞ -⇒ −a2 · x∞ x1−x0x1−x∞ + a - -2x0 -x1−x0 -x1−x∞ = 1 - -⇒ a2 x1−x0x0−x∞ (x0 − x∞) = 1 ⇒ a -2 = x1−x∞(x1−x∞)(x1−x0) - -d) Es gilt: - -A−1λ = A 1λ - -B−1t = B−t - -C−1 = C3 - -Daher genügt es zu zeigen, dass man mit Aλ, Bt und C alle Matrizen aus SL2(R) -erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit -Matrizen der Form Aλ, Bt und C die Einheitsmatrix zu generieren. - -Sei also - -M = - -( -a b -c d - -) -∈ SL2(R) - -beliebig. - -Fall 1: a = 0 -Da M ∈ SL2(R) ist, gilt detM = 1 = ad− bc = −bc. Daher ist insbesondere c 6= 0. Es -folgt: - -( -0 1 -−1 0 - -) -· -( -a b -c d - -) -= - -( -c d -−a −b - -) - - - -83 4.3. HYPERBOLISCHE GEOMETRIE - -Gehe zu Fall 2. - -Fall 2: a 6= 0 -Nun wird in M durch M ·A 1 - -a -an der Stelle von a eine 1 erzeugt: - -( -a b -c d - -) -· -( - -1 -a 0 -0 a - -) -= - -( -1 ab -c -a ad - -) - -Gehe zu Fall 3. - -Fall 3: a = 1 ( -1 b -c d - -) -· -( - -1 −b -0 1 - -) -= - -( -1 0 -c d− bc - -) -Da wir detM = 1 = ad− bc = d− bc wissen, gilt sogar M2,2 = 1. -Gehe zu Fall 4. - -Fall 4: a = 1, b = 0, d = 1 - -A−1CBcC - -( -1 0 -c 1 - -) -= - -( -1 0 -0 1 - -) -Daher erzeugen Matrizen der Form Aλ, Bt und C die Gruppe SL2R. � - -e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. - -• σ = -( -λ 0 -0 λ−1 - -) -, also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in - -Abbildung 4.22a und Abbildung 4.22b dargestellt sind. 
- -x - -y - -−1 0 1 2 3 4 5 6 7 -0 - -1 - -2 - -3 - -m λ2m - -m+ ir - -λ2m+ iλ2r - -m+ 1 - -(a) Fall 1 - -x - -y - -−1 0 1 2 3 4 -0 - -1 - -2 - -3 - -z - -x - -λ2z - -λ2x - -(b) Fall 2 (Strahlensatz) - -Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix - -• Offensichtlich gilt die Aussage für σ = -( - -1 a -0 1 - -) -• Sei nun σ = - -( -0 1 -−1 0 - -) -, also σ(z) = −1z - -Bemerkung 69 -Zu hyperbolischen Geraden g1, g2 gibt es σ ∈ PSL2(R) mit σ(g1) = g2. - - - -84 4.3. HYPERBOLISCHE GEOMETRIE - -· - -x - -y - -−1 0 1 -0 - -1 - -z = r · eiϕ - -1 -z = - -1 -r · eiϕ - -Abbildung 4.23: Inversion am Kreis - -Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a1) = b1 und σ(a2) = b2. Dann existiert -σ(g1) := g2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. - -Definition 65 -Seien z1, z2, z3, z4 ∈ C paarweise verschieden. -Dann heißt - -DV(z1, z2, z3, z4) := -z1−z4 -z1−z2 -z3−z4 -z3−z2 - -= -(z1 − z4) · (z3 − z2) -(z1 − z2) · (z3 − z4) - -Doppelverhältnis von z1, . . . , z4. - -Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a) DV(z1, . . . , z4) ∈ C \ { 0, 1 } -b) DV(z1, z4, z3, z2) = 1DV(z1,z2,z3,z4) - -c) DV(z3, z2, z1, z4) = 1DV(z1,z2,z3,z4) - -d) DV ist auch wohldefiniert, wenn eines der zi =∞ oder wenn zwei der zi gleich sind. -e) DV(0, 1,∞, z4) = z4 (Der Fall z4 ∈ { 0, 1,∞} ist zugelassen). -f) Für σ ∈ PSL2(C) und z1, . . . , z4 ∈ C ∪ {∞ } ist - -DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) - -und für σ(z) = 1z gilt - -DV(σ(z1), σ(z2), σ(z3), σ(z4)) = DV(z1, z2, z3, z4) - -g) DV(z1, z2, z3, z4) ∈ R ∪ {∞ } ⇔ z1, . . . , z4 liegen auf einer hyperbolischen Geraden. - -Beweis: - -a) DV(z1, . . . , z4) 6= 0, da zi paarweise verschieden -DV(z1, . . . , z4) 6= 1, da: - -Annahme: DV(z1, . . . , z4) = 1 - -⇔ (z1 − z2)(z3 − z4) = (z1 − z4)(z3 − z2) - - - -85 4.3. 
HYPERBOLISCHE GEOMETRIE - -⇔ z1z3 − z2z3 − z1z4 + z2z4 = z1z3 − z3z4 − z1z2 + z2z4 -⇔ z2z3 + z1z4 = z3z4 + z1z2 -⇔ z2z3 − z3z4 = z1z2 − z1z4 -⇔ z3(z2 − z4) = z1(z2 − z4) - -⇔ z3 = z1 oder z2 = z4 - -Alle zi sind paarweise verschieden ⇒ Widerspruch � -b) DV(z1, z4, z3, z2) = - -(z1−z2)·(z3−z4) -(z1−z4)·(z3−z2) = - -1 -DV(z1,z2,z3,z4) - -c) DV(z3, z2, z1, z4) = -(z3−z4)·(z1−z2) -(z3−z2)·(z1−z4) = - -1 -DV(z1,z2,z3,z4) - -d) Zwei der zi dürfen gleich sein, da: - -Fall 1 z1 = z4 oder z3 = z2 -In diesem Fall ist DV(z1, . . . , z4) = 0 - -Fall 2 z1 = z2 oder z3 = z4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z1, . . . , z4) =∞ gilt. - -Fall 3 z1 = z3 oder z2 = z4 -Durch Einsetzen ergibt sich DV(z1, . . . , z4) = 1. - -Im Fall, dass ein zi =∞ ist, ist entweder DV(0, 1,∞, z4) = 0 oder DV(0, 1,∞, z4)±∞ - -e) DV(0, 1,∞, z4) = (0−z4)·(∞−1)(0−1)·(∞−z4) = -z4·(∞−1) -∞−z4 = z4 - -f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. - -g) Sei σ ∈ PSL2(C) mit σ(z1) = 0, σ(z2) = 1, σ(z3) =∞. Ein solches σ existiert, da man -drei Parameter von σ wählen darf. - -Bem. 70.f⇒ DV(z1, . . . , z4) = DV(0, 1,∞, σ(z4)) -⇒ DV(z1, . . . , z4) ∈ R ∪ {∞ } -⇔ σ(z4) ∈ R ∪ {∞ } -Behauptung folgt, weil σ−1(R ∪∞) ein Kreis oder eine Gerade in C ist. - -Definition 66 -Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 die -„Schnittpunkte“ von gz1,z2 mit R ∪ {∞ }. -Dann sei dH(z1, z2) := 12 | ln DV(a1, z1, a2, z2)| und heiße hyperbolische Metrik. - -Beh.: Für z1, z2 ∈ H sei gz1,z2 die eindeutige hyperbolische Gerade durch z1 und z2 und a1, a2 -die „Schnittpunkte“ von gz1,z2 mit R ∪ {∞ }. -Dann gilt: - -1 - -2 -| ln DV(a1, z1, a2, z2)| = - -1 - -2 -| ln DV(a2, z1, a1, z2)| - -Beweis: Wegen Bemerkung 70.c gilt: - -DV(a1, z1, a2, z2) = -1 - -DV(a2, z1, a1, z2) - -Außerdem gilt: - -ln -1 - -x -= lnx−1 = (−1) · lnx = − lnx - - - -86 4.3. 
HYPERBOLISCHE GEOMETRIE - -Da der ln im Betrag steht, folgt direkt: - -1 - -2 -| ln DV(a1, z1, a2, z2)| = - -1 - -2 -| ln DV(a2, z1, a1, z2)| - -Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver- -hältnis genutzt werden. � - -Beh.: Die hyperbolische Metrik ist eine Metrik auf H. - -Beweis: Wegen Bemerkung 70.f ist - -d(z1, z2) := d(σ(z1), σ(z2)) mit σ(a1) = 0, σ(a2) =∞ - -d. h. σ(gz1,z2) = iR (imaginäre Achse). - -also gilt o. B. d. A. z1 = ia und z2 = ib mit a, b ∈ R und a < b. - -2d(ia, ib) =| ln DV(0, ia,∞, ib) | - -=| ln (0− ib)(∞− ia) -(0− ia)(∞− ib) | - -=| ln b -a -| - -= ln b− ln a - -Also: d(z1, z2) ≥ 0, d(z1, z2) = 0⇔ z1 = z2 - -2d(z2, z1) =| ln DV(a2, z2, a1, z1) | -=| ln DV(∞, ib, 0, ia) | - -Bem. 70.b -= | ln DV(0, ib,∞, ia) | -= 2d(z1, z2) - -Liegen drei Punkte z1, z2, z3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z1, z3) = -d(z1, z2) + d(z2, z3) (wenn z2 zwischen z1 und z3 liegt). - -Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die -Vorlesung „Hyperbolische Geometrie“ verwiesen. - -Satz 4.10 -Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen -Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt, -aber Axiom §5 ist verletzt. - - - -87 4.3. HYPERBOLISCHE GEOMETRIE - -Übungsaufgaben - -Aufgabe 8 - -Seien (X, d) eine absolute Ebene und P,Q,R ∈ X Punkte. Der Scheitelwinkel des Winkels -∠PQR ist der Winkel, der aus den Halbgeraden QP− und QR− gebildet wird. Die -Nebenwinkel von ∠PQR sind die von QP+ und QR− bzw. QP− und QR+ gebildeten -Winkel. - -Zeigen Sie: - -(a) Die beiden Nebenwinkel von ∠PQR sind gleich. - -(b) Der Winkel ∠PQR ist gleich seinem Scheitelwinkel. - -Aufgabe 9 - -Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von -Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y . 
-Zeigen Sie: - -(a) Ist 4ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die -Winkel ∠ABC und ∠BCA gleich. - -(b) Ist 4ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel -gegenüber und umgekehrt. - -(c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit -P ∈ h und die g im rechten Winkel schneidet. Diese Grade heißt Lot von P auf g -und der Schnittpunkt des Lots mit g heißt Lotfußpunkt . - -Aufgabe 10 - -Seien f, g, h ∈ G und paarweise verschieden. -Zeigen Sie: f ‖ g ∧ g ‖ h⇒ f ‖ h - -Aufgabe 11 - -Beweise den Kongruenzsatz SSS. - - - -5 Krümmung - -Definition 67 -Sei f : [a, b]→ Rn eine eine Funktion aus C∞. Dann heißt f Kurve. - -5.1 Krümmung von Kurven - -Definition 68 -Sei γ : I = [a, b]→ Rn eine Kurve. - -a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: - -‖γ′(t)‖2 = 1 ∀t ∈ I - -Dabei ist γ′(t) = (γ′1(t), γ′2(t), . . . , γ′n(t)). - -b) l(γ) = -∫ b -a ‖γ′(t)‖dt heißt Länge von γ. - -Bemerkung 71 (Eigenschaften von Kurven I) -Sei γ : I = [a, b]→ Rn eine C∞-Funktion. - -a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b− a. -b) Ist γ durch Bogenlänge parametrisiert, so ist γ′(t) orthogonal zu γ′′(t) für alle t ∈ I. - -Beweis: - -a) l(γ) = -∫ b -a ‖γ′(t)‖dt = - -∫ b -a 1dt = b− a. - -b) Im Folgenden wird die Aussage nur für γ : [a, b]→ R2 bewiesen. Allerdings funktioniert -der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. - -1 = ‖γ′(t)‖ = ‖γ′(t)‖2 = 〈γ′(t), γ′(t)〉 - -⇒ 0 = d -dt -〈γ′(t), γ′(t)〉 - -= -d - -dt -(γ′1(t)γ - -′ -1(t) + γ - -′ -2(t)γ - -′ -2(t)) - -= 2 · (γ′′1 (t) · γ′1(t) + γ′′2 (t) · γ′2(t)) -= 2 · 〈γ′′(t), γ′(t)〉 - -Definition 69 -Sei γ : I → R2 eine durch Bogenlänge parametrisierte Kurve. - -a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt: - -〈n(t), γ′(t)〉 = 0, ‖n(t)‖ = 1 und det((γ′(t), n(t))) = +1 - - - -89 5.1. 
KRÜMMUNG VON KURVEN - -b) Seit κ : I → R so, dass gilt: -γ′′(t) = κ(t) · n(t) - -Dann heißt κ(t) Krümmung von γ in t. - -Da n(t) und γ′′(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). -Beispiel 45 - -Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. Es gilt: - -γ(t) = - -( -r · cos t - -r -, r · sin t - -r - -) -für t ∈ [0, 2πr] - -ist parametrisiert durch Bogenlänge, da gilt: - -γ′(t) = - -( -(r · 1 - -r -)(− sin t - -r -), r - -1 - -r -cos - -t - -r - -) -= - -( -− sin t - -r -, cos - -t - -r - -) -Der Normalenvektor von γ in t ist - -n(t) = - -( -− cos t - -r -,− sin t - -r - -) -da gilt: - -〈n(t), γ′(t)〉 = -〈( -− cos tr -− sin tr - -) -, - -( -− sin tr -cos tr - -)〉 -= (− cos t - -r -) · (− sin t - -r -) + (− sin t - -r -) · (cos t - -r -) - -= 0 - -‖n(t)‖ = -∥∥∥∥(− cos tr ,− sin tr ) - -∥∥∥∥ -= (− cos t - -r -)2 + (− sin t - -r -)2 - -= 1 - -det(γ′1(t), n(t)) = - -∥∥∥∥(− sin tr − cos trcos tr − sin tr -)∥∥∥∥ - -= (− sin t -r - -)2 − (− cos t -r - -) · cos t -r - -= 1 - -Die Krümmung ist für jedes t konstant 1r , da gilt: - -γ′′(t) = - -( -−1 -r - -cos -t - -r -,−1 - -r -sin - -t - -r - -) -= - -1 - -r -· -( -− cos t - -r -,− sin t - -r - -) -⇒ κ(t) = 1 - -r - - - -90 5.2. TANGENTIALEBENE - -Definition 70 -Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve. - -a) Für t ∈ I heißt κ(t) := ‖γ′′(t)‖ die Krümmung von γ in t. - -b) Ist für t ∈ I die Ableitung γ′′(t) 6= 0, so heißt γ′′(t)‖γ′′(t)‖ Normalenvektor an γ in t. - -c) b(t) sei ein Vektor, der γ′(t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt. -Also gilt: - -det(γ′(t), n(t), b(t)) = 1 - -b(t) heißt Binormalenvektor, die Orthonormalbasis{ -γ′(t), n(t), b(t) - -} -heißt begleitendes Dreibein. - -Bemerkung 72 (Eigenschaften von Kurven II) -Sei γ : I → R3 durch Bogenlänge parametrisierte Kurve. - -a) n(t) ist orthogonal zu γ′(t). - -b) b(t) aus Definition 70.c ist eindeutig. - -5.2 Tangentialebene - -Erinnerung Sie sich an Definition 32 „reguläre Fläche“. 
- -Äquivalent dazu ist: S ist lokal von der Form - -V (f) = -{ -x ∈ R3 - -∣∣ f(x) = 0 } -für eine C∞-Funktion f : R3 → R. -Definition 71 - -Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, F : U → V ∩ S eine lokale Parametrisierung um -s ∈ V : - -(u, v) 7→ (x(u, v), y(u, v), z(u, v)) -Für p = F−1(s) ∈ U sei - -JF (p) = - -∂x∂u(p) ∂x∂v (p)∂y -∂u(p) - -∂y -∂v (p) - -∂z -∂u(p) - -∂z -∂v (p) - - -und DpF : R2 → R3 die durch JF (p) definierte lineare Abbildung. - -Dann heißt TsS := Bild(DpF ) die Tangentialebene an s ∈ S. - -Bemerkung 73 (Eigenschaften der Tangentialebene) -a) TsS ist 2-dimensionaler Untervektorraum von R3. - -b) TsS = 〈ũ, ṽ〉, wobei ũ, ṽ die Spaltenvektoren der Jacobi-Matrix JF (p) sind. -c) TsS hängt nicht von der gewählten Parametrisierung ab. - - - -91 5.2. TANGENTIALEBENE - -d) Sei S = V (f) eine reguläre Fläche in R3, also f : V → R eine C∞-Funktion, V ⊆ R3 -offen, grad(f)(x) 6= 0 für alle x ∈ S. -Dann ist TsS = (grad(f)(s))⊥ für jedes s ∈ S. - -Beweis: - -a) JF ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist -eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein -Vektorraum ist. Da Rg(JF ) = 2, ist auch dim(TsS) = 2. - -b) Hier kann man wie in Punkt a) argumentieren - -c) TsS = {x ∈ R3|∃parametrisierte Kurve γ : [−ε,+ε] → S für ein ε > 0 mit γ(0) = -s und γ′(0) = x} -Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. - -d) Sei x ∈ TsS, γ : [−ε,+ε] → S eine parametrisierte Kurve mit ε > 0 und γ′(0) = s, -sodass γ′(0) = x gilt. Da γ(t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 -⇒ 0 = (f ◦ γ)′(0) = 〈grad(f)(γ(0)), γ′(0)〉 -⇒ TsS ⊆ grad(f)(s)⊥ -dim=2 -====⇒ TsS = (grad(f)(s))⊥ - -Definition 72 -a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 ist eine Abbildung n : S → S2 ⊆ - -R3 mit n(s) ∈ TsS⊥ für jedes s ∈ S. - -b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. - -Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. 
-Im Folgenden werden diese Begriffe jedoch synonym benutzt. - -Bemerkung 74 (Eigenschaften von Normalenfeldern) -a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C∞). - -b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 von s und eine lokale Parametrisierung -F : U → V von S um s, sodass auf F (U) = V ∩ S ein stetiges Normalenfeld existiert. - -c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen -Parametrisierungen Fi : Ui → Vi, i ∈ I gibt, sodass für alle i, j ∈ I und alle -s ∈ Vi ∩ Vj ∩ S gilt: - -$\det\bigl(\underbrace{D_s \overbrace{F_j \circ F_i^{-1}}^{V_i \to V_j}}_{\in \mathbb{R}^{3 \times 3}}\bigr) > 0$ - -Beweis: Wird hier nicht geführt. - -Beispiel 46 (Normalenfelder) -1) S = S2, n1 = idS2 ist ein stetiges Normalenfeld. - -Auch n2 = −idS2 ist ein stetiges Normalenfeld. -2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, aber kein stetiges Normalenfeld. - - - -92 5.3. GAUSS-KRÜMMUNG - -Abbildung 5.1: Möbiusband - -5.3 Gauß-Krümmung - -Bemerkung 75 -Sei S eine reguläre Fläche, s ∈ S, n(s) ein Normalenvektor in s, x ∈ TsS, ‖x‖ = 1. -Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3. - -Dann gibt es eine Umgebung V ⊆ R3 von s, sodass - -C := (s+ E) ∩ S ∩ V - -das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε]→ S enthält mit γ(0) = s -und γ′(0) = x. - -Beweis: „Satz über implizite Funktionen“1 - -Definition 73 -In der Situation aus Bemerkung 75 heißt die Krümmung κγ(0) der Kurve γ in der Ebene -(s+ E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ′(0). - -Man schreibt: κNor(s, x) := κγ(0) - -Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt.
- -Beispiel 47 (Gauß-Krümmung) -1) S = S2 = V (X2 + Y 2 + Z2 − 1) ist die Kugel um den Ursprung mit Radius 1, n = id, - -s = (0, 0, 1), x = (1, 0, 0) -⇒ E = R · x+ R · n(s) (x, z-Ebene) -C = E ∩ S ist Kreislinie -κNor(s, x) = - -1 -r = 1 - -2) S = V (X2 + Z2 − 1) ⊆ R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0) -x1 = (0, 1, 0)⇒ E1 = R · e1 + R · e2 (x, y-Ebene) -S ∩ E1 = V (X2 + Y 2 − 1) ∩ E, Kreislinie in E -⇒ κNor(s, x1) = ±1 -x2 = (0, 0, 1), E2 = R · e1 + R · e3 (x, z-Ebene) - -1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II - -https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II - - -93 5.3. GAUSS-KRÜMMUNG - -V ∩ E2 ∩ S = -{ - -(1, 0, z) ∈ R3 -∣∣ z ∈ R } ist eine Gerade - -⇒ κNor(s, x2) = 0 - -3) S = V (X2 − Y 2 − Z), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) -x1 = (1, 0, 0), n(s) = (0, 0, 1) -x2 = (0, 1, 0) -κNor(s, x1) = 2 -κNor(s, x2) = −2 - -−1.5 −1 -−0.5 0 - -0.5 -1 1.5 - -−1 - -0 - -1 - -0 - -1 - -2 - -3 - -4 - -5 - -x - -y - -z - -(a) S = V (X2 + Z2 − 1) -−2 −1.5 −1 - -−0.5 0 -0.5 1 - -1.5 2 - -−2 - -−1 - -0 - -1 - -2 - -−2 - -0 - -2 - -x - -y - -z - -−4 -−2 -0 - -2 - -4 - -f(x, y) - -(b) S = V (X2 − Y 2 − Z) - -Abbildung 5.2: Beispiele für reguläre Flächen - -Definition 74 -Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. -γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und -γ′′(0) 6= 0. - -Sei n(0) := γ -′′(0) - -‖γ′′(0)‖ . Zerlege - -n(0) = n(0)t + n(0)⊥ mit n(0)t ∈ TsS und n(0)⊥ ∈ (TsS)⊥ - -Dann ist n(0)⊥ = 〈n(0), n(s)〉 · n(s) -κNor(s, γ) := 〈γ′′(0), n(s)〉 die Normalkrümmung. - -Bemerkung 76 -Sei γ(t) = γ(−t), t ∈ [−ε, ε]. Dann ist κNor(s, γ) = κNor(s, γ). - -Beweis: γ′′(0) = γ′′(0), da γ′(0) = −γ′(0). -Es gilt: κNor(s, γ) hängt nur von |γ′(0)| ab und ist gleich κNor(s, γ′(0)). - -Bemerkung 77 -Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. 
- -Sei $T^1_s S = \{ x \in T_s S \mid \|x\| = 1 \} \cong S^1$. Dann ist - -$\kappa^n_{\mathrm{Nor}}(s) \colon T^1_s S \to \mathbb{R}, \; x \mapsto \kappa_{\mathrm{Nor}}(s, x)$ - -eine glatte Funktion und Bild κnNor(s) ist ein abgeschlossenes Intervall. - -Definition 75 -Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. - - - -94 5.3. GAUSS-KRÜMMUNG - -a) $\kappa^n_1(s) := \min \{ \kappa^n_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$ und -$\kappa^n_2(s) := \max \{ \kappa^n_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$ -heißen Hauptkrümmungen von S in s. - -b) K(s) := κn1 (s) · κn2 (s) heißt Gauß-Krümmung von S in s. -Bemerkung 78 - -Ersetzt man n durch −n, so gilt: - -$\kappa^{-n}_{\mathrm{Nor}}(s, x) = -\kappa^n_{\mathrm{Nor}}(s, x) \quad \forall x \in T^1_s S$ -⇒ $\kappa^{-n}_1(s) = -\kappa^n_2(s)$ -$\kappa^{-n}_2(s) = -\kappa^n_1(s)$ - -und $K^{-n}(s) = K^n(s) =: K(s)$ - -Beispiel 48 -1) S = S2. Dann ist κ1(s) = κ2(s) = ±1 ∀s ∈ S2 -⇒ K(s) = 1 - -2) Zylinder: -κ1(s) = 0, κ2(s) = 1 ⇒ K(s) = 0 - -3) Sattelpunkt auf hyperbolischem Paraboloid: -κ1(s) < 0, κ2(s) > 0 ⇒ K(s) < 0 - -4) S = Torus. Siehe Abbildung 5.3 - -s1 - -s2 - -s3 - -Abbildung 5.3: K(s1) > 0, K(s2) = 0, K(s3) < 0 - -Bemerkung 79 -Sei S eine reguläre Fläche, s ∈ S ein Punkt. - - - -95 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM - -a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von TsS + s. - -b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von TsS + s. - -5.4 Erste und zweite Fundamentalform - -Sei S ⊆ R3 eine reguläre Fläche, s ∈ S, TsS die Tangentialebene an S in s und F : U → V eine -lokale Parametrisierung von S um s. Weiter sei p := F−1(s). - -Definition 76 -Sei IS ∈ R2×2 definiert als - -$I_S := \begin{pmatrix} g_{1,1}(s) & g_{1,2}(s) \\ g_{1,2}(s) & g_{2,2}(s) \end{pmatrix} = \begin{pmatrix} E(s) & F(s) \\ F(s) & G(s) \end{pmatrix}$ -mit $g_{i,j} = g_s(D_p F(e_i), D_p F(e_j)) = \left\langle \frac{\partial F}{\partial u_i}(p), \frac{\partial F}{\partial u_j}(p) \right\rangle$, $i, j \in \{ 1, 2 \}$ - -Die Matrix IS heißt erste Fundamentalform von S bzgl. der Parametrisierung F . - -Bemerkung 80 -a) Die Einschränkung des Standardskalarproduktes des R3 auf TsS macht TsS zu einem - -euklidischen Vektorraum. - -b) {DpF (e1), DpF (e2) } ist eine Basis von TsS. -c) Bzgl.
der Basis {DpF (e1), DpF (e2) } hat das Standardskalarprodukt aus Bemer- - -kung 80.a die Darstellungsmatrix IS . - -d) gi,j(s) ist eine differenzierbare Funktion von s. - -Bemerkung 81 - -det(IS) = - -∥∥∥∥ ∂F∂u1 (p)× ∂F∂u2 (p) -∥∥∥∥2 - -Beweis: Sei ∂F∂u1 (p) = - -x1x2 -x3 - - , ∂F∂u2 (p) = -y1y2 -y3 - - -Dann ist ∂F∂u1 (p)× - -∂F -∂u2 - -(p) = - -z1z2 -z3 - - mit -z1 = x2y3 − x3y2 -z2 = x3y1 − x1y3 -z3 = x1y2 − x2y1 - -⇒ ‖ ∂F -∂u1 - -(p)× ∂F -∂u2 - -(p)‖ = z21 + z22 + z23 - - - -96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM - -det(IS) = g1,1g2,2 − g21,2 - -= - -〈x1x2 -x3 - - , -x1x2 -x3 - -〉〈y1y2 -y3 - - , -y1y2 -y3 - -〉−〈 -x1x2 -x3 - - , -y1y2 -y3 - -〉2 - -= (x21 + x -2 -2 + x - -2 -3)(y - -2 -1 + y - -2 -2 + y - -2 -3)− (x1y1 + x2y2 + x3y3)2 - -Definition 77 - -a) Das Differential dA = -√ - -det(I)du1du2 heißt Flächenelement von S bzgl. der Para- -metrisierung F . - -b) Für eine Funktion f : V → R heißt∫ -V -fdA := - -∫ -U -f(F (u1, u2)︸ ︷︷ ︸ - -=:s - -) -√ - -det I(s)du1du2 - -der Wert des Integrals von f über V , falls das Integral rechts existiert. - -Bemerkung 82 -a) -∫ -V fdA ist unabhängig von der gewählten Parametrisierung. - -b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. -Dann ist - -∫ -S fdA wohldefiniert, falls (z. B.) S kompakt ist. - -Etwa: ∫ -S -fdA = - -n∑ -i=1 - -∫ -Vi - -fdA - -− -∑ -i 6=j - -∫ -Vi∩Vj -fdA - -+ -∑ -i,j,k - -∫ -Vi∩Vj∩Vk -fdA - -− . . . - -Beweis: - -a) Mit Transformationsformel. - -b) Ist dem Leser überlassen. - -Proposition 5.1 -Sei S ⊆ R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S2. -Dann gilt: - -a) n induziert für jedes s ∈ S eine lineare Abbildung dsn : TsS → Tn(s)S2 durch - -dsn(x) = -d - -dt -n(s„+“tx︸ ︷︷ ︸ - -Soll auf Fläche S bleiben - -) -∣∣∣ -t=0 - -Die Abbildung dsn heißt Weingarten-Abbildung - - - -97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM - -b) Tn(s)S2 = TsS. - -c) dsn ist ein Endomorphismus von TsS. 
- -d) dsn ist selbstadjungiert bzgl. des Skalarproduktes IS . - -Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. - - - -98 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM - -Beweis: - -a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. - -b) Tn(S)S2 = 〈n(s)〉⊥ = TsS -c) Wegen Proposition 5.1 (a) ist dsn ein Homomorphismus. - -d) Zu zeigen: ∀x, y ∈ IsS : 〈x, dsn(y)〉 = 〈dsn(x), y〉 -Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die -Basisvektoren zu zeigen. - -Sei xi = DpF (ei) = ∂F∂ui (p) i = 1, 2 - -Beh.: 〈xi, dsn(xj)〉 = 〈 ∂ -2F - -∂ui∂uj -(p), dsn(xi)〉 - -⇒ 〈 ∂2F∂ui∂uj (p), dsn(xi)〉 = 〈xj , dsn(xi)〉 - -Bew.: 0 = 〈∂F -∂u - -(p+ tej), n(p+ tej)〉 - -⇒ 0 = d -dt - -( -〈∂F -∂u - -(p+ tej), n(p+ tej)〉 -)∣∣∣ - -t=0 - -= 〈 d -dt - -∂F - -∂ui -(p+ tej)︸ ︷︷ ︸ - -∂2F -∂uj∂ui - -(p) - -∣∣∣ -t=0 - -, n(s)〉+ 〈xi, dsnDpF (ej)︸ ︷︷ ︸ -xj - -〉 - -Definition 78 -Die durch −dsn definierte symmetrische Bilinearform auf TsS heißt zweite Fundamental- -form von S in s bzgl. F . - -Man schreibt: IIs(x, y) = 〈−dsn(x), y〉 = Is(−dsn(x), y) -Bemerkung 83 - -Bezüglich der Basis { x1, x2 } von TsS hat IIs die Darstellungsmatrix - -(h -(s) -i,j )i,j=1,2 mit hi,j(s) = 〈 - -∂2F - -∂ui∂uj -(p), n(s)〉 - -Proposition 5.2 -Sei γ : [−ε, ε]→ S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: - -κNor(s, γ) = IIs(γ -′(0), γ′(0)) - -Beweis: Nach Definition 74 ist κNor(s, γ) = 〈γ′′(0), n(s)〉. Nach Voraussetzung gilt - -n(γ(t)) ⊥ γ′(t)⇔ 〈γ′′(0), n(s)〉 = 0 - -Die Ableitung nach t ergibt - -0 = -d - -dt -(〈n(γ(t)), γ′(t)) - -= - -〈 -d - -dt -n(γ(t)) - -∣∣∣ -t=0 - -, γ′(0) - -〉 -+ 〈n(s), γ′′(0)〉 - - - -99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM - -= 〈dsn(γ′(0)), γ′(0)〉+ κNor(s, γ) -= −IIs(γ′(0), γ′(0)) + κNor(s, γ) - -Folgerung 5.3 -Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: - -κNor(s, γ) = κNor(s, γ -′(0)) - -Satz 5.4 -Sei S ⊆ R3 eine reguläre, orientierbare Fläche und s ∈ S. 
- -a) Die Hauptkrümmungen κ1(s), κ2(s) sind die Eigenwerte von IIs. - -b) Für die Gauß-Krümmung gilt: K(s) = det(IIs) - -Beweis: - -a) IIs ist symmetrisch, TsS hat also eine Orthonormalbasis aus Eigenvektoren y1, y2 von -IIs. Ist x ∈ TsS, ‖x‖ = 1, so gibt es ϕ ∈ [0, 2π) mit x = cosϕ · y1 + sinϕ · y2. -Seien λ1 ≤ λ2 die Eigenwerte von IIs, also IIs(yi, yi) = λi. Dann gilt: - -$II_s(x, x) = \cos^2 \varphi \cdot \lambda_1 + \sin^2 \varphi \cdot \lambda_2$ - -$= (1 - \sin^2 \varphi) \lambda_1 + \sin^2 \varphi \cdot \lambda_2$ -$= \lambda_1 + \sin^2 \varphi \, (\lambda_2 - \lambda_1) \geq \lambda_1$ -und ebenso -$II_s(x, x) = \cos^2 \varphi \cdot \lambda_1 + (1 - \cos^2 \varphi) \lambda_2$ -$= \lambda_2 - \cos^2 \varphi \, (\lambda_2 - \lambda_1) \leq \lambda_2$ - -$\xRightarrow{\text{Prop. 5.2}} \lambda_1 = \min \{ \kappa_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$ -$\lambda_2 = \max \{ \kappa_{\mathrm{Nor}}(s, x) \mid x \in T^1_s S \}$ - -Satz 5.5 (Satz von Gauß-Bonnet) -Sei S ⊆ R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: - -$\int_S K(s) \, dA = 2 \pi \chi(S)$ - -Dabei ist χ(S) die Euler-Charakteristik von S. - -Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von -Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. - - - -Lösungen der Übungsaufgaben - -Lösung zu Aufgabe 1 - -Teilaufgabe a) Es gilt: - -(i) ∅, X ∈ TX . -(ii) TX ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U1, U2 ∈ - -TX : U1 ∩ U2 ∈ TX . -(iii) Auch unter beliebigen Vereinigungen ist TX abgeschlossen, d. h. es gilt für eine - -beliebige Indexmenge I und alle Ui ∈ TX für alle i ∈ I : $\bigcup_{i \in I} U_i \in T_X$ - -Also ist (X,TX) ein topologischer Raum. - -Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x ≠ y und die einzige Umgebung von x -ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. -(X,TX) ist also nicht hausdorffsch. - -Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,TX) nach -(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X,TX) -kein metrischer Raum sein kann. - -Lösung zu Aufgabe 2 - -Teilaufgabe a) - -Beh.: ∀a ∈ Z : { a } ist abgeschlossen. -Sei a ∈ Z beliebig.
Dann gilt: -Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de -schicken. - -Teilaufgabe b) - -Beh.: { −1, 1 } ist nicht offen -Bew.: durch Widerspruch - -Annahme: { −1, 1 } ist offen. -Dann gibt es T ⊆ B, sodass ⋃M∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele -Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente -⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ {−1, 1 } ist -nicht offen. � - -Teilaufgabe c) - -Beh.: Es gibt unendlich viele Primzahlen. - - - -101 Lösungen der Übungsaufgaben - -Bew.: durch Widerspruch - -Annahme: Es gibt nur endlich viele Primzahlen p ∈ P -Dann ist - -Z \ { −1,+1 } FS d. Arithmetik= -⋃ -p∈P - -U0,p - -endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich. � - -Lösung zu Aufgabe 3 - -(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form∏ -j∈J - -Uj × -∏ - -i∈N,i 6=j -Pi - -wobei J ⊆ N endlich und Uj ⊆ Pj offen ist. - -Beweis: Nach Definition der Produkttopologie bilden Mengen der Form∏ -i∈J - -Uj × -∏ -i∈N\J - -Pi - -wobei J ⊆ N endlich und Uj ⊆ Pj offen ∀j ∈ J eine Basis der Topologie. -Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen -Form. � - -(b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. - -Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangs- -komponente Z ⊆ P . Da Z zusammenhängend ist und ∀i ∈ I : pi : P → Pi ist -stetig, ist pi(Z) ⊆ Pi zusammenhängend für alle i ∈ N. Die zusammenhängenden -Mengen von Pi sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder -pi(Z) ⊆ { 0 } oder pi(Z) ⊆ { 1 }. Es sei zi ∈ { 0, 1 } so, dass pi(Z) ⊆ { zi } für -alle i ∈ N. Dann gilt also: - -pi(x)︸ ︷︷ ︸ -=xi - -= zi = pi(y)︸ ︷︷ ︸ -=yi - -∀i ∈ N - -Somit folgt: x = y � - -Lösung zu Aufgabe 4 - -(a) Beh.: GLn(R) ist nicht kompakt. -Bew.: det : GLn(R) → R \ { 0 } ist stetig. Außerdem ist det(GLn(R)) = R \ { 0 } -nicht kompakt. 
22⇒ GLn(R) ist nicht kompakt. ∎ - -(b) Beh.: SL1(R) ist kompakt, für n > 1 ist SLn(R) nicht kompakt. -Bew.: Für SL1(R) gilt: $SL_1(\mathbb{R}) = \{ A \in \mathbb{R}^{1 \times 1} \mid \det A = 1 \} = \{ (1) \} \cong \{ 1 \}$. 22⇒ SL1(R) -ist kompakt. - - - -102 Lösungen der Übungsaufgaben - -SLn(R) ⊆ GLn(R) lässt sich mit einer Teilmenge des $\mathbb{R}^{n^2}$ identifizieren. Nach Satz 1.1 -sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere -nun für n ∈ N≥2, m ∈ N: - -$A_m = \operatorname{diag}_n(m, \tfrac{1}{m}, 1, \dots, 1)$ - -Dann gilt: detAm = 1, d. h. Am ∈ SLn(R), und die Folge (Am) ist unbeschränkt, da $\|A_m\|_\infty = m \xrightarrow{m \to \infty} \infty$. Also ist SLn(R) für n ≥ 2 nicht beschränkt und damit nicht kompakt. ∎ - -(c) Beh.: P(R) ist kompakt. -Bew.: P(R) ∼= Sn/x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung stetig. Da Sn als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt -ist 22⇒ P(R) ist kompakt. ∎ - -Lösung zu Aufgabe 5 - -Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. -Definition 79 - -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G→ H eine Abbildung. -ϕ heißt Homomorphismus, wenn - -∀g1, g2 ∈ G : ϕ(g1 ∗ g2) = ϕ(g1) ◦ ϕ(g2) -gilt. - -Es folgt direkt: - -1) Sei X = R mit der Standardtopologie und ϕ1 := idR und R = (R,+). Dann ist ϕ1 ein -Gruppenhomomorphismus und ein Homöomorphismus. - -2) Sei G = (Z,+) und H = (Z/3Z,+). Dann ist ϕ2 : G → H, x ↦ x mod 3 ein -Gruppenhomomorphismus. Jedoch ist ϕ2 nicht injektiv, also sicher kein Homöomorphismus. - -3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine -Verknüpfung auf X definiert wurde, ist X keine Gruppe und idX daher auch kein Gruppenhomomorphismus. - -Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten -verwendet. - -Lösung zu Aufgabe 6 - -Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf -Seite 6. -Definition 80 - -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G→ H eine Abbildung. -ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist.
- -Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen -Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. - - - -103 Lösungen der Übungsaufgaben - -Lösung zu Aufgabe 7 - -(a) Vor.: Sei M eine topologische Mannigfaltigkeit. -Beh.: M ist wegzusammenhängend ⇔ M ist zusammenhängend - -Beweis: „⇒“: Da M insbesondere ein topologischer Raum ist, folgt diese Richtung -direkt aus Bemerkung 23. - -„⇐“: Seien x, y ∈ M und - -Z := { z ∈ M | ∃ Weg von x nach z } - -Es gilt: - -(i) Z ≠ ∅, da M lokal wegzusammenhängend ist -(ii) Z ist offen, da M lokal wegzusammenhängend ist - -(iii) ZC := { z̃ ∈ M | ∄ Weg von x nach z̃ } ist offen -Da M eine Mannigfaltigkeit ist, existiert zu jedem z̃ ∈ ZC eine offene und -wegzusammenhängende Umgebung Uz̃ ⊆ M. -Es gilt sogar Uz̃ ⊆ ZC, denn gäbe es ein z ∈ Uz̃ ∩ Z, so gäbe es Wege γ2 : -[0, 1] → M, γ2(0) = z, γ2(1) = x und γ1 : [0, 1] → M, γ1(0) = z̃, γ1(1) = z. -Dann wäre aber - -γ : [0, 1] → M, - -$\gamma(t) = \begin{cases} \gamma_1(2t) & \text{falls } 0 \leq t \leq \frac{1}{2} \\ \gamma_2(2t - 1) & \text{falls } \frac{1}{2} < t \leq 1 \end{cases}$ - -ein stetiger Weg von z̃ nach x ⇒ Widerspruch. -Da M zusammenhängend ist und $M = \underbrace{Z}_{\text{offen}} \cup \underbrace{Z^C}_{\text{offen}}$ sowie Z ≠ ∅, folgt ZC = ∅. - -Also ist M = Z wegzusammenhängend. ∎ - -(b) Beh.: X ist wegzusammenhängend. - -Beweis: Sei X := (R \ { 0 }) ∪ { 01, 02 }. Die Teilräume (R \ { 0 }) ∪ { 01 } und (R \ { 0 }) ∪ { 02 } sind homöomorph zu R. -Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte, -01 und 02. - -Da (R \ { 0 }) ∪ { 01 } homöomorph zu R ist, existiert ein Weg γ1 von 01 zu einem -beliebigen Punkt a ∈ R \ { 0 }. -Da (R \ { 0 }) ∪ { 02 } ebenfalls homöomorph zu R ist, existiert außerdem ein -Weg γ2 von a nach 02. Damit existiert ein (nicht einfacher) Weg γ von 01 nach -02. ∎ - -Lösung zu Aufgabe 9 - -Vor.: Sei (X, d) eine absolute Ebene, A,B,C ∈ X und △ABC ein Dreieck. - - - -104 Lösungen der Übungsaufgaben - -(a) Beh.: AB ∼= AC ⇒ ∠ABC ∼= ∠ACB -Bew.: Sei AB ∼= AC. -⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A.
-⇒ ϕ(∠ABC) = ∠ACB -⇒ ∠ABC ∼= ∠ACB � - -(b) Beh.: Der längeren Seite von 4ABC liegt der größere Winkel gegenüber und umge- -kehrt. -Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C ′ ∈ AC+ mit d(A,C ′) = d(A,B) -⇒ C ′ liegt zwischen A und C. -Es gilt ]ABC ′ < ]ABC und aus Aufgabe 9 (a) folgt: ]ABC ′ = ]AC ′B. -∠BC ′A ist ein nicht anliegender Außenwinkel zu ∠BCA Bem. 66=====⇒ ]BC ′A > ]BCA -⇒ ]BCA < ]BC ′A = ]ABC ′ < ]ABC Sei umgekehrt ]ABC > ]BCA, kann -wegen 1. Teil von Aufgabe 9 (b) nicht d(A,B) > d(A,C) gelten. -Wegen Aufgabe 9 (a) kann nicht d(A,B) = d(A,C) gelten. -⇒ d(A,B) < d(A,C) � - -(c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g -Beh.: ∃! Lot -Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden -Halbebenen bzgl. g. -⇒ ϕ(P )P schneidet g in F . -Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g -⇒ ϕ(P )P schneidet g in F . -Sei A ∈ g\{ F }. Dann gilt ϕ(∠AFP ) = ∠AFϕ(P ) = π ⇒ ∠AFP ist rechter Winkel. -Gäbe es nun G ∈ g \ { F }, so dass PG weiteres Lot von P auf g ist, wäre 4PFG -ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). - -· -· - -A - -G - -P - -F - -g - -Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P - -Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π -⇒ G gibt es nicht. � - -Lösung zu Aufgabe 10 - -Sei f ‖ h und o. B. d. A. f ‖ g. -f ∦ h⇒ f ∩ h 6= ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele -zu g durch x, da x /∈ g. Diese ist f , da x ∈ f und f ‖ g. Da aber x ∈ h, kann h nicht - - - -105 Lösungen der Übungsaufgaben - -parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f 6= h).⇒ g ∦ h � - -Lösung zu Aufgabe 11 - -Sei (X, d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem 4ABC und 4A′B′C ′ -Dreiecke, für die gilt: - -d(A,B) = d(A′, B′) - -d(A,C) = d(A′, C ′) - -d(B,C) = d(B′, C ′) - -Sei ϕ die Isometrie mit ϕ(A) = A′, ϕ(B) = B′ und ϕ(C ′) liegt in der selben Halbebene -bzgl. AB wie C. 
Diese Isometrie existiert wegen §4. - -Es gilt d(A,C) = d(A′, C ′) = d(ϕ(A′), ϕ(C ′)) = d(A,ϕ(C ′)) und d(B,C) = d(B′, C ′) = -d(ϕ(B′), ϕ(C ′)) = d(B,ϕ(C ′)). -Bem. 62 -=====⇒ C = ϕ(C). -Es gilt also ϕ(4A′B′C ′) = 4ABC. � - - - -Bildquellen - -Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. - -Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. - -Abb. 0.1a S2: Tom Bombadil, tex.stackexchange.com/a/42865 - -Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069 - -Abb. 0.1e T 2: Jake, tex.stackexchange.com/a/70979/5645 - -Abb. 1.6 Stereographische Projektion: texample.net/tikz/examples/map-projections - -Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie: - -– Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png - -– Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png - -– Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png - -– 62-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png - -Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3) - -Abb. 1.13 Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring. -png - -Abb. 2.1 Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration. -png - -Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014. - -Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 - -Abb. 3.10 Überlagerung von S1 mit R: Alex, tex.stackexchange.com/a/149706/5645 - -Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint, -commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png - -Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645 - -Abb. 
5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 - -http://tex.stackexchange.com/a/42865/5645 -http://tex.stackexchange.com/a/12069/5645 -http://tex.stackexchange.com/a/70979/5645 -http://texample.net/tikz/examples/map-projections/ -https://commons.wikimedia.org/wiki/Category:Blue_knots -https://commons.wikimedia.org/wiki/File:Blue_Unknot.png -https://commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png -https://commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png -https://commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png -https://commons.wikimedia.org/wiki/File:Reidemeister_move_1.png -https://commons.wikimedia.org/wiki/File:Reidemeister_move_1.png -https://commons.wikimedia.org/wiki/File:Reidemeister_move_1.png -https://commons.wikimedia.org/wiki/File:Tricoloring.png -https://commons.wikimedia.org/wiki/File:Tricoloring.png -https://commons.wikimedia.org/wiki/File:Double_torus_illustration.png -https://commons.wikimedia.org/wiki/File:Double_torus_illustration.png -http://tex.stackexchange.com/users/484/charles-staats -http://tex.stackexchange.com/a/149991/5645 -http://tex.stackexchange.com/users/22467/alex -http://tex.stackexchange.com/a/149706/5645 -https://commons.wikimedia.org/wiki/User:DemonDeLuxe -https://commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png -http://tex.stackexchange.com/users/2552/jake -http://tex.stackexchange.com/a/118573/5645 -http://tex.stackexchange.com/users/484/charles-staats -http://tex.stackexchange.com/a/149991/5645 - - -Abkürzungsverzeichnis - -Beh. Behauptung - -Bew. Beweis - -bzgl. bezüglich - -bzw. beziehungsweise - -ca. circa - -d. h. das heißt - -Def. Definition - -etc. et cetera - -ex. existieren - -Hom. Homomorphismus - -o. B. d. A. ohne Beschränkung der Allgemeinheit - -Prop. Proposition - -sog. sogenannte - -Vor. Voraussetzung - -vgl. vergleiche - -z. B. zum Beispiel - -zhgd. zusammenhängend - -z. z. 
zu zeigen - - - -Ergänzende Definitionen und Sätze - -Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle -benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet, -aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra -und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. -Definition 81 - -Sei D ⊆ R und x0 ∈ R. x0 heißt ein Häufungspunkt von D :⇔ ∃ Folge xn in D \ { x0 } -mit xn → x0. - -Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra -entnommen: -Definition 82 - -Es seien V und W K-Vektorräume und A(V ) und A(W ) die zugehörigen affinen Räume. -Eine Abbildung f : V →W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ+µ = 1 -gilt: - -f(λa+ µb) = λf(a) + µf(b) - -Definition 83 -Sei V ein Vektorraum und S ⊆ V eine Teilmenge. -S heißt eine Orthonormalbasis von V , wenn gilt: - -(i) S ist eine Basis von V - -(ii) ∀v ∈ S : ‖v‖ = 1 -(iii) ∀v1, v2 ∈ S : v1 6= v2 ⇒ 〈v1, v2〉 = 0 - -Satz (Zwischenwertsatz) -Sei a < b und f ∈ C[a, b] := C([a, b]), weiter sei y0 ∈ R und f(a) < y0 < f(b) oder -f(b) < y0 < f(a). Dann existiert ein x0 ∈ [a, b] mit f(x0) = y0. - -Definition 84 -Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. -v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f(v) = λv. -Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f . - -Satz (Binomischer Lehrsatz) -Sei x, y ∈ R. Dann gilt: - -(x+ y)n = - -n∑ -k=0 - -( -n - -k - -) -xn−kyk ∀n ∈ N0 - -Definition 85 -Seien a, b ∈ R3 Vektoren. - -a× b := - -a1b3 -a3 - -× -a1b3 -a3 - - = -a2b3 − a3b2a3b1 − a1b3 -a1b2 − a2b1 - - - - - -Symbolverzeichnis - -Mengenoperationen - -Seien A,B und M Mengen. 
- -AC Komplement von A -P(M) Potenzmenge von M -M Abschluss von M -∂M Rand der Menge M -M◦ Inneres der Menge M -A×B Kreuzprodukt -A ⊆ B Teilmengenbeziehung -A ( B echte Teilmengenbeziehung -A \B Differenzmenge -A ∪B Vereinigung -A ∪̇B Disjunkte Vereinigung -A ∩B Schnitt - -Geometrie - -AB Gerade durch die Punkte A und -B - -AB Strecke mit Endpunkten A und B -4ABC Dreieck mit Eckpunkten A,B,C -AB ∼= CD Die Strecken AB und CD sind - -isometrisch -|K| Geometrische Realisierung des - -Simplizialkomplexes K - -Gruppen - -Sei X ein topologischer Raum und K ein Kör- -per. - -Homöo(X) Homöomorphismengruppe -Iso(X) Isometriengruppe -GLn(K) Allgemeine lineare Gruppe (von - -General Linear Group) -SLn(K) Spezielle lineare Gruppe -PSLn(K) Projektive lineare Gruppe - -Perm(X) Permutationsgruppe -Sym(X) Symmetrische Gruppe - -Wege - -Sei γ : I → X ein Weg. -[γ] Homotopieklasse von γ -γ1 ∗ γ2 Zusammenhängen von Wegen -γ1 ∼ γ2 Homotopie von Wegen -γ(x) Inverser Weg, also γ(x) := γ(1− x) -C Bild eines Weges γ, also C := - -γ([0, 1]) - -Weiteres - -B Basis einer Topologie -Bδ(x) δ-Kugel um x -S Subbasis einer Topologie -T Topologie - -A Atlas -P Projektiver Raum -〈·, ·〉 Skalarprodukt -X/∼ X modulo ∼ -[x]∼ Äquivalenzklassen von x bzgl. ∼ -‖x‖ Norm von x -|x| Betrag von x -〈a〉 Erzeugnis von a -Sn Sphäre -Tn Torus - -f ◦ g Verkettung von f und g -πX Projektion auf X -f |U f eingeschränkt auf U -f−1(M) Urbild von M -Rg(M) Rang von M -χ(K) Euler-Charakteristik von K - - - -110 Symbolverzeichnis - -∆k Standard-Simplex -X#Y Verklebung von X und Y -dn Lineare Abbildung aus Bemer- - -kung 37 -A ∼= B A ist isometrisch zu B -f∗ Abbildung zwischen Fundamental- - -gruppen (vgl. Seite 49) - - - -111 Symbolverzeichnis - -Zahlenmengen - -N = { 1, 2, 3, . . . } Natürliche Zahlen -Z = N ∪ { 0,−1,−2, . . . } Ganze Zahlen -Q = Z ∪ - -{ -1 -2 , - -1 -3 , - -2 -3 - -} -= -{ -z -n mit z ∈ Z und n ∈ Z \ { 0 } - -} -Rationale Zahlen - -R = Q ∪ -{√ - -2,− 3 -√ - -3, . . . 
-} - -Reele Zahlen -R+ Echt positive reele Zahlen -Rn+,0 := { (x1, . . . , xn) ∈ Rn | xn ≥ 0 } Halbraum -R× = R \ { 0 } Einheitengruppe von R -C = { a+ ib | a, b ∈ R } Komplexe Zahlen -P = { 2, 3, 5, 7, . . . } Primzahlen -H = { z ∈ C | =z > 0 } obere Halbebene -I = [0, 1] ( R Einheitsintervall - -f : S1 ↪→ R2 Einbettung der Kreislinie in die Ebene -π1(X,x) Fundamentalgruppe im topologischen Raum X um x ∈ X -Fix(f) Menge der Fixpunkte der Abbildung f -‖ · ‖2 2-Norm; Euklidische Norm -κ Krümmung -κNor Normalenkrümmung -V (f) Nullstellenmenge von f2 - -Krümmung - -DpF : R2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) -TsS Tangentialebene an S ⊆ R3 durch s ∈ S -dsn(x) Weingarten-Abbildung - -2von Vanishing Set - - - -Stichwortverzeichnis - -Abbildung -affine, 107 -differenzierbare, 29 -homotope, 50 -offene, 53 -simpliziale, 35 -stetige, 9 - -Abschluss, 3 -Abstand, 86 -Abstandsaxiom, 65 -Achterknoten, 20 -Aktion, siehe Gruppenoperation -Anordnungsaxiome, 66 -Atlas, 24 -Außenwinkel, 70 -Axiom, 64 -Axiomensystem, 64 - -Basis, 3 -Baum, 37 -Betti-Zahl, 41 -Bewegungsaxiom, 66 -Binormalenvektor, 89 - -Cantorsches Diskontinuum, 22 -Ck-Struktur, 29 - -Decktransformation, 59 -Decktransformationsgruppe, 59 -Deformationsretrakt, 47 -dicht, 3 -Diffeomorphismus, 29 -Dimension, 34 -diskret, 53 -Doppelverhältnis, 83 -Dreibein - -begleitendes, 89 - -Ebene -euklidische, 64 - -Eigenvektor, 107 -Eigenwert, 107 - -einfach zusammenhängend, 49 -Einheitsnormalenfeld, 90 -Euler-Charakteristik, siehe Eulerzahl -Eulersche Polyederformel, 38 -Eulerzahl, 36 - -Färbbarkeit, 21 -Faser, siehe Urbild -Fläche - -orientierbare, 90 -reguläre, 30 - -Flächenelement, 95 -Formoperator, siehe Weingarten-Abbildung -Fundamentalform - -erste, 94 -zweite, 97 - -Fundamentalgruppe, 47 - -Gauß-Krümmung, 92, 91–94 -Geometrie, 64 -Gerade, 64 - -hyperbolische, 77 -Graph, 37 -Grenzwert, 8 -Gruppe - -allgemeine lineare, 22, 26 -spezielle lineare, 22 -topologische, 33 - -Gruppe operiert durch 
Homöomorphismen, -61 - -Gruppenaktion, siehe Gruppenoperation -Gruppenoperation, 60, 60–63 - -stetige, 61 - -Häufungspunkt, 107 -Hülle - -konvexe, 34 -Halbebene, 66 -Halbgerade, 65 -Halbraum, 28 -Hauptkrümmung, 92 -Hilbert-Kurve, 19, 19 - - - -113 Stichwortverzeichnis - -Homöomorphismengruppe, 10 -Homöomorphismus, 9 -Homologiegruppe, 41 -Homomorphismus, 101 -Homotopie, 44 -Homotopieklasse, 47 - -Inklusionsabbildung, 47 -Innenwinkel, 70 -Inneres, 3 -Inzidenzaxiome, 64 -Isometrie, 6, 10 -Isometriegruppe, 10 -Isomorphismus, 101 -Isotopie, 20 - -Jordankurve, 19 -geschlossene, 19 - -Karte, 24 -Kartenwechsel, 28 -Kern - -offener, 3 -Kleeblattknoten, 20 -Klumpentopologie, siehe triviale Topologie -Knoten, 20, 17–21 - -äquivalente, 20 -trivialer, 20 - -Knotendiagramm, 20 -kollinear, 65 -kongruent, siehe isometrisch -Kongruenz, siehe Isometrie -Kongruenzsatz - -SSS, 104 -SWS, 69 -SWW, 74 -WSW, 70 - -Krümmung, 88, 89 -Kreis, 37 -Kreuzprodukt, 107 -Kurve, 87 - -Länge einer, 87 - -Lage -allgemeine, 34 - -Lehrsatz -Binomischer, 107 - -Lie-Gruppe, 33 -liegt zwischen, 65 -Liftung, 54 -Limes, 8 - -lokal, 3 -Lot, 86 -Lotfußpunkt, 86 - -Möbiusband, 91 -Möbiustransformation, 80 -Mannigfaltigkeit, 24 - -differenzierbare, 29 -geschlossene, 25 -glatte, 29 -mit Rand, 28 - -Menge -abgeschlossene, 2 -offene, 2 -zusammenhängende, 11 - -Metrik, 6 -diskrete, 6 -hyperbolische, 84 -SNCF, 8 - -Nebenwinkel, 86 -Neilsche Parabel, 27 -Normalenfeld, 90 -Normalenvektor, 87, 89 -Normalkrümmung, 91, 92, 98 - -Oktaeder, 34 -Orthonormalbasis, 107 - -Paraboloid -hyperbolisches, 92 - -Parallele, 66 -Parallelenaxiom, 64 -parametrisiert - -durch Bogenlänge, 87 -Parametrisierung - -reguläre, 30 -Polyzylinder, 17 -Produkttopologie, 4 -Projektion - -stereographische, 11 -Punkt, 34 - -Quotiententopologie, 5, 10, 11 - -Rand, 3, 28 -Raum - -hausdorffscher, 8 -kompakter, 14 -metrischer, 6 -projektiver, 5, 22, 25, 52 - - - -114 Stichwortverzeichnis - -topologischer, 2 -zusammenhängender, 11 - -Realisierung 
-geometrische, 34 - -Retraktion, 47 - -Satz von -Gauß-Bonnet, 98 - -Scheitelwinkel, 86 -Seite, 34 -Sierpińskiraum, 3, 22 -Simplex, 34 -Simplizialkomplex, 34 -Simplizialkomplexe - -flächengleiche, 74 -Sphäre - -exotische, 29 -Standard-Simplex, 34 -Standardtopologie, 2 -sternförmig, 48 -Stetigkeit, 9–11 -Strecke, 65 -Struktur - -differenzierbare, 29 -Subbasis, 3 - -Tangentialebene, 89, 89–90 -Teilraum, 4 -Teilraumtopologie, 4 -Teilsimplex, 34 -Topologie - -diskrete, 2, 6 -euklidische, 2 -feinste, 11 -triviale, 2 -Zariski, 2, 12, 15 - -Torus, iii, 5, 38, 51, 93 -Total Unzusammenhängend, 100 -Triangulierung, 38 - -Überdeckung, 14 -Übergangsfunktion, siehe Kartenwechsel -Überlagerung, 51, 51–60 - -reguläre, 59 -universelle, 57 - -Umgebung, 3 -Umgebungsbasis, 58 - -vanishing set, 26 -Vektorprodukt, siehe Kreuzprodukt -Verklebung, 26 - -verträglich, 29 - -Würfel, 34 -Weg, 17 - -einfacher, 17 -geschlossener, 17 -homotope, 44 -inverser, 48 -zusammengesetzter, 46 - -Wegzusammenhang, 18 -Weingarten-Abbildung, 95 -Winkel, 70 - -Zusammenhang, 11–14 -Zusammenhangskomponente, 13 -Zwischenwertsatz, 107 - - - 1 Topologische Grundbegriffe - 1.1 Topologische Räume - 1.2 Metrische Räume - 1.3 Stetigkeit - 1.4 Zusammenhang - 1.5 Kompaktheit - 1.6 Wege und Knoten - Übungsaufgaben - - 2 Mannigfaltigkeiten und Simplizialkomplexe - 2.1 Topologische Mannigfaltigkeiten - 2.2 Differenzierbare Mannigfaltigkeiten - 2.3 Simplizialkomplex - Übungsaufgaben - - 3 Fundamentalgruppe und Überlagerungen - 3.1 Homotopie von Wegen - 3.2 Fundamentalgruppe - 3.3 Überlagerungen - 3.4 Gruppenoperationen - - 4 Euklidische und nichteuklidische Geometrie - 4.1 Axiome für die euklidische Ebene - 4.2 Weitere Eigenschaften einer euklidischen Ebene - 4.2.1 Flächeninhalt - - 4.3 Hyperbolische Geometrie - Übungsaufgaben - - 5 Krümmung - 5.1 Krümmung von Kurven - 5.2 Tangentialebene - 5.3 Gauß-Krümmung - 5.4 Erste und zweite Fundamentalform - - Lösungen der Übungsaufgaben - Bildquellen - Abkürzungsverzeichnis - 
Ergänzende Definitionen und Sätze - Symbolverzeichnis - Stichwortverzeichnis - +[[[Tika text extraction failed!]]] \ No newline at end of file From f624925826dbcdaac22f70a08417a6ef17897db2 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 22:21:06 -0500 Subject: [PATCH 09/18] fix: use playa main branch with xobject fix --- README.md | 14 +- cache.json | 152 +++++------ read/results/playa/1602.06541.txt | 9 +- read/results/playa/2201.00029.txt | 374 +++++++++++++++++++++++++++- read/results/playa/2201.00037.txt | 167 ++++++++++++- read/results/playa/GeoTopo-book.txt | 41 ++- 6 files changed, 650 insertions(+), 107 deletions(-) diff --git a/README.md b/README.md index 01fa509..53550b0 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | pdfplumber | 2023-07-29 | MIT | git+https://github.com/dhdaines/pdfplumber | pdfminer.six | | pdfrw | 2017-09-18 | MIT | 0.4 | | | pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| playa | 2025-02-18 | MIT | 0.2.10 | | +| playa | 2025-02-18 | MIT | 0.3.0rc1.dev36+gbff14d4.d20250220 | | | PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | | pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | | Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | @@ -45,7 +45,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | | 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | | 4 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | -| 5 | [playa ](https://pypi.org/project/playa-pdf/) | 
4.3s | 33.4s | 7.9s | 8.2s | 3.6s | 0.6s | 1.5s | 0.9s | 0.9s | 0.6s | 1.0s | 0.4s | 0.8s | 0.0s | 0.3s | +| 5 | [playa ](https://pypi.org/project/playa-pdf/) | 4.4s | 32.7s | 8.8s | 8.2s | 3.7s | 1.1s | 1.8s | 0.9s | 0.8s | 0.6s | 1.1s | 0.8s | 0.8s | 0.7s | 0.3s | | 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | | 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 12.6s | 84.0s | 20.8s | 23.3s | 13.6s | 3.8s | 6.8s | 3.4s | 3.1s | 2.8s | 4.4s | 3.1s | 3.5s | 1.8s | 1.7s | | 8 | [Tika ](https://pypi.org/project/tika/) | 24.4s | 17.8s | 100.1s | 0.6s | 23.4s | 47.3s | 48.3s | 31.5s | 34.5s | 0.1s | 13.2s | 0.1s | 24.2s | 0.1s | 0.1s | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.0s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | @@ -86,9 +86,9 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 97% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 99% | 99% | 99% | 98% | 78% | 99% | 99% | | 2 | [pypdf ](https://pypi.org/project/pypdf/) | 96% | 99% | 95% | 93% | 98% | 99% | 96% | 97% | 99% | 99% | 99% | 99% | 78% | 100% | 99% | | 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 97% | 98% | 95% | 99% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | -| 4 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 89% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | -| 5 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | -| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 
86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | -| 7 | [playa ](https://pypi.org/project/playa-pdf/) | 88% | 98% | 93% | 92% | 98% | 97% | 95% | 97% | 96% | 98% | 98% | 97% | 77% | 0% | 99% | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 95% | 98% | 93% | 92% | 98% | 97% | 94% | 97% | 96% | 98% | 98% | 97% | 77% | 94% | 99% | +| 5 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 89% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | +| 6 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | +| 7 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | | 8 | [Tika ](https://pypi.org/project/tika/) | 83% | 99% | 0% | 92% | 95% | 77% | 86% | 81% | 82% | 98% | 88% | 98% | 67% | 98% | 96% | | 9 | [Borb ](https://pypi.org/project/borb/) | 45% | 70% | 79% | 0% | 40% | 48% | 92% | 0% | 64% | 51% | 41% | 55% | 41% | 0% | 53% | diff --git a/cache.json b/cache.json index e8bef14..1636790 100644 --- a/cache.json +++ b/cache.json @@ -206,46 +206,46 @@ }, "pdfrw": { "2201.00214": { - "watermark": 0.06678032875061035 + "watermark": 0.06585884094238281 }, "GeoTopo-book": { - "watermark": 0.5417091846466064 + "watermark": 0.5485935211181641 }, "2201.00151": { - "watermark": 0.05851268768310547 + "watermark": 0.05677437782287598 }, "1707.09725": { - "watermark": 0.4045860767364502 + "watermark": 0.40639281272888184 }, "2201.00021": { - "watermark": 0.11277961730957031 + "watermark": 0.11320853233337402 }, "2201.00037": { - "watermark": 0.0764768123626709 + "watermark": 0.07631802558898926 }, "2201.00069": { - "watermark": 0.13684296607971191 + "watermark": 0.14584708213806152 }, "2201.00178": { - "watermark": 0.11200451850891113 + "watermark": 0.11374044418334961 }, "2201.00201": { - "watermark": 0.07941722869873047 + "watermark": 
0.08190035820007324 }, "1602.06541": { - "watermark": 0.13727259635925293 + "watermark": 0.13877344131469727 }, "2201.00200": { - "watermark": 0.06008315086364746 + "watermark": 0.05585813522338867 }, "2201.00022": { - "watermark": 0.15844249725341797 + "watermark": 0.15651655197143555 }, "2201.00029": { - "watermark": 0.017334461212158203 + "watermark": 0.01755547523498535 }, "1601.03642": { - "watermark": 0.04838132858276367 + "watermark": 0.05137896537780762 } }, "pdftotext": { @@ -292,50 +292,6 @@ "read": 0.05305290222167969 } }, - "playa": { - "2201.00214": { - "read": 33.35531687736511 - }, - "GeoTopo-book": { - "read": 7.86867094039917 - }, - "2201.00151": { - "read": 8.23330020904541 - }, - "1707.09725": { - "read": 3.6483688354492188 - }, - "2201.00021": { - "read": 0.6167638301849365 - }, - "2201.00037": { - "read": 1.4718003273010254 - }, - "2201.00069": { - "read": 0.8515617847442627 - }, - "2201.00178": { - "read": 0.8537464141845703 - }, - "2201.00201": { - "read": 0.5607175827026367 - }, - "1602.06541": { - "read": 0.9524991512298584 - }, - "2201.00200": { - "read": 0.39577698707580566 - }, - "2201.00022": { - "read": 0.7865602970123291 - }, - "2201.00029": { - "read": 0.028035879135131836 - }, - "1601.03642": { - "read": 0.32507753372192383 - } - }, "pymupdf": { "2201.00214": { "read": 1.2650783061981201, @@ -523,6 +479,50 @@ "1601.03642": { "read": 0.07596778869628906 } + }, + "playa": { + "2201.00214": { + "read": 32.66150975227356 + }, + "GeoTopo-book": { + "read": 8.839988231658936 + }, + "2201.00151": { + "read": 8.197897672653198 + }, + "1707.09725": { + "read": 3.6952500343322754 + }, + "2201.00021": { + "read": 1.0743937492370605 + }, + "2201.00037": { + "read": 1.8004868030548096 + }, + "2201.00069": { + "read": 0.8580679893493652 + }, + "2201.00178": { + "read": 0.8492274284362793 + }, + "2201.00201": { + "read": 0.5559432506561279 + }, + "1602.06541": { + "read": 1.0579159259796143 + }, + "2201.00200": { + "read": 0.8115437030792236 + }, 
+ "2201.00022": { + "read": 0.769277811050415 + }, + "2201.00029": { + "read": 0.6836247444152832 + }, + "1601.03642": { + "read": 0.3228261470794678 + } } }, "read_quality": { @@ -607,22 +607,6 @@ "2201.00029": 0.9649219467401285, "1601.03642": 0.7867700010287713 }, - "playa": { - "2201.00214": 0.9761700404077421, - "GeoTopo-book": 0.927598899820742, - "2201.00151": 0.9222810491856283, - "1707.09725": 0.9757090668337609, - "2201.00021": 0.9719382936299716, - "2201.00037": 0.9513322686391528, - "2201.00069": 0.9697201017811705, - "2201.00178": 0.960335879151019, - "2201.00201": 0.9768103792804297, - "1602.06541": 0.9822372862286228, - "2201.00200": 0.9697131992609057, - "2201.00022": 0.7673033675330817, - "2201.00029": 0.0014646649578908821, - "1601.03642": 0.9891916003293989 - }, "pymupdf": { "2201.00214": 0.9780473882293753, "GeoTopo-book": 0.957868684569868, @@ -670,6 +654,22 @@ "2201.00022": 0.6698799418093457, "2201.00029": 0.9828859664925239, "1601.03642": 0.9551993153165015 + }, + "playa": { + "2201.00214": 0.9761700404077421, + "GeoTopo-book": 0.9276858797705095, + "2201.00151": 0.9222810491856283, + "1707.09725": 0.9757090668337609, + "2201.00021": 0.9719382936299716, + "2201.00037": 0.9425468498406934, + "2201.00069": 0.9697201017811705, + "2201.00178": 0.960335879151019, + "2201.00201": 0.9768103792804297, + "1602.06541": 0.981389499715468, + "2201.00200": 0.9697131992609057, + "2201.00022": 0.7673033675330817, + "2201.00029": 0.9408845676697153, + "1601.03642": 0.9891916003293989 } }, "watermarking_result_file_size": { @@ -694,7 +694,6 @@ "1601.03642": 1026759.0 }, "pdftotext": {}, - "playa": {}, "pymupdf": { "2201.00214": 2716298.0, "GeoTopo-book": 6857999.0, @@ -727,6 +726,7 @@ "2201.00029": 830154.0, "1601.03642": 1014378.0 }, - "tika": {} + "tika": {}, + "playa": {} } } \ No newline at end of file diff --git a/read/results/playa/1602.06541.txt b/read/results/playa/1602.06541.txt index 9184247..2d9131e 100644 --- a/read/results/playa/1602.06541.txt 
+++ b/read/results/playa/1602.06541.txt @@ -401,7 +401,14 @@ images were labeled by anonymous untrained workers to which they refer to as knowledge workers (KWs). One crowd annotation was obtained for each image by a majority vote on a pixel basis of 10 segmentations -given by 10 different KWs. Figure 2: A typical segmentation pipeline gets raw +given by 10 different KWs. Training +Prediction + Post- +processingWindow-wise +ClassificationWindow +extraction Data +augmentation +Feature extraction Preprocessing Figure 2: A typical segmentation pipeline gets raw pixel data, applies preprocessing techniques like scaling and feature extraction like HOG features. For training, data augmentation diff --git a/read/results/playa/2201.00029.txt b/read/results/playa/2201.00029.txt index 1901ba9..8d989c2 100644 --- a/read/results/playa/2201.00029.txt +++ b/read/results/playa/2201.00029.txt @@ -1,12 +1,362 @@ - - - - - - - - - - - - + 1 + + + + + + +Exploring new techniques for analyzing variability in white dwarf KIC 8626021 +Thomas Huckans , Peter Stine +Department of Physics and Engineering , Bloomsburg University of Pennsylvania , 400 E 2 nd + St ., +Bloomsburg, PA 17815 + + 2 +Abst r act + + As is common with the collection of astronomical data, signals are frequently dominated +by noise. However, when performing FTs of light curves, re - binning data can improve the signal - +to - noise ratio ( SNR ) at lower frequencies. Using data collected from the K epler space telescope, +we sequentially re - binned data three times to investigate the SNR i mprovement of lower frequency +(< 1 7 µ Hz) variability in white dwarf KIC 8626021 . We fou nd that the SNR at approximately 5.8 +µ Hz greatly improved through this process, and we postulate that this frequen c y is linked to the +rotation of KIC 8626021. + + + Introduction + +First detected in 1862, white dwarfs long posed a mystery for early observ ers. 
When the +companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and +densities baffled astronomers. Lacking full understanding of atom ic structures and the energy +states of electrons, these early researchers believ ed white dwarfs to o dense to exist . However, new +discoveries at the turn of the 20 th + century explained the existence of these stars , and between the +world wars white dwarfs wer e increasingly studied and modeled (Holberg, 2009 ) . +As stars age, those that lack the mass to become neutron stars and black holes become +white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008 ) . They are +composed of a core o f carbon and oxygen ions that slowly cools over billions of years, and the +light emanating from these star s is a result of thermal energy. White dwarf stars are no longer +supported against the force of gravity by fusion, so the stars collapse into an elect ron - degenerate +state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two +electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from +collapsing entirely. +For many years, accurate detection of light variability in white dwarfs was difficult due to +a lack of adequate instruments. However , the launch of the Kepler space telescope in 2009 made +capturing the light of distant stars much more efficient and effective (Basri et al., 2010 ) . Kepler +was initially de veloped with the intention of surveying our region of the Milky Way galaxy in +order to find potentially habitable planets. The purpose of the mission was to identify key traits for +such planets by determining the number of planets in habitable zones, the s izes and shapes of orbits, +and the characteristics of the stars being orbited. 
Over the lifespan of its first mission, Kepler +observed approximately 1.5 x 10 5 + stars ( Johnson, 2018) , affording scientists excellent +opportunities to research stel lar variability . Due to the loss of a second reaction wheel in 2013, +NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and +astrophysics. +Utilizing Kepler’s ability to maintain three - dimensional control, NASA proceeded to use +the telescope to collect photometry data of certain sections of our galaxy, although the number of +targets was significantly reduced. In addition, the K2 mission was designed to be community - +oriented, with the scientific community having a n influence on th e fields observed and serving as +the analysts of the vast amounts of data being received ( Howell et al., 2014 ). Although Kepler was +deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of +white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations +Center (KASOC). + 3 +The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon +previous studies, this research investigated novel techniques of analyzing variability in white +dw arfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on +the star, allowing for the validation of results using our methods. KIC 8626021 has an effective +temperature of 2 9,700 K, log g = 7.890, and mass of 0.56 M + ☉ (Córsico, 2020 ) . Other research +has found that this white dwarf is the DBV with the highest known temperature, and its helium +layer is the thinn est (Bischoff - K im et al., 2015). Despite the long - cadence light curve being too +noisy to draw many conclusions , other FTs of short - cadence data have been performed to find +variability in the dwarf. 
Analyses at high frequencies of KIC 8626021 yielded pulsations with +frequencies of 4309.89 µHz , 5073.26 µHz , 36 81.87 µHz , 3294.22 µHz and 2658.85 µ Hz +(Østensen et al., 2011 ). These fin dings confirm the classification of the white dwarf as a V777 +Herculis, although our research focuses on low frequencies using long - cadence data. + + + + Method s + +All data were downloaded from the KASOC database, and the long - cadence (data +sampled ap proximately every thirty minutes) measurements of Corrected F lux (ppm) were +analyzed. All computations were made in Wolfram Mathematica and Microsoft Exce l , and FTs +were performed in Mathematica . The re - binning process consist ed of summin g adjacent light +c urve data points in each quarter , therefore doubling the sampling interval from 0 .5 hour to one +hour, and then repeating this process on the data sample fo r a total of three times. In addition, a +significant detection was defin ed as being 3 𝝈 above the mean of the relative flux, and 0 on the +graphs below represents this 3 𝝈 cutoff. ( Koch, D. G., 2010), ( Wolfram Research, Inc., 2021). To +find the SNR , we converted to decibels . Using these SNRs , we were able to easily identify +im provement in signal strength. + + + Results + + Figure 1 presents the lightcurves constructed for quarters seven (Q 7) and thirteen (Q13) , +with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs +of the first iteration and three successive re - bins for Q7 , while Figure 3 presents the FTs of the +same for Q13 . + Tables 1 and 2 both show the hypothesized f requency corresponding to the rotation of +KIC 8626021 that is found in the FTs of the f irst iteration and subsequent re - bins for Q7 and +Q1 3 . Tables 3 and 4 show all data values < 17 µ Hz found in the first iterations and re - bins of Q7 +and Q13 . + + + + + + + + 4 + + +FIG. 1 : Pictured top is the light curve constructed for Q7 , below is the light curve for Q13 . 
Q 7 +lasted from September 24 – December 13 , 2010, and Q13 was from M arch 29 – June 23, 2012. +Both graphs were constructed by plotting corrected flux magnitude (flux corrected for +instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating +between points. Q 7 had forty - three interpolated points, and Q13 had sixty - six . + + + + + + + + + + + 5 + + +FIG. 2 : The graphs show the initial FTs of Q7, and then the FTs of the three successive re - bins of +the light curve data. The significant fr equenc ies of 5.886 µHz and 5.889 µHz are circled. The +d isappearance of the freque ncy in the last FT is most likely a b yproduct of the method, and the +spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re - +binning proc ess. + + + + + + + + + + 6 + + + +FIG. 3 : The graphs show the initial FT of Q13 , and t he n the FT s of the three successive re - bins +of the light curve data. The significant frequencies of 5.784 µHz and 5. 787 µHz are circled. In +addition, in the third re - bin , the frequencies 11.641 µHz and 16.823 µHz rise above 3 𝝈 and are +near ly perfect integer multiples of 5 .787 µHz . These harmonics are potentially indications of a +starspot (Santos et al., 2017). + + + + + + + + + + + + + + 7 +Q7 Significant +Data Points Light +Variability +Frequenc y +(µHz) Corrected Flux +Magnitude +(ppm) Period (days ) Signal - to - Noise +(dB) +Q7 First +Iteration 5.886 - 1.198 1.966 9.9 +Q7 Re - bin 1 5.886 - 1.477 1.966 12.8 +Q7 Re - bin 2 5.889 0.59 7 1.965 19.2 +TABLE I : The table displays the various frequencies collected from Q7 and the information +found throu gh calculations to find period and SNR. The frequency of 5.464 µHz is not included, +and therefore was not used in any calculations deter mining the average period of rotation. The +values under corrected flux magnitude are relative to our significant frequency cutoff of 3 𝝈 , thus +negative numbers are under the cutoff. 
+ + + + Q 13 Significant +Data Points Light +Variability +Frequenc y +(µHz) Corrected Flux +Magnitude +(ppm) Period (days) Signal - to - Noise +(dB) +Q13 First +Iteration 5.784 1.555 2.001 15.6 +Q13 Re - bin 1 5.784 2.873 2.001 1 7.7 +Q13 Re - bin 2 5.787 4.938 2.000 22.6 +Q13 Re - bin 3 5.787 6.909 2.0 00 26.3 +Q13 Re - bin 3 11.641 7.073 0.994 26.4 +Q13 Re - bin 3 16.823 2.299 0.688 24.1 +TABLE II : The table displays the various frequencies collected from Q13 and the information +found through calculations to find period and SNR. The last two signific ant frequencies (11.641 +µHz and 16.823 µHz ) for Q13 Re - bin 3 represent potential harmonic s, which are discussed in +further detail in the Con clusions section of this paper . The values under corrected flux magnitude +are relative to our significant frequency cutoff of 3 𝝈 , thus negative numbers are under the cutoff. + + + + + + + + + + 8 +First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) +0.933 0.933 0.21 5 0.216 +1.148 1.148 0.575 0.575 +1.364 1.364 0.934 0.935 +1.507 1.507 1.005 1.006 +12.5 61 12.561 1.149 1.150 +16.581 16.581 1.221 1.222 + 1.364 1.366 + 1.508 1 .509 + 1 .580 1. 582 + 1.7 24 1. 725 + 1.795 1.797 + 5.889 2.0 85 + 6.822 5.392 + 9.192 5. 464 + 9.479 7. 476 + 11.203 9. 489 + 12.568 11.215 + 14 . 291 12.581 + 16.230 13.084 + 1 6.589 13.443 + 13.659 + 14.018 + 14. 809 + 15.097 + 16.031 + 16.463 + 16.894 +TABLE III : The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) +above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by - +product of the method, and we calculated for such errors when finding our average. + + + + + 9 +First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) +3.094 2 .018 2.019 1.951 +5.784 3.094 3.095 2.019 +9.080 5.784 5.787 2.442 +13.519 7.667 7.671 2. 
759 +15.671 9.080 9.084 3.095 +16.209 11.165 11.641 3.634 +16.411 13.519 13.526 4.374 + 15.469 15.477 4.778 + 15.671 15.679 4.912 + 16.209 15.881 5.0 47 + 16.41 1 16.419 5.787 + 8. 479 + 9. 084 + 10.565 + 11.641 + 13.526 + 15.544 + 15.881 + 16.82 3 +TABLE IV : The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) +above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by - +product of the method, and we calculate d for such errors when finding our average. + + + Conclusions + +As our research used the long - cadence data from Kepler, much of the high - frequency +va riability due to gravitational wave pulsations is lost. However, this presents an opportunity to +verify our results with the work of research groups that analyzed short - cadence data. With the +data analyzed, the lower fre quencies between 5 - 6 µHz emerged . Aft er finding the average of the +periods and accounting for a 1 𝝈 margin of error, our research hypothesizes that the rotation +period of KIC 8626021 is 1.99 ± 0.02 days. Other short - cadence re search has found the rotation +period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff - K im et +al ., 2015 ) . Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011 ) , and +these periods indicate that the more precise significant period identified through our re - binning +relates to the rotation of the white dwarf. +Through the re - b inning process , the SNR clearly improves for both quarters, and for Q7 it +improves by approximately 1.3 dB, except f o r the last data re - bin. In the last re - bin, the previous + 10 +significant frequency disappears , which beco mes increasingly likely after succe ssive re - binning +processes . The frequency 5.464 µHz rises as another significant frequency; however, we believe +that this new frequ ency is simply an artifact of the re - binning process. 
In Q13 , we saw SNR +improvement ranging from 1.1 dB to 1.3 dB . +Through the re - binning process , more lines , or significant frequencies, appeared above +the 3 𝝈 cutoff , particularly at lower frequencies. These findings suggest that as an alternative to +short - cadence analysis, the re - binning process of long - cadence dat a can be used to identify +significant lower frequencies in white dwarfs. The methods we used are also si mple and +replicable, which allows even those with less experience to quickly analyze the large amounts of +data being collected by orbiting telescopes, s uch as the currently active TESS (Transiting +Exoplanet Survey Satellite) telescope. +The presence of poss ible harmonics in the third re - bin of Q13 also indicates the possible +presence of a previously unseen starspot in KIC 8626021 caused by mag netic activity. These +spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, +the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and +contrast (Santos et al., 2017) . Using the process of re - binning, a starspot signal, previously +dominated by noise, may have been discov ered. + 11 +Acknowledgments + +W e wish to thank Bloomsburg University of Pennsylvania for its continued support of our +research. +This paper includes data collected b y the Kepler mission and obtained from the MAST +data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is +provided by the NASA Science Mission Directorate. STScI is operated by the Association of +Universities for Rese arch in Astronomy, Inc., under NASA contract NAS 5 – 26555. + + + References + + Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., +Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010) . +PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG +stars — a FIRST LOOK. 
The Astr ophysical Journal, 713(2), L155 - L159. +https://doi.org/10.1088/2041 - 8205/713/2/L155 +Bischoff - K im , A., Øs tensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven - Period +asteroseismic fit of KI C 8626021. EPJ Web of Conferences, 101, 06009. +https://doi.org/10.1051/epjconf/ 201510106009 +Córsico, A. H. (2020). White - Dwarf asteros eismology with the kepler space telescope. Frontiers +in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 +Holberg, J . B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal +for the History of Astrono my, 40(2), 137 - 154. +https://doi.org/10.1177%2F002182860904000201 +Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Tr oeltzsch, J., Aigrain, S., +Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., +M iglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: +Characterization and early results. Publications of the Astronomical Society of the Pacific, +126(938), 398 - 408. https://doi.org/10.1086/676406 +Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space +Administration. Retrieved September 2, 2021, from +https://www.nasa.gov/mission_pages/keple r/overview/index.html +Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen - +dalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., +Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y ., Latham, D. W., Lissauer, J. J., Marcy, +G., . . . Morrison , D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC +performance, AND EARLY SCIENCE. The Astrophy sical Journal , 713 (2), L79 - L86. +https://dx.doi.org/10.1088/2041 - 8205/713/2/L79 +Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & +Koester, D. (2011) . AT last — a v777 HER PULSATOR IN THE KEPLER FIELD. 
The +Astrophysical Journal , 736 (2), L39. https://doi.org/10.1088/2041 - 8205/736/2/L39 +Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot +signature on the light curv e. Astronomy & Astrophysics , 599 , A1. +https://doi.org/10.1051/0004 - 6361/201629923 + 12 +Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. +Annual Review of Astronomy and Astrophyics, 46(1), 157 - 199. +https://doi.org/10.1146/annurev.astro. 46.060407.145250 +Wolfram Research , Inc., Mathematica, Version 12.3.1, Champaig n, IL (2021). diff --git a/read/results/playa/2201.00037.txt b/read/results/playa/2201.00037.txt index 52535ee..fdf2ac3 100644 --- a/read/results/playa/2201.00037.txt +++ b/read/results/playa/2201.00037.txt @@ -66,7 +66,31 @@ all techniques yield an obliquity which is coplanar with the orbit and Laplace p and consistent with a Cassini state. Furthermore, the observed obliquity angle (2. 042 ± 0 .08 –2– Confidential manuscript submitted to JGR-Planets -Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded +I +descending +node of orbit Ω + p + ê + 3 I +I ê + 3 Lε + mI + ê + 3 p + ascending +node of orbit +descending +node of equator equatorial +plane + orbital +direction +Sê + 3 I +ê + 3 L + M + ε + morbital planeFigure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded rectangle) and the Cassini state of Mercury. The normal to the orbital plane ( ˆe I 3 ) is offset from the nor- mal to the Laplace plane ( ˆe L @@ -386,6 +410,42 @@ A C 22 . (4) –6– Confidential manuscript submitted to JGR-Planets +θ + m + θ + n +θ + s + θ + fΩ + Ω + s + Ω + fê + 3 p + ê + 3 sê + 3 I +I ε + m +θ + pê + 3 L + ê + 1 p + ê + 2 p Cassini plane + ω Ω + o tê + 3 I +I + ε + m ê + 3 p + ê + 1ê + 2 pê + 3 La) b) Figure 2. 
Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) in a frame attached to the rotating mantle. The orbit normal ( ˆe I 3 ) is tilted by an angle I = 8 . 533◦ @@ -1480,7 +1540,19 @@ core density scenarios and in the absence of viscous and EM coupling (i.e. K icb = –16– Confidential manuscript submitted to JGR-Planets -Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand + 0200400600800100012001400 +period (yr) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km)300040005000600070008000 +density (kg/m3 ) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km) 200020202040206020802100 + Fluid core radius (km)fluid core density + CMB radius + FICNFCN + int +mantle densitya b + FCNFigure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN period when the external torque is set to zero (FCN int ) is shown in orange. Solid lines correspond to @@ -1677,7 +1749,26 @@ core must remain in close alignment with the mantle. Presented differently, sin riod is more than 3000 times shorter than the forced precession period, the inner core can eas- –18– Confidential manuscript submitted to JGR-Planets -Figure 4. a) Obliquity of the mantle ( ˜ε +2.0382.0402.0422.0442.0462.0482.050 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km) 1.52.02.53.03.54.04.5 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km)crustal thickness + 16 km +36 km26 km crustal thickness + 16 km +36 km26 kmε + m +ε + g + for a rigid planet +ε + m m + f +n + s (x100)a bFigure 4. 
a) Obliquity of the mantle ( ˜ε m , solid lines) and of the principal moment of inertia ( ˜ε g , dashed line) b) ˜m @@ -2028,7 +2119,34 @@ cosity that we have identified above (i.e ν ≈ 5 × 10 − 4 ), the influence of viscous cou- –21– Confidential manuscript submitted to JGR-Planets -Figure 5. a) Obliquity of the mantle ( ˜ε +ε + mε + g + m + f +n + s +2.0382.0402.0422.0442.0462.0482.050 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km)kinematic viscosity: + 0.01 m 2 + s -1 + 0.00001 m 2 + s -1 +0.0001 m 2 + s -1 +0.0005 m 2 + s -1 +0.001 m 2 + s -1 +a b + for a rigid planet +ε + m Figure 5. a) Obliquity of the mantle ( ˜ε m , solid lines) and gravity field ( ˜ε g , dashed lines) b) ˜m f @@ -2428,7 +2546,26 @@ Coupling models when viscous and EM stresses are both present have been presente Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, –25– Confidential manuscript submitted to JGR-Planets -Figure 6. a) Obliquity of the mantle ( ˜ε +2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km)B + r at ICB: + 1 mT 0.01 mT0.03 mT0.1 mT0.3 mT +ε + m +ε + g + m + f +n + sa b + for a rigid planet +ε + m Figure 6. a) Obliquity of the mantle ( ˜ε m , solid lines) and gravity field ( ˜ε g , dashed lines) b) ˜m f @@ -2479,7 +2616,25 @@ model included the tangential viscous stress at the ICB and CMB, but not the EM Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as –26– Confidential manuscript submitted to JGR-Planets -Figure 7. 
a) Obliquity of the mantle ( ˜ε +2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5 +Obliquity angle (arcmin) + 0 200 400 600 800 1000 1200 1400 +Inner core radius (km) for a rigid planet +ε + ma bα + 3 : + 0.20 0.010.05 0.100.15ρ + s = 8800 kg m -3 + m + f +n + sε + m +ε + gFigure 7. a) Obliquity of the mantle ( ˜ε m , solid lines) and gravity field ( ˜ε g , dashed lines) b) ˜m f diff --git a/read/results/playa/GeoTopo-book.txt b/read/results/playa/GeoTopo-book.txt index eb0a8c8..4a00ee3 100644 --- a/read/results/playa/GeoTopo-book.txt +++ b/read/results/playa/GeoTopo-book.txt @@ -1979,13 +1979,22 @@ R 2 ( v ) =R 2 32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN -(a)Kugelkoordinaten −1 +N +S v +u (a)Kugelkoordinaten −1 0 1 2 −2 −1 0 1 20. 60. 81 (b)Rotationskörper -(c)Sinus und Kosinus haben keine gemeinsame Nullstelle +π +2 π + 3 π +2 2 π +− 1− 0. 50. 51 + xy + sin x +cos x (c)Sinus und Kosinus haben keine gemeinsame Nullstelle 33 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Die Jacobi-Matrix J @@ -2327,7 +2336,24 @@ Beispiel 26 (Simpliziale Abbildungen) ist simplizial: ϕ 3)Tori können simplizial auf Sphären abgebildet werden (vgl.Abbildung 2.8) -Abbildung 2.8:Abbildung eines Torus auf eine Sphäre +M + Ma +a ab + b bc +c c +dd + dM a +b c +d +bb bb b bbb b + b + b bb +bb +bbb + b bb bb b +b +b b +bAbbildung 2.8:Abbildung eines Torus auf eine Sphäre Definition 38 Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei a n ( K ) die Anzahl der n -Simplizes in @@ -2887,7 +2913,8 @@ ab γ 1γ 2 -(a)Kreis mit zwei Wegen (b)Torus mit drei Wegen +(a)Kreis mit zwei Wegen +a b(b)Torus mit drei Wegen Abbildung 3.3:Beispiele für (nicht)-Homotopie von Wegen Definition 43 Seien γ @@ -6590,7 +6617,11 @@ Beispiel 48 1 (s ) < 0 , κ 2 ( s) = 0 → K (s ) < 0 4) S = Torus. 
SieheAbbildung 5.3 -Abbildung 5.3: K ( s + s + 1s + 2 +s + 3Abbildung 5.3: K ( s 1 ) > 0 , K (s 2 ) = 0, K (s 3 ) < 0 From 6508f3023c26338044feb5517e3ad73373caf466 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 22:29:06 -0500 Subject: [PATCH 10/18] feat: use 2 CPUs for PLAYA --- README.md | 6 ++-- cache.json | 56 +++++++++++++++++------------------ pdf_benchmark/library_code.py | 2 +- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 53550b0..851307f 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,8 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.1s | 0.8s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | | 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | -| 4 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | -| 5 | [playa ](https://pypi.org/project/playa-pdf/) | 4.4s | 32.7s | 8.8s | 8.2s | 3.7s | 1.1s | 1.8s | 0.9s | 0.8s | 0.6s | 1.1s | 0.8s | 0.8s | 0.7s | 0.3s | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.4s | 16.8s | 5.1s | 4.2s | 2.1s | 0.7s | 1.0s | 0.5s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | +| 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | | 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | | 7 | [pdfplumber 
](https://pypi.org/project/pdfplumber/) | 12.6s | 84.0s | 20.8s | 23.3s | 13.6s | 3.8s | 6.8s | 3.4s | 3.1s | 2.8s | 4.4s | 3.1s | 3.5s | 1.8s | 1.7s | | 8 | [Tika ](https://pypi.org/project/tika/) | 24.4s | 17.8s | 100.1s | 0.6s | 23.4s | 47.3s | 48.3s | 31.5s | 34.5s | 0.1s | 13.2s | 0.1s | 24.2s | 0.1s | 0.1s | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) 
| 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | diff --git a/cache.json b/cache.json index 1636790..85b15fd 100644 --- a/cache.json +++ b/cache.json @@ -206,46 +206,46 @@ }, "pdfrw": { "2201.00214": { - "watermark": 0.06585884094238281 + "watermark": 0.0659644603729248 }, "GeoTopo-book": { - "watermark": 0.5485935211181641 + "watermark": 0.545504093170166 }, "2201.00151": { - "watermark": 0.05677437782287598 + "watermark": 0.06070399284362793 }, "1707.09725": { - "watermark": 0.40639281272888184 + "watermark": 0.43261241912841797 }, "2201.00021": { - "watermark": 0.11320853233337402 + "watermark": 0.11702942848205566 }, "2201.00037": { - "watermark": 0.07631802558898926 + "watermark": 0.08114290237426758 }, "2201.00069": { - "watermark": 0.14584708213806152 + "watermark": 0.156080961227417 }, "2201.00178": { - "watermark": 0.11374044418334961 + "watermark": 0.12212061882019043 }, "2201.00201": { - "watermark": 0.08190035820007324 + "watermark": 0.0823523998260498 }, "1602.06541": { - "watermark": 0.13877344131469727 + "watermark": 0.146378755569458 }, "2201.00200": { - "watermark": 0.05585813522338867 + "watermark": 0.059378623962402344 }, "2201.00022": { - "watermark": 0.15651655197143555 + "watermark": 0.1740405559539795 }, "2201.00029": { - "watermark": 0.01755547523498535 + "watermark": 0.018683671951293945 }, "1601.03642": { - "watermark": 0.05137896537780762 + "watermark": 0.07303500175476074 } }, "pdftotext": { @@ -482,46 +482,46 @@ }, 
"playa": { "2201.00214": { - "read": 32.66150975227356 + "read": 16.78051209449768 }, "GeoTopo-book": { - "read": 8.839988231658936 + "read": 5.137176275253296 }, "2201.00151": { - "read": 8.197897672653198 + "read": 4.224908351898193 }, "1707.09725": { - "read": 3.6952500343322754 + "read": 2.1480777263641357 }, "2201.00021": { - "read": 1.0743937492370605 + "read": 0.6732006072998047 }, "2201.00037": { - "read": 1.8004868030548096 + "read": 1.0461030006408691 }, "2201.00069": { - "read": 0.8580679893493652 + "read": 0.541226863861084 }, "2201.00178": { - "read": 0.8492274284362793 + "read": 0.5686733722686768 }, "2201.00201": { - "read": 0.5559432506561279 + "read": 0.36340975761413574 }, "1602.06541": { - "read": 1.0579159259796143 + "read": 0.6587491035461426 }, "2201.00200": { - "read": 0.8115437030792236 + "read": 0.49942922592163086 }, "2201.00022": { - "read": 0.769277811050415 + "read": 0.5751311779022217 }, "2201.00029": { - "read": 0.6836247444152832 + "read": 0.42870497703552246 }, "1601.03642": { - "read": 0.3228261470794678 + "read": 0.2176511287689209 } } }, diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py index 923529c..11ea26b 100644 --- a/pdf_benchmark/library_code.py +++ b/pdf_benchmark/library_code.py @@ -27,7 +27,7 @@ def playa_get_text(data: bytes) -> str: outpath = os.path.join(tempdir, "pdf.txt") with open(outpath, "wt") as outfh: args = argparse.Namespace(pages="all", outfile=outfh) - with playa.open(path) as pdf: + with playa.open(path, max_workers=2) as pdf: playa_extract_text(pdf, args) with open(outpath) as infh: text = infh.read() From 6622f6ce008161add435d3307462ff21d810abe6 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 19 Feb 2025 23:03:36 -0500 Subject: [PATCH 11/18] fix: correct pdfplumber version --- README.md | 28 +++++----- benchmark.py | 2 +- cache.json | 152 +++++++++++++++++++++++++-------------------------- 3 files changed, 91 insertions(+), 91 deletions(-) diff --git a/README.md 
b/README.md index 851307f..bc753bd 100644 --- a/README.md +++ b/README.md @@ -23,18 +23,18 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 14 | [1601.03642](https://arxiv.org/pdf/1601.03642.pdf) | 1004.9KiB | 8 | ## Libraries -| Name | Last PyPI Release | License | Version | Dependencies | -| -----------: | :---------------- | ------------------------------: | -----------------------------------------: | :-------------------------------------------------------- | -| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | -| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | -| pdfplumber | 2023-07-29 | MIT | git+https://github.com/dhdaines/pdfplumber | pdfminer.six | -| pdfrw | 2017-09-18 | MIT | 0.4 | | -| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| playa | 2025-02-18 | MIT | 0.3.0rc1.dev36+gbff14d4.d20250220 | | -| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | -| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | -| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | +| Name | Last PyPI Release | License | Version | Dependencies | +| -----------: | :---------------- | ------------------------------: | --------------------------------: | :-------------------------------------------------------- | +| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | +| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | +| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | +| pdfplumber | 2023-07-29 | MIT | 0.11.5 | pdfminer.six | +| pdfrw | 2017-09-18 | MIT | 0.4 | | +| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | +| playa | 2025-02-18 | MIT | 0.3.0rc1.dev36+gbff14d4.d20250220 | | +| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | +| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | +| Tika | 
2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | ## Text Extraction Speed @@ -47,7 +47,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.4s | 16.8s | 5.1s | 4.2s | 2.1s | 0.7s | 1.0s | 0.5s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | | 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | | 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | -| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 12.6s | 84.0s | 20.8s | 23.3s | 13.6s | 3.8s | 6.8s | 3.4s | 3.1s | 2.8s | 4.4s | 3.1s | 3.5s | 1.8s | 1.7s | +| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 13.0s | 86.4s | 22.7s | 23.4s | 14.2s | 4.2s | 7.1s | 3.3s | 3.2s | 2.9s | 4.4s | 3.3s | 3.5s | 1.9s | 1.7s | | 8 | [Tika ](https://pypi.org/project/tika/) | 24.4s | 17.8s | 100.1s | 0.6s | 23.4s | 47.3s | 48.3s | 31.5s | 34.5s | 0.1s | 13.2s | 0.1s | 24.2s | 0.1s | 0.1s | | 9 | [Borb ](https://pypi.org/project/borb/) | 50.5s | 188.4s | 149.1s | 2.3s | 113.6s | 28.4s | 11.7s | 112.3s | 23.7s | 27.1s | 8.4s | 5.7s | 27.7s | 4.9s | 2.9s | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 
](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.9s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.0s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | diff --git a/benchmark.py b/benchmark.py index 8e7e514..a31bb10 100644 --- a/benchmark.py +++ b/benchmark.py @@ -192,7 +192,7 @@ def write_single_result( "pdfplumber", "https://pypi.org/project/pdfplumber/", text_extraction_function=pdfplubmer_get_text, - version="git+https://github.com/dhdaines/pdfplumber", + version=pdfplumber.__version__, license="MIT", 
last_release_date="2023-07-29", dependencies="pdfminer.six", diff --git a/cache.json b/cache.json index 85b15fd..f5cde75 100644 --- a/cache.json +++ b/cache.json @@ -160,92 +160,48 @@ "image_extraction": 0.9357635974884033 } }, - "pdfplumber": { - "2201.00214": { - "read": 83.96660041809082 - }, - "GeoTopo-book": { - "read": 20.792396068572998 - }, - "2201.00151": { - "read": 23.254782676696777 - }, - "1707.09725": { - "read": 13.614238023757935 - }, - "2201.00021": { - "read": 3.8416290283203125 - }, - "2201.00037": { - "read": 6.843621730804443 - }, - "2201.00069": { - "read": 3.3514842987060547 - }, - "2201.00178": { - "read": 3.1007227897644043 - }, - "2201.00201": { - "read": 2.8469505310058594 - }, - "1602.06541": { - "read": 4.3501877784729 - }, - "2201.00200": { - "read": 3.129256248474121 - }, - "2201.00022": { - "read": 3.5482122898101807 - }, - "2201.00029": { - "read": 1.827713966369629 - }, - "1601.03642": { - "read": 1.7083258628845215 - } - }, "pdfrw": { "2201.00214": { - "watermark": 0.0659644603729248 + "watermark": 0.06779885292053223 }, "GeoTopo-book": { - "watermark": 0.545504093170166 + "watermark": 0.9143116474151611 }, "2201.00151": { - "watermark": 0.06070399284362793 + "watermark": 0.05871248245239258 }, "1707.09725": { - "watermark": 0.43261241912841797 + "watermark": 0.4033334255218506 }, "2201.00021": { - "watermark": 0.11702942848205566 + "watermark": 0.11349081993103027 }, "2201.00037": { - "watermark": 0.08114290237426758 + "watermark": 0.07494401931762695 }, "2201.00069": { - "watermark": 0.156080961227417 + "watermark": 0.13128018379211426 }, "2201.00178": { - "watermark": 0.12212061882019043 + "watermark": 0.11118340492248535 }, "2201.00201": { - "watermark": 0.0823523998260498 + "watermark": 0.07940006256103516 }, "1602.06541": { - "watermark": 0.146378755569458 + "watermark": 0.14059066772460938 }, "2201.00200": { - "watermark": 0.059378623962402344 + "watermark": 0.05641889572143555 }, "2201.00022": { - "watermark": 
0.1740405559539795 + "watermark": 0.15761399269104004 }, "2201.00029": { - "watermark": 0.018683671951293945 + "watermark": 0.017118453979492188 }, "1601.03642": { - "watermark": 0.07303500175476074 + "watermark": 0.04789161682128906 } }, "pdftotext": { @@ -523,6 +479,50 @@ "1601.03642": { "read": 0.2176511287689209 } + }, + "pdfplumber": { + "2201.00214": { + "read": 86.39272856712341 + }, + "GeoTopo-book": { + "read": 22.65720844268799 + }, + "2201.00151": { + "read": 23.390413761138916 + }, + "1707.09725": { + "read": 14.161987543106079 + }, + "2201.00021": { + "read": 4.160851240158081 + }, + "2201.00037": { + "read": 7.055023908615112 + }, + "2201.00069": { + "read": 3.3328192234039307 + }, + "2201.00178": { + "read": 3.220952272415161 + }, + "2201.00201": { + "read": 2.854520559310913 + }, + "1602.06541": { + "read": 4.377838373184204 + }, + "2201.00200": { + "read": 3.310704469680786 + }, + "2201.00022": { + "read": 3.518846035003662 + }, + "2201.00029": { + "read": 1.9474315643310547 + }, + "1601.03642": { + "read": 1.671983242034912 + } } }, "read_quality": { @@ -574,22 +574,6 @@ "2201.00029": 0.975523516322736, "1601.03642": 0.8623963054819123 }, - "pdfplumber": { - "2201.00214": 0.9624093076027349, - "GeoTopo-book": 0.8932082690274208, - "2201.00151": 0.8857353838250874, - "1707.09725": 0.977952891119146, - "2201.00021": 0.9174005666220104, - "2201.00037": 0.9432015121388418, - "2201.00069": 0.9320623652220378, - "2201.00178": 0.9530470165622914, - "2201.00201": 0.9316913879761284, - "1602.06541": 0.9741434157570039, - "2201.00200": 0.9378122018297131, - "2201.00022": 0.7645679514756893, - "2201.00029": 0.9927616243405717, - "1601.03642": 0.982476230133944 - }, "pdfrw": {}, "pdftotext": { "2201.00214": 0.9600762653108389, @@ -670,13 +654,28 @@ "2201.00022": 0.7673033675330817, "2201.00029": 0.9408845676697153, "1601.03642": 0.9891916003293989 + }, + "pdfplumber": { + "2201.00214": 0.9624093076027349, + "GeoTopo-book": 0.8932082690274208, + "2201.00151": 
0.8857353838250874, + "1707.09725": 0.977952891119146, + "2201.00021": 0.9174005666220104, + "2201.00037": 0.9432015121388418, + "2201.00069": 0.9320623652220378, + "2201.00178": 0.9530470165622914, + "2201.00201": 0.9316913879761284, + "1602.06541": 0.9741434157570039, + "2201.00200": 0.9378122018297131, + "2201.00022": 0.7645679514756893, + "2201.00029": 0.9927616243405717, + "1601.03642": 0.982476230133944 } }, "watermarking_result_file_size": { "borb": {}, "pdfium": {}, "pdfminer": {}, - "pdfplumber": {}, "pdfrw": { "2201.00214": 2515466.0, "GeoTopo-book": 5738184.0, @@ -727,6 +726,7 @@ "1601.03642": 1014378.0 }, "tika": {}, - "playa": {} + "playa": {}, + "pdfplumber": {} } } \ No newline at end of file From 1871f1836817c1e224ebac535c0ac512f8948981 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Thu, 20 Feb 2025 09:21:27 -0500 Subject: [PATCH 12/18] feat: update to use new PLAYA extract_text (and improve accuracy) --- README.md | 30 +- cache.json | 124 +- pdf_benchmark/library_code.py | 12 +- read/results/playa/1601.03642.txt | 136 +- read/results/playa/1602.06541.txt | 604 +++---- read/results/playa/1707.09725.txt | 1587 +++++++++--------- read/results/playa/2201.00021.txt | 592 +++---- read/results/playa/2201.00022.txt | 714 ++++---- read/results/playa/2201.00029.txt | 716 ++++---- read/results/playa/2201.00037.txt | 1586 +++++++++--------- read/results/playa/2201.00069.txt | Bin 55856 -> 55732 bytes read/results/playa/2201.00151.txt | 708 ++++---- read/results/playa/2201.00178.txt | 584 +++---- read/results/playa/2201.00200.txt | 690 ++++---- read/results/playa/2201.00201.txt | 636 ++++---- read/results/playa/2201.00214.txt | 546 +++---- read/results/playa/GeoTopo-book.txt | 2350 +++++++++++++-------------- 17 files changed, 5806 insertions(+), 5809 deletions(-) diff --git a/README.md b/README.md index bc753bd..8db2045 100644 --- a/README.md +++ b/README.md @@ -23,18 +23,18 @@ This benchmark is about reading pure PDF files - notscanned documents 
and not do | 14 | [1601.03642](https://arxiv.org/pdf/1601.03642.pdf) | 1004.9KiB | 8 | ## Libraries -| Name | Last PyPI Release | License | Version | Dependencies | -| -----------: | :---------------- | ------------------------------: | --------------------------------: | :-------------------------------------------------------- | -| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | -| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | -| pdfplumber | 2023-07-29 | MIT | 0.11.5 | pdfminer.six | -| pdfrw | 2017-09-18 | MIT | 0.4 | | -| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| playa | 2025-02-18 | MIT | 0.3.0rc1.dev36+gbff14d4.d20250220 | | -| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | -| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | -| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | +| Name | Last PyPI Release | License | Version | Dependencies | +| -----------: | :---------------- | ------------------------------: | ----------------------: | :-------------------------------------------------------- | +| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | +| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | +| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | +| pdfplumber | 2023-07-29 | MIT | 0.11.5 | pdfminer.six | +| pdfrw | 2017-09-18 | MIT | 0.4 | | +| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | +| playa | 2025-02-18 | MIT | 0.3.0rc1.dev41+g4a84b70 | | +| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | +| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | +| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | ## Text Extraction Speed @@ -44,7 +44,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 
0.1s | 0.8s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | | 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | -| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.4s | 16.8s | 5.1s | 4.2s | 2.1s | 0.7s | 1.0s | 0.5s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.4s | 16.9s | 5.1s | 4.3s | 2.2s | 0.7s | 1.1s | 0.5s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | | 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | | 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | | 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 13.0s | 86.4s | 22.7s | 23.4s | 14.2s | 4.2s | 7.1s | 3.3s | 3.2s | 2.9s | 4.4s | 3.3s | 3.5s | 1.9s | 1.7s | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 
](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.9s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.0s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | @@ -86,7 +86,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 97% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 99% | 99% | 99% | 98% | 78% | 99% | 99% | | 2 | [pypdf ](https://pypi.org/project/pypdf/) | 96% | 99% | 95% | 93% | 98% | 99% | 96% | 97% | 99% | 99% | 99% | 99% | 78% | 100% | 99% | | 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 
97% | 98% | 95% | 99% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | -| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 95% | 98% | 93% | 92% | 98% | 97% | 94% | 97% | 96% | 98% | 98% | 97% | 77% | 94% | 99% | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 96% | 98% | 93% | 93% | 98% | 98% | 95% | 97% | 97% | 98% | 99% | 98% | 77% | 96% | 99% | | 5 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 89% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | | 6 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | | 7 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | diff --git a/cache.json b/cache.json index f5cde75..2553089 100644 --- a/cache.json +++ b/cache.json @@ -162,46 +162,46 @@ }, "pdfrw": { "2201.00214": { - "watermark": 0.06779885292053223 + "watermark": 0.06520223617553711 }, "GeoTopo-book": { - "watermark": 0.9143116474151611 + "watermark": 0.537128210067749 }, "2201.00151": { - "watermark": 0.05871248245239258 + "watermark": 0.0580286979675293 }, "1707.09725": { - "watermark": 0.4033334255218506 + "watermark": 0.42675042152404785 }, "2201.00021": { - "watermark": 0.11349081993103027 + "watermark": 0.11134600639343262 }, "2201.00037": { - "watermark": 0.07494401931762695 + "watermark": 0.08258700370788574 }, "2201.00069": { - "watermark": 0.13128018379211426 + "watermark": 0.1533362865447998 }, "2201.00178": { - "watermark": 0.11118340492248535 + "watermark": 0.1181187629699707 }, "2201.00201": { - "watermark": 0.07940006256103516 + "watermark": 0.08050227165222168 }, "1602.06541": { - "watermark": 0.14059066772460938 + "watermark": 0.14638280868530273 }, "2201.00200": { - "watermark": 0.05641889572143555 + "watermark": 0.058241844177246094 }, "2201.00022": { - "watermark": 0.15761399269104004 + "watermark": 
0.15881800651550293 }, "2201.00029": { - "watermark": 0.017118453979492188 + "watermark": 0.018030166625976562 }, "1601.03642": { - "watermark": 0.04789161682128906 + "watermark": 0.07268810272216797 } }, "pdftotext": { @@ -436,92 +436,92 @@ "read": 0.07596778869628906 } }, - "playa": { + "pdfplumber": { "2201.00214": { - "read": 16.78051209449768 + "read": 86.39272856712341 }, "GeoTopo-book": { - "read": 5.137176275253296 + "read": 22.65720844268799 }, "2201.00151": { - "read": 4.224908351898193 + "read": 23.390413761138916 }, "1707.09725": { - "read": 2.1480777263641357 + "read": 14.161987543106079 }, "2201.00021": { - "read": 0.6732006072998047 + "read": 4.160851240158081 }, "2201.00037": { - "read": 1.0461030006408691 + "read": 7.055023908615112 }, "2201.00069": { - "read": 0.541226863861084 + "read": 3.3328192234039307 }, "2201.00178": { - "read": 0.5686733722686768 + "read": 3.220952272415161 }, "2201.00201": { - "read": 0.36340975761413574 + "read": 2.854520559310913 }, "1602.06541": { - "read": 0.6587491035461426 + "read": 4.377838373184204 }, "2201.00200": { - "read": 0.49942922592163086 + "read": 3.310704469680786 }, "2201.00022": { - "read": 0.5751311779022217 + "read": 3.518846035003662 }, "2201.00029": { - "read": 0.42870497703552246 + "read": 1.9474315643310547 }, "1601.03642": { - "read": 0.2176511287689209 + "read": 1.671983242034912 } }, - "pdfplumber": { + "playa": { "2201.00214": { - "read": 86.39272856712341 + "read": 16.891082525253296 }, "GeoTopo-book": { - "read": 22.65720844268799 + "read": 5.135345220565796 }, "2201.00151": { - "read": 23.390413761138916 + "read": 4.301593780517578 }, "1707.09725": { - "read": 14.161987543106079 + "read": 2.1891415119171143 }, "2201.00021": { - "read": 4.160851240158081 + "read": 0.6821308135986328 }, "2201.00037": { - "read": 7.055023908615112 + "read": 1.0525555610656738 }, "2201.00069": { - "read": 3.3328192234039307 + "read": 0.547914981842041 }, "2201.00178": { - "read": 3.220952272415161 + "read": 
0.5721349716186523 }, "2201.00201": { - "read": 2.854520559310913 + "read": 0.3648381233215332 }, "1602.06541": { - "read": 4.377838373184204 + "read": 0.6639382839202881 }, "2201.00200": { - "read": 3.310704469680786 + "read": 0.506413459777832 }, "2201.00022": { - "read": 3.518846035003662 + "read": 0.5782179832458496 }, "2201.00029": { - "read": 1.9474315643310547 + "read": 0.42621445655822754 }, "1601.03642": { - "read": 1.671983242034912 + "read": 0.22521281242370605 } } }, @@ -639,22 +639,6 @@ "2201.00029": 0.9828859664925239, "1601.03642": 0.9551993153165015 }, - "playa": { - "2201.00214": 0.9761700404077421, - "GeoTopo-book": 0.9276858797705095, - "2201.00151": 0.9222810491856283, - "1707.09725": 0.9757090668337609, - "2201.00021": 0.9719382936299716, - "2201.00037": 0.9425468498406934, - "2201.00069": 0.9697201017811705, - "2201.00178": 0.960335879151019, - "2201.00201": 0.9768103792804297, - "1602.06541": 0.981389499715468, - "2201.00200": 0.9697131992609057, - "2201.00022": 0.7673033675330817, - "2201.00029": 0.9408845676697153, - "1601.03642": 0.9891916003293989 - }, "pdfplumber": { "2201.00214": 0.9624093076027349, "GeoTopo-book": 0.8932082690274208, @@ -670,6 +654,22 @@ "2201.00022": 0.7645679514756893, "2201.00029": 0.9927616243405717, "1601.03642": 0.982476230133944 + }, + "playa": { + "2201.00214": 0.9849718101175701, + "GeoTopo-book": 0.9322408377322038, + "2201.00151": 0.931934317626386, + "1707.09725": 0.9802441595024175, + "2201.00021": 0.9786885590106452, + "2201.00037": 0.9520637063559613, + "2201.00069": 0.9722151461115761, + "2201.00178": 0.9684549898853675, + "2201.00201": 0.9835579567396774, + "1602.06541": 0.9856410256410256, + "2201.00200": 0.9821904061015588, + "2201.00022": 0.7724464182274571, + "2201.00029": 0.9566110247308358, + "1601.03642": 0.9919385269802065 } }, "watermarking_result_file_size": { @@ -726,7 +726,7 @@ "1601.03642": 1014378.0 }, "tika": {}, - "playa": {}, - "pdfplumber": {} + "pdfplumber": {}, + "playa": {} } } \ 
No newline at end of file diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py index 11ea26b..91274e5 100644 --- a/pdf_benchmark/library_code.py +++ b/pdf_benchmark/library_code.py @@ -1,4 +1,3 @@ -import argparse import os import subprocess import tempfile @@ -12,7 +11,6 @@ import pypdfium2 as pdfium from borb.pdf.pdf import PDF from borb.toolkit.text.simple_text_extraction import SimpleTextExtraction -from playa.cli import extract_text as playa_extract_text from pdfminer.high_level import extract_pages from requests import ReadTimeout @@ -25,13 +23,13 @@ def playa_get_text(data: bytes) -> str: with open(path, "wb") as outfh: outfh.write(data) outpath = os.path.join(tempdir, "pdf.txt") + texts = [] with open(outpath, "wt") as outfh: - args = argparse.Namespace(pages="all", outfile=outfh) with playa.open(path, max_workers=2) as pdf: - playa_extract_text(pdf, args) - with open(outpath) as infh: - text = infh.read() - return text + pages = pdf.pages + page_labels = [page.label for page in pages] + texts = list(pages.map(playa.Page.extract_text)) + return postprocess(texts, page_labels) def pymupdf_get_text(data: bytes) -> str: diff --git a/read/results/playa/1601.03642.txt b/read/results/playa/1601.03642.txt index 7d39186..66e7c41 100644 --- a/read/results/playa/1601.03642.txt +++ b/read/results/playa/1601.03642.txt @@ -1,8 +1,8 @@ -1 + Creativity in Machine Learning Martin Thoma E-Mail: info@martin-thoma.de -Abstract —Recent machine learning techniques can be modified +Abstract —Recent machine learning techniques can be modified to produce creative results. Those results did not exist before; it is not a trivial combination of the data which was fed into the machine learning system. The obtained results come in multiple @@ -20,7 +20,7 @@ Recent advances in machine learning produce results which the author would intuitively call creative. A high-level overview over several of those algorithms are described in the following. 
This paper is structured as follows: Section II introduces the -reader on a very simple and superficial level to machine +reader on a very simple and superficial level to machine learning, Section III gives examples of creativity with images, Section IV gives examples of machines producing textual content, and Section V gives examples of machine learning @@ -40,10 +40,10 @@ like Wikipedia and lots of Websites, services like Amazon Mechanical Turk and several other changes in the past decades a lot of data has become available. The idea of machine learning is to make use of this data. -A formal definition of the field of Machine Learning is given +A formal definition of the field of Machine Learning is given by Tom Mitchel [Mit97]: -A computer program is said to learn from experi- -ence E with respect to some class of tasks T and +A computer program is said to learn from experience + E with respect to some class of tasks T and performance measure P , if its performance at tasks in T , as measured by P , improves with experience E . Σ ϕx 0 @@ -66,35 +66,35 @@ w n. . . -(a) Example of an artificial neuron unit. +(a) Example of an artificial neuron unit. x i are the input signals and w i are weights which have to get learned. Each input signal gets multiplied with its weight, everything gets -summed up and the activation func- -tion ϕ is applied. (b) A visualization of a simple feed- -forward neural network. The 5 in- -put nodes are red, the 2 bias nodes +summed up and the activation function + ϕ is applied. (b) A visualization of a simple feedforward + neural network. The 5 input + nodes are red, the 2 bias nodes are gray, the 3 hidden units are green and the single output node is blue. Fig. 1: Neural networks are based on simple units which get combined to complex networks. This means that machine learning programs adjust internal -parameters to fit the data they are given. Those computer +parameters to fit the data they are given. 
Those computer programs are still developed by software developers, but the developer writes them in a way which makes it possible to adjust them without having to re-program everything. Machine learning programs should generally improve when they are fed with more data. -The field of machine learning is related to statistics. Some -algorithms directly try to find models which are based on well- -known distribution assumptions of the developer, others are +The field of machine learning is related to statistics. Some +algorithms directly try to find models which are based on wellknown + distribution assumptions of the developer, others are more general. A common misunderstanding of people who are not related -in this field is that the developers don’t understand what their +in this field is that the developers don’t understand what their machine learning program is doing. It is understood very well in the sense that the developer, given only a pen, lots of paper and a calculator could calculate the same result as the machine @@ -105,12 +105,12 @@ this is similar to expecting from an electrical engineer to explain how a computer works. The electrical engineer could probably get the knowledge he needs to do so, but the amount of time required to understand such a complex system from -basic building blocks is a time-intensive and difficult task. +basic building blocks is a time-intensive and difficult task. An important group of machine learning algorithms was -inspired by biological neurons and are thus called artificial +inspired by biological neurons and are thus called artificial neural networks . Those networks are based on mathematical -functions called artificial neurons which take n ∈ N num- -bers x +functions called artificial neurons which take n ∈ N numbers + x 1 , . . . , x n ∈ R as input, multiply them with weights w @@ -127,9 +127,9 @@ i are learned. This is usually done by an optimization technique called gradient descent . 
The gradient descent algorithm takes a function which has to be derivable, starts at any point of the surface of this error function andarXiv:1601.03642v1 [cs.CV] 12 Jan 2016 -2 + makes a step in the direction which goes downwards. Hence -it tries to find a minimum of this high-dimensional function. +it tries to find a minimum of this high-dimensional function. There is, of course, a lot more to say about machine learning. The interested reader might want to read the introduction given by Mitchell [Mit97]. @@ -140,17 +140,17 @@ One would take one neuron per pixel and channel. This means for 500 px × 500 px RGB images one would get 750 ,000 input signals. To approach this problem, so called Convolutional Neural Networks (CNNs) were introduced. Instead of learning -the full connection between the input layer and the first +the full connection between the input layer and the first hidden layer, those networks make use of convolution layers. Convolution layers learn a convolution; this means they learn -the weights of an image filter. An additional advantage is that +the weights of an image filter. An additional advantage is that CNNs make use of spacial relationships of the pixels instead -of flattening the image to a stream of single numbers. +of flattening the image to a stream of single numbers. An excellent introduction into CNNs is given by [Nie15]. A. Google DeepDream The gradient descent algorithm which optimizes most of the parameters in neural networks is well-understood. However, the -effect it has on the recognition system is difficult to estimate. +effect it has on the recognition system is difficult to estimate. [MOT15] proposes a technique to analyze the weights learned by such a network. A similar idea was applied by [VKMT13]. For example, consider a neural network which was trained to @@ -172,7 +172,7 @@ network recognize the bird even more strongly on the next pass and so forth, until a highly detailed bird appears, seemingly out of nowhere. 
The name “Inceptionism” in the title of [MOT15] comes from -the science-fiction movie “Inception” (2010). One reason it +the science-fiction movie “Inception” (2010). One reason it might be chosen is because neural networks are structured in layers. Recent publications tend to have more and more layers [HZRS15]. The used jargon is to say they get “deeper”. @@ -189,15 +189,15 @@ B. Artistic Style Imitation A key idea of neural networks is that they learn different representations of the data in each layer. In the case of CNNs, this can easily be visualized as it was done in various -papers [ZF14]. Usually, one finds that the network learned -to build edge detectors in the first layer and more complex +papers [ZF14]. Usually, one finds that the network learned +to build edge detectors in the first layer and more complex structures in the upper layers. Gatys, Ecker and Bethge showed in [GEB15] that with a clever choice of features it is possible to separate the general style of an image in terms of local image appearance from the content of an image. They support their claim by applying the style of different artists to an arbitrary image of their choice. -3 + (a) Original Image (b) Style image (c) The artistic style of Van Gogh’s “Starry Night” applied to the photograph of a Scottish Highland Cattle. @@ -225,11 +225,11 @@ known from computer vision for detecting the human. It could apply self-learning techniques to draw results most similar to the artists impression of the image. However, the system described in [TL05] seems not to be a machine -learning computer program according to the definition by Tom +learning computer program according to the definition by Tom Mitchell [Mit97]. IV. T EXT D ATA -Digital text is the first form of natural communication which +Digital text is the first form of natural communication which involved computers. It is used in the form of chats, websites, -on collaborative projects like Wikipedia, in scientific literature. 
+on collaborative projects like Wikipedia, in scientific literature. Of course, it was used in pre-digital times, too: In newspaper, in novels, in dramas, in religious texts like the bible, in books for education, in notes from conversations. @@ -240,8 +240,8 @@ The most simple language model which is of use is ann -gram model. This model makes use of sequences of the lengthn to model language. It can be used to get the probability of a third word, given the previous two words. This way, a complete text -can be generated word by word. Refinements and extensions -to this model are discussed in the field of Natural Language +can be generated word by word. Refinements and extensions +to this model are discussed in the field of Natural Language Processing (NLP). However, there are much more sophisticated models. One of those are character predictors based on Recurrent Neural @@ -252,8 +252,8 @@ a lower level. Using such a predictor, one can generate texts character by character. If the model is good, the text can have the correct punctuation. This would not be possible with a word predictor. -Character predictors can be implemented with RNNs. In con- -trast to standard feed-forward neural networks like multilayer +Character predictors can be implemented with RNNs. In contrast + to standard feed-forward neural networks like multilayer Perceptrons (MLPs) which was shown in Figure 1(b), those networks are trained to take their output at some point as well as the normal input. This means they can keep some information @@ -261,8 +261,8 @@ over time. One of the most common variant to implement RNNs is by using so called Long short-term memory (LSTM) cells [HS97]. Recurrent networks apply two main ideas in order to learn: The -first is called unrolling and means that an recurrent network -is imagined to be an infinite network over time. At each time +first is called unrolling and means that an recurrent network +is imagined to be an infinite network over time. 
At each time step the recurrent neurons get duplicated. The second idea is weight sharing which means that those unrolled neurons share the same weight. @@ -273,13 +273,13 @@ Paul Graham’s essays, all the works of Shakespeare, the Hutter Prize [hut] 100 MB dataset of raw Wikipedia articles, the raw LA T -EX source file of a book about algebraic stacks and geometry +EX source file of a book about algebraic stacks and geometry and Linux C code. With that training data, the models can generate similar texts. New works which look like Shakespeare plays, new Wikipedia articles, new Linux code and new papers about algebraic -geometry can thus automatically be generated. At a first -4 +geometry can thus automatically be generated. At a first + glance, they do look authentic. The syntax was mostly used correctly, the formatting looks as expected, the sentences are grammatically correct. However, when one looks at the broader @@ -326,7 +326,7 @@ Human: what is the purpose of living ? Machine : to live forever . V. A UDIO D ATA Common machine learning tasks which involve audio data -are speech recognition, speaker identification, identification of +are speech recognition, speaker identification, identification of songs. This leads to some less-common, but interesting topics: The composition of music, the synthesizing of audio as art. While the composition might be considered in Section IV, we will now investigate the work which was done in audio @@ -346,16 +346,16 @@ highly authentic replications and novel music compositions”. The reader might want to listen to [Cop12] to get an impression of the beauty of the created music. According to Cope, an essential part of music is “a set of -instructions for creating different, but highly related self- -replications”. Emmy was programmed to find this set of -instructions. It tries to find the “signature” of a composer, +instructions for creating different, but highly related selfreplications”. 
+ Emmy was programmed to find this set of +instructions. It tries to find the “signature” of a composer, which Cope describes as “contiguous patterns that recur in two or more works of the composer”. The new feature of Emily Howell compared to Emmy is that Emily Howell does not necessarily remain in a single, already known style. -Emily Howell makes use of association network. Cope empha- -sizes that this is not a form of a neural network. However, it +Emily Howell makes use of association network. Cope emphasizes + that this is not a form of a neural network. However, it is not clear from [Cop13] how exactly an association network is trained. Cope mentions that Emily Howell is explained in detail in [Cop05]. @@ -363,7 +363,7 @@ B. GRUV Recurrent neural networks — LSTM networks, to be exact — are used in [NV15] together with Gated Recurrent Units (GRU) to build a network which can be trained to generate -music. Instead of taking notes directly or MIDI files, Nayebi +music. Instead of taking notes directly or MIDI files, Nayebi and Vitelli took raw audio waveforms as input. Those audio waveforms are feature vectors given for time steps0, 1 , . . . , t − 1 , t. The network is given those feature vectors X @@ -378,9 +378,9 @@ music to obtain features in the frequency domain. An implementation can be found at [VN15] and a demonstration can be found at [Vit15]. C. Audio Synthesization -Audio synthesization is generating new audio files. This can +Audio synthesization is generating new audio files. This can either be music or speech. With the techniques described before, -5 + neural networks can be trained to generate music note by note. However, it is desirable to allow multiple notes being played at the same time. @@ -391,8 +391,8 @@ compositions are available there, too. He also made the code for his Biaxial Recurrent Neural Network available under [Joh15a]. VI. D ISCUSSION What does these examples mean for our understanding of -creativity? 
Does it influence how much we value art? Could -we define art and creativity better after having those and similar +creativity? Does it influence how much we value art? Could +we define art and creativity better after having those and similar results? I think we might readjust our understanding of creativity just like we adjusted our understanding of algorithmically hard @@ -416,8 +416,8 @@ Available: https://www.youtube.com/watch?v=jLR- c uCwI composition,” XRDS: Crossroads, The ACM Magazine for Students , vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: http://dl.acm.org/citation.cfm?id=2460444 -[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [On- -line]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ +[Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online]. + Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ 78691781- c9b7-30a0- 9a0a-3ff76e8bfe58 [Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson Education Limited, 2006. @@ -439,9 +439,9 @@ for image recognition,” arXiv preprint arXiv:1512.03385 , 2015. [Online]. Available: http://arxiv.org/abs/1512.03385 [Joh15a] D. Johnson, “Biaxial recurrent neural network for music composition,” GitHub, Aug. 2015. [Online]. Available: https: //github.com/hexahedria/biaxial-rnn- music-composition -[Joh15b] ——, “Composing music with recurrent neu- -ral networks,” Personal Blog, Aug. 2015. [On- -line]. Available: http://www.hexahedria.com/2015/08/03/ +[Joh15b] ——, “Composing music with recurrent neural + networks,” Personal Blog, Aug. 2015. [Online]. + Available: http://www.hexahedria.com/2015/08/03/ composing-music- with-recurrent- neural-networks/ [Joh16] J. Johnson, “neural-style,” GitHub, Jan. 2016. [Online]. Available: @@ -454,7 +454,7 @@ networks,” Personal Blog, May 2015. [Online]. Available: http://karpathy.github.io/2015/05/21/rnn-effectiveness/ [KMN + 02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman, -and A. 
Wu, “An efficient k-means clustering algorithm: analysis +and A. Wu, “An efficient k-means clustering algorithm: analysis and implementation,” Pattern Analysis and Machine Intelligence, IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002. [Mit97] T. M. Mitchell, Machine learning , ser. McGraw Hill series in @@ -464,9 +464,9 @@ computer science. McGraw-Hill, 1997. deeper into neural networks,” googleresearch.blogspot.co.uk, Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/ 2015/06/inceptionism-going- deeper-into- neural.html -[Nie15] M. A. Nielsen, Neural Networks and Deep Learn- -ing . Determination Press, 2015. [Online]. Avail- -able: http://neuralnetworksanddeeplearning.com/chap6.html# +[Nie15] M. A. Nielsen, Neural Networks and Deep Learning + . Determination Press, 2015. [Online]. Available: + http://neuralnetworksanddeeplearning.com/chap6.html# introducing convolutional networks [NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation using recurrent neural networks,” 2015. [Online]. Available: @@ -498,10 +498,10 @@ http://arxiv.org/abs/1506.05869v2 Available: https://github.com/MattVitelli/GRUV [Wei76] J. Weizenbaum, Computer Power and Human Reason: From Judgement to Calculation. W.H.Freeman & Co Ltd, 1976. -[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding con- -volutional networks,” in Computer Vision–ECCV 2014. Springer, +[ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional + networks,” in Computer Vision–ECCV 2014. Springer, 2014, pp. 818–833. -6 + A PPENDIX A A UTOM ATICALLY GENERATED T EXTS The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for @@ -547,7 +547,7 @@ was starting to signing a major tripad of aid exile.]] C. Linux Code, 1 / * -7 + * Increment the size file of the new incorrect UI_FILTER group information * of the size generatively. * / @@ -605,7 +605,7 @@ D. 
Linux Code, 2 #include #include #include -8 + #include #include #include @@ -629,4 +629,4 @@ os_prefix(unsigned long sys) PUT_PARAM_RAID(2, sel) = get_state_state(); set_pid_sum((unsigned long)state, current_state_str(), (unsigned long)-1->lr_full; low; -} +} \ No newline at end of file diff --git a/read/results/playa/1602.06541.txt b/read/results/playa/1602.06541.txt index 2d9131e..4cbf7c6 100644 --- a/read/results/playa/1602.06541.txt +++ b/read/results/playa/1602.06541.txt @@ -1,12 +1,12 @@ -1 + A Survey of Semantic Segmentation Martin Thoma info@martin-thoma.de Abstract —This survey gives an overview over different techniques used for pixel-level semantic segmentation. -Metrics and datasets for the evaluation of segmenta- -tion algorithms and traditional approaches for segmen- -tation such as unsupervised methods, Decision Forests +Metrics and datasets for the evaluation of segmentation + algorithms and traditional approaches for segmentation + such as unsupervised methods, Decision Forests and SVMs are described and pointers to the relevant papers are given. Recently published approaches with convolutional neural networks are mentioned and typical @@ -16,21 +16,21 @@ given. I. I NTRODUCTION Semantic segmentation is the task of clustering parts of images together which belong to the same -object class. This type of algorithm has several use- -cases such as detecting road signs [ MBLAGJ+ +object class. This type of algorithm has several usecases + such as detecting road signs [ MBLAGJ+ 07], -detecting tumors [ MBVLG02 ], detecting medical in- -struments in operations [WAH97 ], colon crypts segmen- -tation [ CRSS14], land use and land cover classifica- -tion [HDT02 ]. In contrast, non-semantic segmentation -only clusters pixels together based on general character- -istics of single objects. 
Hence the task of non-semantic -segmentation is not well-defined, as many different +detecting tumors [ MBVLG02 ], detecting medical instruments + in operations [WAH97 ], colon crypts segmentation + [ CRSS14], land use and land cover classification + [HDT02 ]. In contrast, non-semantic segmentation +only clusters pixels together based on general characteristics + of single objects. Hence the task of non-semantic +segmentation is not well-defined, as many different segmentations might be acceptable. Several applications of segmentation in medicine are listed in [PXP00]. -Object detection, in comparison to semantic seg- -mentation, has to distinguish different instances of the +Object detection, in comparison to semantic segmentation, + has to distinguish different instances of the same object. While having a semantic segmentation is certainly a big advantage when trying to get object instances, there are a couple of problems: neighboring @@ -57,19 +57,19 @@ operate on and the kind of segmentation they are able to produce. The following subsections will give four different criteria by which segmentation algorithms can be -classified. -This survey describes fixed-class (see Section II-A), -single-class affiliation (see Section II-B ) algorithms +classified. +This survey describes fixed-class (see Section II-A), +single-class affiliation (see Section II-B ) algorithms which work on grayscale or colored single pixel images (see Section II-C ) in a completely automated, passive fashion (see Section II-D). A. Allowed classes -Semantic segmentation is a classification task. As +Semantic segmentation is a classification task. As such, the classes on which the algorithm is trained is a central design decision. 
-Most algorithms work with a fixed set of classes; -some even only work on binary classes like fore- -ground vs background [ RM07], [ CS10 ] or street vs +Most algorithms work with a fixed set of classes; +some even only work on binary classes like foreground + vs background [ RM07], [ CS10 ] or street vs no street [BKTT15]. However, there are also unsupervised segmentation algorithms which do not distinguish classes at all (see @@ -82,7 +82,7 @@ a void class was also used in the MSRCv2 dataset (see Section III-B2) to make it possible to make more coarse segmentations and thus having to spend less time annotating the image. -B. Class affiliation of pixels +B. Class affiliation of pixels Humans do an incredible job when looking at the world. For example, when we see a glass of water standing on a table we can automatically say that there @@ -90,47 +90,47 @@ is the glass and behind it the table, even if we only had a single image and were not allowed to move. This means we simultaneously two labels to the coordinates of the glass: Glass and table. Although there is much more -work being done on single class affiliation segmenta- -tion algorithms, there is a publication about multiple -class affiliation +work being done on single class affiliation segmentation + algorithms, there is a publication about multiple +class affiliation segmentation [ LRAL08 ]. Similarly, recent publications in pixel-level object segmentation used layered models [YHRF12].arXiv:1602.06541v2 [cs.CV] 11 May 2016 -2 + C. Input Data The available data which can be used for the inference of a segmentation varies by application. • Grayscale vs colored: Grayscale images are commonly used in medical imaging such as -magnetic resonance (MR) imaging or ultrasonog- -raphy whereas colored photographs are obviously +magnetic resonance (MR) imaging or ultrasonography + whereas colored photographs are obviously widespread. 
• Excluding or including depth data: RGB-D, sometimes also called range [ HJBJ + - 96 ] is avail- -able in robotics, autonomous cars and recently + 96 ] is available + in robotics, autonomous cars and recently also in consumer electronics such as Microsoft Kinect [Zha12]. -• Single image vs stereo images vs co- -segmentation: Single image segmentation is the +• Single image vs stereo images vs cosegmentation: + Single image segmentation is the most wide-spread kind of segmentation, but using stereo images was already tried in [BVZ01]. It can be seen as a more natural way of segmentation as most mammals have two eyes. It can also be seen as being related to having depth data. Co-segmentation as in [ RMBK06], [ CXGS12 ] is -the problem of finding a consistent segmentation +the problem of finding a consistent segmentation for multiple images. This problem can be seen in two ways: One the one hand, it can be seen -as the problem of finding common objects in at +as the problem of finding common objects in at least two images. On the other hand, every image -after the first can be used as an additional source -of information to find a meaningful segmentation. +after the first can be used as an additional source +of information to find a meaningful segmentation. This idea can be extended to time series such as videos. • 2D vs 3D - : Segmenting images is a 2D segmenta- -tion task where the smallest unit is called a pixel. + : Segmenting images is a 2D segmentation + task where the smallest unit is called a pixel. In 3D data, such as volumetric X-ray CT images as they were used in [ HHR01 ], the smallest unit is called a voxel. @@ -138,18 +138,18 @@ D. Operation state The operation state of the classifying machine can either be active as in [SUM + 11 ], [SSA12] where robots -can move objects to find a segmentation or passive, -where the received image cannot be influenced. 
Among +can move objects to find a segmentation or passive, +where the received image cannot be influenced. Among the passive algorithms, some segment in a completely automatic fashion, others work in an interactive mode. One example would be a system where the user clicks on the background or marks a coarse segmentation and -the algorithm finds a fine-grained segmentation. [BJ00], +the algorithm finds a fine-grained segmentation. [BJ00], [ RKB04 ], [ PS07] describe systems which work in an -interactive mode. (a) Example Scene (b) Visualization of a found seg- -mentation -Figure 1: An example of a scene and a possible visu- -alization of a found segmentation. +interactive mode. (a) Example Scene (b) Visualization of a found segmentation + +Figure 1: An example of a scene and a possible visualization + of a found segmentation. III. E VALUATION AND D ATASETS A. Quality measures for evaluation A performance measure is a crucial part of any @@ -159,17 +159,17 @@ is the most commonly used performance measure, but there are other measures of quality which matter when segmentation algorithms are compared. This section gives an overview of those quality measures. -1) Accuracy: Showing the correctness of the segmen- -tation hypotheses is done in most publications about +1) Accuracy: Showing the correctness of the segmentation + hypotheses is done in most publications about semantic segmentation. However, there are a couple of different ways how this accuracy can be displayed. -One way to give readers a first qualitative impression +One way to give readers a first qualitative impression of the obtained segmentations is by showing examples such as Figure 1. However, this can only support the explanation of particular problems or showcase special situation. For meaningful information about the overall accuracy, there -are a couple of metrics how accuracy can be defined. +are a couple of metrics how accuracy can be defined. 
For this section, let k ∈ N be the number of classes, n ij ∈ N @@ -177,7 +177,7 @@ n which belong to class i and were labeled as class j . (n ij ) is called a confusion matrix . Let t - i = + i = k j =1 n ij @@ -185,34 +185,34 @@ be the total number of pixels of class i. One way to compare segmentation algorithms is by the pixel-wise accuracy of the predicted segmentation as done in many publications [ SWRC06], [ CP08], -[ LSD14 ]. This is also called per-pixel rate and de- -fined as +[ LSD14 ]. This is also called per-pixel rate and defined + as k i=1 n ii k i=1 t -i . Taking the pixel-wise classification +i . Taking the pixel-wise classification accuracy has two major drawbacks: P1 Tasks like segmenting images for autonomous cars have large regions which have one class. This -makes achieving classification accuracies of more +makes achieving classification accuracies of more than 30 % with a priori knowledge only possible. For example, a system might learn that a certain position of the image is most of the time “sky” while another position is most of the time “road”. -3 + P2 The manually labeled images could have a more -coarse labeling. For example, a human classifier +coarse labeling. For example, a human classifier could have labeled a region as “car” and the algorithm could have split that region into the -general “car” and the more specific “wheel of a +general “car” and the more specific “wheel of a car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: • mean accuracy : 1 -k · +k · k i =1 n ii @@ -220,7 +220,7 @@ t i ∈ [0 , 1] • mean intersection over union: 1 -k · +k · k i =1 n ii @@ -234,7 +234,7 @@ j =1 n ( k i =1 t -i ) −1 +i ) −1 k i =1 t i · n @@ -255,8 +255,8 @@ One way to deal with problem P1 and problem P2 is giving the confusion matrix as done in [ SWRC06 ]. However, this approach is not feasible if many classes are given. 
-The F -measure is useful for binary classifica- -tion task such as the KITTI road segmentation +The F -measure is useful for binary classification + task such as the KITTI road segmentation benchmark [ FKG13 ] or crypt segmentation as done by [ CRSS14]. It is calculated as “the harmonic mean of the precision and recall” [PH05]: @@ -272,46 +272,46 @@ Finally, it should be noted that a lot of other measures for the accuracy of segmentations were proposed for non-semantic segmentation. One of those accuracy measures is Normalized Probabilistic Rand (NPR) -index which was introduced in [ UPH05 ] and eval- -uated in [ CSI + +index which was introduced in [ UPH05 ] and evaluated + in [ CSI + 09 ] on dermoscopy images. Other non-semantic segmentation measures were introduced in [MFTM01], but the reason for creating them seems to -be to deal with the under-defined task description of non- -semantic segmentation. These accuracy measures try to +be to deal with the under-defined task description of nonsemantic + segmentation. These accuracy measures try to deal with different levels of coarsity of the segmentation. This is much less of a problem in semantic segmentation and thus those measures are not explained here. 2) Speed: A maximum upper bound on the execution time for the inference on a single image is a hard requirement for some applications. For example, in the -case of autonomous cars an algorithm which classifies +case of autonomous cars an algorithm which classifies pixel as street or no-street and thus makes a semantic segmentation, every image needs to be processed within 20 ms [BKTT15]. This time is called latency. Most papers do not give exact values for the time their application needs. One reason might be that this is very hardware, implementation and in some cases even -data specific. For example, [HJBJ + +data specific. For example, [HJBJ + 96 ] notes that their algorithm needs 10 s on a Sun SparcStation 20. 
The fastest CPU ever produced for this system had200 MHz. -Comparing this directly with results which were ob- -tained using an Intel i7-4820K with 3.9 GHz would not +Comparing this directly with results which were obtained + using an Intel i7-4820K with 3.9 GHz would not be meaningful. However, it does still make sense to mention the execution time as well as the hardware in individual papers. This gives the interested reader the possibility to -estimate how difficult it might be to adjust the algorithm +estimate how difficult it might be to adjust the algorithm to work in the required time-constraints. Besides the latency, the throughput is another -relevant characteristic of algorithms and implementa- -tions for semantic segmentation. For example, for the +relevant characteristic of algorithms and implementations + for semantic segmentation. For example, for the automatic description of images in order to enable text search the throughput is of much higher importance than latency. 3) Stability: A reasonable requirement on semantic -segmentation algorithms is the stability of a segmen- -tation over slight changes in the input image. When +segmentation algorithms is the stability of a segmentation + over slight changes in the input image. When the image data is sightly blurred by smoke such as in Figure 4(c), the segmentation should not change. Also, two images which show a slight change in @@ -320,7 +320,7 @@ the segmentation [PH05]. 4) Memory usage: Peak memory usage matters when segmentation algorithms are used in devices like smartphones or cameras, or when the algorithms have -to finish in a given time frame, run on the graphics +to finish in a given time frame, run on the graphics processing unit (GPU) and consume so much memory for single image segmentation that only the latest graphic cards can be used. 
However, no publication @@ -344,14 +344,14 @@ Once every year from 2005 to 2012 [ EVGW+ an EU network of excellence 2 V isual Object Classes -4 + Beginning with 2007, a segmentation challenge was added [EVGW + a]. The dataset consists of annotated photographs from -www.flicker.com, a photo sharing website. There are +www.flicker.com, a photo sharing website. There are multiple challenges for PASCAL VOC. The 2012 -competition had five challenges of which one is a +competition had five challenges of which one is a segmentation challenge where a single class label was given for each pixel. The classes are: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, dining table, @@ -365,7 +365,7 @@ segmentation over union criterion (see Section III-A). 2) MSRCv2: Microsoft Research has published a database of 591 photographs with pixel-level annotation of 21 classes: aeroplane, bike, bird, boat, body, book, -building, car, cat, chair, cow, dog, face, flower, grass, +building, car, cat, chair, cow, dog, face, flower, grass, road, sheep, sign, sky, tree, water. Additionally, there is a void label for pixels which do not belong to any of the 21 classes or which are close to the @@ -374,22 +374,22 @@ hand-segmentation which does not align exactly with the object boundaries” [SWRC06]. 3) Medical Databases: The Warwick-QU Dataset consists of 165 images with pixel-level annotation of -5 classes: “healthy, adenomatous, moderately differen- -tiated, moderately-to-poorly differentiated, and poorly +5 classes: “healthy, adenomatous, moderately differentiated, + moderately-to-poorly differentiated, and poorly differentiated” [ CSM09]. This dataset is part of the Gland Segmentation (GlaS) challenge. The DIARETDB1 [KKV + - 14 ] is a dataset of 89 im- -ages fundus images. Those images show the interior + 14 ] is a dataset of 89 images + fundus images. Those images show the interior surface of the eye. Fundus images can be used to detect diabetic retinopathy. 
The images have four classes of coarse annotations: hard and soft exudates, hemorrhages and red small dots. -20 test and additionally 20 training retinal fun- -dus images are available through the DRIVE data +20 test and additionally 20 training retinal fundus + images are available through the DRIVE data set [ SAN+ - 04 ]. The vessels were annotated. Addition- -ally, [AP11] added vascular features. + 04 ]. The vessels were annotated. Additionally, + [AP11] added vascular features. The Open-CAS Endoscopic Datasets [ MHMK+ 14] are 60 images taken from laparoscopic adrenalectomies @@ -403,9 +403,9 @@ One crowd annotation was obtained for each image by a majority vote on a pixel basis of 10 segmentations given by 10 different KWs. Training Prediction - Post- -processingWindow-wise -ClassificationWindow + PostprocessingWindow-wise + +ClassificationWindow extraction Data augmentation Feature extraction Preprocessing Figure 2: A typical segmentation pipeline gets raw @@ -415,38 +415,38 @@ features. For training, data augmentation techniques such as image rotation can be applied. For every single image, patches of the image called windows are extracted and -those windows are classified. The resulting -semantic segmentation can be refined by +those windows are classified. The resulting +semantic segmentation can be refined by simple morphologic operations or by more complex approaches such as Markov Random Fields (MRFs). IV. S EGMENTATION P IPELINE Typically, semantic segmentation is done with a -classifier which operates on fixed-size feature inputs +classifier which operates on fixed-size feature inputs and a sliding-window approach [ DT05 ], [ YBCK10], -[SCZ08]. This means a classifier is trained on images -of a fixed size. The trained classifier is then fed with -rectangular regions of the image which are called win- -dows. Although the classifier gets an image patch of e.g. +[SCZ08]. This means a classifier is trained on images +of a fixed size. 
The trained classifier is then fed with +rectangular regions of the image which are called windows. + Although the classifier gets an image patch of e.g. 51 px × 51 px of the environment, it might only classify the center pixel or a subset of the complete window. This segmentation pipeline is visualized in Figure 2. -This approach was taken by [BKTT15 ] and a major- -ity of the VOC2007 participants [ EVGW + +This approach was taken by [BKTT15 ] and a majority + of the VOC2007 participants [ EVGW + a]. As this -approach has to apply the patch classifier 512 · 512 = +approach has to apply the patch classifier 512 · 512 = 262 144 times for images of size 512 px × 512 px, there are techniques for speeding it up such as applying a stride and interpolating the results. Neural networks are able to apply the sliding window -approach in a very efficient way by handling a trained +approach in a very efficient way by handling a trained network as a convolution and applying the convolution on the complete image. However, there are alternatives. Namely MRFs and Conditional Random Fields (CRFs) which take the information of the complete image and segment it in an holistic approach. -5 + V. T RADITIONAL A PPROACHES Image segmentation algorithms which use traditional approaches, hence don’t apply neural networks and @@ -459,11 +459,11 @@ Forests are described in Section V-C, Markov Random Fields in Section V-E and Support Vector Machines (SVMs) in Section V-D. Postprocessing is covered in Section V-G. -It should be noted that algorithms can use combina- -tion of methods. For example, [TNL14] makes use of a +It should be noted that algorithms can use combination + of methods. For example, [TNL14] makes use of a combination of a SVM and a MRF. Also, auto-encoders can be used to learn features which in turn can be used -by any classifier. +by any classifier. A. Features and Preprocessing methods The choice of features is very important in traditional approaches. 
The most commonly used local and global @@ -473,7 +473,7 @@ dimensionality reduction algorithms. (e.g. 3 features for RGB, 3 features for HSV, 1 feature for the gray-value) are the most widely used features. A typical image is in the RGB color space, but depending -on the classifier and the problem another color space +on the classifier and the problem another color space might result in better segmentations. RGB, YcBcr, HSL, Lab and YIQ are some examples used by [ CRSS14]. No single color space has been proven to be superior @@ -482,7 +482,7 @@ most common choices seem to be RGB and HSI. Reasons for choosing RGB is simplicity and the support by programming languages, whereas the choice of the HSI color space might make it simpler for the -classifier to become invariant to illumination. One +classifier to become invariant to illumination. One reason for choosing CIE-L*a*b* color space is that it approximates human perception of brightness [ KP92 ]. It follows that choosing the L*a*b color space helps @@ -527,33 +527,33 @@ keypoints such as “right shoulder”, “left shoulder”, “right knee” and “left knee”. They were originally used for human pose estimation. Finding those extra keypoints is easily possible for well-known image -classes like humans. However, it is difficult for classes +classes like humans. However, it is difficult for classes like airplanes, ships, organs or cells where the human annotators do not know the keypoints. Additionally, the keypoints have to be chosen for every single class. There -are strategies to deal with those problems like viewpoint- -dependent keypoints. Poselets were used in [BMBM10] +are strategies to deal with those problems like viewpointdependent + keypoints. Poselets were used in [BMBM10] to detect people and in [BBMM11] for general object detection of the PASCAL VOC dataset. 6) Textons: A texton is the minimal building block of vision. 
The computer vision literature does not give a -strict definition for textons, but edge detectors could be -one example. One might argue that deep learning tech- -niques with Convolution Neuronal Networks (CNNs) -learn textons in the first filters. +strict definition for textons, but edge detectors could be +one example. One might argue that deep learning techniques + with Convolution Neuronal Networks (CNNs) +learn textons in the first filters. An excellent explanation of textons can be found in [ZGWX05]. -7) Dimensionality Reduction: High-resolution im- -ages have a lot of pixels. Having one or more feature per +7) Dimensionality Reduction: High-resolution images + have a lot of pixels. Having one or more feature per pixel results in well over a million features. This makes -training difficult while the higher resolution might not +training difficult while the higher resolution might not contain much more information. A simple approach to deal with this is downsampling the high-resolution image to a low-resolution variant. Another way of doing dimensionality reduction is principal component analysis (PCA), which is applied by [ COWR11 ]. The -idea behind PCA is to find a hyperplane on which all -6 +idea behind PCA is to find a hyperplane on which all + feature vectors can be projected with a minimal loss of information. A detailed description of PCA is given by [Smi02]. @@ -566,8 +566,8 @@ reduction. An overview and a comparison over some of them is given by [vdMPvdH09]. B. Unsupervised Segmentation Unsupervised segmentation algorithms can be used -in supervised segmentation as another source of infor- -mation or to refine a segmentation. While unsupervised +in supervised segmentation as another source of information + or to refine a segmentation. While unsupervised segmentation algorithms can never be semantic, they are well-studied and deserve at least a very brief overview. 
Semantic segmentation algorithms store information @@ -578,8 +578,8 @@ consistent regions or region boundaries. directly be applied on the pixels, when one gives a feature vector per pixel. Two clustering algorithms are k -means and the mean-shift algorithm. -The k -means algorithm is a general-purpose cluster- -ing algorithm which requires the number of clusters to +The k -means algorithm is a general-purpose clustering + algorithm which requires the number of clusters to be given beforehand. Initially, it places the k centroids randomly in the feature space. Then it assigns each data point to the nearest centroid, moves the centroid @@ -588,23 +588,23 @@ until a stopping criterion is reached. A faster variant is described in [Har75]. k -means was applied by [CLP98] for medical image segmentation. -Another clustering algorithm is the mean-shift algo- -rithm which was introduced by [ CM02] for segmen- -tation tasks. The algorithm finds the cluster centers +Another clustering algorithm is the mean-shift algorithm + which was introduced by [ CM02] for segmentation + tasks. The algorithm finds the cluster centers by initializing centroids at random seed points and iteratively shifting them to the mean coordinate within a certain range. Instead of taking a hard range constraint, the mean can also be calculated by using any kernel. This effectively applies a weight to the coordinates -of the points. The mean shift algorithm finds cluster +of the points. The mean shift algorithm finds cluster centers at positions with a highest local density of points. 2) Graph Based Image Segmentation: Graph-based image segmentation algorithms typically interpret pixels as vertices and an edge weight is a measure of dissimilarity such as the difference in color [ FH04 ], -[ Fel]. There are several different candidates for edges. The 4-neighborhood (north, east, south west) or an 8- -neighborhood (north, north-east, east, south-east, south, +[ Fel]. 
There are several different candidates for edges. The 4-neighborhood (north, east, south west) or an 8neighborhood + (north, north-east, east, south-east, south, south-west, west, north-west) are plausible choices. One way to cut the edges is by building a minimum spanning tree and removing edges above a threshold. @@ -615,8 +615,8 @@ A graph-based method which ranked 2 nd in the Pascal VOC 2010 challenge [EVGW+ 10 ] is described -in [ CS10 ]. The system makes heavy use of the multi- -cue contour detector globalPb [ MAFM08] and needs +in [ CS10 ]. The system makes heavy use of the multicue + contour detector globalPb [ MAFM08] and needs about 10 GB of main memory [CS11]. 3) Random Walks: Random walks belong to the graph-based image segmentation algorithms. Random @@ -628,25 +628,25 @@ random walk is calculated. This is done by taking image gradients as described in Section V-A for HOG features. The class of the pixel is the class of which a seed point will be reached with highest probability. At -first, this is an interactive segmentation method, but it +first, this is an interactive segmentation method, but it can be extended to be non-interactive by using another segmentation methods output as seed points. 4) Active Contour Models: Active contour models (ACMs) are algorithms which segment images roughly -along edges, but also try to find a border which is -smooth. This is done by defining a so called energy +along edges, but also try to find a border which is +smooth. This is done by defining a so called energy function which will be minimized. They were initially described in [KWT88]. ACMs can be used to segment -an image or to refine segmentation as it was done +an image or to refine segmentation as it was done in [AM98] for brain MR images. -5) Watershed Segmentation: The watershed algo- -rithm takes a grayscale image and interprets it as a +5) Watershed Segmentation: The watershed algorithm + takes a grayscale image and interprets it as a height map. 
Low values are catchment basins and the higher values between two neighboring catchment basins is the watershed. The catchment basins should contain what the developer wants to capture. This implies that those areas must be dark on grayscale -images. The algorithm starts to fill the basins from +images. The algorithm starts to fill the basins from the lowest point. When two basins are connected, a watershed is found. The algorithm stops when the highest point is reached. @@ -655,36 +655,36 @@ algorithm is given in [RM00]. The watershed segmentation was used in [JLD03] to segment white blood cells. As the authors describe, the segmentation by watershed transform has two -flaws: Over-segmentation due to local minima and thick +flaws: Over-segmentation due to local minima and thick watersheds due to plateaus. -7 + C. Random Decision Forests -Random Decision Forests were first proposed -in [ Ho95 ]. This type of classifier applies techniques -called ensemble learning , where multiple classifiers +Random Decision Forests were first proposed +in [ Ho95 ]. This type of classifier applies techniques +called ensemble learning , where multiple classifiers are trained and a combination of their hypotheses is used. One ensemble learning technique is the random -subspaces method where each classifier is trained +subspaces method where each classifier is trained on a random subspace of the feature space. Another ensemble learning technique is bagging , which is training the trees on random subsets of the training set. -In the case of Random Decision Forests, the classifiers +In the case of Random Decision Forests, the classifiers are decision trees. A decision tree is a tree where each inner node uses one or more features to decide in which branch to descend. Each leaf is a class. 
One strength of Random Decision Forests compared -to many other classifiers like SVMs and neural networks +to many other classifiers like SVMs and neural networks is that the scale of measure of the features (nominal, -ordinal, interval, ratio) can be arbitrary. Another advan- -tage of Random Decision Forests compared to SVMs, -for example, is the speed of training and classification. +ordinal, interval, ratio) can be arbitrary. Another advantage + of Random Decision Forests compared to SVMs, +for example, is the speed of training and classification. Decision trees were extensively studied in the past 20 years and a multitude of training algorithms have been proposed (e.g. ID3 in [ Qui86], C4.5 in [ Qui93 ]). Possible training hyperparameters are the measure to evaluate the “goodness of split” [Min89], the number of decision trees being used, and if the depth of the trees -is restricted. Typically in the context of classification, +is restricted. Typically in the context of classification, decision trees are trained by adding new nodes until each leaf contains only nodes of a single class or until it is not possible to split further. This is called astopping @@ -704,8 +704,8 @@ accuracy for their best segmentation system of 42 %. An excellent introduction to Random Decision Forests for semantic segmentation is given by [SCZ08]. D. SVMs -SVMs are well-studied binary classifiers which can -be described by five central ideas. For those ideas, the +SVMs are well-studied binary classifiers which can +be described by five central ideas. For those ideas, the training data is represented as (x i , y i ) where x @@ -723,12 +723,12 @@ s.t. ∀m i=1 y i · ( w , x i + b) - -sgn applied to this gives the classification≥ 1 + +sgn applied to this gives the classification≥ 1 2) Even if the underlying process which generates the features for the two classes is linearly separable, -noise can make the data not separable. 
The intro- -duction of slack variables to relax the requirement +noise can make the data not separable. The introduction + of slack variables to relax the requirement of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter @@ -751,9 +751,9 @@ Note that 0 ≤ ξ i ≤ 1 means that the data point is within the margin, whereas ξ i ≥ 1 means it is -misclassified. An SVM with C > 0 is also called +misclassified. An SVM with C > 0 is also called a soft-margin SVM. -3) The primal problem is to find the normal vector +3) The primal problem is to find the normal vector w and the bias b . The dual problem is to express w as a linear combination of the training data x i : @@ -795,7 +795,7 @@ j y i y j x i , x - j + j s.t. ∀m i=1 0 ≤ α i ≤ C @@ -804,9 +804,9 @@ s.t. m i=1 α i y i = 0 -8 -4) Not every dataset is linearly separable. This prob- -lem is approached by transforming the feature + +4) Not every dataset is linearly separable. This problem + is approached by transforming the feature vectors x with a non-linear mapping Φ into a higher dimensional (probably ∞ -dimensional) space. As the feature vectors x are only used @@ -819,7 +819,7 @@ calculation i , x j ) = x i , x - j + j This function K is called a kernel . The idea of never explicitly transforming the vectors x i to the @@ -831,7 +831,7 @@ K j ) = (x i , x j + r )p -of degree p and coefficient r , the Gaussian radial +of degree p and coefficient r , the Gaussian radial basis function (RBF) kernel K Gauss (x @@ -848,24 +848,24 @@ j ) = tanh( γ x i , x j − r ) where the parameter γ determines how much -influence single training examples have. +influence single training examples have. 5) The described SVMs can only distinguish between two classes. Common strategies to expand those -binary classifiers to multi-class classification is +binary classifiers to multi-class classification is the one-vs-all and the one-vs-one strategy. 
In the -one-vs-all strategy n classifiers have to be trained +one-vs-all strategy n classifiers have to be trained which can distinguish one of the n classes against all other classes. In the one-vs-one strategy n 2 − n 2 -classifiers are trained; one classifier for each pair +classifiers are trained; one classifier for each pair of classes. A detailed description of SVMs can be found in [Bur98]. SVMs are used by [YHRF12 ] on the 2009 and 2010 PASCAL segmentation challenge [ EVGW + 10 ]. They -did not hand their classifier in to the challenge itself, +did not hand their classifier in to the challenge itself, but calculated an average rank of 7 among the different categories. [ FGMR10] also used an SVM based method with @@ -926,12 +926,12 @@ gets labeled as shown in Figure 3. For example, a MRF which is trained on images of the size224 px × 224 pixel and gets the raw RGB values as features has 224 · 224 · 3 - + input + 224 · 224 - + output = 200 704 -random variables. Those random variables are condi- -tionally independent, given their local neighborhood. +random variables. Those random variables are conditionally + independent, given their local neighborhood. These (in)dependencies can be expressed with a graph. Let G = ( V , E ) be the associated undirected graph of an MRF and C be the set of all maximal cliques in @@ -949,12 +949,12 @@ typically live on 0, . . . , 255 or [0 , 1]. The probability of x , y can be expressed as P ( x, y ) = 1 Z e − E (x ,y ) -where Z = +where Z = x ,y e −E ( x,y ) is a normalization term called the partition function and E is called the energy function. A common choice for the energy function is -E ( x, y ) = +E ( x, y ) = c∈C ψ c (x , y ) where ψ is called a clique potential . 
One choice for @@ -966,7 +966,7 @@ c (x 1 , x 2 ) = wδ (x 1 , x -2 ) = +2 ) = +w if x 1 = x 2 @@ -976,7 +976,7 @@ c (x According to [ Mur12], the most common way of inference over the posterior MRF in computer vision problems is Maximum A Posteriori (MAP) estimation. -9 + Detailed introductions to MRFs are given by [ BKR11 ], [ Mur12]. MRFs are used by [ ZBS01] and [MSB12] for image segmentation. @@ -991,15 +991,15 @@ not have to be estimated. Another advantage of CRFs compared to MRFs is that no distribution assumption about x has to be made. A CRF has the partition function Z : -Z (x ) = +Z (x ) = y P ( x, y ) and joint probability distribution P ( y | x ) = 1 -Z ( x ) +Z ( x ) c∈C ψ c (y c | x ) -The simplest way to define the clique potentialsψ is +The simplest way to define the clique potentialsψ is the count of the class y c given x added with a positive smoothing constant to prevent the complete term from @@ -1017,23 +1017,23 @@ of the PASCAL VOC 2010 challenge [EVGW+ 10]. An introduction to CRFs is given by [SM11]. G. Post-processing methods -Post-processing refine a found segmentation and +Post-processing refine a found segmentation and remove obvious errors. For example, the morphological operations opening and closing can remove noise. The opening operation is a dilation followed by a erosion. This removes tiny segments. The closing operation is a erosion followed by a dilation. This removes tiny gaps -in otherwise filled regions. They were used in [CLP98] +in otherwise filled regions. They were used in [CLP98] for biomedical image segmentation. -Another way of refinement of the found segmentation +Another way of refinement of the found segmentation is by adjusting the segmentation to match close edges. This was used in [ BBMM11] with an ultra-metric contour map [AMFM09]. Active contour models are another example of a post-processing method [KWT88]. VI. 
N EURAL N ETWORKS FOR S EM ANTIC S EGM ENTATION -Artificial neural networks are classifiers which are -inspired by biologic neurons. Every single artificial +Artificial neural networks are classifiers which are +inspired by biologic neurons. Every single artificial neuron has some inputs which are weighted and sumed up. Then, the neuron applies a so called activation function to the weighted sum and gives an output. Those @@ -1049,11 +1049,11 @@ as variables and the error function as a surface in this weight-space. Minimizing the error function in the weight space adapts the neural network to the problem. There are lots of ideas around neural networks like -regularization, better optimization algorithms, automat- -ically building up architectures, design choices for +regularization, better optimization algorithms, automatically + building up architectures, design choices for activation functions. This is not explained in detail here, but some of the mayor breakthroughs are outlined. -CNNs are neural networks which learn image filters. +CNNs are neural networks which learn image filters. They drastically reduce the number of parameters which have to be learned while being still general enough for the problem domain of images. This was shown by Alex @@ -1061,7 +1061,7 @@ Krizhevsky et al. in [ KSH12 ]. One major idea was a clever regularization called dropout training, which set the output of neurons while training randomly to zero. Another contribution was the usage of an activation -function called rectified linear unit : +function called rectified linear unit : ϕ ReLU ( x ) = max(0 , x) Those are much faster to train than the commonly used @@ -1070,25 +1070,25 @@ sigmoid activation functions Sigmoid ( x ) = 1 e − x + 1 -Krizhevsky et al. implemented those ideas and partici- -pated in the ImageNet Large-Scale Visual Recognition +Krizhevsky et al. implemented those ideas and participated + in the ImageNet Large-Scale Visual Recognition Challenge (ILSVRC). 
The best other system, which -used SIFT features and Fisher Vectors, had a perfor- -mance of about 25 .7 % while the network by Alex -Krizhevsky et al. got 17 .0 % error rate on the ILSVRC- -2010 dataset. As a preprocessing step, they downsam- -pled all images to a fixed size of 256 px × 256 px before +used SIFT features and Fisher Vectors, had a performance + of about 25 .7 % while the network by Alex +Krizhevsky et al. got 17 .0 % error rate on the ILSVRC2010 + dataset. As a preprocessing step, they downsampled + all images to a fixed size of 256 px × 256 px before they fed the features into their network. This network is commonly known as AlexNet. Since AlexNet was developed, a lot of different neural networks have been proposed. One interesting example is [PC13 ], where a recurrent CNN for semantic segmentation is presented. -10 + Another notable paper is [ LSD14 ]. The algorithm presented there makes use of a classifying network such as AlexNet, but applies the complete network as an -image filter. This way, each pixel gets a probability +image filter. This way, each pixel gets a probability distribution for each of the trained classes. By taking the most likely class, a semantic segmentation can be done with arbitrary image sizes. @@ -1099,23 +1099,23 @@ More detailed explanations to neural networks for visual recognition is given by [LKJ15]. VII. P OSSIBLE P ROBLEMS IN THE D ATA FOR S EGMENTATION ALGORITHMS -Different segmentation workflows have different +Different segmentation workflows have different problems. However, there are a couple of special cases which should be tested. Those cases might not occur often in the training data, but it could still happen in the productive system. -I am not aware of any systematic work which exam- -ined the influence of problems such as the following. +I am not aware of any systematic work which examined + the influence of problems such as the following. A. 
Lens Flare -Lens flare is the effect of light getting scattered in +Lens flare is the effect of light getting scattered in the lens system of the camera. The testing data set of the KITTI road evaluation benchmark [ FKG13] has a couple of photos with this problem. Figure 4(a) shows -an extreme example of lens flare. +an extreme example of lens flare. B. Vignetting Vignetting is the effect of a photograph getting darker in the corners. This can have many reasons, for example -filters on the camera blocking light at the corners. +filters on the camera blocking light at the corners. C. Blurred images Images can be blurred for a couple of reasons. A problem with the lenses mechanics, focusing on the @@ -1133,12 +1133,12 @@ segmented might suffer from partial occlusions. (a)Lens Flare Image by [Hus07] (b)Vignetting Image by [Man12] (c)Smoke by cauterization -Image by [GVSY13] (d)Camouflage +Image by [GVSY13] (d)Camouflage Image by [Kaf07] (e) Transparency (f) Viewpoint Figure 4: Examples of images which might cause semantic segmentation systems to fail. -2) Camouflage: Some objects, like animals in the +2) Camouflage: Some objects, like animals in the wild, actively try to hide (see Figure 4(d) as an example). In other cases it might just be bad luck that objects are hard for humans to detect. This problem has two @@ -1151,7 +1151,7 @@ system is forced to learn something wrong. 3) Semi-transparent Occlusion: Some objects like drinking glasses can be visible and still leave the object behind them visible as shown in Figure 4(e). This is -mainly a definition problem: Is the seen pixel the glass +mainly a definition problem: Is the seen pixel the glass label or the smartphone label? 4) Viewpoints: Changes in viewpoints can be a problem, if they don’t occur in the training data. For @@ -1159,10 +1159,10 @@ example, an image captioning system which was trained on photographs of professional photographers might not have photos from the point of view of a child. 
This is visualized in Figure 4(f). -11 + VIII. D ISCUSSION Ohta et al. wrote [ OKS78 ] 38 years ago. It is one -of the first papers mentioning semantic segmentation. +of the first papers mentioning semantic segmentation. In this time, a lot of work was done and many different directions have been explored. Different kinds of semantic segmentation have emerged. @@ -1173,15 +1173,15 @@ algorithms. Future work includes a comparative study of those algorithms on publicly available dataset such as the ones presented in Table I. Another open -question is the influence of the problems described +question is the influence of the problems described in Section VII. This could be done using a subset of the thousands of images of Wikipedia Commons, such as https://commons.wikimedia.org/wiki/Category:Blurring for blurred images. -A combination of different classifiers in an ensemble +A combination of different classifiers in an ensemble would be an interesting option to explore in order to improve accuracy. Another direction which is currently -studied is combining classifiers such as neural networks +studied is combining classifiers such as neural networks with CRFs [ZJRP + 15]. R EFERENCES [AM98] M. S. Atkins and B. T. Mackiewich, “Fully @@ -1199,7 +1199,7 @@ Conference on . IEEE, Jun. 2009, pp. 2294–2301. abs_all. jsp?arnumber=5206707 [AP11] G. Azzopardi and N. Petkov, “Detection of retinal vascular bifurcations by trainable v4-like -filters,” in Computer Analysis of Images and +filters,” in Computer Analysis of Images and Patterns . Springer, 2011, pp. 451–459. [Online]. Available: http://www. cs. rug. nl/~imaging/databases/ retina_database/retinalfeatures_database. html @@ -1217,7 +1217,7 @@ MICCAI 2000 . Springer, 2000, pp. 276– 286. [Online]. Available: http://link . springer . com/ chapter/10 . 1007/978- 3-540- 40899-4_28 [BKR11] A. Blake, P. Kohli, and C. Rother, Markov random -fields for vision and image processing. Mit Press, +fields for vision and image processing. 
Mit Press, 2011. [BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma, “Pixel-wise segmentation of street with neural @@ -1253,7 +1253,7 @@ segmentation via adaptive k-mean clustering and knowledge-based morphological operations with biomedical applications,” Image Processing, IEEE Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec. -12 + 1998. [Online]. Available: http://ieeexplore. ieee. org/ xpls/abs_all. jsp?arnumber=730379 [CM02] D. Comaniciu and P. Meer, “Mean shift: A @@ -1264,7 +1264,7 @@ Transactions on, vol. 24, no. 5, pp. 603–619, 2002. login . jsp?tp=&arnumber=1000236 [COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, -“A pixel classification system for segmenting +“A pixel classification system for segmenting biomedical images using intensity neighborhoods and dimension reduction,” in Biomedical Imaging: From Nano to Macro, 2011 IEEE International @@ -1273,24 +1273,24 @@ Symposium on . IEEE, 2011, pp. 1649–1652. user/gustavor/chen_isbi_11. pdf [CP08] G. Csurka and F. Perronnin, “A simple high performance approach to semantic segmentation.” -in BMVC , 2008, pp. 1–10. [Online]. Avail- -able: http://www . xrce . xerox . com/layout/set/print/ -content/download/16654/118653/file/2008-023 . pdf +in BMVC , 2008, pp. 1–10. [Online]. Available: + http://www . xrce . xerox . com/layout/set/print/ +content/download/16654/118653/file/2008-023 . pdf [CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and -E. Sabo, “Colon crypt segmentation website.” [On- -line]. Available: http://mis . haifa . ac . il/~ishimshoni/ +E. Sabo, “Colon crypt segmentation website.” [Online]. + Available: http://mis . haifa . ac . il/~ishimshoni/ SegmentCrypt/Download. htm [CRSS14] ——, “Memory based active contour algorithm -using pixel-level classified images for colon crypt +using pixel-level classified images for colon crypt segmentation,” Computerized Medical Imaging and Graphics , Nov. 2014. [Online]. Available: http://mis . haifa . ac . 
il/~ishimshoni/SegmentCrypt/ -Active%20contour%20based%20on%20pixel- -level%20classified%20image%20for%20colon% +Active%20contour%20based%20on%20pixellevel%20classified%20image%20for%20colon% + 20crypts%20segmentation. pdf [CS10] J. Carreira and C. Sminchisescu, “Constrained -parametric min-cuts for automatic object segmenta- -tion,” in Computer Vision and Pattern Recognition +parametric min-cuts for automatic object segmentation,” + in Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on . IEEE, 2010, pp. 3241–3248. [CS11] ——, “Cpmc: Constrained parametric min-cuts for @@ -1305,8 +1305,8 @@ detection in dermoscopy images,” Skin Research and Technology, vol. 15, no. 4, pp. 444–450, 2009. [Online]. Available: http://arxiv. org/abs/1009. 1020 [CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear -segmentation in microscope cell images: a hand- -segmented dataset and comparison of algorithms,” +segmentation in microscope cell images: a handsegmented + dataset and comparison of algorithms,” in Biomedical Imaging: From Nano to Macro, 2009. ISBI’09. IEEE International Symposium on . IEEE, 2009, pp. 518–521. [Online]. Available: @@ -1318,8 +1318,8 @@ in Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on . IEEE, 2012, pp. 1656–1663. [Online]. Available: http: //pages. cs. wisc. edu/~jiaxu/pub/rwcoseg. pdf -[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware seman- -tic segmentation via multi-task network cascades,” +[DHS15] J. Dai, K. He, and J. Sun, “Instance-aware semantic + segmentation via multi-task network cascades,” arXiv preprint arXiv:1512.04412, 2015. [DT05] N. Dalal and B. Triggs, “Histograms of oriented gradients for human detection,” in Computer @@ -1331,14 +1331,14 @@ abs_all. jsp?arnumber=1467360 a] M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, and A. 
Zisserman, “The PASCAL Visual Object Classes Challenge -2007 (VOC2007) Results,” http://www.pascal- -network.org/challenges/VOC/voc2007/workshop/index.html. +2007 (VOC2007) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2007/workshop/index.html. + [Online]. Available: http://host . robots . ox . ac . uk: 8080/pascal/VOC/voc2007/index . html [EVGW + - b] ——, “The PASCAL Visual Object Classes Chal- -lenge 2012 (VOC2012) Results,” http://www.pascal- -network.org/challenges/VOC/voc2012/workshop/index.html. + b] ——, “The PASCAL Visual Object Classes Challenge + 2012 (VOC2012) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2012/workshop/index.html. + [Online]. Available: http://host . robots . ox . ac . uk: 8080/pascal/VOC/voc2012/index . html [EVGW + @@ -1352,17 +1352,17 @@ J. Winn, and A. Zisserman, “Visual object classes challenge 2012 (voc2012),” 2012. [Online]. Available: http://host . robots . ox . ac . uk:8080/pascal/ VOC/voc2012/index. html -[Fel] P. F. Felzenszwalb, “Graph based im- -age segmentation.” [Online]. Available: http: +[Fel] P. F. Felzenszwalb, “Graph based image + segmentation.” [Online]. Available: http: //cs . brown. edu/~pff/segment/ [FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, -and D. Ramanan, “Object detection with discrimina- -tively trained part-based models,” Pattern Analysis +and D. Ramanan, “Object detection with discriminatively + trained part-based models,” Pattern Analysis and Machine Intelligence, IEEE Transactions on , vol. 32, no. 9, pp. 1627–1645, 2010. [FH04] P. F. Felzenszwalb and D. P. Huttenlocher, -“Efficient graph-based image segmentation,” +“Efficient graph-based image segmentation,” International Journal of Computer Vision , vol. 59, no. 2, pp. 167–181, 2004. [Online]. Available: http://link . springer . com/article/10 . 1023/ @@ -1370,14 +1370,14 @@ B:VISI . 0000022288 . 19776. 77 [FKG13] J. Fritsch, T. Kuehnl, and A. 
Geiger, “A new performance measure and evaluation benchmark for road detection algorithms,” in -International Conference on Intelligent Transporta- -tion Systems (ITSC) , 2013. [Online]. Available: +International Conference on Intelligent Transportation + Systems (ITSC) , 2013. [Online]. Available: http://www . cvlibs. net/datasets/kitti/eval_road. php [GBVdW + 10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. -Bagdanov, J. Serrat, and J. Gonzalez, “Harmony po- -tentials for joint classification and segmentation,” in +Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials + for joint classification and segmentation,” in Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on. IEEE, 2010, pp. 3280– 3287. @@ -1386,8 +1386,8 @@ Computer Vision and Pattern Recognition (CVPR), D. Koller, “Multi-class segmentation with relative location prior,” International Journal of Computer Vision , vol. 80, no. 3, pp. 300–316, Apr. 2008. -[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.- -Z. Yang, “Probabilistic tracking of affine-invariant +[GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.Z. + Yang, “Probabilistic tracking of affine-invariant anisotropic regions,” Pattern Analysis and Machine Intelligence, IEEE Transactions on , vol. 35, no. 1, pp. 130–143, 2013. @@ -1395,12 +1395,12 @@ pp. 130–143, 2013. & Sons, Inc., 1975. [HDT02] C. Huang, L. Davis, and J. Townshend, “An assessment of support vector machines for land -cover classification,” International Journal of remote +cover classification,” International Journal of remote sensing , vol. 23, no. 4, pp. 725–749, 2002. [HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic lung segmentation for accurate quantitation of volumetric x-ray ct images,” Medical Imaging, IEEE -13 + Transactions on , vol. 20, no. 6, pp. 490–498, Jun. 2001. [HJBJ+ @@ -1419,12 +1419,12 @@ Proceedings of the Third International Conference on , vol. 1. IEEE, 1995, pp. 278–282. [Online]. Available: http://ect . 
bell-labs . com/who/ tkh/publications/papers/odt. pdf -[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia -Commons, Nov. 2007. [Online]. Avail- -able: https://commons . wikimedia . org/wiki/File: -CCTV_Lens_flare. jpg +[Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia +Commons, Nov. 2007. [Online]. Available: + https://commons . wikimedia . org/wiki/File: +CCTV_Lens_flare. jpg [HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, -“Multiscale conditional random fields for image +“Multiscale conditional random fields for image labeling,” in Computer Vision and Pattern Recognition, 2004. CVPR 2004. Proceedings of the 2004 IEEE Computer Society Conference @@ -1433,13 +1433,13 @@ on , vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. login . jsp?tp=&arnumber=1315232 [JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white blood cell segmentation scheme using scale-space -filtering and watershed clustering,” in Machine +filtering and watershed clustering,” in Machine Learning and Cybernetics, 2003 International Conference on , vol. 5, Nov 2003, pp. 2820–2825 Vol.5. [Online]. Available: http://ieeexplore. ieee. org/ xpl/login. jsp?tp=&arnumber=1260033 -[Kaf07] L. Kaffer, “File:great male leopard in south afrika- -jd.jpg,” Wikipedia Commons, Jul. 2007. [Online]. +[Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,” + Wikipedia Commons, Jul. 2007. [Online]. Available: https://commons. wikimedia. org/wiki/File: Great_male_Leopard_in_South_Afrika-JD . JPG [KKV+ @@ -1447,19 +1447,19 @@ Great_male_Leopard_in_South_Afrika-JD . JPG J. Pietilä, H. Kälviäinen, and H. Uusitalo, “Diaretdb1 diabetic retinopathy database and evaluation protocol,” 2014. [Online]. Available: -http://www2 . it. lut. fi/project/imageret/diaretdb1/ +http://www2 . it. lut. fi/project/imageret/diaretdb1/ [KP92] J. M. Kasson and W. Plouffe, “An analysis of selected computer interchange color spaces,” ACM Transactions on Graphics (TOG), vol. 11, no. 4, pp. 373–405, 1992. [KP06] Z. 
Kato and T.-C. Pong, “A markov random -field image segmentation model for color +field image segmentation model for color textured images,” Image and Vision Computing , vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. Available: http://www . sciencedirect . com/science/ article/pii/S0262885606001223 [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, -“Imagenet classification with deep convolutional +“Imagenet classification with deep convolutional neural networks,” in Advances in neural information processing systems , 2012, pp. 1097–1105. [KWT88] M. Kass, A. Witkin, and D. Terzopoulos, @@ -1500,23 +1500,23 @@ IEEE Conference on , June 2008, pp. 1–8. [Online]. Available: http://ieeexplore . ieee . org/xpls/ abs_all. jsp?arnumber=4587420 [Man12] M. Manske, “File:randabschattung mikroskop -kamera 6.jpg,” Wikipedia Com- -mons, Dec. 2012. [Online]. Avail- -able: https://commons . wikimedia . org/wiki/File: +kamera 6.jpg,” Wikipedia Commons, + Dec. 2012. [Online]. Available: + https://commons . wikimedia . org/wiki/File: Randabschattung_Mikroskop_Kamera_6. JPG [MBLAGJ + - 07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. Gil- -Jimenez, H. Gomez-Moreno, and F. Lopez- -Ferreras, “Road-sign detection and recognition + 07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez, + H. Gomez-Moreno, and F. LopezFerreras, + “Road-sign detection and recognition based on support vector machines,” Intelligent Transportation Systems, IEEE Transactions on , vol. 8, no. 2, pp. 264–278, Jun. 2007. [Online]. Available: http://ieeexplore . ieee . org/xpls/ abs_all. jsp?arnumber=4220659 [MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig, -“Automatic brain and tumor segmentation,” inMed- -ical Image Computing and Computer-Assisted In- -tervention—MICCAI 2002 . Springer, 2002, pp. +“Automatic brain and tumor segmentation,” inMedical + Image Computing and Computer-Assisted Intervention—MICCAI + 2002 . Springer, 2002, pp. 372–379. [MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. 
Malik, “A database of human segmented natural @@ -1532,7 +1532,7 @@ abs_all. jsp?arnumber=937655 S. Bodenstedt, A. Sanchez, C. Stock, H. G. Kenngott, M. Eisenmann, and S. Speidel, “Can masses of non-experts train highly accurate -image classifiers?” in Medical Image Computing +image classifiers?” in Medical Image Computing and Computer-Assisted Intervention–MICCAI 2014. Springer, 2014, pp. 438–445. [Online]. Available: http://opencas. webarchiv. kit. edu/?q=node/26 @@ -1542,9 +1542,9 @@ Learning , vol. 3, no. 4, pp. 319–342, 1989. [Online]. Available: http://dx . doi . org/10 . 1023/A% 3A1022645801436 [MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson, -“Markov random field models for supervised land -14 -cover classification from very high resolution +“Markov random field models for supervised land + +cover classification from very high resolution multispectral remote sensing images,” in Advances in Radar and Remote Sensing (TyWRRS), 2012 Tyrrhenian Workshop on . IEEE, 2012, pp. 235– @@ -1554,14 +1554,14 @@ xpl/login. jsp?tp=&arnumber=6381135 [Online]. Available: http://research . microsoft . com/ vision/cambridge/recognition/ [MSR] “Image understanding - research data,” -Microsoft Research. [Online]. Avail- -able: http://research . microsoft . com/en-us/projects/ +Microsoft Research. [Online]. Available: + http://research . microsoft . com/en-us/projects/ objectclassrecognition/ [Mur12] K. P. Murphy, Machine learning: a probabilistic perspective. MIT press, 2012. [OKS78] Y.-i. Ohta, T. Kanade, and T. Sakai, “An analysis -system for scenes containing objects with substruc- -tures,” in Proceedings of the Fourth International +system for scenes containing objects with substructures,” + in Proceedings of the Fourth International Joint Conference on Pattern Recognitions, 1978, pp. 752–754. [PAA + @@ -1581,7 +1581,7 @@ Available: http://arxiv. org/abs/1306. 2795v1 comparison of image segmentation algorithms,” Robotics Institute , p. 336, 2005. [Online]. 
Available: http://riweb-backend . ri . cmu . edu/ -pub _files/pub4/pantofaru _caroline _ 2005 _1/ +pub _files/pub4/pantofaru _caroline _ 2005 _1/ pantofaru_caroline_2005_1 . pdf [PS07] A. Protiere and G. Sapiro, “Interactive image segmentation via adaptive weighted @@ -1589,9 +1589,9 @@ distances,” Image Processing, IEEE Transactions on , vol. 16, no. 4, pp. 1046–1057, 2007. [Online]. Available: http://ieeexplore . ieee . org/xpls/ abs_all. jsp?arnumber=4130436 -[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multi- -class image segmentation using conditional random -fields and global classification,” in Proceedings +[PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass + image segmentation using conditional random +fields and global classification,” in Proceedings of the 26th Annual International Conference on Machine Learning. ACM, 2009, pp. 817–824. [PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A @@ -1605,8 +1605,8 @@ dx. doi. org/10. 1146/annurev . bioeng . 2. 1. 315 Machine learning , vol. 1, no. 1, pp. 81–106, Aug. 1986. [Online]. Available: http://dx . doi . org/ 10 . 1023/A%3A1022643204877 -[Qui93] ——, C4.5: Programs for Machine Learning, P. Lan- -gley, Ed. Morgan Kaufmann Publishers, Inc., 1993. +[Qui93] ——, C4.5: Programs for Machine Learning, P. Langley, + Ed. Morgan Kaufmann Publishers, Inc., 1993. [RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut: Interactive foreground extraction using iterated graph cuts,” ACM Transactions on Graphics @@ -1614,11 +1614,11 @@ graph cuts,” ACM Transactions on Graphics Available: http://delivery. acm. org/10. 1145/1020000/ 1015720/p309- rother. pdf [RM00] J. B. Roerdink and A. Meijster, “The watershed -transform: Definitions, algorithms and paralleliza- tion strategies,” Fundam. Inform. , vol. 41, no. 1-2, +transform: Definitions, algorithms and paralleliza- tion strategies,” Fundam. Inform. , vol. 41, no. 1-2, pp. 187–228, 2000. [RM07] J. Reynolds and K. 
Murphy, “Figure-ground segmentation using a hierarchical conditional -random field,” in Computer and Robot +random field,” in Computer and Robot Vision, 2007. CRV ’07. Fourth Canadian Conference on , May 2007, pp. 175–182. [Online]. Available: http://ieeexplore . ieee . org/xpls/ @@ -1641,8 +1641,8 @@ no. 4, pp. 501–509, 2004. [Online]. Available: http://www . isi . uu. nl/Research/Databases/DRIVE/ [SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, “Object class segmentation using random -forests.” in BMVC , 2008, pp. 1–10. [On- -line]. Available: http://research. microsoft. com/pubs/ +forests.” in BMVC , 2008, pp. 1–10. [Online]. + Available: http://research. microsoft. com/pubs/ 72423/Criminisi_bmvc2008. pdf [SJC08] J. Shotton, M. Johnson, and R. Cipolla, @@ -1653,7 +1653,7 @@ Conference on . IEEE, Jun. 2008, pp. 1–8. [Online]. Available: http://ieeexplore . ieee . org/xpls/ abs_all. jsp?arnumber=4587503 [SM11] C. Sutton and A. McCallum, “An introduction -to conditional random fields,” Machine Learning , +to conditional random fields,” Machine Learning , vol. 4, no. 4, pp. 267–373, 2011. [Online]. Available: http://homepages . inf . ed . ac . uk/csutton/ publications/crftutv2 . pdf @@ -1661,13 +1661,13 @@ publications/crftutv2 . pdf analysis,” Cornell University, USA , vol. 51, p. 52, 2002. [Smi04] B. T. Smith, “Lagrange multipliers tutorial in the -context of support vector machines,” Memorial Uni- -versity of Newfoundland St. John’s, Newfoundland, +context of support vector machines,” Memorial University + of Newfoundland St. John’s, Newfoundland, Canada , Jun. 2004. [SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery, segmentation and reactive grasping of unknown -objects.” in Humanoids , 2012, pp. 71–77. [On- -line]. Available: http://h2t . anthropomatik . kit . edu/ +objects.” in Humanoids , 2012, pp. 71–77. [Online]. + Available: http://h2t . anthropomatik . kit . edu/ pdf/Schiebener2012. pdf [SUM+ 11] D. Schiebener, A. Ude, J. 
Morimotot, @@ -1687,7 +1687,7 @@ Springer, 2006, pp. 1–15. [Online]. Available: http: [TNL14] J. Tighe, M. Niethammer, and S. Lazebnik, “Scene parsing with object instances and occlusion ordering,” in Computer Vision and -15 + Pattern Recognition (CVPR), 2014 IEEE Conference on . IEEE, 2014, pp. 3748–3755. [Online]. Available: http://ieeexplore . ieee . org/xpls/ @@ -1701,8 +1701,8 @@ Conference on . IEEE, 2005, pp. 34–34. [Online]. Available: http://repository . cmu . edu/cgi/ viewcontent. cgi?article=1365&context=robotics [vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. -van den Herik, “Dimensionality reduction: A com- -parative review,” Journal of Machine Learning +van den Herik, “Dimensionality reduction: A comparative + review,” Journal of Machine Learning Research, vol. 10, no. 1-41, pp. 66–71, 2009. [VOC10] “Voc2010 preliminary results,” 2010. [Online]. Available: http://host . robots . ox . ac . uk/pascal/VOC/ @@ -1716,7 +1716,7 @@ and R. Mösges, Eds. Springer Berlin Heidelberg, http://dx . doi . org/10. 1007/BFb0029257 [YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell segmentation in microscopy imagery using a -bag of local bayesian classifiers,” in Biomedical +bag of local bayesian classifiers,” in Biomedical Imaging: From Nano to Macro, 2010 IEEE International Symposium on , Apr. 2010, pp. 125– 128. [Online]. Available: http://ieeexplore. ieee. org/ @@ -1730,8 +1730,8 @@ vol. 34, no. 9, pp. 1731–1743, Sep. 2012. abs_all. jsp?arnumber=6042883 [ZBS01]Y. Zhang, M. Brady, and S. Smith, “Segmentation of brain MR images through a hidden Markov -random field model and the expectation- -maximization algorithm,” Medical Imaging, IEEE +random field model and the expectationmaximization + algorithm,” Medical Imaging, IEEE Transactions on , vol. 20, no. 1, pp. 45–57, 2001. [Online]. Available: http://ieeexplore . ieee . org/xpls/ abs_all. jsp?arnumber=906424 @@ -1746,7 +1746,7 @@ MultiMedia, IEEE , vol. 19, no. 2, pp. 4–10, Feb. [ZJRP+ 15] S. 
Zheng, S. Jayasumana, B. Romera-Paredes, V. Vineet, Z. Su, D. Du, C. Huang, and -P. H. Torr, “Conditional random fields as +P. H. Torr, “Conditional random fields as recurrent neural networks,” in Proceedings of the IEEE International Conference on Computer Vision , 2015, pp. 1529–1537. [Online]. @@ -1767,7 +1767,7 @@ PCA principal component analysis. 5 RBF radial basis function. 8 SIFT scale-invariant feature transform. 5 SVM Support Vector Machine. 4, 6–8 -16 + A PPENDIX A TABLES Database Image Resolution (width × height) Number @@ -1786,4 +1786,4 @@ Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK + PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW + 12] Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] -Table I: An overview over publicly available image databases with a semantic segmentation ground trouth. +Table I: An overview over publicly available image databases with a semantic segmentation ground trouth. \ No newline at end of file diff --git a/read/results/playa/1707.09725.txt b/read/results/playa/1707.09725.txt index 8a18faf..5769be5 100644 --- a/read/results/playa/1707.09725.txt +++ b/read/results/playa/1707.09725.txt @@ -25,50 +25,50 @@ Department of Computer Science, 2017 Gutachter: Prof. Dr.–Ing. R. Dillmann, Prof. Dr.–Ing. J. M. Zöllner Abteilung Technisch Kognitive Assistenzsysteme FZI Research Center for Information Technology -Affirmation -Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfs- -mittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus +Affirmation +Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfsmittel + vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde. 
Karlsruhe, Martin Thoma August 2017 - v + Abstract Convolutional Neural Networks (CNNs) dominate various computer vision tasks since -Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error +Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error from 26.2 % to 15. 3 % on the ImageNet large scale visual recognition challenge. Many aspects of CNNs are examined in various publications, but literature about the analysis and construction of neural network architectures is rare. This work is one step to close this gap. A comprehensive overview over existing techniques for CNN analysis and topology -construction is provided. A novel way to visualize classification errors with confusion -matrices was developed. Based on this method, hierarchical classifiers are described and -evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For +construction is provided. A novel way to visualize classification errors with confusion +matrices was developed. Based on this method, hierarchical classifiers are described and +evaluated. Additionally, some results are confirmed and quantified for CIFAR-100. For example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation and test-time transformations on the accuracy. Other results, such as the positive impact of -learned color transformation on the test accuracy could not be confirmed. A model which +learned color transformation on the test accuracy could not be confirmed. A model which has only one million learned parameters for an input size of32 × 32 × 3 and 100 classes and which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and STL-10 was developed. 
- vii + Zusammenfassung Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese -effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual +effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual recognition challenge Benchmark von 26 .2 % auf 15.3 % drücken konnte. Viele Aspekte -von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleich- -sweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen +von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise + wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine umfassende Überblick über Analyseverfahren und Topologielernverfahren wird gegeben. Ein -neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde -entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt -und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtun- -gen wie z.B. der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der -Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die In- -varianzbildung durch künstliche Transformationen zur Test-Zeit (Test-time transformations) -experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss +neues Verfahren zur Visualisierung der Klassifikationsfehler mit Konfusionsmatrizen wurde +entwickelt. Basierend auf diesem Verfahren wurden hierarchische Klassifizierer eingeführt +und evaluiert. Zusätzlich wurden einige bereits in der Literatur beschriebene Beobachtungen + wie z.B. 
der positive Einfluss von kleinen Batch-Größen, Ensembles, Erhöhung der +Trainingsdatenmenge durch künstliche Transformationen (Data Augmentation) und die Invarianzbildung + durch künstliche Transformationen zur Test-Zeit (Test-time transformations) +experimentell bestätigt. Andere Beobachtungen, wie beispielsweise der positive Einfluss gelernter Farbraumtransformationen konnten nicht bestätigt werden. Ein Modell welches weniger als eine Millionen Parameter nutzt und auf den Benchmark-Datensätzen Asirra, -GTSRB, HASYv2 und STL-10 den Stand der Technik neu definiert wurde entwickelt. +GTSRB, HASYv2 und STL-10 den Stand der Technik neu definiert wurde entwickelt. Acknowledgment I would like to thank Stephan Gocht and Marvin Teichmann for the many inspiring conversations we had about various topics, including machine learning. @@ -76,7 +76,7 @@ I also want to thank my father for the support he gave me. He made it possible f study without having to worry about anything besides my studies. Thank you! Finally, I want to thank Timothy Gebhard, Daniel Schütz and Yang Zhang for proof-reading my masters thesis and Stephan Gocht for giving me access to a GTX 1070. - ix + This work can be cited the following way: @MastersThesis{Thoma:2017, Title = {Analysis and Optimization of Convolutional Neural Network @@ -126,10 +126,10 @@ Contents 3.2 Pruning approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29 3.3 Genetic approaches . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 3.4 Reinforcement Learning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 -xi + 3.5 Convolutional Neural Fabrics . . . . . . . . . . . . . . . . . . . . . . . . . . 31 -4 Hierarchical Classification 33 -4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34 +4 Hierarchical Classification 33 +4.1 Advantages of classifier hierarchies . . . . . . . . . . . . . . . . . . . . . . 34 4.2 Clustering classes . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . 34 5 Experimental Evaluation 37 5.1 Baseline Model and Training setup . . . . . . . . . . . . . . . . . . . . . . . 38 @@ -138,7 +138,7 @@ xi 5.1.3 Training behavior . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 45 5.2 Confusion Matrix Ordering . . . . . . . . . . . . . . . . . . . . . . . . . . . . 48 5.3 Spectral Clustering vs CMO . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 -5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 +5.4 Hierarchy of Classifiers . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 5.5 Increased width for faster learning . . . . . . . . . . . . . . . . . . . . . . . 54 5.6 Weight updates . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 55 5.7 Multiple narrow layers vs One wide layer . . . . . . . . . . . . . . . . . . . . 56 @@ -149,7 +149,7 @@ xi 5.12 Pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 5.13 Activation Functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 60 5.14 Label smoothing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 -5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 +5.15 Optimized Classifier . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 5.16 Early Stopping vs More Data . . . . . . . . . . . . . . . . . . . . . . . . . . 68 5.17 Regularization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 68 6 Conclusion and Outlook 71 @@ -178,26 +178,26 @@ G List of Figures 101 H Bibliography 103 I Glossary 119 -1. Introduction -Computer vision is the academic field which aims to gain a high-level understanding of the +. Introduction +Computer vision is the academic field which aims to gain a high-level understanding of the low-level information given by raw pixels from digital images. 
Robots, search engines, self-driving cars, surveillance agencies and many others have -applications which include one of the following six problems in computer vision as sub- -problems: -• Classification:1 +applications which include one of the following six problems in computer vision as subproblems: + +• Classification:1 The algorithm is given an image and k possible classes. The task is to decide which of the k classes the image belongs to. For example, an image from a self-driving cars on-board camera contains either paved road , unpaved road or no road : Which of those given three classes is in the image? -• Localization: The algorithm is given an image and one class k . The task is to find +• Localization: The algorithm is given an image and one class k . The task is to find bounding boxes for all instances of k . -• Detection: Given an image and k classes, find bounding boxes for all instances of +• Detection: Given an image and k classes, find bounding boxes for all instances of those classes. • Semantic Segmentation : Given an image and k classes, classify each pixel. • Instance segmentation: Given an image and k classes, classify each pixel as one of -the k classes, but distinguish different instances of the classes. +the k classes, but distinguish different instances of the classes. • Content-based Image Retrieval : Given an image x and n images in a database, -find the top u images which are most similar to x . +find the top u images which are most similar to x . There are many techniques to approach those problems, but since AlexNet [ KSH12] was published, all of those problems have high-quality solutions which make use of Convolutional Neural Networks (CNNs) [HZRS15a, LAE + @@ -208,59 +208,59 @@ are methods for analyzing CNNs, those methods are not enough to determine all st the development of network architectures without gut feeling. A detailed introduction to CNNs as well as nine methods for analysis of CNNs is given in Chapter 2. 
1 - Classification is also called identification if the classes are humans. Another name is object recognition, + Classification is also called identification if the classes are humans. Another name is object recognition, although the classes can be humans and animals as well. - 1 + 1. Introduction Despite the fact that most researchers and developers do not use topology learning, a couple of algorithms have been proposed for this task. Five classes of topology learning algorithms are introduced in Chapter 3. When datasets and the number of classes are large, evaluating a single idea how to improve the network can take several weeks just for the training. Hence the idea of building a -hierarchy of classifiers which allows to split the classification task into various sub-tasks +hierarchy of classifiers which allows to split the classification task into various sub-tasks that can easily be combined is evaluated in Chapter 4. -Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters +Confusion Matrix Ordering (CMO), the hierarchical classifier, 9 types of hyperparameters and label smoothing are evaluated in Chapter 5. -This work focuses on classification problems to keep the presented ideas as pure and +This work focuses on classification problems to keep the presented ideas as pure and simple as possible. The described techniques are relevant to all six described computer vision problems due to the fact that Encoder-Decoder architectures are one component of state-of-the-art algorithms for all six of them. -2 + 2. Convolutional Neural Networks In the following, it is assumed that the reader knows what a multilayer perceptron (MLP) -is and how they are designed for classification problems, what activation functions are and +is and how they are designed for classification problems, what activation functions are and how gradient descent works. 
In case the reader needs a refresher on any of those topics, I recommend chapter 4.3 and 4.4 of [Tho14a] as well as [LBH15]. -This chapter introduces linear image filters in Section 2.1, then standard layer types of +This chapter introduces linear image filters in Section 2.1, then standard layer types of CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. 2.1. Linear Image Filters -A linear image filter (also called a filter bank or a kernel ) is an element F ∈ R k +A linear image filter (also called a filter bank or a kernel ) is an element F ∈ R k w × k h × d , where k - w represents the filter’s width, k -h the filter’s height and d the number of input -channels. The filter F is convolved with the image I ∈ R w × h× d - to produce a new image I + w represents the filter’s width, k +h the filter’s height and d the number of input +channels. The filter F is convolved with the image I ∈ R w × h× d + to produce a new image I . -The output image I - has only one channel. Each pixel I +The output image I + has only one channel. Each pixel I ( x, y ) of the output image gets -calculated by point-wise multiplication of one filter element with one element of the original +calculated by point-wise multiplication of one filter element with one element of the original image I : - I + I (x, y ) = k w -2 +2 i x =1− k w 2 k h -2 +2 i y =1− k @@ -278,7 +278,7 @@ c ) This procedure is explained by Figure 2.1. It is essentially a discrete convolution.I ∈ R 7 ×7 Filter kernel F ∈ R3× 3 Result of point-wise -multiplication I +multiplication I ∈ R 7 ×7 104 116 116 112 58 47 47 109 97 114 116 105 110 45 @@ -297,16 +297,16 @@ multiplication I -567 -53 -75 80 571 -128 24 -408 596 -550 368 26 976 156 302 647 879 223 811 54 660 -Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. 
For each pixel of the +Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the output image, k 2 multiplications and k 2 additions of the products have to be calculated. -3 + 2. Convolutional Neural Networks One important detail is how boundaries are treated. There are four common ways of boundary treatment: -• don’t compute: The image I - will be smaller than the original image. I +• don’t compute: The image I + will be smaller than the original image. I ∈ R (w − k w +1)× (h− k @@ -314,23 +314,23 @@ h +1)× d 3 , to be exact. • zero padding - : The image I is padded by zeros where the filter would access elements + : The image I is padded by zeros where the filter would access elements which do not exist. This will result in edges being detected at the border if the border pixels are not black, but doesn’t need any computation. • nearest: Repeat the pixel which is closest to the boundary. -• reflect: Reflect the image at the boundaries. -Common tasks that can be done with linear filters include edge detection, corner detection, -smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples. -Please note that the result of a filtering operation is again an image. This means filters -can be applied successively. While each pixel after one filtering operation with a 3 × 3 -filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3 -filters increase the area of the original image which influenced the output. The output is -then influenced by 25 pixel. This is called the receptive field. The kind of pattern which is -detected by a filter is called a feature . The bigger the receptive field is, the more complex +• reflect: Reflect the image at the boundaries. +Common tasks that can be done with linear filters include edge detection, corner detection, +smoothing, sharpening, median filtering, box filtering. See Figure A.1 for five examples. 
+Please note that the result of a filtering operation is again an image. This means filters +can be applied successively. While each pixel after one filtering operation with a 3 × 3 +filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3 +filters increase the area of the original image which influenced the output. The output is +then influenced by 25 pixel. This is called the receptive field. The kind of pattern which is +detected by a filter is called a feature . The bigger the receptive field is, the more complex can features get as they are able to consider more of the original image. Instead of taking -one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters -with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering -operations compared to the two 3 × 3 filters, but the relevance of this technique will become +one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters +with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering +operations compared to the two 3 × 3 filters, but the relevance of this technique will become clear in Section 2.2. 2.2. CNN Layer Types While the idea behind deep MLPs is that feature hierarchies capture the important parts @@ -338,14 +338,14 @@ of the input more easily, CNNs are inspired by the idea of translational invaria features in an image are translationally invariant. For example, if a car is developed, one could try to detect it by its parts [FGMR10 ]. But then there are many positions at which the wheels could be. Combining those, it is desirable to capture low-level, translationally -invariant features at lower layers of an artificial neural network (ANN) and in higher layers +invariant features at lower layers of an artificial neural network (ANN) and in higher layers high-level features which are combinations of the low-level features. 
Also, models should utilize the fact that the pixels of images are ordered. One way to use -this is by learning image filters in so called convolutional layers . +this is by learning image filters in so called convolutional layers . While MLPs vectorize the input, the input of a layer in a CNN arefeature maps. A feature map is a matrix m ∈ R w ×h , but typically the width equals the height (w = h). For an RGB -4 + 2.2. CNN Layer Types input image, the number of feature maps is d = 3. Each color channel is a feature map. Since AlexNet [ KSH12] almost halved the error in the ImageNet challenge, CNNs are @@ -357,15 +357,15 @@ Traditional CNNs have three important building tools: 2.2.1. Convolutional Layers Convolutional layers take several feature maps as input and produce n feature maps 1 as -output, where n is the number of filters in the convolution layer. The filter weights of +output, where n is the number of filters in the convolution layer. The filter weights of the linear convolutions are the parameters which are adapted to the training data. The -number n of filters as well as the filter’s size k +number n of filters as well as the filter’s size k w × k h are hyperparameters of convolutional layers. Sometimes, it is denoted as n @k w × k - h . Although the filter depth is usually omitted -in the notation, the filters are of dimensionk + h . Although the filter depth is usually omitted +in the notation, the filters are of dimensionk w × k h × d(i − 1) , where d(i − 1) @@ -377,12 +377,12 @@ Padding (usually zero-padding [SCL12, SEZ+ 13, HZRS15a]) is used to make sure that the size of the feature maps doesn’t change. 
The hyperparameters of convolutional layers are -• the number of filters n ∈ N +• the number of filters n ∈ N ≥ 1 , • k w , k h ∈ N - ≥1 of the filter size k + ≥1 of the filter size k w × k h × d( i −1) , @@ -393,17 +393,17 @@ Typical choices are n ∈ { 32, 64, 128 }, k w = k h = k ∈ { 1 , 3 , 5 , 11 } such as in [ KSH12, SZ14, SLJ + - 15], rectified linear unit (ReLU) activation and s = 1. + 15], rectified linear unit (ReLU) activation and s = 1. The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH + 89]. -With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just +With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just like MLPs. In fact, every CNN has an equivalent MLP which computes the same function -if only the flattened output is compared. +if only the flattened output is compared. 1 also called activation maps or channels - 5 + 2. Convolutional Neural Networks -This is easier to see when the filtering operation is denoted formally: +This is easier to see when the filtering operation is denoted formally: o (i ) (x ) = b + k @@ -413,14 +413,14 @@ j =1 w o (x,y,z ) (I ) = b + k w -2 +2 i x =1− k w 2 k h -2 +2 i y =1− k @@ -439,13 +439,13 @@ c ) [2.2] with a bias b ∈ R , x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } One can see that most weights of the equivalent MLP are zero and many weights are equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. -The effect of fewer parameters is that less training data is necessary to get suitable +The effect of fewer parameters is that less training data is necessary to get suitable estimations for those. This means a MLP which is able to compute the same functions as a CNN will likely have worse results on the same dataset, if a CNN architecture is suitable for the dataset. See Figure 2.2 for a visualization of the application of a convolutional layer. 
3 feature maps -(e.g. RGB) n feature mapsn filters of +(e.g. RGB) n feature mapsn filters of size k × k × 3 width w width wheight h height hneural network @@ -455,11 +455,11 @@ data apply . . .. . . . . . . . . -Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride +Figure 2.2.: Application of a single convolutional layer with n filters of size k × k × 3 with stride s = 1 to input data of size width × height with three channels. -6 + 2.2. CNN Layer Types -A convolutional layer with n filters of size k +A convolutional layer with n filters of size k w × k h and SAME padding after d(i − 1) feature @@ -478,23 +478,23 @@ y ) 2 parameters. One the one hand, this means it can learn less complex decision boundaries. On the other hand, it means fewer parameters have to be learned and hence the optimization procedure needs fewer examples and the optimization ob jective is simpler. -It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does +It is particularly interesting to notice that even a convolutional layer of 1 × 1 filters does learn a linear combination of the d input feature maps. This can be used for dimensionality -reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps. +reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps. Another insight recently got important: Every fully connected layer has an equivalent convolutional layer which has the same weights. 2 This way, one can use the complete -classification network as a very complex non-linear image filter which can be used for +classification network as a very complex non-linear image filter which can be used for semantic segmentation. A fully connected layer with d ∈ N ≥1 inputs and n ∈ N ≥1 nodes can be interpreted as a -convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1 . 
This will +convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1 . This will produce an output shape 1 × 1 × n . Every single output is connected to all of the inputs. When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize -to feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output, +to feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output, it is completely equivalent to a fully connected layer. However, the vectorization can be -omitted if a convolution layer without padding and a filter size equal to the feature maps +omitted if a convolution layer without padding and a filter size equal to the feature maps size is applied. This was used by [LSD15]. 2.2.2. Pooling Layers Pooling summarizes a p × p area of the input feature map. Just like convolutional layers, @@ -507,16 +507,16 @@ in Table 2.1, spatial pyramid pooling as introduced in [ HZRS14] and generalizin functions as introduced in [LGT16]. 2 But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1 -7 + 2. Convolutional Neural Networks -Name Definition Used by +Name Definition Used by Max pooling max { a ∈ A } [BPL10, KSH12] Average / mean pooling 1 -| A | +| A | a∈ A a LeNet-5 [LBBH98] and [KSlB + 10] -2 pooling +2 pooling a∈A a 2 [Le13] @@ -545,7 +545,7 @@ Average pooling of p × p areas with stride s can be replaced by a convolutional the input of the pooling layer are d(i −1) feature maps, the convolutional layer has to have d(i −1) - filters of size p × p and stride s . The i th filter has the values + filters of size p × p and stride s . The i th filter has the values    @@ -583,10 +583,10 @@ for the dimension i and the zero matrix  for all other dimensions i = 1, . . . , d ( i −1) . -8 + 2.2. CNN Layer Types 2.2.3. 
Dropout -Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting +Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting the output of any neuron to zero with probabilityp. It was introduced in [HSK+ 12] and is well-described in [SHK + @@ -605,17 +605,17 @@ where is the Hadamard product i,j ( B ) i,j Hence every value of the input gets set to zero with a dropout probability of p. Typically, -Dropout is used with p = 0. 5. Layers closer to the input usually have a lower dropout prob- -ability than later layers. In order to keep the expected output at the same value, the +Dropout is used with p = 0. 5. Layers closer to the input usually have a lower dropout probability + than later layers. In order to keep the expected output at the same value, the output of a dropout layer is multiplied with 1 1− p when dropout is enabled [ Las17, tf-16b]. At inference time, dropout is disabled. Dropout is usually only applied after fully connected layers, but not after convolutional layers as it usually increases the test error as pointed out in [GG16]. -Models which use Dropout can be interpreted as an ensemble of models with different +Models which use Dropout can be interpreted as an ensemble of models with different numbers of neurons in each layer, but also with weight sharing. -Conceptually similar are DropConnect and networks with stochastic depth. DropCon- -nect [ WZZ+ +Conceptually similar are DropConnect and networks with stochastic depth. DropConnect + [ WZZ+ 13] is a generalization of Dropout, which sets weights to zero in contrast to setting the output of a neuron to zero. Networks with stochastic depth as introduced in [HSL+ @@ -628,12 +628,12 @@ parameters of layers close to the output are adapted to some input produced by l those lower layers parameters are also adapted. This leads to the parameters in the upper layers being worse. 
A very low learning rate has to be chosen to adjust for the fact that the input features might drastically change over time. - 9 + 2. Convolutional Neural Networks One way to approach this problem is by normalizing mini-batches as described in [IS15]. A Batch Normalization layer with d-dimensional input x = (x (1) , . . . , x ( d) - ) is first normalized + ) is first normalized point-wise to ˆx( k ) = x (k ) @@ -645,14 +645,14 @@ s + ε with ¯x (k ) = 1 -m +m m i =1 x (k ) i being the sample mean and s [ x ( k ) ] 2 = 1 -m +m m i =1 ( x (k ) i − ¯x (k ) @@ -675,8 +675,8 @@ In the case of fully connected layers, this is applied to the activation, before is applied. If it is applied after the activation, it harms the training in early stages. For convolution, only one γ and one β is learned per feature map. One important special case is γ (k ) - = -s + = +s [x (k ) ] 2 + ε and β (k ) @@ -697,12 +697,12 @@ The authors of [ IS15] suggest to use Batch Normalization before the activation as in Items 1 and 4. Batch Normalization after the activation lead to better results in https://github. com/ducha- aiki/caffenet-benchmark/blob/master/batchnorm.md Another normalization layer is Local Response Normalization as described in [ KSH12], -which includes +which includes 2 normalization as described in [WWQ13 ]. Those two normalization layers, however, are superseded by Batch Normalization. 3 also called inference time -10 + 2.3. CNN Blocks 2.3. CNN Blocks This section describes more complex building blocks than simple layers. CNN blocks act @@ -738,16 +738,16 @@ x Figure 2.4.: ResNet module Image source: [HZRS15a] [HM16] provides some insights why deep residual networks are successful. - 11 + 2. Convolutional Neural Networks 2.3.2. Aggregation Blocks Two common ways to add more parameters to neural networks are increasing their depth -by adding more layers or increasing their width by adding more neurons / filters. 
Inception +by adding more layers or increasing their width by adding more neurons / filters. Inception blocks [AM15] implicitly started a new idea which was explicitly described in [XGD + 16] as “ResNeXt block”: Increasing the cardinality C ∈ N ≥1 . By cardinality, the authors describe -the concept of having C small convolutional networks with the same topology but different +the concept of having C small convolutional networks with the same topology but different weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not combine aggregation blocks with residual blocks as the authors did. 256-d in @@ -759,16 +759,16 @@ groups 4 @ 3 × 3 × 4 4 @ 1 × 1 × 256 4 @ 3 × 3 × 4 Figure 2.5.: Aggregation block with a cardinality of C = 32 . Each of the 32 groups is a 2-layer -convolutional network. The first layer receives 256 feature maps and applies four1 × 1 -filters to it. The second layer applies four 3 × 3 filters. Although every group has -the same topology, the learned weights are different. The outputs of the groups are +convolutional network. The first layer receives 256 feature maps and applies four1 × 1 +filters to it. The second layer applies four 3 × 3 filters. Although every group has +the same topology, the learned weights are different. The outputs of the groups are concatenated. The hyperparameters of an aggregation block are: • The topology of the group members. • The cardinality C ∈ N ≥1 . Note that a cardinality of C = 1 is equivalent in every aspect to using the group network without an aggregation block. -12 + 2.3. CNN Blocks 2.3.3. Dense Blocks Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The @@ -776,9 +776,9 @@ idea is to connect each convolutional layer directly to subsequent convolutional Traditional CNNs with L layers and one input layer have L connections between layers, but dense blocks have L( L+1) 2 connections between layers. 
The input feature maps are -concatenated in depth. According to the authors, this prevents features from being re- -learned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 -have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors +concatenated in depth. According to the authors, this prevents features from being relearned + and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 +have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors used only on the order of 12 feature maps per layer. A dense block is visualized in Figure 2.6. 256 -d in @@ -791,34 +791,34 @@ k -d k -d (256 + L · k )-d out Figure 2.6.: Dense block with L = 2 layers and a growth factor of k . -Dense block have five hyperparameters: +Dense block have five hyperparameters: • The activation function being used. The authors use ReLU. • The size k w × k -h of filters. The authors use k +h of filters. The authors use k w = k h = 3. • The number of layers L, where L = 2 is a simple convolutional layer. -• The number k of filters added per layer (called growth rate in the paper) +• The number k of filters added per layer (called growth rate in the paper) It might be necessary use 1 × 1 convolutions to reduce the number of L · k feature maps. -13 + 2. Convolutional Neural Networks 2.4. Transition Layers Transition layers are used to overcome constraints imposed by resource limitations or architectural design choices. One constraint is the number of feature maps (see Appendix C.3 for details). In order to reduce the number of feature maps while still keeping as much relevant information as possible in the network, a convolutional layer i with k - i filters of + i filters of the shape 1 × 1 × k -i − 1 is added. The number of filters k +i − 1 is added. The number of filters k i directly controls the number of generated feature maps. 
In order to reduce the dimensionality (width and height) of the feature maps, one typically applies pooling. Global pooling is another type of transition layer. It applies pooling over the complete feature map size to shrink the input to a constant 1 × 1 feature map and hence allows one -network to have different input sizes. -14 +network to have different input sizes. + 2.5. Analysis Techniques 2.5. Analysis Techniques CNNs have dozens of hyperparameters and ways to tune them. Although there are @@ -829,7 +829,7 @@ manual investigation to improve the model’s quality. For this reason, analysis which guide developers and researchers to the important hyperparameters are necessary. In the following, nine diagnostic techniques are explained. A machine learning developer has the following choices to improve the model’s quality: -(I1)Change the problem definition (e.g., the classes which are to be distinguished) +(I1)Change the problem definition (e.g., the classes which are to be distinguished) (I2)Get more training data (I3)Clean the training data (I4)Change the preprocessing (see Appendix B.1) @@ -837,53 +837,53 @@ A machine learning developer has the following choices to improve the model’s (I6)Change the training setup (see Appendices B.3 to B.5) (I7)Change the model (see Appendices B.6 and B.7) The preprocessing is usually not changed in modern architectures. However, this still leaves -six very different ways to improve the classifier. Changing the training setup and the model +six very different ways to improve the classifier. Changing the training setup and the model each have too many possible choices to explore them completely. Thus, techniques are necessary to guide the developer to changes which are most promising to improve the model. For all of the following methods, it is important to use only the training set and the validation set. 2.5.1. 
Qualitative Analysis by Example The most basic analysis technique which should always be used is looking at examples -which the network correctly predicted with a high certainty and what the classifier got +which the network correctly predicted with a high certainty and what the classifier got wrong with a high certainty. Those examples can be arranged by applying t-SNE [MH08]. One the one hand, this might reveal errors in the training data. Most of the time, training -data is manually labeled by humans who make mistakes. If a model is fit to those errors, +data is manually labeled by humans who make mistakes. If a model is fit to those errors, its quality decreases. -On the other hand, this can show differences in the distribution of validation data which +On the other hand, this can show differences in the distribution of validation data which are not covered by the training set and thus indicate the need to collect more data. - 15 + 2. Convolutional Neural Networks 2.5.2. Confusion Matrices A confusion matrix is a matrix ( c) ij ∈ N K ×K ≥ 0 , where K ∈ N ≥ 2 is the number of classes, -which contains all correct and wrong classifications. The item c +which contains all correct and wrong classifications. The item c ij is the number of times -items of class i were classified as class j . This means the correct classification is on the +items of class i were classified as class j . This means the correct classification is on the diagonal c -ii and all wrong classifications are of the diagonal. The sum +ii and all wrong classifications are of the diagonal. The sum K -i =1 +i =1 K j =1 c ij is the -total number of samples which were evaluated and +total number of samples which were evaluated and i =1 c ii K -i=1 +i=1 K j =1 c ij is the accuracy. -The sums r ( i ) = +The sums r ( i ) = K j =1 c ij of each class i are worth being investigated as they show if the classes are skewed. 
If the number of samples of one class dominates the data set, then the -classifier can get a high accuracy by simply always prediction the most common class. If -the accuracy of the classifier is close to the a priory probability of the most common class, +classifier can get a high accuracy by simply always prediction the most common class. If +the accuracy of the classifier is close to the a priory probability of the most common class, techniques to deal with skewed classes might help. An automatic criterion to check for this problem is accuracy ≤ max({ r (i ) | i = 1, . . . , k } ) @@ -893,7 +893,7 @@ i =1 r (i ) + ε where ε is a small value to compensate the fact that some examples might be correct just by chance. Other values which should be checked are the class-wise sensitivities: -s (k ) = # correctly identified instances of class k +s (k ) = # correctly identified instances of class k # instances of class k = c kk r (k ) ∈ [0, 1] @@ -913,7 +913,7 @@ j =1 c k 1 j indicates if class k -1 gets often classified as class k +1 gets often classified as class k 2 . The highest values here can indicate if two classes should be merged or a specialized model for separating those classes could improve the overall system. @@ -923,10 +923,10 @@ axis and a quality metric on the vertical axis. Accuracy, error = (1 − accurac typical quality metrics. Other quality metrics can be found in [OHIL16]. In case that the number of training epochs are used as the examined hyperparameter, validation curves give an indicator if training longer improves the model’s performance. By -16 + 2.5. Analysis Techniques plotting the error on the training set as well as the error on a validation set, one can also -estimate if overfitting might become a problem. See Figure 2.7 for an example. +estimate if overfitting might become a problem. See Figure 2.7 for an example. 
10 20 30 40 50 60 70 80 90 1000 .20 .40 .60 .8 overfitting EpochsError @@ -934,23 +934,23 @@ estimate if overfitting might become a problem. See Figure 2.7 for an example. Validation set Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs and the quality metric is the error (1 − accuracy ) . The longer the network is trained, -the better it gets on the training set. At some point the network is fit too well to the +the better it gets on the training set. At some point the network is fit too well to the training data and loses its capability to generalize. At this point the quality curve of -the training set and the validation set diverge. While the classifier is still improving on +the training set and the validation set diverge. While the classifier is still improving on the training set, it gets worse on the validation and the test set. -When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the opti- -mization process did not improve for several epochs. Three possible ways to reduce the +When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization + process did not improve for several epochs. Three possible ways to reduce the problem of plateaus are(i)to change weight initialization if the plateau was at the beginning, (ii)regularizing the model or(iii)changing the optimization algorithm. Loss functions The loss function (also called error function or cost function ) is a function which assigns a -real value to a complex event like the predicted class of a feature vector. It is used to define -the objective function. For classification problems the loss function is typically cross-entropy -with - 1 or +real value to a complex event like the predicted class of a feature vector. It is used to define +the objective function. 
For classification problems the loss function is typically cross-entropy +with + 1 or 2 regularization, as it was described in [NH92]: E -C E (W ) = − +C E (W ) = − x ∈ X K k =1 [ tx @@ -959,34 +959,34 @@ k ) + (1 − tx k ) log(1 − o x k )] - + cross-entropy data loss + λ -1 · +1 · 1 - + w ∈W |w | +λ - 2 · + 2 · 2 - + w ∈ W w 2 - + model complexity loss where W are the weights, X is the training data set, K ∈ N ≥ 0 is the number of classes and tx k indicates if the training example x is of class k . o x -k is the output of the classification +k is the output of the classification algorithm which depends on the weights. λ 1 , λ 2 ∈ [0, ∞) weights the regularization and is typically smaller than 0 .1 . - 17 + 2. Convolutional Neural Networks Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange curve is smoothed, but the non-smoothed curve is also plotted in light orange. -The data loss is positive whenever the classification is not correct, whereas the model +The data loss is positive whenever the classification is not correct, whereas the model complexity loss is higher for more complex models. The model complexity loss exists due to the intuition of Occam’s razor: If two models explain the same data with an accuracy of 100 %, the simpler model is to be preferred. @@ -1003,23 +1003,23 @@ The optimization process might also be stuck in a local minimum. Loss being NAN might be due to too high learning rates. Another reason is division by zero or taking the logarithm of zero. In both cases, adding a small constant like 10 −7 - fixes the problem. -• If the loss-epoch validation curve has a plateau at the beginning, the weight initializa- -tion might be bad. -18 + fixes the problem. +• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization + might be bad. + 2.5. Analysis Techniques Quality criteria -There are several quality criteria for classification models. 
Most quality criteria are based +There are several quality criteria for classification models. Most quality criteria are based the confusion matrix c which denotes at c ij the number of times the real class was i and j was predicted. This means the diagonal contains the number of correct predictions. For the following, let t - i = + i = k j =1 c ij be the number of training samples for class i. The most common quality criterion is accuracy: -accuracy( c) = +accuracy( c) = k i =1 c ii @@ -1030,7 +1030,7 @@ i ∈ [0, 1] One problem of accuracy as a quality criterion are skewed classes. If one class is by far more common than all other classes, then the simplest way to achieve a high score is to always classify everything as the most common class. -In order to fix this problem, one can use the mean accuracy: +In order to fix this problem, one can use the mean accuracy: mean-accuracy( c) = 1 k · k @@ -1041,7 +1041,7 @@ t For two-class problems there are many other metrics like precision, recall and F β -score. Quality criteria for semantic segmentation are explained in [Tho16]. -Besides the quality of the classification result, several other quality criteria are important +Besides the quality of the classification result, several other quality criteria are important in practice: • Speed of evaluation for new images, • latency, @@ -1054,23 +1054,23 @@ PMW + • robustness against (non)random perturbations in the training labels (see [ NDRT13 , XXE12]), • model size -As reducing the floating point accuracy allows to process more data on a given device [Har15], +As reducing the floating point accuracy allows to process more data on a given device [Har15], analysis under this aspect is also highly relevant in some scenarios. -However, the following focuses on the quality of the classification result. - 19 +However, the following focuses on the quality of the classification result. + 2. Convolutional Neural Networks 2.5.4. 
Learning Curves A learning curve is a plot where the horizontal axis displays the number of training samples given to the network and the vertical axis displays the error. Two curves are plotted: The error on the training set (of which the size is given by the horizontal axis) and the error on -the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the +the test set (which is of fixed size). See Figure 2.9 for an example. The learning curve for the validation set is an indicator if more training data without any other changes will improve the networks performance. Having the training set’s learning curve, it is possible to estimate -if the capacity of the model to fit the data is high enough for the desired classification error. -The error on the validation set should never be expected to be significantly lower than the +if the capacity of the model to fit the data is high enough for the desired classification error. +The error on the validation set should never be expected to be significantly lower than the error on the training set. If the error on the training set is too high, then more data will not help. Instead, the model or the training algorithm need to be adjusted. -If the training set’s learning curve is significantly higher than the validation set’s learning +If the training set’s learning curve is significantly higher than the validation set’s learning curve, then removing features (e.g., by decreasing the images resolution), more training samples or more regularization will help. 10 20 30 40 50 60 70 80 90 1000 .20 .40 .6 @@ -1080,17 +1080,17 @@ samples or more regularization will help. Validation set Training set Figure 2.9.: A typical learning curve: The more data is used for training, the more errors a given -architecture will make to fit the given training data. At the same time, it is expected +architecture will make to fit the given training data. 
At the same time, it is expected that the training data gets more similar to the true distribution of the data which should be captured by the test data. At some point, the error on the training and test set should be about the same. The term “avoidable bias” was coined by Andrew Ng [ Ng16]. In some cases it is not possible to classify data correctly by the given features. If humans can classify the data given the features correctly, however, then -the bias is avoidable by building a better classifier. +the bias is avoidable by building a better classifier. The ma jor drawback of this analysis technique is its computational intensity. In order to get one point on the training curve and one point on the testing curve, a complete training has to be executed. On the full data set, this can be several days on high-end computers. -20 + 2.5. Analysis Techniques 2.5.5. Input-feature based model explanations Understanding which clues the model took to come to its prediction is crucial to check if @@ -1098,7 +1098,7 @@ the model actually learns what the developer thinks it learns. For example, a mo has to distinguish sled dogs from Chihuahuas might simply look at the background and check if there is snow. Depending on the training and test data, this works exceptionally well. However, it is not the desired solution. -For classification problems in computer vision, there are two types of visualizations which +For classification problems in computer vision, there are two types of visualizations which help to diagnose such problems. Both color superpixels of the original image to convey information how the model used those superpixels: • Correct class heatmap : The probability of the correct class is encoded to give a @@ -1106,13 +1106,13 @@ heat map which superpixels are important for the correct class. This can also be by setting the opacity accordingly. • Most-likely class image : Each of the most likely classes for all superpixels is -represented by a color. 
The colored image thus gives clues why different predictions +represented by a color. The colored image thus gives clues why different predictions were assigned a high probability. Two methods to generate such images are explained in the following. Occlusion Sensitivity Analysis Occlusion sensitivity analysis is described in [ ZF14]. The idea is to occlude a part of the image by something. This could be a gray square as in [ ZF14] or a black superpixel as -in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., +in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., superpixel or position of the square) and the regions are then colored to generate either a correct class heatmap of the most-likely class image. It is important to note that the color at region r @@ -1122,37 +1122,37 @@ Both visualizations are shown in Figure 2.10. One can see that the network makes predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. Gradient-based approaches -In [ SVZ13], a gradient-based approach was used to generate image-specific class saliency +In [ SVZ13], a gradient-based approach was used to generate image-specific class saliency maps . The authors describe the problem as a ranking problem, where each pixel of the image I 0 is assigned a score S c (I 0 ) for a class c of interest. CNNs are non-linear functions, -but they can be approximated by the first order Taylor expansion S +but they can be approximated by the first order Taylor expansion S c (I ) ≈ w T I + b where w is the derivative of S c at I 0 . - 21 + 2. Convolutional Neural Networks 2.5.6. Argmax Method The argmax method has two variants: • Fixed class argmax : Propagate all elements of a given class through the network and analyze which neurons are activated most often / have the highest activation. 
-• Fixed neuron argmax: Propagate the data through the network and find the n +• Fixed neuron argmax: Propagate the data through the network and find the n data elements which cause the highest activation for a given neuron. -Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an +Note that a “neuron” is a filter in a CNN. The amount of activation of a filter F by an image I is calculated by applying F to I and calculating the element-wise sum of the result. Fixed-neuron argmax was applied in [ZF14]. However, they did not stop with that. Besides showing the 9 images which caused the highest activation, they also trained a deconvolutional -neural network to pro ject the activation of the filter back into pixel space. -The fixed neuron argmax can be used qualitatively to get an impression of the kind of +neural network to pro ject the activation of the filter back into pixel space. +The fixed neuron argmax can be used qualitatively to get an impression of the kind of features which are learned. This is useful to diagnose problems, for example in [AM15] it is described that the network recognized the class “dumbbell” only if a hand was present, too. Fixed neuron argmax can also be used quantitatively to estimate the amount of parameters being shared between classes or how many parameters are mainly assigned to which classes. -Going one step further from the fixed neuron argmax method is using an optimization +Going one step further from the fixed neuron argmax method is using an optimization algorithm to change an initial image minimally in such a way that any desired class gets predicted. This is called caricaturization in [MV16]. 2.5.7. Feature Map Reconstructions @@ -1161,18 +1161,18 @@ into the learned features. This shows what the network emphasizes. However, it i necessarily the case that the feature maps allow direct and easy conclusions about the learned features. This technique is called inversion in [MV16]. 
A key idea of feature map visualizations is to reconstruct a layers input, given its activation. -This makes it possible find which inputs would cause neurons to activate with extremely +This makes it possible find which inputs would cause neurons to activate with extremely high or low values. More recent work like [ NYC16] tries to make the reconstructions appearance look more natural. -22 + 2.5. Analysis Techniques 2.5.8. Filter comparison One question which might lead to some insight is how robust the features are which -are learned. If the same network is trained with the same data, but different weight +are learned. If the same network is trained with the same data, but different weight initializations, the learned weights should still be comparable. -If the set of learned filters changes with initialization, this might be an indicator for too -little capacity of that layer. Hence adding more filters to that layer could improve the +If the set of learned filters changes with initialization, this might be an indicator for too +little capacity of that layer. Hence adding more filters to that layer could improve the performance. Filters can be compared with the k -translation correlation as introduced in [ZCZL16]: ρ @@ -1185,22 +1185,22 @@ k (W j , x, y ) f W -i +i 2 W - j + j 2 ∈ [−1 , 1], -where T (·, x, y ) denotes the translation of the first operand by (x, y ), with zero padding at +where T (·, x, y ) denotes the translation of the first operand by (x, y ), with zero padding at the borders to keep the shape. ·, · -f denotes the flattened inner product, where the two -operands are flattened into column vectors before applying the standard inner product. The -closer the absolute value of the k -translation correlation to one, the more similar two filters +f denotes the flattened inner product, where the two +operands are flattened into column vectors before applying the standard inner product. 
The +closer the absolute value of the k -translation correlation to one, the more similar two filters W i , W j are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and -VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found +VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found this by comparing the averaged maximum k -translational correlation of the networks with -Gaussian-distributed initialized filters. The averaged maximum k -translational correlation -is defined as +Gaussian-distributed initialized filters. The averaged maximum k -translational correlation +is defined as ¯ρ k (W ) = 1 N N @@ -1211,8 +1211,8 @@ j =1,j = i ρ k (W i , W j ) -where N is the number of filters in the layer W and W - i denotes the ith filter. +where N is the number of filters in the layer W and W + i denotes the ith filter. 2.5.9. Weight update tracking Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if the learning rate is well-chosen. He suggests that the weight update should be in the order @@ -1222,44 +1222,44 @@ weight update is too low, then the learning rate has to be increased. The order of the weight updates as well as possible implications highly depend on the model and the training algorithm. See Appendix B.5 for a short overview of training algorithms for neural networks. - 23 + 2. Convolutional Neural Networks 2.6. Accuracy boosting techniques There are techniques which can almost always be applied to improve accuracy of CNN -classifiers: +classifiers: • Ensembles [CMS12] • Training-time augmentation (see Appendix B.2) • Test-time transformations [DDFK16, How13, HZRS15b] -• Pre-training and fine-tuning [ZDGD14, GDDM14] +• Pre-training and fine-tuning [ZDGD14, GDDM14] One of the most simple ensemble techniques which was introduced in [CMS12] is averaging -the prediction of n classifiers. 
This improves the accuracy even if the classifiers use exactly +the prediction of n classifiers. This improves the accuracy even if the classifiers use exactly the same training setup by reducing variance. Data augmentation techniques give the optimizer the possibility to take invariances like -rotation into account by generating artificial training samples from real training samples. +rotation into account by generating artificial training samples from real training samples. Data augmentation hence reduces bias and variance with no cost at inference time. -Data augmentation at inference time reduces the variance of the classifier. Similar to using +Data augmentation at inference time reduces the variance of the classifier. Similar to using an ensemble, it increases the computational cost of inference. -Pretraining the classifier on another dataset to obtain start from a good position or finetuning +Pretraining the classifier on another dataset to obtain start from a good position or finetuning a model which was originally created for another task is also a common technique. -24 + 2.6. Accuracy boosting techniques Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images, where a gray square occluded a part of the image. This gray squares center(x, y ) was -moved over the complete image and the classifier was run on each of the occluded +moved over the complete image and the classifier was run on each of the occluded images. The probability of the correct class, depending on the gray squares position, is showed in the middle column. One can see that the predicted probability of the correct class “Pomeranian” drops if the face of the dog is occluded. The last image gives the class with the highest predicted probability. In the case of the Pomeranian, it always predicts the correct class if the head is visible. However, if the head of the dog is occluded, it predicts other classes. - 25 + 2. 
Convolutional Neural Networks -Figure 2.11.: Filter visualization from [ ZF14]: The filters themselves as well as the input feature +Figure 2.11.: Filter visualization from [ ZF14]: The filters themselves as well as the input feature maps which caused the highest activation are displayed. -26 + 3. Topology Learning The topology of a neural network is crucial for the number of parameters, the number -of floating point operations (FLOPs), the required memory, as well as the features being +of floating point operations (FLOPs), the required memory, as well as the features being learned. The choice of the topology, however, is still mainly done by trial-and-error. This chapter introduces three general approaches to automatic topology learning: Growing a networks from a minimal network in Section 3.1, pruning in Section 3.2, genetic approaches @@ -1269,37 +1269,37 @@ Growing approaches for topology learning start with a minimal network, which onl the necessary number of input nodes and the number of output nodes which are determined by the application and the features of the input. They then apply a criterion to insert new layers / neurons into the network. -In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Opti- -mization are introduced. +In the following, Cascade-Correlation, Meiosis Networks and Automatic Structure Optimization + are introduced. 3.1.1. Cascade-Correlation Cascade-Correlation was introduced in [FL89]. It generates a cascading architecture which is similar to dense block described in Section 2.3.3. Cascade-Correlation works as follows: 1. Initialization: The number of input nodes and the number of output nodes are -defined by the problem. Create a minimal, fully connected network for those. +defined by the problem. Create a minimal, fully connected network for those. 2. Training : Train the network until the error no longer decreases. -3. Candidate Generation : Generate candidate nodes. 
Each candidate node is con- -nected to all inputs. They are not connected to other candidate nodes and not +3. Candidate Generation : Generate candidate nodes. Each candidate node is connected + to all inputs. They are not connected to other candidate nodes and not connected to the output nodes. - 27 + 3. Topology Learning 4. Correlation Maximization: Train the weights of the candidates by maximizingS , the correlation between candidates output value V with the networks residual error: -S = -o ∈ O +S = +o ∈ O - -p ∈ T + +p ∈ T V p − ¯ -V +V (E p,o − ¯ E -o ) +o ) @@ -1313,7 +1313,7 @@ pattern p . ¯ V and ¯ E -o are averaged values over all elements of T . This step is finished +o are averaged values over all elements of T . This step is finished when the correlation no longer increases. 5. Candidate selection : Keep the candidate node with the highest correlation, freeze its incoming weights and add connections to the output nodes. @@ -1326,14 +1326,14 @@ right corner. The black squares represent frozen weights which are found by corr maximization whereas the white squares are trainable weights. 3.1.2. Meiosis Networks Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where -weights are deterministic and fixed at prediction time, each weightw +weights are deterministic and fixed at prediction time, each weightw ij in Meiosis networks follows a normal distribution: w ij ∼ N (µ ij , σ 2 ij ) -28 + 3.2. Pruning approaches Hence every connection has two learned parameters: µ ij and σ 2 @@ -1341,12 +1341,12 @@ ij . The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled weights: - + i σ ij i µ - ij > 1 and + ij > 1 and k σ jk @@ -1357,16 +1357,16 @@ to the new connections. Hence Meiosis networks only change the number of neurons per layer. They do not add layers or add skip connections. 3.1.3. 
Automatic Structure Optimization -Automatic Structure Optimization (ASO) was introduced in [ BM93] for the task of on- -line handwriting recognition. It makes use of the confusion matrix C = ( c +Automatic Structure Optimization (ASO) was introduced in [ BM93] for the task of online + handwriting recognition. It makes use of the confusion matrix C = ( c ij ) ∈ Nk × k ≥ 0 -(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix +(see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix S with s i j = s j i = c ij · c -ji . The maximum of S defines where the ASO algorithm adds +ji . The maximum of S defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. 3.2. Pruning approaches Pruning approaches start with a network which is bigger than necessary and prune it. The @@ -1382,8 +1382,8 @@ This procedure can be repeated. One family of pruning criterions uses the Hessian matrix . For example, Optimal Brain Damage (OBD) as introduced in [LDS+ 89]. For every single parameter k , OBD calculates -the effect on the ob jective function of deletingk . The authors call the effect of the deletion -29 +the effect on the ob jective function of deletingk . The authors call the effect of the deletion + 3. Topology Learning of parameter k the saliency s k . The parameters with the lowest saliency are deleted, which @@ -1413,17 +1413,17 @@ about 92 % accuracy [ TF-16a] and state of the art is 99. 79 % [ WZZ+ algorithm achieves only 23.9 % accuracy [VH13]. Kocmánek shows in [ Koc15] that HyperNEAT approaches can achieve 96 .47 % accuracy on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer -so that not more than three hidden layers could be trained. At the same time, VGG- -19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers +so that not more than three hidden layers could be trained. 
At the same time, VGG19 + [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers in [HZRS15a]. [ LX17] shows that Genetic algorithms can achieve competitive results on MNIST and SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves29.03 % error on CIFAR-100, but the state of the art is 17.18 % [HLW16]. 3.4. Reinforcement Learning -Reinforcement learning is a sub-field of machine learning, which focuses on the question +Reinforcement learning is a sub-field of machine learning, which focuses on the question how to choose actions that lead to high rewards. -30 + 3.5. Convolutional Neural Fabrics One can think of the search for good neural network topologies as a reinforcement learning problem. The agent is a recurrent neural network which can generate bitstrings. Those @@ -1435,68 +1435,68 @@ for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that en amounts of computational resources were used to obtain those results. 3.5. Convolutional Neural Fabrics Convolutional Neural Fabrics are introduced in [ SV16]. They side-step hard decisions -about topologies by learning an ensemble of different CNN architectures. The idea is to -define a single architecture as a trellis through a 3D grid of nodes. Each node represents a +about topologies by learning an ensemble of different CNN architectures. The idea is to +define a single architecture as a trellis through a 3D grid of nodes. Each node represents a convolutional layer. One dimension is the index of the layer, the other two dimensions are -the amount of filters and the feature size. Each node is connected to nine other nodes and +the amount of filters and the feature size. 
Each node is connected to nine other nodes and thus represents nine possible choices of convolutional layers: • Resolution :(i)convolution with stride=1 or(ii)convolution with stride=2 or (iii)deconvolution (doubling the resolution) -• Channels:(i)half the number of filters than the layer before(ii)the same number -of filters as the layer before(iii)double the number of filters than the layer before -They always use ReLU as an activation function and they always use filters of size 3 × 3 . +• Channels:(i)half the number of filters than the layer before(ii)the same number +of filters as the layer before(iii)double the number of filters than the layer before +They always use ReLU as an activation function and they always use filters of size 3 × 3 . They don’t use pooling at all. - 31 + 3. Topology Learning -32 -4. Hierarchical Classification -Designing a classifier for a new dataset is hard for two main reasons: Many design choices are + +4. Hierarchical Classification +Designing a classifier for a new dataset is hard for two main reasons: Many design choices are not clearly superior to others and evaluating one design choice takes much time. Especially CNNs are known to take several days [ KSH12, SLJ + 15] or even weeks [ SZ14] to train. Additionally, some methods for analyzing a dataset become harder to use with more classes and more training samples. Examples are t-SNE, the manual inspection of errors and confusion matrices, and the argmax method. -One idea to approach this problem is by building a hierarchy of classifiers. The root -classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single -classes. Figure 4.1 gives an example for an hierarchy of classifiers. -Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. -The root classifier C +One idea to approach this problem is by building a hierarchy of classifiers. 
The root +classifier distinguishes clusters of classes, whereas the leaf classifiers distinguish single +classes. Figure 4.1 gives an example for an hierarchy of classifiers. +Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. +The root classifier C 0 has to distinguish six coarse classes (pedestrian, four+ -wheelers, -traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C +traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C 0 predicts a -pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C +pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C 0 -predicts traffic sign, then another classifier has to predict if it is a speed limit, a +predicts traffic sign, then another classifier has to predict if it is a speed limit, a sign indicating danger or something else. If C 0 , however, predicts road, then no other -classifier will become active. +classifier will become active. In this example, the problem has 17 classes. The hierarchical approach introduces -7 clusters of classes and thus uses 8 classifiers. -Such a hierarchy of classifiers needs clusters of classes. - 33 -4. Hierarchical Classification -4.1. Advantages of classifier hierarchies -Having a classifier hierarchy has five advantages: -• Division of labor : Different teams can work together. Instead of having a monolithic +7 clusters of classes and thus uses 8 classifiers. +Such a hierarchy of classifiers needs clusters of classes. + +4. Hierarchical Classification +4.1. Advantages of classifier hierarchies +Having a classifier hierarchy has five advantages: +• Division of labor : Different teams can work together. Instead of having a monolithic task, the solutions can be combined. -• Guarantees : Changing a classifier will only change the prediction of itself and its -children. Siblings are not affected. 
In the example from Figure 4.1, the classifier -which distinguishes traffic signs can be changed while the classification aspedestrian , +• Guarantees : Changing a classifier will only change the prediction of itself and its +children. Siblings are not affected. In the example from Figure 4.1, the classifier +which distinguishes traffic signs can be changed while the classification aspedestrian , four + - -wheelers , traffic sign , street , other will not be affected. Also, the -classification between speed limits, danger signs and other signs will not change. -• Faster training : Except for the root classifier C - 0 , each other classifier will have + -wheelers , traffic sign , street , other will not be affected. Also, the +classification between speed limits, danger signs and other signs will not change. +• Faster training : Except for the root classifier C + 0 , each other classifier will have less than the total amount of training data. Depending on the combined classes, the models could also be simpler. Hence the training time is reduced. • Weighting of errors: In practice, some errors are more severe than others. For -example, it could be acceptable if the two-wheelers classifier has an error rate of -40 %. But it is not acceptable if the speed limit classifier has such a high error rate. +example, it could be acceptable if the two-wheelers classifier has an error rate of +40 %. But it is not acceptable if the speed limit classifier has such a high error rate. • Post-hoc explanations: The simpler a model is, the easier it is to explain why a -classification is made the way it is made. +classification is made the way it is made. 4.2. Clustering classes There are two ways to cluster classes: By similarity or by semantics. While semantic clustering needs either additional information or manual work, the similarity can be @@ -1507,7 +1507,7 @@ are semantically and visually more similar to each other than to non-dogs. 
An ex where this is obviously not the case are symbols: The summation symbol \sum is identical in appearance to the Greek letter \Sigma , but semantically much closer to the addition operator + . -One approach to cluster classes by similarity is to train a classifier and examine its +One approach to cluster classes by similarity is to train a classifier and examine its predictions. Each class is represented in the confusion matrix by one row. Those rows can be directly with standard clustering algorithms such as k -means, DBSCAN [EKS+ 96], @@ -1518,10 +1518,10 @@ them do not allow a human to improve the found clustering manually. The confusion matrix ( c) ij ∈ N k × k states how often class i was present and class j was -34 + 4.2. Clustering classes predicted. The more often this confusion happens, the more similar those two classes are to -the classifier. Based on the confusion matrix, the classes can be clustered as explained in +the classifier. Based on the confusion matrix, the classes can be clustered as explained in the following. [ HAE16] indicates that more classes make it easier to generalize, but the accuracy gains diminish after a critical point of classes is reached. Hence a binary tree might not be a @@ -1531,7 +1531,7 @@ The proposed algorithm has two main ideas: • The order of columns and rows in the confusion matrix is arbitrary. This means one can swap rows and columns. If row i and j are swapped, then the columns i and j have to be swapped to in order to keep the same confusion matrix. -• If two classes are confused often, then they are similar to the classifier. +• If two classes are confused often, then they are similar to the classifier. Hence the order of the classes is permutated in such a way that the highest errors are close to the diagonal. One possible ob jective function to be minimized is f (C ) = n @@ -1543,8 +1543,8 @@ j =1 C which punishes errors linearly with the distance to the diagonal. This method is called CMO in the following. 
As pointed out by Tobias Ribizel (personal communication), this optimization problem -is a weighted version of Optimal Linear Arrangement problem . That problem is NP- -complete [ GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, +is a weighted version of Optimal Linear Arrangement problem . That problem is NPcomplete + [ GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, produces reasonable clusterings as well as visually appealing confusion matrices. The algorithm works as follows: First, decide with probability 0.5 if only two random rows are swapped or a block is swapped. If two rows are swapped, choose both of them randomly. @@ -1556,12 +1556,12 @@ ImageNet, it can swap the dog-class Silky Terrier to the dog-class Yorkshire ter and both dog classes Dalmatian and Greyhound next to each other. Both the two clusters of dog breeds could be separated by car and bus due to random chance. Moving any single class increases the score, but moving either one of the dog breed clusters or the vehicle -cluster decreases the score. Hence it is beneficial to implement block moving. +cluster decreases the score. Hence it is beneficial to implement block moving. One advantage of permutating the classes in order to minimize Equation (4.1) in comparison to spectral clustering as used in [ XZY+ 14] is that the adjusted confusion matrix can be -35 -4. Hierarchical Classification + +4. Hierarchical Classification split into many much smaller matrices along the diagonal. In the case of many classes (e.g., 1000 classes of ImageNet or 369 classes of HASYv2) this permutation makes it possible to visualize the types of errors made. 
If the errors are systematic due to visual similarity, many @@ -1570,33 +1570,33 @@ Those will be moved to the corners of the confusion matrix by optimizing Equatio Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. With such a permutation, only n − 1 binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions C -i,i +1 + C +classes has to be read. Alternatively, one can calculate the confusions C +i,i +1 + C i +1,i for each pair of classes which are neighbors in the confusion matrix. The higher this value, the -more similar are the classes according to the classifier. Hence a thresholdθ can be applied. +more similar are the classes according to the classifier. Hence a thresholdθ can be applied. θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) or semi-automatically by asking the user for information if two classes belong to the same cluster. Such an approach only needs log (n ) binary decisions from the user where n is the number of classes. -Please note that CMO only works if the classifier is neither too bad nor too good. A classifier +Please note that CMO only works if the classifier is neither too bad nor too good. A classifier which does not solve the task at all might just give almost uniform predictions whereas the -confusion matrix of an extremely good classifier is almost diagonal and thus contains no +confusion matrix of an extremely good classifier is almost diagonal and thus contains no information about the similarity of classes. One possible solution to this problem is to take -the prediction of the class in contrast to using only the argmax in order to find a useful +the prediction of the class in contrast to using only the argmax in order to find a useful permutation. -36 + 5. 
Experimental Evaluation -All experiments are implemented using Keras 2.0 [ Cho15] with Tensorflow 1.0 [ AAB+ +All experiments are implemented using Keras 2.0 [ Cho15] with Tensorflow 1.0 [ AAB+ 16] and cuDNN 5.1 [CWV + - 14] as the backend. The experiments were run on different machines -with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce + 14] as the backend. The experiments were run on different machines +with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce GTX 970 and GeForce 940MX. The GTSRB [SSSI12], SVHN [NWC + 11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are -used as their size is small enough to be trained within a day. Other classification datasets +used as their size is small enough to be trained within a day. Other classification datasets which were considered are listed in Appendix E. CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer, @@ -1606,7 +1606,7 @@ CIFAR-100 is a 100-class dataset of color images of the size32 px × 32 px. Its are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain the class airplane . The state of the art achieves an accuracy of 82.82 % [HLW16]. -GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. +GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. The 51 839 images are in color and of a minimum size of25 px × 25 px up to 266 px × 232 px. The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [ SL11]. According to [SSSI], human performance is at 98.84 %. 
@@ -1622,7 +1622,7 @@ for unsupervised training and 500 images per class for supervised training. SVHN (Street View House Numbers) exists in two formats. For the following experiments, the cropped digit format was used. It contains the 10 digits cropped from photos of Google Street View. The images are in color and of size 32 px × 32 px. The state of the art -37 + 5. Experimental Evaluation achieves an accuracy of 98. 41 % [ HLW16]. According to [ NWC + 11a], human performance @@ -1635,7 +1635,7 @@ The baseline model is trained with Adam [KB14], an initial learning rate of 10 size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation depends on the dataset: • CIFAR-10 , CIFAR-100 and STL-10: Random width and height shift by at most -±3 pixels in either direction; Random horizontal flip. +±3 pixels in either direction; Random horizontal flip. • GTSRB , MNIST: Random width and height shift by at most ±5 pixels in either direction; random rotation by at most ±15 degrees; random channel shift; random @@ -1643,8 +1643,8 @@ zoom in [0.5 , 1 .5]; random shear by at most 6 degrees. • HASYv2: Random width and height shift by at most ±5 pixels in either direction; random rotation by at most ±5 degree. • SVHN: No data augmentation. -If the dataset does not define a training/test set, a stratified67 % / 33 % split is applied. If -the dataset does not define a validation set, the training set is split in a stratified manner +If the dataset does not define a training/test set, a stratified67 % / 33 % split is applied. If +the dataset does not define a validation set, the training set is split in a stratified manner into 90 % training set / 10 % test set. Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of 10 epochs is applied. 
After this, the model is trained without data augmentation for at most @@ -1661,7 +1661,7 @@ Please note that the number of input- and output channels of the network depends the dataset. If the input image is larger than 32 px × 32 px, for each power of two a Conv-Block (2) is added at the input. For MNIST, the images are bilinearly upsampled to 32 px × 32 px. -38 + 5.1. Baseline Model and Training setup # Type Filters @ Patch size / stride Parameters FLOPs Output size @@ -1715,13 +1715,13 @@ Dropout, p = 0. 5 C k @1 × 1/ 1 Global AVG pooling BN + Softmax -Figure 5.1.: Architecture of the baseline model. C 32@3 × 3/ 1 is a convolutional layer with 32 filters +Figure 5.1.: Architecture of the baseline model. C 32@3 × 3/ 1 is a convolutional layer with 32 filters of kernel size 3 × 3 with stride 1. - 39 + 5. Experimental Evaluation 5.1.1. Baseline Evaluation The results for the baseline model evaluated on eight datasets are given in Table 5.2. The -speed for inference for different GPUs is given in Table 5.3. +speed for inference for different GPUs is given in Table 5.3. Dataset Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set Asirra 94.22 % σ = 3. 49 94.37 % σ = 3.47 97 .07 % 97. 37 % @@ -1737,7 +1737,7 @@ used in the ensemble. The empirical standard deviation σ of the accuracy is als CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For HASYv2 no test time transformations are used. -Network GPU Tensorflow Inference per Training +Network GPU Tensorflow Inference per Training 1 Image 128 images time / epoch Baseline Default Intel i7-4930K 3 ms 244 ms 231. 0 s Baseline Optimized Intel i7-4930K 2 ms 143 ms 149. 0 s @@ -1751,23 +1751,23 @@ Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24. 4 s DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms — Table 5.3.: Speed comparison of the baseline model on CIFAR-10. 
The baseline model is evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Ma j17]. -Weights the baseline model can be found at [Tho17b ]. The optimized Tensorflow build +Weights the baseline model can be found at [Tho17b ]. The optimized Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. -40 + 5.1. Baseline Model and Training setup 5.1.2. Weight distribution -The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution -of bias weights by layer is shown in Figure 5.3. Although both figures only show the -distribution for one specific model trained on CIFAR-100, the following observed patterns +The distribution of filter weights by layer is visualized in Figure 5.2 and the distribution +of bias weights by layer is shown in Figure 5.3. Although both figures only show the +distribution for one specific model trained on CIFAR-100, the following observed patterns are consistent for 70 models (7 datasets and 10 models per dataset): • The empiric [0 .5 − percentile, 99 .5 − percentile] interval which contains 99 % of the -filter weights is almost symmetric around zero. The same is true for the bias weights. +filter weights is almost symmetric around zero. The same is true for the bias weights. • The farther a layer is from the input away, the smaller the 99-percentile interval is, except for the last layer (see Table A.1). -• The 99-percentile interval of the first layers filter weights is about[−0. 5, +0. 5], except +• The 99-percentile interval of the first layers filter weights is about[−0. 5, +0. 5], except for MNIST and HASYv2 where it is in [ −0. 8, 0.8]. -• The 99-percentile interval of the first layers bias weights is always in [ −0 .2 , 0 .2]. -• The distribution of filter weights of the last convolutional layer is not symmetric. In +• The 99-percentile interval of the first layers bias weights is always in [ −0 .2 , 0 .2]. 
+• The distribution of filter weights of the last convolutional layer is not symmetric. In some cases the distribution is also not unimodal. • The bias weights of the last three layers are very close to zero. The absolute value of @@ -1776,54 +1776,54 @@ most of them is smaller than 10−2 Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases, the Batch Normalization layer equals the identity and thus is only relevant for the training. -While γ and β do not show as clear patterns as the filter and bias weights of convolutional -layers, some observations are also consistent through all models even for different datasets: +While γ and β do not show as clear patterns as the filter and bias weights of convolutional +layers, some observations are also consistent through all models even for different datasets: • γ of the last layer (layer 16) is bigger than 1.3. • The 99-percentile interval for β of the last layer is longer than the other 99-percentile intervals. • The 99-percentile interval for β of the fourth-last (layer 14 for STL-10, layer 10 for all other models) is more negative then all other layers. -Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional -layer. The ranges are calculated for each channel and filter separately. The smaller the -values are, the less information is lost if the filters are replaced by smaller filters. - 41 +Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional +layer. The ranges are calculated for each channel and filter separately. The smaller the +values are, the less information is lost if the filters are replaced by smaller filters. + 5. Experimental Evaluation -Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR- -100. 
The weights of the first layer are relatively evenly spread in the interval[−0. 4, +0.4]. +Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR100. + The weights of the first layer are relatively evenly spread in the interval[−0. 4, +0.4]. With every layer the interval which contains95 % of the weights and is centered around the mean becomes smaller, especially with layer 11 where the feature maps are of size 1 × 1 . In contrast to the other layers, the last convolutional layer has a bimodal distribution. -This plot indicates that the network might benefit from bigger filters in the first layer, -whereas the filters in layers 7 – 11 could potentially be smaller. +This plot indicates that the network might benefit from bigger filters in the first layer, +whereas the filters in layers 7 – 11 could potentially be smaller. Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. -While the first layers biases are in[− 0. 1, +0.1], after each max-pooling layer the interval +While the first layers biases are in[− 0. 1, +0.1], after each max-pooling layer the interval which contains 95 % of the weights and is centered around the mean becomes smaller. In the last three convolutional layer, most bias weights are in [− 0. 005 , +0.005] . -42 + 5.1. Baseline Model and Training setup Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a baseline model trained on CIFAR-100. Figure 5.5.: The distribution of the β parameter of Batch Normalization layers of a baseline model trained on CIFAR-100. - 43 + 5. Experimental Evaluation -Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For -each filter, the range of values is recorded by channel. The smaller this range is, the -less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter. 
-44 +Figure 5.6.: The distribution of the range of values (max - min) of filters by channel and layer. For +each filter, the range of values is recorded by channel. The smaller this range is, the +less information is lost if a n × n filter is replaced by a (n − 1) × (n − 1) filter. + 5.1. Baseline Model and Training setup 5.1.3. Training behavior -Due to early stopping, the number of epochs which a model was trained differ. The number +Due to early stopping, the number of epochs which a model was trained differ. The number of epochs trained with augmentation ranged from 133 epochs to 182 epochs with a standard deviation of 17.3 epochs for CIFAR-100. Figure 5.7 shows the worst and the best validation accuracy during the training with -augmented data. Different initializations lead to very similar validation accuracies during +augmented data. Different initializations lead to very similar validation accuracies during training. The image might lead to the wrong conclusion that models which are better at the start are also better at the end. In order to check this hypothesis, the relative order of validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering -stays approximately the same, then it can be considered to run the first few epochs many +stays approximately the same, then it can be considered to run the first few epochs many times and only train the best models to the end. For 10 models, there can be 102 −10 2 = 45 @@ -1832,7 +1832,7 @@ is reversed. For the baseline model, 21.8 changes in the relative order of accur in average for each pair of epochs (i, i + 1). This means if one knows only the relative order of the validation accuracy of two models m and m in epoch i , it is doubtful if one can -make any statement about the ordering of m and m +make any statement about the ordering of m and m in epoch i + 1 . 
0 10 20 30 40 50 60 70 80 90 @@ -1850,29 +1850,29 @@ maximum validation accuracy minimum validation accuracy mean loss Figure 5.7.: Minimum and maximum validation accuracy of the 10 trained models by epoch. The -differences do not exceed 1 % and does not increase by training epoch. Four models -stopped the first training stage at epoch 133 which causes the shift in the loss and the +differences do not exceed 1 % and does not increase by training epoch. Four models +stopped the first training stage at epoch 133 which causes the shift in the loss and the maximum validation accuracy. Figures 5.8 to 5.10 show how the weights changed while training on CIFAR-100. It was expected that the absolute value of weight updates during epochs (sum, max, and mean) decrease in later training stages. The intuition was that weights need to be adjusted in a -coarse way first. After that, the intuition was that only slight modifications are applied by -45 +coarse way first. After that, the intuition was that only slight modifications are applied by + 5. Experimental Evaluation the SGD based training algorithm (ADAM). The mean, max and sum of weight updates as displayed in Figures 5.8 to 5.10, however, do not show such a clear pattern. The biggest -change happens as expected in the first epoch after the weights are initialized. The change +change happens as expected in the first epoch after the weights are initialized. The change from augmented training to non-augmented training was at epoch 156 to epoch 157 It can be observed, that layers which receive more input feature maps get larger weight updates in mean. As layers which are closer to the output take more input feature maps, their weight updates are larger. This pattern does not occur when SGD is used as the optimizer. Figure 5.8.: Mean weight updates of the baseline model between epochs by layer. -46 + 5.1. Baseline Model and Training setup Figure 5.9.: Maximum weight updates of the baseline model between epochs by layer. 
Figure 5.10.: Sum of weight updates of the baseline model between epochs by layer. - 47 + 5. Experimental Evaluation 5.2. Confusion Matrix Ordering The visualization of the confusion matrix can give valuable information about which part @@ -1884,30 +1884,30 @@ classes together (see Figure 5.11a). Figure 5.11.: Figure 5.11a shows an ordered confusion matrix of the CIFAR-10 dataset. The diagonal elements are set to 0 in order to make other elements easier to see. Figure 5.11b shows a confusion matrix with random mistakes. -The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test -accuracy where a good permutation was found. Please note that this is not the best classifier. -The confusion matrix which resulted from a baseline classifier with99.32 % test accuracy is +The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test +accuracy where a good permutation was found. Please note that this is not the best classifier. +The confusion matrix which resulted from a baseline classifier with99.32 % test accuracy is displayed in as the second image. Those results suggest that the ordering of classes is a valuable tool to make patterns easier -to see. Humans, however, are good at finding patterns even if they come from random noise. -Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy +to see. Humans, however, are good at finding patterns even if they come from random noise. +Hence, for comparison, a confusion matrix of a classifier with 30 classes, 60 % accuracy and 40 % uniformly random errors of a balanced dataset is created, optimized according to -Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a. +Equation (4.1) and shown in Figure 5.11b. It clearly looks different than Figure 5.11a. On the HASYv2 dataset the class-ordering is necessary to see anything as most possible -confusions do not happen. 
See Figure 5.13 for comparison of the first 50 classes of the +confusions do not happen. See Figure 5.13 for comparison of the first 50 classes of the unsorted confusion matrix and the sorted confusion matrix. If confusion matrices of a maximum size of 50 × 50 are displayed, the ordered method can show only 8 matrices -because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be +because the off-diagonal matrices are almost 0. Without sorting, 64 matrices have to be displayed. -48 + 5.2. Confusion Matrix Ordering -Figure 5.12.: The first image shows the confusion matrix for the test of GTSRB set after optimization +Figure 5.12.: The first image shows the confusion matrix for the test of GTSRB set after optimization to Equation (4.1). The diagonal elements are set to 0 in order to make other elements easier to see. The symbols next to the label on the vertical axis indicate the shape and the color of the signs. The second image shows the same, but with baseline model. -Best viewed in electronic form. 49 -Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal +Best viewed in electronic form. +Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal elements are set to 0 in order to make other elements easier to see. The top image shows arbitrary class ordering, the bottom image shows the optimized ordering. 5.3. Spectral Clustering vs CMO @@ -1916,41 +1916,41 @@ This section evaluates the clustering quality of CMO in comparison to the cluste of spectral clustering. The evaluated model achieves 70 .50 % training accuracy and 53.16 % test accuracy on CIFAR-100. Figure 5.14 shows the sorted confusion matrix. -Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The +Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. 
The diagonal elements are set to 0 in order to make other elements easier to see. Best viewed in electronic form. -CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters +CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters which are to be found. The number of errors is determined by(i)Join alln clusters which -contain the classes of the coarse classC to a set M . The error is n .(ii)Within M , find the +contain the classes of the coarse classC to a set M . The error is n .(ii)Within M , find the set of classes M − - which do not belong to C .(iii)The final error is n + | M − + which do not belong to C .(iii)The final error is n + | M − |. As can be -seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has +seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has only half the error of spectral clustering. The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be noted that the number of clusters was determined by using the semi-automatic method based on CMO as described in Section 4.2. - 51 + 5. 
Experimental Evaluation Cluster Spectral clustering Errors CMO Errors -fish aquarium fish, orchid + flatfish -+ ray, shark + trout, lion 5 aquarium fish, orchid + flatfish +fish aquarium fish, orchid + flatfish ++ ray, shark + trout, lion 5 aquarium fish, orchid + flatfish + ray + shark, trout 4 -flowers orchid, aquarium fish + sun- -flower + poppy, tulip + rose, +flowers orchid, aquarium fish + sunflower + + poppy, tulip + rose, train 5 - orchid, aquarium fish + sun- -flower, poppy, tulip, rose 2 + orchid, aquarium fish + sunflower, + poppy, tulip, rose 2 people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 reptiles crocodile, plain, road, table, wardrobe + dinosaur + lizard -+ snake, worm + turtle 9 crocodile, lizard, lobster, cater- -pillar + dinosaur + snake + tur- -tle, crab 6 ++ snake, worm + turtle 9 crocodile, lizard, lobster, caterpillar + + dinosaur + snake + turtle, + crab 6 trees maple, oak, pine + willow, forest + palm 3 palm, willow, pine, maple, oak 0 Total 24 12 -Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , +Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , whereas clusters are separated by +. Cluster Spectral clustering Errors CMO Errors A A , A, A 0 A , A, A , Å 1 @@ -1977,29 +1977,29 @@ X X , x , X , χ , × 0 X , x , X , χ, × 0 Y Y and y 1 Y , y 0 Z Z , z , Z and Z, Z 1 Z , z , Z, Z , Z 0 Total 34 25 -Table 5.5.: Differences in spectral clustering and CMO. -52 -5.4. Hierarchy of Classifiers -5.4. Hierarchy of Classifiers -In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root -classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on -the found sub-classes are listed in Table 5.6. 
The fact that the root classifier achieves better -results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either -be due to limited training data, overfitting or the small size of 32 px × 32 px of the data. +Table 5.5.: Differences in spectral clustering and CMO. + +5.4. Hierarchy of Classifiers +5.4. Hierarchy of Classifiers +In a first step, a classifier is trained on the 100 classes of CIFAR-100. The fine-grained root +classifier achieves an accuracy of 65.29 % with test-time transformations. The accuracy on +the found sub-classes are listed in Table 5.6. The fact that the root classifier achieves better +results within a cluster than the specialized leaf classifiers in 13 of 14 cases could either +be due to limited training data, overfitting or the small size of 32 px × 32 px of the data. The experiment also shows that most of the errors are due to not identifying the correct -cluster. Hence, in this case, more work in improving the root classifier is necessary rather +cluster. Hence, in this case, more work in improving the root classifier is necessary rather than improving the discrimination of classes within a cluster. -Although the classes within a cluster capture most of the classifications, many misclassifica- -tions happen outside of the clusters. For example, in cluster 3, a perfect leaf classifier would -push the accuracy in the ful l column only to 63.50 % due to errors of the root classifier -where the root classifier does not predict the correct cluster. -The leaf classifiers use the same topology as the root classifier. By initializing them with -the root classifiers weights their performance can be pushed at about the inner accuracy. +Although the classes within a cluster capture most of the classifications, many misclassifications + happen outside of the clusters. 
For example, in cluster 3, a perfect leaf classifier would +push the accuracy in the ful l column only to 63.50 % due to errors of the root classifier +where the root classifier does not predict the correct cluster. +The leaf classifiers use the same topology as the root classifier. By initializing them with +the root classifiers weights their performance can be pushed at about the inner accuracy. They are, however, only useful if their accuracy is well above theinner accuracy of the root -classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. +classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. Cluster Classes accuracy -root classifier leaf classifier -cluster identified class identified | cluster class identified | cluster +root classifier leaf classifier +cluster identified class identified | cluster class identified | cluster 1 3 69.67 % 84.27 % 72.98 % 2 5 46.60 % 58.54 % 43.47 % 3 2 58.50 % 92.13 % 83.46 % @@ -2014,38 +2014,38 @@ cluster identified class identified | cluster class identified | cluster 12 2 72.50 % 94.77 % 76.77 % 13 2 64.00 % 82.58 % 86.27 % 14 2 79.67 % 89.85 % 89.10 % -Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on -14 clusters of classes. Each class has 100 elements to test. The columncluster identified -gives the percentage that the root classifiers argmax prediction is within the correct -cluster, but not necessarily the correct class. The columnsclass identified | cluster only -consider data points where the root classifier correctly identified the cluster. - 53 +Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on +14 clusters of classes. Each class has 100 elements to test. The columncluster identified +gives the percentage that the root classifiers argmax prediction is within the correct +cluster, but not necessarily the correct class. 
The columnsclass identified | cluster only +consider data points where the root classifier correctly identified the cluster. + 5. Experimental Evaluation 5.5. Increased width for faster learning -More filters in one layer could simplify the optimization problem as each filter needs smaller +More filters in one layer could simplify the optimization problem as each filter needs smaller updates. Hence a CNN N with n - i filters in layer i is expected to take more epochs than a -CNN N + i filters in layer i is expected to take more epochs than a +CNN N with 2 · n - i filters in layer i to achieve the same validation accuracy. -This hypothesis can be falsified by training a CNN N and a CNN N + i filters in layer i to achieve the same validation accuracy. +This hypothesis can be falsified by training a CNN N and a CNN N and comparing the -trained number of epochs. As more filters can lead to different results depending on the -layer where they are added, five models are trained. The details about those models are +trained number of epochs. As more filters can lead to different results depending on the +layer where they are added, five models are trained. The details about those models are given in Table 5.7 Name Layer Filter count Total Baseline New parameters m 9 9 64 638 5 978 566 -m +m 9 9 64 974 8 925 622 m 11 11 512 3786 5 982 698 -m +m 11 11 512 1024 1 731 980 m 13 13 512 8704 5 982 092 -Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer +Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer was increased. The detailed results are given in Table 5.8. As expected, the number of training epochs of the models with increased numbers of parameters is lower. The wall-clock time, however, is @@ -2053,12 +2053,12 @@ higher due to the increase in computation per forward- and backward-pass. 
For m 9 , m 11 and m -13 , the filter weight range of the layer with increased capacity decreases -compared to Figure 5.6, the filter weights of the layer with increased capacity are more +13 , the filter weight range of the layer with increased capacity decreases +compared to Figure 5.6, the filter weights of the layer with increased capacity are more concentrated around zero compared to Figure 5.2. For model m 13 , the distribution of weight of the output layer changed to a more bell-shaped distribution. Except for this, the -distribution of filter weights in other layers did not change for all three models compared to +distribution of filter weights in other layers did not change for all three models compared to the baseline. Model Parameters Accuracy Training Single Model Ensemble Mean Epochs Mean Time @@ -2066,11 +2066,11 @@ Mean std baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s m 9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s -m +m 9 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s m 11 5 982 698 65. 73 % 0.77 67. 38 % 149.2 5450 s -m +m 11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s m 13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s @@ -2079,19 +2079,19 @@ Table 5.8.: Training time in epochs and wall-clock time for the baseline and mod 11 , m 13 as well as their accuracies. -54 + 5.6. Weight updates 5.6. Weight updates Section 5.5 shows that wider networks learn faster. One hypothesis why this happens is that every single weight updates can be smaller to learn the same function. Thus the loss function is smoother and thus gradient descent based optimization algorithms lead to more consistent weight updates. -Consequently, it is expected that layers with fewer filters have more erratic updates. 
If -there are many filters, the weights of a filter which does not contribute much to the end -results or is even harmful filter can gradually be set to zero, essentially removing one path +Consequently, it is expected that layers with fewer filters have more erratic updates. If +there are many filters, the weights of a filter which does not contribute much to the end +results or is even harmful filter can gradually be set to zero, essentially removing one path in the network. -In order to test the hypothesis, the baseline model was adjusted. The number of filters in -layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean +In order to test the hypothesis, the baseline model was adjusted. The number of filters in +layer 5 was reduced from 64 filters to 3 filters. As one can see in Figure 5.15, the mean weight update of the layers 1, 3, 5, 7 and 9 have a far bigger range than the layers 11, 13 and 15 after epoch 50. Compared to the baseline models mean updates (Figure 5.8, Page 46), the mean weight updates of layers 1 and 3 are higher, the range of the mean weight update @@ -2099,70 +2099,70 @@ from epoch 50 is higher for layer 5 and the range of mean updates of layer 7 is For the maximum and the sum, no similar pattern could be observed (see Figures A.3 and A.4). Figure 5.15.: Mean weight updates between epochs by layer. The model is the baseline model, but -with layer 5 reduced to 3 filters. - 55 +with layer 5 reduced to 3 filters. + 5. Experimental Evaluation 5.7. Multiple narrow layers vs One wide layer On a given feature map size one can have an arbitrary number of convolutional layers with -SAME padding and each layer can have an arbitrary number of filters. A convolutional layer -with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called -narrower and the number of filters in a convolutional layer is the layers width. 
-If the number of parameters which may be used for the feature map scale is fixed and high +SAME padding and each layer can have an arbitrary number of filters. A convolutional layer +with more filters is called wider [ZK16], a convolutional layer with fewer filters is thus called +narrower and the number of filters in a convolutional layer is the layers width. +If the number of parameters which may be used for the feature map scale is fixed and high enough, there are still many combinations. If n i with i = 0, . . . , k is the number of output -feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters without +feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters without a bias, then the number of parameters is Parameters = k -i =1 +i =1 (n i − 1 · 3 2 + 1) · n - i -Hence the width of one layer does not only influence the parameters in this layer, but also + i +Hence the width of one layer does not only influence the parameters in this layer, but also in the next layer. The number of possible subsequent layers of one feature map size is enormous, even if -constraints are placed on the number of parameters. For example, the first convolutional -layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per -layer are not desirable, one keeps all layers having a bias and all layers only use3 × 3 filters, +constraints are placed on the number of parameters. For example, the first convolutional +layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per +layer are not desirable, one keeps all layers having a bias and all layers only use3 × 3 filters, then the maximum depth is 10. If one furthermore assumes that at least 800 parameters should be used, there are still 120 possible layer combinations. 
As experimentally evaluating one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible to evaluate all layer combinations. In the following, a couple of changes to the network width / depth will be evaluated. -Each layer expands the perceptive field. Hence deeper layer can use more of the input for +Each layer expands the perceptive field. Hence deeper layer can use more of the input for every single output value. But deeper networks need more time for inference as the output of layer i has to be computed before the output of i + 1 can be computed. Hence there is -less potential to parallelize computations. Each filter can be seen as a concept which can -be learned. The deeper the filter is in the network, the higher is the abstraction level of the -concept. In most cases, both is necessary: Many different concepts (width) and high-level +less potential to parallelize computations. Each filter can be seen as a concept which can +be learned. The deeper the filter is in the network, the higher is the abstraction level of the +concept. In most cases, both is necessary: Many different concepts (width) and high-level concepts (depth). -Reducing the two first convolutional layers of the baseline model (see Page 39) to one -convolutional layer of 48 filters ( 944 396 parameters in total, whereas the baseline model +Reducing the two first convolutional layers of the baseline model (see Page 39) to one +convolutional layer of 48 filters ( 944 396 parameters in total, whereas the baseline model has 944 012 parameters) resulted in a mean accuracy of 61.64 % (- 1. 74 %) and a standard deviation of σ = 1 .12 (+0.57). The ensemble achieved 63.18 % (- 1 .52 %). As expected, the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of the baseline model to 15 s of the model with one less convolutional layer, one less Batch Normalization and one less activation layer. The inference time was also reduced from6 ms -56 + 5.8. 
Batch Normalization to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of more then one percentage point of the mean model and the increased standard deviation of the models performance, at least two convolutional layers are on the 32 px × 32 px feature map scale are recommendable for CIFAR-100. -Changing the baseline to have less filters but more layers is another option. This was tried -for the first block at the 32 px × 32 px feature map scale. The two convolutional layers -(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one -convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model +Changing the baseline to have less filters but more layers is another option. This was tried +for the first block at the 32 px × 32 px feature map scale. The two convolutional layers +(layers 1 – 4 in Page 39) were replaced by two convolutional layers with 27 filters and one +convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model has 944 132 parameters. Compared to the baseline model, the time for inference was the same. This is unexpected, because the inference time changed when a layer was removed at this scale. The mean test accuracy was 63. 66 % (+0.28) and the standard deviation was σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21). Having two nonlinearities at each feature map scale could be important to learn nonlinear transformations at that scale. As the baseline model does only have one nonlinearity at the -8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization +8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization and ELU was added. To keep the number of parameters constant, layer 11 of the baseline -model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy +model was reduced from 512 filters to 488 filters. 
The new model achieves a mean accuracy of 63. 09 % (-0.29) with a standard deviation of σ = 0 . 70 (+0.15). The ensemble achieves an accuracy of 64 .39 % (+0.31). This could indicate that having two convolutional layers is more important for layers close to the input than intermediate layer. Alternatively, the @@ -2170,8 +2170,8 @@ parameters could be more important in layer 11 than having a new convolutional l layer 9. In order to control the hypothesis that having two convolutional layers are less important in the middle of a network, the second convolutional layer at the16 × 16 feature map scale is -removed. The first convolutional layer was increased from 32 filters to 59 filters, the second -convolutional layer was increased from 32 filter s to 58 filters in order to keep the amount of +removed. The first convolutional layer was increased from 32 filters to 59 filters, the second +convolutional layer was increased from 32 filter s to 58 filters in order to keep the amount of parameters of the model constant. The adjusted model achieved 62. 72 % (-0.66) mean test accuracy with a standard deviation of σ = 0 .84 (+0.29). The ensemble achieved 63.88 % test accuracy (-0.66). @@ -2181,8 +2181,8 @@ scale, the mean test accuracy drops to61 .21 % (-2.17) with a standard deviation to have at least one convolutional layer at this feature map scale. 5.8. Batch Normalization In [CUH15], the authors write that Batch Normalization does not improve ELU networks. -Hence the effect of removing Batch Normalization from the baseline is investigated in this -57 +Hence the effect of removing Batch Normalization from the baseline is investigated in this + 5. Experimental Evaluation experiment. As before, 10 models are trained on CIFAR-100. The training setup and the modelm @@ -2190,7 +2190,7 @@ no-bn are identical to the baseline model m, except that in m no-bn the Batch Normalization layers are removed. 
-One notable difference is the training time: While m needs 21 ms per epoch in average on +One notable difference is the training time: While m needs 21 ms per epoch in average on a GTX 980, m no-bn only needs 21 ms per epoch. The number of epochs used for training, however, also increased noticeably from 149 epochs to 178 epochs in average. The standard @@ -2199,45 +2199,45 @@ no-bn . The mean accuracy of m no-bn is 62. 86 % and hence 0.52 percentage points worse. The standard deviation between models increased from 0.55 to 0.61. This is likely a result of the -early stopping policy and the differences in training epochs. This can potentially be fixed +early stopping policy and the differences in training epochs. This can potentially be fixed by retraining the models which stopped earlier than the model which was trained for the biggest amount of epochs. The ensemble test accuracy is63.88 % and hence 0.82 percentage points worse than the baseline. -The filter weight range and distribution is approximately the same as Figure 5.6 and +The filter weight range and distribution is approximately the same as Figure 5.6 and Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of -the baseline are spread out in the first layer and much more concentrated in subsequent layers +the baseline are spread out in the first layer and much more concentrated in subsequent layers (see Figure 5.3), the model without Batch Normalization has rather concentrated weights -in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). +in the first layers and only the bias weights of the last layer is spread out (see Figure A.2). Another model m -no-bn which has one more filter in the convolutional layer 1, 3, 5, and 7 to +no-bn which has one more filter in the convolutional layer 1, 3, 5, and 7 to compensate for the loss of parameters in Batch Normalization. 
The mean test accuracy of 10 such models is 62.87 % which is 0.51 percentage points worse than the baseline. The ensemble of m no-bn achieves 64.33 % which is 0.37 percentage points worse than the baseline. The mean training time was 14 s per epoch and 157.4 epochs with a standard deviation of 20.7 epochs. -Hence it is not advisable to remove Batch Normalization for the final model. It could, +Hence it is not advisable to remove Batch Normalization for the final model. It could, however, be possible to remove Batch Normalization for the experiments to iterate quicker -through different ideas if the relative performance changes behave the same with or without +through different ideas if the relative performance changes behave the same with or without Batch Normalization. -58 + 5.9. Batch size 5.9. Batch size The mini-batch size m ∈ N - ≥1 influences + ≥1 influences • Epochs until convergence : The smaller m, the more often the model is updated in one epoch. Those updates, however, are based on fewer samples of the dataset. -Hence the gradients of different mini-batches can noticeably differ. In the literature, +Hence the gradients of different mini-batches can noticeably differ. In the literature, this is referred to as gradient noise [KMN + 16]. • Training time per epoch : The smaller the batch size, the higher the training time per epoch as the hardware is not optimally utilized. -• Resulting model quality : The choice of the hyperparameter m influences the -accuracy of the classifier when training is finished. [KMN+ +• Resulting model quality : The choice of the hyperparameter m influences the +accuracy of the classifier when training is finished. [KMN+ 16] supports the view that smaller m result in less sharp minima. Hence smaller m lead to better generalization. -Empiric evaluation results can be found in Table 5.9. Those results confirm the claim +Empiric evaluation results can be found in Table 5.9. 
Those results confirm the claim of [KMN + 16] that lower batch sizes generalize better. m Training @@ -2254,7 +2254,7 @@ epoch 133 – 195 2892 s 63. 38 % σ = 0. 55 64.70 % 128 18 s epoch 145 – 239 3126 s 62 .23 % σ = 0.73 63.55 % Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) -of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on +of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on CIFAR-100. 5.10. Bias Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a @@ -2266,18 +2266,18 @@ no-bias is 63.74 % which is an improvement of 0.36 percentage points over the baseline. The ensemble achieves a test accuracy of65.13 % which is 0.43 percentage points better than the baseline. Hence the bias can safely be removed. -Removing the biases did not have a noticeable effect on the filter weight range, the filter +Removing the biases did not have a noticeable effect on the filter weight range, the filter weight distribution or the distribution of the remaining biases. Also, theγ and β parameters of the Batch Normalization layers did not noticeably change. - 59 + 5. Experimental Evaluation 5.11. Learned Color Space Transformation -In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1 -directly after the input and then another convolutional layer with 3 filters of size1 × 1 acts +In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1 +directly after the input and then another convolutional layer with 3 filters of size1 × 1 acts as a learned transformation in another color space and boosts the accuracy. -This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU ac- -tivation and 10 filters followed by another convolutional layer with ELU activation and -3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37. 
+This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation + and 10 filters followed by another convolutional layer with ELU activation and +3 filters. The mean accuracy of 10 models was 63.31 % with a standard deviation of 1.37. The standard deviation is noticeable higher than the standard deviation of the baseline model (0.55) and the accuracy also decreased by 0.07 percentage points. The accuracy of the ensemble is at 64.77 % and hence 0.07 percentage points higher than the accuracy of @@ -2296,20 +2296,20 @@ The training time per epoch decreased from20.5 s-21 .1 s to 18. 6 s (mean of 10 on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch of 128 images. 5.13. Activation Functions -Nonlinear, differentiable activation functions are important for neural networks to allow them +Nonlinear, differentiable activation functions are important for neural networks to allow them to learn nonlinear decision boundaries. One of the simplest and most widely used activation functions for CNNs is ReLU [ KSH12], but others such as ELU [ CUH15], parametrized -rectified linear unit (PReLU) [ HZRS15b], softplus [ ZYL+ +rectified linear unit (PReLU) [ HZRS15b], softplus [ ZYL+ 15] and softsign [ BDLB09 ] have been proposed. The baseline uses ELU. -60 + 5.13. Activation Functions -Activation functions differ in the range of values and the derivative. The definitions and +Activation functions differ in the range of values and the derivative. The definitions and other comparisons of eleven activation functions are given in Table B.3. Theoretical explanations why one activation function is preferable to another in some scenarios are the following: -• Vanishing Gradient : Activation functions like tanh and the logistic function sat- -urate outside of the interval [ −5 , 5] . 
This means weight updates are very small for +• Vanishing Gradient : Activation functions like tanh and the logistic function saturate + outside of the interval [ −5 , 5] . This means weight updates are very small for preceding neurons, which is especially a problem for very deep or recurrent networks as described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12]. • Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem. @@ -2319,7 +2319,7 @@ training process, this neuron does not get any update and hence does not partici in the training process. This problem is addressed in [MHN13]. • Mean unit activation: Some publications like [ CUH15, IS15] claim that mean unit activations close to 0 are desirable. They claim that this speeds up learning -by reducing the bias shift effect. The speedup of learning is supported by many +by reducing the bias shift effect. The speedup of learning is supported by many experiments. Hence the possibility of negative activations is desirable. Those considerations are listed in Table 5.10 for 11 activation functions. Besides the theoretical properties, empiric results are provided in Tables 5.11 and 5.12. The baseline @@ -2335,7 +2335,7 @@ logistic− ( x) = 1 1 + e− x − 0 .5 The logistic− - function has the same derivative as the logistic function and hence still suffers + function has the same derivative as the logistic function and hence still suffers from the vanishing gradient problem. The network with the logistic− function achieves an accuracy which is 11.30 % better than the network with the logistic function, but is still @@ -2345,15 +2345,15 @@ ReLU− (x ) = max(−1 , x) = ReLU (x + 1) − 1 The results of ReLU− are much worse on the training set, but perform similar on the test -61 + 5. Experimental Evaluation set. 
The result indicates that the possibility of hard zero and thus a sparse representation is either not important or similar important as the possibility to produce negative outputs. This contradicts [GBB11, SMGS14]. -A key difference between the logistic − - function and ELU is that ELU does neither suffers +A key difference between the logistic − + function and ELU is that ELU does neither suffers from the vanishing gradient problem nor is its range of values bound. For this reason, the -S2ReLU activation function, defined as +S2ReLU activation function, defined as S2ReLU( x ) = ReLU ( x 2 + 1) − ReLU (− x 2 + 1) =  @@ -2370,7 +2370,7 @@ x if − 2 ≤ x ≤ 2 x 2 + 1 if x > −2 This function is similar to SReLUs as introduced in [JXF + - 16]. The difference is that S2ReLU + 16]. The difference is that S2ReLU does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be the identity close to zero and have a smaller absolute value than the identity farther away. It is easy to compute and easy to implement. @@ -2398,7 +2398,7 @@ ELU No Yes No Table 5.10.: Properties of activation functions. 1 The dying ReLU problem is similar to the vanishing gradient problem. -62 + 5.13. Activation Functions Function Single model Ensemble of 10 Training set Test set Training set Test set @@ -2417,7 +2417,7 @@ S2ReLU 63. 32 % σ = 1.69 56 .99 % σ = 1. 14 65.80 % 59 .20 % LReLU 74. 92 % σ = 2.49 61 .86 % σ = 1. 23 77.67 % 64 .01 % PReLU 80 .01 % σ = 2.03 62 .16 % σ = 0. 73 83. 50 % 64. 79 % ELU 76. 64 % σ = 1.48 63. 38 % σ = 0. 55 78.30 % 64 .70 % -Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation +Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation functions on CIFAR-100. For LReLU, α = 0.3 was chosen. 
Function Inference per Training Epochs Mean total @@ -2447,11 +2447,11 @@ PReLU 7 ms 34 ms 28 s epoch 131 – 215 3970 s ELU 6 ms 31 ms 23 s epoch 146 – 232 3692 s -Table 5.12.: Training time and inference time of adjusted baseline models trained with different +Table 5.12.: Training time and inference time of adjusted baseline models trained with different activation functions on GTX 970 GPUs on CIFAR-100. It was expected that the -identity is the fastest function. This result is likely an implementation specific problem -of Keras 2.0.4 or Tensorflow 1.1.0. - 63 +identity is the fastest function. This result is likely an implementation specific problem +of Keras 2.0.4 or Tensorflow 1.1.0. + 5. Experimental Evaluation Function Single model Ensemble Epochs Accuracy std Accuracy Range Mean @@ -2464,34 +2464,34 @@ ReLU 99. 62 % σ = 0.04 99. 73 % 51 – 94 71.7 Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9 PReLU 99.57 % σ = 0.07 99. 73 % 44 – 89 71.2 ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5 -Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions +Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions on MNIST. 5.14. Label smoothing Ensembles consisting of n models trained by the same procedure on the same data but -initialized with different weights and trained with a different order of the training data +initialized with different weights and trained with a different order of the training data perform consistently better than single models. One drawback of ensembles in applications such as self-driving cars is that they increase the computation by a factor of n . One idea why they improve the test accuracy is by reducing the variance. The idea of label smoothing is to use the ensemble prediction of the training data as labels -for another classifier. For every element x of the training set, the one-hot encoded target +for another classifier. 
For every element x of the training set, the one-hot encoded target t (x ) is smoothed by the ensemble prediction y E (x ) -t +t (x ) = α · t (x ) + (1 − α )y E (x ) where α ∈ [0, 1] is the smoothing factor. -There are three reasons why label smoothing could be beneficial: +There are three reasons why label smoothing could be beneficial: • Training speed : The ensemble prediction contains more information about the -image than binary class decisions. Classifiers in computer vision predict how similar +image than binary class decisions. Classifiers in computer vision predict how similar the input looks to other input of the classes they are trained on. By smoothing the labels, the information that one image could also belong to another class is passed to the optimizer. In early stages of the optimization this could lead to a lower loss on the non-smoothed validation set. • Higher accuracy : Using smoothed labels for the optimization could lead to a higher -accuracy of the base-classifier due to a smoothed error surface. It might be less likely -64 +accuracy of the base-classifier due to a smoothed error surface. It might be less likely + 5.14. Label smoothing -that the classifier gets into bad local minima. +that the classifier gets into bad local minima. • Label noise : Depending on the way how the labels are obtained, it might not always be clear which label is the correct one. Also, labeling errors can be present in training datasets. Those errors severely harm the training. By smoothing the labels errors @@ -2502,23 +2502,23 @@ of an ensemble of 10 baseline models. The mean accuracy of the models trained on smoothed training set labels was63.61 % (+0. 23 %) and the standard deviation wasσ = 0.72 (+0 .17 %). The ensemble of 10 m smooth models achieved 64 .79 % accuracy (+0 .09 %). Hence -the effect of this kind of label smoothing on the final accuracy is questionable. +the effect of this kind of label smoothing on the final accuracy is questionable. 
The training speed didn’t noticeably change either: The number of trained epochs ranged from 144 to 205, the mean number of epochs was 177. The baseline training ranged from 146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training -methods accuracy differed by less than one percentage point. Hence it is unlikely that label -smoothing has a positive effect on the training speed. +methods accuracy differed by less than one percentage point. Hence it is unlikely that label +smoothing has a positive effect on the training speed. Hinton et al. called this method distil lation in [ HVD15]. Hinton et al. used smooth and hard labels for training, this work only used smoothed labels. - 65 + 5. Experimental Evaluation -5.15. Optimized Classifier -In comparison to the baseline classifier, the following changes are applied to the optimized -classifier: +5.15. Optimized Classifier +In comparison to the baseline classifier, the following changes are applied to the optimized +classifier: • Remove the bias for the last layers: For all layers which output a 1 × 1 feature map, the bias is removed • Increase the max pooling kernel to 3 × 3 -• More filters in the first layers +• More filters in the first layers The detailed architecture is given in Table 5.14 and visualized in Figure 5.16. The evaluation is given in Table 5.15 and the timing comparison is given in Table 5.16. # Type Filters @ @@ -2555,8 +2555,8 @@ use SAME padding, except for layer 11 which used VALID padding in order to decre the feature map size to 1 × 1. If the input feature map is bigger than32 × 32, for each power of two there are two Convolution + BN + ELU blocks and one Max pooling block added. This is the framed part in the table. -66 -5.15. Optimized Classifier32 × 32Input + +5.15. Optimized Classifier32 × 32Input C 69@3 × 3 / 1 BN + ELU C 69@3 × 3 / 1 @@ -2576,7 +2576,7 @@ C* k @1 × 1 / 1 Global AVG pooling BN + Softmax Figure 5.16.: Architecture of the optimized model. 
C 32@3 × 3 / 1 is a convolutional layer with -32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used. +32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used. Dataset Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set Asirra 95. 83 % σ = 4.70 90.75 % σ = 4. 73 98 . 78 % 93.09 % @@ -2592,7 +2592,7 @@ used in the ensemble. The empirical standard deviationσ of the accuracy is also CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN and HASY, no test time transformations are used. -Network GPU Tensorflow Inference per Training +Network GPU Tensorflow Inference per Training 1 Image 128 images time / epoch Optimized Default Intel i7-4930K 5 ms 432 ms 386 s Optimized Optimized Intel i7-4930K 4 ms 307 ms 315 s @@ -2605,17 +2605,17 @@ Optimized Default Titan Black 4 ms 46 ms 43 s Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [ Ma j17]. Weights the baseline model can be found at [ Tho17b]. The optimized -Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. - 67 +Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. + 5. Experimental Evaluation 5.16. Early Stopping vs More Data A separate validation set is necessary for two reasons:(1)Early stopping and(2)preventing -overfitting due to many experiments. To prevent overfitting, a different dataset can be used. +overfitting due to many experiments. To prevent overfitting, a different dataset can be used. For example, all decisions about hyperparameters in this thesis are based on CIFAR-100, -but the network is finally trained and evaluated with the same hyperparameters on all +but the network is finally trained and evaluated with the same hyperparameters on all datasets. 
2 The validation set can hence be removed if early stopping is removed. Instead, -the validation data is used in a first run to determine the number of epochs necessary for +the validation data is used in a first run to determine the number of epochs necessary for training. In a second training run the validation data is added to the training set. The number of used epochs for the second run is given in Table 5.17. Dataset Mean epochs Train data classes average data / class @@ -2632,14 +2632,14 @@ amount of used training data, the number of classes of the dataset and the avera amount of data per class is given. Alternatively, the model can be trained with early stopping (ES) purely on the training loss. All three methods – early stopping on the validation set accuracy, early stopping on -the training loss and training a fixed number of epochs are evaluated. While having more +the training loss and training a fixed number of epochs are evaluated. While having more data helped with Asirra and CIFAR-100, the results as shown in Table 5.18 on the other -datasets are only marginally different. For CIFAR-10, training with more data did not -improve the results when the number of epochs is fixed, but notably improved the results +datasets are only marginally different. For CIFAR-10, training with more data did not +improve the results when the number of epochs is fixed, but notably improved the results when the training loss was used as the early stopping criterion. 5.17. Regularization Stronger regularization might even improve the results when using the training loss as an -early stopping criterion. +early stopping criterion. 2 regularization with a weighting factor of λ = 0 .0001 is used in all other experiments. While the accuracy as shown in Table 5.19 does not show a clear pattern, the number of epochs increases with lower model regularization (see Table 5.20). 
@@ -2649,7 +2649,7 @@ pattern, the number of epochs increases with lower model regularization (see Tab Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model. 4 Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. -68 + 5.17. Regularization Dataset Early Stopping Fixed epochs val. acc train loss @@ -2664,25 +2664,25 @@ STL-10 78. 66 % 83. 25 % 78 .64 % Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy compared training setups without a validation set and thus more training data. The second column uses the training loss as a stopping criterion, the third column uses a -fixed number of epochs which is equal to the mean number of training epochs of the +fixed number of epochs which is equal to the mean number of training epochs of the models with early stopping on the validation set accuracy. λ Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set λ = 0.01 73. 83 % σ = 1.78 58.94 % σ = 1.33 87 .78 % 69. 98 % λ = 0.001 82.86 % σ = 0. 89 63.03 % σ = 0.67 91 .86 % 71. 02 % λ = 0.0001 77.96 % σ = 2. 18 64.42 % σ = 0.73 81 .44 % 67. 03 % -Table 5.19.: Different choices of +Table 5.19.: Different choices of 2 model regularization applied to the optimized model. λ min max mean std λ = 0. 01 457 503 404.6 37.2 λ = 0. 001 516 649 588.4 41.6 λ = 0. 0001 579 833 696.1 79.1 -Table 5.20.: Training time in epochs of models with early stopping on training loss by different -choices of +Table 5.20.: Training time in epochs of models with early stopping on training loss by different +choices of 2 model regularization applied to the optimized model. - 69 + 5. Experimental Evaluation -70 + 6. 
Conclusion and Outlook This master thesis gave an extensive overview over the design patterns of CNNs in Chapter 2, the methods how CNNs can be analyzed and the principle directions of topology learning @@ -2692,28 +2692,28 @@ of confusion matrices easier to read (see Figure 5.13), was introduced as a clas algorithm in Chapter 4 and evaluated in Sections 4.2 and 5.4. The important insights are: • Ordering the classes in the confusion matrix allows to display the relevant parts even for several hundred classes. -• A hierarchy of classifiers based on the classes does not improve the results on CIFAR- -100. There are three possible reasons for this: +• A hierarchy of classifiers based on the classes does not improve the results on CIFAR100. + There are three possible reasons for this: – 32 px × 32 px is too low dimensional – 100 classes are not enough for this approach – More classes are always easier to distinguish if each new class comes with more data. One reason why this might be the case is that distinguishing the ob ject -from background has similar properties even for different classes. +from background has similar properties even for different classes. • - Label smoothing had only a minor effect on the accuracy and no effect on the training -time when a single base classifier was used to train with the smoothed labels by an -ensemble of base classifiers. -A baseline model was defined and evaluated on eight publicly available datasets. The + Label smoothing had only a minor effect on the accuracy and no effect on the training +time when a single base classifier was used to train with the smoothed labels by an +ensemble of base classifiers. +A baseline model was defined and evaluated on eight publicly available datasets. The baselines topology and training setup are described in detail as well as its behavior during training and properties of the weights of the trained model. 
-The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100. +The influence of various hyperparameters is examined in Sections 5.5 to 5.12 for CIFAR-100. The insights of those experiments are: -• Averaging ensembles of 10 base classifiers of the same architecture and trained with the +• Averaging ensembles of 10 base classifiers of the same architecture and trained with the same setup consistently improve the accuracy. The amount of improvement depends -on the base classifiers, but the ensemble tends to improve the test accuracy by about +on the base classifiers, but the ensemble tends to improve the test accuracy by about one percentage point. • Wider networks learn in fewer epochs. This, however, does not mean that the -71 + 6. Conclusion and Outlook wall-clock time is lower due to increased computation in forward- and backward passes. @@ -2721,20 +2721,20 @@ passes. baseline model it also increases accuracy, which contradicts [CUH15]. • The lower the batch size, the longer the time for each epoch of training and the less epochs need to be trained. Higher accuracy by lower batch sizes was empirically -confirmed. The batch size, however, can also be too low. +confirmed. The batch size, however, can also be too low. • An analysis of the weights of the baseline indicated that the bias of layers close to -the output layer can be removed. This was experimentally confirmed. -• It could not be confirmed that learned color space transformation, as described -in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear +the output layer can be removed. This was experimentally confirmed. +• It could not be confirmed that learned color space transformation, as described +in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear unit (LReLU) and α = 0. 3. 
-• It could be confirmed that ELU networks gives better results than any other activation +• It could be confirmed that ELU networks gives better results than any other activation function on CIFAR-100. For the character datasets MNIST and HASYv2, however, ReLU, LReLU, PReLU, Softplus and ELU all performed similar. • Changing the activation functions to the identity had very little impact on the HASYv2 -and MNIST classifiers. Note that those networks are still able to learn nonlinear +and MNIST classifiers. Note that those networks are still able to learn nonlinear decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however, the accuracy drops by 6 .64 % when ELU is replaced by the identity. -Based on the results of those experiments, an optimized classifier was developed and +Based on the results of those experiments, an optimized classifier was developed and evaluated on all eight datasets. The state of the art of STL-10 was improved from 74.80 % [ ZMGL15] to 78.66 % without using the unlabeled part of the dataset. The state of the art of HASYv2 was improved @@ -2745,34 +2745,34 @@ augmentation and test-time transformations. The removal of the bias of layers cl output and re-usage of those parameters in layers close to the input as well as using 3 × 3 pooling instead of 2 × 2 pooling improved the baseline. While writing this masters thesis, several related questions could not be answered: -• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting +• Deeper CNNs have generally higher accuracy, if trained long enough and if overfitting is not a problem. But at which subsampling-level does having more layers have the -biggest effect? Can this question be answered before a deeper network is trained? +biggest effect? Can this question be answered before a deeper network is trained? • Is label smoothing helpful for noisy labels? 1 The baseline is better than the optimized model on Asirra and on HASYv2. 
-72 -• How does the choice of activation functions influence residual architectures? Could the -results be the same for different activation functions in architectures with hundreds + +• How does the choice of activation functions influence residual architectures? Could the +results be the same for different activation functions in architectures with hundreds of layers? • The results for the pooling kernel were inconclusive. Larger pooling kernels might be advantageous as well as fractional max pooling [Gra15]. -• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that -can and should be fixed? +• Why is the mean weight update (see Figure 5.8) not decreasing? Is this an effect that +can and should be fixed? • Why is softmax so much better than the logistic function? Can the reason be used to further improve ELU? -Besides those questions, the influence of optimizers on time per epoch, epochs until +Besides those questions, the influence of optimizers on time per epoch, epochs until convergence, total training time, memory consumption, accuracy of the models and standard deviation of the models was not evaluated. This, and the stopping criterion for training might be crucial for the models quality. - 73 -74 + + A. Figures, Tables and Algorithms -(a) Original image (b) Smoothing filter (c) Laplace edge detection filter -(d) Sobel edge detection filter (e) Prewitt edge detection filter (f ) Canny filter -Figure A.1.: Examples of image filters. Best viewed in electronic form. +(a) Original image (b) Smoothing filter (c) Laplace edge detection filter +(d) Sobel edge detection filter (e) Prewitt edge detection filter (f ) Canny filter +Figure A.1.: Examples of image filters. Best viewed in electronic form. 
Layer 99-percentile interval -filter bias +filter bias 1 [-0.50, 0.48] [-0.06, 0.07] 3 [-0.21, 0.19] [-0.07, 0.07] 5 [-0.20, 0.17] [-0.07, 0.05] @@ -2781,9 +2781,9 @@ Layer 99-percentile interval 11 [-0.08, 0.08] [-0.00, 0.00] 13 [-0.08, 0.08] [-0.00, 0.00] 15 [-0.10, 0.11] [-0.01, 0.01] -Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model +Table A.1.: 99-percentile intervals for filter weights and bias weights by layer of a baseline model trained on CIFAR-100. - 75 + Figure A.2.: The distribution of bias weights of a model without batch normalization trained on CIFAR-100. Algorithm 1 Simulated Annealing for minimizing Equation (4.1). @@ -2799,13 +2799,13 @@ if p < 0 .5 then Swap rows i ← randomInteger (1, . . . , n ) j ← randomInteger (1, . . . , n ) \ { i } p ← randomUniform (0, 1) -C +C ← swap (C, i, j ) -s ← accuracy (C +s ← accuracy (C ) if p < exp( s −bestScore T ) then -C ← C +C ← C if s > bestScore then bestScore ← s bestC ← C @@ -2816,9 +2816,9 @@ e ← randomInteger ( s, . . . , n ) Block end i ← randomInteger (1, . . . , n − (e − s)) Block insert position Move Block (s, . . . , e) to position i return bestM -76 + Figure A.3.: Maximum weight updates between epochs by layer. The model is the baseline model, -but with layer 5 reduced to 3 filters. +but with layer 5 reduced to 3 filters. Function Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean Identity 87.92 % σ = 0.40 84 . 69 % σ = 0.08 88 .59 % 85 . 43 % 92 – 140 114.5 @@ -2831,11 +2831,11 @@ Softplus 88.42 % σ = 0. 29 85. 16 % σ = 0.15 88 .90 % 85 . 73 % 108 – 143 12 LReLU 88.61 % σ = 0.41 85 . 21 % σ = 0 . 05 89.07 % 85 . 83 % 87 – 117 104.5 PReLU 89. 62 % σ = 0.41 85 .35 % σ = 0.17 90. 10 % 86. 01 % 85 – 111 100.5 ELU 89.49 % σ = 0.42 85 .35 % σ = 0.10 89 .94 % 86 . 
03 % 73 – 113 92.4 -Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on +Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on HASYv2. For LReLU, α = 0.3 was chosen. - 77 + Figure A.4.: Sum of weight updates between epochs by layer. The model is the baseline model, but -with layer 5 reduced to 3 filters. +with layer 5 reduced to 3 filters. Function Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean Identity 87 .49 % σ = 2. 50 69 .86 % σ = 1.41 89 .78 % 71 .90 % 51 – 65 53.4 @@ -2848,9 +2848,9 @@ Softplus 83.03 % σ = 2.07 68 .28 % σ = 1.74 93 . 04 % 75 .99 % 56 – 89 68.9 LReLU 93.83 % σ = 3.89 74 . 66 % σ = 2.11 97 . 56 % 78 .08 % 52 – 120 80.1 PReLU 95.53 % σ = 1.92 71 . 69 % σ = 1.37 98 .17 % 74 .69 % 59 – 101 78.8 ELU 95.42 % σ = 3.57 75 . 09 % σ = 2.39 98 .54 % 78 .66 % 66 – 72 67.2 -Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on +Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on STL-10. For LReLU, α = 0.3 was chosen. -78 + B. Hyperparameters Hyperparameters are parameters of models which are not optimized automatically (e.g., by gradient descent), but by methods like random search [ BB12], grid search [ LBOM98] or @@ -2859,7 +2859,7 @@ B.1. Preprocessing Preprocessing used to be of ma jor importance in machine learning. However, with the availability of data sets with hundreds of examples per class and the possibility of CNNs to learn features themselves, most models today rely on raw pixel values. The only common -preprocessing is size normalization. In order to get a fixed input-size for a CNN, the +preprocessing is size normalization. In order to get a fixed input-size for a CNN, the following procedure can be used: • Take one or multiple crops of the image which have the desired aspect ratio. • Scale the crop(s) to the desired size. 
@@ -2873,18 +2873,18 @@ Other preprocessing methods are: • Dimensionality reduction – Principal component analysis (PCA): An unsupervised linear transformation -which can be learned in the first hidden layer. It is hence doubtful if PCA +which can be learned in the first hidden layer. It is hence doubtful if PCA improves the network. – Linear discriminant analysis (LDA) • Zero Components Analysis (ZCA) whitening (used by [KH09]) - 79 + B.2. Data augmentation -Data augmentation techniques aim at making artificially more data from real data items by +Data augmentation techniques aim at making artificially more data from real data items by applying invariances. For computer vision, they include: Name Augmentation Factor Used by -Horizontal flip 2 [KSH12, WYS + +Horizontal flip 2 [KSH12, WYS + 15] -Vertical flip 2 [DWD15] 1 +Vertical flip 2 [DWD15] 1 Rotation ∼ 40 (δ = 20) [DSRB14] Scaling ∼ 14 (δ ∈ [0.7 , 1 .4]) [DSRB14] Crops 322 @@ -2914,9 +2914,9 @@ Less common, but also reasonable are: • Lens distortion (used by [WYS+ 15]) 1 - Vertical flipping combined with 180◦ - rotation is equivalent to horizontal flipping -80 + Vertical flipping combined with 180◦ + rotation is equivalent to horizontal flipping + B.3. Initialization Weight initializations are usually chosen to be small and centered around zero. One way to characterize many initialization schemes is by @@ -2925,16 +2925,16 @@ Table B.2 shows six commonly used weight initialization schemes. Several schemes same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. Name α β γ Reference Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] -Xavier/Glorot uniform α = +Xavier/Glorot uniform α = 6 n in + n out β = 0 γ = 0 [GB10] -Xavier/Glorot normal α = 0 β = +Xavier/Glorot normal α = 0 β = 2 (n in +n -out ) +out ) 2 γ = 0 [GB10] He α = 0 β = 2 @@ -2951,9 +2951,9 @@ unit-coadaptation. However, dropout makes it possible to use constant initializa all parameters. 
LSUV and Orthogonal initialization cannot be described with this simple pattern. B.4. Objective function -For classification tasks, the cross-entropy +For classification tasks, the cross-entropy E -C E ( W ) = − +C E ( W ) = − x ∈X K k =1 [ tx @@ -2965,16 +2965,16 @@ is by far the most commonly used ob jective function (e.g., used by [ZF14]). In X is the set of training examples, K is the number of classes, tx k ∈ { 0, 1 } indicates if the training example x is of class k , o x -k is the output of the classifier for the training examplex +k is the output of the classifier for the training examplex and class k . However, regularization terms weighted with a constant λ ∈ (0, +∞ ) are sometimes added: -• LASSO: +• LASSO: 1 (e.g., used in [HPTD15]) -• Weight decay: +• Weight decay: 2 (e.g., λ = 0.0005 as in [MSM16]) • Orthogonality regularization (|(W T · W − I )|, see [VTKP17]) - 81 + B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule @@ -3035,10 +3035,10 @@ until the learning rate is decreased by 1 • Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential Decay Scheduling. • Adam and AdaMax [KB14] -82 + • Nadam [Doz15] Some of those are explained in [Rud16]. -Other first-order gradient optimization methods are: +Other first-order gradient optimization methods are: • Quickprop [Fah88] • Nesterov Accellerated Momentum (NAG) [Nes83] • Conjugate Gradient method [ Cha92]: Combines a line search for the step size with @@ -3052,11 +3052,11 @@ However, there are alternatives which do not use gradient information: on [Tho14b] There are also approaches which learn the optimization algorithm [ADG+ 16, LM16]. - 83 + B.6. 
Network Design CNNs have the following hyperparameters: • Depth: The number of layers -• Width : The number of filters per layer +• Width : The number of filters per layer • Layer and block connectivity graph • Layer and block hyperparameters: – Activation Functions as shown in Table B.3 @@ -3114,7 +3114,7 @@ j = ex k =1 ex k [0, 1]K o (x ) - j · + j · K k =1 e x k @@ -3132,18 +3132,18 @@ x ∈x x (−∞, +∞)  i = max x 0 otherwise [GWFM + 13] -Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 +Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 and functions marked with ‡ operate on all elements of a layer simultaneously. The hyperparameters α ∈ (0 , 1) of Leaky ReLU and ELU are typically α = 0 .01 . Other activation function like randomized leaky ReLUs exist [ XWCL15 ], but are far less commonly used. Some functions are smoothed versions of others, like the logistic function for the Heaviside step function, tanh for the sign function, softplus for ReLU. -Softmax is the standard activation function for the last layer of a classification network +Softmax is the standard activation function for the last layer of a classification network as it produces a probability distribution. See Figure B.1 for a plot of some of them. 2 α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. -84 + −2. 0 −1 .5 −1 .0 −0. 5 0.5 1 .0 1. 5 2 .0 −1. 0−0. 50. 51. 01 .52 .0 xy @@ -3164,10 +3164,10 @@ Figure B.1.: Activation functions plotted in [ − 2 , +2] . tanh and ELU are ab numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, whereas tanh and the logistic function are always below 1. B.7. Regularization -Regularization techniques aim to make the fitted function smoother and reduce overfitting. +Regularization techniques aim to make the fitted function smoother and reduce overfitting. 
Regularization techniques are: -• - 1 , +• + 1 , 2 , and Orthogonality regularization: See Appendix B.4 • Max-norm regularization (e.g. used ins [SHK+ 14]) @@ -3184,14 +3184,14 @@ Regularization techniques are: • Dense-Sparse-Dense training (see [HPN+ 16]) • Soft targets (see [HVD15]) - 85 -86 + + C. Calculating Network Characteristics C.1. Parameter Numbers • A fully connected layer with n nodes, k inputs has n · (k + 1) parameters. The +1 is due to the bias. • A convolutional layer i with k -i filters of size n × m being applied to k +i filters of size n × m being applied to k i −1 feature maps has k i · k @@ -3202,10 +3202,10 @@ i · k n · ( k · m 1 · m 2 + 1) parameters. -• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has L + n · 32 +• A dense block with a depth of L, a growth rate of n and 3 × 3 filters has L + n · 32 + 32 - · n 2 + · n 2 L i =0 (L − i) = L + 9 n + 9 n 2 L2 −L @@ -3228,11 +3228,11 @@ additions / multiplications before the non-linearityϕ is calculated. The total of FLOPs is 2 · n · k + n · n ϕ . • In the following, biases are ignored. A convolutional layer withk -i filters of size n × m +i filters of size n × m being applied to k -i − 1 filter maps of size w × h results in k -i filter maps of size w × h if -padding is applied. For each element of each filter map,n · m · k +i − 1 filter maps of size w × h results in k +i filter maps of size w × h if +padding is applied. For each element of each filter map,n · m · k i − 1 multiplications and (n · m · k i − 1 − 1) additions have to be made. This results in(2nmk @@ -3245,7 +3245,7 @@ i · w · h · n ϕ . This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. - 87 + • A fully connected layer with n nodes after k feature maps of size w × h needs 2n (k · w · h ) FLOPs. The total number of FLOPs is 2 n · (k · w · h) + n · n ϕ . @@ -3253,7 +3253,7 @@ FLOPs. 
The total number of FLOPs is 2 n · (k · w · h) + n · n • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. For a feature map of size w × h a max pooling -filter with stride s gets applied w · h +filter with stride s gets applied w · h s 2 . The number of FLOPs per application depends on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs. • @@ -3269,35 +3269,35 @@ Here are some references which give information for the FLOPs: – 15484M in total [HPTD15]. – 31000M in total in Table D.3. • GoogleNet: 1566M in total [HPTD15]. -One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same +One can see that the numbers are by a factor of 2 up to a factor of 4 different for the same network. C.3. Memory Footprint The memory footprint of CNNs determines when networks can be used at all and if they -can be trained efficiently. In order to be able to train CNNs efficiently, one weight update -step has to fit in the memory of the GPU. This includes the following: +can be trained efficiently. In order to be able to train CNNs efficiently, one weight update +step has to fit in the memory of the GPU. This includes the following: • Activations: All activations of one mini-batch in order to calculate the gradients -in the backward pass. This is the number of floats in the feature maps of all weight +in the backward pass. This is the number of floats in the feature maps of all weight layers combined. • Weights • Optimization algorithm : The optimization algorithm introduces some overhead. For example, Adam stores two parameters per weights. -At inference time, every two consecutive layers have to fit into memory. When the forward +At inference time, every two consecutive layers have to fit into memory. When the forward pass of layer A to layer B is calculated, the memory can be freed if no skip connections are used. -88 + D. 
Common Architectures -In the following, some of the most important CNN architectures are explained. Understand- -ing the development of these architectures helps understanding critical insights the machine +In the following, some of the most important CNN architectures are explained. Understanding + the development of these architectures helps understanding critical insights the machine learning community got in the past years for convolutional networks for image recognition. It starts with LeNet-5 from 1998, continues with AlexNet from 2012, VGG-16 D from 2014, the Inception modules v1 to v3 as well as ResNets in 2015. The recently developed Inception-v4 is also covered. -The summation row gives the sum of all floats for the output size column. This allows +The summation row gives the sum of all floats for the output size column. This allows conclusions about the maximum mini-batch size which can be in memory for training. - 89 + D.1. LeNet-5 -One of the first CNNs used was LeNet-5 [ LBBH98 ]. LeNet-5 uses two times the common +One of the first CNNs used was LeNet-5 [ LBBH98 ]. LeNet-5 uses two times the common pattern of a single convolutional layer withtanh as a non-linear activation function followed by a pooling layer and three fully connected layers. One fully connected layer is used to get the right output dimension, another one is necessary to allow the network to learn a @@ -3321,14 +3321,14 @@ Table D.1.: LeNet-5 architecture: After layers 1, 3, 5 and 6 the tanh activation After layer 7, the softmax function is applied. One can see that convolutional layer need much fewer parameters, but an order of magnitude more FLOPs per parameter than fully connected layers. -90 + D.2. AlexNet -The first CNN which achieved ma jor improvements on the ImageNet dataset was AlexNet [KSH12]. +The first CNN which achieved ma jor improvements on the ImageNet dataset was AlexNet [KSH12]. Its architecture is shown in Figure D.2 and described in Table D.2. 
It has about60· 106 - param- -eters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet. -Note that the uncompressed size is at least 60 965 224 floats · 32 bit -float ≈ 244 MB. + parameters. + A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet. +Note that the uncompressed size is at least 60 965 224 floats · 32 bit +float ≈ 244 MB. Figure D.2.: Architecture of AlexNet as shown in [ KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). @@ -3358,17 +3358,17 @@ The FLOPs are taken from [ HPTD15 ] and combined with rough estimates for Local Contrast Normalization and max pooling. The calculated number of parameters was checked against the downloaded version. It also has 60 965 224 parameters. - 91 + D.3. VGG-16 D Another widespread architecture is the VGG-16 (D) [ SZ14]. VGG comes from the V isual Geometry Group in Oxford which developed this architecture. It has 16 layers which can -learn parameters. A ma jor difference compared to AlexNet is that VGG-16 uses only3 × 3 -filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a +learn parameters. A ma jor difference compared to AlexNet is that VGG-16 uses only3 × 3 +filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a detailed textual description is given in Table D.3. -A trained VGG-16 D for Tensorflow can be downloaded athttps://github.com/machrisaa/ -tensorflow- vgg . Note that the uncompressed size is at least 138 357 544 floats · 32 bit -float ≈ -520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and +A trained VGG-16 D for Tensorflow can be downloaded athttps://github.com/machrisaa/ +tensorflow- vgg . Note that the uncompressed size is at least 138 357 544 floats · 32 bit +float ≈ +520 MB. 
The downloaded Numpy binary file npz needs 553 MB without compression and 514 MB with compression.224 × 224Input C 64@3 × 3 /1 C 64@3 × 3 /1 112 × 112max pooling 2 × 2 /1 @@ -3388,9 +3388,9 @@ Dropout, p = 0.5 Fully Connected 4096 Dropout, p = 0.5 Fully Connected 1000 -Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of +Figure D.3.: Architecture of VGG-16 D. C 512@3 × 3/1 is a convolutional layer with 512 filters of kernel size 3 × 3 with stride 1. All convolutional layers use SAME padding. -92 + # Type Filters @ Patch size / stride Parameters FLOPs Output size Input 3 @ 224 × 224 @@ -3426,34 +3426,34 @@ a preprocessing step ( −103 . 939 , − 116 .779 , − 123 .68 ). As Dropout i during training time, the number of FLOPs is 0. The dropout probability is 0. 5. The calculated number of parameters was checked against the downloaded version. It also has 138 357 544 parameters. - 93 + D.4. GoogleNet, Inception v2 and v3 The large number of parameters and operations is a problem when such models should get applied in practice to thousands of images. In order to reduce the computational cost while -maintaining the classification quality, GoogleNet [SLJ+ +maintaining the classification quality, GoogleNet [SLJ+ 15] and the Inception module were -developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and -5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of +developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and +5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of parameters. It is shown in Figure D.4. Figure D.4.: Inception module Image source: [SLJ+ 15] Compared to GoogleNet, Inception v2 [ SVI+ - 15] removed the 5 × 5 filters and replaced -them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module -is given in Figure D.5. 
Additionally, Inception v2 applies successive asymmetric filters to -approximate symmetric filters with fewer parameters. The authors call this approachfilter + 15] removed the 5 × 5 filters and replaced +them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module +is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to +approximate symmetric filters with fewer parameters. The authors call this approachfilter factorization. Inception v3 introduced Batch Normalization to the network [SVI+ 15]. Figure D.5.: Inception v2 module Image source: [SVI+ 15] -94 + D.5. Inception-v4 Inception-v4 as described in [ SIV16] consists of four main building blocks: The stem, Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper, -wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A +wider and more uniform simplified architecture than Inception-v3. The stem, Reduction A and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use average pooling. The stem, module B and module C use separable convolutions. # × Type Parameters Output size @@ -3471,12 +3471,12 @@ Dropout (p=0.8) 0 1536 @ 1 × 1 42 679 816 Table D.4.: Inception-v4 network. - 95 -96 + + E. Datasets -Well-known benchmark datasets for classification problems in computer vision are listed +Well-known benchmark datasets for classification problems in computer vision are listed in Table E.1. The best results known to me are given in Table E.2. However, every semantic -segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers +segmentation dataset (e.g., PASCAL VOC) can also be used to benchmark image classifiers using Algorithm 2. 
Database Image Resolution (width × height) Number @@ -3511,7 +3511,7 @@ Asirra3 (4 px − 500 px) ×(4 px − 500 px) 25 000 2 3 [Asi17, EDHS07] Graz-02 480 px × 640 px and 640 px × 480 px 1096 3 3 [Mar08, MS07] -Table E.1.: An overview over publicly available image databases for classification. The number +Table E.1.: An overview over publicly available image databases for classification. The number of images row gives the sum of the training and the test images. Some datasets, like SVHN, have additional unlabeled data which is not given in this table. 1 @@ -3520,7 +3520,7 @@ SVHN, have additional unlabeled data which is not given in this table. The dimensions are only calculated for the validation set. 3 Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle -97 + Dataset Model type / name Result Score Achieved / Claimed by MNIST — 0 .21 % error [WZZ+ @@ -3537,7 +3537,7 @@ GTSRB MCDNN 99.46 % accuracy [SL11] Asirra SVM 82. 7 % accuracy [Gol08] Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10] Table E.2.: An overview over state of the art results achieved in computer vision datasets. -Algorithm 2 Create a classification dataset from a semantic segmentation dataset +Algorithm 2 Create a classification dataset from a semantic segmentation dataset Require: Semantic segmentation dataset (D S ) procedure CreateDataset (Annotated dataset D @@ -3560,15 +3560,15 @@ I , c L )) return (D C ) -98 + F. List of Tables 2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 5.2 Baseline model evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 5.3 Baseline model speed comparison . . . . . . . . . . . . . . . . . . . . . . . . 40 5.4 Clustering errors for spectral clustering and CMO on CIFAR-100 . . . . . . 52 -5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 
52 -5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 53 +5.5 Differences in spectral clustering and CMO. . . . . . . . . . . . . . . . . . . 52 +5.6 Accuracies for hierarchy of classifiers on CIFAR-100 . . . . . . . . . . . . . . 53 5.7 Parameters of models with increased capacity . . . . . . . . . . . . . . . . . 54 5.8 Training time for models with increased capacity . . . . . . . . . . . . . . . 54 5.9 Baseline model training time . . . . . . . . . . . . . . . . . . . . . . . . . . 59 @@ -3583,7 +3583,7 @@ F. List of Tables 5.18 Optimized model trained with early stopping vs training with more data . . 69 5.19 Model regularization with early stopping on training loss . . . . . . . . . . . 69 5.20 Model regularization with early stopping on training loss - Training time . . 69 -A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75 +A.1 99-percentile intervals for filter weights on CIFAR-100 . . . . . . . . . . . . 75 A.2 Activation function evaluation results on HASYv2 . . . . . . . . . . . . . . . 77 A.3 Activation function evaluation results on STL-10 . . . . . . . . . . . . . . . 78 B.1 Data augmentation techniques . . . . . . . . . . . . . . . . . . . . . . . . . . 80 @@ -3593,12 +3593,12 @@ D.1 LeNet-5 architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 93 D.4 Inception-v4 network . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 95 -99 + E.1 Image Benchmark datasets . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 E.2 State of the Art results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 98 -100 + G. List of Figures -2.1 Application of a single image filter (Convolution) . . . . . . . . . . . . . . . 3 +2.1 Application of a single image filter (Convolution) . . . . 
. . . . . . . . . . . 3 2.2 Application of a convolutional layer . . . . . . . . . . . . . . . . . . . . . . . 6 2.3 Max pooling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 2.4 ResNet module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 @@ -3612,11 +3612,11 @@ G. List of Figures 3.1 Cascade-correlation network . . . . . . . . . . . . . . . . . . . . . . . . . . . 28 4.1 Class Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33 5.1 Baseline architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 -5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42 +5.2 Baseline model filter weight distribution . . . . . . . . . . . . . . . . . . . . 42 5.3 Baseline model bias weight distribution . . . . . . . . . . . . . . . . . . . . . 42 5.4 Baseline model γ distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 5.5 Baseline model β distribution . . . . . . . . . . . . . . . . . . . . . . . . . . 43 -5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44 +5.6 Baseline model filter weight range distribution . . . . . . . . . . . . . . . . . 44 5.7 Baseline model CIFAR-100 validation accuracy . . . . . . . . . . . . . . . . 45 5.8 Baseline Weight updates (mean) . . . . . . . . . . . . . . . . . . . . . . . . 46 5.9 Baseline Weight updates (maximum) . . . . . . . . . . . . . . . . . . . . . . 47 @@ -3629,7 +3629,7 @@ G. List of Figures 5.16 Optimized architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 67 A.1 Image Filters . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75 A.2 Bias weight distribution without BN . . . . . . . . . . . . . . . . . . . . . . 76 -101 + A.3 Maximum weight updates of baseline with bottleneck . . . . . . . . . . . . . 77 A.4 Sum of weight updates of baseline with bottleneck . . . . . . . . . . . . . . 
78 B.1 Activation functions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 85 @@ -3638,10 +3638,10 @@ D.2 AlexNet architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . D.3 VGG-16 D architecture . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 92 D.4 Inception module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 -102 + H. Bibliography [AAB+ - 16] M. Abadi, A. Agarwal et al. , “Tensorflow: Large-scale machine learning on + 16] M. Abadi, A. Agarwal et al. , “Tensorflow: Large-scale machine learning on heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467 , Mar. 2016. [Online]. Available: https://arxiv . org/abs/1603. 04467 [ABKS99] M. Ankerst, M. M. Breunig et al., “ OPTICS: Ordering points to identify the @@ -3651,12 +3651,12 @@ clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp. 16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by gradient descent,” in Advances in Neural Information Processing Systems 29 (NIPS) , D. D. Lee, M. Sugiyama et al. , Eds. Curran Associates, Inc., Mar. -2016, pp. 3981–3989. [Online]. Available: http://papers .nips .cc/paper/6461- -learning-to- learn-by-gradient- descent- by- gradient-descent.pdf +2016, pp. 3981–3989. [Online]. Available: http://papers .nips .cc/paper/6461learning-to- + learn-by-gradient- descent- by- gradient-descent.pdf [AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism: -Going deeper into neural networks,” Jun. 2015. [Online]. Avail- -able: https://research . googleblog . com/2015/06/inceptionism-going-deeper- -into- neural.html +Going deeper into neural networks,” Jun. 2015. [Online]. Available: + https://research . googleblog . com/2015/06/inceptionism-going-deeperinto- + neural.html [Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. 
Available: https: //www .microsoft.com/en-us/download/details .aspx?id=54765 [BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,” @@ -3669,13 +3669,13 @@ bergstra12a/bergstra12a .pdf asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online]. Available: https://arxiv .org/abs/1703.10155 [BDLB09] - J. Bergstra, G. Desjardins et al. , “Quadratic polynomials learn better im- -age features,” Département d’Informatique et de Recherche Opérationnelle, + J. Bergstra, G. Desjardins et al. , “Quadratic polynomials learn better image + features,” Département d’Informatique et de Recherche Opérationnelle, Université de Montréal, Tech. Rep. 1337, 2009. [BGNR16] B. Baker, O. Gupta et al. , “Designing neural network architectures using reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online]. Available: https://arxiv .org/abs/1611.02167 - 103 + [BM93] U. Bodenhausen and S. Manke, Automatical ly Structured Neural Networks For Handwritten Character And Word Recognition . London: Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http: @@ -3689,10 +3689,10 @@ feature pooling in visual recognition,” in International Conference on Machine Learning (ICML) , no. 27, 2010, pp. 111–118. [Online]. Available: http://yann .lecun.com/exdb/publis/pdf/boureau- icml-10 .pdf [BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies -with gradient descent is difficult,” IEEE transactions on neural networks , +with gradient descent is difficult,” IEEE transactions on neural networks , vol. 5, no. 2, pp. 157–166, 1994. -[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training -of artificial neural networks,” IEEE Proceedings G-Circuits, Devices +[Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training +of artificial neural networks,” IEEE Proceedings G-Circuits, Devices and Systems , vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: http://ieeexplore. 
ieee.org/document/143326/ [Cho15]F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015. @@ -3703,7 +3703,7 @@ coatesleeng_aistats_2011.pdf [CLN11] A. Coates, H. Lee, and A. Y. Ng, “ STL-10 dataset,” 2011. [Online]. Available: http://cs .stanford.edu/~acoates/stl10 [CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural -networks for image classification,” in Conference on Computer Vision and +networks for image classification,” in Conference on Computer Vision and Pattern Recognition (CVPR) . IEEE, Feb. 2012, pp. 3642–3649. [Online]. Available: https://arxiv .org/abs/1202. 2745v1 [CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate @@ -3711,17 +3711,17 @@ deep network learning by exponential linear units (ELUs),” arXiv preprint arXiv:1511.07289 , Nov. 2015. [Online]. Available: https: //arxiv .org/abs/1511. 07289 [CWV+ - 14] S. Chetlur, C. Woolley et al. , “ cuDNN: Efficient primitives for deep + 14] S. Chetlur, C. Woolley et al. , “ cuDNN: Efficient primitives for deep learning,” arXiv preprint arXiv:1410.0759 , Oct. 2014. [Online]. Available: https://arxiv .org/abs/1410.0759 -104 + [DBB + 01] C. Dugas, Y. Bengio et al. , “Incorporating second-order functional -knowledge for better option pricing,” in Advances in Neural Infor- -mation Processing Systems 13 (NIPS) , T. K. Leen, T. G. Dietterich, +knowledge for better option pricing,” in Advances in Neural Information + Processing Systems 13 (NIPS) , T. K. Leen, T. G. Dietterich, and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online]. -Available: http://papers .nips .cc/paper/1920-incorporating-second-order- -functional-knowledge- for-better-option- pricing .pdf +Available: http://papers .nips .cc/paper/1920-incorporating-second-orderfunctional-knowledge- + for-better-option- pricing .pdf [DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry in convolutional neural networks,” arXiv preprint arXiv:1602.02660 , Feb. 2016. [Online]. 
Available: https://arxiv . org/abs/1602. 02660 @@ -3745,26 +3745,26 @@ pro j2015/054_report .pdf feature learning with convolutional neural networks,” in Advances in Neural Information Processing Systems 27 (NIPS) , Z. Ghahramani, M. Welling et al. , Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. -Available: http://papers . nips . cc/paper/5548-discriminative-unsupervised- -feature-learning- with- convolutional- neural-networks.pdf +Available: http://papers . nips . cc/paper/5548-discriminative-unsupervisedfeature-learning- + with- convolutional- neural-networks.pdf [DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional neural networks for galaxy morphology prediction,” Monthly notices of the royal astronomical society , vol. 450, no. 2, pp. 1441–1459, 2015. [EDHS07] J. Elson, J. J. Douceur et al. , “Asirra: A CAPTCHA that -exploits interest-aligned manual image categorization,” in ACM Con- -ference on Computer and Communications Security (CCS) , no. 14. +exploits interest-aligned manual image categorization,” in ACM Conference + on Computer and Communications Security (CCS) , no. 14. Association for Computing Machinery, Inc., Oct. 2007. [Online]. -105 -Available: https://www .microsoft .com/en-us/research/publication/asirra-a- -captcha- that-exploits- interest-aligned- manual-image- categorization/ + +Available: https://www .microsoft .com/en-us/research/publication/asirra-acaptcha- + that-exploits- interest-aligned- manual-image- categorization/ [EKS+ 96] M. Ester, H.-P. Kriegel et al. , “A density-based algorithm for discovering clusters in large spatial databases with noise.” in Kdd , vol. 96, no. 34, 1996, pp. 226–231. [ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing . -Springer, 2003, vol. 53. [Online]. Available: https://dx. doi. org/10. 1007/978- 3- -662- 44874- 8 +Springer, 2003, vol. 53. [Online]. Available: https://dx. doi. org/10. 1007/978- 3662- + 44874- 8 [Fah88] S. E. 
Fahlman, “An empirical study of learning speed in back-propagation networks,” 1988. [Online]. Available: http://repository .cmu .edu/cgi/ viewcontent. cgi?article=2799&context=compsci @@ -3774,30 +3774,30 @@ vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http: //vision.stanford. edu/documents/Fei-FeiFergusPerona2006.pdf [FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http: //www . vision.caltech .edu/Image_Datasets/Caltech101/Caltech101.html -[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al. , “Ob ject detection with discrimina- -tively trained part-based models,” IEEE transactions on pattern analysis and +[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al. , “Ob ject detection with discriminatively + trained part-based models,” IEEE transactions on pattern analysis and machine intel ligence, vol. 32, no. 9, pp. 1627–1645, 2010. [FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,” 1989. [Online]. Available: http://repository . cmu .edu/compsci/1938/ -[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep +[GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep feedforward neural networks.” in Aistats , vol. 9, 2010, pp. 249–256. [Online]. Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf -[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural +[GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural networks.” in Aistats , vol. 15, no. 106, 2011, p. 275. [Online]. Available: http://www .jmlr.org/proceedings/papers/v15/glorot11a/glorot11a. pdf [GDDM14] R. Girshick, J. Donahue et al. , “Rich feature hierarchies for accurate ob ject detection and semantic segmentation,” in Conference on Computer Vision and Pattern Recognition (CVPR) . IEEE, 2014, pp. 580–587. [Online]. Available: https://arxiv .org/abs/1311. 2524 -[GG07] P. P. 
Greg Griffin, Alex Holub, “Caltech-256 ob ject category dataset,” Apr. +[GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 ob ject category dataset,” Apr. 2007. [Online]. Available: http://authors .library .caltech . edu/7694/ -106 + [GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with Bernoulli approximate variational inference,”arXiv preprint arXiv:1506.02158, Jan. 2016. [Online]. Available: https://arxiv . org/abs/1506. 02158v6 [GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman New York, 2002, vol. 29. -[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete +[GJS76] M. R. Garey, D. S. Johnson, and L. Stockmeyer, “Some simplified NP-complete graph problems,” Theoretical computer science , vol. 1, no. 3, pp. 237–267, 1976. [Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” inACM @@ -3806,7 +3806,7 @@ conference on Computer and communications security (CCS), no. 15. ACM, [Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May 2015. [Online]. Available: https://arxiv . org/abs/1412. 6071 -[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: +[Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: http://www . vision.caltech . edu/Image_Datasets/Caltech256/ [GWFM+ 13] I. J. Goodfellow, D. Warde-Farley et al. , “Maxout networks.” ICML , @@ -3827,21 +3827,21 @@ https://arxiv . org/abs/1608. 06993v1 preprint arXiv:1611.04231 , Nov. 2016. [Online]. Available: https: //arxiv .org/abs/1611.04231 [How13] A. G. Howard, “Some improvements on deep convolutional neural network -based image classification,” arXiv preprint arXiv:1312.5402 , Dec. 2013. +based image classification,” arXiv preprint arXiv:1312.5402 , Dec. 2013. [Online]. Available: https://arxiv . org/abs/1312.5402 - 107 + [HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques . Elsevier, 2011. [HPN+ 16] S. Han, J. Pool et al. 
, “ DSD: Regularizing deep neural networks with -dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016. +dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016. [Online]. Available: https://arxiv .org/abs/1607.04381 -[HPTD15] S. Han, J. Pool et al. , “Learning both weights and connections for efficient +[HPTD15] S. Han, J. Pool et al. , “Learning both weights and connections for efficient neural network,” in Advances in Neural Information Processing Systems 28 (NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun. -2015, pp. 1135–1143. [Online]. Available: http://papers .nips .cc/paper/5784- -learning-both-weights- and- connections-for- efficient- neural- network.pdf +2015, pp. 1135–1143. [Online]. Available: http://papers .nips .cc/paper/5784learning-both-weights- + and- connections-for- efficient- neural- network.pdf [HSK+ 12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580 , Jul. @@ -3850,7 +3850,7 @@ co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580 , Jul. 16] G. Huang, Y. Sun et al. , “Deep networks with stochastic depth,” arXiv preprint arXiv:1603.09382 , Mar. 2016. [Online]. Available: https: //arxiv .org/abs/1603. 09382 -[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon +[HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon and general network pruning,” in International Conference on Neural Networks . IEEE, 1993, pp. 293–299. [Online]. Available: http: //ee. caltech .edu/Babak/pubs/conferences/00298572.pdf @@ -3865,24 +3865,24 @@ https://arxiv .org/abs/1406.4729 [HZRS15a] K. He, X. Zhang et al. , “Deep residual learning for image recognition,” arXiv preprint arXiv:1512.03385 , Dec. 2015. [Online]. Available: https: //arxiv .org/abs/1512. 03385v1 -[HZRS15b] K. He, X. 
Zhang et al., “Delving deep into rectifiers: Surpassing human-level -performance on imagenet classification,” in International Conference on +[HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level +performance on imagenet classification,” in International Conference on Computer Vision (ICCV) , Feb. 2015, pp. 1026–1034. [Online]. Available: https://arxiv .org/abs/1502.01852 [Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),” -108 + 2012. [Online]. Available: http://www .image-net .org/challenges/LSVRC/ 2012/nonpub-downloads -[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network +[IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network training by reducing internal covariate shift,”arXiv preprint arXiv:1502.03167, Feb. 2015. [Online]. Available: https://arxiv . org/abs/1502.03167 [JXF+ - 16] X. Jin, C. Xu et al. , “Deep learning with s-shaped rectified linear activation -units,” in Thirtieth AAAI Conference on Artificial Intel ligence , Dec. 2016. + 16] X. Jin, C. Xu et al. , “Deep learning with s-shaped rectified linear activation +units,” in Thirtieth AAAI Conference on Artificial Intel ligence , Dec. 2016. [Online]. Available: https://arxiv . org/abs/1512.07030 [Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr. -2011. [Online]. Available: http://karpathy .github .io/2011/04/27/manually- -classifying-cifar10/ +2011. [Online]. Available: http://karpathy .github .io/2011/04/27/manuallyclassifying-cifar10/ + [KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,” arXiv preprint arXiv:1412.6980 , Dec. 2014. [Online]. Available: https: //arxiv .org/abs/1412.6980 @@ -3908,20 +3908,20 @@ cluster analysis. John Wiley & Sons, 2009, vol. 344. and neural network approximation,” IEEE Transactions on Information Theory , vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: http://ieeexplore.ieee. 
org/abstract/document/971754/ - 109 -[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification + +[KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification with deep convolutional neural networks,” in Advances in Neural Information Processing Systems 25 (NIPS) , F. Pereira, C. J. C. Burges et al. , Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. -Available: http://papers .nips .cc/paper/4824-imagenet-classification-with- -deep-convolutional-neural- networks .pdf +Available: http://papers .nips .cc/paper/4824-imagenet-classification-withdeep-convolutional-neural- + networks .pdf [KSlB+ 10] K. Kavukcuoglu, P. Sermanet et al. , “Learning convolutional feature hierarchies for visual recognition,” in Advances in Neural Information -Processing Systems 23 (NIPS) , J. D. Lafferty, C. K. I. Williams +Processing Systems 23 (NIPS) , J. D. Lafferty, C. K. I. Williams et al. , Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. -Available: http://papers .nips . cc/paper/4133-learning- convolutional-feature- -hierarchies- for-visual- recognition .pdf +Available: http://papers .nips . cc/paper/4133-learning- convolutional-featurehierarchies- + for-visual- recognition .pdf [LAE + 16] W. Liu, D. Anguelov et al. , “ SSD: Single shot multibox detector,” in European Conference on Computer Vision (ECCV) . Springer, 2016, pp. @@ -3931,12 +3931,12 @@ en/latest/modules/layers/noise .html#lasagne .layers.DropoutLayer [LBBH98] Y. LeCun, L. Bottou et al. , “Gradient-based learning applied to document recognition,” Proceedings of the IEEE , vol. 86, no. 11, pp. 2278–2324, Nov. -1998. [Online]. Available: http://yann .lecun .com/exdb/publis/pdf/lecun- -01a.pdf +1998. [Online]. Available: http://yann .lecun .com/exdb/publis/pdf/lecun01a.pdf + [LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature , vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. 
Available: http://www .nature.com/nature/journal/v521/n7553/abs/nature14539 .html -[LBOM98] Y. A. LeCun, L. Bottou et al. , Efficient BackProp , ser. Lecture Notes in +[LBOM98] Y. A. LeCun, L. Bottou et al. , Efficient BackProp , ser. Lecture Notes in Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol. 1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3- 540- 49430- 8 [LDS+ @@ -3949,13 +3949,13 @@ learning,” in International conference on acoustics, speech and signal processing . IEEE, 2013, pp. 8595–8598. [Online]. Available: http: //ieeexplore.ieee. org/stamp/stamp.jsp?arnumber=6639343 [LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in -110 + Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2016, pp. 4013–4021. [Online]. Available: https://arxiv .org/abs/1509.09308 [LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in convolutional neural networks: Mixed, gated, and tree,” in International -Conference on Artificial Intel ligence and Statistics , 2016. [Online]. Available: +Conference on Artificial Intel ligence and Statistics , 2016. [Online]. Available: https://arxiv . org/abs/1509. 08985v2 [LH16] I. Loshchilov and F. Hutter, “ SGDR: stochastic gradient descent @@ -3983,12 +3983,12 @@ Machine Learning (ICML) , 2015, pp. 2113–2122. [MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of Machine Learning Research , vol. 9, no. Nov, pp. 2579–2605, 2008. [MHN13] - A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities + A. L. Maas, A. Y. Hannun, and A. Y. Ng, “Rectifier nonlinearities improve neural network acoustic models,” in Proc. ICML , vol. 30, no. 1, 2013. [Online]. Available: https://web .stanford . edu/~awni/papers/ -relu_hybrid_icml2013_final. pdf +relu_hybrid_icml2013_final. pdf [MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv -111 + preprint arXiv:1511.06422 , Nov. 2015. 
[Online]. Available: https: //arxiv .org/abs/1511. 06422 [MP43] @@ -3996,7 +3996,7 @@ preprint arXiv:1511.06422 , Nov. 2015. [Online]. Available: https: nervous activity,” The bul letin of mathematical biophysics , vol. 5, no. 4, pp. 115–133, 1943. [MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for -reducing dataset bias in person re-identification,” inInternational Conference +reducing dataset bias in person re-identification,” inInternational Conference on Advanced Video and Signal Based Surveil lance (AVSS), no. 12, Aug. 2015, pp. 1–6. [Online]. Available: http://ieeexplore .ieee . org/abstract/document/ 7301739/ @@ -4026,7 +4026,7 @@ Talk, Dec. 2016. weight-sharing,” Neural computation , vol. 4, no. 4, pp. 473–493, 1992. [Online]. Available: https://www. cs.toronto.edu/~hinton/absps/sunspots.pdf [NH02] R. T. Ng and J. Han, “ CLARANS: A method for clustering ob jects for spatial -112 + data mining,” IEEE transactions on know ledge and data engineering, vol. 14, no. 5, pp. 1003–1016, 2002. [NWC + @@ -4034,16 +4034,16 @@ no. 5, pp. 1003–1016, 2002. Y. Netzer, T. Wang et al. , “Reading digits in natural images with unsupervised feature learning,” in NIPS workshop on deep learning and unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available: -http://ufldl. stanford.edu/housenumbers/nips2011_housenumbers.pdf +http://ufldl. stanford.edu/housenumbers/nips2011_housenumbers.pdf [NWC + 11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,” -2011. [Online]. Available: http://ufldl. stanford.edu/housenumbers/ +2011. [Online]. Available: http://ufldl. stanford.edu/housenumbers/ [NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization: -Uncovering the different types of features learned by each neuron in deep +Uncovering the different types of features learned by each neuron in deep neural networks,” arXiv preprint arXiv:1602.03616 , May 2016. [Online]. 
Available: https://arxiv .org/abs/1602.03616 [OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive -classifiers for unbalanced classification problems: A study on the performance +classifiers for unbalanced classification problems: A study on the performance scores,” arXiv preprint arXiv:1608.08984 , Aug. 2016. [Online]. Available: https://arxiv . org/abs/1608. 08984 [PMW+ @@ -4061,22 +4061,22 @@ challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Se for biomedical image segmentation,” in International Conference on Medical Image Computing and Computer-Assisted Intervention . Springer, 2015, pp. 234–241. [Online]. Available: https://arxiv .org/abs/1505. 04597 -[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and den- -sity of neurons in the hyperneat substrate,” in Conference on Genetic and +[RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density + of neurons in the hyperneat substrate,” in Conference on Genetic and evolutionary computation , no. 12. ACM, 2010, pp. 563–570. [RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?": -Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938, +Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938, Feb. 2016. [Online]. Available: https://arxiv . org/abs/1602.04938 - 113 + [Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,” arXiv preprint arXiv:1609.04747 , Sep. 2016. [Online]. Available: https: //arxiv .org/abs/1609. 04747 [SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks -applied to house numbers digit classification,” in International Conference +applied to house numbers digit classification,” in International Conference on Pattern Recognition (ICPR) , no. 21. IEEE, Apr. 2012, pp. 3288–3291. [Online]. Available: https://arxiv .org/abs/1204.3968 [SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. 
Gauci, “A hypercube-based encoding -for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185– +for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185– 212, 2009. [Online]. Available: http://ieeexplore. ieee.org/document/6792316/ [SEZ+ 13] P. Sermanet, D. Eigen et al. , “Overfeat: Integrated recognition, localization @@ -4084,7 +4084,7 @@ and detection using convolutional networks,” arXiv preprint arXiv:1312.6229, Feb. 2013. [Online]. Available: https://arxiv .org/abs/1312.6229v4 [SHK+ 14] N. Srivastava, G. E. Hinton et al. , “Dropout: a simple way to -prevent neural networks from overfitting.” Journal of Machine Learning +prevent neural networks from overfitting.” Journal of Machine Learning Research , vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: https://www .cs. toronto .edu/~hinton/absps/JMLRdropout .pdf [SHY+ @@ -4092,14 +4092,14 @@ https://www .cs. toronto .edu/~hinton/absps/JMLRdropout .pdf neural networks for speech recognition,” in International Conference on Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online]. Available: http://ieeexplore. ieee.org/document/6638963/?arnumber=6638963 -[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the +[SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the impact of residual connections on learning,” arXiv preprint arXiv:1602.07261, Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602.07261 -[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding +[SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding for face recognition and clustering,” in Conference on Computer Vision and Pattern Recognition (CVPR) . IEEE, Mar. 2015, pp. 815–823. [Online]. Available: https://arxiv .org/abs/1503. 03832 -[SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale +[SL11] P. Sermanet and Y. 
LeCun, “Traffic sign recognition with multi-scale convolutional networks,” in International Joint Conference on Neural Networks (IJCNN) , Jul. 2011, pp. 2809–2813. [Online]. Available: http://ieeexplore. ieee.org/document/6033589/ @@ -4109,7 +4109,7 @@ on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp. 1–9. [Online]. Available: https://arxiv .org/abs/1409. 4842 [SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through -114 + augmenting topologies,” Evolutionary computation, vol. 10, no. 2, pp. 99–127, 2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/ 106365602320169811 @@ -4120,11 +4120,11 @@ arXiv preprint arXiv:1312.6120 , Dec. 2013. [Online]. Available: https: [SMGS14] R. K. Srivastava, J. Masci et al. , “Understanding locally competitive networks,” arXiv preprint arXiv:1410.1165 , Oct. 2014. [Online]. Available: https://arxiv . org/abs/1410. 1165 -[SSSI] J. Stallkamp, M. Schlipsing et al. , “The german traffic sign recognition +[SSSI] J. Stallkamp, M. Schlipsing et al. , “The german traffic sign recognition benchmark.” [Online]. Available: http://benchmark .ini . rub .de/?section= gtsrb&subsection=news [SSSI12] J. Stallkamp, M. Schlipsing et al. , “Man vs. computer: Benchmarking -machine learning algorithms for traffic sign recognition,” Neural Networks , +machine learning algorithms for traffic sign recognition,” Neural Networks , no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/ article/pii/S0893608012000457 [SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint @@ -4134,7 +4134,7 @@ arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606. 02492 for computer vision,” arXiv preprint arXiv:1512.00567 , Dec. 2015. [Online]. Available: https://arxiv .org/abs/1512.00567v3 [SVZ13] K. Simonyan, A. Vedaldi, and A. 
Zisserman, “Deep inside convolutional -networks: Visualising image classification models and saliency maps,” +networks: Visualising image classification models and saliency maps,” arXiv preprint arXiv:1312.6034 , Dec. 2013. [Online]. Available: https: //arxiv .org/abs/1312.6034 [SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for @@ -4145,17 +4145,17 @@ large-scale image recognition,” arXiv preprint arXiv:1409.1556 , Sep. 2014. networks,” arXiv preprint arXiv:1312.6199 , Dec. 2013. [Online]. Available: https://arxiv . org/abs/1312. 6199v4 [TF-16a] “ MNIST for ML beginners,” Dec. 2016. [Online]. Available: https: -//www .tensorflow .org/tutorials/mnist/beginners/ - 115 -[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow .org/ +//www .tensorflow .org/tutorials/mnist/beginners/ + +[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow .org/ api_docs/python/nn/activation_functions_#dropout [TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude,” COURSERA: Neural Networks for Machine Learning , vol. 4, no. 2, 2012. [Online]. Available: http://www .cs.toronto . edu/~tijmen/csc321/slides/lecture_slides_lec6 .pdf [Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,” -Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martin- -thoma.com/write-math +Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martinthoma.com/write-math + [Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available: https://martin- thoma .com/twiddle/ [Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint @@ -4179,10 +4179,10 @@ Jan. 2017. [Online]. Available: https://arxiv .org/abs/1702.00071 neural networks,” IEEE transactions on acoustics, speech, and signal processing , vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: http://ieeexplore. ieee.org/document/21701/ -[Wil92] R. J. 
Williams, “Simple statistical gradient-following algorithms for connec- -tionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256, +[Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist + reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256, 1992. -116 + [WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling and Normalization Methods for Action Recognition . Berlin, Heidelberg: Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online]. @@ -4204,15 +4204,15 @@ Available: https://arxiv .org/abs/1611.05431v1 [Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged stochastic gradient descent,” arXiv preprint arXiv:1107.2490 , Jul. 2011. [Online]. Available: https://arxiv . org/abs/1107.2490 -[XWCL15] B. Xu, N. Wang et al. , “Empirical evaluation of rectified activations in +[XWCL15] B. Xu, N. Wang et al. , “Empirical evaluation of rectified activations in convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online]. Available: https://arxiv .org/abs/1505.00853 -[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on +[XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on support vector machines.” in ECAI , 2012, pp. 870–875. [Online]. Available: https://www .sec.in.tum.de/assets/Uploads/ecai2 . pdf [XZY+ - 14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolu- -tional neural network for large-scale image classification,” in International + 14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolutional + neural network for large-scale image classification,” in International Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186. [YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten digits,” 1998. [Online]. Available: http://yann.lecun. 
com/exdb/mnist/ @@ -4223,10 +4223,10 @@ Available: https://arxiv .org/abs/1611.03530 [ZCZL16] S. Zhai, Y. Cheng et al. , “Doubly convolutional neural networks,” in Advances in Neural Information Processing Systems 29 (NIPS) , D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090. -[Online]. Available: http://papers.nips.cc/paper/6340- doubly-convolutional- -neural-networks.pdf - 117 -[ZDGD14] N. Zhang, J. Donahue et al. , “Part-based R-CNNs for fine-grained category +[Online]. Available: http://papers.nips.cc/paper/6340- doubly-convolutionalneural-networks.pdf + + +[ZDGD14] N. Zhang, J. Donahue et al. , “Part-based R-CNNs for fine-grained category detection,” in European Conference on Computer Vision (ECCV). Springer, Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407. 3867 [Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint @@ -4262,9 +4262,9 @@ arXiv preprint arXiv:1506.02351 , Jun. 2015. [Online]. Available: https: H. Zheng, Z. Yang et al. , “Improving deep neural networks using softplus units,” in International Joint Conference on Neural Networks (IJCNN) , Jul. 2015, pp. 1–4. -118 + I. Glossary -ANN artificial neural network. 4 +ANN artificial neural network. 4 ASO Automatic Structure Optimization. 29 CMO Confusion Matrix Ordering. 2, 35, 36, 51, 52, 71 CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, 37, 54, 60, @@ -4272,22 +4272,21 @@ CNN Convolutional Neural Network. 1, 3–6, 11, 13, 15, 21–23, 28, 29, 31, 33, ELU Exponential Linear Unit. 38, 57, 60–64, 72, 73, 77, 78, 84 ES early stopping. 68 FC Fully Connected. 91, 93 -FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93 +FLOP floating point operation. 27, 29, 87, 88, 90, 91, 93 GA genetic algorithm. 30 GAN Generative Adverserial Network. 80 GPU graphics processing unit. 37, 40, 59, 63, 67, 88, 91 HSV hue, saturation, value. 79 LCN Local Contrast Normalization. 
91 LDA linear discriminant analysis. 79 -LReLU leaky rectified linear unit. 63, 72, 77, 78, 84 +LReLU leaky rectified linear unit. 63, 72, 77, 78, 84 MLP multilayer perceptron. 3–6, 28 NAG Nesterov Accellerated Momentum. 83 NEAT NeuroEvolution of Augmenting Topologies. 83 OBD Optimal Brain Damage. 29 - 119 + PCA principal component analysis. 79 -PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84 -ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 +PReLU parametrized rectified linear unit. 60, 61, 63, 64, 72, 77, 78, 84 +ReLU rectified linear unit. 5, 13, 60, 61, 63, 64, 72, 77, 78, 84 SGD stochastic gradient descent. 5, 30, 45, 46, 82 ZCA Zero Components Analysis. 79 -120 diff --git a/read/results/playa/2201.00021.txt b/read/results/playa/2201.00021.txt index 522c23d..cfa743b 100644 --- a/read/results/playa/2201.00021.txt +++ b/read/results/playa/2201.00021.txt @@ -2,7 +2,7 @@ Astronomy & Astrophysics manuscript no. mainArxiv © ESO 2022 April 12, 2022 Discovery of ammonia (9,6) masers in two high-mass star-forming regions -Y. T. Yan (闫耀庭) 1, +Y. T. Yan (闫耀庭) 1, , C. Henkel1, 2, 3 , K. M. Menten 1 , Y. Gong (龚龑) 1 @@ -35,35 +35,35 @@ Received 13 December 2021 / Accepted 30 December 2021 Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements. Aims. Only a few NH - 3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH + 3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH 3 (9,6) masers to provide a better observational basis for studying their role in high-mass star-forming regions. Methods. 
We carried out NH -3 (9,6) observations toward Cepheus A and G34.26+ 0.15 with the Eff elsberg 100-meter telescope (beam +3 (9,6) observations toward Cepheus A and G34.26+ 0.15 with the Eff elsberg 100-meter telescope (beam size 49 ) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1 . 2). Results. We discovered new NH -3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass star- -forming regions hosting NH - 3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Eff elsberg shows that the intensity -of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the E ff elsberg data and assuming -linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH +3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass starforming + regions hosting NH + 3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Eff elsberg shows that the intensity +of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the E ff elsberg data and assuming +linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH 3 (9,6) emission arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the NH 3 (9,6) emission in Cep A originates from a sub-arcsecond-sized region, slightly to the west (0 -. 28 ± 0 +. 28 ± 0 . 10) of the peak position of the 1.36 cm continuum object, HW2. In G34.26+0.15, three NH 3 (9,6) maser spots are observed: one is close to the head of the cometary ultracompact H ii region C, and the other two are emitted from a compact region to the west of the hypercompact H ii region A. -Conclusions. The newly found (9,6) masers appear to be related to outflows. 
The higher angular resolution of JVLA and very long +Conclusions. The newly found (9,6) masers appear to be related to outflows. The higher angular resolution of JVLA and very long baseline interferometry observations are needed to provide more accurate positions and constraints for pumping scenarios. Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.15 – ISM: H ii regions – Radio lines: ISM 1. Introduction -Since its discovery more than five decades ago (Cheung et al. +Since its discovery more than five decades ago (Cheung et al. 1968), ammonia (NH 3 ) has been a most valuable molecule for investigating the physical properties of molecular clouds (e.g., @@ -71,10 +71,10 @@ Ho & Townes 1983). While thermally excited transitions in the centimeter-wavelength inversion transitions of ammonia are regarded as a reliable thermometer of molecular clouds (e.g., Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia -masers have attracted attention since the first detection of maser +masers have attracted attention since the first detection of maser action in the ( J, K ) = (3,3) metastable ( J = K ) line toward the -massive star-forming region W33 (Wilson et al. 1982). Subse- -quent observations have led to the detection of new metastable +massive star-forming region W33 (Wilson et al. 1982). Subsequent + observations have led to the detection of new metastable ammonia masers, including 15 NH 3 (3,3) (Mauersberger et al. @@ -85,63 +85,63 @@ ammonia masers, including 15 3 (5,5) (Cesaroni et al. 1992), NH 3 (6,6) (Beuther - Member of the International Max Planck Research School (IM- -PRS) for Astronomy and Astrophysics at the universities of Bonn and + Member of the International Max Planck Research School (IMPRS) + for Astronomy and Astrophysics at the universities of Bonn and Cologne. et al. 2007), NH 3 (7,7), NH 3 (9,9), and NH 3 (12,12) (Henkel et al. 2013). 
These have led to the discovery of metastable maser -lines in 22 different regions (Mauersberger et al. 1986, 1987; +lines in 22 different regions (Mauersberger et al. 1986, 1987; Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 1991; -Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Woot- -ten 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang +Cesaroni et al. 1992; Wilson & Schilke 1993; Mangum & Wootten + 1994; Kraemer & Jackson 1995; Zhang & Ho 1995; Zhang et al. 1999; Walsh et al. 2007; Hunter et al. 2008; Galván-Madrid et al. 2009; Brogan et al. 2011; Urquhart et al. 2011; Walsh -et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & +et al. 2011; Wang et al. 2012; Henkel et al. 2013; Hoffman & Joyce 2014; McEwen et al. 2016; Mills et al. 2018; Hogge et al. 2019; Mei et al. 2020; Towner et al. 2021). Compared with the metastable ammonia masers, detected non-metastable ( J > K ) -ammonia maser transitions are more numerous. The first highly -excited non-metastable ammonia maser was detected by Mad- -den et al. (1986) in the ( J, K ) = (9,6) and (6,3) lines. Thereafter, +ammonia maser transitions are more numerous. The first highly +excited non-metastable ammonia maser was detected by Madden + et al. (1986) in the ( J, K ) = (9,6) and (6,3) lines. Thereafter, many other NH 3 non-metastable inversion transition lines have -been identified as masers, including the (5,3), (5,4), (6,1), (6,2), +been identified as masers, including the (5,3), (5,4), (6,1), (6,2), (6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3), -(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transi- -Article number, page 1 of 10arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 +(9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transiArticle + number, page 1 of 10arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 A & A proofs: manuscript no. mainArxiv tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; Henkel et al. 
2013; Mei et al. 2020). Except for the NH 3 (3,3) masers proposed to be associated with four supernova remnants (McEwen et al. 2016), almost all the other ammonia masers are -detected in high-mass star-forming regions (HMSFRs). How- -ever, while many HMSFRs host water (H +detected in high-mass star-forming regions (HMSFRs). However, + while many HMSFRs host water (H 2 O), hydroxyl (OH), or methanol (CH 3 OH) masers, ammonia masers are quite rare in these sources, and the role that the environment of a young -high-mass star plays in their excitation remains unclear. There- -fore, dedicated searches for ammonia masers in HMSFRs are -indispensable in regard to their overall incidence and associa- -tion with di ff erent environments, which can provide additional +high-mass star plays in their excitation remains unclear. Therefore, + dedicated searches for ammonia masers in HMSFRs are +indispensable in regard to their overall incidence and association + with di ff erent environments, which can provide additional constraints on the pumping mechanism of ammonia masers. So far, a total of 32 NH 3 inversion transitions ( ∆ K = 0 -and ∆ J = 0) have been identified as masers. Among these, and +and ∆ J = 0) have been identified as masers. Among these, and despite arising from energy levels as high as 1090 K above the ground state, the NH 3 (9,6) maser stands out as being the strongest and most variable one in W51-IRS2 (e.g., Henkel et al. -2013). Maser emission in this line has only been detected in five +2013). Maser emission in this line has only been detected in five HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. 1986), and Sgr B2(N) (Mei et al. 2020). The NH 3 (3,3) masers are thought to be collisionally excited (e.g., Flower et al. 1990; -Mangum & Wootten 1994); in contrast, the pumping mecha- -nism of NH +Mangum & Wootten 1994); in contrast, the pumping mechanism + of NH 3 (9,6) masers is less well constrained (Madden et al. 1986). 
Brown & Cragg (1991) have studied ortho-ammonia and found that it could possibly pump the (6,3) inversion line, but @@ -153,8 +153,8 @@ NH H 2 O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. 2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) -line showed significant variation in line shape within a time in- -terval of only two days. Mapping of the (9,6) maser toward W51 +line showed significant variation in line shape within a time interval + of only two days. Mapping of the (9,6) maser toward W51 with very long baseline interferometry (VLBI) suggests that the masers are closer to the H 2 O masers than to the OH masers or @@ -162,72 +162,72 @@ to ultracompact (UC) H ii regions (Pratap et al. 1991). While Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO and NH 3 masers in W51-IRS2 are very close to each other, their -positions, diff ering by 0 +positions, diff ering by 0 . 065 ( ∼0.015 pc), do not fully coincide. In this paper we report the discovery of NH 3 (9,6) masers in two HMSFRs, Cepheus A and G34.26+ 0.15. This increases -the number of (9,6) maser detections in our Galaxy from five -to seven. In Sect. 2 observations with the Eff elsberg 100-meter +the number of (9,6) maser detections in our Galaxy from five +to seven. In Sect. 2 observations with the Eff elsberg 100-meter telescope and the Karl G. Jansky Very Large Array (JVLA) are described. Results are presented in Sect. 3. The morphology of Cep A and G34.26+ 0.15 as well as a comparison of the emission -distributions of di ff erent tracers with the NH +distributions of di ff erent tracers with the NH 3 (9,6) masers are presented in Sect. 4. Our main results are summarized in Sect. 5. 2. Observations and data reduction 2.1. 
Effelsberg observations and data reduction The NH 3 (9,6) line was observed toward Cep A and -G34.26 +0.15 with the 100-meter E ff elsberg telescope 1 +G34.26 +0.15 with the 100-meter E ff elsberg telescope 1 in 2020 -January and 2021 February, July, and August. The S14mm dou- -ble beam secondary focus receiver was employed. The full width -at half maximum (FWHM) beam size is 49 +January and 2021 February, July, and August. The S14mm double + beam secondary focus receiver was employed. The full width +at half maximum (FWHM) beam size is 49 at 18.5 GHz, the frequency of the target line. The observations were performed in -position switching mode, and the o ff position was 10 +position switching mode, and the o ff position was 10 in azimuth 1 Based on observations with the 100-meter telescope of the MPIfR -(Max-Planck-Institut für Radioastronomie) at E ff elsberg. away from the source. For observations made before 2021 Au- -gust, we used a spectrometer that covered 2 GHz wide backends +(Max-Planck-Institut für Radioastronomie) at E ff elsberg. away from the source. For observations made before 2021 August, + we used a spectrometer that covered 2 GHz wide backends with a channel width of 38.1 kHz, corresponding to ∼0.62 km s −1 at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar 1975). A high spectral resolution backend with 65536 channels and a bandwidth of 300 MHz was employed in 2021 August, providing a channel width of 0.07 km s−1 - at 18.5 GHz. Point- -ing was checked every 2 hours using 3C 286 or NGC 7027. -Focus calibrations were done at the beginning of the observa- -tions and during sunset and sunrise toward the abovementioned + at 18.5 GHz. Pointing + was checked every 2 hours using 3C 286 or NGC 7027. +Focus calibrations were done at the beginning of the observations + and during sunset and sunrise toward the abovementioned pointing sources. The system temperatures were 100–130 K on a main-beam brightness temperature, T - MB , scale. 
This flux den- -sity was calibrated assuming a T + MB , scale. This flux density + was calibrated assuming a T MB / S ratio of 1.95 K /Jy, derived -from continuum cross scans of NGC 7027 (the flux density was -adopted from Ott et al. 1994). Calibration uncertainties are esti- -mated to be ∼ 10%. +from continuum cross scans of NGC 7027 (the flux density was +adopted from Ott et al. 1994). Calibration uncertainties are estimated + to be ∼ 10%. We used the GILDAS / CLASS 2 - package (Pety 2005) to re- -duce the spectral line data. A first-order polynomial was sub- -tracted from each spectrum for baseline removal. + package (Pety 2005) to reduce + the spectral line data. A first-order polynomial was subtracted + from each spectrum for baseline removal. 2.2. JVLA observations and data reduction Observations of the NH 3 (9,6) line toward Cep A and G34.26 +0.15 were obtained on 2021 July 13 with the JVLA of the National Radio Astronomy Observatory 3 (NRAO) in the -C configuration (project ID: 21A-157, PI: Yaoting Yan). We +C configuration (project ID: 21A-157, PI: Yaoting Yan). We employed 27 antennas for the observations. The primary beam -of the JVLA antennas is 150 - (FWHM) at 18.5 GHz. A mix- -ture of mixed three-bit and eight-bit samplers were used to per- -form the observations. For the NH +of the JVLA antennas is 150 + (FWHM) at 18.5 GHz. A mixture + of mixed three-bit and eight-bit samplers were used to perform + the observations. For the NH 3 (9,6) line observations, we -used one subband with the eight-bit sampler covering a band- -width of 16 MHz with full polarization, eight recirculations, and +used one subband with the eight-bit sampler covering a bandwidth + of 16 MHz with full polarization, eight recirculations, and four baseline board pairs (BIBPs) to provide a velocity range of 260 km s−1 with a channel spacing of 0.13 km s −1 @@ -235,11 +235,11 @@ of 260 km s−1 additional subbands of bandwidth 16 MHz were used to cover the NH 3 (8,5) and (10,7) lines. 
The three-bit sampler with 32 -subbands, each with a bandwidth of 128 MHz to cover a to- -tal range of 4 GHz between 20–24 GHz, was used to mea- -sure the continuum emission. 3C 286 with a flux density of +subbands, each with a bandwidth of 128 MHz to cover a total + range of 4 GHz between 20–24 GHz, was used to measure + the continuum emission. 3C 286 with a flux density of 2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a -calibrator for pointing, flux density, bandpass, and polarization. +calibrator for pointing, flux density, bandpass, and polarization. J2230 +6946 and J1851+0035 served as gain calibrators for Cep A and G34.26+0.15, respectively. The on-source times were 4m @@ -247,69 +247,69 @@ A and G34.26+0.15, respectively. The on-source times were and 4 m 50 s toward Cep A and G34.26 +0.15, respectively. -Data from two antennas were lost due to technical is- -sues. The data from the remaining 25 antennas were reduced -through the Common Astronomy Software Applications pack- -age (CASA 4 +Data from two antennas were lost due to technical issues. + The data from the remaining 25 antennas were reduced +through the Common Astronomy Software Applications package + (CASA 4 ; McMullin et al. 2007). We calibrated the data with the JVLA CASA calibration pipeline using CASA 6.1.2. The -results were obtained after flagging data that contain artifacts. +results were obtained after flagging data that contain artifacts. We inspected the phase, amplitude, and bandpass variations of -the calibrated visibility data to search for additional artifacts be- -fore imaging. Then, the uvcontsub task in CASA was used to -separate the calibrated visibilities into two parts, one with line- -only data and the other with the continuum data. The tclean task +the calibrated visibility data to search for additional artifacts before + imaging. 
Then, the uvcontsub task in CASA was used to +separate the calibrated visibilities into two parts, one with lineonly + data and the other with the continuum data. The tclean task with a cell size of 0 . 2 and Briggs weighting with robust=0 was -used to produce the images of spectral line and continuum emis- -sion. The synthesized beams for NH +used to produce the images of spectral line and continuum emission. + The synthesized beams for NH 3 (9,6) are 1 -. 47 × 0 +. 47 × 0 . 99 at 2 https: //www.iram.fr /IRAMFR /GILDAS/ 3 - The National Radio Astronomy Observatory is a facility of the Na- -tional Science Foundation operated under cooperative agreement by As- -sociated Universities, Inc. + The National Radio Astronomy Observatory is a facility of the National + Science Foundation operated under cooperative agreement by Associated + Universities, Inc. 4 https: //casa.nrao.edu/ Article number, page 2 of 10 Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions P.A. = 58◦ -. 79 and 1 +. 79 and 1 . 33 × 1 . 06 at P.A. = 5 ◦ . 36 toward Cep A and G34.26 + 0.15, respectively. For the 1.36 cm (20–24 GHz) -continuum emission, the synthesized beams are 1 +continuum emission, the synthesized beams are 1 . 08 × 0 . 67 at P.A. = 60 ◦ -. 64 and 0 +. 64 and 0 . 95 × 0 . 71 at P.A. = 5◦ . 91 toward Cep A and G34.26 + 0.15. The typical absolute astrometric accuracy of the JVLA is ∼ 10% of the synthesized beam5 - . The flux density scale + . The flux density scale calibration accuracy is estimated to be within 15%. Fig. 1. Spectra from NH 3 (9,6) transition lines. Left: Top to bottom: Time sequence of NH -3 (9,6) profiles observed toward Cep A with the -E ff elsberg 100-meter telescope (after subtracting a first-order polyno- -mial baseline). A JVLA spectrum is interspersed. 
The systemic veloc- -ity from CO and HCO+ +3 (9,6) profiles observed toward Cep A with the +E ff elsberg 100-meter telescope (after subtracting a first-order polynomial + baseline). A JVLA spectrum is interspersed. The systemic velocity + from CO and HCO+ lines is indicated by a dashed blue line. The two dashed red lines at LSR velocities, V LSR , of − 0.90 km s −1 and − 0.28 km s− 1 - indicate the central velocities of the two major compo- -nents. Right : NH -3 (9,6) spectra from G34.26 +0.15. The systemic ve- -locity from C 17 + indicate the central velocities of the two major components. + Right : NH +3 (9,6) spectra from G34.26 +0.15. The systemic velocity + from C 17 O is indicated by a dashed blue line. The three dashed red lines at V LSR = 54.1 km s − 1 @@ -318,30 +318,30 @@ LSR = 54.1 km s − 1 show the central velocities of the main ammonia emission components. 3. Results -The spectra from diff erent epochs are shown in Figs. 1 and 2. +The spectra from diff erent epochs are shown in Figs. 1 and 2. Toward Cep A, the NH -3 (9,6) line profile from the JVLA is ex- -tracted from an Effelsberg-beam-sized region (FWHM, 49 +3 (9,6) line profile from the JVLA is extracted + from an Effelsberg-beam-sized region (FWHM, 49 ). In the case of G34.26+0.15, the NH 3 spectrum is below the noise -level if a similarly large beam size is used. Therefore, we de- -rived the JVLA NH +level if a similarly large beam size is used. Therefore, we derived + the JVLA NH 3 (9,6) spectrum from a smaller region, with -radius 3 +radius 3 . 5, that contains all the detected NH 3 (9,6) emission. In Table A.1, the observed NH 3 (9,6) line parameters obtained by -Gaussian fits are listed. NH -3 (8,5) and (10,7) emission is not de- -tected by our JVLA observations. The 3σ upper limits for the +Gaussian fits are listed. NH +3 (8,5) and (10,7) emission is not detected + by our JVLA observations. 
The 3σ upper limits for the NH 3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 5 - https://science.nrao.edu/ facilities / vla/ docs /manuals/ oss/ performance- -/ positional-accuracy Fig. 2. NH -3 (9,6) line profiles emphasizing, in contrast to the spectra + https://science.nrao.edu/ facilities / vla/ docs /manuals/ oss/ performance/ + positional-accuracy Fig. 2. NH +3 (9,6) line profiles emphasizing, in contrast to the spectra in Fig. 1, weaker features. Cep A spectra are presented on the left, G34.26+ 0.15 spectra on the right. The two dashed red lines in the left panels indicate V @@ -352,61 +352,61 @@ the two dashed red lines refer to 54.1 km s− 1 and 55.8 km s− 1 . and 27.2 mJy beam−1 - , respectively. In G34.26 + 0.15, the corre- -sponding 3σ upper limits for the NH + , respectively. In G34.26 + 0.15, the corresponding + 3σ upper limits for the NH 3 (8,5) and (10,7) lines are 22.1 mJy beam −1 and 30.4 mJy beam−1 - . For both sources, sen- -sitivity levels refer to emission from a single channel of width + . For both sources, sensitivity + levels refer to emission from a single channel of width 0.13 km s−1 . Taking the larger measured line widths of the (9,6) -maser features (see Table A.1), these limits could be further low- -ered by factors of two to four. +maser features (see Table A.1), these limits could be further lowered + by factors of two to four. 3.1. Centimeter-continuum emission The 1.36 cm continuum, derived from our JVLA observations, toward Cep A is presented in Fig. 3. Six published compact -sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are de- -tected in our observations. Figure 4 shows the 1.36 cm contin- -uum in G34.26 + 0.15. Three main continuum objects, A, B, and -C, are detected. By using the imfit task in CASA, we measured -the continuum flux at 1.36 cm toward individual compact source -components in Cep A and G34.26+0.15. Details are given in Ta- -ble A.2. 
+sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected + in our observations. Figure 4 shows the 1.36 cm continuum + in G34.26 + 0.15. Three main continuum objects, A, B, and +C, are detected. By using the imfit task in CASA, we measured +the continuum flux at 1.36 cm toward individual compact source +components in Cep A and G34.26+0.15. Details are given in Table + A.2. 3.2. NH 3 (9,6) emission in Cep A In 2020 January, NH -3 (9,6) emission with a peak flux density of -0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter -telescope in Cep A. Emission with similar strength was also de- -tected in 2021 February and August with the same telescope. +3 (9,6) emission with a peak flux density of +0.67 ± 0.07 Jy was first detected with the Effelsberg 100-meter +telescope in Cep A. Emission with similar strength was also detected + in 2021 February and August with the same telescope. Higher velocity resolution data, which were obtained in 2021 -August, again with the E ff elsberg 100-meter telescope, show +August, again with the E ff elsberg 100-meter telescope, show that the (9,6) emission contains two main velocity components. -Overall, the flux densities of the NH - 3 (9,6) emission line mea- -sured with the Eff elsberg 100-meter telescope are, within the cal- -ibration uncertainties, unchanged. This is valid for the time inter- -val between 2020 January and August 2021, when we smoothed +Overall, the flux densities of the NH + 3 (9,6) emission line measured + with the Eff elsberg 100-meter telescope are, within the calibration + uncertainties, unchanged. This is valid for the time interval + between 2020 January and August 2021, when we smoothed the obtained spectra to the same velocity resolution. We also see another two weaker components. Figure 2 emphasizes these -weak components with an expanded flux density scale. +weak components with an expanded flux density scale. 
Higher angular resolution data from the JVLA pinpoint the position of the NH -3 (9,6) emission with an o ffset of (− 0 +3 (9,6) emission with an o ffset of (− 0 . 28, 0 . 02) relative to the 1.36 cm continuum peak of Cep A HW2 (Fig. 3). The deconvolved NH -3 (9,6) component size is (0 +3 (9,6) component size is (0 . 29 ± 0 -. 15) × (0 -. 19 ± 0 +. 15) × (0 +. 19 ± 0 . 14) at P.A. = 174 ◦ - , derived with the imfit task -in CASA, and can thus be considered, accounting for the uncer- -tainties, as unresolved. + , derived with the imfit task +in CASA, and can thus be considered, accounting for the uncertainties, + as unresolved. Article number, page 3 of 10 A & A proofs: manuscript no. mainArxiv Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, @@ -446,40 +446,40 @@ J2000 = 18 h 3 OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (V LSR ) of maser spots. -In view of the constancy of the flux densities obtained at Ef- -felsberg and the similar JVLA flux density, measured in 2021 -July, there is no missing interferometric flux density in the JVLA +In view of the constancy of the flux densities obtained at Effelsberg + and the similar JVLA flux density, measured in 2021 +July, there is no missing interferometric flux density in the JVLA data. 3.3. NH 3 (9,6) emission in G34.26 +0.15 The NH - 3 (9,6) emission was first detected toward G34.26+0.15 -in 2020 January with the Effelsberg 100-meter telescope. Higher velocity resolution data from 2021 August show the NH + 3 (9,6) emission was first detected toward G34.26+0.15 +in 2020 January with the Effelsberg 100-meter telescope. Higher velocity resolution data from 2021 August show the NH 3 (9,6) -emission to be composed of two di fferent components. The spec- -tra of weak components on a smaller flux density scale are pre- -sented in Fig. 2. 
-Three di fferent locations showing NH +emission to be composed of two di fferent components. The spectra + of weak components on a smaller flux density scale are presented + in Fig. 2. +Three di fferent locations showing NH 3 (9,6) emission are found toward G34.26 +0.15 (Fig. 4). The deconvolved NH 3 (9,6) -component sizes are (1 +component sizes are (1 . 42 ± 0 -. 43) × (0 +. 43) × (0 . 54 ± 0 . 62) at P.A. = 97 ◦ (M1), (0 . 42 ± 0 -. 27) × (0 -. 15 ± 0 +. 27) × (0 +. 15 ± 0 . 27) at P.A. = 150 ◦ (M2), and Article number, page 4 of 10 Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions -(1 +(1 . 17 ± 0 -. 34) × (0 -. 27 ± 0 +. 34) × (0 +. 27 ± 0 . 46) at P.A. = 53◦ (M3) and are thus comparable to or smaller than the beam size. @@ -487,9 +487,9 @@ Overall, the NH 3 (9,6) line from G34.26 +0.15 weakened during the time interval from 2020 January to 2021 August by about 70%. A comparison between the JVLA spectrum and the -E ffelsberg data, assuming a linear decrease in the integrated in- -tensity as a function of time between diff erent epochs of the -100-meter observations, suggests there is no missing flux in the +E ffelsberg data, assuming a linear decrease in the integrated intensity + as a function of time between diff erent epochs of the +100-meter observations, suggests there is no missing flux in the JVLA data. This is similar to the situation in Cep A. 4. Discussion 4.1. Morphology of Cep A and G34.26+ 0.15 @@ -500,68 +500,68 @@ HMSFR (after Orion) and by far the closest NH known. About 16 compact ( ∼1 ) radio sources (e.g., Hughes & Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been -identified in Cep A. Hughes & Wouterloot (1984) discovered -these targets at radio wavelengths, which are UC and hypercom- -pact (HC) H ii regions and /or stellar wind sources, subsequently +identified in Cep A. 
Hughes & Wouterloot (1984) discovered +these targets at radio wavelengths, which are UC and hypercompact + (HC) H ii regions and /or stellar wind sources, subsequently named as HW sources. The HW2 object is one of the best known examples of a protostellar jet or disk system driving a powerful -outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles +outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). The observed NH - 3 (9,6) emission is slightly offset ( −0 -. 28, 0 + 3 (9,6) emission is slightly offset ( −0 +. 28, 0 . 02) from the center of HW2 (see Fig. 3). G34.26 +0.15 is an HMSFR located at a distance of 3.3 kpc -(Kuchar & Bania 1994). It hosts four radio continuum compo- -nents named A, B, C, and D. Component C is a prototypical -cometary UC H ii region containing a compact head and a diffuse +(Kuchar & Bania 1994). It hosts four radio continuum components + named A, B, C, and D. Component C is a prototypical +cometary UC H ii region containing a compact head and a diffuse tail that extends from east to west (e.g., Reid & Ho 1985; Garay et al. 1986; Sewilo et al. 2004; Sewiło et al. 2011). Components A and B are HC H ii regions, located to the east of component -C. An extended ring-like H ii region, called component D, is lo- -cated southeast of components A-C. One of the three observed +C. An extended ring-like H ii region, called component D, is located + southeast of components A-C. One of the three observed NH - 3 (9,6) emission line sources, M1, is close to the head of com- -ponent C, whereas M2 and M3 originate from another compact + 3 (9,6) emission line sources, M1, is close to the head of component + C, whereas M2 and M3 originate from another compact region in the west of the HC H ii component A (see Fig. 4). 4.2. NH 3 (9,6) emission possibly caused by maser action As shown in Fig. 
1, the NH -3 (9,6) profiles in Cep A and +3 (9,6) profiles in Cep A and G34.26 +0.15 are narrow (∆V 1 /2 ≤2.0 km s−1 ), much narrower than the expected line widths ( 4 km s − 1 - ) of thermal lines ob- -served at a similar angular resolution (e.g., Torrelles et al. 1985, -1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mook- -erjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Ve- -locity shifts with respect to the systemic velocities of the two + ) of thermal lines observed + at a similar angular resolution (e.g., Torrelles et al. 1985, +1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea + et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity + shifts with respect to the systemic velocities of the two sources are both observed, that is, V ∼10 km s −1 in Cep A and V ∼4 km s− 1 - in G34.26 +0.15 (see details in Sect. 4.3). Further- -more, time variability is observed in the case of G34.26+0.15, + in G34.26 +0.15 (see details in Sect. 4.3). Furthermore, + time variability is observed in the case of G34.26+0.15, which is also a characteristic feature of maser emission. -Additional evidence of their maser nature is the high bright- -ness temperatures of the (9,6) emission spots toward Cep A and +Additional evidence of their maser nature is the high brightness + temperatures of the (9,6) emission spots toward Cep A and G34.26 +0.15. The spectral parameters are listed in Table A.3. -Because at least a significant part of the NH +Because at least a significant part of the NH 3 (9,6) emission -is not resolved by our JVLA observations, the derived bright- -ness temperatures are only lower limits. Nevertheless, the lower +is not resolved by our JVLA observations, the derived brightness + temperatures are only lower limits. Nevertheless, the lower limits on the brightness temperature are >800 K in Cep A (see Table A.3), which is much higher than the expected thermal gas temperature of ∼250 K (e.g., Patel et al. 
2005; Comito et al. 2007; Beuther et al. 2018). This strongly suggests that the NH -3 (9,6) emission in Cep A is due to maser action. Be- -cause G34.26+ 0.15 is located at about five times the distance to -Cep A, beam dilution e ffects reduce the lower main beam bright- -ness temperature limit to 400 K in G34.26 +0.15 (M2) (see Ta- -ble A.3). We also note that the luminosity of the NH -3 (9,6) emis- -sion in G34.26 +0.15 is higher than or comparable to that in Cep +3 (9,6) emission in Cep A is due to maser action. Because + G34.26+ 0.15 is located at about five times the distance to +Cep A, beam dilution e ffects reduce the lower main beam brightness + temperature limit to 400 K in G34.26 +0.15 (M2) (see Table + A.3). We also note that the luminosity of the NH +3 (9,6) emission + in G34.26 +0.15 is higher than or comparable to that in Cep A, depending on the epoch of our observations. Finally, the non-detections of the (8,5) and (10,7) lines also indicate that the (9,6) line is special. This allows us to derive @@ -578,7 +578,7 @@ para states (e.g., Umemoto et al. 1999; Goddi et al. 2011; Henkel et al. 2013). In Cep A, the line intensity ratios are far higher than this factor of two. Thus, at least in Cep A the higher main beam brightness peak temperature of the (9,6) emission is caused by -maser action, perhaps involving exponential amplification, and +maser action, perhaps involving exponential amplification, and the case of G34.26 +0.15 is likely similar. 4.3. Comparison of NH 3 (9,6) masers with previously @@ -595,29 +595,29 @@ NH 3 column density of ∼5 ×10 15 cm−2 was estimated for a region -of 3 +of 3 around HW2 (Torrelles et al. 1999). This high NH -3 abun- -dance could provide a suitable environment for maser species. +3 abundance + could provide a suitable environment for maser species. Large line widths (∆ V 1 / 2 7.0 km s−1 ) with V LSR ∼ −10 km s−1 in both (1,1) and (2,2) lines were found toward HW2 (Torrelles -et al. 1993). 
The velocity is similar to the cloud’s systemic lo- -cal standard of rest (LSR) velocity of −11 .2 km s−1 +et al. 1993). The velocity is similar to the cloud’s systemic local + standard of rest (LSR) velocity of −11 .2 km s−1 , which is based on CO (Narayanan & Walker 1996) and HCO + - ob- -servations (Gómez et al. 1999). Our (9,6) maser is redshifted + observations + (Gómez et al. 1999). Our (9,6) maser is redshifted (− 0.9 km s− 1 ≤ V LSR ≤2.9 km s−1 ) and shares positions with -the outflowing gas seen in CO and HCO + - with similarly red- -shifted velocities. Therefore, we argue that the (9,6) masers are -related to outflowing gas. +the outflowing gas seen in CO and HCO + + with similarly redshifted + velocities. Therefore, we argue that the (9,6) masers are +related to outflowing gas. In G34.26 +0.15, a large NH 3 column density, 1018 . 5 ±0 .2 @@ -631,7 +631,7 @@ While these lines were measured with a beam size of about 40 , a comparison of the peak intensities of the optically thick lines with the kinetic temperature reveals the size of the hot, -ammonia-emitting core to be only ∼2.5 +ammonia-emitting core to be only ∼2.5 . All those measured NH 3 lines were quasi-thermal and had LSR velocities of @@ -642,12 +642,12 @@ obtained from C 17 Their line widths (∆ V 1 / 2 ≥3.6 km s−1 ) are larger than what -we find (0.35 km s −1 +we find (0.35 km s −1 ≤ ∆ V 1 / 2 ≤ 0.94 km s−1 ) for each (9,6) maser component (see details in Table A.3). In all, we may -have observed four di ff erent (9,6) velocity features. Three +have observed four di ff erent (9,6) velocity features. Three are blueshifted at V LSR ∼ 53.8 km s −1 , 55.8 km s− 1 @@ -656,19 +656,19 @@ LSR ∼ 53.8 km s −1 , and a fourth, tentatively detected, at 62.5 km s −1 . This tentative redshifted feature was only potentially detected -with E ffelsberg in 2020 January. The velocity is similar to that +with E ffelsberg in 2020 January. 
The velocity is similar to that of the JVLA measurements on the NH 3 (1,1) absorption line -against continuum source C ( ∼ 7 +against continuum source C ( ∼ 7 resolution; Keto et al. 1987) Article number, page 5 of 10 A & A proofs: manuscript no. mainArxiv and the NH 3 (3,3) emission surrounding continuum source B as well as the head of C (1 -. 4×1 +. 4×1 . 2 resolution; Heaton et al. 1989). -However, we did not find this redshifted component in our +However, we did not find this redshifted component in our JVLA observations. Therefore, its position within G34.26+0.15 cannot be determined. The blueshifted (9,6) masers with a velocity range of 53.8–56.8 km s− 1 @@ -692,7 +692,7 @@ et al. 2017) and H 2011; Sobolev et al. 2018) are detected and are associated with its disk. Sobolev et al. (2018) also found that most of the H 2 O -maser flux is associated with the compact H ii region HW3d. OH +maser flux is associated with the compact H ii region HW3d. OH maser features close to the H ii regions are also seen in HW2 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These three kinds of masers in Cep A have a large velocity range of @@ -705,8 +705,8 @@ HW2 and HW3, while NH −0.9 km s −1 ≤ V LSR ≤ 2.9 km s −1 - toward a sub-arcsecond- -sized region to the west of the peak continuum position of HW2 + toward a sub-arcsecondsized + region to the west of the peak continuum position of HW2 (see Fig. 3). This suggests that the NH 3 (9,6) maser in Cep A is unique and not related to maser spots seen in other molecular @@ -718,26 +718,26 @@ In G34.26 +0.15, OH (Zheng et al. 2000), H detected east of source C (Fig. 4), and none of them coincides with the head of C. The NH 3 (9,6) maser M1 is also found -slightly o ff the head of source C. This could suggest that M1 -is powered by continuum source C or by an outflow. Near com- -ponent B, there are some OH and CH +slightly o ff the head of source C. This could suggest that M1 +is powered by continuum source C or by an outflow. 
Near component + B, there are some OH and CH 3 OH masers but no H 2 O or NH 3 masers. A group of H 2 O masers, well-known tracers -of outflows, with a large velocity distribution of 43 km s−1 +of outflows, with a large velocity distribution of 43 km s−1 ≤ V LSR ≤54 km s −1 - , was found to the west of the centimeter- -continuum source A and close to the peak of the millimeter- -continuum emission (see details in our Fig. A.2 and also in Fig. 5 + , was found to the west of the centimetercontinuum + source A and close to the peak of the millimetercontinuum + emission (see details in our Fig. A.2 and also in Fig. 5 of Imai et al. 2011). The closeness of NH 3 (9,6) maser spots M2 and M3 to this group of water masers and their similar velocities again suggest an association of NH -3 (9,6) masers with outflow +3 (9,6) masers with outflow activity. 4.5. Constraints on pumping scenarios Our observations have resulted in the detection of NH @@ -749,50 +749,50 @@ of the (9,6) maser is unclear (Madden et al. 1986; Brown & Cragg 1991). Previous studies have suggested that there are three main pumping scenarios to explain the observed NH 3 maser -lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared ra- -diation from the dust continuum emission, (2) line overlap, and +lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation + from the dust continuum emission, (2) line overlap, and (3) collisional pumping. -For the first mechanism, infrared photons near 10 µ m are +For the first mechanism, infrared photons near 10 µ m are needed for vibrational excitation. The high dust temperature -( ∼300 K) of W51-IRS2 can provide substantial infrared pho- -tons near 10 µ m, which is used for radiative pumping (Henkel et al. 2013). Both Cep A and G34.26 + 0.15 have similar kinetic +( ∼300 K) of W51-IRS2 can provide substantial infrared photons + near 10 µ m, which is used for radiative pumping (Henkel et al. 2013). 
Both Cep A and G34.26 + 0.15 have similar kinetic temperatures of 200 K (Henkel et al. 1987; Patel et al. 2005; Comito et al. 2007; Beuther et al. 2018). This suggests that high kinetic temperatures are needed to excite NH 3 (9,6) masers. -However, it should be noted that the silicate dust absorption fea- -ture might dominate at 10 µ m (see the spectral energy distribu- -tion of Cep A in De Buizer et al. 2017). Additionally, there is +However, it should be noted that the silicate dust absorption feature + might dominate at 10 µ m (see the spectral energy distribution + of Cep A in De Buizer et al. 2017). Additionally, there is no bright infrared emission around the two (9,6) masers, M2 and M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer et al. 2003 for a 10.5 µ m map). This indicates that the pumping mechanism via infrared photons near 10 µ m may not be viable -to explain the (9,6) masers in Cep A and G34.26+0.15. Further- -more, Wilson & Schilke (1993) argued that radiative pumping by +to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore, + Wilson & Schilke (1993) argued that radiative pumping by dust emission tends to excite multiple adjacent ammonia maser transitions, which appears to contradict our failure to detect the adjacent (8,5) and (10,7) lines (with respect to quantum numbers and frequency) and to only measure the (9,6) transitions in Cep -A and G34.26 + 0.15. Therefore, we suggest that infrared radia- -tion from dust is not the main pumping source. +A and G34.26 + 0.15. Therefore, we suggest that infrared radiation + from dust is not the main pumping source. Madden et al. (1986) suggested that there might be some line overlaps between the rotational NH -3 transitions in the far- -infrared band. However, this would be unlikely to affect only the +3 transitions in the farinfrared + band. However, this would be unlikely to affect only the (9,6) line. 
Nevertheless, far-infrared spectral observations will be needed to clarify this scenario. Based on our observations, the (9,6) maser spots are close to, but not coincident with, the peaks of the radio continuum emission in Cep A and G34.26+ 0.15. Furthermore, the (9,6) -masers show velocity off sets with respect to their systemic ve- -locities. This indicates that the (9,6) masers are located at the -base of outflows, similar to the H +masers show velocity off sets with respect to their systemic velocities. + This indicates that the (9,6) masers are located at the +base of outflows, similar to the H 2 O masers. This is supported by VLBI observations that show that (9,6) masers tend to be closely associated with H -2 O masers (Pratap et al. 1991). The ob- -served time variability in G34.26 + 0.15 and W51-IRS2 can also -be attributed to episodic molecular outflows. This indicates that +2 O masers (Pratap et al. 1991). The observed + time variability in G34.26 + 0.15 and W51-IRS2 can also +be attributed to episodic molecular outflows. This indicates that collisional pumping could be the driver of the (9,6) maser. On the other hand, collisional pumping has been successfully used to explain the NH @@ -801,28 +801,28 @@ Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to pump from the K =0 level to the K = 3 level with parity changes, that is, the upper level of the (3,3) metastable transition will be overpopulated. NH -3 (9,6) arises from the ortho species, so a sim- -ilar mechanism might also occur in the case of the (9,6) transi- -tion. Further measurements of collisional rates of ammonia will +3 (9,6) arises from the ortho species, so a similar + mechanism might also occur in the case of the (9,6) transition. + Further measurements of collisional rates of ammonia will allow us to test this scenario. 5. Summary We report the discovery of NH 3 (9,6) masers in two HMSFRs, -Cep A and G34.26 +0.15. 
The narrow line width of the emis- -sion features (∆ V +Cep A and G34.26 +0.15. The narrow line width of the emission + features (∆ V 1/ 2 ≤ 2.0 km s −1 - ) and their high brightness tem- -peratures ( > 400 K) indicate the maser nature of the lines. + ) and their high brightness temperatures + ( > 400 K) indicate the maser nature of the lines. The intensity of the (9,6) maser in G34.26 +0.15 is decreasing with time, while toward Cep A the maser is stable based on 20 -months of monitoring at E ffelsberg. Linearly interpolating the -integrated intensities obtained at E ff elsberg as a function of time, -the JVLA measurements show that there is no missing flux den- -sity on scales on the order of 1.2 arcsec (4 ×10 −3 +months of monitoring at E ffelsberg. Linearly interpolating the +integrated intensities obtained at E ff elsberg as a function of time, +the JVLA measurements show that there is no missing flux density + on scales on the order of 1.2 arcsec (4 ×10 −3 and 2 ×10− 2 pc) -to the total single-dish flux. The JVLA-detected emission in- -dicates that the NH +to the total single-dish flux. The JVLA-detected emission indicates + that the NH 3 (9,6) maser in Cep A originates from a sub-arcsecond-sized region slightly (0 . 28 ± 0 @@ -835,23 +835,23 @@ two are emitted from a compact region to the west of the HC H ii Article number, page 6 of 10 Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions region A. We suggest that the (9,6) masers may be connected to -outflowing gas. Higher angular resolution JVLA and VLBI ob- -servations are planned to provide more accurate positions and +outflowing gas. Higher angular resolution JVLA and VLBI observations + are planned to provide more accurate positions and constraints on pumping scenarios. -Acknowledgements. We would like to thank the anonymous referee for the use- -ful comments that improve the manuscript. Y.T.Y. 
is a member of the Interna- -tional Max Planck Research School (IMPRS) for Astronomy and Astrophysics +Acknowledgements. We would like to thank the anonymous referee for the useful + comments that improve the manuscript. Y.T.Y. is a member of the International + Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China -Scholarship Council (CSC) for its support. We would like to thank the staff at -the E ffelsberg for their help provided during the observations. We thank the sta ff +Scholarship Council (CSC) for its support. We would like to thank the staff at +the E ffelsberg for their help provided during the observations. We thank the sta ff of the JVLA, especially Tony Perreault and Edward Starr, for their assistance with the observations and data reduction. This research has made use of the -NASA / IPAC Infrared Science Archive, which is funded by the National Aero- -nautics and Space Administration and operated by the California Institute of +NASA / IPAC Infrared Science Archive, which is funded by the National Aeronautics + and Space Administration and operated by the California Institute of Technology. References -Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MN- -RAS, 361, 623 +Bartkiewicz, A., Szymczak, M., Cohen, R. J., & Richards, A. M. S. 2005, MNRAS, + 361, 623 Bartkiewicz, A., Szymczak, M., & van Langevelde, H. J. 2016, A&A, 587, A104 Benjamin, R. A., Churchwell, E., Babler, B. L., et al. 2003, PASP, 115, 953 Beuther, H., Mottram, J. C., Ahmadi, A., et al. 2018, A&A, 617, A100 @@ -877,7 +877,7 @@ De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598 1127 Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M. 2011, ApJ, 733, 71 -Flower, D. R., O ffer, A., & Schilke, P. 1990, MNRAS, 244, 4P +Flower, D. R., O ffer, A., & Schilke, P. 
1990, MNRAS, 244, 4P Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036 Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ, 459, 193 @@ -893,7 +893,7 @@ Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148 Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90 Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137 Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239 -Ho ffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 +Ho ffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79 Hughes, V. A. 1991, ApJ, 383, 280 Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204 @@ -909,8 +909,8 @@ Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33 Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352 Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189 -McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in As- -tronomical Society of the Pacific Conference Series, Vol. 376, Astronomical +McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical + Society of the Pacific Conference Series, Vol. 376, Astronomical Data Analysis Software and Systems XVI, ed. R. A. Shaw, F. Hill, & D. J. Bell, 127 Mei, Y., Chen, X., Shen, Z.-Q., & Li, B. 2020, ApJ, 898, 157 @@ -922,8 +922,8 @@ Narayanan, G. & Walker, C. K. 1996, ApJ, 466, 844 Ott, M., Witzel, A., Quirrenbach, A., et al. 1994, A&A, 284, 331 Patel, N. A., Curiel, S., Sridharan, T. K., et al. 2005, Nature, 437, 109 Perley, R. A. & Butler, B. J. 2013, ApJS, 204, 19 -Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Ca- -soli, T. Contini, J. M. Hameury, & L. Pagani, 721 +Pety, J. 2005, in SF2A-2005: Semaine de l’Astrophysique Francaise, ed. F. Casoli, + T. 
Contini, J. M. Hameury, & L. Pagani, 721 Poynter, R. L. & Kakar, R. K. 1975, ApJS, 29, 87 Pratap, P., Menten, K. M., Reid, M. J., Moran, J. M., & Walmsley, C. M. 1991, ApJ, 373, L13 @@ -968,7 +968,7 @@ Appendix A: Table A.1. Summary of NH 3 (9, 6) maser observations. Source Telescope Beam Epoch Channel S - ν rms + ν rms S ν dv V LSR ∆ V @@ -978,30 +978,30 @@ size spacing ) (Jy) (mJy) (Jy km s −1 ) (km s − 1 ) -Cep A E ff elsberg 49 +Cep A E ff elsberg 49 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04 -Eff elsberg 49 +Eff elsberg 49 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04 -Eff elsberg 49 +Eff elsberg 49 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05 JVLAa 1 . 47 × 0 . 99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12 -Eff elsberg 49 +Eff elsberg 49 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01 0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05 -Eff elsberg 49 +Eff elsberg 49 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07 0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07 0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07 0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07 0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07 -G34.26 +0.15 E ff elsberg 49 +G34.26 +0.15 E ff elsberg 49 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13 -Eff elsberg 49 +Eff elsberg 49 2021, Feb. 11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12 -Eff elsberg 49 +Eff elsberg 49 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14 JVLAb 1 @@ -1009,25 +1009,25 @@ JVLAb . 06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09 0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12 0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08 -Eff elsberg 49 +Eff elsberg 49 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12 0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09 0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06 -Eff elsberg 49 +Eff elsberg 49 2021, Aug. 
12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05 -Notes. The spectral parameters are obtained from Gaussian fitting. (a ) - The JVLA spectrum toward Cep A is extracted from the E ff elsberg-beam- -sized region (FWHM 49 +Notes. The spectral parameters are obtained from Gaussian fitting. (a ) + The JVLA spectrum toward Cep A is extracted from the E ff elsberg-beamsized + region (FWHM 49 ). (b ) For G34.26+ 0.15, the JVLA beam samples the NH - 3 (9,6) spectrum over a region of radius 3 + 3 (9,6) spectrum over a region of radius 3 . 5, which contains all detected NH 3 (9,6) emissions. -Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. +Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. Source R.A. Dec. Size P.A. S ν -( h m s) ( ◦ +( h m s) ( ◦ ) (arcsec) (deg) (mJy) Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 @@ -1047,7 +1047,7 @@ Source R.A. Dec. S MB V LSR ∆ V 1 /2 -(h m s ) ( ◦ +(h m s ) ( ◦ ) (mJy beam−1 ) (K) (km s− 1 ) @@ -1066,7 +1066,7 @@ Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map o . 972, and δ J2000 = 62◦ - 01 + 01 49 . 587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse denoting the position of the NH @@ -1087,8 +1087,8 @@ J2000 = 18h . 560, and δ J2000 = 01◦ - 14 - 58 + 14 + 58 . 201, the peak position, is marked by a red cross. The red ellipses show the positions of NH 3 (9,6) emission with stars at their center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from @@ -1099,4 +1099,4 @@ et al. 2011), and CH 3 OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (V LSR ) of maser spots. 
-Article number, page 10 of 10 +Article number, page 10 of \ No newline at end of file diff --git a/read/results/playa/2201.00022.txt b/read/results/playa/2201.00022.txt index 963630f..e869ca3 100644 --- a/read/results/playa/2201.00022.txt +++ b/read/results/playa/2201.00022.txt @@ -20,12 +20,12 @@ Most stellar evolution models predict that black holes (BHs) should not exist ab indicate the existence of BHs with masses at and above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical -processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite -efficient, forming IMBHs as massive as 104 +processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite +efficient, forming IMBHs as massive as 104 M . This upper limit assumes that (1) the BHs accrete a substantial fraction of the stellar mass captured during each collision and (2) that the rate at which -new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar +new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our results suggest that BHs in the pair-instability mass gap and IMBHs may be ubiquitous in galactic centers. This formation channel has implications for observations. Collisions between stars and BHs @@ -36,7 +36,7 @@ These gravitational wave events are extreme and intermediate mass ratio inspiral respectively). 1. INTRODUCTION The recently detected gravitational wave source -GW190521 (The LIGO Scientific Collaboration et al. +GW190521 (The LIGO Scientific Collaboration et al. 2020a,b) produced an intermediate mass black hole of approximately 142 M . 
This event may have also had a @@ -51,21 +51,21 @@ more than . Similarly, the merger products of GW150914, GW170104, and GW170814 fall within the mass gap (e.g.,Abbott et al.2016,2017a,b). BH mergers that -form second generation BHs and, in some cases, inter- -mediate mass BHs (IMBHs), these gravitational wave -(GW) events can occur in globular clusters, young stel- -Corresponding author: Sanaea C. Rose +form second generation BHs and, in some cases, intermediate + mass BHs (IMBHs), these gravitational wave +(GW) events can occur in globular clusters, young stelCorresponding + author: Sanaea C. Rose srose@astro.ucla.edu 1 Note that the exact lower and upper limits may be sensitive to metallicity of the progenitor (e.g.,Woosley2017;Spera & Mapelli -2017a;Limongi & Chieffi2018a;Sakstein et al.2020;Belczynski -et al.2020a;Renzo et al.2020;Vink et al.2021). lar clusters, or the field (e.g.,Rodriguez et al.2018;Ro- -driguez et al.2019;Fishbach et al.2020;Mapelli et al. +2017a;Limongi & Chieffi2018a;Sakstein et al.2020;Belczynski +et al.2020a;Renzo et al.2020;Vink et al.2021). lar clusters, or the field (e.g.,Rodriguez et al.2018;Rodriguez + et al.2019;Fishbach et al.2020;Mapelli et al. 2021b,a;Di Carlo et al.2019,2021;Dall’Amico et al. 2021;Arca Sedda et al.2021). However, IMBHs are -not limited to these locations and may reside in galac- -tic nuclei as well. Several studies propose that our +not limited to these locations and may reside in galactic + nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc (e.g.,Hansen & Milosavljevi´c2003;Maillard et al.2004; G¨urkan & Rasio2005;Gualandris & Merritt2009;Chen @@ -73,58 +73,58 @@ G¨urkan & Rasio2005;Gualandris & Merritt2009;Chen 2020a;Zheng et al.2020;Naoz et al.2020;GRAVITY Collaboration et al.2020). Several IMBH formation channels have been suggested -in the literature. 
For example, IMBHs may have a cos- -mological origin, forming in the early universe either -as a result of the very first stars (e.g.,Madau & Rees +in the literature. For example, IMBHs may have a cosmological + origin, forming in the early universe either +as a result of the very first stars (e.g.,Madau & Rees 2001;Schneider et al.2002;Johnson & Bromm2007; -Valiante et al.2016) or from direct collapse of accumu- -lated gas (e.g.,Begelman et al.2006;Yue et al.2014; +Valiante et al.2016) or from direct collapse of accumulated + gas (e.g.,Begelman et al.2006;Yue et al.2014; Ferrara et al.2014;Choi et al.2015;Shlosman et al. -2016). These high redshift IMBHs would need to sur- -vive galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 -2 Rose et al. -Rashkov & Madau2014), with significant effects on their +2016). These high redshift IMBHs would need to survive + galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 + Rose et al. +Rashkov & Madau2014), with significant effects on their stellar and even dark matter surroundings (e.g.,Bertone et al.2009;Chen & Liu2013;Bringmann et al.2012;Eda et al.2013;Naoz & Silk2014;Naoz et al.2019). Another popular formation channel relies on the coalescence of many stellar-mass black holes, which may seed ob jects as massive as SMBHs (e.g.,Kroupa et al.2020). IMBHs -may form in the centers of globular clusters, where few- -body interactions lead to the merger of stellar-mass BHs +may form in the centers of globular clusters, where fewbody + interactions lead to the merger of stellar-mass BHs (e.g.,O’Leary et al.2006;G¨urkan et al.2006;Blecha -et al.2006;Freitag et al.2006;Umbreit et al.2012;Ro- -driguez et al.2018;Rodriguez et al.2019;Fragione et al. +et al.2006;Freitag et al.2006;Umbreit et al.2012;Rodriguez + et al.2018;Rodriguez et al.2019;Fragione et al. 2020b). 
Other formation mechanisms invoke successive collisions and mergers of massive stars (e.g.,Ebisuzaki et al.2001;Portegies Zwart & McMillan2002;Portegies Zwart et al.2004;Freitag et al.2006;Sakurai et al.2017; Kremer et al.2020;Gonz´alez et al.2021;Di Carlo et al. 2021;Das et al.2021a,b;Escala2021). -The main obstacle to sequential BH mergers in clus- -ters is that the merger recoil velocity kick often exceeds +The main obstacle to sequential BH mergers in clusters + is that the merger recoil velocity kick often exceeds the escape velocity from the cluster (e.g.,Schnittman & Buonanno2007;Centrella et al.2010;O’Leary et al. -2006;Baibhav et al.2020, Rom & Sari, in prep.). How- -ever, nuclear star clusters at the centers of galaxies do +2006;Baibhav et al.2020, Rom & Sari, in prep.). However, + nuclear star clusters at the centers of galaxies do not encounter this problem. For example,Fragione et al. (2021) explore repeated BH-BH mergers in nuclear star -clusters without a SMBH. They considered BH binary- -single interactions, binary BH GW merger, and GW +clusters without a SMBH. They considered BH binarysingle + interactions, binary BH GW merger, and GW merger recoil kicks. The post-kick merger product sinks -back towards the cluster center over a dynamical fric- -tion timescale. Using this approach, they showed that +back towards the cluster center over a dynamical friction + timescale. Using this approach, they showed that 10 3 − 10 4 M - IMBHs can form efficiently over the life- -time of a cluster. + IMBHs can form efficiently over the lifetime + of a cluster. However, as discussed in Section2.2, direct BH-star collisions are much more frequent than BH-BH collision -in galactic nuclei, making the former a promising chan- -nel for BH growth. In an N-body study of young star -clusters,Rizzuto et al.(2022) find that BH-star colli- -sions are a main contributor to the formation of BHs +in galactic nuclei, making the former a promising channel + for BH growth. 
In an N-body study of young star +clusters,Rizzuto et al.(2022) find that BH-star collisions + are a main contributor to the formation of BHs in the mass gap and IMBHs. In a similar vein,Stone et al.(2017) demonstrate that massive BHs can form from repeated tidal encounters between stars and BHs. @@ -133,96 +133,96 @@ collisions in a GN, with implications for the stellar and red giant populations (e.g.,Dale & Davies2006;Dale et al.2009;Balberg et al.2013;Mastrobuono-Battisti et al.2021). We propose that IMBHs can form naturally -within the central pc of a galactic center through re- -peated collisions between BHs and main sequence stars. +within the central pc of a galactic center through repeated + collisions between BHs and main sequence stars. During a collision, the BH can accrete some portion of -the star’s mass. Over many collisions, it can grow ap- -preciably in size. We demonstrate that this channel can create IMBHs with masses as large as 10 4 +the star’s mass. Over many collisions, it can grow appreciably + in size. We demonstrate that this channel can create IMBHs with masses as large as 10 4 M , an upper -limit that depends on the density profile of the surround- -ing stars and the efficiency of the accretion. -The paper is structured as follows: we describe rele- -vant physical processes and our approach in Section2. +limit that depends on the density profile of the surrounding + stars and the efficiency of the accretion. +The paper is structured as follows: we describe relevant + physical processes and our approach in Section2. In particular, we provide an overview of collisions in -Section2.2and present our statistical approach in Sec- -tion2.3. Section2.4discusses our treatment of the +Section2.2and present our statistical approach in Section2.3. 
+ Section2.4discusses our treatment of the mass growth with each collision and presents analytic -solutions to our equations in two different regimes, ef- -ficient collisions and inefficient collisions We compare +solutions to our equations in two different regimes, efficient + collisions and inefficient collisions We compare these solutions to our statistical results. Sections2.6 -and2.8discuss implications for GW merger events be- -tween IMBHs and the SMBH. We then incorporate re- -laxation processes and discuss the subsequent results in -Section2.9. Finally, we discuss and summarize our find- -ings in Section3. +and2.8discuss implications for GW merger events between + IMBHs and the SMBH. We then incorporate relaxation + processes and discuss the subsequent results in +Section2.9. Finally, we discuss and summarize our findings + in Section3. 2. METHODOLOGY -We consider a population of stellar mass BHs embed- -ded in a cluster of 1 M +We consider a population of stellar mass BHs embedded + in a cluster of 1 M stars. When stars and BHs -collide, the BHs can accrete mass. The growth rate de- -pends on the physical processes outlined below. We use +collide, the BHs can accrete mass. The growth rate depends + on the physical processes outlined below. We use a statistical approach to estimate the stellar encounters -and final IMBH masses. +and final IMBH masses. 2.1. Physical Picture We consider a population of BHs within the inner few -parsecs of the SMBH in a galactic nucleus (GN). We as- -sume that the BH mass distribution follows that of the +parsecs of the SMBH in a galactic nucleus (GN). We assume + that the BH mass distribution follows that of the stars from which they originate, a Kroupa initial mass function dN/dm ∝ m− 2.35 . While this choice represents -a gross oversimplification, it has very little bearing on -our final results. 
Future work may address the particu- -lars of the BH mass distribution, but we do not expect -that it will significantly alter the outcome. The upper +a gross oversimplification, it has very little bearing on +our final results. Future work may address the particulars + of the BH mass distribution, but we do not expect +that it will significantly alter the outcome. The upper and lower limits of the BH mass distribution are 5 and 50 M - , respectively. We select the upper limit to en- -compass the range of upper bounds predicted by stellar + , respectively. We select the upper limit to encompass + the range of upper bounds predicted by stellar evolution models, which vary between 40 and 125 M - + depending on the metallicity (Heger et al.2003;Woosley -2017;Spera & Mapelli2017b;Limongi & Chieffi2018b; +2017;Spera & Mapelli2017b;Limongi & Chieffi2018b; Belczynski et al.2020b;Renzo et al.2020). We assume that the orbits of the BHs follow a thermal eccentricity distribution. We draw their semima jor axes, a • , from a uniform distribution in log distance, dN/d(log r ) being -constant. While this distribution is not necessarily rep- -resentative of actual conditions in the GN, we use it to +constant. While this distribution is not necessarily representative + of actual conditions in the GN, we use it to build a comprehensive physical picture of BH growth at all distances from the SMBH, including within 0 . 01 pc. Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other IMBH Formation in Galactic Nuclei 3 -Figure 1. We plot the relevant timescales, including col- -lision (green), relaxation (gold), and BH-BH GW capture +Figure 1. We plot the relevant timescales, including collision + (green), relaxation (gold), and BH-BH GW capture (purple), for a single BH in the GN as a function of distance from the SMBH. For the collision timescale, we assume the BH is on a circular orbit. 
The timescales depend on the -density, so we adopt a range of density profiles, bounded by +density, so we adopt a range of density profiles, bounded by α = 1 (dashed curve) to α = 2 (dark, solid curve). The dark blue line represents the time for a 105 M BH to merge with the SMBH through GW emission. observationally motivated distributions in Section2.9, -but reserve a more detailed examination of the distribu- -tion’s impact for future work. +but reserve a more detailed examination of the distribution’s + impact for future work. 2.2. Direct Col lisions BHs in the GN can undergo direct collisions with other ob jects. The timescale for this process, t - coll , can be es- -timated using a simple rate calculation: t− 1 + coll , can be estimated + using a simple rate calculation: t− 1 coll = nσA, -where n is the number density of ob jects, σ is the ve- -locity dispersion, and A is the cross-section. We use the +where n is the number density of ob jects, σ is the velocity + dispersion, and A is the cross-section. We use the collision timescale fromRose et al.(2020): t− 1 coll = πn (a • ) σ (a • ) -× +× f 1 (e • )r 2 @@ -233,7 +233,7 @@ c 2G (m BH + m ) σ ( a -• )2 +• )2 . (1) where G is the gravitational constant and r c is the sum @@ -245,34 +245,34 @@ et al.(2020), f 1 ( e • ) and f 2 (e - • ) account for the effect of + • ) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and σ are simply evaluated at the semima jor axis of the orbit (see below). Note -that this timescale equation includes the effects of grav- -itational focusing, which enhances the cross-section of +that this timescale equation includes the effects of gravitational + focusing, which enhances the cross-section of interaction. Assuming a circular orbit for simplicity, we plot the timescale for a BH orbiting in the GN to collide with a 1 M star as a function of distance from the SMBH in Figure1. 
2 As this timescale depends on the density -of surrounding stars, we adopt a density profile of the +of surrounding stars, we adopt a density profile of the form: ρ ( r • ) = ρ - 0 + 0 r • r -0 +0 −α , (2) where r • denotes the distance from the SMBH. We adopt a SMBH mass of 4 × 10 6 M - such that our fiducial GN + such that our fiducial GN matches our own galactic center (e.g.,Ghez et al.2005; Genzel et al.2003). In this case, the normalization in Eq. (2) is ρ @@ -280,35 +280,35 @@ Eq. (2) is ρ M /pc3 at r -0 = 0.25 pc (Gen- -zel et al.2010). Additionally, in Eq. (2), α gives the -slope of the power law. We assume that a uniform pop- -ulation of solar mass stars account for most of the mass +0 = 0.25 pc (Genzel + et al.2010). Additionally, in Eq. (2), α gives the +slope of the power law. We assume that a uniform population + of solar mass stars account for most of the mass in the GN, making the stellar number density: n ( r • ) = ρ ( r • ) 1 M . (3) -The collision timescale also depends on the velocity dis- -persion, which we express as: +The collision timescale also depends on the velocity dispersion, + which we express as: σ (r -• ) = +• ) = GM • r • (1 + α ) , (4) -where α is the slope of the density profile and M - • de- -notes the mass of the SMBH (Alexander1999;Alexan- -der & Pfuhl2014). As mentioned above, Eq. (1) depends +where α is the slope of the density profile and M + • denotes + the mass of the SMBH (Alexander1999;Alexander + & Pfuhl2014). As mentioned above, Eq. (1) depends on the sum of the radii of the colliding ob jects, r c . We take r c = 1 R because these interactions involve a BH -and a star, and the former has a much smaller physi- -cal cross-section. For example, the Schwarzschild radius +and a star, and the former has a much smaller physical + cross-section. For example, the Schwarzschild radius of a 10 M BH is only 30 km, or 4 . 
31 × 10 −5 R @@ -316,22 +316,22 @@ of a 10 M this reason, direct collisions between compact ob jects are very rare and not included in our model. We note that direct collisions between BHs, via GW -emission, were shown to be efficient in nuclear star clus- -ters without SMBHs (e.g.,Portegies Zwart & McMil- -lan2000;O’Leary et al.2006;Rodriguez et al.2016). +emission, were shown to be efficient in nuclear star clusters + without SMBHs (e.g.,Portegies Zwart & McMillan2000;O’Leary + et al.2006;Rodriguez et al.2016). However, in the GN, star-BH collisions are much more frequent than direct BH-BH collisions. As depicted in Figure1, the star-BH collision timescale for a range -of density profiles is many orders of magnitude shorter -than the BH-BH GW collision timescale (for the rele- -vant equations, seeO’Leary et al.2009;Gond´an et al. -2018, for example). Thus, we expect that star-BH col- -lisions will be the main driver of IMBH growth in the +of density profiles is many orders of magnitude shorter +than the BH-BH GW collision timescale (for the relevant + equations, seeO’Leary et al.2009;Gond´an et al. +2018, for example). Thus, we expect that star-BH collisions + will be the main driver of IMBH growth in the GN. 2 - We note that the eccentricity has a very minor effect on the + We note that the eccentricity has a very minor effect on the collision timescale (Rose et al.2020). -4 Rose et al. + Rose et al. 2.3. Statistical Approach to Col lisions We simulate the mass growth of a population of BHs with initial conditions detailed in Section2.1. Over an @@ -340,8 +340,8 @@ increment ∆t of 10 6 a collision occurring, given by ∆ t/t coll . This choice of ∆ t is motivated by our galactic center’s star formation -timescale (e.g.,Lu et al.2009), allowing for regular re- -plenishment of the stellar population in the GN. We have +timescale (e.g.,Lu et al.2009), allowing for regular replenishment + of the stellar population in the GN. 
We have checked that the results are not sensitive to this choice of ∆ t , omitted here to avoid clutter. We draw a number between 0 and 1 using a random number generator. If @@ -353,13 +353,13 @@ the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3 . 2.4. Mass Growth -When a BH collides with a star, it may accrete ma- -terial and grow in mass. The details of the accretion +When a BH collides with a star, it may accrete material + and grow in mass. The details of the accretion depend on the relative velocity between the BH and star. For simplicity, this calculation assumes that the two ob jects experience a head on collision, with the BH -passing through the star’s center. We begin by con- -sidering the escape velocity from the BH at the star’s +passing through the star’s center. We begin by considering + the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R . Qualitatively, one @@ -376,10 +376,10 @@ approximately 1 R . For the purposes of this study, we assume that the BH accretes all of the material that it captures. The details of the accretion are uncertain, -however, and it may be much less efficient than our re- -sults imply. We discuss accretion in Section2.5. -To estimate ∆m, we begin with the Bondi-Hoyle ac- -cretion rate, ˙m, given by: +however, and it may be much less efficient than our results + imply. We discuss accretion in Section2.5. +To estimate ∆m, we begin with the Bondi-Hoyle accretion + rate, ˙m, given by: ˙m = 4 πG2 m 2 BH ρ @@ -389,8 +389,8 @@ s + σ 2 )3/ 2 , (5) 3 Closer to the SMBH, ∆t may exceed the collision timescale by -a factor of a few for steep density profiles. We include a safe- -guard in our code which takes the ratio t +a factor of a few for steep density profiles. We include a safeguard + in our code which takes the ratio t coll /∆ t and rounds it to the nearest integer. 
We take this integer to be the number of collisions and increase the BH mass accordingly. Figure 2. We consider an example that highlights the mass @@ -399,7 +399,7 @@ represent the initial masses and distances from the SMBH of the BHs involved in the simulation. For simplicity, we set the inital mass equal to 10 M for all of the BHs. Assuming -the density profile of stars has α = 1, we consider two cases: +the density profile of stars has α = 1, we consider two cases: BHs accrete all of the star’s mass during a collision (red) and only a portion of the star’s mass is accreted during a collision given by Eq.6(blue). The latter case results in less growth @@ -420,7 +420,7 @@ the conservative value of c consistent with the sound speed inside a 1 M star (Christensen-Dalsgaard et al.1996) and allows us to set -a lower limit on ∆ m. To find ∆m, at each collision, we +a lower limit on ∆ m. To find ∆m, at each collision, we have: ∆ m = min( ˙m × t , cross , 1 M @@ -433,50 +433,50 @@ the star. We take the minimum between ˙m × t 1 M because the BH cannot accrete more mass than one star at each collision. -Figure2juxtaposes the expected growth using Bondi- -Hoyle-Lyttleton accretion (blue small points) with a +Figure2juxtaposes the expected growth using BondiHoyle-Lyttleton + accretion (blue small points) with a much simpler model in which the BH accretes the star’s entire mass, 1 M (red large points). Both examples start with identical populations of 10 M BHs (grey) -and simulate growth through collisions using a statisti- -cal approach. As the BHs grow, the collision timescale, +and simulate growth through collisions using a statistical + approach. As the BHs grow, the collision timescale, which depends on m BH , decreases. Simultaneously, ∆ m , which also depends on m -BH , increases. The re- -sult is exponential growth (see discussion and details -surrounding Eq. (8)). 
In Figure2, however, the simula- -tions assume α = 1 for the stellar density profile, ensur- -ing the collision timescale is long compared to the sim- +BH , increases. The result + is exponential growth (see discussion and details +surrounding Eq. (8)). In Figure2, however, the simulations + assume α = 1 for the stellar density profile, ensuring + the collision timescale is long compared to the sim- IMBH Formation in Galactic Nuclei 5 ulation time, 10 Gyr. Therefore, the BHs grow slowly, -and their final masses can be approximated using the +and their final masses can be approximated using the following equation: m -final (t +final (t coll → const .) = m initial + ∆ m T t coll , (7) in which T represents the simulation time and ∆ m and t - coll remain constant, approximated as their initial val- -ues. + coll remain constant, approximated as their initial values. + This equation is plotted in Figure2for both cases, ∆ m = 1 M (red) and ∆m from Bondi-Hoyle-Lyttleton -accretion (blue), and the curves coincide with the cor- -responding simulated results. The shaded regions rep- -resent one standard deviation from Eq. (7), calculated +accretion (blue), and the curves coincide with the corresponding + simulated results. The shaded regions represent + one standard deviation from Eq. (7), calculated using the square root of the number of collisions, T /t coll . As indicated by the results in red, in the absence of Bondi-Hoyle-Lyttleton accretion, the BHs closest to the SMBH experience the most growth because they have -shorter collision timescales. However, Bondi-Hoyle- -Lyttleton accretion becomes important closer to the +shorter collision timescales. However, Bondi-HoyleLyttleton + accretion becomes important closer to the SMBH, where the velocity dispersion is large compared with the stars’ escape velocity, and curtails the mass growth for BHs in this region. Outside of 10− 2 @@ -486,21 +486,21 @@ consumes the star’s entire mass: the accretion-limited star’s mass. 
Eq.7does not apply for other values of α . When the collision timescale is shorter, corresponding to a larger -index α in the density profile (see Figure1), the growth -is very efficient and ∆m quickly approaches 1 M - . Con- -sequently, while we can now assume ∆ m = 1 M +index α in the density profile (see Figure1), the growth +is very efficient and ∆m quickly approaches 1 M + . Consequently, + while we can now assume ∆ m = 1 M , we can no longer assume the collision timescale is constant. -The final mass grows exponentially as a result. For +The final mass grows exponentially as a result. For ∆ m = 1M , the general solution is reached by solving -the differential equation dm/dt = 1 M +the differential equation dm/dt = 1 M /t coll (m ), which gives: m -final (∆ m → 1 M +final (∆ m → 1 M ) = − A + ( m initial + A ) e CT (8) @@ -512,29 +512,29 @@ star R example, we plot this curve in purple for the α = 2 case, in Figure3, which agrees with the simulated masses. 2.5. Uncertainties in Accretion -We note that the ∆ M calculated in this proof-of- -concept study assumes that the BH accretes all of the +We note that the ∆ M calculated in this proof-ofconcept + study assumes that the BH accretes all of the material that it captures. Estimating the true fraction -of the material accreted by the BH is very challeng- -ing; this complex problem requires numerically solving -the generalized GR fluid equations with cooling, heat- -ing, and radiative transfer, etc. and remains an active -field of research (e.g.,Blandford & Begelman1999;Park +of the material accreted by the BH is very challenging; + this complex problem requires numerically solving +the generalized GR fluid equations with cooling, heating, + and radiative transfer, etc. and remains an active +field of research (e.g.,Blandford & Begelman1999;Park & Ostriker2001;Narayan et al.2003;Igumenshchev et al.2003;Ohsuga et al.2005;Yuan et al.2012;Jiang et al.2014;McKinney et al.2014;Narayan et al.2022). 
-Heuristically, if a collision between a BH and a star re- -sults in an accretion disk, the disk’s viscous timescale +Heuristically, if a collision between a BH and a star results + in an accretion disk, the disk’s viscous timescale may be as low as days. The resultant luminosity can unbind most of the captured material, though details -such as the amount accreted and peak luminosity re- -main uncertain (e.g.,Yuan et al.(2012);Jiang et al. +such as the amount accreted and peak luminosity remain + uncertain (e.g.,Yuan et al.(2012);Jiang et al. (2014), see also the discussion inStone et al.(2017), Rizzuto et al.(2022), andKremer et al.(2022)). The -question becomes whether or not a BH can still accu- -mulate significant amounts of mass over many collisions -even if it accretes very little in a single one. We ex- -plore the viability of our channel using a physically mo- -tivated inefficient accretion model. Several studies have +question becomes whether or not a BH can still accumulate + significant amounts of mass over many collisions +even if it accretes very little in a single one. We explore + the viability of our channel using a physically motivated + inefficient accretion model. Several studies have invoked momentum-driven winds in BH accretion (e.g., Murray et al.2005;Ostriker et al.2010;Brennan et al. 2018). We thus estimate the fraction of captured mass @@ -543,53 +543,53 @@ accreted to be approximately v esc is the escape velocity from the BH at 1 R and η is the -accretion efficiency at the ISCO. We take η to be 0 .1 +accretion efficiency at the ISCO. We take η to be 0 .1 (e.g.,Yu & Tremaine2002). This expression for the fraction accreted is consistent withKremer et al.(2022) equation 19 for s = 0.5, which is a reasonable value for s, a free parameter between 0 . 2 and 0 . 8. We discuss the results of the momentum-driven winds estimate in Section3. 
We note that the accretion process may be -more efficient than this estimate implies if, for example, -jets or other instabilities result in the beaming of radi- -ation away from the captured material (e.g.,Blandford +more efficient than this estimate implies if, for example, +jets or other instabilities result in the beaming of radiation + away from the captured material (e.g.,Blandford & Zna jek1977;Begelman1979;De Villiers et al.2005; -McKinney & Gammie2004;McKinney2006;Igumen- -shchev2008;Begelman2012a,b;McKinney et al.2014). +McKinney & Gammie2004;McKinney2006;Igumenshchev2008;Begelman2012a,b;McKinney + et al.2014). 2.6. GW Inspiral When a BH is close to the SMBH, GW emission can -circularize and shrink its orbit. We implement the ef- -fects of GW emission on the BH’s semima jor axis and +circularize and shrink its orbit. We implement the effects + of GW emission on the BH’s semima jor axis and eccentricity followingPeters & Mathews(1963a). The characteristic timescale to merge a BH with an SMBH is given by: t GW ≈ 2. 9 × 10 12 - yr + yr M • 10 6 M - - −1 + + −1 m BH 10 6 M - + −1 -× +× M • + m BH 2 × 10 6 M - -− 1 + +− 1 a • 10− 2 - pc + pc 4 × f (e • )(1 − e 2 @@ -605,8 +605,8 @@ plot this timescale for a 1 × 105 M BH in Figure1in blue. -6 Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow ( α = 1) to + Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow ( α = 1) to cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq.8, taking m initial to be the average mass of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and @@ -615,27 +615,27 @@ In our simulations, we assume a BH has merged with the SMBH when the condition t GW < t elapsed is met. 
-When this condition is satisfied, we terminate mass +When this condition is satisfied, we terminate mass growth through collisions for that BH.4 2.7. IMBH growth As detailed above, BH-stellar collisions can increase the BH masses as a function of time. Here, we examine the sensitivity of the BH growth to the density power -law. From Eq. (1), it is clear that the growth rate de- -pends on the stellar density profile, governed by the in- -dex α . We expect that higher values of α , or steeper -profiles, will result in more efficient mass growth. In +law. From Eq. (1), it is clear that the growth rate depends + on the stellar density profile, governed by the index + α . We expect that higher values of α , or steeper +profiles, will result in more efficient mass growth. In Figure1, larger values of α lead to collision timescales in the GN’s inner region, inwards of 0 .25 pc, that are much smaller that the 10 Gyr simulation time. Figure3 -confirms this expectation. It depicts the mass growth of -a uniform distribution of BHs with initial conditions de- -tailed in Section2.1for five α values, spanning 1 (green) +confirms this expectation. It depicts the mass growth of +a uniform distribution of BHs with initial conditions detailed + in Section2.1for five α values, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards of 0 .25 pc for the α = 2 case. 2.8. Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates -Towards the SMBH, efficient collisions can create BHs +Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. Following the method detailed in Section2.6, when a given BH meets the criterion t @@ -645,20 +645,20 @@ elapsed , we mark For comparison, we also incrementally changed the semimajor axis and eccentricity from GW emission following the equations inPeters & Mathews(1963b). 
This method leads to a slight -increase in the final IMBH masses because it accounts for the +increase in the final IMBH masses because it accounts for the collisions that take place while the orbit is gradually shrinking. it as merged with the SMBH. We assume that at this point the dynamics of the BH will be determined by GW -emission, shrinking and circularizing the BHs orbit un- -til it undergoes an extreme or intermediate mass ratio +emission, shrinking and circularizing the BHs orbit until + it undergoes an extreme or intermediate mass ratio inspiral (EMRI and IMRI, respectively). The righthand plot in Figure3shows the BH masses versus time of -merger. It is interesting to note that even in the ab- -sence of relaxation processes, which are often invoked +merger. It is interesting to note that even in the absence + of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably IMRIs can form in this region. 2.9. Two Body Relaxation Processes -A BH orbiting the SMBH experiences weak gravita- -tional interactions with other ob jects in the GN. Over a +A BH orbiting the SMBH experiences weak gravitational + interactions with other ob jects in the GN. Over a relaxation time, these interactions alter its orbit about the SMBH. The two-body relaxation timescale for a single-mass system is: @@ -677,26 +677,26 @@ to be 1 M Eq. (7.106)). This equation represents the approximate timescale for a BH on a semi-circular orbit to change its orbital energy and angular momentum by order of -themselves. The BH experiences diffusion in its angular +themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficientFragione & Sari2018;Sari & Fragione2019). +efficientFragione & Sari2018;Sari & Fragione2019). Relaxation can cause the orbit of an ob ject in a GN to reach high eccentricities. 
If the ob ject is a BH, it can spiral into the SMBH and form an EMRI, while a star IMBH Formation in Galactic Nuclei 7 can be tidally disrupted by the SMBH (e.g.Magorrian & Tremaine1999;Wang & Merritt2004;Hopman & -Alexander2005;Aharon & Perets2016;Stone & Met- -zger2016;Amaro-Seoane2018;Sari & Fragione2019; +Alexander2005;Aharon & Perets2016;Stone & Metzger2016;Amaro-Seoane2018;Sari + & Fragione2019; Naoz et al.2022). The relaxation process is therefore crucial to our study. In Figure1, we plot the relaxation -timescale in gold for a range of α . We note that theBah- -call & Wolf(1976) profile, α = 7/4, corresponds to zero -net flux and therefore does not preferentially migrate +timescale in gold for a range of α . We note that theBahcall + & Wolf(1976) profile, α = 7/4, corresponds to zero +net flux and therefore does not preferentially migrate ob jects inward. -Additionally, because BHs are more massive on av- -erage than the surrounding ob jects, they are expected +Additionally, because BHs are more massive on average + than the surrounding ob jects, they are expected to segregate inwards in the GN (e.g.,Shapiro & Marchant1978;Cohn & Kulsrud1978;Morris1993; Miralda-Escud´e & Gould2000;Baumgardt et al.2004). @@ -710,8 +710,8 @@ Fregeau et al.2002;Merritt2006), which is typically an order of magnitude smaller than the relaxation timescale plotted in Figure1. We incorporate relaxation processes by introducing a -small change in the BH’s energy and angular momen- -tum each time it orbits the SMBH. We apply a small +small change in the BH’s energy and angular momentum + each time it orbits the SMBH. We apply a small instantaneous velocity kick to the BH, denoted as ∆ v . We draw ∆v from a Guassian distribution with average of zero and a standard deviation of ∆ v @@ -719,7 +719,7 @@ of zero and a standard deviation of ∆ v 3, where ∆ v rlx = v - • + • P • /t rlx (seeBradnick et al.2017, for an @@ -727,165 +727,165 @@ approach to changes in the angular momentum). 
The new orbital parameters can be calculated followingLu & Naoz(2019), and seeNaoz et al.(2022) for the full set of equations. -We account for the effects of relaxation processes, -including mass-segregation, using a multi-faceted ap- -proach. We begin by migrating each BH towards the -center over its mass-segregation timescale, shifting it in- -crementally inward such that its orbital energy changes +We account for the effects of relaxation processes, +including mass-segregation, using a multi-faceted approach. + We begin by migrating each BH towards the +center over its mass-segregation timescale, shifting it incrementally + inward such that its orbital energy changes by order of itself within the segregation timescale. As the BHs segregate down the potential well, their abundance with respect to stars increases, until at some turnover radius, BHs become the dominant source of -scattering for both black holes and stars. Within this ra- -dius, BH self-interaction dominates over two-body scat- -terings with the now rarer main-sequence stars. The -BHs will then settle onto a Bahcall-Wolf profile, while -the stars may follow a shallower profile, with approx- -imately n +scattering for both black holes and stars. Within this radius, + BH self-interaction dominates over two-body scatterings + with the now rarer main-sequence stars. The +BHs will then settle onto a Bahcall-Wolf profile, while +the stars may follow a shallower profile, with approximately + n ∝ r − 1.5 , inwards of the transition radius (Linial & Sari in prep.). Therefore, after the initial mass segregation, we allow -the BHs to begin diffusing over a relaxation timescale, -their orbital parameters changing slowly through a ran- -dom process. In this random process, some of the BHs +the BHs to begin diffusing over a relaxation timescale, +their orbital parameters changing slowly through a random + process. In this random process, some of the BHs may migrate closer to the SMBH. 
We terminate mass growth when the BH enters the inner 200 au of the GN, -within which the density of stars is uncertain. This cut- -off is based on the 120 au pericenter of S0-2, the closest +within which the density of stars is uncertain. This cutoff + is based on the 120 au pericenter of S0-2, the closest known star to the SMBH (e.g.,Ghez et al.2005). -Another physical process that causes inward migra- -tion is dynamical friction. A cursory derivation based +Another physical process that causes inward migration + is dynamical friction. A cursory derivation based on the dynamical friction equations described inBinney -& Tremaine(2008) reveals the process to have a simi- -lar timescale to mass segregation. If a BH diffuses to +& Tremaine(2008) reveals the process to have a similar + timescale to mass segregation. If a BH diffuses to a distance greater than 2 pc from the SMBH, exiting -the sphere of influence, we have it sink inwards, back +the sphere of influence, we have it sink inwards, back towards the center, over a dynamical friction timescale. After one dynamical friction timescale has passed, we -restart diffusion. +restart diffusion. We note that our prescription ignores self-interactions between the BHs. As mentioned above, as the BHs sink -towards the SMBH, their concentration in the inner re- -gion of the GN increases, allowing them to dominate the +towards the SMBH, their concentration in the inner region + of the GN increases, allowing them to dominate the scattering. We reserve the inclusion of these interactions for future study. -2.10. Effect of Relaxation Processes +2.10. Effect of Relaxation Processes As depicted in Figure4, two-body relaxation processes -result in more EMRIs and IMRIs events. These pro- -cesses allow BHs that begin further from the SMBH -to migrate inwards and grow more efficiently in mass. +result in more EMRIs and IMRIs events. 
These processes + allow BHs that begin further from the SMBH +to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are -initially closer to the SMBH by allowing them to dif- -fuse out of the inner region where collisions are efficient. +initially closer to the SMBH by allowing them to diffuse + out of the inner region where collisions are efficient. As can be seen in Figure4, the net result is that more BHs grow, but the maximum mass is lower compared to the scenario that ignores two-body relaxation. The -histogram in Figure4presents the final BH mass distri- -butions for different power law indices α . As expected, +histogram in Figure4presents the final BH mass distributions + for different power law indices α . As expected, the two-body relaxation suppresses the α dependence highlighted in Figure3. In fact, using a KS test, we -find that we cannot reject the hypothesis that the two +find that we cannot reject the hypothesis that the two distributions were drawn from the same sample for the α = 1.75 and α = 2 results. Interestingly, a BH mass IMF with an average of 10 M - leads to a final distri- -bution with an average of ∼ 200 M + leads to a final distribution + with an average of ∼ 200 M and a median of ∼ 45 M , which lies within the mass gap. 3. DISCUSSION AND PREDICTIONS We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass -BH and main-sequence stars. Taking both a statisti- -cal and analytic approach, we show that this channel -can produce IMBHs efficiently with masses as high as +BH and main-sequence stars. Taking both a statistical + and analytic approach, we show that this channel +can produce IMBHs efficiently with masses as high as 10 3− 4 M - and may result in many IMBH-SMBH merg- -ers (intermediate-mass ratio inspirals, or IMRIs) and + and may result in many IMBH-SMBH mergers + (intermediate-mass ratio inspirals, or IMRIs) and EMRIs. -8 Rose et al. 
-Figure 4. Similar to Figure3, we plot the initial masses versus initial distance (grey) and final mass versus final distance -(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. -We assume α = 1 . 75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward + Rose et al. +Figure 4. Similar to Figure3, we plot the initial masses versus initial distance (grey) and final mass versus final distance +(red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. +We assume α = 1 . 75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure3. Additionally, more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses -for two different values of α , 1 . 5 (orange, solid), α , 1 . 75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +for two different values of α , 1 . 5 (orange, solid), α , 1 . 75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation processes. We also show the results for a simulation with α = 1. 75 that accounts for momentum-driven winds (black, dotted). Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH -will grow in mass. The increase may equal star’s en- -tire mass if the relative velocity is smaller than the es- -cape velocity from the BH at 1 R +will grow in mass. The increase may equal star’s entire + mass if the relative velocity is smaller than the escape + velocity from the BH at 1 R . However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. 
In this limit, the BH captures a “tunnel” of material through -the star, estimated using Bondi-Hoyle-Lyttleton accre- -tion. In our statistical analysis, we account for Bondi- -Hoyle-Lyttleton accretion and find that BHs outside of +the star, estimated using Bondi-Hoyle-Lyttleton accretion. + In our statistical analysis, we account for BondiHoyle-Lyttleton + accretion and find that BHs outside of 10 −2 pc from the SMBH can capture the entire star (see Figure2). -The efficiency of collisions, and therefore IMBH, +The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to the underlying stellar density. As shown in Figure3, a -steeper density profile results in larger IMBHs. This be- -havior can be understood from the collision timescale’s -dependence on the stellar density profile. A steeper pro- -file yields shorter collision timescales near the SMBH. +steeper density profile results in larger IMBHs. This behavior + can be understood from the collision timescale’s +dependence on the stellar density profile. A steeper profile + yields shorter collision timescales near the SMBH. However, the inclusion of relaxation processes in the -simulations dampens the influence of the stellar density -profile by allowing BHs to diffuse into regions of more -or less efficient growth. As a result, more BHs grow in +simulations dampens the influence of the stellar density +profile by allowing BHs to diffuse into regions of more +or less efficient growth. As a result, more BHs grow in mass, but their maximum mass is smaller ( ∼ 104 M ). -Additionally, the final masses have no apparent depen- -dence on distance from the SMBH (see Figure4). +Additionally, the final masses have no apparent dependence + on distance from the SMBH (see Figure4). Most simulations in our study assume that the BHs -accrete all of the mass that they capture. The final BH +accrete all of the mass that they capture. The final BH masses can be taken as an upper limit. 
We note that -the accretion is a highly uncertain process and repre- -sents an active field of study (e.g.,Blandford & Begel- -man1999;Park & Ostriker2001;Narayan et al.2003; +the accretion is a highly uncertain process and represents + an active field of study (e.g.,Blandford & Begelman1999;Park + & Ostriker2001;Narayan et al.2003; Igumenshchev et al.2003;Ohsuga et al.2005;Yuan et al.2012;Jiang et al.2014;McKinney et al.2014; Narayan et al.2022). To assess the limits of our model, we also consider a physically motivated accretion model, momentum-driven winds (Section2.5). We present the -final mass distribution for momentum-driven winds in -Figure4. Importantly, we find that BHs within the +final mass distribution for momentum-driven winds in +Figure4. Importantly, we find that BHs within the mass gap still form naturally despite the substantially reduced accretion. About 5% of the BHs grow by 10 to 100 M - . Furthermore, if we increase this ∆M esti- -mate by a factor of 2 (i.e., use η = 0. 05), the simula- -tion produces a 3. 5 × 10 3 + . Furthermore, if we increase this ∆M estimate + by a factor of 2 (i.e., use η = 0. 05), the simulation + produces a 3. 5 × 10 3 M IMBH for the same initial -conditions. Our proof-of-concept demonstrates that col- -lisions between BH and stars are an important process +conditions. Our proof-of-concept demonstrates that collisions + between BH and stars are an important process that should be taken into account in dense places such as a GN. -Mass growth through BH-main-sequence star colli- -sions may act in concert with other IMBH formation +Mass growth through BH-main-sequence star collisions + may act in concert with other IMBH formation channels, such as compact ob ject binary mergers (e.g., Hoang et al.2018;Stephan et al.2019;Fragione et al. -2021;Wang et al.2021). While in some cases colli- -sions can unbind a binary (e.g.,Sigurdsson & Phinney +2021;Wang et al.2021). 
While in some cases collisions + can unbind a binary (e.g.,Sigurdsson & Phinney 1993;Fregeau et al.2004), BH binaries can be tightly -bound enough to withstand the collisions. Wide bina- -ries may also become unbound due to interactions with +bound enough to withstand the collisions. Wide binaries + may also become unbound due to interactions with the neighboring stars and compact ob jects (e.g.,Binney & Tremaine1987;Rose et al.2020, see latter study for the timescale for an arbitrary eccentricity). However, -as highlighted in previous studies, a substantial frac- -tion of these binaries may merge due to the Eccentric +as highlighted in previous studies, a substantial fraction + of these binaries may merge due to the Eccentric Kozai Lidov mechanism, leaving behind a single star or a single compact ob ject (e.g.,Stephan et al.2016,2019; Hoang et al.2018). Additionally, to be susceptible to -evaporation, BH binaries must have a wider configura- -tion. Otherwise, they will be more tightly bound than +evaporation, BH binaries must have a wider configuration. + Otherwise, they will be more tightly bound than the average kinetic energy of the surrounding ob jects and will only harden through weak gravitational inter- IMBH Formation in Galactic Nuclei 9 @@ -894,7 +894,7 @@ actions with neighboring stars (see for example Figure We note that we assume a steady-state and treat the stars as a reservoir in this model. Future work will take a more nuanced approach to the background stars, whose -density as a function of time can be influenced by several +density as a function of time can be influenced by several factors. Firstly, the relaxation of the stellar population occurs on Gyr timescales. Some studies have suggested that in situ star formation can occur in the Galactic @@ -902,64 +902,64 @@ Center as close as 0.04 pc from the SMBH (e.g.,Levin & Beloborodov2003;Paumard et al.2006), and star formation episodes can occur as often as every ∼ 5 Myr (e.g.Lu et al.2009). 
Therefore, we expect that after -the first Gyr, stars within 0 .01 pc will be replenished +the first Gyr, stars within 0 .01 pc will be replenished at intervals consistent with the star formation episodes; the infalling populations of stars are separated by ∼ 5 − 10 Myr, which is shorter than the collision timescale. -However, star-star collisions may complicate this pic- -ture within ∼ 0. 01 pc. As discussed above, regular star -formation ensures the BHs always have a stellar popula- -tion to interact with outside of ∼ 0. 01 pc.5 +However, star-star collisions may complicate this picture + within ∼ 0. 01 pc. As discussed above, regular star +formation ensures the BHs always have a stellar population + to interact with outside of ∼ 0. 01 pc.5 At 0 . 01 pc, however, the kinetic energy during a collision between two 1 M stars is larger than their binding energies. Collisions can therefore thin out the stellar populations -during the time it takes them to diffuse to these small +during the time it takes them to diffuse to these small radii, 0 .01 pc, and may reduce the BH growth in the innermost region. We reserve the inclusion of star-star -collisions for future work. We also note that the disrup- -tion of binary stars by the SMBH may help replenish +collisions for future work. We also note that the disruption + of binary stars by the SMBH may help replenish the stellar population even as collisions work to deplete it (e.g.,Balberg et al.2013); when a binary is disrupted, one of the stars is captured on a tightly bound orbit about the SMBH. -An IMBH may also affect the stellar density profile. +An IMBH may also affect the stellar density profile. 
As it spirals into the SMBH, it can perturb stellar orbits, and these interactions can lead to hypervelocity stars (e.g.,Baumgardt et al.2006a;L¨ockmann & Baumgardt 2008).L¨ockmann & Baumgardt(2008) show that an -IMBH can modify an initially steep stellar density pro- -file to become consistent with the flatter cusp observed -in the Galactic Center. The stars may then be replen- -ished on 100 Myr timescales (Baumgardt et al.2006a). -Therefore, after the formation of the first few IMBHs, +IMBH can modify an initially steep stellar density profile + to become consistent with the flatter cusp observed +in the Galactic Center. The stars may then be replenished + on 100 Myr timescales (Baumgardt et al.2006a). +Therefore, after the formation of the first few IMBHs, subsequent BH growth may occur in bursts, coinciding with replenishment of the stars. While there are many competing dynamical processes -that shape the stellar density profile, we stress that α +that shape the stellar density profile, we stress that α 5 In fact, the star-star collision timescale is greater than 10 Myr for the entire parameter space, save at 0. 001 pc for larger values of α ; the BH-star collision timescale plotted in Fig. 1 is the same order of magnitude as the star-star collision timescale. can simply be chosen to encapsulate all of the relevant -physics. A value for α that is constrained by observa- -tions must already reflect ongoing processes like star- -star collisions and replenishment.Sch¨odel et al.(2018) -find the observed stellar mass enclosed within 0.01 pc of +physics. A value for α that is constrained by observations + must already reflect ongoing processes like starstar + collisions and replenishment.Sch¨odel et al.(2018) +find the observed stellar mass enclosed within 0.01 pc of the Milky Way’s Galactic Center to be approximately 180 M - . This estimate is consistent to order of magni- -tude with our α = 1.25 case. In a simulation like those + . 
This estimate is consistent to order of magnitude + with our α = 1.25 case. In a simulation like those depicted in Figure 4, which include relaxation, α = 1. 25 leads to a maximum IMBH mass of 140 M - . Further- -more, while the stellar mass within 0.01 pc may be a + . Furthermore, + while the stellar mass within 0.01 pc may be a few hundred M - ,Do et al.(2019) andGRAVITY Col- -laboration et al.(2020) set an upper limit on the mass -enclosed within the orbit of S0-2 to be about a few thou- -sand M + ,Do et al.(2019) andGRAVITY Collaboration + et al.(2020) set an upper limit on the mass +enclosed within the orbit of S0-2 to be about a few thousand + M , or 0. 1% of the central mass. This upper limit can include mass that was previously in stars but is now in BHs. In that case, the 180 M @@ -975,8 +975,8 @@ Fragione et al.2021) and even neutron star BH mergers increase in mass through stellar collisions. As a result, the BH-BH collision timescale, discussed in Section2.2, will become relevant to our simulations, allowing the -BHs to grow through this channel in addition to stel- -lar collisions. Additionally, this compact ob ject mergers +BHs to grow through this channel in addition to stellar + collisions. Additionally, this compact ob ject mergers result in GW recoil, which may have a large impact on the dynamics (e.g.,Baibhav et al.2020;Fragione et al. 2021). @@ -985,8 +985,8 @@ dissipates energy from the orbit. Along with relaxation, GW emission causes BHs to sink towards the SMBH and eventually undergo a merger. As a result, the GN environment is conducive to the formation of EMRIs -and IMRIs. The GW emission from EMRIs and IM- -RIs is expected to be at mHz frequencies, making them +and IMRIs. The GW emission from EMRIs and IMRIs + is expected to be at mHz frequencies, making them promising candidates for LISA to observe. While the exact rate calculation is beyond the scope of this study, the mechanism outlined here seems very promising. 
@@ -996,36 +996,36 @@ well as within our own galactic center. This implication seems to be consistent with recent observational and theoretical studies (e.g.,Hansen & Milosavljevi´c2003; Maillard et al.2004;G¨urkan & Rasio2005;Gualandris -& Merritt2009;Chen & Liu2013;Generozov & Madi- -gan2020;Fragione et al.2020a;Zheng et al.2020;Naoz +& Merritt2009;Chen & Liu2013;Generozov & Madigan2020;Fragione + et al.2020a;Zheng et al.2020;Naoz et al.2020;GRAVITY Collaboration et al.2020). -10 Rose et al. + Rose et al. Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our galactic centre (e.g.,Muno et al.2005,2009;Hailey -et al.2018;Zhu et al.2018;Cheng et al.2018, seeKre- -mer et al.(2022) for a discussion of electromagnetic sig- -natures from BH-star collisions) 6 +et al.2018;Zhu et al.2018;Cheng et al.2018, seeKremer + et al.(2022) for a discussion of electromagnetic signatures + from BH-star collisions) 6 . These interactions, in particular grazing collisions, may also result in tidal disruption events (e.g.,Baumgardt et al.2006b;Perets -et al.2016;Stone et al.2017;Samsing et al.2019;Kre- -mer et al.2021). Thus, the process outlined here may +et al.2016;Stone et al.2017;Samsing et al.2019;Kremer + et al.2021). Thus, the process outlined here may produce electromagnetic signatures in addition to GW mergers. We thank the anonymous referee for useful comments. We also thank Jessica Lu, Fred Rasio, Kyle Kremer, -Ryosuke Hirai, Ilya Mandel, and Erez Michaely for use- -ful discussion. +Ryosuke Hirai, Ilya Mandel, and Erez Michaely for useful + discussion. SR thanks the Charles E. Young Fellowship, the Nina Byers Fellowship, and the Michael A. Jura Memorial Graduate Award for support. SR and SN acknowledge the partial support from NASA ATP 80NSSC20K0505. -SN thanks Howard and Astrid Preston for their gener- -ous support. IL thanks support from the Adams Fellow- -ship. SN and RS thank the Bhaumik Institute visitor -program. 
This work was performed in part at the As- -pen Center for Physics, which is supported by National +SN thanks Howard and Astrid Preston for their generous + support. IL thanks support from the Adams Fellowship. + SN and RS thank the Bhaumik Institute visitor +program. This work was performed in part at the Aspen + Center for Physics, which is supported by National Science Foundation grant PHY-1607611. REFERENCES Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, @@ -1043,8 +1043,8 @@ doi:10.1088/0004- 637X/780/2/148 Amaro-Seoane, P. 2018, Living Reviews in Relativity, 21, 4, doi:10.1007/s41114- 018-0013- 8 6 - The connection between the observed X-ray sources at the Galac- -tic Center and tidal capture has been suggested byGenerozov + The connection between the observed X-ray sources at the Galactic + Center and tidal capture has been suggested byGenerozov et al.(2018), but seeZhu et al.(2018);Stephan et al.(2019) for alternative channels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. @@ -1166,7 +1166,7 @@ doi:10.3847/1538- 4357/ab94bc Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, J. P. 2018, MNRAS, 478, 4030, doi:10.1093/mnras/sty1262 -12 Rose et al. + Rose et al. Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, doi:10.1103/RevModPhys.82.3121 @@ -1174,7 +1174,7 @@ Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, doi:10.1086/377127 Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, 620, 744, doi:10.1086/427175 -Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, +Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, 860, 5, doi:10.3847/1538- 4357/aabfee Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, 908, L29, doi:10.3847/2041- 8213/abdf5b @@ -1219,7 +1219,7 @@ Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, MNRAS, 498, 5652, doi:10.1093/mnras/staa2276 Levin, Y., & Beloborodov, A. M. 
2003, ApJL, 590, L33, doi:10.1086/376675 -Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, +Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, doi:10.3847/1538- 4365/aacb24 —. 2018b, ApJS, 237, 13, doi:10.3847/1538- 4365/aacb24 L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, @@ -1234,10 +1234,10 @@ Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, doi:10.1046/j.1365- 8711.1999.02853.x Maillard, J. P., Paumard, T., Stolovy, S. R., & Rigaut, F. 2004, A&A, 423, 155, doi:10.1051/0004- 6361:20034147 -Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, +Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, M., & Artale, M. C. 2021a, arXiv e-prints, arXiv:2109.06222.https://arxiv.org/abs/2109.06222 -Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, +Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, MNRAS, 505, 339, doi:10.1093/mnras/stab1334 Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. 2021, MNRAS, 505, 3314, doi:10.1093/mnras/stab1409 @@ -1253,9 +1253,9 @@ doi:10.1088/0034- 4885/69/9/R01 Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, doi:10.1086/317837 Morris, M. 1993, ApJ, 408, 496, doi:10.1086/172607 -Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, +Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, 622, L113, doi:10.1086/429721 -Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, +Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, ApJS, 181, 110, doi:10.1088/0067-0049/181/1/110 Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, 618, 569, doi:10.1086/426067 @@ -1350,7 +1350,7 @@ Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, MNRAS, 467, 4180, doi:10.1093/mnras/stx097 Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, doi:10.1093/mnras/stv2281 -The LIGO Scientific Collaboration, the Virgo +The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, arXiv:2009.01075.https://arxiv.org/abs/2009.01075 —. 
2020b, arXiv e-prints, arXiv:2009.01190. @@ -1363,7 +1363,7 @@ Society, 457, 3356, doi:10.1093/mnras/stw225 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, G. N. 2021, MNRAS, 504, 146, doi:10.1093/mnras/stab842 -14 Rose et al. + Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, doi:10.3847/1538- 4357/ac088d @@ -1380,4 +1380,4 @@ Society, 440, 1263, doi:10.1093/mnras/stu351 Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, arXiv:2011.04653.https://arxiv.org/abs/2011.04653 Zhu, Z., Li, Z., & Morris, M. R. 2018, ApJS, 235, 26, -doi:10.3847/1538- 4365/aab14f +doi:10.3847/1538- 4365/aab14f \ No newline at end of file diff --git a/read/results/playa/2201.00029.txt b/read/results/playa/2201.00029.txt index 8d989c2..db15086 100644 --- a/read/results/playa/2201.00029.txt +++ b/read/results/playa/2201.00029.txt @@ -1,362 +1,362 @@ - 1 - - - - - - -Exploring new techniques for analyzing variability in white dwarf KIC 8626021 -Thomas Huckans , Peter Stine + + + + + + + +Exploring new techniques for analyzing variability in white dwarf KIC 8626021 +Thomas Huckans , Peter Stine Department of Physics and Engineering , Bloomsburg University of Pennsylvania , 400 E 2 nd - St ., -Bloomsburg, PA 17815 - - 2 -Abst r act - - As is common with the collection of astronomical data, signals are frequently dominated -by noise. However, when performing FTs of light curves, re - binning data can improve the signal - -to - noise ratio ( SNR ) at lower frequencies. Using data collected from the K epler space telescope, -we sequentially re - binned data three times to investigate the SNR i mprovement of lower frequency -(< 1 7 µ Hz) variability in white dwarf KIC 8626021 . We fou nd that the SNR at approximately 5.8 -µ Hz greatly improved through this process, and we postulate that this frequen c y is linked to the -rotation of KIC 8626021. 
- - - Introduction - -First detected in 1862, white dwarfs long posed a mystery for early observ ers. When the -companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and -densities baffled astronomers. Lacking full understanding of atom ic structures and the energy -states of electrons, these early researchers believ ed white dwarfs to o dense to exist . However, new + St ., +Bloomsburg, PA 17815 + + +Abst r act + + As is common with the collection of astronomical data, signals are frequently dominated +by noise. However, when performing FTs of light curves, re - binning data can improve the signal to + - noise ratio ( SNR ) at lower frequencies. Using data collected from the K epler space telescope, +we sequentially re - binned data three times to investigate the SNR i mprovement of lower frequency +(< 1 7 µ Hz) variability in white dwarf KIC 8626021 . We fou nd that the SNR at approximately 5.8 +µ Hz greatly improved through this process, and we postulate that this frequen c y is linked to the +rotation of KIC 8626021. + + + Introduction + +First detected in 1862, white dwarfs long posed a mystery for early observ ers. When the +companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and +densities baffled astronomers. Lacking full understanding of atom ic structures and the energy +states of electrons, these early researchers believ ed white dwarfs to o dense to exist . However, new discoveries at the turn of the 20 th - century explained the existence of these stars , and between the -world wars white dwarfs wer e increasingly studied and modeled (Holberg, 2009 ) . -As stars age, those that lack the mass to become neutron stars and black holes become -white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008 ) . 
They are -composed of a core o f carbon and oxygen ions that slowly cools over billions of years, and the -light emanating from these star s is a result of thermal energy. White dwarf stars are no longer -supported against the force of gravity by fusion, so the stars collapse into an elect ron - degenerate -state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two -electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from -collapsing entirely. -For many years, accurate detection of light variability in white dwarfs was difficult due to -a lack of adequate instruments. However , the launch of the Kepler space telescope in 2009 made -capturing the light of distant stars much more efficient and effective (Basri et al., 2010 ) . Kepler -was initially de veloped with the intention of surveying our region of the Milky Way galaxy in -order to find potentially habitable planets. The purpose of the mission was to identify key traits for -such planets by determining the number of planets in habitable zones, the s izes and shapes of orbits, -and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler + century explained the existence of these stars , and between the +world wars white dwarfs wer e increasingly studied and modeled (Holberg, 2009 ) . +As stars age, those that lack the mass to become neutron stars and black holes become +white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008 ) . They are +composed of a core o f carbon and oxygen ions that slowly cools over billions of years, and the +light emanating from these star s is a result of thermal energy. White dwarf stars are no longer +supported against the force of gravity by fusion, so the stars collapse into an elect ron - degenerate +state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. 
As two +electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from +collapsing entirely. +For many years, accurate detection of light variability in white dwarfs was difficult due to +a lack of adequate instruments. However , the launch of the Kepler space telescope in 2009 made +capturing the light of distant stars much more efficient and effective (Basri et al., 2010 ) . Kepler +was initially de veloped with the intention of surveying our region of the Milky Way galaxy in +order to find potentially habitable planets. The purpose of the mission was to identify key traits for +such planets by determining the number of planets in habitable zones, the s izes and shapes of orbits, +and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler observed approximately 1.5 x 10 5 - stars ( Johnson, 2018) , affording scientists excellent -opportunities to research stel lar variability . Due to the loss of a second reaction wheel in 2013, -NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and -astrophysics. -Utilizing Kepler’s ability to maintain three - dimensional control, NASA proceeded to use -the telescope to collect photometry data of certain sections of our galaxy, although the number of -targets was significantly reduced. In addition, the K2 mission was designed to be community - -oriented, with the scientific community having a n influence on th e fields observed and serving as -the analysts of the vast amounts of data being received ( Howell et al., 2014 ). Although Kepler was -deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of -white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations -Center (KASOC). - 3 -The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon -previous studies, this research investigated novel techniques of analyzing variability in white -dw arfs. 
The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on -the star, allowing for the validation of results using our methods. KIC 8626021 has an effective + stars ( Johnson, 2018) , affording scientists excellent +opportunities to research stel lar variability . Due to the loss of a second reaction wheel in 2013, +NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and +astrophysics. +Utilizing Kepler’s ability to maintain three - dimensional control, NASA proceeded to use +the telescope to collect photometry data of certain sections of our galaxy, although the number of +targets was significantly reduced. In addition, the K2 mission was designed to be community oriented, + with the scientific community having a n influence on th e fields observed and serving as +the analysts of the vast amounts of data being received ( Howell et al., 2014 ). Although Kepler was +deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of +white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations +Center (KASOC). + +The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. Building upon +previous studies, this research investigated novel techniques of analyzing variability in white +dw arfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on +the star, allowing for the validation of results using our methods. KIC 8626021 has an effective temperature of 2 9,700 K, log g = 7.890, and mass of 0.56 M - ☉ (Córsico, 2020 ) . Other research -has found that this white dwarf is the DBV with the highest known temperature, and its helium -layer is the thinn est (Bischoff - K im et al., 2015). Despite the long - cadence light curve being too -noisy to draw many conclusions , other FTs of short - cadence data have been performed to find -variability in the dwarf. 
Analyses at high frequencies of KIC 8626021 yielded pulsations with -frequencies of 4309.89 µHz , 5073.26 µHz , 36 81.87 µHz , 3294.22 µHz and 2658.85 µ Hz -(Østensen et al., 2011 ). These fin dings confirm the classification of the white dwarf as a V777 -Herculis, although our research focuses on low frequencies using long - cadence data. - - - - Method s - -All data were downloaded from the KASOC database, and the long - cadence (data -sampled ap proximately every thirty minutes) measurements of Corrected F lux (ppm) were -analyzed. All computations were made in Wolfram Mathematica and Microsoft Exce l , and FTs -were performed in Mathematica . The re - binning process consist ed of summin g adjacent light -c urve data points in each quarter , therefore doubling the sampling interval from 0 .5 hour to one -hour, and then repeating this process on the data sample fo r a total of three times. In addition, a -significant detection was defin ed as being 3 𝝈 above the mean of the relative flux, and 0 on the -graphs below represents this 3 𝝈 cutoff. ( Koch, D. G., 2010), ( Wolfram Research, Inc., 2021). To -find the SNR , we converted to decibels . Using these SNRs , we were able to easily identify -im provement in signal strength. - - - Results - - Figure 1 presents the lightcurves constructed for quarters seven (Q 7) and thirteen (Q13) , -with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs -of the first iteration and three successive re - bins for Q7 , while Figure 3 presents the FTs of the -same for Q13 . - Tables 1 and 2 both show the hypothesized f requency corresponding to the rotation of -KIC 8626021 that is found in the FTs of the f irst iteration and subsequent re - bins for Q7 and -Q1 3 . Tables 3 and 4 show all data values < 17 µ Hz found in the first iterations and re - bins of Q7 -and Q13 . - - - - - - - - 4 - - -FIG. 1 : Pictured top is the light curve constructed for Q7 , below is the light curve for Q13 . 
Q 7 -lasted from September 24 – December 13 , 2010, and Q13 was from M arch 29 – June 23, 2012. -Both graphs were constructed by plotting corrected flux magnitude (flux corrected for -instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating -between points. Q 7 had forty - three interpolated points, and Q13 had sixty - six . - - - - - - - - - - - 5 - - -FIG. 2 : The graphs show the initial FTs of Q7, and then the FTs of the three successive re - bins of -the light curve data. The significant fr equenc ies of 5.886 µHz and 5.889 µHz are circled. The -d isappearance of the freque ncy in the last FT is most likely a b yproduct of the method, and the -spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re - -binning proc ess. - - - - - - - - - - 6 - - - -FIG. 3 : The graphs show the initial FT of Q13 , and t he n the FT s of the three successive re - bins -of the light curve data. The significant frequencies of 5.784 µHz and 5. 787 µHz are circled. In -addition, in the third re - bin , the frequencies 11.641 µHz and 16.823 µHz rise above 3 𝝈 and are -near ly perfect integer multiples of 5 .787 µHz . These harmonics are potentially indications of a -starspot (Santos et al., 2017). - - - - - - - - - - - - - - 7 -Q7 Significant -Data Points Light -Variability -Frequenc y -(µHz) Corrected Flux -Magnitude -(ppm) Period (days ) Signal - to - Noise -(dB) -Q7 First -Iteration 5.886 - 1.198 1.966 9.9 -Q7 Re - bin 1 5.886 - 1.477 1.966 12.8 -Q7 Re - bin 2 5.889 0.59 7 1.965 19.2 -TABLE I : The table displays the various frequencies collected from Q7 and the information -found throu gh calculations to find period and SNR. The frequency of 5.464 µHz is not included, -and therefore was not used in any calculations deter mining the average period of rotation. The -values under corrected flux magnitude are relative to our significant frequency cutoff of 3 𝝈 , thus -negative numbers are under the cutoff. 
- - - - Q 13 Significant -Data Points Light -Variability -Frequenc y -(µHz) Corrected Flux -Magnitude -(ppm) Period (days) Signal - to - Noise -(dB) -Q13 First -Iteration 5.784 1.555 2.001 15.6 -Q13 Re - bin 1 5.784 2.873 2.001 1 7.7 -Q13 Re - bin 2 5.787 4.938 2.000 22.6 -Q13 Re - bin 3 5.787 6.909 2.0 00 26.3 -Q13 Re - bin 3 11.641 7.073 0.994 26.4 -Q13 Re - bin 3 16.823 2.299 0.688 24.1 -TABLE II : The table displays the various frequencies collected from Q13 and the information -found through calculations to find period and SNR. The last two signific ant frequencies (11.641 -µHz and 16.823 µHz ) for Q13 Re - bin 3 represent potential harmonic s, which are discussed in -further detail in the Con clusions section of this paper . The values under corrected flux magnitude -are relative to our significant frequency cutoff of 3 𝝈 , thus negative numbers are under the cutoff. - - - - - - - - - - 8 -First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) -0.933 0.933 0.21 5 0.216 -1.148 1.148 0.575 0.575 -1.364 1.364 0.934 0.935 -1.507 1.507 1.005 1.006 -12.5 61 12.561 1.149 1.150 -16.581 16.581 1.221 1.222 - 1.364 1.366 - 1.508 1 .509 - 1 .580 1. 582 - 1.7 24 1. 725 - 1.795 1.797 - 5.889 2.0 85 - 6.822 5.392 - 9.192 5. 464 - 9.479 7. 476 - 11.203 9. 489 - 12.568 11.215 - 14 . 291 12.581 - 16.230 13.084 - 1 6.589 13.443 - 13.659 - 14.018 - 14. 809 - 15.097 - 16.031 - 16.463 - 16.894 -TABLE III : The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) -above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by - -product of the method, and we calculated for such errors when finding our average. - - - - - 9 -First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) -3.094 2 .018 2.019 1.951 -5.784 3.094 3.095 2.019 -9.080 5.784 5.787 2.442 -13.519 7.667 7.671 2. 
759 -15.671 9.080 9.084 3.095 -16.209 11.165 11.641 3.634 -16.411 13.519 13.526 4.374 - 15.469 15.477 4.778 - 15.671 15.679 4.912 - 16.209 15.881 5.0 47 - 16.41 1 16.419 5.787 - 8. 479 - 9. 084 - 10.565 - 11.641 - 13.526 - 15.544 - 15.881 - 16.82 3 -TABLE IV : The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) -above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by - -product of the method, and we calculate d for such errors when finding our average. - - - Conclusions - -As our research used the long - cadence data from Kepler, much of the high - frequency -va riability due to gravitational wave pulsations is lost. However, this presents an opportunity to -verify our results with the work of research groups that analyzed short - cadence data. With the -data analyzed, the lower fre quencies between 5 - 6 µHz emerged . Aft er finding the average of the -periods and accounting for a 1 𝝈 margin of error, our research hypothesizes that the rotation -period of KIC 8626021 is 1.99 ± 0.02 days. Other short - cadence re search has found the rotation -period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff - K im et -al ., 2015 ) . Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011 ) , and -these periods indicate that the more precise significant period identified through our re - binning -relates to the rotation of the white dwarf. -Through the re - b inning process , the SNR clearly improves for both quarters, and for Q7 it -improves by approximately 1.3 dB, except f o r the last data re - bin. In the last re - bin, the previous - 10 -significant frequency disappears , which beco mes increasingly likely after succe ssive re - binning -processes . The frequency 5.464 µHz rises as another significant frequency; however, we believe -that this new frequ ency is simply an artifact of the re - binning process. 
In Q13 , we saw SNR -improvement ranging from 1.1 dB to 1.3 dB . -Through the re - binning process , more lines , or significant frequencies, appeared above -the 3 𝝈 cutoff , particularly at lower frequencies. These findings suggest that as an alternative to -short - cadence analysis, the re - binning process of long - cadence dat a can be used to identify -significant lower frequencies in white dwarfs. The methods we used are also si mple and -replicable, which allows even those with less experience to quickly analyze the large amounts of -data being collected by orbiting telescopes, s uch as the currently active TESS (Transiting -Exoplanet Survey Satellite) telescope. -The presence of poss ible harmonics in the third re - bin of Q13 also indicates the possible -presence of a previously unseen starspot in KIC 8626021 caused by mag netic activity. These -spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, -the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and -contrast (Santos et al., 2017) . Using the process of re - binning, a starspot signal, previously -dominated by noise, may have been discov ered. - 11 -Acknowledgments - -W e wish to thank Bloomsburg University of Pennsylvania for its continued support of our -research. -This paper includes data collected b y the Kepler mission and obtained from the MAST -data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is -provided by the NASA Science Mission Directorate. STScI is operated by the Association of -Universities for Rese arch in Astronomy, Inc., under NASA contract NAS 5 – 26555. - - - References - - Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., -Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010) . -PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG -stars — a FIRST LOOK. 
The Astr ophysical Journal, 713(2), L155 - L159. -https://doi.org/10.1088/2041 - 8205/713/2/L155 -Bischoff - K im , A., Øs tensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven - Period -asteroseismic fit of KI C 8626021. EPJ Web of Conferences, 101, 06009. -https://doi.org/10.1051/epjconf/ 201510106009 -Córsico, A. H. (2020). White - Dwarf asteros eismology with the kepler space telescope. Frontiers -in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 -Holberg, J . B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal -for the History of Astrono my, 40(2), 137 - 154. -https://doi.org/10.1177%2F002182860904000201 -Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Tr oeltzsch, J., Aigrain, S., -Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., -M iglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: -Characterization and early results. Publications of the Astronomical Society of the Pacific, -126(938), 398 - 408. https://doi.org/10.1086/676406 -Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space -Administration. Retrieved September 2, 2021, from -https://www.nasa.gov/mission_pages/keple r/overview/index.html -Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen - -dalsgaard, J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., -Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y ., Latham, D. W., Lissauer, J. J., Marcy, -G., . . . Morrison , D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC -performance, AND EARLY SCIENCE. The Astrophy sical Journal , 713 (2), L79 - L86. -https://dx.doi.org/10.1088/2041 - 8205/713/2/L79 -Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & -Koester, D. (2011) . AT last — a v777 HER PULSATOR IN THE KEPLER FIELD. 
The -Astrophysical Journal , 736 (2), L39. https://doi.org/10.1088/2041 - 8205/736/2/L39 -Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot -signature on the light curv e. Astronomy & Astrophysics , 599 , A1. -https://doi.org/10.1051/0004 - 6361/201629923 - 12 -Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. -Annual Review of Astronomy and Astrophyics, 46(1), 157 - 199. -https://doi.org/10.1146/annurev.astro. 46.060407.145250 -Wolfram Research , Inc., Mathematica, Version 12.3.1, Champaig n, IL (2021). + ☉ (Córsico, 2020 ) . Other research +has found that this white dwarf is the DBV with the highest known temperature, and its helium +layer is the thinn est (Bischoff - K im et al., 2015). Despite the long - cadence light curve being too +noisy to draw many conclusions , other FTs of short - cadence data have been performed to find +variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with +frequencies of 4309.89 µHz , 5073.26 µHz , 36 81.87 µHz , 3294.22 µHz and 2658.85 µ Hz +(Østensen et al., 2011 ). These fin dings confirm the classification of the white dwarf as a V777 +Herculis, although our research focuses on low frequencies using long - cadence data. + + + + Method s + +All data were downloaded from the KASOC database, and the long - cadence (data +sampled ap proximately every thirty minutes) measurements of Corrected F lux (ppm) were +analyzed. All computations were made in Wolfram Mathematica and Microsoft Exce l , and FTs +were performed in Mathematica . The re - binning process consist ed of summin g adjacent light +c urve data points in each quarter , therefore doubling the sampling interval from 0 .5 hour to one +hour, and then repeating this process on the data sample fo r a total of three times. 
In addition, a +significant detection was defin ed as being 3 𝝈 above the mean of the relative flux, and 0 on the +graphs below represents this 3 𝝈 cutoff. ( Koch, D. G., 2010), ( Wolfram Research, Inc., 2021). To +find the SNR , we converted to decibels . Using these SNRs , we were able to easily identify +im provement in signal strength. + + + Results + + Figure 1 presents the lightcurves constructed for quarters seven (Q 7) and thirteen (Q13) , +with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs +of the first iteration and three successive re - bins for Q7 , while Figure 3 presents the FTs of the +same for Q13 . + Tables 1 and 2 both show the hypothesized f requency corresponding to the rotation of +KIC 8626021 that is found in the FTs of the f irst iteration and subsequent re - bins for Q7 and +Q1 3 . Tables 3 and 4 show all data values < 17 µ Hz found in the first iterations and re - bins of Q7 +and Q13 . + + + + + + + + + + +FIG. 1 : Pictured top is the light curve constructed for Q7 , below is the light curve for Q13 . Q 7 +lasted from September 24 – December 13 , 2010, and Q13 was from M arch 29 – June 23, 2012. +Both graphs were constructed by plotting corrected flux magnitude (flux corrected for +instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating +between points. Q 7 had forty - three interpolated points, and Q13 had sixty - six . + + + + + + + + + + + + + +FIG. 2 : The graphs show the initial FTs of Q7, and then the FTs of the three successive re - bins of +the light curve data. The significant fr equenc ies of 5.886 µHz and 5.889 µHz are circled. The +d isappearance of the freque ncy in the last FT is most likely a b yproduct of the method, and the +spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re binning + proc ess. + + + + + + + + + + + + + +FIG. 
3 : The graphs show the initial FT of Q13 , and t he n the FT s of the three successive re - bins +of the light curve data. The significant frequencies of 5.784 µHz and 5. 787 µHz are circled. In +addition, in the third re - bin , the frequencies 11.641 µHz and 16.823 µHz rise above 3 𝝈 and are +near ly perfect integer multiples of 5 .787 µHz . These harmonics are potentially indications of a +starspot (Santos et al., 2017). + + + + + + + + + + + + + + +Q7 Significant +Data Points Light +Variability +Frequenc y +(µHz) Corrected Flux +Magnitude +(ppm) Period (days ) Signal - to - Noise +(dB) +Q7 First +Iteration 5.886 - 1.198 1.966 9.9 +Q7 Re - bin 1 5.886 - 1.477 1.966 12.8 +Q7 Re - bin 2 5.889 0.59 7 1.965 19.2 +TABLE I : The table displays the various frequencies collected from Q7 and the information +found throu gh calculations to find period and SNR. The frequency of 5.464 µHz is not included, +and therefore was not used in any calculations deter mining the average period of rotation. The +values under corrected flux magnitude are relative to our significant frequency cutoff of 3 𝝈 , thus +negative numbers are under the cutoff. + + + + Q 13 Significant +Data Points Light +Variability +Frequenc y +(µHz) Corrected Flux +Magnitude +(ppm) Period (days) Signal - to - Noise +(dB) +Q13 First +Iteration 5.784 1.555 2.001 15.6 +Q13 Re - bin 1 5.784 2.873 2.001 1 7.7 +Q13 Re - bin 2 5.787 4.938 2.000 22.6 +Q13 Re - bin 3 5.787 6.909 2.0 00 26.3 +Q13 Re - bin 3 11.641 7.073 0.994 26.4 +Q13 Re - bin 3 16.823 2.299 0.688 24.1 +TABLE II : The table displays the various frequencies collected from Q13 and the information +found through calculations to find period and SNR. The last two signific ant frequencies (11.641 +µHz and 16.823 µHz ) for Q13 Re - bin 3 represent potential harmonic s, which are discussed in +further detail in the Con clusions section of this paper . 
The values under corrected flux magnitude +are relative to our significant frequency cutoff of 3 𝝈 , thus negative numbers are under the cutoff. + + + + + + + + + + +First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) +0.933 0.933 0.21 5 0.216 +1.148 1.148 0.575 0.575 +1.364 1.364 0.934 0.935 +1.507 1.507 1.005 1.006 +12.5 61 12.561 1.149 1.150 +16.581 16.581 1.221 1.222 + 1.364 1.366 + 1.508 1 .509 + 1 .580 1. 582 + 1.7 24 1. 725 + 1.795 1.797 + 5.889 2.0 85 + 6.822 5.392 + 9.192 5. 464 + 9.479 7. 476 + 11.203 9. 489 + 12.568 11.215 + 14 . 291 12.581 + 16.230 13.084 + 1 6.589 13.443 + 13.659 + 14.018 + 14. 809 + 15.097 + 16.031 + 16.463 + 16.894 +TABLE III : The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) +above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by product + of the method, and we calculated for such errors when finding our average. + + + + + +First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) +3.094 2 .018 2.019 1.951 +5.784 3.094 3.095 2.019 +9.080 5.784 5.787 2.442 +13.519 7.667 7.671 2. 759 +15.671 9.080 9.084 3.095 +16.209 11.165 11.641 3.634 +16.411 13.519 13.526 4.374 + 15.469 15.477 4.778 + 15.671 15.679 4.912 + 16.209 15.881 5.0 47 + 16.41 1 16.419 5.787 + 8. 479 + 9. 084 + 10.565 + 11.641 + 13.526 + 15.544 + 15.881 + 16.82 3 +TABLE IV : The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) +above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by product + of the method, and we calculate d for such errors when finding our average. + + + Conclusions + +As our research used the long - cadence data from Kepler, much of the high - frequency +va riability due to gravitational wave pulsations is lost. 
However, this presents an opportunity to +verify our results with the work of research groups that analyzed short - cadence data. With the +data analyzed, the lower fre quencies between 5 - 6 µHz emerged . Aft er finding the average of the +periods and accounting for a 1 𝝈 margin of error, our research hypothesizes that the rotation +period of KIC 8626021 is 1.99 ± 0.02 days. Other short - cadence re search has found the rotation +period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff - K im et +al ., 2015 ) . Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011 ) , and +these periods indicate that the more precise significant period identified through our re - binning +relates to the rotation of the white dwarf. +Through the re - b inning process , the SNR clearly improves for both quarters, and for Q7 it +improves by approximately 1.3 dB, except f o r the last data re - bin. In the last re - bin, the previous + +significant frequency disappears , which beco mes increasingly likely after succe ssive re - binning +processes . The frequency 5.464 µHz rises as another significant frequency; however, we believe +that this new frequ ency is simply an artifact of the re - binning process. In Q13 , we saw SNR +improvement ranging from 1.1 dB to 1.3 dB . +Through the re - binning process , more lines , or significant frequencies, appeared above +the 3 𝝈 cutoff , particularly at lower frequencies. These findings suggest that as an alternative to +short - cadence analysis, the re - binning process of long - cadence dat a can be used to identify +significant lower frequencies in white dwarfs. The methods we used are also si mple and +replicable, which allows even those with less experience to quickly analyze the large amounts of +data being collected by orbiting telescopes, s uch as the currently active TESS (Transiting +Exoplanet Survey Satellite) telescope. 
+The presence of poss ible harmonics in the third re - bin of Q13 also indicates the possible +presence of a previously unseen starspot in KIC 8626021 caused by mag netic activity. These +spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, +the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and +contrast (Santos et al., 2017) . Using the process of re - binning, a starspot signal, previously +dominated by noise, may have been discov ered. + +Acknowledgments + +W e wish to thank Bloomsburg University of Pennsylvania for its continued support of our +research. +This paper includes data collected b y the Kepler mission and obtained from the MAST +data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is +provided by the NASA Science Mission Directorate. STScI is operated by the Association of +Universities for Rese arch in Astronomy, Inc., under NASA contract NAS 5 – 26555. + + + References + + Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., +Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010) . +PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG +stars — a FIRST LOOK. The Astr ophysical Journal, 713(2), L155 - L159. +https://doi.org/10.1088/2041 - 8205/713/2/L155 +Bischoff - K im , A., Øs tensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven - Period +asteroseismic fit of KI C 8626021. EPJ Web of Conferences, 101, 06009. +https://doi.org/10.1051/epjconf/ 201510106009 +Córsico, A. H. (2020). White - Dwarf asteros eismology with the kepler space telescope. Frontiers +in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 +Holberg, J . B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal +for the History of Astrono my, 40(2), 137 - 154. 
+https://doi.org/10.1177%2F002182860904000201 +Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Tr oeltzsch, J., Aigrain, S., +Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., +M iglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: +Characterization and early results. Publications of the Astronomical Society of the Pacific, +126(938), 398 - 408. https://doi.org/10.1086/676406 +Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space +Administration. Retrieved September 2, 2021, from +https://www.nasa.gov/mission_pages/keple r/overview/index.html +Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen dalsgaard, + J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., +Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y ., Latham, D. W., Lissauer, J. J., Marcy, +G., . . . Morrison , D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC +performance, AND EARLY SCIENCE. The Astrophy sical Journal , 713 (2), L79 - L86. +https://dx.doi.org/10.1088/2041 - 8205/713/2/L79 +Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & +Koester, D. (2011) . AT last — a v777 HER PULSATOR IN THE KEPLER FIELD. The +Astrophysical Journal , 736 (2), L39. https://doi.org/10.1088/2041 - 8205/736/2/L39 +Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot +signature on the light curv e. Astronomy & Astrophysics , 599 , A1. +https://doi.org/10.1051/0004 - 6361/201629923 + +Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. +Annual Review of Astronomy and Astrophyics, 46(1), 157 - 199. +https://doi.org/10.1146/annurev.astro. 46.060407.145250 +Wolfram Research , Inc., Mathematica, Version 12.3.1, Champaig n, IL (2021). 
\ No newline at end of file diff --git a/read/results/playa/2201.00037.txt b/read/results/playa/2201.00037.txt index fdf2ac3..854599e 100644 --- a/read/results/playa/2201.00037.txt +++ b/read/results/playa/2201.00037.txt @@ -1,5 +1,5 @@ -Confidential manuscript submitted to JGR-Planets -The influence of a fluid core and a solid inner core on the +Confidential manuscript submitted to JGR-Planets +The influence of a fluid core and a solid inner core on the Cassini sate of Mercury Mathieu Dumberry 1 1 @@ -9,63 +9,63 @@ Key Points: The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. • - For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid + For a core magnetic field above 0.3 mT, electromagnetic coupling locks the fluid and solid cores into a common precession motion. • - The larger the inner core is, the more the obliquity of the polar moment of inertia ap- -proaches that expected for a rigid planet. + The larger the inner core is, the more the obliquity of the polar moment of inertia approaches + that expected for a rigid planet. Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca -–1–arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 2021 -Confidential manuscript submitted to JGR-Planets +–1–arXiv:2201.00037v1 [astro-ph.EP] 31 Dec 202 +Confidential manuscript submitted to JGR-Planets Abstract -We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core +We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core and a mantle. Our model includes inertial and gravitational torques between interior regions, -and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show -that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of -the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. 
The man- -tle obliquity decreases with increasing inner core size, but the change between a large and no -inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the in- -ner core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and -solid cores into a common precession motion. Because of the strong gravitational coupling be- -tween the mantle and inner core, the larger the inner core is, the more this co-precessing core +and viscous and electromagnetic (EM) coupling at the boundaries of the fluid core. We show +that the coupling between Mercury’s interior regions is sufficiently strong that the obliquity of +the mantle spin axis deviates from that of a rigid planet by no more than 0.01 arcmin. The mantle + obliquity decreases with increasing inner core size, but the change between a large and no +inner core is limited to 0.015 arcmin. EM coupling is stronger than viscous coupling at the inner + core boundary and, if the core magnetic field strength is above 0.3 mT, locks the fluid and +solid cores into a common precession motion. Because of the strong gravitational coupling between + the mantle and inner core, the larger the inner core is, the more this co-precessing core is brought into an alignment with the mantle, and the more the obliquity of the polar moment -of inertia approaches that expected for a rigid planet. The misalignment between the polar mo- -ment of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 ar- -cmin. Our results imply that the measured obliquities of the mantle spin axis and polar mo- -ment of inertia should coincide at the present-day level of measurement errors, and cannot be +of inertia approaches that expected for a rigid planet. The misalignment between the polar moment + of inertia and mantle spin axis increases with inner core size, but is limited to 0.007 arcmin. 
+ Our results imply that the measured obliquities of the mantle spin axis and polar moment + of inertia should coincide at the present-day level of measurement errors, and cannot be distinguished from the obliquity of a rigid planet. -Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precess- -ing about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the +Plain language summary: The plane of Mercury’s orbit around the Sun is slowly precessing + about an axis fixed in space. This entrains a precession of the spin axis of Mercury at the same rate, an equilibrium known as a Cassini state. The angle between the spin axis and the -normal to the orbital plane is known as the obliquity and remains fixed. Observations have con- -firmed that Mercury’s obliquity matches, within measurement errors, the theoretical predic- -tion based on an entirely rigid planet. However, we know that Mercury has a large metallic core +normal to the orbital plane is known as the obliquity and remains fixed. Observations have confirmed + that Mercury’s obliquity matches, within measurement errors, the theoretical prediction + based on an entirely rigid planet. However, we know that Mercury has a large metallic core which is liquid, although the central part may be solid. In this work, we investigate how the -presence of a fluid and solid core affect the Cassini state of Mercury. We show that the inter- -nal coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliq- -uity of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an -offset smaller than the present-day error in measurements. We also show that the larger the +presence of a fluid and solid core affect the Cassini state of Mercury. 
We show that the internal + coupling between the solid core, fluid core and the mantle is sufficiently strong that the obliquity + of the mantle does not depart from that of a rigid planet by more than 0.01 arcmin, an +offset smaller than the present-day error in measurements. We also show that the larger the solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body. 1 Introduction -Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spin- -symmetry axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo , +Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spinsymmetry + axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo , 1966; Peale , 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot , 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 yr with an inclination angle of I = 8.5330 ◦ - between the orbit and Laplace plane normals [ Ba- -land et al., 2017]. Measurements of the obliquity ε -m , defined as the angle of misalignment be- -tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques, -including ground based radar observations [Margot et al. , 2007, 2012], and stereo digital ter- -rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al. , 2014; Verma and Mar- -got , 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvi- -ronment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, + between the orbit and Laplace plane normals [ Baland + et al., 2017]. 
Measurements of the obliquity ε +m , defined as the angle of misalignment between + the spin-symmetry axis and the orbit normal, have been obtained by different techniques, +including ground based radar observations [Margot et al. , 2007, 2012], and stereo digital terrain + images [Stark et al., 2015a] and radio tracking data [Mazarico et al. , 2014; Verma and Margot + , 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvironment + GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals and consistent with a Cassini state. Furthermore, the observed obliquity angle (2. 042 ± 0 .08 –2– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets I descending node of orbit Ω @@ -80,7 +80,7 @@ I ê ascending node of orbit descending -node of equator equatorial +node of equator equatorial plane orbital direction @@ -92,11 +92,11 @@ Sê ε morbital planeFigure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded rectangle) and the Cassini state of Mercury. The normal to the orbital plane ( ˆe I -3 ) is offset from the nor- -mal to the Laplace plane ( ˆe L +3 ) is offset from the normal + to the Laplace plane ( ˆe L 3 ) by an angle I = 8. 5330◦ . The symmetry axis of the mantle ˆe p -3 is offset +3 is offset from ˆe I 3 by ε m ≈ 2 arcmin. ˆe I @@ -107,89 +107,89 @@ at frequency Ω p = 2 π/325, 513 yr− 1 . The blue (orange) shaded region indicates the portion of the orbit when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. -arcmin [ Margot et al. , 2012], 2.029 ± 0. 085 arcmin [Stark et al., 2015a] and 1.968 ± 0 .027 [ Gen- -ova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. +arcmin [ Margot et al. , 2012], 2.029 ± 0. 
085 arcmin [Stark et al., 2015a] and 1.968 ± 0 .027 [ Genova + et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. The prediction of Mercury’s obliquity is based on the assumption that the whole planet -precesses as a single body. However, we know that Mercury has a fluid core from two main lines -of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by -dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion -in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the ob- -served amplitude of the 88-day longitudinal libration is approximately twice as large as that +precesses as a single body. However, we know that Mercury has a fluid core from two main lines +of evidence. First, Mercury’s large scale magnetic field is intrinsic, and must be maintained by +dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion +in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed + amplitude of the 88-day longitudinal libration is approximately twice as large as that expected if Mercury were librating as a rigid body [ Margot et al. , 2007, 2012; Stark et al., 2015a]. -This indicates that it is only the mantle that librates, and that the outer part of the core is fluid. -These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that +This indicates that it is only the mantle that librates, and that the outer part of the core is fluid. +These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that its outermost part must be. A solid inner core may have nucleated at the centre although its size is not well constrained. Inner core growth leads to planetary contraction, and the inferred radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al. 
, 2014] places an approximate limit of 800 km on the inner core radius [ Grott et al. , 2011]. However, the inner -core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. +core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. –3– -Confidential manuscript submitted to JGR-Planets -With a fluid core, and possibly a solid inner core, the observed obliquity ε - m reflects the -orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dis- -sipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spin- -symmetry axis of the inner core should both also precess about the normal to the Laplace plane +Confidential manuscript submitted to JGR-Planets +With a fluid core, and possibly a solid inner core, the observed obliquity ε + m reflects the +orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dissipation, + and at equilibrium in the Cassini state, the spin axis of the fluid core and the spinsymmetry + axis of the inner core should both also precess about the normal to the Laplace plane in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the -plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek , 2016], although -their obliquity angles may be different than ε - m . Whether the spin axis of the fluid core is brought -into an alignment with the mantle obliquity depends primarily on the pressure torque (also re- -ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the -misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´e , 1910]. The more flat- -tened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained -into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s -CMB is not known. 
But if one assumes that the topography of the CMB coincides with an equipo- -tential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the up- -per mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core -into a close alignment with the mantle [ Peale et al. , 2014]. The spin axis of the fluid core is not -expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close -that the resulting mantle obliquity does not differ much from that of a single body planet. Fur- -thermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the mis- -alignment between the mantle and core [Peale et al. , 2014]. -If an inner core is present, its obliquity angle is determined by the sum of the torques act- -ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal- -ogous to the torque applied on the tilted mantle that sets the obliquity ε +plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek , 2016], although +their obliquity angles may be different than ε + m . Whether the spin axis of the fluid core is brought +into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred + to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the +misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´e , 1910]. The more flattened + the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained +into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s +CMB is not known. 
But if one assumes that the topography of the CMB coincides with an equipotential + surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the upper + mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core +into a close alignment with the mantle [ Peale et al. , 2014]. The spin axis of the fluid core is not +expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close +that the resulting mantle obliquity does not differ much from that of a single body planet. Furthermore, + viscous and electromagnetic (EM) coupling at the CMB can further restrict the misalignment + between the mantle and core [Peale et al. , 2014]. +If an inner core is present, its obliquity angle is determined by the sum of the torques acting + on it. This includes the gravitational torque from the Sun acting on its tilted figure, analogous + to the torque applied on the tilted mantle that sets the obliquity ε m . In addition, the tilt of the inner core also depends on the gravitational torque imposed by the mantle and the -pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav- -itational torque dominates, the inner core tilt is expected to remain closely aligned with the -mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should in- -stead be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM cou- -pling at the ICB should also enforce a closer alignment between the rotation vectors of the in- -ner core and fluid core. -It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mer- -cury is inferred [e.g. Peale , 1976; Margot et al. , 2018]. Inherent in this calculation is the built- -in assumption that the mantle obliquity does not deviate from that of a rigid planet by a sub- -stantial amount. However, the recent study by Peale et al. 
[2016] suggests that the inner core +pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle gravitational + torque dominates, the inner core tilt is expected to remain closely aligned with the +mantle. Conversely, if the pressure torque at the ICB is the largest, the inner core should instead + be closely aligned with the spin axis of the fluid core. A strong viscous and/or EM coupling + at the ICB should also enforce a closer alignment between the rotation vectors of the inner + core and fluid core. +It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury + is inferred [e.g. Peale , 1976; Margot et al. , 2018]. Inherent in this calculation is the builtin + assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial + amount. However, the recent study by Peale et al. [2016] suggests that the inner core can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the -orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the as- -sumption that the observed obliquity reflects the orientation of the whole planet. +orientation of the spin vector of the mantle by as much as 0.1 arcmin. This challenges the assumption + that the observed obliquity reflects the orientation of the whole planet. Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis does not coincide with the orientation of the polar moment of inertia of the whole planet. This -can introduce a systematic offset between different types of obliquity measurements. Those based -on tracking topographic features [ Margot et al. , 2007, 2012; Stark et al., 2015a] capture the obliq- -uity of the mantle spin axis. While those based on the orientation of the gravity field [ Mazarico +can introduce a systematic offset between different types of obliquity measurements. 
Those based +on tracking topographic features [ Margot et al. , 2007, 2012; Stark et al., 2015a] capture the obliquity + of the mantle spin axis. While those based on the orientation of the gravity field [ Mazarico et al., 2014; Verma and Margot , 2016; Genova et al., 2019; Konopliv et al., 2020] are instead -tied to the orientation of the principal moment of inertia of the whole planet. An offset of the -obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the -size of the inner core, even though this is difficult to do at present because the different esti- -mates of the obliquity of the gravity field do not match well with one another. +tied to the orientation of the principal moment of inertia of the whole planet. An offset of the +obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the +size of the inner core, even though this is difficult to do at present because the different estimates + of the obliquity of the gravity field do not match well with one another. –4– -Confidential manuscript submitted to JGR-Planets -There is thus a significant interest in properly assessing how the presence of a solid in- -ner core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a -model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model -is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumb- -erry and Wieczorek , 2016; Stys and Dumberry , 2018; Organowski and Dumberry , 2020]. The -specific questions that motivate our study are the following. First, we want to determine how -large the misaligned obliquities of the fluid core and solid inner core can be and how they de- -pend on model parameters. 
Second, we want to assess by how much the mantle obliquity may -differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spin- -symmetry axis of the mantle and gravity field may differ. +Confidential manuscript submitted to JGR-Planets +There is thus a significant interest in properly assessing how the presence of a solid inner + core at the centre of Mercury may affect its Cassini state equilibrium. Here, we present a +model of Mercury’s Cassini state that comprises a fluid core and solid inner core. The model +is an adaptation of a similar model developed to study the Cassini state of the Moon [Dumberry + and Wieczorek , 2016; Stys and Dumberry , 2018; Organowski and Dumberry , 2020]. The +specific questions that motivate our study are the following. First, we want to determine how +large the misaligned obliquities of the fluid core and solid inner core can be and how they depend + on model parameters. Second, we want to assess by how much the mantle obliquity may +differ from that of an entirely rigid Mercury, and third, by how much the obliquities of the spinsymmetry + axis of the mantle and gravity field may differ. 2 Theory 2.1 The interior structure of Mercury -Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid +Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted by r s , r @@ -198,16 +198,16 @@ m , and R , and their densities by ρ s , ρ f , ρ m , and ρ - c , respectively. The inner core ra- -dius r -s corresponds to the ICB radius, the fluid core radius r + c , respectively. The inner core radius + r +s corresponds to the ICB radius, the fluid core radius r f to the CMB radius, and R = -2439 .36 km to the planetary radius of Mercury. Compressibility effects from increasing pres- -sure with depth are not negligible in the core of Mercury. 
However adopting uniform densities -simplifies the analytical expressions of the model while still capturing the first order rotational +2439 .36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure + with depth are not negligible in the core of Mercury. However adopting uniform densities +simplifies the analytical expressions of the model while still capturing the first order rotational dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same strategy facilitates comparisons between our results. -We build our interior model as detailed in Peale et al. [2016]. We first specify r +We build our interior model as detailed in Peale et al. [2016]. We first specify r s , ρ s (or a density contrast at the ICB), the crustal density ρ @@ -220,53 +220,53 @@ f and ρ the known mass M and chosen values of the moments of inertia of the whole planet C and that of the mantle and crust C m . -Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) -by - i , defined as the difference between the mean equatorial and polar radii, divided by the mean -spherical radius. Likewise, we denote the equatorial flattening by the variable ξ - i , defined as the -difference between the maximum and minimum equatorial radii, divided by the mean spher- -ical radius. As above, we use the subscript i = s, f , m and r , to denote the polar or equa- -torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. -The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their -numerical values are given in Table 1. We then assume that the ICB and CMB are both at hy- -drostatic equilibrium with the imposed gravitational potential induced by the flattenings at the -CrMB and surface. 
The flattenings at all interior boundaries are specified such that they are -consistent with the observed degree 2 spherical harmonic coefficients of gravity J +Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) +by + i , defined as the difference between the mean equatorial and polar radii, divided by the mean +spherical radius. Likewise, we denote the equatorial flattening by the variable ξ + i , defined as the +difference between the maximum and minimum equatorial radii, divided by the mean spherical + radius. As above, we use the subscript i = s, f , m and r , to denote the polar or equatorial + flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. +The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their +numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic + equilibrium with the imposed gravitational potential induced by the flattenings at the +CrMB and surface. The flattenings at all interior boundaries are specified such that they are +consistent with the observed degree 2 spherical harmonic coefficients of gravity J 2 and C 22 ; their -numerical values are given in Table 1. Specifically, J +numerical values are given in Table 1. Specifically, J 2 and C 22 are connected to the principal -moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by +moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by J 2 = C − ¯ A M R 2 = 8π 15 1 -M R 2 +M R 2 (ρ s − ρ f )r 5 -s +s s + ( ρ f − ρ m )r 5 -f +f f + ( ρ m − ρ c ) r 5 -m +m m + ρ c R 5 - - r + + r , (1a) C 22 = B − A 4M R 2 = 8π 15 1 -4M R 2 +4M R 2 ( ρ s − ρ f )r 5 @@ -282,13 +282,13 @@ m ξ m + ρ c R 5 ξ - r + r . (1b) where ¯ -A is the mean equatorial moment of inertia defined below. The same procedure was used +A is the mean equatorial moment of inertia defined below. 
The same procedure was used in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry –5– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets Mercury Parameter Numerical value Reference mean motion, n 2π/87 .96935 day− 1 Stark et al. [2015b] @@ -315,10 +315,10 @@ J C 22 8. 0415 × 10 − 6 Genova et al. [2019] -polar surface flattening, +polar surface flattening, r 6. 7436 × 10 − 4 Perry et al. [2015] -equatorial surface flattening, ξ +equatorial surface flattening, ξ r 5. 1243 × 10 − 4 Perry et al. [2015] Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031. 8636 × 109 @@ -327,9 +327,9 @@ m 3 taken from Genova et al. [2019]. The mean density is calculated from 4 π 3 ¯ρR 3 = M . The numerical -values of +values of r and ξ -r are calculated from +r are calculated from r = (¯a − c )/R and ξ r = (a − b ) /R , where ¯a = 1 2 ( a + b ) and where @@ -339,12 +339,12 @@ axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et 22 are computed from Equation (4) in the Supporting Information of Genova et al. [2019]. and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. -Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topog- -raphy and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ -and an equatorial offset of ∼ 15 ◦ +Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography + and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ +and an equatorial offset of ∼ 15 ◦ [Perry et al. , 2015]. 
-Once the densities and flattenings of all interior regions are known, we can specify the mo- -ments of inertia of the fluid core ( C +Once the densities and flattenings of all interior regions are known, we can specify the moments + of inertia of the fluid core ( C f > B f > A f ) and solid inner core (C @@ -365,11 +365,11 @@ A 2 (A s + B s ) . (2) -From these, we define the polar (e , e +From these, we define the polar (e , e f , e s ) and equatorial (γ , γ s ) dynamical ellipticities of the -whole planet (no subscript), fluid core (subscript f ) and solid inner core (subscript s), which +whole planet (no subscript), fluid core (subscript f ) and solid inner core (subscript s), which enter our rotational model, e = C − ¯ A @@ -409,7 +409,7 @@ A J A C 22 . (4) –6– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets θ m θ @@ -457,7 +457,7 @@ the Laplace normal ( ˆe L with respect to ˆe I 3 . Shown in (a) are the orientations of the symmetry axis of the inner core ( ˆe s 3 ), the -rotation rate vectors of the mantle ( Ω ), fluid core (Ω +rotation rate vectors of the mantle ( Ω ), fluid core (Ω f ) and inner core ( Ω f ) and angles θ p , θ @@ -467,7 +467,7 @@ f and θ s in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of -the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. +the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial mantle axes ˆe p 1 and ˆe p @@ -475,16 +475,16 @@ mantle axes ˆe p mantle (b), the Cassini plane is rotating at frequency ω Ω o = −Ω o − Ω - p cos I in the longitudinal direc- -tion. 
The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of + p cos I in the longitudinal direction. + The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration. –7– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets 2.2 The rotational model Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is -87.96935 day and the sidereal rotation period is 58.64623 day [ Stark et al., 2015b]. These de- -fine the mean motion n = 2 π/87 . 96935 day −1 +87.96935 day and the sidereal rotation period is 58.64623 day [ Stark et al., 2015b]. These define + the mean motion n = 2 π/87 . 96935 day −1 and the sidereal frequency Ω o = 2 π/58 .64623 day −1 @@ -492,8 +492,8 @@ day −1 o = 1. 5 n . Mercury’s rotational state is also characterized by a Cassini state whereby the orientations of the orbit normal ( ˆeI 3 ) and of the mantle symmetry axis ( ˆep -3 ) are both copla- -nar with, and precess about, the normal to the Laplace plane ( ˆeL +3 ) are both coplanar + with, and precess about, the normal to the Laplace plane ( ˆeL 3 ). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆeL @@ -514,38 +514,38 @@ p = I + ε about the Laplace pole is retrograde with frequency Ω p = 2π/325 , 513 yr − 1 [ Baland et al. , 2017]. -The mantle and crust are welded together and form a single rotating region which we re- -fer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes +The mantle and crust are welded together and form a single rotating region which we refer + to as the ‘mantle’ in the context of our rotational model. 
The rotation and symmetry axes of the mantle are expected to remain in close alignment, but they do not coincide exactly. We -define the rotation rate vector of the mantle by Ω, and its misalignment from ˆe p +define the rotation rate vector of the mantle by Ω, and its misalignment from ˆe p 3 by an angle θ m . Note that θ m ε -m and it is often the spin axis of Mercury which is used to define the +m and it is often the spin axis of Mercury which is used to define the obliquity ε m [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆep 3 and Ω would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and the angles I , ε m and θ - m would completely describe the Cassini state. The presence of a fluid -outer core and solid inner core require three additional orientation vectors and angles. The sym- -metry axis of the inner core is defined by unit vector ˆes + m would completely describe the Cassini state. The presence of a fluid +outer core and solid inner core require three additional orientation vectors and angles. The symmetry + axis of the inner core is defined by unit vector ˆes 3 and its misalignment from ˆep 3 by an angle θ - n . The rotation vectors of the fluid core and inner core are defined as Ω + n . The rotation vectors of the fluid core and inner core are defined as Ω f and Ω -s , re- -spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an- -gles θ +s , respectively, + and their misalignment from the rotation vector of the mantle Ω are defined by angles + θ f and θ s (see Figure 2a). The rotation and symmetry axes of the inner core remain in close alignment, so θ n ≈ θ - s . To be formal in our definition of the different angles of misalignment, -for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise + s . 
To be formal in our definition of the different angles of misalignment, +for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. At equilibrium in the Cassini state, the three orientation vectors ( ˆe I 3 , ˆep @@ -554,13 +554,13 @@ At equilibrium in the Cassini state, the three orientation vectors ( ˆe I rotation vectors (Ω, Ω f , Ω s ) are forced to precess about ˆeL -3 at the same frequency. If we ne- -glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed +3 at the same frequency. If we neglect + dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω p . Viewed in the frame attached to the mantle rotating at sidereal frequency Ω - o , the Cassini plane is ro- -tating in a retrograde direction at frequency ω Ω + o , the Cassini plane is rotating + in a retrograde direction at frequency ω Ω o (see Figure 2b), where ω , expressed in cycles per Mercury day, is equal to ω = − 1 − δω cos( θ @@ -581,11 +581,11 @@ or equivalently, by Equation (19e) of Stys and Dumberry [2018], m + θ p ) = 0 . (7) –8– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets This expresses a formal connection between θ p and θ - m which is independent of the interior struc- -ture of Mercury. Using Equation (5) and cos(θ + m which is independent of the interior structure + of Mercury. Using Equation (5) and cos(θ m ) → 1, this connection can be rewritten as sin( θ m ) = δω sin( θ @@ -593,21 +593,21 @@ m ) = δω sin( θ and thus the relative amplitudes of θ m and θ p depend of the Poincar´e number δω . -To investigate Mercury’s response to the gravitational torque from the Sun, we take ad- -vantage of the framework developed in Mathews et al. 
[1991] to model the forced nutations of -Earth [see also Mathews et al., 2002; Dehant and Mathews , 2015]. This model takes into ac- -count the pressure torque (also referred to as the inertial torque) that results when the spin axis -of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and +To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage + of the framework developed in Mathews et al. [1991] to model the forced nutations of +Earth [see also Mathews et al., 2002; Dehant and Mathews , 2015]. This model takes into account + the pressure torque (also referred to as the inertial torque) that results when the spin axis +of the fluid core is misaligned from the symmetry axes of the elliptical surfaces of the CMB and ICB. It also includes the gravitational torque exerted on the inner core when it is misaligned with the mantle. Electromagnetic and viscous torques at both the CMB and ICB have been -incorporated into the framework [e.g Buffett , 1992; Buffett et al., 2002; Mathews and Guo , 2005; +incorporated into the framework [e.g Buffett , 1992; Buffett et al., 2002; Mathews and Guo , 2005; Deleplace and Cardin , 2006]. The framework was adapted to model the Cassini state of the Moon in Dumberry and Wieczorek [2016] and further developed in Stys and Dumberry [2018] and Organowski and Dumberry [2020]. We adapt it here to capture the Cassini state of Mercury. -Because the forced precession period is much longer than the rotation and orbital peri- -ods of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean -torque averaged over one orbit. 
This mean torque is perpendicular to the Cassini plane, point- -ing in the same direction as the vector connecting the Sun to the descending node of Mercury’s +Because the forced precession period is much longer than the rotation and orbital periods + of Mercury, the gravitational solar torque that is relevant to the Cassini state is the mean +torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, pointing + in the same direction as the vector connecting the Sun to the descending node of Mercury’s orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque is periodic, rotating at frequency ω Ω o . Setting the equatorial directions ˆe p @@ -623,16 +623,16 @@ components of this periodic applied torque in a compact form as where ˜ Γ(ω ) represents the amplitude of the torque at frequency ω Ω o . In response to this torque, -the axes defining all angles (θ +the axes defining all angles (θ p , ε m , θ m , θ f , θ s , θ - n ) as viewed in the mantle frame are also ro- -tating at frequency ω Ω + n ) as viewed in the mantle frame are also rotating + at frequency ω Ω o (see Figure 2). The longitudinal direction of each of these angles at -a specific time t can then also be written in the equatorial complex plane and is proportional +a specific time t can then also be written in the equatorial complex plane and is proportional to exp[ iω Ω o t]. For instance, the two equatorial time-dependent components θ m 1 and θ @@ -646,7 +646,7 @@ angle θ where ˜m ≡ ˜m( ω ) = Re[ ˜m] + iI m [ ˜m ] , (10b) is the amplitude at frequency ω Ω - o . Equivalent definitions apply for all other angles, with the + o . Equivalent definitions apply for all other angles, with the connection as follows: θ m ⇔ ˜m , θ @@ -663,27 +663,27 @@ The notation ˜m, ˜m f , ˜m s , ˜n s follows that introduced in the original model of Mathews et al. [1991]. 
-Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re- -sponse to the applied torque as a result of dissipation, for instance from viscous or EM coupling +Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response + to the applied torque as a result of dissipation, for instance from viscous or EM coupling –9– -Confidential manuscript submitted to JGR-Planets -at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely -real. We concentrate our analysis in this work on the real part of the solutions, which corre- -sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ε +Confidential manuscript submitted to JGR-Planets +at the boundaries of the fluid core. In the absence of dissipation, all tilded variables are purely +real. We concentrate our analysis in this work on the real part of the solutions, which corresponds + to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ε m corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε m , -though we keep the tilde notation in the presentation of our results to emphasize that it rep- -resents the real part of the solution from our system. Furthermore, since ˜m ˜ε +though we keep the tilde notation in the presentation of our results to emphasize that it represents + the real part of the solution from our system. Furthermore, since ˜m ˜ε m , we often refer to ˜ε m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. The model of Mathews et al. [1991] is developed under the assumption of small angles as -appropriate for the nutations on Earth. The details on how the equations of the model are de- -rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. 
Three equa- -tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer- -cury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three +appropriate for the nutations on Earth. The details on how the equations of the model are derived + can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. Three equations + describe, respectively, the time rate of change of the angular momenta of the whole of Mercury, + the fluid core, and the inner core in the reference frame of the rotating mantle. These three equations are (ω − e ) ˜m + (1 + ω ) ¯ @@ -703,14 +703,14 @@ A s ¯ A ˜n -s +s = 1 i Ω 2 o ¯ -A +A ˜ Γ -sun +sun , (12a) ω ˜m + (1 + ω + e f ) ˜m @@ -726,12 +726,12 @@ A iΩ 2 o ¯ A - f + f − ˜ Γ cmb − ˜ Γ - icb + icb , (12b) (ω − α 3 e @@ -746,15 +746,15 @@ s + (1 + ω − α iΩ 2 o ¯ A - s + s ˜ Γ s sun + ˜ Γ -icb +icb , (12c) -and a fourth equation consists of a kinematic relation that expresses the change in the orien- -tation of the inner core figure as a result of its own rotation, +and a fourth equation consists of a kinematic relation that expresses the change in the orientation + of the inner core figure as a result of its own rotation, ˜m s + ω ˜n s = 0 . (12d) @@ -782,12 +782,12 @@ g = 8πG 5Ω2 o [ρ c ( - r − + r − m ) + ρ m ( - m − + m − f ) + ρ - f + f f ] , (13b) where G is the gravitational constant. 
˜ @@ -800,7 +800,7 @@ m and a small inner core tilt ˜n Γ sun = − iΩ 2 o ¯ -A +A φ m ˜ε m + ¯ @@ -810,32 +810,32 @@ A A α 3 φ s ˜n -s +s , (14) where –10– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets φ m = 3 2 n 2 Ω 2 -o +o G 210 e + 1 2 G -201 γ +201 γ , (15a) φ s = 3 2 n 2 Ω 2 -o +o G 210 e s + 1 2 G 201 γ - s + s , (15b) and where G 210 and G @@ -870,12 +870,12 @@ A Γ cmb and ˜ Γ -icb are the torques from tangential stresses by the fluid core on the mantle at the +icb are the torques from tangential stresses by the fluid core on the mantle at the CMB and on the inner core at the ICB, respectively. These torques can be parameterized in terms of dimensionless complex coupling constants K icb and K -cmb and the differential angu- -lar velocities at each boundary [e.g Buffett , 1992; Buffett et al., 2002], +cmb and the differential angular + velocities at each boundary [e.g Buffett , 1992; Buffett et al., 2002], ˜ Γ icb = iΩ 2 @@ -893,11 +893,11 @@ A f K cmb ˜m f . (18b) -Specific expressions for K +Specific expressions for K icb and K cmb are delayed to sections 4 and 5 when we consider the -effects of viscous and EM coupling, respectively. -A fifth equation is required to connect this interior model to the obliquity of the mantle, +effects of viscous and EM coupling, respectively. +A fifth equation is required to connect this interior model to the obliquity of the mantle, and this is provided by Equation (7). For small angles θ m and θ p , this gives [e.g. Mathews et al., @@ -909,10 +909,10 @@ is because θ p ≈ 8 .567 ◦ whereas ˜ε m ≈ 2 arcmin and thus the latter obeys more strictly the -condition of small angles assumed in our framework. Furthermore, the external torques act- -ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ε - m . Writ- -ten in terms of ˜ε +condition of small angles assumed in our framework. 
Furthermore, the external torques acting + on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ε + m . Written + in terms of ˜ε m , and with the approximation of ˜ε m 1 and ˜m 1, Equation (7) becomes ˜m + (1 + ω ) ˜ε @@ -920,15 +920,15 @@ m = −(1 + ω ) tan I . (20) Likewise, the frequency ω from Equation (5) can be written simply in terms of I , ω = − 1 − δω cos I . (21) The set of four Equations (12) with the addition of Equation (20) form a linear system -of equations for the five rotational variables ˜m , ˜m +of equations for the five rotational variables ˜m , ˜m f , ˜m s , ˜n s and ˜ε m . It captures the response -of Mercury, in the frequency domain, when sub ject to a periodic solar torque applied at fre- -quency ω . The system can be written in a matrix form as +of Mercury, in the frequency domain, when sub ject to a periodic solar torque applied at frequency + ω . The system can be written in a matrix form as –11– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets M · x = y , (22a) where the solution (x ) and forcing (y ) vectors are x T @@ -959,10 +959,10 @@ A s ¯ A α -3 +3 (1 + ω )e s + φ - s + s φ m ω 1 + ω + e @@ -1010,43 +1010,43 @@ icb (1 + ω − α  . (22d) Solutions of the homogeneous system (i.e. y = 0) represent free modes of precession. Three -modes have periods which, when seen in inertial space, are typically in the range of a few hun- -dred to a few thousand years. The first is the free axial precession of Mercury maintained by -the solar torque acting on its elliptical figure [e.g. Peale , 2005]. The second is the free core nu- -tation (FCN), which is the free precession of the spin axis of the fluid core about the symme- -try axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN), +modes have periods which, when seen in inertial space, are typically in the range of a few hundred + to a few thousand years. 
The first is the free axial precession of Mercury maintained by +the solar torque acting on its elliptical figure [e.g. Peale , 2005]. The second is the free core nutation + (FCN), which is the free precession of the spin axis of the fluid core about the symmetry + axis of the CMB [e.g. Mathews et al., 1991]. The third is the free inner core nutation (FICN), a free mode of rotation similar to the FCN but associated with the inner core [e.g. Mathews et al., 1991]. A few remarks on our model are important to point out before we proceed further. First, although we have retained the triaxial shape of Mercury in the expression of the solar torque, -we treat its angular momentum response as if it were an axially symmetric body. This is con- -venient as the two equatorial angular momentum equations for each region can be combined -into a single equation. To first order, the frequency of the free precession of Mercury is not largely +we treat its angular momentum response as if it were an axially symmetric body. This is convenient + as the two equatorial angular momentum equations for each region can be combined +into a single equation. To first order, the frequency of the free precession of Mercury is not largely altered by triaxiality [e.g. Peale , 2005]. Baland et al. [2019] showed that the frequencies of the -FCN and FICN for a triaxial planetary body may be slightly different than those for an axi- -ally symmetric body, but not by large factor. As the response of Mercury to the solar torque -is largely determined by the resonant amplification due to the presence of these three modes, -our model should capture correctly the first order Cassini state of Mercury. Considering the +FCN and FICN for a triaxial planetary body may be slightly different than those for an axially + symmetric body, but not by large factor. 
As the response of Mercury to the solar torque +is largely determined by the resonant amplification due to the presence of these three modes, +our model should capture correctly the first order Cassini state of Mercury. Considering the triaxial shape of Mercury may alter the numerical results, but not our general conclusions. -Second, our modelling approach is different than in the studies of Peale et al. [2014] and -Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are de- -veloped and must then be integrated in time. The equilibrium Cassini state is the quasi-steady -state that remains after transient effects associated with the initial conditions have decayed away. -An advantage of these models compared to ours is that the complete triaxial dynamics of Mer- -cury, including its longitudinal librations, are retained. However, the numerical integration can -be lengthy if dissipation is weak, which restricts the number of possible interior models of Mer- -cury that can be tested. In contrast, our model is a simple linear system in the frequency do- -main, focused on one specific frequency: the forced precession associated with the Cassini state. +Second, our modelling approach is different than in the studies of Peale et al. [2014] and +Peale et al. [2016]. In these two studies, dynamical models of Mercury’s Cassini state are developed + and must then be integrated in time. The equilibrium Cassini state is the quasi-steady +state that remains after transient effects associated with the initial conditions have decayed away. +An advantage of these models compared to ours is that the complete triaxial dynamics of Mercury, + including its longitudinal librations, are retained. However, the numerical integration can +be lengthy if dissipation is weak, which restricts the number of possible interior models of Mercury + that can be tested. 
In contrast, our model is a simple linear system in the frequency domain, + focused on one specific frequency: the forced precession associated with the Cassini state. Solutions are straightforward to obtain for a given interior model, and this allows us to cover -a larger span of the parameter space. One drawback, however, is that our model does not cap- -ture time-dependent variations at any other frequencies, including the precession of the peri- -center of Mercury’s orbit about the Sun. +a larger span of the parameter space. One drawback, however, is that our model does not capture + time-dependent variations at any other frequencies, including the precession of the pericenter + of Mercury’s orbit about the Sun. –12– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets 2.3 Analytical solutions and limiting cases 2.3.1 The Cassini state of a single-body, rigid Mercury -For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa- -tions (12a) and (20), +For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations + (12a) and (20), (ω − e ) ˜m + φ m ˜ε m = 0 , (23a) @@ -1060,9 +1060,9 @@ C , these can be written as Aφ m ˜ε m , (24a) -˜m = δω +˜m = δω sin I + cos I ˜ε - m + m . (24b) Equation (24b) gives a direct relationship between ˜m and ˜ε m . For I = 8 . 5330◦ @@ -1072,12 +1072,12 @@ Equation (24b) gives a direct relationship between ˜m and ˜ε m = 2.04 arcmin, this gives ˜m = 2.52 × 10 − 4 arcmin, much smaller than ˜ε - m : the offset of the rotation axis of the mantle with respect to its symmetry axis is very + m : the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. Substituting Equation (24b) in Equation (24a) gives C Ω - p + p sin I + cos I ˜ε - m + m = ¯ A Ω o φ @@ -1107,12 +1107,12 @@ Upon using Equations (4), (15a), and Ω 201 C 22 ) . 
(27) This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 -[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ +[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ Ω is equal to − Ω p ]. -Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of -Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo- -ment of inertia ˆ +Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of +Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment + of inertia ˆ C , ˆ C = C @@ -1128,31 +1128,31 @@ cos I + sin I / ˜ε which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation that a measurement of the obliquity gives a constraint on ˆ C . -Two free modes of precession are found by setting y = 0 in Equation (23). One mode cor- -responds to the Eulerian wobble, or Chandler wobble, and represents the prograde precession -of the rotation axis about the symmetry axis. The second mode is the free retrograde axial pre- -cession of Mercury. As seen in the inertial frame, its frequency is given by +Two free modes of precession are found by setting y = 0 in Equation (23). One mode corresponds + to the Eulerian wobble, or Chandler wobble, and represents the prograde precession +of the rotation axis about the symmetry axis. The second mode is the free retrograde axial precession + of Mercury. As seen in the inertial frame, its frequency is given by –13– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets ω f p = n M R 2 -C +C G 210 J 2 + 2 G 201 C -22 +22 , (29) -which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com- -ponent. 
Note that in Peale [2005] it was assumed that only the mantle was involved in the solid- -body precession and hence C was replaced by C +which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component. + Note that in Peale [2005] it was assumed that only the mantle was involved in the solidbody + precession and hence C was replaced by C m . Using C = 0.346 · M R 2 [ Margot et al. , 2012] and the numerical values for n , J 2 , C 22 and e - c given in Table 1, we obtain a free preces- -sion period of T + c given in Table 1, we obtain a free precession + period of T f p = 2π/ω f p = 1298 yr. If we use C m instead of C in Equation (29), and take @@ -1162,8 +1162,8 @@ C f p = 2π/ω f p = 560 yr. These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, -the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid -core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The +the pressure torque exerted on the fluid core by the mantle leads to an entrainment of the fluid +core, the degree of which depends on the amplitude of the pole-to-equator CMB flattening. The true free precession period lies somewhere between 560 and 1298 yr. Regardless of its exact value, the free precession period is much shorter than the forcing period of 325 kyr. Using Equation (29), Equation (27) can be written as [e.g. Baland et al., 2017] @@ -1180,29 +1180,29 @@ f p . Because ω f p > Ω p , Mercury occupies Cassini state 1 [Peale , 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant -amplification if Ω +amplification if Ω p ≈ ω f p . Since ω f p Ω - p , resonant amplification is minimal and the re- -sulting obliquity, ˜ε + p , resonant amplification is minimal and the resulting + obliquity, ˜ε m ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8 .5◦ . 
-2.3.2 The misalignment of the fluid and solid cores +2.3.2 The misalignment of the fluid and solid cores With ω = − 1 − δω cos I and δω 1, Equation (12d) gives ˜n s ≈ ˜m s ; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between ˜m and ˜ε m of Equation (24b) is independent of the interior structure, -so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa- -tion (12a), and setting ˜n +so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equation + (12a), and setting ˜n s = ˜m s , the angular momentum equation of the whole planet becomes C Ω - p + p sin I + cos I ˜ε -m +m + ( ¯ A f cos I Ω @@ -1219,8 +1219,8 @@ A Ω o φ m ˜ε m . (31) -This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- -cation of the mantle obliquity ˜ε +This latter equation shows how the misaligned inner core and fluid core can lead to a modification + of the mantle obliquity ˜ε m . Approximate analytical solutions of ˜n s and ˜m f are given by @@ -1228,17 +1228,17 @@ f are given by s ≈ Ω p κλ - s + s 1 + Ω o (K icb − α 1 e s ) λ - f - + f + sin I + cos I ˜ε - m + m − Ω o α 3 φ @@ -1250,9 +1250,9 @@ f are given by f ≈ Ω p λ - f + f sin I + cos I ˜ε -m +m + Ω o λ @@ -1261,11 +1261,11 @@ A s ¯ A - f + f K icb − α 1 e - s + s ˜n s , (32b) where @@ -1275,11 +1275,11 @@ A ¯ A f Ω 2 -o +o K icb − α 1 e - s + s 2 λ s λ @@ -1293,11 +1293,11 @@ s = ¯σ s − Ω p cos I , (33c) –14– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets and where we have introduced the frequencies ¯σ f = Ω -o +o e f + K cmb + ¯ @@ -1306,11 +1306,11 @@ A ¯ A f K -icb +icb , (33d) ¯σ s = Ω -o +o e s α 3 α @@ -1319,28 +1319,28 @@ g − e 1 + α 3 φ s + K - icb + icb . (33e) These solutions are good approximations for all the results that we present in section 3. 
For an observed mantle obliquity ˜ε - m and for a chosen set of interior model parameters, they pro- -vide useful predictions of ˜n + m and for a chosen set of interior model parameters, they provide + useful predictions of ˜n s and ˜m f . -In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σ - s +In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σ + s Ω p and ¯σ f Ω p , so that ˜n s → 0, ˜m f → 0 and Equation (31) reverts back to Equation (25) -for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and +for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and mantle (i.e. for spherical internal boundaries, e f = e s = γ - s = 0 and no viscous or EM cou- -pling, K + s = 0 and no viscous or EM coupling, + K cmb = K icb = 0), then φ @@ -1360,9 +1360,9 @@ A s , we obtain C m Ω - p + p sin I + cos I ˜ε - m + m = ¯ A Ω o φ @@ -1380,8 +1380,8 @@ f ≈ Ω f → 0) and/or ¯σ s ≈ Ω p (and thus λ -s → 0) resonant amplifica- -tion leads to large amplitudes for ˜m +s → 0) resonant amplification + leads to large amplitudes for ˜m f , ˜n s and the mantle obliquity ˜ε m . The frequencies ¯σ @@ -1390,13 +1390,13 @@ f , ˜n s are closely related to the FCN and FICN frequencies ω f cn and ω f icn , respectively. Hence, -just as a large mantle obliquity can result from resonant amplification when the forcing frequency -approaches the free precession frequency, a large mantle obliquity can likewise result from res- -onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These +just as a large mantle obliquity can result from resonant amplification when the forcing frequency +approaches the free precession frequency, a large mantle obliquity can likewise result from resonant + amplification when the forcing frequency approaches the FCN or FICN frequencies. 
These frequencies depend on the interior density structure and are not known. However, we will show that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of -a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- -pect an important amplification effect. Furthermore, since ω +a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect + an important amplification effect. Furthermore, since ω f cn , ω f icn Ω p , then ¯σ @@ -1409,32 +1409,32 @@ to that expected for a rigid planet, as observations suggest. Therefore, we expe f and ˜n s should be of the order of ˜ε - m or smaller. This further justifies the assumption of small an- -gles that we have adopted. + m or smaller. This further justifies the assumption of small angles + that we have adopted. 3 Results 3.1 Geodetic constraints and interior density structure -All our interior models are constrained to match the mass M of Mercury and specific choices +All our interior models are constrained to match the mass M of Mercury and specific choices of ˆ C = C/M R 2 and C m /C . The choice of ˆ -C is determined from Equation (28). For the pa- -rameters listed in Table 1, and an observed obliquity of ε +C is determined from Equation (28). For the parameters + listed in Table 1, and an observed obliquity of ε m = 2. 04 arcmin [Margot et al. , 2012], this gives ˆ C = C/M R 2 = 0. 3455 and all our interior models are consistent with this choice. -Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are +Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are –15– -Confidential manuscript submitted to JGR-Planets -perfectly aligned with the mantle, which is not strictly correct. 
Hence, we make an error in es- -timating ˆ +Confidential manuscript submitted to JGR-Planets +perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating + ˆ C from Equation (28), or conversely in predicting ε m based on a given choice for ˆ C . Part of the ob jective of our study is to estimate how large this error is. The ratio C -m /C is ob- -tained from the amplitude of the 88-day longitudinal mantle libration φ +m /C is obtained + from the amplitude of the 88-day longitudinal mantle libration φ o , which is given by φ o = 6 · f (e @@ -1450,9 +1450,9 @@ where c + 959 48 e 4 c , (37) -and where ζ is a correction that takes into account the entrainment of the inner core in the li- -bration [ Van Hoolst et al., 2012; Dumberry et al. , 2013; Dumberry and Rivoldini , 2015]; this cor- -rection is small and, to simplify, we neglect it here. Taking the observed libration amplitude +and where ζ is a correction that takes into account the entrainment of the inner core in the libration + [ Van Hoolst et al., 2012; Dumberry et al. , 2013; Dumberry and Rivoldini , 2015]; this correction + is small and, to simplify, we neglect it here. Taking the observed libration amplitude to be 38.5 arcsec [ Margot et al. , 2012], ˆ C = C/M R 2 = 0.3455 and C @@ -1468,26 +1468,26 @@ For all results presented in our study, the crustal density is set at ρ c = 2974 kg m −3 [Sori , 2018]. Our standard choice for the crustal thickness is h = 26 km [ Sori , 2018], although in -section 3.2 we also present some results with other choices of h. We have considered two pos- -sible prescriptions connected to the density of the inner core. First, for all the results presented -in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ +section 3.2 we also present some results with other choices of h. We have considered two possible + prescriptions connected to the density of the inner core. 
First, for all the results presented +in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ s = 8800 kg m − 3 - ap- -proximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure + approximately + that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure Fe composition in face-centered cubic phase. This captures an end-member scenario where the core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively free of S on the Fe-rich side of the eutectic [ Li et al., 2001]. If the core composition is instead an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g. Schaefer et al., 2017] implies a weak chemical contrast at the ICB. The density jump across the ICB is expected to be small, although since density increases with depth, the contrast between -the mean densities of the fluid and solid cores is larger. It is these mean densities that enter -our Mercury model with uniform density layers. To capture this other end-member core com- -position scenario, in section 3.5 we present results where we instead prescribe a fixed density -contrast between the fluid and solid core; specifically, we set the numerical value of α +the mean densities of the fluid and solid cores is larger. It is these mean densities that enter +our Mercury model with uniform density layers. To capture this other end-member core composition + scenario, in section 3.5 we present results where we instead prescribe a fixed density +contrast between the fluid and solid core; specifically, we set the numerical value of α 3 . For a given choice of inner core radius r s , the densities of the mantle ( ρ - m ) and fluid core + m ) and fluid core ( ρ f ) and the radius of the CMB ( r f ) are determined such that the interior model matches M , @@ -1497,39 +1497,39 @@ C m = 0.1475. 
Figure 3a shows how ρ m , ρ f and r -f vary as a function of in- -ner core radius r -s for each of the two inner core density scenarios: a fixed ρ -s , or a fixed α +f vary as a function of inner + core radius r +s for each of the two inner core density scenarios: a fixed ρ +s , or a fixed α 3 . When -the inner core is small, its presence has a limited influence on the resulting density structure, -and we find ρ +the inner core is small, its presence has a limited influence on the resulting density structure, +and we find ρ m = 3197 kg m− 3 , ρ f = 7263 kg m−3 and r f = 2000 km in each of the two scenarios. When ρ - s is fixed to 8800 kg m− 3 + s is fixed to 8800 kg m− 3 , as the inner core reaches 1500 km in size, r -f in- -creases to above 2100 km, ρ +f increases + to above 2100 km, ρ m approaches 4000 kg m −3 and ρ f is reduced to below 5000 kg m− 3 . -Figure 3a illustrates that when adopting a fixed ρ +Figure 3a illustrates that when adopting a fixed ρ s , there is a limit in the possible inner core size, as otherwise ρ m gets unreasonably large and ρ f gets inappropriately small (as it would -require an excessively large concentration of light elements). When adopting instead a fixed den- -sity contrast, with α +require an excessively large concentration of light elements). When adopting instead a fixed density + contrast, with α 3 = 0. 1, the changes in r f , ρ m and ρ - f with inner core radius are more mod- -est, allowing larger possible inner core sizes. Different assumptions on ρ + f with inner core radius are more modest, + allowing larger possible inner core sizes. Different assumptions on ρ c and h would alter the numerical values shown on Figure 3a but not their trends with r s . @@ -1539,7 +1539,7 @@ core density scenarios and in the absence of viscous and EM coupling (i.e. 
K cmb = K icb = –16– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets 0200400600800100012001400 period (yr) 0 200 400 600 800 1000 1200 1400 @@ -1552,47 +1552,47 @@ Inner core radius (km) 200020202040206020802100 FICNFCN int mantle densitya b - FCNFigure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand + FCNFigure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN period when the external torque is set to zero (FCN int ) is shown in orange. Solid lines correspond to a scenario where the density of the inner core is set to 8800 kg m −3 ; thin dashed lines correspond to a -scenario where the density contrast between the fluid and solid cores is set to α +scenario where the density contrast between the fluid and solid cores is set to α 3 = 0. 1. -0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small in- -ner core, increasing to approximately 600 yr at the largest r +0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner + core, increasing to approximately 600 yr at the largest r s . The FICN period is shorter, close to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the largest r -s under the fixed ρ - s (fixed α -3 ) scenario. This confirms that the FCN and FICN peri- -ods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away +s under the fixed ρ + s (fixed α +3 ) scenario. This confirms that the FCN and FICN periods + are both much shorter than the forcing precession period of 325 kyr and sufficiently far away from it that we do not expect large ˜m f and ˜n - s from resonant amplification. 
-The FCN and FICN periods that we have computed include the influence of the exter- -nal torque. As shown by Baland et al. [2019], the external torque allow solid regions to have -a free motion in inertial space thereby affecting the free rotational modes. To a good approx- -imation, the FCN and FICN frequencies (as seen in an inertial frame) for K + s from resonant amplification. +The FCN and FICN periods that we have computed include the influence of the external + torque. As shown by Baland et al. [2019], the external torque allow solid regions to have +a free motion in inertial space thereby affecting the free rotational modes. To a good approximation, + the FCN and FICN frequencies (as seen in an inertial frame) for K cmb = K icb = 0 are given by ω f cn ≈ −Ω - o + o ¯ A ¯ A m + ¯ A - s - + s + e f + φ - m + m + Ω o e f φ @@ -1602,7 +1602,7 @@ A m ) , (38a) ω f icn ≈ Ω - o + o ¯ A + ¯ A @@ -1610,8 +1610,8 @@ A ¯ A − ¯ A - s - + s + e s α 1 − e @@ -1619,12 +1619,12 @@ s α 3 α g − α 3 φ - s + s . (38b) The expression of the FICN frequency involves the inertial torque (term e s α -1 ) and the grav- -itational torque from the rest of Mercury ( e +1 ) and the gravitational + torque from the rest of Mercury ( e s α 3 α g ) and the Sun ( α @@ -1638,17 +1638,17 @@ s = 8800 kg m −3 3 α g α 1 ; -the gravitational torque dominates the inertial torque, in large part because of the slow rota- -tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion +the gravitational torque dominates the inertial torque, in large part because of the slow rotation + rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion is retrograde). This is also the case for the Moon [e.g. 
Dumberry and Wieczorek , 2016; Stys and -Dumberry , 2018], but it is different for Earth, where α +Dumberry , 2018], but it is different for Earth, where α 1 > α 3 α g because of its faster rotation -and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres- -–17– -Confidential manuscript submitted to JGR-Planets -sion for the FICN differs by a factor ( ¯ +and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres–17– + +Confidential manuscript submitted to JGR-Planets +sion for the FICN differs by a factor ( ¯ A + ¯ A s ) /( ¯ @@ -1656,24 +1656,24 @@ A − ¯ A s ) compared to that given in Dumberry and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. -The expression for FCN frequency differs from the usual expression for Earth. First, it +The expression for FCN frequency differs from the usual expression for Earth. First, it involves the external torque from the Sun captured by the parameter φ m . If we set φ m = 0, we obtain the FCN frequency for a decoupled model in which only interior torques contribute, ω f cn,int ≈ −Ω - o + o ¯ A ¯ A m + ¯ A - s + s e f . (38c) -This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ +This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ A/( ¯ A m + @@ -1684,25 +1684,25 @@ A/ ¯ A m . This is because of the relatively thin mantle of Mercury; for the largest r -s considered, the moment of inertia of the inner core can get close to 40% of that of the man- -tle and is not negligible. The period of the FCN when only interior torques contribute is shown +s considered, the moment of inertia of the inner core can get close to 40% of that of the mantle + and is not negligible. The period of the FCN when only interior torques contribute is shown in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr at the largest r -s . 
Hence, the influence of the solar torque reduces the FCN period by a factor +s . Hence, the influence of the solar torque reduces the FCN period by a factor of approximately 3. We note that the FICN period, in contrast, is not altered substantially when the external torque is set to zero. 3.2 Gravitational and inertial coupling -Let us now investigate the obliquities of the mantle, fluid core and inner core in their equi- -librium Cassini state. We assume a fixed inner core density scenario in this section, with ρ +Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium + Cassini state. We assume a fixed inner core density scenario in this section, with ρ s = 8800 kg m− 3 - . Viscous and EM coupling are set to zero in order to isolate the influence of grav- -itational and inertial coupling. Figure 4 shows how ˜ε + . Viscous and EM coupling are set to zero in order to isolate the influence of gravitational + and inertial coupling. Figure 4 shows how ˜ε m , ˜m f and ˜n s vary as functions of inner -core radius. We show calculations for three different choices of crustal thickness, but let us con- -centrate first on the case for h = 26 km. For small r +core radius. We show calculations for three different choices of crustal thickness, but let us concentrate + first on the case for h = 26 km. For small r s , we retrieve an obliquity of ˜ε m = 2. 0494 arcmin (Figure 4a). ˜ε @@ -1710,32 +1710,32 @@ arcmin (Figure 4a). ˜ε s , but not substantially; at the largest r s (1500 km), ˜ε - m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ε + m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ε m = 2.04 arcmin, the obliquity that we used in setting the constraint for ˆ C – and hence the prediction we should recover for a rigid planet – is an overestimate of approximately 0 . 01 arcmin which occurs for small inner cores. 
The deviation of ˜ε - m from that of a rigid planet is due to the misalignments of the fluid + m from that of a rigid planet is due to the misalignments of the fluid core ( ˜m f ) and solid inner core ( ˜n s ) with respect to the mantle (Figure 4b). The misalignment -of the fluid core spin axis from the mantle is significant: ˜m +of the fluid core spin axis from the mantle is significant: ˜m f is approximately 4.02 arcmin for a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin at the largest r s . Recall that ˜m f is measured with respect to the mantle rotation axis (which -coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with +coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with respect to the orbit normal is ˜ε m + ˜m f ≈ 6 arcmin. The reason why the obliquity of the spin -axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), +axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), which shows that ˜m -f is determined by the resonant amplification of the FCN mode at the forc- -ing frequency. When the FCN frequency is much larger than the forcing frequency, as is the -case for Mercury, the resonant amplification is very weak but remains present and ˜m +f is determined by the resonant amplification of the FCN mode at the forcing + frequency. When the FCN frequency is much larger than the forcing frequency, as is the +case for Mercury, the resonant amplification is very weak but remains present and ˜m f is larger than zero. In contrast to ˜m @@ -1745,10 +1745,10 @@ smaller; ˜n m . Physically, this is because the gravitational torque acting on the inner core when it is tilted from the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner -core must remain in close alignment with the mantle. 
Presented differently, since the FICN pe- -riod is more than 3000 times shorter than the forced precession period, the inner core can eas- -–18– -Confidential manuscript submitted to JGR-Planets +core must remain in close alignment with the mantle. Presented differently, since the FICN period + is more than 3000 times shorter than the forced precession period, the inner core can eas–18– + +Confidential manuscript submitted to JGR-Planets 2.0382.0402.0422.0442.0462.0482.050 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 @@ -1774,7 +1774,7 @@ n dashed line) b) ˜m f (solid lines) and ˜n s (dashed lines, x100) as a function of inner core radius and for -different choices of crustal thickness. +different choices of crustal thickness. ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜n s does not change substantially as the inner core increases in size. @@ -1783,20 +1783,20 @@ icb = K cmb = 0, a good approximation of ˜ε m is given by ˜ε - m = C + m = C Ω p sin I -− C +− C Ω p cos I + ¯ A Ω o φ m , (39) which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced -by C - . The latter represents an effective moment of inertia that accounts for the coupling of +by C + . The latter represents an effective moment of inertia that accounts for the coupling of the core to the mantle, - C + C = C + ¯ A c χ , (40) @@ -1811,7 +1811,7 @@ A p cos I ¯ A - c + c ¯ A f @@ -1822,7 +1822,7 @@ A s ( ¯σ s − Ω - p cos I ) + p cos I ) − ¯ A s @@ -1839,29 +1839,29 @@ The frequencies ¯σ f and ¯σ s are given in Equations (33d-33e) and closely approximate the FCN and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then -how the core is entrained to precess with the mantle, with the coupling between the two ex- -pressed in terms of the resonant amplification of the FCN and FICN frequencies. 
In the limit +how the core is entrained to precess with the mantle, with the coupling between the two expressed + in terms of the resonant amplification of the FCN and FICN frequencies. In the limit of ¯σ f , ¯σ - s → 0, then χ = − 1, C + s → 0, then χ = − 1, C = C m , the core is fully decoupled from the mantle and we retrieve Equation (35). If instead ¯σ f , ¯σ - s → ∞ , then χ = 0, C - = C and we retrieve the pre- -diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω + s → ∞ , then χ = 0, C + = C and we retrieve the prediction + for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω p , -as is the case here, resonant amplification is weak, χ is small and positive, C +as is the case here, resonant amplification is weak, χ is small and positive, C > C and this leads to a slightly larger ˜ε - m compared to a rigid planet. Because the inner core core is grav- -itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the -misalignment of the fluid core. In Equation (41), ¯σ + m compared to a rigid planet. Because the inner core core is gravitationally + locked to the mantle, deviations from a rigid planet are dominantly caused by the +misalignment of the fluid core. In Equation (41), ¯σ s ¯σ f , so to a good approximation –19– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets χ ≈ ¯ A f @@ -1875,37 +1875,37 @@ f − Ω For a small inner core, χ ≈ 7. 55 ×10 − 3 . As the inner core grows, ¯ A - f decreases, and the com- -bination ¯ + f decreases, and the combination + ¯ A - c χ also decreases. This implies that C + c χ also decreases. This implies that C decreases with inner core size and, consequently, ˜ε m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the prediction for a rigid planet. 
-The specific predictions of ˜ε +The specific predictions of ˜ε m , ˜m f and ˜n s on Figure 4 depend sensitively on the assumed interior density model and on the dynamical ellipticities of the inner core (e - s ) and fluid core + s ) and fluid core (e f ). Hence, it depends on the choices we have made for the inner core density ρ s , the crustal density ρ c and its thickness h. Changing ρ s , ρ -c and/or h requires a different combination of ρ +c and/or h requires a different combination of ρ f , ρ m and r f in order to match M , ˆ C and ˆ C -m . In turn, this leads to different ellipticities at in- -terior boundary in order to match J +m . In turn, this leads to different ellipticities at interior + boundary in order to match J 2 and C -22 , and thus different predictions for ˜ε +22 , and thus different predictions for ˜ε m , ˜m f and ˜n @@ -1917,12 +1917,12 @@ the changes in ˜m s are more substantial, ∼ 5% and ∼ 10%, respectively. We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment of inertia of the whole planet, which we denote by ˜ε - g . A difference between ˜ε + g . A difference between ˜ε g and ˜ε m occurs if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core (with ˜n -s assumed small) leads to an off-diagonal component of the moment of inertia tensor +s assumed small) leads to an off-diagonal component of the moment of inertia tensor of ( C s − ¯ A @@ -1953,18 +1953,18 @@ A Ae α 3 ˜n s . (43) -Since the inner core is gravitationally forced into a close alignment with the mantle, the dif- -ference between ˜ε +Since the inner core is gravitationally forced into a close alignment with the mantle, the difference + between ˜ε g and ˜ε m remains very small. For the largest inner core radius that we have considered, ˜ε - g differs from ˜ε + g differs from ˜ε m only by approximately 0.001 arcmin. 
3.3 Viscous coupling -We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini -state. Peale et al. [2014] present two different parameterizations of viscous coupling based on -the timescale of attenuation of the differential rotation between the fluid core and mantle. More -complete analytical solutions for the flow resulting from a differentially precessing shell have +We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini +state. Peale et al. [2014] present two different parameterizations of viscous coupling based on +the timescale of attenuation of the differential rotation between the fluid core and mantle. More +complete analytical solutions for the flow resulting from a differentially precessing shell have been derived [e.g. Stewartson and Roberts , 1963; Busse , 1968; Rochester , 1976] and we exploit these solutions here. The parametrization of the viscous coupling constants K cmb and K @@ -1976,10 +1976,10 @@ f r 4 f ¯ A - f + f ν 2Ω - o + o 0.195 − 1. 976 i , (44a) K @@ -1988,26 +1988,26 @@ f r 4 s ¯ A - s + s ν 2Ω - o + o 0. 195 − 1 .976 i , (44b) -where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte- -rior is not well known but based on theoretical and experimental studies it is expected to be +where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary interior + is not well known but based on theoretical and experimental studies it is expected to be of the order of 10 −6 m 2 s −1 [e.g. Gans , 1972; de Wijs et al. , 1998; Alf`e et al., 2000; Rutter et al., 2002a,b]. –20– -Confidential manuscript submitted to JGR-Planets -The above parameterizations are valid only under the assumption that the flow in the bound- -ary layer remains laminar. 
Whether this is reasonable can be assessed by evaluating the Reynolds +Confidential manuscript submitted to JGR-Planets +The above parameterizations are valid only under the assumption that the flow in the boundary + layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds number Re = r f ∆ u -f /ν , associated with the differential velocity ∆ u +f /ν , associated with the differential velocity ∆ u f = r f Ω o ˜m @@ -2018,33 +2018,33 @@ f = 4 arcmin ≈ 0. 001 rad from the results in the previous section, we get ∆ u f ∼ 2 mm/s and Re ∼ 6 × 109 . Such a large Reynolds number indicates -that the viscous friction between the fluid core and mantle should induce turbulent flows, as +that the viscous friction between the fluid core and mantle should induce turbulent flows, as is the case for the Cassini state of the Moon [ Yoder , 1981; Wil liams et al. , 2001; C´ebron et al. , -2019]. For a boundary layer that involves turbulent flows, the viscous torque should be inde- -pendent of the fluid viscosity and proportional to the square of the differential velocity. The +2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent + of the fluid viscosity and proportional to the square of the differential velocity. The coupling constant K cmb should be in the form K cmb = f -cmb +cmb ˜m -f - +f + 0.195 − 1. 976 i , (45) where f - cmb is a numerical factor that depends among other things on surface roughness. In- -corporating a viscous coupling of this form in our rotational model is more challenging not only + cmb is a numerical factor that depends among other things on surface roughness. Incorporating + a viscous coupling of this form in our rotational model is more challenging not only because f cmb is not known but also because the viscous torque is no longer linear in ˜m f . One -strategy is to find solutions through an iterative process. 
The simpler alternative strategy that +strategy is to find solutions through an iterative process. The simpler alternative strategy that we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν -represents an effective turbulent viscosity. +represents an effective turbulent viscosity. To give an estimate of an appropriate turbulent value for ν , we turn to the Cassini state of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained -by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) +by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) [ Wil liams et al. , 2001, 2014; Wil liams and Boggs , 2015]. Viscous dissipation is reported in terms of a coupling parameter K and a recent estimate is K /C L = (1.41 ± 0.34)× 10 −8 @@ -2054,10 +2054,10 @@ and Boggs , 2015], where C L is the lunar polar moment of inertia. The connection between K and K cmb is - + I m [K - cmb ] + cmb ] = K C @@ -2078,47 +2078,47 @@ rotation rate. With C [e.g. Wil liams et al. , 2014], this gives |I m [K cmb ]| ∼ 9 × 10− 5 - . In order to match this amplitude in Equation (44a), with lunar parameters and as- -suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10− 4 + . In order to match this amplitude in Equation (44a), with lunar parameters and assuming + a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10− 4 m 2 s − 1 - , about 500 times larger than the laminar viscosity. Note that the differential velocity at the + , about 500 times larger than the laminar viscosity. Note that the differential velocity at the CMB of the Moon is closer to 3 cm/s [ Yoder , 1981; Wil liams et al. , 2001], more than 10 times -larger than our estimate for Mercury above. Since the effective turbulent coupling constant K +larger than our estimate for Mercury above. 
Since the effective turbulent coupling constant K cmb -is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- -cury should be smaller. Thus, ν ≈ 5 × 10− 4 +is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury + should be smaller. Thus, ν ≈ 5 × 10− 4 m 2 s− 1 gives a conservative upper bound for the -possible effective turbulent viscosity that can be expected for Mercury. +possible effective turbulent viscosity that can be expected for Mercury. Figure 5 shows how ˜ε m , ˜m f and ˜n - s vary as functions of inner core radius for different choices -of effective viscosities. For ν = 10 − 5 + s vary as functions of inner core radius for different choices +of effective viscosities. For ν = 10 − 5 m 2 s − 1 - , viscous coupling is too weak to affect ˜ε + , viscous coupling is too weak to affect ˜ε m and ˜m f and they are essentially unchanged from the solutions shown in Figure 4. With increasing -ν , the stronger viscous coupling between the core and the mantle reduces their differential ve- -locity, and ˜m -f is reduced. With the reduced differential velocity at the CMB, the prediction +ν , the stronger viscous coupling between the core and the mantle reduces their differential velocity, + and ˜m +f is reduced. With the reduced differential velocity at the CMB, the prediction of ˜ε m gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB -viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ε +viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ε m and ˜m - f are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the -fluid spin axis from the mantle symmetry axis. 
Considering the upper bound in turbulent vis- -cosity that we have identified above (i.e ν ≈ 5 × 10 − 4 + f are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the +fluid spin axis from the mantle symmetry axis. Considering the upper bound in turbulent viscosity + that we have identified above (i.e ν ≈ 5 × 10 − 4 m 2 s − 1 - ), the influence of viscous cou- -–21– -Confidential manuscript submitted to JGR-Planets + ), the influence of viscous cou–21– + +Confidential manuscript submitted to JGR-Planets ε mε g @@ -2147,18 +2147,18 @@ a b for a rigid planet ε m Figure 5. a) Obliquity of the mantle ( ˜ε - m , solid lines) and gravity field ( ˜ε + m , solid lines) and gravity field ( ˜ε g , dashed lines) b) ˜m f (solid lines) and ˜n -s (dashed lines) as a function of inner core radius and for different choices of kinematic +s (dashed lines) as a function of inner core radius and for different choices of kinematic viscosity (color in legend). pling on ˜ε m remains modest, reducing its amplitude by a maximum of approximately 0.0015 arcmin. The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core -tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in- -ner core with the fluid core spin axis. The viscous coupling strength is inversely proportional +tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner + core with the fluid core spin axis. The viscous coupling strength is inversely proportional to r s , so a larger viscosity results in a larger inner core radius at which viscous coupling is of a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5× 10− 4 @@ -2166,11 +2166,11 @@ m 2 s −1 , Figure 5 indicates that ˜n s may be 1 arcmin or larger only if the inner core radius is -smaller than approximately 100 km. 
For an inner core of a few hundred km in radius, gravi- -tational coupling is much larger than viscous coupling, and the inner core tilt is limited to a +smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravitational + coupling is much larger than viscous coupling, and the inner core tilt is limited to a fraction of 1 arcmin. -The larger inner core tilt observed with increasing effective viscosity results in a larger -offset between the obliquity of the principal moment of inertia ˜ε +The larger inner core tilt observed with increasing effective viscosity results in a larger +offset between the obliquity of the principal moment of inertia ˜ε g and that of the mantle ˜ε m , though it remains limited. For the upper bound of ν = 5 × 10 − 4 @@ -2178,54 +2178,54 @@ though it remains limited. For the upper bound of ν = 5 × 10 − 4 s − 1 , and for r s = 1500 -km, the difference between ˜ε +km, the difference between ˜ε g and ˜ε m is limited to 0.0013 arcmin. The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller -the misalignments of both the fluid core and inner core are with respect to the mantle. This +the misalignments of both the fluid core and inner core are with respect to the mantle. This implies that the larger the inner core is, the more we approach a planet precessing as a rigid -body, although the misalignment of the spin axis of the fluid core remains important, approx- -imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ε +body, although the misalignment of the spin axis of the fluid core remains important, approximately + 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ε m , ˜m f and ˜n - s change with inner core size would certainly be different for a turbulent model of viscous + s change with inner core size would certainly be different for a turbulent model of viscous coupling. 
But the general conclusion remains that the addition of viscous coupling at the CMB -and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. +and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. –22– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets 3.4 Electromagnetic coupling Let us now turn to electromagnetic (EM) coupling. To focus on its role in the equilibrium -Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to re- -main attached to electrically conducting materials, a differential tangential motion between two -electrically conducting regions stretches existing magnetic field lines that thread their interface. -This induces a secondary magnetic field (or equivalently, an electrical current) and an associ- -ated tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB +Cassini state, we set the viscous coupling back to zero. Because magnetic field lines tend to remain + attached to electrically conducting materials, a differential tangential motion between two +electrically conducting regions stretches existing magnetic field lines that thread their interface. +This induces a secondary magnetic field (or equivalently, an electrical current) and an associated + tangential EM stress resisting the differential motion. EM coupling at the CMB and ICB acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength -of the radial magnetic field B -r and the electrical conductivity σ on either side of the bound- -ary [ Rochester , 1960, 1962, 1968]. +of the radial magnetic field B +r and the electrical conductivity σ on either side of the boundary + [ Rochester , 1960, 1962, 1968]. The parametrization of EM coupling in terms of the coupling constants K cmb and K icb -has been developed in a few studies [e.g. 
Buffett , 1992; Buffett et al., 2002; Dumberry and Koot , -2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given +has been developed in a few studies [e.g. Buffett , 1992; Buffett et al., 2002; Dumberry and Koot , +2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given by B r = √ - 3 + 3 B d -r - cos θ , where +r + cos θ , where B d -r - is the r.m.s. strength of the field, the coupling constant +r + is the r.m.s. strength of the field, the coupling constant K cmb can be written is the form K cmb = 3(1 − i)F - cmb + cmb B d -r +r 2 , (47) where @@ -2234,49 +2234,49 @@ cmb = 1 Ω o ρ f r -f +f 1 σ m δ m + 1 σ f δ - f + f −1 , (48) and where σ m , δ - m = + m = 2/ (σ m µΩ o ) and σ f , δ - f = + f = 2/( σ f µΩ - o ) are the electrical conductivi- -ties and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π × 10 − 7 + o ) are the electrical conductivities + and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π × 10 − 7 N A − 2 - the magnetic permeability of free space. The r.m.s. field strength + the magnetic permeability of free space. The r.m.s. field strength B d -r +r is connected to -the Gauss coefficient g 0 -1 of the surface magnetic field by +the Gauss coefficient g 0 +1 of the surface magnetic field by B d -r +r = 2 √ - 3 + 3 R r -f +f 3 - + g 0 -1 +1 . (49) We can readily build an estimate of the amplitude of K @@ -2284,8 +2284,8 @@ cmb . The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding to the CMB of Mercury is in the range of σ m ∼ 0 .01 − 1 S m−1 - [Constable , 2015]. In con- -trast, the electrical conductivity of Fe in planetary cores is expected to be close σ + [Constable , 2015]. In contrast, + the electrical conductivity of Fe in planetary cores is expected to be close σ f ∼ 10 6 S m −1 @@ -2295,15 +2295,15 @@ m −1 (σ f δ f )− 1 - . Tak- -ing σ + . 
Taking + σ m = 1 S m −1 - , + , g 0 -1 +1 - = 190 nT for Mercury’s dipole field [ Anderson et al., 2012], r + = 190 nT for Mercury’s dipole field [ Anderson et al., 2012], r f = 2000 km, ρ f = 7000 kg m − 3 @@ -2313,20 +2313,20 @@ cmb ≈ (3 .1 × 10 − 11 in perspective, taking a molecular viscosity of ν = 10−6 m 2 s − 1 - in Equation (44a) gives a vis- -cous coupling constant of K + in Equation (44a) gives a viscous + coupling constant of K cmb ≈ (6 .0 × 10 − 7 ) · (0 .195 − 1. 976 i). Hence, EM coupling at the -CMB is much weaker than viscous coupling, even if we include other spherical harmonic com- -ponents of the radial magnetic field. -EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by -CMB cavities [Buffett , 2010; Glane and Buffett , 2018], in which case the effective σ +CMB is much weaker than viscous coupling, even if we include other spherical harmonic components + of the radial magnetic field. +EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by +CMB cavities [Buffett , 2010; Glane and Buffett , 2018], in which case the effective σ m could be closer to σ f . Likewise, σ m can be increased if a more electrically conducting layer has formed at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction -of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even +of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even in the extreme case of σ m = σ f = 10 6 @@ -2335,18 +2335,18 @@ in the extreme case of σ cmb ≈ (1. 6 × 10 − 8 ) · (1 − i ), which remains –23– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces dominate the tangential stress on the CMB of Mercury. 
At the ICB, because we can expect the electrical conductivity in both the solid inner core -and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM -coupling can be much larger and dominate viscous coupling. We assume that the magnetic field +and fluid core to be similar, and because the radial magnetic field is likely much stronger, EM +coupling can be much larger and dominate viscous coupling. We assume that the magnetic field morphology at the ICB is dominantly comprised of small spatial scales for example as predicted by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in -terms of an equivalent uniform radial magnetic field B -r capturing its r.m.s. strength [ Buf- -fett et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the -fluid and solid core, the coupling constant K +terms of an equivalent uniform radial magnetic field B +r capturing its r.m.s. strength [ Buffett + et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the +fluid and solid core, the coupling constant K icb can be written in the form K icb = 5 @@ -2361,7 +2361,7 @@ where o ρ s r s , (51) -and where δ = +and where δ = 2/ (σµΩ o ) is the magnetic skin depth. As F icb is inversely proportional to @@ -2369,21 +2369,21 @@ r s , K icb is inversely proportional to inner core size. Note that computing the EM coupling based on the r.m.s. strength B - r rather than a true field morphology tends to overestimate the strength + r rather than a true field morphology tends to overestimate the strength of the coupling [Koot and Dumberry , 2013]. However, since the strength of the radial magnetic -field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are +field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are absorbed in the range of possible B r values. 
-The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., -2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. +The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., +2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. When B - r is sufficiently large, this is no longer the case. EM coupling then enters a ’strong -field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which + r is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which K icb increases linearly with B r instead of quadratically. A good approximation of K - icb cal- -culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], + icb calculated + for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], K E icb = (0.175 − i0. 138) B r , (52) @@ -2398,8 +2398,8 @@ F E icb B r , (53) where F E -icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- -erry and Koot [2012]. These are Ω +icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry + and Koot [2012]. These are Ω o = 7. 292 × 10− 5 s − 1 , ρ @@ -2416,41 +2416,41 @@ To compute F S m − 1 in the core of Mercury [e.g. de Koker et al. , 2012; Deng et al., 2013]. The transition between the weak and -strong field regime occurs when B +strong field regime occurs when B r ≈ 1. 53 mT for the real part of K icb . B r at the ICB -of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom- -etry inside the core could be dominated by small length scales, yet only the weaker lower har- -monics of the field would penetrate through a thermally stratified layer in the upper region of +of Mercury is unknown. 
The dynamo model of Christensen [2006] showed that the field geometry + inside the core could be dominated by small length scales, yet only the weaker lower harmonics + of the field would penetrate through a thermally stratified layer in the upper region of –24– -Confidential manuscript submitted to JGR-Planets -the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- -face field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g An- -derson et al., 2012], B - r at the ICB could be as large as 0.3 mT, corresponding to approxi- -mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- -cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of -Mercury remains in the weak field regime. +Confidential manuscript submitted to JGR-Planets +the fluid core and reach the surface. If so, the field strength inside the core can exceed the surface + field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g Anderson + et al., 2012], B + r at the ICB could be as large as 0.3 mT, corresponding to approximately + 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mercury’s + field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of +Mercury remains in the weak field regime. Figure 6 shows how ˜ε m , ˜m f and ˜n - s vary as functions of inner core radius for different choices + s vary as functions of inner core radius for different choices of B r . The larger B r is, the stronger is the EM coupling at the ICB, and the smaller is the -differential rotation between the fluid core and inner core. The inner core and fluid core are vir- -tually locked into a common precession motion when B +differential rotation between the fluid core and inner core. 
The inner core and fluid core are virtually + locked into a common precession motion when B r > 0. 3 mT. Further increasing B - r + r above 1 mT does not change the solution as EM coupling already dominates all other torques -on the inner core. This is the case even when EM coupling transitions into the strong field regime. +on the inner core. This is the case even when EM coupling transitions into the strong field regime. EM coupling at the CMB is included in these calculations, with σ m = 1 S m−1 - and + and g 0 -1 +1 = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core @@ -2465,15 +2465,15 @@ The inner core needs to be larger than approximately 500 km for changes in the C equilibrium to be noticeable. It is important to point out that ˜m f is reduced not because of EM coupling at the CMB, but rather from the combination of EM coupling at the ICB, which -pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the +pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the greater is the reduction in ˜ε m and ˜m f . -When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are +When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are locked into a common precession motion, a good approximation of ˜ε - m is given by the same pre- -diction as Equations (39-40) involving the effective moment of inertia C + m is given by the same prediction + as Equations (39-40) involving the effective moment of inertia C , except χ is now given by χ = ¯ @@ -2514,7 +2514,7 @@ A o α 3 φ s increases -with inner core size, χ gets smaller, and so do C +with inner core size, χ gets smaller, and so do C and ˜ε m . 
The mantle obliquity drops from 2.049 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 @@ -2526,26 +2526,26 @@ A s Ω o α 3 φ - s , so χ becomes neg- -ative, C + s , so χ becomes negative, + C becomes smaller than the moment of inertia of a rigid Mercury C , and ˜ε m becomes smaller than the prediction based on a rigid planet. -The larger the inner core is, the smaller are the misalignments of the fluid and solid cores +The larger the inner core is, the smaller are the misalignments of the fluid and solid cores with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone is not altered with the addition of EM coupling but further strengthened; the larger the inner core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the -obliquity of the gravity field ˜ε - g which, for a large inner core, asymptotically approaches the obliq- -uity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset be- -tween ˜ε +obliquity of the gravity field ˜ε + g which, for a large inner core, asymptotically approaches the obliquity + expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between + ˜ε m and ˜ε g can be as large as 0.008 arcmin for a large inner core. -3.5 Fixed inner core density versus fixed ICB density contrast +3.5 Fixed inner core density versus fixed ICB density contrast Coupling models when viscous and EM stresses are both present have been presented in Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, –25– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets 2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 @@ -2566,56 +2566,56 @@ n for a rigid planet ε m Figure 6. 
a) Obliquity of the mantle ( ˜ε - m , solid lines) and gravity field ( ˜ε + m , solid lines) and gravity field ( ˜ε g , dashed lines) b) ˜m f (solid lines) and ˜n -s (dashed lines) as a function of inner core radius and for different choices of B +s (dashed lines) as a function of inner core radius and for different choices of B r (colour in legend). for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by -viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we con- -sider a model where K +viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we consider + a model where K cmb is purely from viscous coupling and K icb purely from EM coupling. -We choose an effective viscosity at the CMB of ν = 10 −4 +We choose an effective viscosity at the CMB of ν = 10 −4 m 2 s −1 , which we believe to be a representative value given the comparison with the Moon (see section 3.3). We take a radial -field strength at the ICB of B - r = 0.3 mT, approximately the field strength expected under -the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa- -tive’ coupling model, although the uncertainty on ν and B +field strength at the ICB of B + r = 0.3 mT, approximately the field strength expected under +the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’ + coupling model, although the uncertainty on ν and B r obviously remains high. Figure 7 shows how ˜ε m , ˜m f and ˜n s vary with inner core radius for the ’representative’ -coupling model (black lines) under the fixed inner core density scenario that we have used in -sections 3.2, 3.3 and 3.4. 
Figure 7 also shows how the results change when, for the same rep- -resentative coupling model, we adopt instead a fixed density contrast between the fluid and solid -cores and for different choices of α +coupling model (black lines) under the fixed inner core density scenario that we have used in +sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative + coupling model, we adopt instead a fixed density contrast between the fluid and solid +cores and for different choices of α 3 (coloured lines). For a relatively high density contrast (α 3 = -0 .2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller +0 .2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller α -3 , the point at which the orientation of the co-precessing fluid and inner cores begins to be +3 , the point at which the orientation of the co-precessing fluid and inner cores begins to be pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the general behaviour of ˜ε m , ˜m f and ˜n s as functions of inner core radius is unchanged. Hence, all -our results in the previous three sections would be qualitatively similar under a fixed density +our results in the previous three sections would be qualitatively similar under a fixed density contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core is required in order to produce an equivalent change in the Cassini state equilibrium. 4 Discussion -The study of Peale et al. [2016] also presented predictions of the obliquities of the man- -tle, fluid core and inner core associated with the equilibrium Cassini state of Mercury. Their +The study of Peale et al. [2016] also presented predictions of the obliquities of the mantle, + fluid core and inner core associated with the equilibrium Cassini state of Mercury. 
Their model included the tangential viscous stress at the ICB and CMB, but not the EM stress. Their -Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as +Table 1 gives the obliquities of the mantle, fluid core and inner core, denoted respectively as –26– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets 2.0322.0342.0362.0382.0402.0422.0442.0462.0482.050 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 @@ -2635,17 +2635,17 @@ n m ε gFigure 7. a) Obliquity of the mantle ( ˜ε - m , solid lines) and gravity field ( ˜ε + m , solid lines) and gravity field ( ˜ε g , dashed lines) b) ˜m f (solid lines) and ˜n -s (dashed lines) as a function of inner core radius, for a fixed inner core density of +s (dashed lines) as a function of inner core radius, for a fixed inner core density of 8800 kg m −3 - (black lines) and for different choices of α + (black lines) and for different choices of α 3 (coloured lines). -i +i m , i -f and i +f and i s ; these represent the obliquities with respect to the orbital plane and are connected to our variables by: i m = ˜ε @@ -2660,206 +2660,206 @@ m + ˜n s . To summarize their results, i f and i -s vary substantially for different inner core sizes, are always of compara- -ble amplitude, and i +s vary substantially for different inner core sizes, are always of comparable + amplitude, and i s is always larger than i -f . Furthermore, they find that as the inner core +f . Furthermore, they find that as the inner core size is increased, the mantle obliquity i m gets progressively larger and is displaced further away from its expected orientation based of a rigid planet (see their Figure 6). 
The change in i m they -obtain between a case with no inner core and an inner core radius equal to 0.6 times the plan- -etary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), +obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary + radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), is approximately an increase of 5 × 10 −5 - rad = 0.17 arcmin. This also corresponds approxi- -mately to the deviation of the obliquity with respect to that of a rigid planet. -When only viscous stress is included in our model (section 3.3), our results are substan- -tially different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core -gets smaller with inner core size and that the change is very modest. In contrast with the re- -sults of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that -of the fluid core, except when the inner core is very small or when the effective viscosity is un- -reasonably large. We also find that as the inner core size is increased, the mantle obliquity gets + rad = 0.17 arcmin. This also corresponds approximately + to the deviation of the obliquity with respect to that of a rigid planet. +When only viscous stress is included in our model (section 3.3), our results are substantially + different. As illustrated in Figure 4, we find instead that the obliquity of the fluid core +gets smaller with inner core size and that the change is very modest. In contrast with the results + of Peale et al. [2016], we find that the inner core obliquity is typically smaller than that +of the fluid core, except when the inner core is very small or when the effective viscosity is unreasonably + large. We also find that as the inner core size is increased, the mantle obliquity gets smaller, opposite to the results of Peale et al. 
[2016], and that the changes remain small, at most -of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling +of the order of 0.005 arcmin. A part of the difference is due to the different viscous coupling model that we use. But even when we adopt their model parameters and use their viscosity model, we were not able to reproduce their results. In the absence of viscous and EM coupling, the strong gravitational torque exerted on the -inner core by the mantle should prevent any large misalignment between the two. This is cap- -tured by the period of the FICN, which is of the order of 100 yr, much shorter than the forc- -ing period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational -torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscous- -EM torque to the gravitational torque decreases with inner core size, so a large inner core should +inner core by the mantle should prevent any large misalignment between the two. This is captured + by the period of the FICN, which is of the order of 100 yr, much shorter than the forcing + period of 325 kyr. Viscous and/or EM coupling at the ICB can counteract the gravitational +torque (and alter the period of the FICN), but only for a small inner core. The ratio of the viscousEM + torque to the gravitational torque decreases with inner core size, so a large inner core should be more strongly aligned with the mantle. The more strongly the inner core and mantle are –27– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets gravitationally locked together, the more they behave as a single rigid body in response to the external torque from the Sun. We expect then that the obliquity of the mantle should be brought -closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the re- -sults of Peale et al. [2016], which suggest the opposite. 
+closer to that of a rigid planet when the inner core is larger. Hence, we find puzzling the results + of Peale et al. [2016], which suggest the opposite. We showed that EM coupling is most likely larger than viscous coupling at the ICB, even -though our knowledge of the radial magnetic field strength inside Mercury (on which EM cou- -pling depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM -coupling is sufficiently strong to bring the fluid and solid cores into a locked procession motion. +though our knowledge of the radial magnetic field strength inside Mercury (on which EM coupling + depends) remains poor. If the magnetic field strength at the ICB is above 0.3 mT, EM +coupling is sufficiently strong to bring the fluid and solid cores into a locked procession motion. The larger the inner core is, the more this co-precessing core is forced into an alignment with the mantle because of the mantle gravitational torque on the inner core. As a result, the larger the inner core is, the closer we approach a situation resembling a whole planet precessing as a rigid body. The addition of EM coupling at the ICB does not change the overall picture that we observe with viscous coupling alone; the mantle obliquity decreases with inner core size. The -amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous cou- -pling alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016], +amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than for viscous coupling + alone; this remains a factor 10 smaller than the changes suggested in Peale et al. [2016], and again, importantly, in the reverse direction. 
-Our results suggest then that the presence and size of an inner core leads to only mod- -est changes of the mantle obliquity ε +Our results suggest then that the presence and size of an inner core leads to only modest + changes of the mantle obliquity ε m compared to the obliquity predicted on the basis of an entirely rigid planet ( ε r -m ). Let us denote this difference as ∆ε +m ). Let us denote this difference as ∆ε m = ε m −ε r m . The largest ∆ ε m occurs for a small or no inner core, and is ∆ ε -m ≈ 0.01 arcmin. This difference is decreased -as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM +m ≈ 0.01 arcmin. This difference is decreased +as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM coupling and large density contrast at the ICB, ∆ ε m can be negative, but its absolute value remains smaller than 0.01 arcmin. -To put these results in perspective, the uncertainty in the measurement of the mantle obliq- -uity reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much -larger than this difference. This means that, at the current level of precision, it is not possi- -ble to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This +To put these results in perspective, the uncertainty in the measurement of the mantle obliquity + reported by Margot et al. [2012] and Stark et al. [2015a] is of the order of 0.08 arcmin, much +larger than this difference. This means that, at the current level of precision, it is not possible + to distinguish the position of the mantle obliquity from the obliquity of a rigid planet. This is consistent with the fact that the observed obliquity falls close to that expected from a rigid planet. But it also implies that the observed obliquity cannot be used to place constraints on the inner core size. 
-Nevertheless, our results show that the presence of a fluid core and inner core affect the +Nevertheless, our results show that the presence of a fluid core and inner core affect the resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec ( ≈ 0 .006 arcmin) [ Baland et al. , 2017]. This is also of the same order as the amplitude of the nutation motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which -is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al. , 2017]. The precision on the obliq- -uity from the upcoming BepiColombo satellite mission is expected to be ≤ 0 .5 arcsec (≤ 0 .008 -arcmin) [ Cical`o et al. , 2016]. Thus, in addition to including tidal deformation and the preces- -sion of the pericenter, a Cassini state model that includes a fluid and solid core will then be +is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al. , 2017]. The precision on the obliquity + from the upcoming BepiColombo satellite mission is expected to be ≤ 0 .5 arcsec (≤ 0 .008 +arcmin) [ Cical`o et al. , 2016]. Thus, in addition to including tidal deformation and the precession + of the pericenter, a Cassini state model that includes a fluid and solid core will then be necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens -the possibility of further constraining the interior structure of Mercury on the basis of its obliq- -uity. - Obliquity measurements based on tracking topographic features reflect the orientation of +the possibility of further constraining the interior structure of Mercury on the basis of its obliquity. + + Obliquity measurements based on tracking topographic features reflect the orientation of the spin-symmetry axis of the mantle ( ε - m ). 
Measurements based on tracking the gravity field -of Mercury reflect instead the orientation of the principal moment of the whole planet (ε + m ). Measurements based on tracking the gravity field +of Mercury reflect instead the orientation of the principal moment of the whole planet (ε g ). These -two orientations do not coincide when an inner core is present and is misaligned from the man- -tle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we +two orientations do not coincide when an inner core is present and is misaligned from the mantle. + Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we –28– -Confidential manuscript submitted to JGR-Planets -find that the misalignment ∆ ε +Confidential manuscript submitted to JGR-Planets +find that the misalignment ∆ ε g = ε g − ε - m is limited. The maximum offset that we obtain + m is limited. The maximum offset that we obtain is approximately ∆ε - g ≈ 0 .007 arcmin. This limited magnitude of offset is important in the -light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε + g ≈ 0 .007 arcmin. This limited magnitude of offset is important in the +light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε g = 1.968 ± 0 .027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the spin-symmetry axis of the mantle: ε m = 2 . 04 ± 0.08 arcmin [Margot et al. , 2012] and ε m = 2 .029 ± 0. 085 arcmin [Stark et al., 2015a], although all three measurements remain consistent -with one another within their error estimates. In their interpretation, Genova et al. [2019] sug- -gest that the different central value of the obliquity that they obtain (smaller by ∼ 0 .07 ar- -cmin) is perhaps explained by an offset ∆ ε - g due to the presence of a (possibly large) solid in- -ner core. 
However, this is one order of magnitude larger than the maximum magnitude of ∆ε +with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest + that the different central value of the obliquity that they obtain (smaller by ∼ 0 .07 arcmin) + is perhaps explained by an offset ∆ ε + g due to the presence of a (possibly large) solid inner + core. However, this is one order of magnitude larger than the maximum magnitude of ∆ε g -that we predict. Moreover, we predict that the obliquity of the gravity field should be larger +that we predict. Moreover, we predict that the obliquity of the gravity field should be larger than that of the mantle spin axis, not smaller. Hence, at the present-day level of the precision of the measurements, ε g and ε - m should coincide, and their difference cannot be interpreted as -reflecting the misalignment between the polar moment of inertia of the whole planet and the + m should coincide, and their difference cannot be interpreted as +reflecting the misalignment between the polar moment of inertia of the whole planet and the mantle spin axis. -Lastly, we have concentrated our efforts on the mutual orientations of the different spin -and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by vis- -cous and EM coupling also lead to a displacement of these axes in the direction perpendicu- -lar to the Cassini plane [e.g Peale et al. , 2014]. Indeed, the two measurements based on track- -ing surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that +Lastly, we have concentrated our efforts on the mutual orientations of the different spin +and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by viscous + and EM coupling also lead to a displacement of these axes in the direction perpendicular + to the Cassini plane [e.g Peale et al. , 2014]. 
Indeed, the two measurements based on tracking + surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0. 03 arcmin). -Although this offset is smaller than the measurement errors, so that the observed obliquity is -still consistent with no deviation away from the Cassini plane, some amount of dissipation in- -variably takes place. These measurements give then a measure of the possible amplitude of the +Although this offset is smaller than the measurement errors, so that the observed obliquity is +still consistent with no deviation away from the Cassini plane, some amount of dissipation invariably + takes place. These measurements give then a measure of the possible amplitude of the dissipation. One source of dissipation is from anelastic tidal deformation [ Baland et al., 2017], -but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-of- -plane component of the observed obliquity may further help to quantify and constrain the in- -terior coupling mechanisms. This will be the sub ject of a future study. +but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-ofplane + component of the observed obliquity may further help to quantify and constrain the interior + coupling mechanisms. This will be the sub ject of a future study. 5 Conclusion -We have investigated how the presence of a fluid core and solid inner core affects the Cassini -state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mer- -cury’s interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis -does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset oc- -curs for a small or no inner core. 
The larger the inner core is, the more it is forced into an align- -ment with the mantle because of the strong gravitational torque between the two, and the closer -we approach a situation resembling a whole planet precessing as a rigid body. The misalign- -ment between the polar moment of inertia and mantle spin axis increases with inner core size, +We have investigated how the presence of a fluid core and solid inner core affects the Cassini +state equilibrium of Mercury. Our general conclusion is that the coupling strength between Mercury’s + interior regions is sufficiently strong that the obliquity of the mantle spin-symmetry axis +does not deviate from that of a rigid planet by more than 0.01 arcmin. This largest offset occurs + for a small or no inner core. The larger the inner core is, the more it is forced into an alignment + with the mantle because of the strong gravitational torque between the two, and the closer +we approach a situation resembling a whole planet precessing as a rigid body. The misalignment + between the polar moment of inertia and mantle spin axis increases with inner core size, but is limited to approximately 0.007 arcmin. These conclusions apply irrespective of the core -composition and thus of the partitioning of light elements into the solid core; a smaller den- -sity contrast at the ICB only implies that a larger inner core is required in order to produce +composition and thus of the partitioning of light elements into the solid core; a smaller density + contrast at the ICB only implies that a larger inner core is required in order to produce an equivalent change in the Cassini state equilibrium. -Our results imply that the obliquities of the mantle spin axis and polar moment of iner- -tia (or, equivalently, the gravity field) should coincide at the present-day level of measurement -errors. Moreover, neither of these can be distinguished from the obliquity predicted on the ba- -sis of a rigid planet. 
However, the smaller measurement errors expected from the upcoming Bepi- -Columbo satellite mission may permit this distinction, and thus provide further constraints on +Our results imply that the obliquities of the mantle spin axis and polar moment of inertia + (or, equivalently, the gravity field) should coincide at the present-day level of measurement +errors. Moreover, neither of these can be distinguished from the obliquity predicted on the basis + of a rigid planet. However, the smaller measurement errors expected from the upcoming BepiColumbo + satellite mission may permit this distinction, and thus provide further constraints on Mercury’s interior structure. –29– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets Acknowledgments Figures were created using the GMT software [ Wessel et al. , 2013]. The source codes, GMT -scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work +scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work was supported by an NSERC/CRSNG Discovery Grant. References Alf`e, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iron under core conditions, Phys. Rev., B61, 132–142. Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin, S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global -magnetic field of Mercury from MESSENGER orbital observations, Science, 333 , 1859– +magnetic field of Mercury from MESSENGER orbital observations, Science, 333 , 1859– 1862. -Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Pu- -rucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Low- -degree structure in mercury’s planetary magnetic field, J. Geophys. Res. , 117 , E00L12, +Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker, + J. A. Slavin, S. C. 
Solomon, M. T. Zuber, and R. L. McNutt (2012), Lowdegree + structure in mercury’s planetary magnetic field, J. Geophys. Res. , 117 , E00L12, doi:10.1029/2012JE004159. -Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mer- -cury: Influence of the precession of the pericenter and of tides, Icarus, 291 , 136–159. -Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin pre- -cession and polar motion of a synchronously rotating satellite: application to Titan, +Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury: + Influence of the precession of the pericenter and of tides, Icarus, 291 , 136–159. +Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession + and polar motion of a synchronously rotating satellite: application to Titan, Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50. -Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the +Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the forced nutations of the Earth, J. Geophys. Res. , 97 , 19,581–19,597. -Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from +Buffett, B. A. (2010), Chemical stratification at the top of earth’s core: Constraints from observations of nutations, Earth Planet. Sci. Lett. , 296 , 367–372. -Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: -effects of electromagnetic coupling, J. Geophys. Res. , 107 , doi:10.1029/2001JB000056. -Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. , 33 , +Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: +effects of electromagnetic coupling, J. Geophys. Res. , 107 , doi:10.1029/2001JB000056. +Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. , 33 , 739–751. Byrne, P. 
K., C. Klimczak, A. M. C. Seng¨or, S. C. Solomon, T. R. Watters, and S. A. Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature Geosci., 7 , 301–307. -C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: -flows, dissipation, dynamo and the lunar core, Geophys. J. Int. , 219 (Supplement +C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: +flows, dissipation, dynamo and the lunar core, Geophys. J. Int. , 219 (Supplement 1), S34–S57, doi:10.1093/gji/ggz037. -Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature, +Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature, 444 , 1056–1058. Cical`o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software, Month. N. Roy. Astr. Soc., 457 , 1507–1521. Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71 , 891–896. Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second -Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Ox- -ford. +Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford. + de Koker, N., G. Seinle-Neumann, and V. Vlˇcek (2012), Electrical resistivity and thermal -conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. +conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. Nat. Acad. Sci. , 109 , 4070–4073. –30– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets de Wijs, G. A., G. Kresse, L. Voˇcadlo, D. Dobson, D. Alf´e, M. J. Gillan, and G. D. Price (1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature, 392 , 805–807. @@ -2870,23 +2870,23 @@ Geophys. J. Int. , 167 , 557–566. 
Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical resistivity of iron and implications for planetary cores, Geophys. Res. Lett. , 40 , 33–37, doi:10.1029/2012GL054347. -Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid in- -ner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL +Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid inner + core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL Dataverse, V2. -Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nuta- -tions, Geophys. J. Int. , 191 , 530–544. +Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations, + Geophys. J. Int. , 191 , 530–544. Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization regime, Icarus, 248 , 254–268. Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner core, J. Geophys. Res. Planets , 121 , 1264–1292. -Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mer- -cury’s core density structure on its longitudinal librations, Icarus, 225 , 62–74. +Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. Yseboodt (2013), The role of Mercury’s + core density structure on its longitudinal librations, Icarus, 225 , 62–74. Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res. , 77 , 360–366. Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang, T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019), Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett. , 46 , doi:10.1029/2018GL081135. -Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at +Glane, S., and B. A. 
Buffett (2018), Enhanced core-mantle coupling due to stratification at the top of the core, Frontiers in Earth Science , 6 , 171, doi:10.3389/feart.2018.00171. Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global contraction of Mercury, Earth Planet. Sci. Lett. , 307 , 135–146. @@ -2896,21 +2896,21 @@ Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Re doi:10.1002/jgre.20091. Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H. Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon -(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. +(2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. Res., 117 , E00L14, doi:10.1029/2012JE004217. -Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orien- -tation, love number, and ephemeris from the MESSENGER radiometric tracking data, +Konopliv, A. S., R. S. Park, and A. I. Ermakov (2020), The Mercury gravity field, orientation, + love number, and ephemeris from the MESSENGER radiometric tracking data, Icarus, 335 , 113,386. -Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the +Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the electromagnetic coupling for nutations, Geophys. J. Int. , 195 , 200–210. Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth Planet. Sci. Lett. , 193 , 509–514. -Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longi- -tude libration of Mercury reveals a molten core, Science, 316 , 710–714. +Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude + libration of Mercury reveals a molten core, Science, 316 , 710–714. Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens, M. Yseboodt, J. D. 
Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s –31– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets moment of inertia from spin and gravity data, J. Geophys. Res. , 117 , E00L09, doi:10.1029/2012JE004161. Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s @@ -2919,13 +2919,13 @@ L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge 10.1017/9781316650684.005. Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation theory, J. Geophys. Res. , 110 (B02402), doi:10.1029/2003JB002915. -Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of -the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res. , 96 , 8219–8242. -Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and pre- -cession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J. +Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of +the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res. , 96 , 8219–8242. +Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession: + New nutation series for nonrigid Earth and insights into the Earth’s interior, J. Geophys. Res., 107 , doi:10.1029/2004JB000390. Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber, -D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of +D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res. Planets, 119 , 2417–2436. Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon @@ -2935,12 +2935,12 @@ Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74 , 483–489. Peale, S. J. 
(1974), Possible histories of the obliquity of Mercury, Astron. J., 79 , 722–744. Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262 , 765–766. Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178 , 4–18. -Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic in- -variance, Icarus, 181 , 338–347. -Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle +Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance, + Icarus, 181 , 338–347. +Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle and tidal torques on Mercury’s spin axis orientation, Icarus, 231 , 206–220. Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a -solid inner core on Mercury’s spin configuration, Icarus, 264 , 443–455. +solid inner core on Mercury’s spin configuration, Icarus, 264 , 443–455. Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of Mercury, Geophys. Res. Lett. , 42 , 6951–6958. Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bul l. Astron. Ser. 1 , 27 , @@ -2951,25 +2951,25 @@ Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Ea rotation, Phil. Trans. R. Soc. Lond., A, 252 , 531–555. Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res. , 67 , 4833– 4836. -Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic core- -mantle coupling, J. Geomag. Geoelectr., 20 , 387–402. +Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic coremantle + coupling, J. Geomag. Geoelectr., 20 , 387–402. Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle coupling, Geophys. J. R. Astron. Soc., 46 , 109–126. -Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. 
Sutton (2002a), To- -wards evaluating the viscosity of the Earth’s outer core: an experimental high pressure +Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), Towards + evaluating the viscosity of the Earth’s outer core: an experimental high pressure study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett. , 29 , 080,000–1. Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang (2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B , 66 , 060,102, –32– -Confidential manuscript submitted to JGR-Planets +Confidential manuscript submitted to JGR-Planets doi:10.1029/2001GL014392. -Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metal- -silicate partitioning and its role in core formation and composition on Super-Earths, +Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metalsilicate + partitioning and its role in core formation and composition on Super-Earths, Astrophys. J., 835 , 234. Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. , 489 , 92–99. Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann, -S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observa- -tions of Mercury’s librations, Geophys. Res. Lett. , 42 , 7881–7889. +S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations + of Mercury’s librations, Geophys. Res. Lett. , 42 , 7881–7889. Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular orbital elements, Celest. Mech. Dyn. Astr., 123 , 263–277. Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity @@ -2978,16 +2978,16 @@ Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, Res. Planets, 123 , 1–25, doi:10.1029/2018JE005607. Van Hoolst, T. 
(2015), Rotation of the terrestrial planets, in Treatise on Geophysics , vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford. -Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides +Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. Yseboodt (2012), The effects of tides and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. , 333–334 , 83–90. -Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSEN- -GER radio science data, J. Geophys. Res. Planets , 121 , 1627–1640. +Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER + radio science data, J. Geophys. Res. Planets , 121 , 1627–1640. Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping Tools: Improved version released, EOS Trans. AGU , 94 , 409–410. Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of dissipation, J. Geophys. Res. Planets , 120 (4), 689–724, doi:10.1002/2014JE004755. -Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar +Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar rotational dissipation in solid body and molten core, J. Geophys. Res. , 106 , 27,933– 27,968. Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine, @@ -3000,4 +3000,4 @@ Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. A , 303 , 327–338. Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181 , 327–337. 
- –33– + –33– \ No newline at end of file diff --git a/read/results/playa/2201.00069.txt b/read/results/playa/2201.00069.txt index 7b7f0bd1c96b1e832782e596803bde593614f114..e899cc7764d6247030dad72853c97a22530f4100 100644 GIT binary patch delta 1482 zcmX|Bdu$X%7|-tB?%vHI2((nN@-l)*OYg3<6ro01o7ezN%S%(R&Gv4%n{;ot%;;ZOxBbuZx!%5dkF~nQP%?;abnj|e7@jc^CAdh4*gD;G@HU3W6 zCRyU)6ciRb8%er3OmQlXcz3~BL~#eUJmRDgr-lEG&RR|)7Oz~;=ytd{#JeW+>aaH* z@zSDmM%s4QGbHX8)u|Cin1>w?P36bPB2R#UNHH*0T;A8k+?B=j1somh)ujs#P3hptTU3sNNk$gqJ}++*IV-I z=JaCCO!-NPLzpw_HQqlxq#&_s)^Wx3i0dSv|Ag6FqO#n^PTI>Lv43_gw194QC+z-S zx;4r$%>iC{O}=T)Z+uJ{QV{PbyO+OjHg-OgA}-=p@dau*61jLn83p$wZ8SKA#IeLW zHK-M{E7mBwgKYr3y>fdVu$WS%gu-iGnhoG@tDYok7ussb@&LlXa~l+1_WUnq#&VKU zj){vZ&82nAmee;O{?Z~50Mx9kD6C?d3D6?+G+#DkW}>7 z{;lNyivGGfO@DdW3V{00i}ymXLgHTi1Vzt~bOtD2JTJ^TKn#*ZV#VS&)!}A0mu}4i zYH#RO%rtd<2l^H_s*1sUiUHnJO-3Z-`&q}x9$O}I}pN47K)HMDpPiE-{_6)}&3K>~(uuiKzwD)Z1z-!f7d2uJ(@Jrh`k z%|YS<+X%e*pbM|}d-z}egW)Q1H&>)WnG*mm?A!=;gxO+OXHE}J%ccce-Y@`<6kz|B zX&M}VO#JkEA{s1a?r2nuv~OD!iLSS+G&m(4i1;@`MLW_a0*_sb^@IE-(OV5FCnSGwGz7Gq$Fl~>P1WsZS~Br8GU$!q^A zM)};>{0hWp3^hNp(gC!x{op4+$&R74?qMpEH+0mnE%_{qf$JxgXxd^K;#esQ_BQ@F F@IM|IAb delta 1622 zcmY*ae{2**6wcnYMYaf9N~?$n1R{jndVBp1t${X;77J1oBEO2;yWwu=-fo$hy>cKW z0%9<&hQ35FAV!7aHC6~y4VD;6D~%dbMFm4*qA@}6FXPV`6!p#ai1ELjdvCt?ecyZY zZa@Z44Fpg1=4&P#7?~!|<{!+PWav@mauRi&7>A7oWfKkEcH=Z^+3-riV7{RXnj{>L z1U63nO+{&v3KD~Lg^P5&?4}4KF%w=b{1B5FX(yJTP8`~%-I`+PNy~|Z&5Fe<-F7zx zsiI7tp<{Fo%q_k!!O-Kj+rj0J#SH-j=9CNK2&kF4L2UxiCLVch}<%(MU@eZbXa)& zY|f0}9fltBqNqmxGOsb{C%TSYy5Nk45kXxi66#sF3q4XdY~sX4LcXxD-iJX*I~{_{ z#vY7w!cwRkDMPg6#{Yj<^Uxnz39G`R_!Xlr%rF-R4ZT#ZH6z-cK6Tp4d;obcvNbTC zFDI36(2$fvY%Y@(-B~B(-<4Vp3fb(kBzh?))a+9iEJ5Ufhqr558RRed4*4f6CyIUg zbx8#tmN%?wYFq*4QaNtHtPYFAQ2o8It8+3;t)uv>&%kA{x2_sb);JxwP`3>!C}~hv zUyC+l%h0dq>TfD)>UX)fp+R}~)T$>#F!rbti=Xn7#v+xGpz#D;TULpXyd9fIqjK(J z`vbQzPcPq-6hsOgN7ULcxT>i8&Zz5)SUK0LXTcH^^y+v^3y(S6k znpBqX$MxF!cQHx1dvwF~|0H*9?7@SusDtA$ucZbLS*H^%wHl5Tr-G(%Ru;&Uo4yPn 
zoKFGH&zcHHTB&**8+wPPq$8&1*A_6P&P7>$j{3$bTce3Ib#6@D;SQJ z@-4GaqK3>|LF!CJy%4|2KVHC@%p&SBRZgX)pEQR@3zBAegLjO%Aa{&Q@C zR$%D3II6>CW|gs~-UyouN}#y+IBHI+9dW*QCsN`bc4oVe0LxAc;fUcQbXmGY7Q7W! zaP2M=@JQxaWU<-9?Eqt$=71UT9jWY_t(vXGb7*<{cO=J&#i9vV-(Q7LNy@pJ?!*0O z{Fc9-DUw@GUiDi(p0`ZaVh!KvF4>psgQ=xe^Zo~FTwPqKw}$%yW|?2_+{i?ASGAsp z_iPSD#)Cvu~iIUbfheQ*?ctS9Xa{}LfK8$9ob%9nzv3UO%f2j(kW#Q^_ zIC**9WJ6!=pbx6Z$T0y$SK=s45Eg>nS59N5ZQ;LzT=dmh4T0hoPf*8-S)rb*1DgIX DWk@-B diff --git a/read/results/playa/2201.00151.txt b/read/results/playa/2201.00151.txt index 6ac4169..60bbfe4 100644 --- a/read/results/playa/2201.00151.txt +++ b/read/results/playa/2201.00151.txt @@ -8,57 +8,57 @@ e-mail: klaudia.kowalczyk@gmail.com, lokas@camk.edu.pl January 4, 2022 ABSTRACT Dwarf spheroidal (dSph) galaxies are believed to be strongl y dark matter dominated and thus are considered perfect objects to study -dark matter distribution and test theories of structure for mation. They possess resolved, multiple stellar populations that off er new +dark matter distribution and test theories of structure for mation. They possess resolved, multiple stellar populations that off er new possibilities for modeling. A promising tool for the dynami cal modeling of these objects is the Schwarzschild orbit superposition method. In this work we extend our previous implementation of the scheme to include more than one population of stars and a more general form of the mass-to-light ratio function. We tested the improved approach on a nearly spherical, gas-free galaxy formed in the cosmological context from the Illustris simulation. We modeled the binned velocity moments for stars split into two populations -by metallicity and demonstrate that in spite of larger sampl ing errors the increased number of constraints leads to significantly tighter -confidence regions on the recovered density and velocity ani sotropy profiles. We then applied the method to the Fornax dSph galaxy -with stars similarly divided into two populations. 
In comparison with our earlier work, we find the anisotropy parameter to be slightly +by metallicity and demonstrate that in spite of larger sampl ing errors the increased number of constraints leads to significantly tighter +confidence regions on the recovered density and velocity ani sotropy profiles. We then applied the method to the Fornax dSph galaxy +with stars similarly divided into two populations. In comparison with our earlier work, we find the anisotropy parameter to be slightly increasing, rather than decreasing, with radius and more st rongly constrained. We are also able to infer anisotropy for each stellar -population separately and find them to be significantly di ff erent. +population separately and find them to be significantly di ff erent. Key words. galaxies: kinematics and dynamics – galaxies: structure – galaxies: fundamental parameters – galaxies: dwarf – galaxi es: star clusters: individual: Fornax 1. Introduction Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo 1998; Tolstoy et al. 2009) are considered to be a perfect tool to test our current theories of structure formation involving dark -matter in the context of near-field cosmology. The objects ar e +matter in the context of near-field cosmology. The objects ar e believed to be strongly dark matter dominated with mass-to- light ratios even on the order of a few hundred solar units. Due to th eir -proximity they are also the only extragalactic systems wher e in- -dividual stars can be resolved and their velocities measure d of- -fering the possibility to create interesting dynamical mod eling +proximity they are also the only extragalactic systems wher e individual + stars can be resolved and their velocities measure d offering + the possibility to create interesting dynamical mod eling techniques. 
-The first estimates of dark matter content in dSph galaxies +The first estimates of dark matter content in dSph galaxies were based on a single measurement of the line-of-sight velo city dispersion of the stars and the application of the virial the orem. As the samples of the stars with kinematic measurements grew, -it became possible to estimate the profile of the velocity disper- -sion and model it using the Jeans equation (Binney & Tremaine +it became possible to estimate the profile of the velocity dispersion + and model it using the Jeans equation (Binney & Tremaine 2008). Since the stars in the galaxy can move on a variety of orbits, from circular to radial, the degeneracy between the anisotropy of the orbits and the mass distribution is inhere nt in this type of modeling. The reason for this lies in the fact tha t -diff erent combinations of these quantities can reproduce the ve - -locity dispersion profile equally well. +diff erent combinations of these quantities can reproduce the ve locity + dispersion profile equally well. A way to overcome this issue, at least partially, is to resort to -higher order line-of-sight velocity moments, such as the ku rto- -sis, and use the corresponding Jeans equations. Since the ku rto- -sis is more sensitive to the velocity anisotropy than to the m ass +higher order line-of-sight velocity moments, such as the ku rtosis, + and use the corresponding Jeans equations. Since the ku rtosis + is more sensitive to the velocity anisotropy than to the m ass distribution, useful constraints can be obtained on both. Still, the method requires large kinematic samples to estimate the velocity moments reliably and some assumption on the functional form of the anisotropy (Łokas 2002; Łokas et al. 2005). 
The Schwarzschild modeling technique (Schwarzschild -1979) o ff ers a diff erent approach to estimate the properties of +1979) o ff ers a diff erent approach to estimate the properties of dSph galaxies without prior assumptions on the type of orbits. -It relies on building a galaxy model out of a set of best-fittin g +It relies on building a galaxy model out of a set of best-fittin g orbits probed in the range of energy and angular momenta. In this method, the anisotropy of the stellar orbits comes out a s a -result of the modeling in the same way as the density profile. A l- -though it has been originally developed for large elliptica l galax- -ies (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt e t al. +result of the modeling in the same way as the density profile. A lthough + it has been originally developed for large elliptica l galaxies + (van der Marel et al. 1998; Valluri et al. 2004; Gebhardt e t al. 2015), it has recently been adopted for use on discrete data characteristic of dSph galaxies and applied to a number of dwarfs, including Carina, Draco, Fornax, Sculptor, and Sex tans @@ -67,18 +67,18 @@ dwarfs, including Carina, Draco, Fornax, Sculptor, and Sex tans Many dSph galaxies show signs of the presence of multiple stellar populations resulting from a few star formation episodes (Bellazzini et al. 2001; del Pino et al. 2015; Fabrizio et al. 2016; -Pace et al. 2020). This observation o ff ers a way to improve the +Pace et al. 2020). This observation o ff ers a way to improve the modeling methods since, assuming dynamical equilibrium, a ll -populations are supposed to be influenced by the same under- -lying gravitational potential of the galaxy, but they have d if- -ferent distributions so more constraints can be imposed dur ing -the modeling. This approach was first used by Battaglia et al. 
+populations are supposed to be influenced by the same underlying + gravitational potential of the galaxy, but they have d ifferent + distributions so more constraints can be imposed dur ing +the modeling. This approach was first used by Battaglia et al. (2008) to model the mass distribution in the Sculptor dSph galaxy. A few attempts have also been made to constrain the -inner slope of the dark matter profile in dSph galaxies using +inner slope of the dark matter profile in dSph galaxies using this technique (Walker & Peñarrubia 2011; Amorisco & Evans -2012; Hayashi et al. 2018) in order to resolve the so-called c usp- -core problem. It has been shown to be diffi cult, however, due +2012; Hayashi et al. 2018) in order to resolve the so-called c uspcore + problem. It has been shown to be diffi cult, however, due Article number, page 1 of 12 A&A proofs: manuscript no. Populations4 Table 1. Properties of the Illustris galaxy used to create mock data. @@ -110,47 +110,47 @@ to the nonsphericity of the dwarfs that introduces biases in such measurements (Kowalczyk et al. 2013; Genina et al. 2018). In our recent papers (Kowalczyk et al. 2017, 2018, 2019) we developed the Schwarzschild technique in the form applicab le to -binned velocity moments of a single tracer and verified its ab il- -ity to reproduce the mass distribution and velocity anisotr opy of +binned velocity moments of a single tracer and verified its ab ility + to reproduce the mass distribution and velocity anisotr opy of simulated galaxies. We have also studied biases resulting f rom the nonsphericity of the modeled objects. Later, we applied the -method to model the kinematics of the Fornax dSph galaxy esti- -mating its mass and anisotropy profiles with unprecedented p re- -cision. 
-In this paper we extend our Schwarzschild modeling tech- -nique to include multiple stellar populations with the aim to +method to model the kinematics of the Fornax dSph galaxy estimating + its mass and anisotropy profiles with unprecedented p recision. + +In this paper we extend our Schwarzschild modeling technique + to include multiple stellar populations with the aim to constrain the properties of dSph galaxies even more strongly. We test our approach on a realistic simulated galaxy formed in the cosmological context, originating from the Illustris p roject (Vogelsberger et al. 2014a). Although no precise analogues of -dSph galaxies are available in this simulation because of th e res- -olution, we use a more massive galaxy but with properties oth - -erwise similar to dSphs. The reliability of the modeling doe s not +dSph galaxies are available in this simulation because of th e resolution, + we use a more massive galaxy but with properties oth erwise + similar to dSphs. The reliability of the modeling doe s not depend on the particular value of the mass so we believe these tests to be viable. We do not attempt to constrain the inner da rk -matter density profile (which is poorly resolved anyway) but try +matter density profile (which is poorly resolved anyway) but try to put tighter limits on the estimates of the mass and anisotr opy -profiles. Finally, we apply the improved method to the availa ble +profiles. Finally, we apply the improved method to the availa ble kinematic data for the distinct stellar populations of the Fornax dSph. This paper is organized as follows. In Section 2 we present the data for the simulated galaxy as well as their splitting into stellar populations and mock observations along the main axes. 
-Section 3 contains an overview of our modeling method, the ap - -plication of the method to all stars and to two populations, a nd +Section 3 contains an overview of our modeling method, the ap plication + of the method to all stars and to two populations, a nd a comparison of the results obtained with these two approach es. The results of the application of the method to the Fornax dSp h -galaxy are presented in Section 4. We discuss our findings and +galaxy are presented in Section 4. We discuss our findings and summarize the paper in Section 5. 2. Mock data 2.1. Selection of the simulated galaxy In order to test our modeling method on realistic simulated data, we decided to use a galaxy from the Illustris project (Vogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al. -2015), namely the Illustris-1 cosmological simulation. Th is sim- -ulation follows the formation and evolution of galaxies fro m the -early Universe to the present by solving gravity and hydrody - -namics, as well as modeling of star formation, galactic wind s, SFR [M⊙ yr-1] +2015), namely the Illustris-1 cosmological simulation. Th is simulation + follows the formation and evolution of galaxies fro m the +early Universe to the present by solving gravity and hydrody namics, + as well as modeling of star formation, galactic wind s, SFR [M⊙ yr-1] t [Gyr] 0 4 8 12 16 0 2 4 6 8 10 12 Fig. 1. Star formation rate as a function of the age of the Universe in @@ -165,33 +165,33 @@ t [Gyr] Fig. 2. Number of stars as a function of their metallicity and time of formation (the age of the Universe) in the simulated galaxy. The vertical line indicates the applied split into stellar populations. -magnetic fields, and the feedback from black holes. Although +magnetic fields, and the feedback from black holes. Although dwarf galaxies that are of our interest here are not resolved in the suite, this can be easily overcome with the appropriate choice of the object and the treatment of data. 
-As the key properties of dSph galaxy equivalents we iden- -tified: the lack of gas, the lack of a black hole, a low spin, +As the key properties of dSph galaxy equivalents we identified: + the lack of gas, the lack of a black hole, a low spin, the stellar mass much smaller than the dark matter mass and a -nearly spherical shape. The last condition was adopted in an at- -tempt to avoid any strong bias introduced by the spherical mo d- -eling of a nonspherical object. Moreover, we required the ga laxy -to possess a significant number of both stellar and dark mat- -ter particles (over 10 5 +nearly spherical shape. The last condition was adopted in an attempt + to avoid any strong bias introduced by the spherical mo deling + of a nonspherical object. Moreover, we required the ga laxy +to possess a significant number of both stellar and dark matter + particles (over 10 5 ), and a well resolved center. Due to the large softening scale for dark matter particles in the simulation ( ǫ DM = 1 . 42 kpc), we looked for an object in which even the more concentrated stellar population (see Section 2.2) extended -over 43 kpc so that the region a ff ected by the numerical artifacts +over 43 kpc so that the region a ff ected by the numerical artifacts was enclosed within 2-3 innermost data bins (we used 20 linea rly spaced spatial bins, see Section 3.1). -Out of 27345 galaxies listed in the catalog of stellar circu- -larities, angular momenta, and axis ratios published by the Illus- -tris team (Genel et al. 2015) containing subhalos with the stellar +Out of 27345 galaxies listed in the catalog of stellar circularities, + angular momenta, and axis ratios published by the Illustris + team (Genel et al. 2015) containing subhalos with the stellar mass larger than 10 9 M - ⊙ , only a few met our restrictive require- -Article number, page 2 of 12 + ⊙ , only a few met our restrictive requireArticle + number, page 2 of 1 K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in S chwarzschild modeling -80-4004080 POPULATION I [kpc] major POPULATION I @@ -226,8 +226,8 @@ intermediatePOPULATION II [kpc] -80 -40 0 40 80POPULATION II [kpc] 0 30 60 90 σ [km/s] -Fig. 3. Maps of the projected stellar density, mean stellar velocit y, and stellar velocity dispersion (in rows) for two stellar populations: the metal- -rich population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along t he principal axes determined +Fig. 3. Maps of the projected stellar density, mean stellar velocit y, and stellar velocity dispersion (in rows) for two stellar populations: the metalrich + population I (left-hand side panels) and the metal-poor population II (right-hand side), and observations along t he principal axes determined for all stars (in columns, along the major, the intermediate, and the minor axis, respectively). -1-0.5 0 0.5 1 1 10 100β(r) @@ -247,29 +247,29 @@ pop II 40 60 80 100 120 40 60 80 100 120 0 10 20 30 40 50σt(r) r [kpc] -Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calcu- -lated from all stars (in red), including only population I (i n orange), and only population II (in blue). The upper row shows the profiles using the +Fig. 4. Profiles of the velocity anisotropy parameter, radial velocity dispersion, and tangential velocity dispersion (in consecutive columns) calculated + from all stars (in red), including only population I (i n orange), and only population II (in blue). The upper row shows the profiles using the logarithmic distance scale and reaching the outskirts of the galaxy whereas the bottom row presents in the linear scale only the radial range used in the modeling. ments. We decided to use a galaxy labeled as subhalo 16960. 
All the relevant properties of the galaxy are given in Table 1, -including numbers of particles and total masses for both com po- -nents, and details on the shape of the stellar component: the axis +including numbers of particles and total masses for both com ponents, + and details on the shape of the stellar component: the axis ratios minor to major (shortest to longest) c / a , intermediate to major b / a , and the triaxiality parameter T = ( a 2 − b 2 ) / ( a 2 − c 2 ). -We distinguish between the half-mass radius provided in the Il- -lustris database and the half-number radius r -1/ 2 , which we use for further calculations in this paper. The diff erence between the +We distinguish between the half-mass radius provided in the Illustris + database and the half-number radius r +1/ 2 , which we use for further calculations in this paper. The diff erence between the two comes from a small gradient in the stellar mass-to-light ratio with the distance from the galactic center. Since in our appr oach -we treat stars as equal-mass particles and refer to number de n- -sities (multiplied by the mean mass of a stellar particle whe n -needed), the application of the half-number radius is more self- -consistent. +we treat stars as equal-mass particles and refer to number de nsities + (multiplied by the mean mass of a stellar particle whe n +needed), the application of the half-number radius is more selfconsistent. + Article number, page 3 of 12 A&A proofs: manuscript no. Populations4 10 -310 -110 110 3 @@ -282,23 +282,23 @@ R [kpc]minor all stars pop I pop II -Fig. 5. Surface number density profiles of the stellar data samples f or the simulated galaxy observed along di ff erent lines of sight (from the left to -the right). Di ff erent lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in +Fig. 5. 
Surface number density profiles of the stellar data samples f or the simulated galaxy observed along di ff erent lines of sight (from the left to +the right). Di ff erent lines show profiles for all available stars (in red), the metal-rich population I (in orange), and the metal-poor population II (in blue). Thin vertical lines indicate r 0 (see text) and the outer boundary of the spectroscopic data. 2.2. Splitting the stars into populations -Our chosen galaxy shows a complex formation history under- -going multiple mergers which result in extended star formation +Our chosen galaxy shows a complex formation history undergoing + multiple mergers which result in extended star formation with a few star formation bursts. The last wet merger, that is a merger with an object containing gas, happens at 6.9 Gyr from the beginning of the simulation, whereas the last dry merger (no gas transfer) at 12.1 Gyr, giving the galaxy enough time to regain dynamical equilibrium. We present the star formation rate ( SFR) as a function of time (the age of the Universe) in Fig. 1, where -these last mergers are indicated with black and gray vertica l ar- -rows. In Fig. 2 we show the distribution of stars as a function of -their metallicity (in solar units) and the time of formation . In or- -der to divide the stellar sample into two populations we cut it in +these last mergers are indicated with black and gray vertica l arrows. + In Fig. 2 we show the distribution of stars as a function of +their metallicity (in solar units) and the time of formation . In order + to divide the stellar sample into two populations we cut it in half based on the metallicity index of each stellar particle . This split is indicated in Fig. 2 with the vertical line. With satisfying accuracy it separates the stars born before and after 4 Gyr since @@ -307,29 +307,29 @@ time before and after the end of the second major star burst, a s shown in Fig. 1. 
We refer to the metal-rich stars as populatio n I and to the metal-poor as population II, following the common ly used nomenclature in astronomy. -In Fig. 3 we present maps of the projected stellar mass den- -sity, line-of-sight velocity, and line-of-sight velocity dispersion +In Fig. 3 we present maps of the projected stellar mass density, + line-of-sight velocity, and line-of-sight velocity dispersion for both populations obtained by projecting the galaxy alon g its -principal axes. The orientation was determined from the ine r- -tia tensor calculated from all stars within the half-number radius +principal axes. The orientation was determined from the ine rtia + tensor calculated from all stars within the half-number radius r -1/ 2 and therefore is the same in both panels. The two popula- -tions diff er significantly in the spatial distribution and kinemat- -ics with the metal-rich (considered to be younger) population I -being more concentrated but having lower central velocity d is- -persion. Both populations show a weak rotation signal at large +1/ 2 and therefore is the same in both panels. The two populations + diff er significantly in the spatial distribution and kinematics + with the metal-rich (considered to be younger) population I +being more concentrated but having lower central velocity d ispersion. + Both populations show a weak rotation signal at large distances from the center. The velocity anisotropy parameter β( r ) = 1 − ( σ2 θ + σ2 φ ) / (2 σ2 r ), where σ -i are velocity dispersions in spherical coordi- -nates (Binney & Tremaine 2008), describes the orbital struc ture +i are velocity dispersions in spherical coordinates + (Binney & Tremaine 2008), describes the orbital struc ture of galaxies. It is one of the most important dynamical proper ties -of bound systems which cannot be inferred directly from ob- -servations and has to be recovered by dynamical modeling. 
Th e -profiles of the anisotropy parameter β as well as the radial σ +of bound systems which cannot be inferred directly from observations + and has to be recovered by dynamical modeling. Th e +profiles of the anisotropy parameter β as well as the radial σ r and tangential σ t = [( σ2 @@ -337,17 +337,17 @@ t = [( σ2 φ ) / 2] 1/ 2 velocity dispersions for our simulated galaxy are presented in the consecutive columns o f Fig. 4. Throughout the paper we use red, orange, and blue colo rs -to indicate values calculated or recovered for all stars, po pula- -tion I, and population II, respectively. The two rows of the figure -show the behavior of the parameters at diff erent scales. The top -row plots the profiles with the distance from the center of the +to indicate values calculated or recovered for all stars, po pulation + I, and population II, respectively. The two rows of the figure +show the behavior of the parameters at diff erent scales. The top +row plots the profiles with the distance from the center of the galaxy in the logarithmic scale and shows the drop of anisotr opy at the outer edges of the object. The bottom row uses the linea r distance scale and focuses on the main body of the galaxy. -Figure 5 shows the surface number density profiles of the -stars as measured in diff erent directions. We can see that while -the diff erent subsamples have quite distinguishable profiles, the -diff erence between the lines of sight is small because the galaxy +Figure 5 shows the surface number density profiles of the +stars as measured in diff erent directions. We can see that while +the diff erent subsamples have quite distinguishable profiles, the +diff erence between the lines of sight is small because the galaxy is close to spherical. 2.3. Observables We generated nine sets of mock data by observing all stars and @@ -356,22 +356,22 @@ from all stars. 
For the observables to be used in the modeling we divided the stars into 20 bins spaced linearly in distance fr om the center of the galaxy up to 50 kpc, measuring the fraction of the total number of stars and the 2nd, 3rd, and 4th proper -moments of the line-of-sight velocity defined in Eq. 8 and 9 -of Kowalczyk et al. (2018). The profiles of these quantities a re -shown in consecutive rows in Fig. 6. Columns correspond to dif- -ferent lines of sight, from the left to the right: along the ma jor, -intermediate, and minor axis of the galaxy. For clarity of th e fig- -ure, in each panel we indicate only the error bars for one of th e +moments of the line-of-sight velocity defined in Eq. 8 and 9 +of Kowalczyk et al. (2018). The profiles of these quantities a re +shown in consecutive rows in Fig. 6. Columns correspond to different + lines of sight, from the left to the right: along the ma jor, +intermediate, and minor axis of the galaxy. For clarity of th e figure, + in each panel we indicate only the error bars for one of th e data sets. However, as the number of stars in a sample remains roughly constant between the lines of sight, the error bars a re very similar among the panels in a given row. Although in our previous studies of the reliability of the Schwarzschild modeling and its applications to real data -(Kowalczyk et al. 2017, 2018, 2019) we approximated the den- -sity profile of the tracer with the Sérsic formula, we found th at it -does not provide a good approximation of the data for the simu - -lated galaxy considered here. We therefore fit the projected den- -sity profile with the King formula (King 1962) +(Kowalczyk et al. 2017, 2018, 2019) we approximated the density + profile of the tracer with the Sérsic formula, we found th at it +does not provide a good approximation of the data for the simu lated + galaxy considered here. 
We therefore fit the projected density + profile with the King formula (King 1962) I ( R ) = I 0   @@ -422,12 +422,12 @@ pop I pop II Fig. 6. Observables used in our Schwarzschild modeling scheme of the simulated galaxy. In rows: the fraction of the total number of stars, 2nd, 3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, intermediate, and minor axis. In red we present -the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively. For clarity of t he figure, in each panel we +the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively. For clarity of t he figure, in each panel we indicate only the error bars for one of the data sets. where I 0 , R c , and R - t are the model parameters. The profile can + t are the model parameters. The profile can be analytically deprojected to obtain the 3D density ρ( r ) = ρ 0 @@ -453,14 +453,14 @@ c R 2 c + R 2 t . (4) 3. Schwarzschild modeling -In this section we briefly present our modeling method and its -application to the data sets derived for all stars and the two pop- -ulations of the simulated galaxy separately. In both cases o ur -aim was to recover the profiles of the total mass and the velocity +In this section we briefly present our modeling method and its +application to the data sets derived for all stars and the two populations + of the simulated galaxy separately. In both cases o ur +aim was to recover the profiles of the total mass and the velocity anisotropy. 3.1. Overview of the method We follow the approach introduced in Kowalczyk et al. (2018) , -namely we model the total mass profile with the mass-to-light +namely we model the total mass profile with the mass-to-light ratio Υ varying with radius: log Υ ( r ) = ( log( Υ @@ -494,9 +494,9 @@ A&A proofs: manuscript no. Populations4 10 100 χ2 Fig. 7. 
Absolute values of χ2 - obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom + obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom right) for the observations along the major axis of the simul ated galaxy. The results for the modeling of two populations (top right) were obtained -as an algebraic sum of values for populations I and II. To avoi d large numbers in the figure, Υ +as an algebraic sum of values for populations I and II. To avoi d large numbers in the figure, Υ 0 was divided by the mean mass of a stellar particle. where r is the distance from the center of the galaxy, r 0 is a @@ -506,8 +506,8 @@ have assumed log r 0 = 0 . 33 which corresponds to three softening scales for stellar particles in the Illustris simulation. We probed the parameter a ∈ [0 : 1 . 3] with a step ∆a = 0 . 04 -and c ∈ [1 . 1 : 2 . 9] with a step ∆c = 0 . 2, imposing the require- -ment on the total density profile to be monotonically decreasing +and c ∈ [1 . 1 : 2 . 9] with a step ∆c = 0 . 2, imposing the requirement + on the total density profile to be monotonically decreasing with radius. For each set of parameters and for each line of sight we generated 1200 orbits using 100 values of energy (expressed with the radius of a circular orbit) spaced logarithmically and @@ -515,40 +515,40 @@ with the radius of a circular orbit) spaced logarithmically and outer radius of the orbit library, that is the apocenter of th e most extended orbit, was set to r out = 165 kpc in order to cover over -0.999 of the total stellar mass based on the fitted King profile +0.999 of the total stellar mass based on the fitted King profile parameters. 
-We fit the kinematics weighted with the fraction of mass with -the constrained least squares algorithm where diff erent values +We fit the kinematics weighted with the fraction of mass with +the constrained least squares algorithm where diff erent values of Υ 0 were obtained with a simple transformation of velocities -given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In or- -der to smooth out the numerical artifacts, the three-dimensional +given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). In order + to smooth out the numerical artifacts, the three-dimensional χ2 spaces were then interpolated with 12-order polynomials ( ∼ a 4 c 4 Υ 4 -0 ) that were further used to determine the global min- -imums (identified as the best-fitting models) and 1, 2, 3 σ con- -fidence levels which for three parameters correspond to ∆χ2 +0 ) that were further used to determine the global minimums + (identified as the best-fitting models) and 1, 2, 3 σ confidence + levels which for three parameters correspond to ∆χ2 = 3 . 53 , 8 . 02 , 14 . 2 (Press et al. 1992). 3.2. Application to mock data In the following we present the direct and inferred results o f -the Schwarzschild modeling of the data sets described in Sec - -tion 2.3. +the Schwarzschild modeling of the data sets described in Sec tion + 2.3. First, Fig. 7 shows the distribution of the absolute values o f the χ2 - as a function of three parameters of the mass-to-light ra- -tio. In order to avoid unnecessary repetitions, we include o nly + as a function of three parameters of the mass-to-light ratio. + In order to avoid unnecessary repetitions, we include o nly the plot for the mock data obtained by observing the Illustris galaxy along its major axis as the others are qualitatively similar. 
-The four panels refer to fits for all stars (top left), the meta l-rich +The four panels refer to fits for all stars (top left), the meta l-rich population I (bottom left), the metal-poor population II (b ottom right), and the one named "populations" (top right) which is the algebraic sum of values for both populations. -As our parametrization of the mass-to-light ratio is not intu- -itive we present its profiles explicitly in the first rows of th e left- -Article number, page 6 of 12 +As our parametrization of the mass-to-light ratio is not intuitive + we present its profiles explicitly in the first rows of th e leftArticle + number, page 6 of 12 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling 10 610 710 810 910 10 10 100 ALL @@ -612,73 +612,73 @@ r [kpc] 10 100POPULATIONS r [kpc] 0 10 20 30 40 50POPULATIONS r [kpc] Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy al ong the principal -axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, interme- -diate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the -1, 2, and 3 σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r +axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, intermediate, + and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the +1, 2, and 3 σ confidence levels. The true values are presented as black lines. Thin vertical lines mark the values of r 0 and the outer range of the -data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations. 
+data sets, from left to right. Right-hand side: same as left but for the fit of two stellar populations. and right-hand side panels of Fig. 8 for the results obtained for all stars and the populations, respectively. We further calculate the total density (second rows) and the total mass content (third -rows). We include the obtained orbit anisotropy within the m od- -eled range in the bottom rows. The consecutive columns prese nt +rows). We include the obtained orbit anisotropy within the m odeled + range in the bottom rows. The consecutive columns prese nt the results for the observations along the major, intermediate, -and minor axis. Green lines indicate values for the best-fit m od- -els whereas the colored areas of decreasing intensity corre spond -to 1, 2, and 3 σ confidence regions obtained as extreme values al- -lowed by the models with χ2 +and minor axis. Green lines indicate values for the best-fit m odels + whereas the colored areas of decreasing intensity corre spond +to 1, 2, and 3 σ confidence regions obtained as extreme values allowed + by the models with χ2 within a given region. In each panel the true values from the simulation are presented with black lines while thin vertical lines mark the values of r 0 and the outer range -of the data sets beyond which the reliability of results drop s sig- -nificantly. The true mass-to-light ratio profile was obtaine d by -dividing the total mass by the fitted King profiles, therefore the +of the data sets beyond which the reliability of results drop s significantly. + The true mass-to-light ratio profile was obtaine d by +dividing the total mass by the fitted King profiles, therefore the drop at 100 kpc is the numerical artifact occurring at the ver y outskirts of the galaxy. Whereas in the right-hand side panels of Fig. 
8 the resulting -anisotropy is obtained from the fit of all stars and uses only the -location of global minimum and confidence levels from two pop - -ulations (as in the top right panel of Fig. 7), in Fig. 9 we present +anisotropy is obtained from the fit of all stars and uses only the +location of global minimum and confidence levels from two pop ulations + (as in the top right panel of Fig. 7), in Fig. 9 we present another method of calculating the anisotropy. In the second and -third row we show the derived profiles for population I and II +third row we show the derived profiles for population I and II separately and combine them as stellar mass weighted averag e -in the top row. As in previous figures, three columns refer to the -diff erent lines of sight whereas the narrow fourth one shows the -behavior of the true profiles outside the modeled range which , as -we noticed in our previous studies, in a limited way influence s +in the top row. As in previous figures, three columns refer to the +diff erent lines of sight whereas the narrow fourth one shows the +behavior of the true profiles outside the modeled range which , as +we noticed in our previous studies, in a limited way influence s the results. Such an impact is understandable since the star s at -larger distances from the center are still included in the line-of- -sight measurements. 3.3. Comparison of fitting results +larger distances from the center are still included in the line-ofsight + measurements. 3.3. Comparison of fitting results The main strength of the two populations method comes from -tracing the underlying gravitational potential at diff erent scales. +tracing the underlying gravitational potential at diff erent scales. As can be seen in the bottom panels of Fig. 7, population I, which is more concentrated, is also more sensitive to Υ 0 , but gives weaker constraints on a or c . 
On the other hand, population II attempts to reproduce the total mass content at larger dista nces -as well, therefore showing stronger coupling between the pa ram- -eters. +as well, therefore showing stronger coupling between the pa rameters. + The global minimums of the χ2 - distributions for both ap- -proaches, that is modeling one and two populations, which we -identify as the best-fitting models, closely coincide showing that -there is no internal bias in the improved method. However, sig- -nificant diff erences can be observed when comparing the confi- -dence levels, mainly at 1 and 3 σ. Namely, we find that using + distributions for both approaches, + that is modeling one and two populations, which we +identify as the best-fitting models, closely coincide showing that +there is no internal bias in the improved method. However, significant + diff erences can be observed when comparing the confidence + levels, mainly at 1 and 3 σ. Namely, we find that using two populations, the constraints we obtain on the density an d -anisotropy profile are much stronger. +anisotropy profile are much stronger. Additionally, the more accurate method allows us to study -other e ff ects and biases, for example the consequences of the -nonsphericity of the modeled object. Whereas for the fit of all -stars the true values of the density, mass, and anisotropy pr ofiles -are contained within 1 σ confidence regions, the results for the +other e ff ects and biases, for example the consequences of the +nonsphericity of the modeled object. Whereas for the fit of all +stars the true values of the density, mass, and anisotropy pr ofiles +are contained within 1 σ confidence regions, the results for the populations are more or less biased depending on the axis. 
Th ey are well reproduced for the observation along the intermediate -axis, for which the e ff ects of nonsphericity seem to cancel out, +axis, for which the e ff ects of nonsphericity seem to cancel out, and more biased for the remaining lines of sight. We notice a -trend from under- to overestimation of the anisotropy when g o- -ing from the major to the minor axis. +trend from under- to overestimation of the anisotropy when g oing + from the major to the minor axis. Article number, page 7 of 12 A&A proofs: manuscript no. Populations4 -101 @@ -704,47 +704,47 @@ best model 1σ 2σ 3σ -Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulat ed galaxy. In rows: +Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulat ed galaxy. In rows: results for all stars (calculated as the superposition of two populations), population I, and population II. Colors fol low the convention used in -previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines) -outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, -2, and 3 σ confidence regions. +previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines) +outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, +2, and 3 σ confidence regions. 4. Modeling Fornax dSph In this section we present the application of our Schwarzsch ild modeling scheme to the observational data for the Fornax dSp h galaxy obtained by del Pino et al. (2015) and del Pino et al. (2017). This study is a follow-up of the work of Kowalczyk et a l. 
(2019) and can be directly compared to the results presented -there. Moreover, we refer the reader to these previous publica- -tions for details on the origin of data and our procedures use d +there. Moreover, we refer the reader to these previous publications + for details on the origin of data and our procedures use d for cleaning the spectroscopic sample. -Similarly to the approach introduced in Section 2.2, we di- -vided all available stars into two equal-size populations b ased on +Similarly to the approach introduced in Section 2.2, we divided + all available stars into two equal-size populations b ased on their metallicity and then cross-correlated the samples with the data used in Kowalczyk et al. (2019). The metallicity histog ram -of the final spectroscopic sample is shown in Fig. 10. Additio n- -ally, we color-coded each bin with the population it has been -assigned to, namely orange or blue for population I or II. Inter- -estingly, the case of Fornax is similar to our simulated gala xy +of the final spectroscopic sample is shown in Fig. 10. Additio nally, + we color-coded each bin with the population it has been +assigned to, namely orange or blue for population I or II. Interestingly, + the case of Fornax is similar to our simulated gala xy as the split at [Fe / H] = − 1 also captures an important feature -of the object’s star formation history, separating stars in to sub- -samples older and younger than 6 Gyr, as shown in Fig. 12 of +of the object’s star formation history, separating stars in to subsamples + older and younger than 6 Gyr, as shown in Fig. 12 of del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The -numbers of stars contained in the samples of all stars, popula- -tion I, and population II are given in Table 2, where the indic es -"phot" and "spec" refer to the photometric and kinematic sam - -ples. 
The sum of stars in the populations is lower than in the sample of all stars since only stars with reliable measureme nts +numbers of stars contained in the samples of all stars, population + I, and population II are given in Table 2, where the indic es +"phot" and "spec" refer to the photometric and kinematic sam ples. + The sum of stars in the populations is lower than in the sample of all stars since only stars with reliable measureme nts of metallicity could be included. N [Fe/H]pop I pop II 0 20 40 60 80 100 -2.5 -2 -1.5 -1 -0.5 0 -Fig. 10. Metallicity histogram of the final spectroscopic sample used in +Fig. 10. Metallicity histogram of the final spectroscopic sample used in the modeling of two stellar populations in the Fornax dSph. E ach bin is color-coded according to the population it has been assigned to, orange or blue for population I and II, respectively. -As we have shown in our earlier work, the light profile of the +As we have shown in our earlier work, the light profile of the Fornax dSph can be well reproduced with the three-parameter Article number, page 8 of 12 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling @@ -766,49 +766,49 @@ Sérsic parameter ( m ) 0.808 0.807 0.898 R [kpc]all stars popI popII -Fig. 11. Surface number density profiles of the photometric data sam- -ples for the Fornax dSph: all available stars (in red), the metal-rich pop- -ulation I (in orange), and the metal-poor population II (in blue). Thin +Fig. 11. Surface number density profiles of the photometric data samples + for the Fornax dSph: all available stars (in red), the metal-rich population + I (in orange), and the metal-poor population II (in blue). Thin vertical lines indicate r -0 (see text) and the outer boundary of the spec- -troscopic data. -Sérsic formula (Sérsic 1968). The profiles of number density for -all stars and both populations together with the best-fittin g Sérsic -profiles are presented in Fig. 11. 
The colors follow the conve n- -tion introduced in previous sections. Thin vertical lines indicate -the innermost data point for the light profile for all stars an d +0 (see text) and the outer boundary of the spectroscopic + data. +Sérsic formula (Sérsic 1968). The profiles of number density for +all stars and both populations together with the best-fittin g Sérsic +profiles are presented in Fig. 11. The colors follow the conve ntion + introduced in previous sections. Thin vertical lines indicate +the innermost data point for the light profile for all stars an d the outer boundary of the kinematic sample. The former, set a t log r = − 0 . 16, is also used as the minimum of the mass-to-light -ratio profile ( r -0 in Eq. 5). The fitted parameters of the profiles, +ratio profile ( r +0 in Eq. 5). The fitted parameters of the profiles, that is the normalization N 0 , the Sérsic radius R S , and the Sérsic parameter m , are included in the second part of Table 2. -Figure 12 presents the profiles of the observables used in the +Figure 12 presents the profiles of the observables used in the Schwarzschild modeling: the fraction of stars and the 2nd, 3 rd, -and 4th velocity moments (top to bottom) for the three data sa m- -ples: all stars, population I, and population II (in red, ora nge, and +and 4th velocity moments (top to bottom) for the three data sa mples: + all stars, population I, and population II (in red, ora nge, and blue, respectively). The error bars indicate 1 σ sampling errors. The parameter space for Υ ( r ) has been probed as follows: a ∈ [0 : 1 . 85] with a step ∆a = 0 . 05 and c ∈ [1 . 2 : 6] with a step ∆c = 0 . 2. We point out that in Kowalczyk et al. (2019) the -parameter c was fixed at c = 3 and now we fit it as a free pa- -rameter. As for the mock data in Section 3.2, diff erent values of +parameter c was fixed at c = 3 and now we fit it as a free parameter. 
+ As for the mock data in Section 3.2, diff erent values of Υ 0 were obtained with the transformation of velocity moments within the χ2 - fitting routine. The values of ∆χ2 + fitting routine. The values of ∆χ2 for all stars and the populations are shown in the two panels of Fig. 13 (left an d right-hand side, respectively). Due to the dense coverage o f the grid, we decided to include only the values within 3 σ from the -fitted minimums (see Section 3.1). -The profiles of the mass-to-light ratio, total density, tota l +fitted minimums (see Section 3.1). +The profiles of the mass-to-light ratio, total density, tota l mass, and velocity anisotropy resulting from the χ2 distributions are presented in the consecutive rows of Fig. 14. The anisotr opy -profile for the populations is based on the fit of all stars but u sing 0 0.05 0.1 0.15 0.2 0.25 +profile for the populations is based on the fit of all stars but u sing 0 0.05 0.1 0.15 0.2 0.25 0 0.4 0.8 1.2 1.6M(R) R [kpc]all stars pop I @@ -827,13 +827,13 @@ modeling scheme. In rows: the fraction of the total number of stars, the 2nd, 3rd, and 4th velocity moment. In red we present the values obtained for all stars whereas in orange and blue those for populations I and II, respectively. -the confidence levels on Υ from the fit of two populations. Green -lines indicate the values for the best-fitting models wherea s the -colored areas of decreasing intensity show the 1, 2, and 3 σ con- -fidence regions. Additionally, with black dashed lines we in clude +the confidence levels on Υ from the fit of two populations. Green +lines indicate the values for the best-fitting models wherea s the +colored areas of decreasing intensity show the 1, 2, and 3 σ confidence + regions. Additionally, with black dashed lines we in clude the results from Kowalczyk et al. (2019) for comparison. 
As a result of freeing the steepness of the mass-to-light -ratio profile (parameter c ) with respect to the previous study +ratio profile (parameter c ) with respect to the previous study Article number, page 9 of 12 A&A proofs: manuscript no. Populations4 0 0.5 1 1.5 @@ -855,72 +855,72 @@ A&A proofs: manuscript no. Populations4 0 3 6 9 12 χ2 -χ2 min Fig. 13. Values of χ2 - relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the populations (right + relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the populations (right panel) for the Fornax dSph. -(Kowalczyk et al. 2019), we obtained higher estimates of the en- -closed total mass at larger radii. In particular, for the mass en- -closed within 1.8 kpc we get M +(Kowalczyk et al. 2019), we obtained higher estimates of the enclosed + total mass at larger radii. In particular, for the mass enclosed + within 1.8 kpc we get M all ( < 1 . 8 kpc) = 3 . 87 + 1. 48 − 1. 56 × 10 8 M - ⊙ from the fit for all stars and M + ⊙ from the fit for all stars and M pops ( < 1 . 8 kpc) = 4 . 71 + 0. 87 − 1. 13 × 10 8 M - ⊙ from the fit of populations, while previously we had + ⊙ from the fit of populations, while previously we had M old ( < 1 . 8 kpc) = 3 . 7 + 1. 4 − 1. 3 × 10 8 M ⊙ . -Interestingly, despite the significant shift of the positio n of +Interestingly, despite the significant shift of the positio n of χ2 -min (to c = 4 . 2 for all stars and 3.6 for populations), the ob- -tained profile of the anisotropy parameter remains decreasing or -flat for all stars but changes to increasing from 0 to 0.5 for th e +min (to c = 4 . 2 for all stars and 3.6 for populations), the obtained + profile of the anisotropy parameter remains decreasing or +flat for all stars but changes to increasing from 0 to 0.5 for th e populations. Nevertheless, even in the latter case the prev ious -result agrees with the new finding within 1 σ. 
+result agrees with the new finding within 1 σ. The detailed analysis of the anisotropy is shown in Fig. 15 -where the middle and bottom panels present the profiles ob- -tained for each population separately. We notice that the pr ofile +where the middle and bottom panels present the profiles obtained + for each population separately. We notice that the pr ofile for population I is decreasing or has a local minimum whereas -for population II is increasing (from − 0 . 25 to 0.5 for the best- -fitting model). Since population I is more concentrated, the last +for population II is increasing (from − 0 . 25 to 0.5 for the bestfitting + model). Since population I is more concentrated, the last bins contain very few stars, which limits their credibility. The -top panel of Fig. 15 presents the anisotropy of all stars calc u- -lated as a weighted superposition of two populations. With such -approach we still obtain the increasing profile (from 0 to 0.5 ) but +top panel of Fig. 15 presents the anisotropy of all stars calc ulated + as a weighted superposition of two populations. With such +approach we still obtain the increasing profile (from 0 to 0.5 ) but the previous result agrees with it only within 2 σ. -Since Fornax dSph is significantly elongated with the pro- -jected ellipticity of ǫ = 0 . 30 ± 0 . 01 (Irwin & Hatzidimitriou +Since Fornax dSph is significantly elongated with the projected + ellipticity of ǫ = 0 . 30 ± 0 . 01 (Irwin & Hatzidimitriou 1995), we anticipate some bias in the obtained results cause d by the spherically symmetric modeling. Kowalczyk et al. 
(20 18) -studied such bias in an axisymmetric simulated object qualita- -tively similar to Fornax and identified diff erences in the system- -atic errors depending on whether the galaxy was observed alo ng +studied such bias in an axisymmetric simulated object qualitatively + similar to Fornax and identified diff erences in the systematic + errors depending on whether the galaxy was observed alo ng its major or minor axis. Assuming that Fornax is observed alo ng the line of sight in between these extremes, we expect the total -mass profile to be slightly overestimated and the anisotropy to be +mass profile to be slightly overestimated and the anisotropy to be underestimated, further strengthening the likelihood of the real -anisotropy to be radial and its profile to be growing with radius +anisotropy to be radial and its profile to be growing with radius with respect to the results of Kowalczyk et al. (2019). -Both constant (like for our population I) and growing (pop- -ulation II) anisotropy profiles can arise from biased modeling of the real growing profile by observing an object along the +Both constant (like for our population I) and growing (population + II) anisotropy profiles can arise from biased modeling of the real growing profile by observing an object along the minor and major axis, respectively. However, for the bias to -occur in two populations presented here, their inner orienta- -tions would need to be opposite. Since such morphological fe a- -tures are not supported by the photometric studies of Fornax -(del Pino et al. 2015; Wang et al. 2019) which rather find a good +occur in two populations presented here, their inner orientations + would need to be opposite. Since such morphological fe atures + are not supported by the photometric studies of Fornax +(del Pino et al. 2015; Wang et al. 
2019) which rather find a good spatial alignment between the stellar populations, we conc lude -that the anisotropy profiles of the two populations modeled in -this work are indeed significantly distinct. -Finally, it is worth noticing that the so-called mass-follows- -light model, that is the one following from the assumption th at -the total density traces the stellar distribution, is no lon ger sup- -ported by the fit of the populations. With our parametrizatio n, +that the anisotropy profiles of the two populations modeled in +this work are indeed significantly distinct. +Finally, it is worth noticing that the so-called mass-followslight + model, that is the one following from the assumption th at +the total density traces the stellar distribution, is no lon ger supported + by the fit of the populations. With our parametrizatio n, the mass-follows-light model corresponds to a = 0 and whereas -it is enclosed within 3 σ for the fit of all stars, as was the case +it is enclosed within 3 σ for the fit of all stars, as was the case in Kowalczyk et al. (2019), the allowed values for the improved method are much larger, as demonstrated by the right panel of Fig. 13. @@ -928,21 +928,21 @@ Fig. 13. Building on the previously created implementation of the Schwarzschild orbit superposition method focused on modeling dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018, -2019), we improved our tool by introducing multiple stellar pop- -ulations. Such an improvement is desirable and justified sin ce +2019), we improved our tool by introducing multiple stellar populations. + Such an improvement is desirable and justified sin ce many of the dwarfs show signs of multiple star formation bursts -or extended star formation episodes. As the diff erent populations +or extended star formation episodes. 
As the diff erent populations trace the common underlying gravitational potential, one m ay -expect a significant improvement in the estimates of not only the +expect a significant improvement in the estimates of not only the total mass content but also the orbit anisotropy since this r obust modeling technique reproduces the anisotropy as a by-produ ct of the modeling rather than taking it as an assumption. -We have tested our hypothesis by modeling mock data gener- -ated from a galaxy formed in the Illustris simulation. Due to the -limitations of the resolution, we chose a galaxy of mass a few or- -ders of magnitude larger than the estimated masses of classical -dwarfs. Still, the galaxy possessed appropriate qualitative char- -acteristics, such as the lack of gas and an almost spherical shape, +We have tested our hypothesis by modeling mock data generated + from a galaxy formed in the Illustris simulation. Due to the +limitations of the resolution, we chose a galaxy of mass a few orders + of magnitude larger than the estimated masses of classical +dwarfs. Still, the galaxy possessed appropriate qualitative characteristics, + such as the lack of gas and an almost spherical shape, Article number, page 10 of 12 K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling 101103105 @@ -969,30 +969,30 @@ r [kpc] r [kpc] Fig. 14. Results of Schwarzschild modeling of the Fornax dSph. In rows: derived mass-to-light ratio, total density, total mass, and -anisotropy parameter. In columns: results for all stars and the popula- -tions, respectively. Green lines indicate the values for the best-fit models +anisotropy parameter. In columns: results for all stars and the populations, + respectively. Green lines indicate the values for the best-fit models whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ -confidence regions. The best-fitting values obtained by Kowalczyk et al. +confidence regions. 
The best-fitting values obtained by Kowalczyk et al. (2019) are shown with black dashed lines. -that made it a good test bed for modeling techniques applica- -ble to dSph galaxies. We applied our approach to all data and +that made it a good test bed for modeling techniques applicable + to dSph galaxies. We applied our approach to all data and to two stellar populations separately, comparing the accur acy of the obtained results. Although the addition of the second tr acer -seemingly increases the number of constraints twice, the in cre- -ment is somewhat compromised by the sampling errors since th e +seemingly increases the number of constraints twice, the in crement + is somewhat compromised by the sampling errors since th e number of stars in each sample is then reduced. Still, we foun d -strong improvements in the accuracy of the method when us- -ing two populations. The results of the modeling show that th e -density and velocity anisotropy profiles are more strongly c on- -strained, most importantly at the 3 σ level, that is the range of +strong improvements in the accuracy of the method when using + two populations. The results of the modeling show that th e +density and velocity anisotropy profiles are more strongly c onstrained, + most importantly at the 3 σ level, that is the range of allowed values is much narrower. Similarly to the conclusions of Kowalczyk et al. (2018) who -explored the e ff ects of nonsphericity using large and small -data samples, the comparison of results presented in the lef t- -and right-hand side panels of Fig. 8 suggests that the improved +explored the e ff ects of nonsphericity using large and small +data samples, the comparison of results presented in the lef tand + right-hand side panels of Fig. 8 suggests that the improved method using two stellar populations gives more precise but less -accurate outcome. 
However, in both studies the apparent dete- -rioration of the reliability is a consequence of modeling of a +accurate outcome. However, in both studies the apparent deterioration + of the reliability is a consequence of modeling of a nonspherical object. In both cases, a simpler approach (muc h smaller data samples or using one stellar population) resulted -2-101 0 0.4 0.8 1.2 1.6POP I + POP IIβ(r) @@ -1007,87 +1007,87 @@ best model 1 σ 2 σ 3 σ K19 -Fig. 15. Profiles of the anisotropy parameter obtained with the +Fig. 15. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations for the Fornax dSph. -In rows: results for all stars (calculated as the superposition of two pop- -ulations), population I, and population II. Color lines indicate values -for the best-fit models whereas the colored areas of decreasi ng intensity -show the 1, 2, and 3 σ confidence regions. The dashed black line shows +In rows: results for all stars (calculated as the superposition of two populations), + population I, and population II. Color lines indicate values +for the best-fit models whereas the colored areas of decreasi ng intensity +show the 1, 2, and 3 σ confidence regions. The dashed black line shows the result from Kowalczyk et al. (2019) for comparison. -in larger final uncertainties, usually containing the true values -within 1 σ confidence region. On the other hand, the improved +in larger final uncertainties, usually containing the true values +within 1 σ confidence region. On the other hand, the improved methods exhibit substantially reduced uncertainties, hig hlighting the underlying bias. Our method parametrizes the total mass content with the -mass-to-light ratio varying with radius as a power-law in th e log- -log scale. 
We made two main changes with respect to our previ- -ous work: we added a third parameter c controlling the steepness -of the mass-to-light ratio profile (previously fixed at the va lue of -3) and allowed for diff erent stellar density profiles (previously -only Sérsic, now also King). These changes are of course cou- -pled since diff erent density profiles require diff erent exponents to -reproduce the same mass profile. It is visible also in our resu lts -since the King profile applied in the simulated galaxy gave us -values of c lower than 3. Nevertheless, we decided to use diff er- -ent density profiles to make our method more general and appli- -cable to objects, such as our Illustris galaxy, for which the Sérsic +mass-to-light ratio varying with radius as a power-law in th e loglog + scale. We made two main changes with respect to our previous + work: we added a third parameter c controlling the steepness +of the mass-to-light ratio profile (previously fixed at the va lue of +3) and allowed for diff erent stellar density profiles (previously +only Sérsic, now also King). These changes are of course coupled + since diff erent density profiles require diff erent exponents to +reproduce the same mass profile. It is visible also in our resu lts +since the King profile applied in the simulated galaxy gave us +values of c lower than 3. Nevertheless, we decided to use diff erent + density profiles to make our method more general and applicable + to objects, such as our Illustris galaxy, for which the Sérsic formula does not provide a good approximation of the density distribution. Finally, we applied the improved method to the data for the -Fornax dSph galaxy. Due to the addition of another free param - -eter in our functional form for the mass-to-light ratio, our re- -sults for modeling all stars are slightly diff erent from the ones +Fornax dSph galaxy. 
Due to the addition of another free param eter + in our functional form for the mass-to-light ratio, our results + for modeling all stars are slightly diff erent from the ones Article number, page 11 of 12 A&A proofs: manuscript no. Populations4 obtained in Kowalczyk et al. (2019). However, in terms of the total density and mass distribution the estimates obtained here agree very well with those earlier results in the range cover ed -by the data. Therefore, the detailed comparison with other e sti- -mates from the literature presented in Kowalczyk et al. (201 9) is +by the data. Therefore, the detailed comparison with other e stimates + from the literature presented in Kowalczyk et al. (201 9) is still valid and we do not repeat it here. -A more significant diff erence with respect to these previous +A more significant diff erence with respect to these previous estimates is seen in the results of modeling two populations in -Fornax. In this case we find the anisotropy to be slightly incr eas- -ing rather than decreasing with radius and, most importantly, the -confidence regions for this parameter, as well as for the den- -sity, are much narrower. We were thus able to obtain tighter c on- -straints on the properties of Fornax, which means that the im - -proved method is successful. For the first time, we were also a ble -to deduce the velocity anisotropy profiles for each of the pop ula- -tions separately. We found that the more concentrated, meta l-rich -population I has a decreasing anisotropy profile while the mo re +Fornax. In this case we find the anisotropy to be slightly incr easing + rather than decreasing with radius and, most importantly, the +confidence regions for this parameter, as well as for the density, + are much narrower. We were thus able to obtain tighter c onstraints + on the properties of Fornax, which means that the im proved + method is successful. 
For the first time, we were also a ble +to deduce the velocity anisotropy profiles for each of the pop ulations + separately. We found that the more concentrated, meta l-rich +population I has a decreasing anisotropy profile while the mo re extended, metal-poor population II has the anisotropy incr easing -with radius. This finding may partially explain the large spr ead -of the anisotropy values obtained in the literature and summ a- -rized in Table 2 and 3 of Kowalczyk et al. (2019), which were +with radius. This finding may partially explain the large spr ead +of the anisotropy values obtained in the literature and summ arized + in Table 2 and 3 of Kowalczyk et al. (2019), which were often based on modeling subsamples of our spectroscopic data set. - For both studied objects we split the stars into two popula- -tions by dividing them in half based on their metallicity, Z (in + For both studied objects we split the stars into two populations + by dividing them in half based on their metallicity, Z (in solar units), for the Illustris galaxy and [Fe / H] for Fornax. Such -a method is approximate but justified. Both galaxies have com - -plex star formation history with multiple star formation bu rsts, as +a method is approximate but justified. Both galaxies have com plex + star formation history with multiple star formation bu rsts, as demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. (2013), producing multiple stellar populations which cann ot be easily tracked as the metallicity is a good but not perfect pr oxy for the stellar age. Moreover, the metallicity histograms f or both -objects are approximately unimodal not allowing for a conve - -nient separation. More refined methods of division have been -suggested in the literature, for example in the form of the likeli- -hood function based on the position, velocity, and metallic ity in- -dex (Walker & Peñarrubia 2011). 
However, the likelihood fun c- -tion requires many assumptions which introduce additional un- -certainties into the treatment of the data. On the other hand , our -approach ensures the maximization of each sample (and there - -fore minimization of sampling errors) while capturing the im- -portant features of the star formation history. +objects are approximately unimodal not allowing for a conve nient + separation. More refined methods of division have been +suggested in the literature, for example in the form of the likelihood + function based on the position, velocity, and metallic ity index + (Walker & Peñarrubia 2011). However, the likelihood fun ction + requires many assumptions which introduce additional uncertainties + into the treatment of the data. On the other hand , our +approach ensures the maximization of each sample (and there fore + minimization of sampling errors) while capturing the important + features of the star formation history. Further improvements to the Schwarzschild modeling method are certainly possible. One way to proceed would be to include the modeling of the proper motions of the stars. For n ow, measurements of transverse velocities are available only f or the brightest stars in dSph galaxies, but even small samples of this -type could provide further constraints on the models, as dem on- -strated by Strigari et al. (2007) and Massari et al. (2020). +type could provide further constraints on the models, as dem onstrated + by Strigari et al. (2007) and Massari et al. (2020). Acknowledgements. We are grateful to Andrés del Pino for providing the data for the Fornax dSph and to the Illustris team for making their sim ulations publicly available. Useful comments from the anonymous referee are kindly appreciated. @@ -1097,8 +1097,8 @@ References Amorisco, N. C., & Evans, N. W. 2012, MNRAS, 419, 184 Battaglia, G., Helmi, A., Tolstoy, E., et al. 2008, ApJ, 681, L13 Bellazzini, M., Ferraro, F. R., & Pancino, E. 
2001, MNRAS, 327, L15 -Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton Uni- -versity Press, Princeton) +Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University + Press, Princeton) Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia, G. 2013, MNRAS, 433, 3173 del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505 @@ -1129,11 +1129,11 @@ Mateo, M. 1998, ARA&A, 36, 435 Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, 12 Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022 -Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numer- -ical Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge) +Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numerical + Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge) Schwarzschild, M. 1979, ApJ, 232, 236 -Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cor- -doba, Argentina) +Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba, + Argentina) Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 657, L1 Tolstoy, E., Hill, V., & Tosi, M. 2009, ARA&A, 47, 371 Valluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 @@ -1143,4 +1143,4 @@ Vogelsberger, M., Genel, S., Springel, V., et al. 2014a, Nature, 509, 177 Vogelsberger, M., Genel, S., Springel, V., et al. 2014b, MNRAS, 444, 1518 Walker, M. G., & Peñarrubia, J. 2011, ApJ, 742, 20 Wang, M. Y., de Boer, T., Pieres, A., et al. 
2019, ApJ, 881, 118 -Article number, page 12 of 12 +Article number, page 12 of \ No newline at end of file diff --git a/read/results/playa/2201.00178.txt b/read/results/playa/2201.00178.txt index 83c41dd..5c079d2 100644 --- a/read/results/playa/2201.00178.txt +++ b/read/results/playa/2201.00178.txt @@ -2,7 +2,7 @@ Draft version January 4, 2022 Typeset using LA T EX default style in AASTeX631 -Imaging the Sun’s near-surface flows using mode-coupling analysis +Imaging the Sun’s near-surface flows using mode-coupling analysis Prasad Mani , 1 Chris S. Hanson , 2 andShravan Hanasoge 1, 2 @@ -11,26 +11,26 @@ Prasad Mani , 1 2 Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE ABSTRACT -The technique of normal-mode coupling is a powerful tool with which to seismically image non- -axisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to -probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and +The technique of normal-mode coupling is a powerful tool with which to seismically image nonaxisymmetric + phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to +probe steady, near-surface flows in the Sun. Using Doppler cubes obtained from the Helioseismic and Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling measurements to show that the resulting divergence and radial vorticity maps at supergranular length -scales (∼ 30 Mm) near the surface compare extremely well with those obtained using the Local Corre- -lation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, +scales (∼ 30 Mm) near the surface compare extremely well with those obtained using the Local Correlation + Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, while ≥ 0.8 is obtained for the radial vorticity. 
Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) 1. INTRODUCTION -Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect +Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect on solar oscillations (seeChristensen-Dalsgaard2002, for a review). These are resonant normal modes of the Sun, behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be retrieved through meticulous inversions of these seismic measurements. -Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. -Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through +Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. +Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through global mode frequency splitting (Thompson et al.1996;Schou et al.1998), and the resolving the neutrino problem -(Bahcall & Pinsonneault1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. +(Bahcall & Pinsonneault1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. 1997;Gizon et al.2020) through time-distance helioseismology (Duvall et al.1993), and farside imaging of active regions (Braun & Lindsey2001) and their near side emergence (Birch et al.2016), through helioseismic holography (Lindsey & Braun2000). 
The recent discovery of various inertial waves (Gizon et al.2021), including the equatorial @@ -40,31 +40,31 @@ In recent years, the use of global mode-coupling helioseismology (Woodard1989;La received attention, with many studies seeking to validate and demonstrate the importance of such a technique for investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically challenging, the data analysis is simple and utilizes all the information registered by the mode. Thus far, global -mode-coupling has been validated through observations of the meridional flow (Vorontsov2011;Woodard et al.2013), -differential rotation (Schad & Roth2020;Kashyap et al.2021), global-scale convection (Woodard2014,2016;Hanasoge +mode-coupling has been validated through observations of the meridional flow (Vorontsov2011;Woodard et al.2013), +differential rotation (Schad & Roth2020;Kashyap et al.2021), global-scale convection (Woodard2014,2016;Hanasoge et al.2020;Mani & Hanasoge2021) and Rossby modes (Hanasoge & Mandal2019;Mandal & Hanasoge2020;Mandal et al.2021). Local mode-coupling analysis in the Cartesian approximation, formulated byWoodard(2006), was validated byHanson et al.(2021) (hereafter H21) by examining the power-spectrum of supergranular waves and comparing with previous time-distance studies (Langfellner et al.2018). prasad.subramanian@tifr.res.inarXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 -2 Mani et al. -Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combi- -nation of model-eigenfunctions (e.g., Model SChristensen-Dalsgaard2021). The model eigenfunctions form a complete + Mani et al. +Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination + of model-eigenfunctions (e.g., Model SChristensen-Dalsgaard2021). The model eigenfunctions form a complete and orthogonal basis. 
By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and -flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation +flows. In this state, the oscillations are considered to be uncoupled. The weights needed to express the solar-oscillation eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then -reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield +reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights -(Woodard2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial -and temporal frequencies, leaving us with measurements sensitive to different quantities of interest. -In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps +(Woodard2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial +and temporal frequencies, leaving us with measurements sensitive to different quantities of interest. +In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is -reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow -and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order +reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow +and radial vorticity are described. 
We also demonstrate signal associated with supergranular flow in a radial-order coupling (p 2 -p 2 ), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. -We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. +We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. 1.1. Forward problem In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to AppendixAfor a complete derivation of the forward problem. Working in the plane-parallel atmosphere (see alsoWoodard2006), we @@ -73,15 +73,15 @@ x and e y in our local Cartesian domain as pointing towards west and north on the solar surface, respectively, and e z points outwards. This approximation is valid when observing patches of the surface -that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood +that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood of the supergranular scale ( ∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the horizontal wavenumber qR ≈ 120 (Rincon & Rieutord2018), where q = | q | = |( q x , q y )| is the vector horizontal -wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow +wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, seeRincon -& Rieutord2018), permitting us to model the flow vector uu +& Rieutord2018), permitting us to model the flow vector uu u = (u x , u y , u @@ -96,7 +96,7 @@ where P = P σ (x ) are poloidal and toroidal scalar functions, varying with position x and temporal frequency σ . ∇ is the 3D gradient operator. 
While mode-coupling can easily be extended to study time-varying perturbations (seeWoodard2016;Mani & Hanasoge2020;Hanasoge et al.2020;Mandal & Hanasoge2020, for -example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period +example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq1using vector calculus results in @@ -108,34 +108,34 @@ z P ) + ∇ z , (2) where ∇ h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the -Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a +Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a function of horizontal wavenumber q and depth z e -z . Hence the poloidal and toroidal flows are described by P +z . Hence the poloidal and toroidal flows are described by P q (z ) and T -q (z ), respectively. Furthermore, we parametrize the flow along e +q (z ), respectively. Furthermore, we parametrize the flow along e z using basis functions f (z ) (Chebyshev, B -spline, etc). This is expressed as P ≡ P - q (z ) = + q (z ) = j f j (z ) P q j , T ≡ T -q ( z ) = +q ( z ) = j f j (z ) T q j . (3) -The flow coefficients P +The flow coefficients P q j and T q j , represented by the discrete indices q and j , become ideal candidates for inversions, -where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be +where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be exploited to expedite inversions. 
Note that P q j = P ∗ − q j and T q j = T ∗ -− q j for the flow field to be real in the spatio- -temporal domain. -To infer flows from wavefields φ scattered by a perturbation of length scale q , cross-correlate them in the manner +− q j for the flow field to be real in the spatiotemporal + domain. +To infer flows from wavefields φ scattered by a perturbation of length scale q , cross-correlate them in the manner Imaging near-surface flows using mode-coupling analysis 3 φ ω ∗ k φ ω @@ -144,14 +144,14 @@ k +q , where k is the oscillation mode wavenumber (k y ) and ω is the temporal frequency. Relate φ ω ∗ k φ ω k +q thus -to the flow coefficients P +to the flow coefficients P q j and T q j (see eqA7) φ ω ∗ k φ ω k +q = H ω -kk - nn +kk + nn j C q j, k P q j + D @@ -159,51 +159,51 @@ q j, k T q j . (4) The weight factor H ω (see eqA8) is a function of frequency, capturing information about the extent of coupling between -the two modes [ n, k ] and [n - , k - ], where n and n - are the radial orders of the modes, and k = |k | and k - = |k +the two modes [ n, k ] and [n + , k + ], where n and n + are the radial orders of the modes, and k = |k | and k + = |k | = |k + q |. -The spectral profile of the mode (see eqA9) is approximated using a Lorentzian (Anderson et al.1990). The more the +The spectral profile of the mode (see eqA9) is approximated using a Lorentzian (Anderson et al.1990). The more the Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms C q j, k and D q j, k are poloidal -and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements +and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements and are derived from the solar model see AppendixA. They possess the symmetry relation: C q j, k = C − q j, −k and D q j, k = D -− q j, − k (see eqA6). 
The kernels, as flows, are expressed on the basis f +− q j, − k (see eqA6). The kernels, as flows, are expressed on the basis f j ( z ). 1.2. Least-squares of cross-correlation Even though φ ω ∗ k φ ω -k +q isolates the effect of flow perturbations at individual wavenumbers q , a more compact mea- -surement, known in mode-coupling literature as ’ B -coefficients’, is much better designed for inversion as it reduces the -dimension of the problem. A least-squares fit to the cross-correlation φ ω ∗ +k +q isolates the effect of flow perturbations at individual wavenumbers q , a more compact measurement, + known in mode-coupling literature as ’ B -coefficients’, is much better designed for inversion as it reduces the +dimension of the problem. A least-squares fit to the cross-correlation φ ω ∗ k φ ω k +q (seeWoodard2006,2014,2016) results -in the B -coefficients B +in the B -coefficients B k ,q , according to B - k ,q = + k ,q = ω H ω ∗ -kk +kk nn φ ω ∗ k φ ω k +q ω |H ω -kk +kk nn | 2 . (5) Multiplying eq4on both sides by H ω ∗ -kk - nn and substituting by eq5on the left-hand-side results in a concisely defined +kk + nn and substituting by eq5on the left-hand-side results in a concisely defined forward problem (compare with eq4) B - k ,q = + k ,q = j C q j, k P q j + D @@ -212,36 +212,36 @@ q j . (6) In eq5,Woodard(2007) and H21 thus far only considered positive-frequency components in the summation over ω . Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω nk , -|ω | ∈ +|ω | ∈ ω nk − Γ nk /2 , ω nk + Γ nk / 2 or -|ω | ∈ +|ω | ∈ ω -n - k +n + k − Γ - n - k + n + k / 2, ω -n - k +n + k + Γ -n - k +n + k / 2 . (7) Summing over ± ω guarantees that the parity B k ,q = B ∗ −k ,− q (see AppendixAfor derivation) is obeyed, thereby -ensuring that the flow field on the right-hand-side of eq6is a real physical quantity in the spatio-temporal domain. 
+ensuring that the flow field on the right-hand-side of eq6is a real physical quantity in the spatio-temporal domain. Taking the complex conjugate on both sides of eq6and considering the negative wavenumber components − q and − k , B ∗ -− k , −q = +− k , −q = j C − q j, − k P ∗ −q j + D @@ -249,24 +249,24 @@ j C − q j . (8) Substituting parity and symmetry relations for all terms in the above results in eq6. As B k , q is constructed by a -least-squares fitting, it is noteworthy that summing over − ω will also lead to improvement in its signal-to-noise as a +least-squares fitting, it is noteworthy that summing over − ω will also lead to improvement in its signal-to-noise as a by-product. 1.3. Noise model -In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from -the observed B -coefficients. For estimating the contribution from realization noise to the measurements, we make the -following assumptions (Gizon & Birch2004): that the excitation of the wavefield is modelled as a multivariate Gaussian -random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. +In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from +the observed B -coefficients. For estimating the contribution from realization noise to the measurements, we make the +following assumptions (Gizon & Birch2004): that the excitation of the wavefield is modelled as a multivariate Gaussian +random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a random forcing function (seeDuvall & Harvey1986). Modes are thus generated with random phases and amplitudes -and with finite lifetimes. 
This stochasticity leads to realization noise in repeated measurements of mode parameters -4 Mani et al. +and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters + Mani et al. Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p 1 (orange) and p 2 (green). The shaded regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of kR and ω/2 π to which we have restricted ourselves in this analysis. Beyond kR - of 2000, it is seen that the theoretical fitting + of 2000, it is seen that the theoretical fitting of mode frequencies start deviating from the observed dispersion relation for the f -mode. such as its amplitude, frequency and linewidth, and consequently in B k ,q in our case. We use the same noise model @@ -281,44 +281,44 @@ k ,q = G − k ,− q (see AppendixA for explanation). 2. DATA ANALYSIS -In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the +In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO,Scherrer et al.2012). Each image is Postel pro jected, with a spatial resolution of approximately 0. 48Mm, sperated in time by 45 seconds, and is tracked at the (Snodgrass1984) rotation rate. Here, we select a patch that is 194 .4 × 194 .4 Mm2 in size, tracked for 24 hours and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number 2197, Carrington longitude 90 ◦ - ). This Dopplercube is considered as the physical wavefield φ ( x, y ; t ). The Fourier-space -wavefield φ ω + ). This Dopplercube is considered as the physical wavefield φ ( x, y ; t ). 
The Fourier-space +wavefield φ ω k (and subsequently, the cross-correlation φ ω ∗ k φ ω k +q ) is obtained by computing the 3D spatial and temporal Fourier transform of the Dopplercube. -The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in +The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in Eq6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days;Rincon & Rieutord2018) over this period. Our observation region is close to the disk center to also avoid any contamination from center-to-limb systematics (Zhao et al.2012;Langfellner et al.2015). Maximum signal can be extracted from the weighted summation of the cross correlations (eq5) when the spectral -profiles of the two modes [ n, k ] and [n - , k +profiles of the two modes [ n, k ] and [n + , k ] closely align in ω space. This implies that their mode frequencies should be -sufficiently close ( |ω +sufficiently close ( |ω nk − ω -n - k +n + k | ≤ δ , the separation parameter). Since Lorentzians decay rapidly, the summation over -± ω is significant only over a few linewidths ( , the summation parameter; see eq7). We have empirically found and -tabulated δ in Table1for the radial order couplings n - n +± ω is significant only over a few linewidths ( , the summation parameter; see eq7). We have empirically found and +tabulated δ in Table1for the radial order couplings n - n ∈ f-f, p 1 -p 1 , and p 2 -p 2 (the signal strength depends only weakly on ; we set it to 3 line widths). -Figure1shows that for any two adjacent ridges (adjacent n and n +Figure1shows that for any two adjacent ridges (adjacent n and n ), mode frequencies ω nk and ω - n + n k become spaced farther apart with increasing wavenumber kR . It is also known that mode linewidth Γ grows with radial orders for @@ -326,13 +326,13 @@ a given kR . 
Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of observation set the total number of modes within a range of kR (and ω/2π ) that can be clearly observed, thereby -affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually -inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR - at fixed -radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR +affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually +inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR + at fixed +radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR ≤ 2000 and qR ≤ 300. Our -frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). +frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). Imaging near-surface flows using mode-coupling analysis 5 Coupling k R range # of δ @@ -348,15 +348,15 @@ p 2 -p 2 [200,1000] 5886 3 [1000,1300] 4280 3 -Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different +Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different ranges of kR . 3. INVERSION -The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements +The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements B k , q from the linear relation in eq6. We describe inversion using regularized-least-squares (RLS) method here and leave Subtractive Optimally Localized Averages (SOLA,Pijpers & Thompson1994) for AppendixB. 
The methods -complement each other (seeSekii1997), where RLS tries to minimize the misfit between data and model, whereas +complement each other (seeSekii1997), where RLS tries to minimize the misfit between data and model, whereas SOLA gives better localization. For total number of modes M , RLS scales as M xJ where J is the number of basis functions f j (z ) (J M ; see eq3and section3.1), whereas SOLA scales as M 2 @@ -369,12 +369,12 @@ present even in p 1 , and p 2 -p 2 (see Figure3), and possibly other higher order self- and cross-couplings. Since we are -interested in only surface flows, we leave higher order coupling to future work. +interested in only surface flows, we leave higher order coupling to future work. It bears mentioning that the slopes of the ridges in the kR - ν spectrum (Figure1) increase with radial order. This limits us to low-to-intermediate kR - (< 1000) for these higher radial orders if we are to remain under the acoustic cut- -off frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals + (< 1000) for these higher radial orders if we are to remain under the acoustic cutoff + frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation. Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions @@ -383,21 +383,21 @@ helioseismic technique. 3.1. RLS For given q , the forward problem may be stated as KU = B, (10) -with the aim to minimize the misfit +with the aim to minimize the misfit k ||KU − B|| 2 , with || || 2 denoting the L 2 norm. Here, K is the matrix formed by the sensitivity kernels: {C q j, k , D -q j, k } . U is a vector composed of the flow coefficients: {P +q j, k } . 
U is a vector composed of the flow coefficients: {P q j , T q j } and B is a vector -composed of computed B -coefficients: { B +composed of computed B -coefficients: { B k ,q }. The least-squares problem is solved simultaneously for poloidal and -toroidal flow. We use B -spline basis functions as our f +toroidal flow. We use B -spline basis functions as our f j (z ), comprising 11 knots spaced uniformly in acoustic radius, -for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M ) and 11 basis +for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M ) and 11 basis functions for each poloidal and toroidal, the dimensions of K, U and B are thus M × 22, 22 × 1, and M × 1 respectively. Normalizing both sides of eq10by the noise covariance Λ (a diagonal matrix with the entries G k ,q ; see eq9; dimension @@ -414,21 +414,21 @@ U =(K K Λ −1 B. (12) -6 Mani et al. -Figure 2. Left : Averaging kernel for poloidal flow (see sectionB.2, eqB17, and left panel of Figure8) for q R + Mani et al. +Figure 2. Left : Averaging kernel for poloidal flow (see sectionB.2, eqB17, and left panel of Figure8) for q R = [ −112, − 45], at the depth z o = −0 . 41 Mm. Right : L-curve for the mode q R = [ −112, − 45]; the knee (λ = 2 . 48) is marked by a blue diamond. -Since the least-squares problem is typically ill-posed, we restate the minimization as +Since the least-squares problem is typically ill-posed, we restate the minimization as k ||KU − B|| 2 + λ ||U || 2 with -the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution +the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution norm minimization (second term). 
Under-regularizing can lead to a solution U that is dominated by errors in the data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this -regularization makes the problem better conditioned and is now defined as +regularization makes the problem better conditioned and is now defined as U = (K Λ− 1 K + λ I )− 1 @@ -439,20 +439,20 @@ where I is the identity matrix for L 1 regularization. The knee-point of the L-curve (Hansen1992), a curve formed by plotting ||U || 2 vs ||KU − B|| -2 for different values of λ (see right panel of Figure2), is usually chosen as the -regularization parameter. After successfully inverting for U , we reconstruct the flow using eq3. Results for poloidal -flow P +2 for different values of λ (see right panel of Figure2), is usually chosen as the +regularization parameter. After successfully inverting for U , we reconstruct the flow using eq3. Results for poloidal +flow P q are shown in Figure3. 4. LCT -To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained -from Local Correlation Tracking method (LCT;November & Simon1988). LCT provides surface-flow maps by +To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained +from Local Correlation Tracking method (LCT;November & Simon1988). LCT provides surface-flow maps by examining the advection of convective granules (1.2 Mm, qR - ≈ 3500;Hathaway et al.2015) by underlying larger- -scale flow systems. Since granules are used as tracers, which are much smaller in size than supergranules ( ≈ 35 Mm), -LCT is an effective method (seeRieutord et al.2001) to produce surface horizontal flow maps of supergranulation. + ≈ 3500;Hathaway et al.2015) by underlying largerscale + flow systems. 
Since granules are used as tracers, which are much smaller in size than supergranules ( ≈ 35 Mm), +LCT is an effective method (seeRieutord et al.2001) to produce surface horizontal flow maps of supergranulation. Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section2 -(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob- -tained and Postel pro jected. The horizontal flows are deduced by tracking the proper motions of granules between +(tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained + and Postel pro jected. The horizontal flows are deduced by tracking the proper motions of granules between consecutive intensity images, which we denote as I 1 , I 2 . The LCT method selects a patch in two images each @@ -474,15 +474,15 @@ A Gaussian of width sigma allows to isolate a small region surrounding the grid moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in section1.1. The two patches I 1 , I -2 are then cross correlated for different values of position shifts ∆x , +2 are then cross correlated for different values of position shifts ∆x , C - ij (∆x, ∆ y ) = + ij (∆x, ∆ y ) = dx I ∗ 1 (− x )I 2 (∆x − x) . (14) The shift ∆x = (∆x, ∆ y ) that maximizes the cross-correlation C ij is taken to be the proper motion of the granule. -Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules ( < 10 +Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules ( < 10 min), the velocities are given by v x = ∆x/ ∆ t and v y = ∆y/∆ t . This exercise is repeated for all grid points in the @@ -494,7 +494,7 @@ In practice, we use the Fourier LCT algorithm (FLCT,Fisher & Welsch2008) for com y . 
FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the Imaging near-surface flows using mode-coupling analysis 7 -Figure 3. Top : Inverted poloidal flow power-spectrum for the three couplings f-f, p +Figure 3. Top : Inverted poloidal flow power-spectrum for the three couplings f-f, p 1 -p 1 , and p 2 -p @@ -507,14 +507,14 @@ q mean. Total power appears to increase through the radial orders. Power is in units of m2 /s 4 . -dominant length scale of the velocity field in the images. The Postel-pro jected intensity images are fed as input to the +dominant length scale of the velocity field in the images. The Postel-pro jected intensity images are fed as input to the FLCT code. v x and v y are then computed for consecutive pairs of images and are averaged over the entire day. 5. MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY For mode-coupling, horizontal divergence (hereafter div ) and radial vorticity (hereafter curl ) are computed by -substituting P and T from eq3into eq2as below - -uu +substituting P and T from eq3into eq2as below uu + u( q , z ) = − ∇2 P e z + ∇(∂ @@ -542,30 +542,30 @@ u(q , z ) = q 2 ∂ z P, (16) and curl is given by, - + ∇ × uu u( q , z ) z = q 2 T . (17) -We follow similar steps to those taken inLangfellner et al.(2015) for comparison of flow maps with LCT. The -essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR +We follow similar steps to those taken inLangfellner et al.(2015) for comparison of flow maps with LCT. The +essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of interest (see Figure4), and subsequently convert it to real space. We seek to show comparisons (see Figures5,6, and7) for qR - = 100, 150, 200 and 250. 
To sufficiently delineate -flows at these length scales, we apply a Gaussian filter (see Figure4) to flows obtained from eqns16and17. The + = 100, 150, 200 and 250. To sufficiently delineate +flows at these length scales, we apply a Gaussian filter (see Figure4) to flows obtained from eqns16and17. The Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to -obtain a real-space steady-flow map. -8 Mani et al. -Figure 4. Left: Divergence-flow power spectrum | div | 2 +obtain a real-space steady-flow map. + Mani et al. +Figure 4. Left: Divergence-flow power spectrum | div | 2 , from eqn16, obtained from inversion using all the couplings. The -power-spectrum is then filtered with a bandpass centered around qR +power-spectrum is then filtered with a bandpass centered around qR = 150 (middle panel). The resulting spectra is shown in the right panel. The units of | div | 2 are in s − 2 - . For illustration, we show the action of the filter on the power-spectrum | div | 2 -since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. -For LCT, we first apply a Gaussian smoothing to v + . For illustration, we show the action of the filter on the power-spectrum | div | 2 +since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. +For LCT, we first apply a Gaussian smoothing to v x and v y to average over small-scale features; the extent of smoothing depends on the length scale qR @@ -581,11 +581,11 @@ curl = ∂ y − ∂ y v x . (19) -We then perform a 2D Fourier transform on eqns18and19, apply the same Gaussian filters as for mode-coupling, +We then perform a 2D Fourier transform on eqns18and19, apply the same Gaussian filters as for mode-coupling, and transform back to real space. 
-Condensing all of the above, the following sequence of operations to compare flows at desired length scales are -performed for mode-coupling (M-C) and for LCT - -M-C : φ (x, y ; t) 3D FFT +Condensing all of the above, the following sequence of operations to compare flows at desired length scales are +performed for mode-coupling (M-C) and for LCT M-C + : φ (x, y ; t) 3D FFT =====⇒ φ ω k , B k ,q inversion @@ -607,30 +607,30 @@ h · ∇× eqns18 , 19 2D FFT, ======⇒ Filter Filtered, Fourier-space -flows 2D FFT +flows 2D FFT =====⇒ div, curl 6. RESULTS -Table2summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure5, -where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from +Table2summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure5, +where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from the two methods near supergranular scale ( qR - ≈ 100). Near-surface flows are imaged most faithfully when all the -couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of + ≈ 100). Near-surface flows are imaged most faithfully when all the +couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between -the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence -flows (this is consistent with the results ofHathaway et al.2015;Langfellner et al.2015;Rincon et al.2017). 
Due to -insufficient modes for the p +the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence +flows (this is consistent with the results ofHathaway et al.2015;Langfellner et al.2015;Rincon et al.2017). Due to +insufficient modes for the p 2 -p - 2 case (see Table1), we are unable to infer vortical flows with conviction other than near + 2 case (see Table1), we are unable to infer vortical flows with conviction other than near the supergranular scale, as can be seen from Table2. Figure6also aligns with what we believe can be accomplished through mode-coupling helioseismology - using f-f or p 1 -p 1 alone to seismically infer near-surface divergence and vortical -flows at different scales (qR +flows at different scales (qR = 100, 150) can yield extremely good agreement with LCT. As the length scale of the -inferred flow moves further away from that of supergranules (Figure7), the demand on signal-to-noise also increases. +inferred flow moves further away from that of supergranules (Figure7), the demand on signal-to-noise also increases. An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to -comment substantively on the flows at these scales. -6.1. Amplitudes of mode-coupling flows +comment substantively on the flows at these scales. +6.1. Amplitudes of mode-coupling flows Imaging near-surface flows using mode-coupling analysis 9 (a) qR = 100 , f-f + p @@ -638,53 +638,53 @@ Imaging near-surface flows using mode-coupling analysis 9 1 + p 2 -p 2 -Figure 5. Real-space divergence flows (left column, in units of 10−5 +Figure 5. 
Real-space divergence flows (left column, in units of 10−5 s − 1 ) and radial vorticity (right column, in units of 10−6 s −1 ) -for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around +for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around qR - = 100 (see Figure4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges -out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter -plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum + = 100 (see Figure4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges +out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter +plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps are saturated to show only 40% of the maximum values. For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated -numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward +numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward a precise statement on them. H21 reported a 60% greater amplitude for p 1 -p - 1 over f-f coupling (Figure3reflects a -similar conclusion), another element to consider when combining different radial orders. The choice of regularization -(see right panel of Figure2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow -amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. 
+ 1 over f-f coupling (Figure3reflects a +similar conclusion), another element to consider when combining different radial orders. The choice of regularization +(see right panel of Figure2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow +amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a -radial grid along which kernels and flows tend to be described. -Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors: +radial grid along which kernels and flows tend to be described. +Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient) depend upon the following factors: • Coupling(s) used, • Regularization parameter in the inversion, -• Smoothing applied to LCT flows (indirectly; see below paragraph), -• The depth at which flows are inferred. +• Smoothing applied to LCT flows (indirectly; see below paragraph), +• The depth at which flows are inferred. Here, we report in Table2only the maximum correlation found from among the points in the radial grid close to the surface (within ± 0.5 Mm from z =0). For a desired comparison length scale qR - , we first fix the coupling(s) -and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and -10 Mani et al. + , we first fix the coupling(s) +and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and + Mani et al. (a) qR = 100 , f-f (b) qR = 150, p 1 -p 1 -Figure 6. Real-space divergence flows (left column, in units of 10−5 +Figure 6. 
Real-space divergence flows (left column, in units of 10−5 s − 1 ) and radial vorticity (right column, in units of 10− 6 s −1 ) -for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around +for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around qR = 100, and using (b) p 1 -p - 1 coupling (bottom row), bandpass filtered around qR - = 150. We cut edges out from the flow + 1 coupling (bottom row), bandpass filtered around qR + = 150. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. (a) qR = 200 , f-f + p @@ -697,26 +697,26 @@ maps and compare a circular region of diameter ≈175 Mm. 1 + p 2 -p 2 -Figure 7. Real-space divergence flows (left column, in units of 10−5 +Figure 7. Real-space divergence flows (left column, in units of 10−5 s − 1 ) and radial vorticity (right column, in units of 10− 6 s −1 ) -for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around +for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around (a) qR = 200, and (b) qR - = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. -vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained + = 250. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. +vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. 
The highest correlation -(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired +(corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired qR . It has been shown (seeDe Rosa & Toomre2004;Langfellner et al.2015) that line-of-sight velocity from Dopplergrams and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes -for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history +for divergence flows owing to the multi-step process involved in obtaining them. For example, there has been a history (see, e.g.,De Rosa et al.2000;Sekii et al.2007;Zhao et al.2007;Langfellner et al.2018;B¨oning et al.2020;Korda & ˇ -Svanda2021) of using travel-time difference as only a proxy for horizontal divergence. However,Langfellner et al. +Svanda2021) of using travel-time difference as only a proxy for horizontal divergence. However,Langfellner et al. Imaging near-surface flows using mode-coupling analysis 11 Coupling qR div curl @@ -744,38 +744,38 @@ p 150 0.91 0.39 200 0.79 0.3 250 0.55 0.3 -Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, +Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, respectively. -(2015),Birch et al.(2016) andBirch et al.(2019) use empirically determined conversion factors to align flow amplitudes +(2015),Birch et al.(2016) andBirch et al.(2019) use empirically determined conversion factors to align flow amplitudes from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see Verma et al.2013;L¨optien et al.2016). 
Even for the case of supergranulation divergence maps obtained through ring-diagram helioseismology,Greer et al.(2016) only report normalized amplitudes. In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used -with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built -using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing +with great confidence to investigate flows near the surface. Careful inversions of mode-coupling measurements, built +using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve -signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic -applications to investigate other depth- and time-varying features such as giant cell flows (seeHathaway et al.2013; -Hanson et al.2020), emerging active regions, meridional flows and Rossby waves. +signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic +applications to investigate other depth- and time-varying features such as giant cell flows (seeHathaway et al.2013; +Hanson et al.2020), emerging active regions, meridional flows and Rossby waves. APPENDIX A. DERIVATION OF THE FORWARD MODEL -As described in section1.1, we seek to describe the flow u as a function of q along e +As described in section1.1, we seek to describe the flow u as a function of q along e z . To that end, substituting eq3into eq2, u σ -q (z ) = -j +q (z ) = +j q 2 f j e -z + iq f -j +z + iq f +j P σ j q + iq × e z f j T σ j q . 
(A1) -For flows in the anelastic limit (u speed of sound), we can denote the flow perturbation operator as δ L σ +For flows in the anelastic limit (u speed of sound), we can denote the flow perturbation operator as δ L σ = − 2iωρu σ · ∇ (seeHanasoge et al.2017). Substituting Eq.A1into the operator, we get, @@ -785,9 +785,9 @@ q · k + uσ q · e z ∂ z ), (A2) -= − 2 i ωρ -j -− k · q f += − 2 i ωρ +j +− k · q f j P σ j q − k · ( q × e z ) f @@ -796,9 +796,9 @@ j q + q 2 f j P σ j q ∂ -z +z . (A3) -12 Mani et al. + Mani et al. Express the mode eigenfunction describing oscillations in the Cartesian domain by (seeWoodard2006) ξ k ≡ ξ @@ -807,72 +807,72 @@ k H nk (z )e z + ˆzV nk (z ), (A4) -where H and V are real-valued functions; n and n +where H and V are real-valued functions; n and n are dropped for compactness of notation. Then the coupling of two modes ξ k and ξ - k (k - = k + q ), by the flow perturbation operator δ Lσ + k (k + = k + q ), by the flow perturbation operator δ Lσ q , denoted by coupling integral Λk k (σ ), is given by Λk -k (σ ) ≡ +k (σ ) ≡ dx (δ L σ q ξ k ) · ξ ∗ -k = - dx - − 2i ωρ -j +k = + dx + − 2i ωρ +j q 2 f j P σ j q ( ˆ k · ˆ -k - H +k + H k H ∗ -k + V +k + V k V ∗ k ) -− -k · q f +− +k · q f j P σ j q + k · ( q × e z ) f j T σ -j q +j q ( ˆ k · ˆ -k +k H k H ∗ k + V k V ∗ -k ) +k ) (A5) -We desire to linearly relate the coupling integral in the above equation to the flows P and T , through poloidal and +We desire to linearly relate the coupling integral in the above equation to the flows P and T , through poloidal and toroidal sensitivity kernels, C q j, k and D q j, k respectively. Hence, they are given by C - q j, k = - dz ρ + q j, k = + dz ρ q 2 f j ( ˆ k · ˆ -k - H +k + H k H ∗ -k + V +k + V k V ∗ k ) -− k · q f +− k · q f j ( ˆ k · ˆ -k +k H k H ∗ k + V @@ -881,11 +881,11 @@ k ) , D q j, k = k · ( q × e -z ) +z ) dz ρ f j ( ˆ k · ˆ -k +k H k H ∗ k + V @@ -896,29 +896,29 @@ q j, k = C −q j, −k and D q j, k = D − q j, − k . 
This coupling integral contributes to the cross-spectral -measurement between modes k and k + q From eq 8 ofWoodard(2014), we write the first-order effect of flow on -wavefield cross-correlation as +measurement between modes k and k + q From eq 8 ofWoodard(2014), we write the first-order effect of flow on +wavefield cross-correlation as φ ω ∗ k φ ω +σ k +q = H ω -kk +kk σ Λk k ( σ ), (A7) where the function H is given by H ω -kk +kk σ = −2 i ω ( N k |R ω k | 2 R ω +σ k + N -k +k |R ω +σ k |2 R ω ∗ k ). (A8) -We absorb the factor − 2i ω into the definition of H . Substitute eqA6in right-hand-side of eqA7to obtain eq4. -The mode spectral profile R is a Lorentzian, given by +We absorb the factor − 2i ω into the definition of H . Substitute eqA6in right-hand-side of eqA7to obtain eq4. +The mode spectral profile R is a Lorentzian, given by R ω k = 1 ω 2 @@ -928,12 +928,12 @@ nk /2 , (A9) where ω nk is the resonant frequency of the mode, and γ nk is the mode linewidth. EqA9can be derived by introducing -mode damping − i ωγ ρ as an operator in the differential equation that governs undamped, driven oscillations (see eq -5 ofHanasoge et al.2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. +mode damping − i ωγ ρ as an operator in the differential equation that governs undamped, driven oscillations (see eq +5 ofHanasoge et al.2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. Also, the parity H ω -kk +kk σ = H − ω ∗ -kk +kk − σ and R ω k = R − ω ∗ k are established. Mode normalization N is given by @@ -941,7 +941,7 @@ N k = 1 Q Q -k +k ω |φ ω k | 2 @@ -953,15 +953,15 @@ Q Q k on the right-hand-side implies average over all [k x , k y ] (Q terms in all) such that k = |k | is constant. -This forces N to be isotropic, i.e., to only depend on k , and not k . The sum over ω is within five linewidths of ω +This forces N to be isotropic, i.e., to only depend on k , and not k . 
The sum over ω is within five linewidths of ω nk . -Note that Eq.A8throughA10are modified from H21 to ensure parity and that flow maps are real. +Note that Eq.A8throughA10are modified from H21 to ensure parity and that flow maps are real. The three equationsA8throughA10, along with the symmetry relation for kernels, and summation over ± ω , serve to establish the parity B σ k ,q = B ∗− σ − k ,−q . This allows for obtaining P σ q = P ∗− σ -−q , and subsequently, purely real flow in +−q , and subsequently, purely real flow in the real domain. Setting σ = 0 gives us the linear, invertible equation eq6. Substituting eqnsA8throughA10into the noise model obtained in H21 and summing over ± ω establishes the symmetry Gσ k , q = G− σ @@ -976,9 +976,9 @@ k ,q in the fashion k α k ,zo B - k ,q allows for an average value of the flow P + k ,q allows for an average value of the flow P q (z ) to be estimated at the depth z -o . To obtain the coefficients +o . To obtain the coefficients α k ,zo , it is assumed that a set of sensitivity kernels K k ,q (z ) for the mode q can be summed up coherently to give an @@ -987,17 +987,17 @@ o . Conventionally, a Gaussian centered at z o and a width ∆ is chosen which the averaging kernel should resemble after performing inversion. B.1. Kernels in the integral form -Since the kernels in eqA6are manifest as coefficients on a basis f - j (z ), we first derive kernels that can be expressed -as a function of depth z (see Figure8). It is convenient to derive in matrix form. Thus, with the following definitions: +Since the kernels in eqA6are manifest as coefficients on a basis f + j (z ), we first derive kernels that can be expressed +as a function of depth z (see Figure8). It is convenient to derive in matrix form. 
Thus, with the following definitions: P ≡ P q ( z ), p ≡ P q j , F ≡ f j (z ), B ≡ B k ,q C ≡ C q j, k and K ≡ K -k , q ( z ), we write (assume only poloidal flow for -simplicity, the same derivations hold true for toroidal flow as well) +k , q ( z ), we write (assume only poloidal flow for +simplicity, the same derivations hold true for toroidal flow as well) P = F p (B11) The size of P is thus the same as the length of the radial grid z . Now, pre-multiply by F T @@ -1022,19 +1022,19 @@ where F T C, i.e., K - k ,q ( z ) = -j,j + k ,q ( z ) = +j,j dz f j (z ) f -j - (z ) +j + (z ) − 1 f - j + j (z ) C -q j +q j ,k (B14) -B.2. Obtaining the coefficients α +B.2. Obtaining the coefficients α Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z o T (z, z @@ -1043,27 +1043,27 @@ o ) = 1 2π ∆ 2 exp z − z o -2∆ 2 +2∆ 2 . (B15) This can be achieved by solving the optimization problem -minimize X = - dz +minimize X = + dz T ( z, z o ) − Θ q (z, z -o ) +o ) 2 , (B16) where we introduce the averaging kernel for mode q thus Θ q (z, z -o ) = +o ) = k α k ,zo K k ,q ( z ). (B17) As an aside, we note that averaging kernels can similarly be constructed for RLS (see section3.1) using eqns13 andB14. -14 Mani et al. + Mani et al. Figure 8. Left : Kernel K k ,q ( z ) (eqB14) shown vs depth z for the three radial order couplings f-f, p 1 -p @@ -1080,37 +1080,37 @@ Integral of the averaging kernel over z is 0.89. Setting ∂ X ∂α → 0 gives us the matrix problem to be solved A { α } = v, -{ α } = -A + µI +{ α } = +A + µI − 1 v, (B18) -where the square matrix A = +where the square matrix A = dz K k ,q (z ) K -k - ,q (z ) and v = +k + ,q (z ) and v = dz K k ,q (z ) T (z, z -o ). Here, k +o ). Here, k is just a dummy index for -denoting elements in the matrix A , ( k +denoting elements in the matrix A , ( k = k + q ). 
In the last line of eqB18, we introduce regularization using an Identity matrix I , with the regularization parameter µ - purpose being the same as that described in section3.1. Obtaining α thus becomes a highly expensive computationally for very large number of modes (see section3). Substitute α -obtained from eqB18into last line of eqB13, and +obtained from eqB18into last line of eqB13, and k on both sides k α k ,z o B σ -k ,q = +k ,q = k α k ,z -o +o dz K k ,q ( z )P σ q (z ), -= += dz Θ q ( z, z o ) P σ @@ -1118,10 +1118,10 @@ q (z ) , ≈ P σ q ( z o ) (B19) -Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di- -vergence flow can then be obtained from eq16. Results are shown in Figures9and10. +Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence + flow can then be obtained from eq16. Results are shown in Figures9and10. REFERENCES -Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. +Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. 1990, ApJ, 364, 699, doi:10.1086/169452 Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of Modern Physics, 64, 885, @@ -1140,7 +1140,7 @@ Physics, 74, 1073, doi:10.1103/RevModPhys.74.1073 —. 2021, Living Reviews in Solar Physics, 18, 2, doi:10.1007/s41116- 020-00028- 3 Imaging near-surface flows using mode-coupling analysis 15 -Figure 9. Left : Poloidal flow power-spectrum for f-f as a function of q +Figure 9. Left : Poloidal flow power-spectrum for f-f as a function of q x R and q y R @@ -1148,13 +1148,13 @@ Figure 9. Left : Poloidal flow power-spectrum for f-f as a function of q averaged over the azimuthal angle. Shaded region shows ± 1 − σ error around the mean. Power is in units of m 2 /s4 . -Figure 10. Real-space divergence flows (in units of 10 − 5 +Figure 10. 
Real-space divergence flows (in units of 10 − 5 s −1 ) for mode-coupling inversion through SOLA using f-f coupling, -and LCT, bandpass filtered around qR - = 100. We cut edges out from the flow maps and compare a circular region of diameter -≈ 175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is -1.05. For demonstration, we show inversions only for poloidal flow using SOLA. +and LCT, bandpass filtered around qR + = 100. We cut edges out from the flow maps and compare a circular region of diameter +≈ 175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is +1.05. For demonstration, we show inversions only for poloidal flow using SOLA. De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, 192, 351, doi:10.1023/A:1005269001739 De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, @@ -1162,12 +1162,12 @@ doi:10.1086/424920 Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced Study Institute (ASI) Series C, Vol. 169, Seismology of the Sun and the Distant Stars, ed. D. O. Gough, 105–116 -Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & +Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & Pomerantz, M. A. 1993, Nature, 362, 430, doi:10.1038/362430a0 Fisher, G. H., & Welsch, B. T. 2008, in Astronomical -Society of the Pacific Conference Series, Vol. 383, -Subsurface and Atmospheric Influences on Solar Activity, +Society of the Pacific Conference Series, Vol. 383, +Subsurface and Atmospheric Influences on Solar Activity, ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, & G. J. D. Petrie, 373.https://arxiv.org/abs/0712.4289 Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. @@ -1180,7 +1180,7 @@ Science, 368, 1469, doi:10.1126/science.aaz7119 Gizon, L., Cameron, R. H., Bekki Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, 824, 128, doi:10.3847/0004- 637X/824/2/128 Hanasoge, S., & Mandal, K. 
2019, ApJL, 871, L32, -doi:10.3847/2041- 8213/aaff60 +doi:10.3847/2041- 8213/aaff60 Hanasoge, S. M., Hotta, H., & Sreenivasan, K. R. 2020, Science Advances, 6, eaba9639, doi:10.1126/sciadv.aba9639 @@ -1195,7 +1195,7 @@ Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, ApJ, 910, 156, doi:10.3847/1538-4357/abe770 Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. 2015, ApJ, 811, 105, doi:10.1088/0004- 637X/811/2/105 -16 Mani et al. + Mani et al. Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science, 342, 1217, doi:10.1126/science.1244682 Hill, F. 1988, ApJ, 333, 996, doi:10.1086/166807 @@ -1253,7 +1253,7 @@ Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996, Science, 272, 1300, doi:10.1126/science.272.5266.1300 Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H. 1989, Nonradial oscillations of stars -Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, +Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, A136, doi:10.1051/0004-6361/201321628 Vorontsov, S. V. 2011, MNRAS, 418, 1146, doi:10.1111/j.1365- 2966.2011.19564.x @@ -1269,4 +1269,4 @@ Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007, ApJ, 659, 848, doi:10.1086/512009 Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., & Duvall, T. L., J. 2012, ApJL, 749, L5, -doi:10.1088/2041- 8205/749/1/L5 +doi:10.1088/2041- 8205/749/1/L5 \ No newline at end of file diff --git a/read/results/playa/2201.00200.txt b/read/results/playa/2201.00200.txt index 51d3bc7..dba297d 100644 --- a/read/results/playa/2201.00200.txt +++ b/read/results/playa/2201.00200.txt @@ -2,11 +2,11 @@ Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022 January 4, 2022 Local heating due to convective overshooting and the solar modelling problem -I. Baraff e1, 2 +I. Baraff e1, 2 , T. Constantino 1 , J. Clarke1 , A. Le Saux1, 2 - , T. Goffrey 4 + , T. Goffrey 4 , T. Guillet1 , J. Pratt3 , D. G. 
Vlaykov1 @@ -22,198 +22,198 @@ e de Lyon, France 4 Centre for Fusion, Space and Astrophysics, Department of Physics, University of Warwick, Coventry, CV4 7AL, UK ABSTRACT -Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary -of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in one- -dimensional stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a +Recent hydrodynamical simulations of convection in a solar-like model suggest that penetrative convective flows at the boundary +of the convective envelope modify the thermal background in the overshooting layer. Based on these results, we implement in onedimensional + stellar evolution codes a simple prescription to modify the temperature gradient below the convective boundary of a solar model. This simple prescription qualitatively reproduces the behaviour found in the hydrodynamical simulations, namely a local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun -inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the -speed of sound. These eff ects could help reduce the discrepancies between solar models and observed constraints based on seismic +inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the +speed of sound. These eff ects could help reduce the discrepancies between solar models and observed constraints based on seismic inversions of the Ledoux discriminant. 
Since mixing due to overshooting and local heating are the result of the same convective penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach. Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior 1. Introduction Modelling the internal structure of the Sun is still a challenge. A recent review by Christensen-Dalsgaard (2021) describes in -detail the long-standing eff orts to improve solar models. The so- -lar modelling problem refers to the discrepancy between helio- -seismology and solar interior models that adopt low metallici- -ties predicted by the three-dimensional (3D) atmosphere models -of, for example, Asplund et al. (2009) and Ca ffau et al. (2011), -in contrast to the high metallicities based on previous litera- -ture compilations by, for example, Anders & Grevesse (1989) -and Grevesse & Noels (1993). Asplund et al. (2021) have re- -cently confirmed with state-of-the-art 3D simulations the rela- -tively low metal abundances for the Sun. Asplund et al. (2021) -consider that their study yields the most reliable solar abun- -dances available today, suggesting that the solar modelling prob- -lem is no longer a problem of abundances but rather a problem +detail the long-standing eff orts to improve solar models. The solar + modelling problem refers to the discrepancy between helioseismology + and solar interior models that adopt low metallicities + predicted by the three-dimensional (3D) atmosphere models +of, for example, Asplund et al. (2009) and Ca ffau et al. (2011), +in contrast to the high metallicities based on previous literature + compilations by, for example, Anders & Grevesse (1989) +and Grevesse & Noels (1993). Asplund et al. (2021) have recently + confirmed with state-of-the-art 3D simulations the relatively + low metal abundances for the Sun. Asplund et al. 
(2021) +consider that their study yields the most reliable solar abundances + available today, suggesting that the solar modelling problem + is no longer a problem of abundances but rather a problem of stellar physics. The treatment of mixing below the convective -zone is one of the key processes that could improve solar mod- -els. Several studies indeed reveal that the process of convective -penetration, also called overshooting, at the bottom of the con- -vective envelope could play an important role in improving the +zone is one of the key processes that could improve solar models. + Several studies indeed reveal that the process of convective +penetration, also called overshooting, at the bottom of the convective + envelope could play an important role in improving the agreement between solar models and helioseismic constraints (see for example Christensen-Dalsgaard et al. 2011; Zhang et al. 2012; Buldgen et al. 2019b). Overshooting in solar models has -most often been treated using diff usive or instantaneous chemi- -cal mixing. A temperature gradient that sharply transitions from +most often been treated using diff usive or instantaneous chemical + mixing. A temperature gradient that sharply transitions from a nearly adiabatic form to a radiative form is usually assumed, as suggested by the theoretical work of Zahn (1991). Models with a smoother transition have also been investigated. Based -on the analysis of models with di ff erent stratifications near the -Send o ffprint requests to : I. Bara ffe base of the convective zone, Christensen-Dalsgaard et al. (2011) -found that models that better fit the helioseismic data have a +on the analysis of models with di ff erent stratifications near the +Send o ffprint requests to : I. Bara ffe base of the convective zone, Christensen-Dalsgaard et al. 
(2011) +found that models that better fit the helioseismic data have a weakly sub-adiabatic temperature gradient in the lower part of -the convective zone and a smooth transition to the radiative gra- -dient in the overshooting layer. But Christensen-Dalsgaard et al. -(2011) noted that the required temperature stratification is diffi- -cult to reconcile with existing overshooting models and numer- -ical simulations. They concluded that only non-local turbulent -convection models could produce the desired degree of smooth- -ness in the transition (see for example Zhang & Li 2012; Zhang +the convective zone and a smooth transition to the radiative gradient + in the overshooting layer. But Christensen-Dalsgaard et al. +(2011) noted that the required temperature stratification is difficult + to reconcile with existing overshooting models and numerical + simulations. They concluded that only non-local turbulent +convection models could produce the desired degree of smoothness + in the transition (see for example Zhang & Li 2012; Zhang et al. 2012). But these non-local models remain uncertain, and their description of overshooting under the conditions found at the base of the solar convective zone is yet to be validated. Zhang et al. (2019) explored the impact of overshooting by -introducing a parametrised turbulent kinetic energy flux based +introducing a parametrised turbulent kinetic energy flux based on a model with parameters that are adjusted to improve the helioseismic properties. They suggest that amelioration can be -obtained specifically below the convective envelope. However, -Zhang et al. (2019) find that this model cannot solve the whole -solar problem because such a flux worsens the sound-speed pro- -file in the deep radiative interior of their solar model. 
Given the -uncertainties regarding the temperature stratification of the over- -shooting region, solar modellers have considered these effects as -secondary and have focused their e fforts on exploring the impact +obtained specifically below the convective envelope. However, +Zhang et al. (2019) find that this model cannot solve the whole +solar problem because such a flux worsens the sound-speed profile + in the deep radiative interior of their solar model. Given the +uncertainties regarding the temperature stratification of the overshooting + region, solar modellers have considered these effects as +secondary and have focused their e fforts on exploring the impact of solar abundances, microphysics (opacities, equations of state, -nuclear reaction rates), and chemical mixing and diffusion (see +nuclear reaction rates), and chemical mixing and diffusion (see details and references in the review of Buldgen et al. 2019a). -Additional, more exotic e ff ects such as early disk accretion or +Additional, more exotic e ff ects such as early disk accretion or solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot 2021) are also attracting increasing attention. To reinvigorate the debate, Buldgen et al. (2019b) recently -highlighted once again how the transition of the temperature gra- -1arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem -dient just below the convective envelope can significantly impact -the disagreement between solar models and helioseismic con- -straints. Their results, based on a method that combines multi- -ple structural inversions, suggest that the transition in temper- -ature gradient is improperly reproduced by adopting either an -adiabatic or a radiative temperature gradient in the overshoot- -ing layer. 
The solution should be somewhere in between these +highlighted once again how the transition of the temperature gra1arXiv:2201.00200v1 + [astro-ph.SR] 1 Jan 2022 +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +dient just below the convective envelope can significantly impact +the disagreement between solar models and helioseismic constraints. + Their results, based on a method that combines multiple + structural inversions, suggest that the transition in temperature + gradient is improperly reproduced by adopting either an +adiabatic or a radiative temperature gradient in the overshooting + layer. The solution should be somewhere in between these two extremes. Christensen-Dalsgaard et al. (2018) also note that an increase in the temperature at the transition would remove a remaining small sharp dip in the speed of sound immediately -beneath the convective zone of the model. A major difficulty is -to disentangle the eff ects of overshoot from the e ff ects of opaci- -ties, which can also alter the temperature gradient in these layers. -Given the large number of parameters to deal with in order to im- -prove solar models and the current lack of strong arguments in -favour of modifying the thermal stratification in the overshoot- -ing layer, there has been no real motivation to deviate from the +beneath the convective zone of the model. A major difficulty is +to disentangle the eff ects of overshoot from the e ff ects of opacities, + which can also alter the temperature gradient in these layers. +Given the large number of parameters to deal with in order to improve + solar models and the current lack of strong arguments in +favour of modifying the thermal stratification in the overshooting + layer, there has been no real motivation to deviate from the traditional picture of a sharp transition as formalised by Zahn (1991). 
-The present work is motivated by arguments inspired by hy- -drodynamical simulations of convection and convective penetra- -tion in solar-like models. Recent hydrodynamical simulations by -Bara ffe et al. (2021, hereafter B21) highlight the process of local +The present work is motivated by arguments inspired by hydrodynamical + simulations of convection and convective penetration + in solar-like models. Recent hydrodynamical simulations by +Bara ffe et al. (2021, hereafter B21) highlight the process of local heating in the overshooting region due to penetrating convective motions across the convective boundary. In the following, we analyse the potential impact of this feature on one-dimensional (1D) stellar evolution structures in the context of solar models. -The hydrodynamical results of B21 are briefly summarised in +The hydrodynamical results of B21 are briefly summarised in Sect. 2, and their impact on 1D models are analysed in Sect. 3 and discussed in Sect. 4. -2. Modification of the thermal background in the +2. Modification of the thermal background in the overshooting layer: Results from two-dimensional hydrodynamical simulations -B21 performed two-dimensional (2D) fully compressible time- -implicit simulations of convection and convective penetration in +B21 performed two-dimensional (2D) fully compressible timeimplicit + simulations of convection and convective penetration in a solar-like model with the MUlti-dimensional Stellar Implicit -Code MUSIC (Viallet et al. 2011, 2016; Go ff rey et al. 2017). -The main motivation was to explore the impact of an artificial +Code MUSIC (Viallet et al. 2011, 2016; Go ff rey et al. 2017). +The main motivation was to explore the impact of an artificial increase in the stellar luminosity on the properties of convection and convective penetration. This procedure is a common tactic adopted in hydrodynamical simulations of convection (Rogers et al. 2006; Meakin & Arnett 2007; Brun et al. 
2011; Hotta 2017; -Edelmann et al. 2019). The experiments of B21 highlight the im- -pact of penetrative downflows on the local thermal background -in the overshooting layer. They illustrate how convective down- -flows, when penetrating the region below the convective bound- -ary of the envelope, can induce a local heating and a modification +Edelmann et al. 2019). The experiments of B21 highlight the impact + of penetrative downflows on the local thermal background +in the overshooting layer. They illustrate how convective downflows, + when penetrating the region below the convective boundary + of the envelope, can induce a local heating and a modification of the temperature gradient as a result of compression and shear -in the overshooting layer. This modification of the local back- -ground is connected to a local increase in the radiative flux to -counterbalance the negative enthalpy flux (or heat flux) produced -by penetrating flows. The negative peak of the enthalpy flux -and the positive bump of the radiative flux below the convective -boundary are well-known features described in many numeri- -cal works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell +in the overshooting layer. This modification of the local background + is connected to a local increase in the radiative flux to +counterbalance the negative enthalpy flux (or heat flux) produced +by penetrating flows. The negative peak of the enthalpy flux +and the positive bump of the radiative flux below the convective +boundary are well-known features described in many numerical + works (Hurlburt et al. 1986; Muthsam et al. 1995; Brummell et al. 2002; Brun et al. 2011; Hotta 2017; K ¨ apyl ¨ a 2019; Cai 2020). A few works (Rogers et al. 2006; Viallet et al. 2013; Korre -et al. 2019; Higl et al. 2021) have also reported a modification +et al. 2019; Higl et al. 2021) have also reported a modification of the local thermal background in the overshooting region, but without providing a detailed description. 
The simulations of B21 provide a physical explanation that links the convective penetra- tion process to the local heating and to the radiative bump in the overshooting layer. The solar-like star simulated in B21 is based -on a model that is not thermally relaxed. It is reasonable to as- -sume that the local heating seen in B21 is present in stars because -the negative heat flux in the overshooting layer and the bump in -the radiative flux that compensates for this feature are persistent. -These two features are also commonly observed in other hydro- -dynamical simulations, as mentioned above. An exploration of +on a model that is not thermally relaxed. It is reasonable to assume + that the local heating seen in B21 is present in stars because +the negative heat flux in the overshooting layer and the bump in +the radiative flux that compensates for this feature are persistent. +These two features are also commonly observed in other hydrodynamical + simulations, as mentioned above. An exploration of the impact of this heating on stellar evolution models may reveal that heating is a necessary aspect of models for overshooting. -Fig. 1. Radial profile of the temperature departure ∆ T / T +Fig. 1. Radial profile of the temperature departure ∆ T / T 0 from -the initial profile T +the initial profile T 0 and of the sub-adiabaticity (∇ − ∇ ad ) close to -the convective boundary predicted by 2D hydrodynamical simu- -lations (B21) of solar-like models. The lower panel corresponds +the convective boundary predicted by 2D hydrodynamical simulations + (B21) of solar-like models. The lower panel corresponds to the model with a realistic stellar luminosity and the upper panel to a model with luminosity enhanced by a factor of ten. 
The dash-dotted red lines show ∆ T /T - 0 (in %), the relative dif- -ference between the time and space averages of the temperature, + 0 (in %), the relative difference + between the time and space averages of the temperature, T , and the initial temperature, T 0 . The solid blue lines show the time and space averages of the sub-adiabaticity (∇ − ∇ ad ). The -dashed black lines show the initial profile of the sub-adiabaticity, +dashed black lines show the initial profile of the sub-adiabaticity, ( ∇ − ∇ ad ) init . The convective boundary is indicated by the vertical solid line (see details in B21) -The behaviour of the thermal profile below the convective +The behaviour of the thermal profile below the convective boundary found in the simulations of B21 is illustrated in Fig. 1. It is displayed for the model with a realistic stellar luminosity -(lower panel). We also show the results for a model with an artifi- -cial enhancement in the luminosity by a factor of ten because the -features are intensified in these ‘boosted’ models (upper panel). -The figure shows the local heating in the overshooting layer and +(lower panel). We also show the results for a model with an artificial + enhancement in the luminosity by a factor of ten because the +features are intensified in these ‘boosted’ models (upper panel). +The figure shows the local heating in the overshooting layer and its impact on the sub-adiabaticity ( ∇ − ∇ ad ), with ∇ = d log T d log P the -2 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem + +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem temperature gradient and ∇ ad = d log T d log P | S the adiabatic gradient. 
-The initial stratification below the convective boundary (located +The initial stratification below the convective boundary (located at r = 0 .6734 × R -star for this specific stellar model) is set by +star for this specific stellar model) is set by the stable radiative gradient, ∇ -rad (see the dashed black line be- -low the convective boundary in Fig. 1). B21 show that, as a re- -sult of the local heating below the convective boundary charac- -terised by the bump in temperature di fference ∆ T /T +rad (see the dashed black line below + the convective boundary in Fig. 1). B21 show that, as a result + of the local heating below the convective boundary characterised + by the bump in temperature di fference ∆ T /T 0 displayed in Fig. 1, the temperature gradient becomes less sub-adiabatic immediately below the convective boundary1 @@ -223,17 +223,17 @@ a temperature gradient that has an intermediate value between the radiative temperature gradient and the adiabatic one. In the next section we analyse the impact of this local heating on 1D solar structures by adopting a simple prescription that mimics -the behaviour of the temperature gradient suggested by hydro- -dynamical simulations. +the behaviour of the temperature gradient suggested by hydrodynamical + simulations. 3. Impact on one-dimensional solar structure models 3.1. Helioseismic constraints Our primary goal in this short paper is to illustrate the potential, -qualitative impact of the local heating produced by overshoot- -ing. We adopted a strategy inspired by the analysis of Buldgen +qualitative impact of the local heating produced by overshooting. + We adopted a strategy inspired by the analysis of Buldgen et al. (2020), who constructed a static structure of the Sun in agreement with seismic inversions of the Ledoux discriminant -defined by +defined by A = 1 Γ 1 d ln P @@ -241,29 +241,29 @@ d ln r − d ln ρ d ln r , (1) with Γ 1 = (∂ ln P/∂ ln ρ) - ad . 
Starting from a reference evolu- -tionary model, Buldgen et al. (2020) used an inversion pro- -cedure to iteratively reconstruct a solar model. Successive in- -versions of the Ledoux discriminant allowed them to obtain a -model-independent profile for this quantity. Their reconstruction -method also gives solar structures that are in excellent agree- -ment with other structural inversions, namely the entropy, S , the + ad . Starting from a reference evolutionary + model, Buldgen et al. (2020) used an inversion procedure + to iteratively reconstruct a solar model. Successive inversions + of the Ledoux discriminant allowed them to obtain a +model-independent profile for this quantity. Their reconstruction +method also gives solar structures that are in excellent agreement + with other structural inversions, namely the entropy, S , the square of the speed of sound, c2 s , and the density, ρ. To illustrate the convergence of their reconstruction procedure, they show -(right panels of their Figs. 3-6) the successive iterations that con- -verge to an excellent level of agreement for the four structural +(right panels of their Figs. 3-6) the successive iterations that converge + to an excellent level of agreement for the four structural inversions ( A , S , c2 s , ρ) starting from the initial reference model -adopted in their work. The diff erences found between the recon- -structed model and the reference model are useful as they indi- -cate the modifications of the reference model that are required to +adopted in their work. The diff erences found between the reconstructed + model and the reference model are useful as they indicate + the modifications of the reference model that are required to converge towards a solar model in agreement with helioseismic data. We recall here the major trends found by Buldgen et al. (2020) for the four structural quantities, which are used for our analysis in Sect. 3.2. -The first concerns the Ledoux discriminant. 
The major dis- -crepancy between the Sun and the reference model occurs just +The first concerns the Ledoux discriminant. The major discrepancy + between the Sun and the reference model occurs just below the convective boundary, with a large positive bump for the quantity ( A Sun - A @@ -276,18 +276,18 @@ s ,ref ) /c 2 s ,ref . The corrections applied to A during the reconstruction procedure also reduce the discrepancy in the speed of sound in the radiative region. -The third concerns the entropy. Large discrepancies are ob- -served in both the radiative region and the convective zone. The +The third concerns the entropy. Large discrepancies are observed + in both the radiative region and the convective zone. The 1 Less sub-adiabatic means that |∇ − ∇ ad | decreases compared to the -initial profile. entropy discrepancy (S +initial profile. entropy discrepancy (S Sun − S ref )/ S ref has two positive peaks in the radiative zone, one just below the overshooting region and a -larger peak deeper at ∼ 40% of the stellar radius. This discrep- -ancy is negative in the convective zone. The corrections applied +larger peak deeper at ∼ 40% of the stellar radius. This discrepancy + is negative in the convective zone. The corrections applied to A help reduce these entropy discrepancies in both regions. The fourth concerns the density. The quantity (ρ Sun − @@ -295,57 +295,57 @@ Sun − ref )/ρ ref has a negative peak in the radiative region, at ∼ 35% of the stellar radius, and is positive in the convective zone. -Importantly, Buldgen et al. (2020) mention that their recon- -struction procedure gives similar Ledoux discriminant profiles -for a wide range of initial reference models. We used these re- -sults to gauge whether the modifications of the thermal profile +Importantly, Buldgen et al. (2020) mention that their reconstruction + procedure gives similar Ledoux discriminant profiles +for a wide range of initial reference models. 
We used these results + to gauge whether the modifications of the thermal profile predicted by B21 can help in qualitatively improving all the structural quantities used by Buldgen et al. (2020). 3.2. Testing one-dimensional solar models Our main motivation is to show the potential impact of the local -heating described in Sect. 2 on stellar models. We are not aim- -ing in this short work at constructing the best solar model to fit +heating described in Sect. 2 on stellar models. We are not aiming + in this short work at constructing the best solar model to fit helioseismic constraints. Using stellar evolution codes, we have -adopted two di ff erent methods that can be found in the litera- -ture to construct solar models (e.g. Zhang et al. 2012; Vinyoles -et al. 2017). Our first method relies on the thermal relaxation +adopted two di ff erent methods that can be found in the literature + to construct solar models (e.g. Zhang et al. 2012; Vinyoles +et al. 2017). Our first method relies on the thermal relaxation of a reference model with solar radius and luminosity that is -modified to reproduce the temperature gradient in the overshoot- -ing layer suggested by hydrodynamical simulations. In this case, -the chemical abundances are not modified by nuclear reactions, -mixing, or microscopic diffusion during the relaxation process. +modified to reproduce the temperature gradient in the overshooting + layer suggested by hydrodynamical simulations. In this case, +the chemical abundances are not modified by nuclear reactions, +mixing, or microscopic diffusion during the relaxation process. For these tests, we used the 1D Lyon stellar evolution code -(Bara ff e et al. 1998). We repeated this experiment based on ther- -mal relaxation with the stellar evolution code MONSTAR (e.g. -Constantino et al. 2014) and obtained the same qualitative re- -sults. +(Bara ff e et al. 1998). 
We repeated this experiment based on thermal + relaxation with the stellar evolution code MONSTAR (e.g. +Constantino et al. 2014) and obtained the same qualitative results. + The second method considers models that account for the -modification of the temperature gradient in the overshooting +modification of the temperature gradient in the overshooting layer from the zero age main sequence (ZAMS). The models are then evolved until they reach the solar radius and luminosity. With this approach, changes in the chemical abundances from -nuclear reactions, microscopic diffusion, and overshooting mix- -ing are also consistent with any modification of the structure +nuclear reactions, microscopic diffusion, and overshooting mixing + are also consistent with any modification of the structure induced by the forced local heating in the overshooting layer. These tests were performed with MONSTAR as it includes the -treatment of microscopic di ff usion. -The first method allows the impact of local heating in +treatment of microscopic di ff usion. +The first method allows the impact of local heating in the overshooting layer after thermal relaxation to be isolated. -The second method provides evolutionary models that are self- -consistent since the eff ect of the modification of the temperature -gradient is accounted for during their evolution on the main se- -quence. -In the following, we adopt a modification of the local temper- -ature gradient in the overshooting layer that qualitatively repro- -duces the behaviour displayed in Fig. 1. We define an overshoot- -ing length d +The second method provides evolutionary models that are selfconsistent + since the eff ect of the modification of the temperature +gradient is accounted for during their evolution on the main sequence. + +In the following, we adopt a modification of the local temperature + gradient in the overshooting layer that qualitatively reproduces + the behaviour displayed in Fig. 1. 
We define an overshooting + length d ov = α ov H P, CB , with H P, CB the pressure scale height at the convective boundary and α -ov a free parameter. We also de- -fine two radial locations, r +ov a free parameter. We also define + two radial locations, r ov = r CB − d ov and r @@ -353,8 +353,8 @@ ov and r CB − d ov / 2, with r -CB the radial location of the convective boundary. The tem- -perature gradient is modified as follows. For r +CB the radial location of the convective boundary. The temperature + gradient is modified as follows. For r mid ≤ r < r CB , we use @@ -367,8 +367,8 @@ mid )/ (r CB − r mid )]a × π/ 2} . (3) -3 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem + +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem For r ov ≤ r < r mid , we use @@ -381,285 +381,285 @@ mid − r ) /( r mid − r ov )] × π } . (5) Sine functions are used in Eqs. (3) and (5) to reproduce the -smooth variations in the temperature gradient below the convec- -tive boundary produced by the hydrodynamical simulations. We -have verified that the results are insensitive to the smoothness of -these variations and to the exact shape of the temperature gra- -dient radial profile.We adopted a =0.3 in Eq. (3) as it provides a -behaviour for the temperature gradient very close to the one dis- -played in Fig. 1. Results are rather insensitive to variations in the +smooth variations in the temperature gradient below the convective + boundary produced by the hydrodynamical simulations. We +have verified that the results are insensitive to the smoothness of +these variations and to the exact shape of the temperature gradient + radial profile.We adopted a =0.3 in Eq. (3) as it provides a +behaviour for the temperature gradient very close to the one displayed + in Fig. 1. Results are rather insensitive to variations in the values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. 
(5), which also provides a close visual match to the hydrodynamical results, but we note that the results are insensitive to the value of b. 3.2.1. Thermal equilibrium models -The details of the procedure for the first method are the follow- -ing. We calculate the evolution of a 1 M +The details of the procedure for the first method are the following. + We calculate the evolution of a 1 M model with an initial -helium mass fraction of 0.28, metallicity Z = 0. 02 , and a mix- -ing length l +helium mass fraction of 0.28, metallicity Z = 0. 02 , and a mixing + length l mix = 1 .9 H P . We use a reference model that is in thermal equilibrium 2 and has the luminosity and radius of the -current Sun. Starting from this reference model, the tempera- -ture gradient is modified over a prescribed depth to mimic the -impact of overshooting according to the hydrodynamical sim- -ulations described in Sect. 2. We adopt the prescription given +current Sun. Starting from this reference model, the temperature + gradient is modified over a prescribed depth to mimic the +impact of overshooting according to the hydrodynamical simulations + described in Sect. 2. We adopt the prescription given by Eqs. (2)-(5) over a distance d -ov below the convective bound- -ary. We show the results in Fig. 2 for α +ov below the convective boundary. + We show the results in Fig. 2 for α ov = 0.15 and α ov = 0.20. -These overshooting widths are in good agreement with the maxi- -mal depth reached by downflows below the convective boundary +These overshooting widths are in good agreement with the maximal + depth reached by downflows below the convective boundary predicted by the hydrodynamical simulations for the solar-like model investigated in B21. We note that the stellar model used in B21 is slightly under-luminous compared to the Sun (see B21 for details). 
B21 also mention that one should be cautious when -directly applying the overshooting depths predicted by their sim- -ulations to real stars since the final relaxed state for these simula- -tions may have di fferent properties from non-thermally relaxed +directly applying the overshooting depths predicted by their simulations + to real stars since the final relaxed state for these simulations + may have di fferent properties from non-thermally relaxed states. We varied α - ov between 0.15 and 0.35 and find that the + ov between 0.15 and 0.35 and find that the results do not change qualitatively. However, the amplitude of the variations in the model properties depends on d - ov (see be- -low). As shown below, this simple prescription implemented in -a stellar evolution code yields a local increase in the tempera- -ture below the convective boundary, similar to that observed in + ov (see below). + As shown below, this simple prescription implemented in +a stellar evolution code yields a local increase in the temperature + below the convective boundary, similar to that observed in the hydrodynamical simulations. We stress that Eqs. (2)-(5) have been chosen for simplicity. They are only a rough approximation -that can mimic the thermal profile behaviour suggested in the 2D +that can mimic the thermal profile behaviour suggested in the 2D simulations. -The model with a modified temperature gradient is then ther- -mally relaxed, that is to say, it is evolved over many thermal -timescales without any modification of the abundances from nu- -clear reactions until thermal equilibrium is reached. The temper- -ature gradient is modified in the overshooting layer during the +The model with a modified temperature gradient is then thermally + relaxed, that is to say, it is evolved over many thermal +timescales without any modification of the abundances from nuclear + reactions until thermal equilibrium is reached. 
The temperature + gradient is modified in the overshooting layer during the whole relaxation process, and this is referred to as a ‘forced local -heating’. This procedure ensures that the model with a modified -temperature gradient can be consistently compared to the refer- -ence model. As shown in Fig. 2, the simple prescription given +heating’. This procedure ensures that the model with a modified +temperature gradient can be consistently compared to the reference + model. As shown in Fig. 2, the simple prescription given 2 Thermal equilibrium means that the total nuclear energy produced in the central regions balances the radiative losses at the surface, i.e. the total nuclear luminosity, L -nuc , equals the total stellar luminosity, L . by Eqs. (2)-(5) yields similar qualitative changes in the temper- -ature and the sub-adiabaticity close to the convective boundary +nuc , equals the total stellar luminosity, L . by Eqs. (2)-(5) yields similar qualitative changes in the temperature + and the sub-adiabaticity close to the convective boundary that was found in the hydrodynamical simulations of B21. -Fig. 2. Radial profile of the temperature difference and of the -sub-adiabaticity of a 1D solar-like structure with a modified tem- -perature gradient in the overshooting layer according to Eqs. -(2)-(5). The temperature gradient is modified over a distance +Fig. 2. Radial profile of the temperature difference and of the +sub-adiabaticity of a 1D solar-like structure with a modified temperature + gradient in the overshooting layer according to Eqs. +(2)-(5). The temperature gradient is modified over a distance d ov = α ov H P,CB , with α ov =0.15 in the lower panel and α ov =0.20 -in the upper panel. The dash-dotted red lines show the percent- -age relative temperature di ff erence, ∆ T / T +in the upper panel. The dash-dotted red lines show the percentage + relative temperature di ff erence, ∆ T / T ref , with ∆ T = T − T ref . 
The solid blue lines correspond to the sub-adiabaticity ( ∇ − ∇ ad ). -The dashed black lines show the sub-adiabaticity of the refer- -ence model. The convective boundary is indicated by the vertical +The dashed black lines show the sub-adiabaticity of the reference + model. The convective boundary is indicated by the vertical solid line. The vertical dashed line in each panel is located at a distance d ov below the convective boundary. -The impact on the whole stellar structure was quantified by +The impact on the whole stellar structure was quantified by comparing the four structural quantities ( A , S , c 2 s , ρ) between the -modified and the reference model. The results are displayed in -Fig. 3, with ∆ X defined as ( X − X +modified and the reference model. The results are displayed in +Fig. 3, with ∆ X defined as ( X − X ref ) for any structural quantity X . -The forced local heating in the overshooting layer produces sim- -ilar positive peaks for ∆ A, ∆ S , and ∆ c 2 -s , as found for the temper- -ature. The modification thus provides the correction required to +The forced local heating in the overshooting layer produces similar + positive peaks for ∆ A, ∆ S , and ∆ c 2 +s , as found for the temperature. + The modification thus provides the correction required to improve the discrepancy for the Ledoux discriminant described -in the first of the trends outlined in Sect. 3.1. Unsurprisingly, -such a modification of the temperature gradient is expected to +in the first of the trends outlined in Sect. 3.1. Unsurprisingly, +such a modification of the temperature gradient is expected to improve the agreement with helioseismic constraints and help -4 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem -remove the sound speed anomaly below the convective bound- -ary (second trend in Sect. 
3.1), as suggested by the results of + +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +remove the sound speed anomaly below the convective boundary + (second trend in Sect. 3.1), as suggested by the results of Christensen-Dalsgaard et al. (2011). But it is also interesting to -note that such a modification yields a slight cooling of the con- -vective zone (see Fig. 2) and thus a negative di ff erence for the -entropy (see Fig. 3). A negative di fference in the convective en- -velope is in agreement with the correction required for the ref- -erence model of Buldgen et al. (2020) to better match the Sun -(see third trend in Sect. 3.1). Regarding the density, the modifi- -cation of the temperature gradient has an interesting impact in +note that such a modification yields a slight cooling of the convective + zone (see Fig. 2) and thus a negative di ff erence for the +entropy (see Fig. 3). A negative di fference in the convective envelope + is in agreement with the correction required for the reference + model of Buldgen et al. (2020) to better match the Sun +(see third trend in Sect. 3.1). Regarding the density, the modification + of the temperature gradient has an interesting impact in the radiative zone, with a large decrease in the density compared to the reference model over a broad region below the convective boundary. The impact on the density in the convective region for -this specific model is partly in agreement with the correction re- -quired for this quantity in the Buldgen et al. (2020) study, with a -positive diff erence found only in the upper part of the convective +this specific model is partly in agreement with the correction required + for this quantity in the Buldgen et al. (2020) study, with a +positive diff erence found only in the upper part of the convective envelope (see the fourth trend in Sect. 3.1). -These trends are insensitive to the depth over which the tem- -perature gradient is modified. 
Increasing the depth increases the -magnitude of the di ff erences but has no impact on their sign. We -find that the maximum variation in the model properties, such as +These trends are insensitive to the depth over which the temperature + gradient is modified. Increasing the depth increases the +magnitude of the di ff erences but has no impact on their sign. We +find that the maximum variation in the model properties, such as the speed of sound, ∆ c2 s / c2 s , ref , roughly scales with d 2 -ov . This scal- -ing is linked to the integrated area between the modified temper- -ature gradient curve and the one for the reference (non-modified) +ov . This scaling + is linked to the integrated area between the modified temperature + gradient curve and the one for the reference (non-modified) temperature gradient, which roughly decreases linearly with r . This area is proportional to the square of the overshooting depth, -and consequently, the maximum variation in the model proper- -ties is also proportional to d 2 +and consequently, the maximum variation in the model properties + is also proportional to d 2 ov . The qualitative trends also remain the same whether overshooting mixing in the reference model is ignored or included using a step function (with instantaneous -mixing) or an exponential decay for the diff usion coefficient (e.g. +mixing) or an exponential decay for the diff usion coefficient (e.g. Freytag et al. 1996). 3.2.2. Self-consistent evolutionary models -For the tests based on the second method, we ran di ff erent sets -of models with diff erent combinations of assumptions, including -or not microscopic diff usion and with or without overshooting -mixing. When overshooting mixing was included in the over- -shooting layer, it was based either on a step function or on an -exponential decay for the di ffusion coe ffi cient. Microscopic dif- -fusion for H and He was implemented according to Thoul et al. -(1994). 
For these tests, the temperature gradient was modified +For the tests based on the second method, we ran di ff erent sets +of models with diff erent combinations of assumptions, including +or not microscopic diff usion and with or without overshooting +mixing. When overshooting mixing was included in the overshooting + layer, it was based either on a step function or on an +exponential decay for the di ffusion coe ffi cient. Microscopic diffusion + for H and He was implemented according to Thoul et al. +(1994). For these tests, the temperature gradient was modified according to Eqs. (2)-(5). All models start from the ZAMS and are evolved until they reach the solar radius and luminosity at the same age. This was achieved by making small adjustments to the mixing length, l -mix . The models with temperature gradient mod- -ifications were compared to the relevant reference model, which -has no modification of the temperature gradient but everything -else is the same (i.e. the same treatment of microscopic diff u- -sion and of overshooting mixing). The evolutionary models with -temperature gradient modifications are thus self-consistent. The -main di fference between this approach and the one in the previ- -ous section is that these models accumulate small di ff erences in, -for example, central H abundance when compared to their ref- -erence model. These tests produce the same trends in the over- -shooting layer as found for the tests based on the first method +mix . The models with temperature gradient modifications + were compared to the relevant reference model, which +has no modification of the temperature gradient but everything +else is the same (i.e. the same treatment of microscopic diff usion + and of overshooting mixing). The evolutionary models with +temperature gradient modifications are thus self-consistent. 
The +main di fference between this approach and the one in the previous + section is that these models accumulate small di ff erences in, +for example, central H abundance when compared to their reference + model. These tests produce the same trends in the overshooting + layer as found for the tests based on the first method (Sect. 3.2.1), independently of the treatment of overshooting -mixing and whether microscopic di ffusion is included or not. -In the convective zone, all models give a positive di ff erence for -the density between the model with a modified temperature gra- -dient and the relevant reference model. For the other quantities +mixing and whether microscopic di ffusion is included or not. +In the convective zone, all models give a positive di ff erence for +the density between the model with a modified temperature gradient + and the relevant reference model. For the other quantities ( S , c2 -s ), the diff erences in the convective zone are very sensitive Fig. 3. Di fference of various structural quantities between a -model with a modified temperature gradient in the overshoot- -ing layer and a reference model calculated with the Lyon stellar -evolution code. The temperature gradient in the modified model +s ), the diff erences in the convective zone are very sensitive Fig. 3. Di fference of various structural quantities between a +model with a modified temperature gradient in the overshooting + layer and a reference model calculated with the Lyon stellar +evolution code. The temperature gradient in the modified model is changed over a distance d ov = α ov H - P, CB below the convec- -tive boundary (indicated by the vertical solid line). The lower + P, CB below the convective + boundary (indicated by the vertical solid line). The lower panel shows the results for α ov = 0. 15 and the upper panel for α ov = 0. 20. -to the assumptions regarding whether overshooting mixing is in- -cluded or not. 
But at least we find solutions that are compatible +to the assumptions regarding whether overshooting mixing is included + or not. But at least we find solutions that are compatible with the four trends found by Buldgen et al. (2020) for the four structural quantities. This is illustrated in Fig. 4 with a model -that accounts for step function overshooting mixing over a dis- -tance d +that accounts for step function overshooting mixing over a distance + d ov = 0 .15 H P, CB (lower panel) and d ov = 0. 20 H P, CB (upper panel). 4. Conclusion -The tests performed in Sect. 3 are based on di ff erent methods +The tests performed in Sect. 3 are based on di ff erent methods (relaxed models versus consistent evolution) that can be used to construct solar models. Independently of the method used, the -tests show that a local increase in the temperature in the over- -shooting region due to convective penetration provides the quali- -tative e ffects required to improve the speed of sound discrepancy +tests show that a local increase in the temperature in the overshooting + region due to convective penetration provides the qualitative + e ffects required to improve the speed of sound discrepancy below the convective boundary. This discrepancy is persistent in -5 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem -Fig. 4. Di fference of various structural quantities between a -modified model and a reference model calculated with the + +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Fig. 4. Di fference of various structural quantities between a +modified model and a reference model calculated with the MONSTAR stellar evolution code. The reference model is -evolved from the ZAMS with microscopic di ff usion and step +evolved from the ZAMS with microscopic di ff usion and step function overshooting mixing over a distance d ov = α ov H - P,CB be- -low the convective boundary. 
The lower panel shows the results + P,CB below + the convective boundary. The lower panel shows the results for α ov = 0. 15 and the upper panel for α ov = 0. 20. The models -with a modified temperature gradient in the overshooting layer -(same microscopic diffusion and overshooting mixing treatment +with a modified temperature gradient in the overshooting layer +(same microscopic diffusion and overshooting mixing treatment as the reference model) are evolved similarly from the ZAMS. The convective boundary is indicated by the vertical solid line. solar models that use low solar metal abundances. This is not -surprising because an increase in the temperature in this spe- -cific region has previously been invoked in the literature to solve +surprising because an increase in the temperature in this specific + region has previously been invoked in the literature to solve this problem, as mentioned in Sect. 1. However, the details of the physical process responsible for this local heating have been lacking, whereas we can now suggest an explanation based on -the B21 results. The trends that we find for the four structural +the B21 results. The trends that we find for the four structural quantities ( A, S , c2 -s , ρ) are robust below the convective bound- -ary and in a large fraction of the radiative core, independently of -the treatment of mixing and di ffusion and of the method for con- -structing the models in Sects. 3.2.1 and 3.2.2. Our experiments +s , ρ) are robust below the convective boundary + and in a large fraction of the radiative core, independently of +the treatment of mixing and di ffusion and of the method for constructing + the models in Sects. 3.2.1 and 3.2.2. 
Our experiments additionally show that such a local change in the temperature, -despite being made over a very limited region below the convec- -tive boundary, can also aff ect the density, the entropy, and the speed of sound in the convective envelope after thermal relax- -ation or evolution on the main sequence. How these quantities -are affected in the convective envelope compared to a reference +despite being made over a very limited region below the convective + boundary, can also aff ect the density, the entropy, and the speed of sound in the convective envelope after thermal relaxation + or evolution on the main sequence. How these quantities +are affected in the convective envelope compared to a reference model with no local heating depends on the strategy for building solar models and on the treatment of overshooting mixing. This mixing is obviously linked to the local heating given that both result from the same dynamical process. A combined testing of -both eff ects in stellar models could provide more constraints on +both eff ects in stellar models could provide more constraints on the general process of overshooting. -Increasingly, eff orts are now devoted to characterising the +Increasingly, eff orts are now devoted to characterising the process of convective boundary mixing in stellar models based on multi-dimensional hydrodynamical simulations. More work is required to obtain reliable determinations of an overshooting depth and to describe quantitatively the mixing and impact on -the temperature gradient. Understanding the e ff ects of rotation -and magnetic fields on overshooting is a significantly more dif- -ficult theoretical and numerical problem to address; however, -eff orts to study these combined non-linear e ff ects are ongoing -(Hotta 2017; Korre et al. 2021). Despite the limitations of ex- -isting hydrodynamical simulations, they are already providing +the temperature gradient. 
Understanding the e ff ects of rotation +and magnetic fields on overshooting is a significantly more difficult + theoretical and numerical problem to address; however, +eff orts to study these combined non-linear e ff ects are ongoing +(Hotta 2017; Korre et al. 2021). Despite the limitations of existing + hydrodynamical simulations, they are already providing constraints on physical processes usually treated with several free parameters in 1D stellar evolution models. They can thus -limit the degrees of freedom in a problem as complex as so- -lar modelling. Our primary goal in this work is to highlight the -potential impact of convective penetration on the thermal back- -ground in the overshooting region. The processes studied in B21 +limit the degrees of freedom in a problem as complex as solar + modelling. Our primary goal in this work is to highlight the +potential impact of convective penetration on the thermal background + in the overshooting region. The processes studied in B21 that produce a local change in the temperature gradient are also -responsible for the mixing in this region. Because much observa- -tional evidence points towards the need for extra mixing at con- -vective boundaries, for example lithium depletion in solar-like -stars (Baraff e et al. 2017), the size of convective cores (Claret +responsible for the mixing in this region. Because much observational + evidence points towards the need for extra mixing at convective + boundaries, for example lithium depletion in solar-like +stars (Baraff e et al. 2017), the size of convective cores (Claret & Torres 2016), and colour-magnitude diagrams (Castro et al. 2014), solar modellers often include this extra mixing in their -models. But a consistent approach should also require account- -ing for a local change in the temperature gradient. The impact of +models. But a consistent approach should also require accounting + for a local change in the temperature gradient. 
The impact of this local heating goes in the right direction to improve not only -the discrepancies of solar models below the convective bound- -ary, but also in the convective envelope. This e ffect o ff ers an in- -teresting step forward for solving the solar modelling problem. +the discrepancies of solar models below the convective boundary, + but also in the convective envelope. This e ffect o ff ers an interesting + step forward for solving the solar modelling problem. In this exploratory work, we adopt a simple prescription for the local heating in the overshooting layer since the main goal is to highlight its qualitative impact on stellar models. However, -this eff ect should not be considered as another free parameter in -the solar modelling problem. Future multi-dimensional hydro- -dynamical simulations will enable this process, and its treatment +this eff ect should not be considered as another free parameter in +the solar modelling problem. Future multi-dimensional hydrodynamical + simulations will enable this process, and its treatment in 1D stellar evolution codes, to be better constrained. 5. Acknowledgements We thank our anonymous referee for valuable comments which @@ -672,27 +672,27 @@ of part of this work. The authors would like to acknowledge the use of the University of Exeter High-Performance Computing (HPC) facility ISCA and of the DiRAC Data Intensive service at Leicester, operated by the University of Leicester IT Services, -which forms part of the STFC DiRAC HPC Facility. The equip- -ment was funded by BEIS capital funding via STFC capital +which forms part of the STFC DiRAC HPC Facility. The equipment + was funded by BEIS capital funding via STFC capital grants ST/ K000373 /1 and ST / R002363 /1 and STFC DiRAC Operations grant ST/ R001014 / 1. DiRAC is part of the National e-Infrastructure. 
-6 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem + +Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem References Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197 Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141 Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481 -Bara ffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403 -Bara ffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 -Bara ffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 +Bara ffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403 +Bara ffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 +Bara ffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825 Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79 Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36 Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space Sciences, 6, 42 Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33 -Ca ffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, +Ca ffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, Sol. Phys., 268, 255 Cai, T. 2020, ApJ, 888, 46 Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13 @@ -704,8 +704,8 @@ M. J. 2011, MNRAS, 414, 1158 Claret, A. & Torres, G. 2016, A&A, 592, A15 Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56 Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4 -Freytag, B., Ludwig, H. G., & Ste ffen, M. 1996, A&A, 313, 497 -Go ffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 +Freytag, B., Ludwig, H. G., & Ste ffen, M. 1996, A&A, 313, 497 +Go ffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 Grevesse, N. 
& Noels, A. 1993, in Origin and Evolution of the Elements, ed. N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25 Higl, J., M ¨ @@ -723,8 +723,8 @@ Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A, 293, 127 Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765 Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828 -Viallet, M., Bara ffe, I., & Walder, R. 2011, A&A, 531, A86 -Viallet, M., Go ffrey, T., Bara ffe, I., et al. 2016, A&A, 586, A153 +Viallet, M., Bara ffe, I., & Walder, R. 2011, A&A, 531, A86 +Viallet, M., Go ffrey, T., Bara ffe, I., et al. 2016, A&A, 586, A153 Viallet, M., Meakin, C., Arnett, D., & Moc ´ ak, M. 2013, ApJ, 769, 1 Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202 @@ -733,4 +733,4 @@ Zhang, C., Deng, L., Xiong, D., & Christensen-Dalsgaard, J. 2012, ApJ, 759, L14 Zhang, Q. S. & Li, Y. 2012, ApJ, 746, 50 Zhang, Q.-S., Li, Y., & Christensen-Dalsgaard, J. 2019, ApJ, 881, 103 - 7 + \ No newline at end of file diff --git a/read/results/playa/2201.00201.txt b/read/results/playa/2201.00201.txt index 8d7c0a1..faed955 100644 --- a/read/results/playa/2201.00201.txt +++ b/read/results/playa/2201.00201.txt @@ -2,7 +2,7 @@ Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_ January 19, 2022 Letter to the E ditor The period-age relation of long-period variables -M. Trabucchi1, +M. Trabucchi1, , N. Mowlavi1 Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland December 2021 @@ -15,10 +15,10 @@ which include Miras, in the period-age plane, and we compared it with observatio clusters. Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible -width and is skewed toward short periods, except for young C-rich stars. 
As a result, the period-age relations of O-rich and C- -rich models are predicted to have diff erent slopes. We derived best-fit relations describing age and initial mass as a function of the +width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and Crich + models are predicted to have diff erent slopes. We derived best-fit relations describing age and initial mass as a function of the fundamental mode period for both O- and C-rich models. -Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich +Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection biases, but rather to include semi-regular variables that pulsate predominantly in the fundamental mode. The use of the relations to study individual LPVs, on the other hand, requires more care given the scatter in the period distribution predicted at any given age. @@ -27,87 +27,87 @@ clusters: general – Magellanic Clouds 1. Introduction Low- to intermediate-mass stars approach the end of their lives through the asymptotic giant branch (AGB) evolutionary phase, -during which they exhibit pulsations with timescales up to sev- -eral hundreds of days, and they are hence known as long-period +during which they exhibit pulsations with timescales up to several + hundreds of days, and they are hence known as long-period variables (LPVs). If their V -band amplitude exceeds 2.5 mag, -they are classified as Miras, which have a rather regular periodic- -ity and they are believed to pulsate only in the radial fundamen- -tal mode (FM). 
If their photometric amplitude is smaller, they +they are classified as Miras, which have a rather regular periodicity + and they are believed to pulsate only in the radial fundamental + mode (FM). If their photometric amplitude is smaller, they are known as semi-regular variables (SRVs), which are thought to be the progenitors of Miras. The name stems from the lesser degree of regularity of their light curves, likely due to the fact that they can pulsate in multiple modes simultaneously. The notion that younger LPVs tend to display longer periods compared to older ones, often referred to as the period-age (PA) -relation, is rooted in the empirical evidence from stellar kinemat- -ics in the solar neighborhood. The first such piece of evidence +relation, is rooted in the empirical evidence from stellar kinematics + in the solar neighborhood. The first such piece of evidence is probably due toMerrill(1923), who pointed out that M-type -LPVs increasingly lag behind the local standard of rest (i.e., pos- -sess a higher asymmetric drift) as their period decreases. Later -studies (as summarized byWyatt & Cahn1983) confirmed this -behavior (also using proper motion data, e.g.,Wilson & Mer- -rill1942), and showed that the shorter periods are also accom- -panied by a higher velocity dispersion. Furthermore, groups of +LPVs increasingly lag behind the local standard of rest (i.e., possess + a higher asymmetric drift) as their period decreases. Later +studies (as summarized byWyatt & Cahn1983) confirmed this +behavior (also using proper motion data, e.g.,Wilson & Merrill1942), + and showed that the shorter periods are also accompanied + by a higher velocity dispersion. Furthermore, groups of LPVs with relatively short periods are characterized by a greater scale height above the Galactic plane. This was shown, using for Corresponding author: M. Trabucchi -( michele.trabucchi@unige.ch) the first time the radial velocity of LPVs in the southern hemi- -sphere, byFeast(1963). 
In this seminal paper, Feast realized +( michele.trabucchi@unige.ch) the first time the radial velocity of LPVs in the southern hemisphere, + byFeast(1963). In this seminal paper, Feast realized that LPVs with shorter periods must be members of older stellar populations and emphasized their highly promising applications for both Galactic and extra-galactic studies over a wide range -of stellar ages. It should be noted that the PA relation is con- -nected with the existence of a period-metallicity relation (Lloyd +of stellar ages. It should be noted that the PA relation is connected + with the existence of a period-metallicity relation (Lloyd Evans & Menzies1973;Lloyd Evans1983b;Feast1981;Feast & Whitelock2000a, and references therein). A number of subsequent works have corroborated the PA relation on empirical grounds, or have exploited it to interpret observational results. Relevant examples are studies of LPVs in -globular clusters (e.g.,Feast1966;Lloyd Evans1983b;White- -lock1986), toward the galactic center and bulge (Lloyd Evans +globular clusters (e.g.,Feast1966;Lloyd Evans1983b;Whitelock1986), + toward the galactic center and bulge (Lloyd Evans 1976;Feast et al.1980;Whitelock et al.1991) or at high galactic -latitude (Jura & Kleinmann1992;Whitelock et al.1994). Of par- -ticular interest is the recent eff ort to extend the analysis of LPVs +latitude (Jura & Kleinmann1992;Whitelock et al.1994). Of particular + interest is the recent eff ort to extend the analysis of LPVs to dwarf galaxies in the Local Group (Menzies et al.2002,2008; Whitelock et al.2009;Menzies et al.2010,2011;Sakamoto et al. 2012;Battinelli & Demers2012,2013;Whitelock et al.2013; Menzies et al.2015). -The Hipparcos mission provided the means to refine the re- -sults on the period-kinematics connection. This was done by +The Hipparcos mission provided the means to refine the results + on the period-kinematics connection. 
This was done by Feast & Whitelock(2000b), who found evidence supporting the existence of a bar-like structure in the Bulge from the orbits of -local LPVs. A similar study dedicated to C-rich LPVs was per- -formed byFeast et al.(2006), who provided quantitative age +local LPVs. A similar study dedicated to C-rich LPVs was performed + byFeast et al.(2006), who provided quantitative age estimates for these stars. A summary of the main results and prospects emerging from these Hipparcos-era studies is given by Article number, page 1 of 9arXiv:2201.00201v2 [astro-ph.SR] 17 Jan 2022 A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs Feast(2007). More recently, the study of the Galaxy with LPVs has been stimulated by the wealth of data acquired by large-scale -surveys (e.g.,Catchpole et al.2016;Urago et al.2020), espe- -cially the Gaia mission (Grady et al.2019,2020). +surveys (e.g.,Catchpole et al.2016;Urago et al.2020), especially + the Gaia mission (Grady et al.2019,2020). It seems relevant that just a few years after the study ofFeast (1963),Kippenhahn & Smith(1969) predicted the PA relation -of classical Cepheids from stellar evolution and pulsation mod- -els. The theoretical modeling of Cepheids and of their period- -luminosity (PL) and PA relations is now an active field of re- -search (e.g.,Bono et al.2005;Anderson et al.2016;De Somma +of classical Cepheids from stellar evolution and pulsation models. + The theoretical modeling of Cepheids and of their periodluminosity + (PL) and PA relations is now an active field of research + (e.g.,Bono et al.2005;Anderson et al.2016;De Somma et al.2020). In contrast, when it comes to theoretical assessments -of the LPV PA relation, the literature is surprisingly scarce (espe- -cially in comparison with the significant e ff ort put into empirical -studies). In fact, we were able to identify only two relevant stud- -ies addressing this subject (Wyatt & Cahn1983;Eggen1998). 
-The discrepancy in period predictions between linear and nonlin- -ear pulsation models (e.g.,Ya’Ari & Tuchman1996;Lebzelter +of the LPV PA relation, the literature is surprisingly scarce (especially + in comparison with the significant e ff ort put into empirical +studies). In fact, we were able to identify only two relevant studies + addressing this subject (Wyatt & Cahn1983;Eggen1998). +The discrepancy in period predictions between linear and nonlinear + pulsation models (e.g.,Ya’Ari & Tuchman1996;Lebzelter & Wood2005;Trabucchi et al.2021b), and more generally the -di fficulty in modeling the structure of evolved red giants, likely +di fficulty in modeling the structure of evolved red giants, likely played a role in hampering the theoretical investigation of the PA relation of LPVs. -Motivated by the release of updated AGB evolutionary mod- -els (Pastorelli et al.2019,2020) and the availability of new, ac- -curate model predictions for the FM period of AGB stars (Tra- -bucchi et al.2019,2021b), we decided to investigate the nature +Motivated by the release of updated AGB evolutionary models + (Pastorelli et al.2019,2020) and the availability of new, accurate + model predictions for the FM period of AGB stars (Trabucchi + et al.2019,2021b), we decided to investigate the nature of the PA relation of LPVs on theoretical grounds. The adopted models and observed data are described in Sect.2, while in Sect.3we present the results, which are discussed in Sect.4. @@ -117,10 +117,10 @@ We summarize our conclusions in Sect.5. We employed PARSEC-COLIBRI isochrones (Marigo et al. 2017) with stellar evolutionary models fromPastorelli et al. (2019,2020) for the thermally pulsing asymptotic giant branch -(TP-AGB) phase, and from PARSEC (Bressan et al.2012, ver- -sion 1.2S) for the preceding evolution. 
The adopted set of -isochrones covers the range 0.001 to 0.016 in initial metal- -licity (Z +(TP-AGB) phase, and from PARSEC (Bressan et al.2012, version + 1.2S) for the preceding evolution. The adopted set of +isochrones covers the range 0.001 to 0.016 in initial metallicity + (Z i ), with a 0.001 step, while it spans the age interval 8 .00 ≤ log( τ/ yr) ≤ 10 . 45 with a step of 0.05. Since the AGB phase is short-lived, it only spans a small range of initial masses @@ -143,8 +143,8 @@ decreases as soon as the “bending radius” R becomes zero when the “saturation radius” R s > R b is reached -(i.e., the period becomes independent of radius). The exact val- -ues of R +(i.e., the period becomes independent of radius). The exact values + of R b and R s , as well as of the exponents, depend on the current mass ( M ). We assume that the FM is dominant if the @@ -155,7 +155,7 @@ et al.(2021b). 1 Hereinafter, whenever we discuss periods, it should be understood that we refer to FM periods on which this work is focused. 2.2. Data -As a first set of data, we considered the cluster-LPV pairs used +As a first set of data, we considered the cluster-LPV pairs used byGrady et al.(2019, see their tables 1 and 2). These consist of 19 clusters in the Large Magellanic Cloud, hosting a total of 20 potential LPV members, and eight Galactic clusters each hosting @@ -163,26 +163,26 @@ a potential LPV member. We expanded this list with data for LPVs in a few populous clusters, namely the Galactic clusters NGC 362, NGC 2808, 47 Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC -1978 and NGC 1846; and the cluster NGC 419 in the Small Mag- -ellanic Cloud (SMC). The source lists were taken fromLebzel- -ter & Wood(2005,2007,2011,2016) andKamath et al.(2010), -whose notation for the sources names is adopted here. After ex- -cluding the star LW3 in NGC 1846 and the star V129 in ω Cen, +1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic + Cloud (SMC). 
The source lists were taken fromLebzelter + & Wood(2005,2007,2011,2016) andKamath et al.(2010), +whose notation for the sources names is adopted here. After excluding + the star LW3 in NGC 1846 and the star V129 in ω Cen, which are unlikely cluster members (cf.Lebzelter & Wood2007, 2016), we reached a total of 203 sources. -The aforementioned studies also provide a lot of informa- -tion, possibly including J H K photometry, one or more periods, +The aforementioned studies also provide a lot of information, + possibly including J H K photometry, one or more periods, and a spectral type. In order to expand on the available data, -we crossmatched the selected sample with the Two Micron All- -Sky Survey (2MASS,Skrutskie et al.2006), the all-sky data -release of the Wide-field Infrared Survey Explorer (AllWISE, -Cutri et al.2013), the catalog of variable stars from the All- -Sky Automated Survey for SuperNovae (ASAS-SNJayasinghe +we crossmatched the selected sample with the Two Micron AllSky + Survey (2MASS,Skrutskie et al.2006), the all-sky data +release of the Wide-field Infrared Survey Explorer (AllWISE, +Cutri et al.2013), the catalog of variable stars from the AllSky + Automated Survey for SuperNovae (ASAS-SNJayasinghe et al.2020), the catalogs of LPVs in the Magellanic Clouds from the third phase of the Optical Gravitational Lensing Experiment (OGLE-III,Soszy ´ -nski et al.2009,2011), the early third data re- -lease from the Gaia mission ( Gaia EDR3,Gaia Collaboration +nski et al.2009,2011), the early third data release + from the Gaia mission ( Gaia EDR3,Gaia Collaboration et al.2021), and the catalog of LPV candidates from Gaia DR2 (Mowlavi et al.2018). FollowingGrady et al.(2019), we took ages from @@ -200,192 +200,192 @@ adopted byGrady et al.2019, in their Fig. 7). As discussed byKamath et al.(2010), the age of the SMC cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is consistent with the value τ = 1. 
45 ± 0 .05 Gyr fromGoudfrooij -et al.(2014), while it is as young as τ 0. 89 ± 0. 015 Gyr ac- -cording toPerren et al.(2017). Since an accurate estimate is not +et al.(2014), while it is as young as τ 0. 89 ± 0. 015 Gyr according + toPerren et al.(2017). Since an accurate estimate is not necessary for our exploratory analysis, we took a rough average and assumed log( τ/ yr) = 9 .1 ± 0. 1. NGC 419 and NGC 1846 likely exhibit TP-AGB boosting (Girardi et al.2013). We note that some clusters show multiple stellar populations, whose age spread has been estimated in some cases (e.g.,Mackey & Broby -Nielsen2007;Joo & Lee2013;Villanova et al.2014) and is con- -sistent with the age uncertainties we adopted. +Nielsen2007;Joo & Lee2013;Villanova et al.2014) and is consistent + with the age uncertainties we adopted. Distances of Galactic clusters were also taken from Kharchenko et al.(2016), while for the Magellanic Clouds and their clusters we adopted the distance moduli µ LMC = 18 .49 ± 0. 09 mag and µ SMC = 18. 96 ± 0. 02 mag fromde Grijs et al. -(2017). We searched for data on interstellar extinction from sev- -eral literature works (e.g.,Nayak et al.2016;Kharchenko et al. +(2017). We searched for data on interstellar extinction from several + literature works (e.g.,Nayak et al.2016;Kharchenko et al. 2016;Perren et al.2017), all of which suggest that extinction in the K -s filter is smaller than ∼ 0 .1 mag for most of the clus- -ters we considered, and at most as large as ∼ 0 .3 mag, which is +s filter is smaller than ∼ 0 .1 mag for most of the clusters + we considered, and at most as large as ∼ 0 .3 mag, which is negligible for our purposes. Article number, page 2 of 9 Trabucchi et al.: The period-age relation of LPVs -A detailed membership verification is beyond the scope of +A detailed membership verification is beyond the scope of this work, and we relied on the checks performed by authors whose source lists we adopted. 
It should be kept in mind that some sources may not be real cluster members. -For sources without a spectral type, we used the Gaia- -2MASS diagram (Lebzelter et al.2018,2019) to determine -whether they are O- or C-rich. We used the near-infrared period- -luminosity diagram to identify the most likely pulsation mode -associated with each period of each observed source. We se- -lected only FM periods and rejected long secondary periods and +For sources without a spectral type, we used the Gaia2MASS + diagram (Lebzelter et al.2018,2019) to determine +whether they are O- or C-rich. We used the near-infrared periodluminosity + diagram to identify the most likely pulsation mode +associated with each period of each observed source. We selected + only FM periods and rejected long secondary periods and periods attributed to overtone mode pulsation. The details of -these classification steps are provided in AppendixA. Out of -203 sources from the initial list, we identified 95 LPVs pulsat- -ing in the FM, consisting of 40 C-rich and 55 O-rich sources. +these classification steps are provided in AppendixA. Out of +203 sources from the initial list, we identified 95 LPVs pulsating + in the FM, consisting of 40 C-rich and 55 O-rich sources. They consist of 29 Miras, 33 semi-regular variables, and 33 other sources (most likely LPVs) whose variability type has not been determined. We note that, with the exception of Gaia DR2, the -sources of variability data considered here do not report the un- -certainty associated with observed periods. However, since peri- -ods were derived in most cases from well-sampled, high-quality +sources of variability data considered here do not report the uncertainty + associated with observed periods. However, since periods + were derived in most cases from well-sampled, high-quality variability observations, relative period uncertainties are most likely negligible compared with those associated with age. 3. 
Results -Panel (a) of Fig.1shows a comparison between model predic- -tions and observations in the P +Panel (a) of Fig.1shows a comparison between model predictions + and observations in the P FM –log(τ/ yr) plane. The former are displayed by a density map showing the expected number N - FM of LPVs pulsating in the FM in each period-age bin, nor- -malized to maximum. Model predictions are in good agreement + FM of LPVs pulsating in the FM in each period-age bin, normalized + to maximum. Model predictions are in good agreement with data derived from observations (i.e., individual LPVs in -clusters, represented by symbols), and they show that the pe- -riod of LPVs pulsating in the FM decreases with increasing age. -Crosses mark the average properties of the three groups of C- -rich LPVs fromFeast et al.(2006, their table 4), which fit the +clusters, represented by symbols), and they show that the period + of LPVs pulsating in the FM decreases with increasing age. +Crosses mark the average properties of the three groups of Crich + LPVs fromFeast et al.(2006, their table 4), which fit the general pattern with the exception of their group 3, estimated to be older than what our models predict at P 650. -We also show a linear best-fit to the models distribution +We also show a linear best-fit to the models distribution (weighted by N FM ), which shows a fairly good agreement with -the best-fit to observations byGrady et al.(2019, also shown). -However, the best-fit line does not fully capture the properties +the best-fit to observations byGrady et al.(2019, also shown). +However, the best-fit line does not fully capture the properties of the predictions, nor of the observed trend. Indeed, models are -indicative of a substantial dispersion around the relation. For in- -stance, at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550 +indicative of a substantial dispersion around the relation. For instance, + at 1 Gyr, the FM period ranges from ∼ 200 days to ∼ 550 days. 
Conversely, LPVs pulsating in the FM with a period of 350 days are predicted to be at least ∼200 Myr old, but they can be as old as ∼3 Gyr. Observed data are consistent with the predicted -spread, although the agreement cannot be considered as the ob- -served sample adopted is not complete. +spread, although the agreement cannot be considered as the observed + sample adopted is not complete. Nonetheless, it is relevant that some clusters host multiple LPVs, which are thus almost coeval, and they do span a wide -period range. Some of these clusters host multiple stellar popu- -lations that are believed to have formed over a time comparable -with the age uncertainties we adopted. This means that longer- -period (more massive) LPVs in these clusters probably lean to- -ward the lower age limit assumed for their host cluster, and the +period range. Some of these clusters host multiple stellar populations + that are believed to have formed over a time comparable +with the age uncertainties we adopted. This means that longerperiod + (more massive) LPVs in these clusters probably lean toward + the lower age limit assumed for their host cluster, and the opposite is true at shorter periods. This tends to strengthen the agreement between models and observations. Our data set samples the intermediate-age range (NGC 419 and NGC 1846) relatively well as well as old ages ( ω Cen, 47 -Tuc, NGC 362, and NGC 2808). This provides us with the op- -portunity to study the period distribution at these ages, and for -a more detailed comparison between models and observations. On the basis of the average age of these two groups of clus- -ters and the associated uncertainty, and taking the discrete age +Tuc, NGC 362, and NGC 2808). This provides us with the opportunity + to study the period distribution at these ages, and for +a more detailed comparison between models and observations. 
On the basis of the average age of these two groups of clusters + and the associated uncertainty, and taking the discrete age sampling of the isochrones into account, we considered the age -ranges log( τ/yr) = 9. 15 ± 0. 10 and log(τ/yr) = 10. 10 ± 0. 20. Pe- -riod distributions at those ages are displayed in panels (b) and (c) +ranges log( τ/yr) = 9. 15 ± 0. 10 and log(τ/yr) = 10. 10 ± 0. 20. Period + distributions at those ages are displayed in panels (b) and (c) of Fig.1, respectively, showing good agreement between model -predictions and observations. We note that in both cases, the dis- -tribution is skewed toward short periods, which seems to be true +predictions and observations. We note that in both cases, the distribution + is skewed toward short periods, which seems to be true at all ages for O-rich stars. This can be seen in panel (a) of Fig.2, -which is a version of the PA plane limited to an O-rich compo- -sition 2 +which is a version of the PA plane limited to an O-rich composition + 2 . Indeed, although at τ 5 Gyr the observed sample is very scarce, it appears to be consistent with models predicting a more densely populated region in the shorter-period half of the PA distribution. -The case of C-stars, shown in panel (b) of Fig.2, is diff er- -ent. They only form over a restricted range of initial masses +The case of C-stars, shown in panel (b) of Fig.2, is diff erent. + They only form over a restricted range of initial masses and ages, so their occurrence in a given stellar population is an age indicator on its own. Toward the low-mass (old age) side of the C-star regime, the behavior is similar to the O-rich case -with a concentration around relatively short periods. C-rich mod- -els tend to have a lower surface temperature and larger radii, -at a given mass, compared to O-rich models, and thus they at- -tain longer periods more easily. 
This occurs in particular toward -higher masses, so that younger C-rich models are more concen- -trated at longer periods, leading to a steeper PA relation com- -pared with the O-rich case. These predictions agree with ob- -servations on the old side of the period distribution, while the +with a concentration around relatively short periods. C-rich models + tend to have a lower surface temperature and larger radii, +at a given mass, compared to O-rich models, and thus they attain + longer periods more easily. This occurs in particular toward +higher masses, so that younger C-rich models are more concentrated + at longer periods, leading to a steeper PA relation compared + with the O-rich case. These predictions agree with observations + on the old side of the period distribution, while the scarcity of C stars at τ 0 .6 Gyr prevents us from performing a comparison at younger ages. -In appendixB, we provide analytic PA relations by fitting the +In appendixB, we provide analytic PA relations by fitting the high-density parts of the O- and C-rich models’ distribution. We emphasize that, because of the large scatter of the relation, ages estimated in this way for individual LPVs are bound to be highly uncertain. As a way to assess the error in age determination, we -also provide analytic best-fit relations to the boundaries of the +also provide analytic best-fit relations to the boundaries of the PA distribution of the models in the appendix. These relations are displayed in Fig.2. 4. Discussion -In general agreement with observations, models confirm that +In general agreement with observations, models confirm that LPVs pulsating predominantly in the FM follow a PA relation, which exhibits a non-negligible dispersion. Thanks to the newly -available nonlinear period predictions, we were able to better ex- -amine the nature of this relation and the origin of its scatter. 
+available nonlinear period predictions, we were able to better examine + the nature of this relation and the origin of its scatter. The PA relation is intimately connected with the PL relation, both patterns emerging because of the prominent role of mass in -shaping stellar structure and evolution. Indeed, stellar mass de- -termines the lifetimes of the main evolutionary stages, and thus -the age of stars in the AGB phase. Pulsation models (Trabuc- -chi et al.2021b) show that the radius R -dom ,0 (and correspond- -ing luminosity) at the onset of dominant FM pulsation (DFMP) +shaping stellar structure and evolution. Indeed, stellar mass determines + the lifetimes of the main evolutionary stages, and thus +the age of stars in the AGB phase. Pulsation models (Trabucchi + et al.2021b) show that the radius R +dom ,0 (and corresponding + luminosity) at the onset of dominant FM pulsation (DFMP) increases with mass, so that the most massive FM-dominated -LPVs are brighter. They also have longer periods, as this in- -creases with radius. In other words, the period, luminosity, and +LPVs are brighter. They also have longer periods, as this increases + with radius. In other words, the period, luminosity, and age near the tip of the AGB are all functions of initial stellar mass (at least to a good approximation). -We note that this would not be the case if the FM were dom- -inant along the entire AGB, as the large change in radius during +We note that this would not be the case if the FM were dominant + along the entire AGB, as the large change in radius during this phase would result in a wide range of periods at a given age. -It is the very fact that DFMP occurs only during the final portion +It is the very fact that DFMP occurs only during the final portion 2 A further version of the PA plane highlighting both chemical types can be found in Fig.A.2of appendixA.1. Article number, page 3 of 9 A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs Fig. 
1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on -a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape +a linear scale, normalized to maximum). Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of -galactic C-stars ofFeast et al.(2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit -to models and the best-fit byGrady et al.(2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked -in panel (a) by the blue and red shaded areas (at log( τ/yr) ∼ 9. 15 and ∼ 10. 10, respectively). For clarity, the eff ect of the TP-AGB boosting is +galactic C-stars ofFeast et al.(2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit +to models and the best-fit byGrady et al.(2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked +in panel (a) by the blue and red shaded areas (at log( τ/yr) ∼ 9. 15 and ∼ 10. 10, respectively). For clarity, the eff ect of the TP-AGB boosting is suppressed in panel (a). -Fig. 2. Similar to Fig.1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while -dashed lines are best fits to the edges of the model distribution (see the text for more details). +Fig. 2. Similar to Fig.1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while +dashed lines are best fits to the edges of the model distribution (see the text for more details). 
of the AGB that limits the range of periods a FM-pulsating LPV can have at a given age. Yet, the DFMP part of the AGB is long -enough for significant variations in radius to occur, which result +enough for significant variations in radius to occur, which result in the dispersion of the PA relation seen in Fig.1. At a given initial metallicity Z -i , the shape of the period dis- -tribution primarily results from the fact that, throughout the TP- -AGB (the stage during which the FM is normally excited), the -envelope expansion accelerates, while the period becomes pro- -gressively less sensitive to changes in radius (see AppendixC). +i , the shape of the period distribution + primarily results from the fact that, throughout the TPAGB + (the stage during which the FM is normally excited), the +envelope expansion accelerates, while the period becomes progressively + less sensitive to changes in radius (see AppendixC). In particular, the slope of the period-radius relation decreases sharply at P b = P(R b ). The FM period distribution is roughly symmetric around that value, but at its short-period side, the FM is not dominant. Therefore, when only FM-dominated LPVs are -considered, as is done here, the observed period distribution ap- -pears skewed toward short periods. This feature is strengthened when a set of isochrones is con- -sidered which spans a range of initial metallicities because the +considered, as is done here, the observed period distribution appears + skewed toward short periods. This feature is strengthened when a set of isochrones is considered + which spans a range of initial metallicities because the adopted criterion for the onset of DFMP does not depend on metallicity, but the FM period does as metal-poor LPVs are warmer and have smaller radii compared with metal-rich ones. 
-As a consequence, the bulk of the period distribution of metal- -poor LPVs is at periods shorter than P +As a consequence, the bulk of the period distribution of metalpoor + LPVs is at periods shorter than P b , so they only contribute to the global distribution (i.e., at all Z i at a given age) over a @@ -395,44 +395,44 @@ periods well beyond P b , so they contribute both at that value and at longer periods. The result is an excess of FM-dominated LPVs near P -b , that is to say on the short side of the overall period dis- -tribution. +b , that is to say on the short side of the overall period distribution. + We note that, in contrast with the prescription we adopted, -the onset of DFMP in reality is probably sensitive to metallic- -Article number, page 4 of 9 +the onset of DFMP in reality is probably sensitive to metallicArticle + number, page 4 of 9 Trabucchi et al.: The period-age relation of LPVs -ity. While the good degree of agreement with observations sug- -gests that the dependence is weak at most, it is possible for +ity. While the good degree of agreement with observations suggests + that the dependence is weak at most, it is possible for any discrepancy to be smeared out by the fact that our set of -isochrone implicitly assumes a flat star-formation rate with no +isochrone implicitly assumes a flat star-formation rate with no age-metallicity relation, so it is not an accurate representation of any realistic stellar environment. In this sense, the PA relation is environment-dependent, and it is not necessarily universal. A further point of uncertainty stems from the fact that the -prescription we adopted assumes that the FM period only de- -pends upon the mass and radius, and that it is a ffected by a -change in composition only through the eff ect that such a varia- -tion has on the radius. 
While this is true to a good approximation, -linear models show a small dependence of periods on metallic- -ity at a fixed mass and radius, but the quantitative impact in the +prescription we adopted assumes that the FM period only depends + upon the mass and radius, and that it is a ffected by a +change in composition only through the eff ect that such a variation + has on the radius. While this is true to a good approximation, +linear models show a small dependence of periods on metallicity + at a fixed mass and radius, but the quantitative impact in the nonlinear case is unknown. We can only estimate, based on the results ofTrabucchi et al.(2019), an uncertainty of ±10% at most with respect to the prescriptions adopted here. Qualitatively, a realistic age-metallicity relation and the metallicity dependence of the period and of the onset of DFMP are all expected to result in a steeper PA relation than the one -we predict, but it is di fficult to assess the relative importance of -these e ff ects. In this sense, the composition probably a ffects the +we predict, but it is di fficult to assess the relative importance of +these e ff ects. In this sense, the composition probably a ffects the shape of the PA relation more than its dispersion. The latter is -likely aff ected by the composition indirectly through mass loss, -the analysis of which is beyond the scope of this study. How- -ever, we point out that mass loss represents a source of scatter in -combination with the occurrence of thermal pulses, because it re- -duces the minimum radius for the onset of DFMP. Thus, during +likely aff ected by the composition indirectly through mass loss, +the analysis of which is beyond the scope of this study. However, + we point out that mass loss represents a source of scatter in +combination with the occurrence of thermal pulses, because it reduces + the minimum radius for the onset of DFMP. 
Thus, during the luminosity dips associated with thermal pulses, a LPV can -have a period shorter than the one it had when it first entered the -DFMP regime (see AppendixC). An additional source of uncer- -tainty, which we disregarded, is rotation (or other processes that +have a period shorter than the one it had when it first entered the +DFMP regime (see AppendixC). An additional source of uncertainty, + which we disregarded, is rotation (or other processes that induce extra mixing in the core) which causes a spread in ages at a given initial mass (cf.Anderson et al.2016, for the case of classical Cepheids). @@ -441,14 +441,14 @@ encourages the use of LPVs as age indicators, but the scatter of the PA relation hampers this application. We attempted to reduce the scatter through corrections involving photometric properties, as is customarily done for classical Cepheids with a color term -(e.g.,Bono et al.2005), but with unsatisfactory results. A correc- -tion dependent on the photometric amplitude of variability rep- -resents a promising alternative, but it cannot be pursued at the -moment. Indeed, for computational efficiency, current pulsation +(e.g.,Bono et al.2005), but with unsatisfactory results. A correction + dependent on the photometric amplitude of variability represents + a promising alternative, but it cannot be pursued at the +moment. Indeed, for computational efficiency, current pulsation models include only a crude treatment of the atmospheric layers -as they do not aff ect pulsation periods. On the other hand, the -atmosphere is crucial in determining the spectral energy distri- -bution and its variation throughout the pulsation cycle, and hence +as they do not aff ect pulsation periods. On the other hand, the +atmosphere is crucial in determining the spectral energy distribution + and its variation throughout the pulsation cycle, and hence the amplitude of variability. 
At the same time, the observational sample adopted here is too heterogeneous for a self-consistent investigation of amplitude, but this kind of study could be made @@ -460,96 +460,96 @@ Observatory. It is worth noting that our analysis applies to Miras as well as SRVs, provided that they predominantly pulsate in the FM. The limitation of PA relation studies to Miras, as has mainly -been done in literature so far, undoubtedly has some advan- -tages: to begin with, the fact that Miras are typically easier to +been done in literature so far, undoubtedly has some advantages: + to begin with, the fact that Miras are typically easier to detect than SRVs, and their light curves are easier to process as they tend to be more regular. Moreover, Miras represent the end-point of AGB evolution, so in principle they correspond to a smaller range of stellar parameters compared to the full extent of the DFMP regime, and they display a smaller range of periods at a given age (cf.Feast & Whitelock2000b). In other words, they should exhibit a relatively narrow PA relation (even though, -based on the observational data set we adopted, there is no con- -clusive evidence that considering only Miras reduces the scatter +based on the observational data set we adopted, there is no conclusive + evidence that considering only Miras reduces the scatter of the PA relation). Nonetheless, we caution against this approach as it is prone to introducing uncontrolled biases, as the traditional distinction between SRVs and Miras is arbitrary (seeTrabucchi et al.2021a, -and references therein). As such, it disregards the physical pro- -cesses at the origin of the range of amplitudes characterizing -LPVs. In particular, photometric amplitudes are largely deter- -mined by the formation and dissociation of molecules in the stel- -lar atmosphere, and they are likely to be metallicity-dependent. +and references therein). 
As such, it disregards the physical processes + at the origin of the range of amplitudes characterizing +LPVs. In particular, photometric amplitudes are largely determined + by the formation and dissociation of molecules in the stellar + atmosphere, and they are likely to be metallicity-dependent. It is therefore reasonable to assume that metal-poor (old) Mira -analogs might be classified as SRVs, thereby undermining the +analogs might be classified as SRVs, thereby undermining the potential application of the PA relation if restricted to Miras. This seems to be supported by the fact that the bulk of old LPVs -in our sample are classified as SRVs. Therefore, studies involv- -ing PA relations of LPVs would advantageously include both +in our sample are classified as SRVs. Therefore, studies involving + PA relations of LPVs would advantageously include both Miras and FM-pulsating SRVs. The challenge associated with SRVs stems from the fact that -they are often multiperiodic (even when predominantly pulsat- -ing in the FM), a property that complicates the light curve anal- -ysis and period extraction. At the same time, this feature could +they are often multiperiodic (even when predominantly pulsating + in the FM), a property that complicates the light curve analysis + and period extraction. At the same time, this feature could potentially improve age determinations as overtone modes are expected to display a PA relation as well. 5. Conclusions We used the results from recent nonlinear pulsation calculations -and combined them with state-of-the-art isochrone models to in- -vestigate the PA relation of FM-dominated LPVs, finding good -agreement with the distribution of observed LPVs in star clus- -ters. 
The theoretical PA relation displays a non-negligible scat- -ter, whose origin we identified due to the fact that, despite being +and combined them with state-of-the-art isochrone models to investigate + the PA relation of FM-dominated LPVs, finding good +agreement with the distribution of observed LPVs in star clusters. + The theoretical PA relation displays a non-negligible scatter, + whose origin we identified due to the fact that, despite being very brief, the portion of AGB evolution during which the FM becomes dominant shows a relatively large range in mass and radius at a given age. -The theoretical distribution of FM periods is roughly sym- -metric, but the FM is not dominant at the shortest periods. As a -result, models predict that the distribution of dominant FM peri- -ods at a given age is skewed toward short periods, in agreement +The theoretical distribution of FM periods is roughly symmetric, + but the FM is not dominant at the shortest periods. As a +result, models predict that the distribution of dominant FM periods + at a given age is skewed toward short periods, in agreement with observations. Depending on stellar populations, metallicity may enhance this feature as metal-poor LPVs, which tend to be warmer and more compact, only contribute near short periods. -We provide the best-fit PA relation separately for O-rich and +We provide the best-fit PA relation separately for O-rich and C-rich FM-pulsating LPVs. The latter LPVs show a steeper PA relation because of their lower surface temperatures, which allow them to reach longer periods more easily. Our analysis concerns all LPVs predominantly pulsating in -the FM, regardless of whether they are classified as Miras or +the FM, regardless of whether they are classified as Miras or SRVs. We discourage such a distinction in that it is arbitrary and prone to selection biases that risk compromising the use of LPVs as age indicators. 
-The main limitation in the use of the PA relation for age de- -terminations of individual LPVs stems from its relatively large -scatter. We suggest that corrective terms, involving the ampli- -tude of variability, might help to reduce this scatter and antici- -pate that upcoming data from ongoing and future surveys dedi- -cated to time-domain astronomy will be highly valuable to probe +The main limitation in the use of the PA relation for age determinations + of individual LPVs stems from its relatively large +scatter. We suggest that corrective terms, involving the amplitude + of variability, might help to reduce this scatter and anticipate + that upcoming data from ongoing and future surveys dedicated + to time-domain astronomy will be highly valuable to probe this possibility. A study of the impact of metallicity on nonlinear pulsation is highly desirable to pursue this line of investigation, Article number, page 5 of 9 A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -as would be a theoretical investigation of the dependence of pho- -tometric amplitudes upon global stellar parameters. +as would be a theoretical investigation of the dependence of photometric + amplitudes upon global stellar parameters. Acknowledgements. M.T. and N.M. acknowledge the support provided by the Swiss National Science Foundation through grant Nr. 188697. We are grateful to the anonymous referee for the constructive comments that helped improving -this paper, and to Léo Girardi for helping with the computation and interpre- -tation of isochrones. 
This research has made use of: data from the OGLE-III -Catalog of Variable Stars; data products from the Two Micron All Sky Sur- -vey, which is a joint project of the University of Massachusetts and the In- -frared Processing and Analysis Center/ California Institute of Technology, funded -by the National Aeronautics and Space Administration and the National Sci- -ence Foundation; data from the European Space Agency (ESA) mission Gaia -(https://www.cosmos.esa.int/gaia ), processed by the Gaia Data Process- -ing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ -gaia/dpac/consortium ). Funding for the DPAC has been provided by na- -tional institutions, in particular the institutions participating in the Gaia Multi- -lateral Agreement. This research has made use of the following free / open source +this paper, and to Léo Girardi for helping with the computation and interpretation + of isochrones. This research has made use of: data from the OGLE-III +Catalog of Variable Stars; data products from the Two Micron All Sky Survey, + which is a joint project of the University of Massachusetts and the Infrared + Processing and Analysis Center/ California Institute of Technology, funded +by the National Aeronautics and Space Administration and the National Science + Foundation; data from the European Space Agency (ESA) mission Gaia +(https://www.cosmos.esa.int/gaia ), processed by the Gaia Data Processing + and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ +gaia/dpac/consortium ). Funding for the DPAC has been provided by national + institutions, in particular the institutions participating in the Gaia Multilateral + Agreement. 
This research has made use of the following free / open source software and/ or libraries: the Starlink Tables Infrastructure Library (STILTS and Topcat,Taylor2006); IPython (Pérez & Granger2007) and Jupyter (Kluyver et al.2016) notebooks; the P ython libraries N umPy (Harris et al.2020), SciP y -(Virtanen et al.2020), matplotlib (a Python library for publication quality graph- -ics,Hunter2007), and A stropy (a community-developed core Python package +(Virtanen et al.2020), matplotlib (a Python library for publication quality graphics,Hunter2007), + and A stropy (a community-developed core Python package for Astronomy,Astropy Collaboration et al.2018). This research has made use of NASA’s Astrophysics Data System Bibliographic Services, and of the following services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue @@ -566,8 +566,8 @@ Battinelli, P. & Demers, S. 2012, A&A, 544, A10 Battinelli, P. & Demers, S. 2013, A&A, 553, A93 Baumgardt, H., Parmentier, G., Anders, P., & Grebel, E. K. 2013, MNRAS, 430, 676 -Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pa- -cific Conference Series, Vol. 461, Astronomical Data Analysis Software and +Boch, T., Pineau, F., & Derriere, S. 2012, in Astronomical Society of the Pacific + Conference Series, Vol. 461, Astronomical Data Analysis Software and Systems XXI, ed. P. Ballester, D. Egret, & N. P. F. Lorente, 291 Bonnarel, F., Fernique, P., Bienaymé, O., et al. 2000, A&AS, 143, 33 Bono, G., Marconi, M., Cassisi, S., et al. 2005, ApJ, 621, 966 @@ -575,18 +575,18 @@ Bressan, A., Marigo, P., Girardi, L., et al. 2012, MNRAS, 427, 127 Catchpole, R. M., Whitelock, P. A., Feast, M. W., et al. 2016, MNRAS, 455, 2216 Cutri, R. M., Wright, E. L., Conrow, T., et al. 
2013, Explanatory Supplement -to the AllWISE Data Release Products, Explanatory Supplement to the All- -WISE Data Release Products +to the AllWISE Data Release Products, Explanatory Supplement to the AllWISE + Data Release Products de Grijs, R., Courbin, F., Martínez-Vázquez, C. E., et al. 2017, Space Sci. Rev., 212, 1743 De Somma, G., Marconi, M., Cassisi, S., et al. 2020, MNRAS, 496, 5039 Eggen, O. J. 1998, AJ, 115, 2435 -Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol. +Feast, M. 2007, in Astronomical Society of the Pacific Conference Series, Vol. 378, Why Galaxies Care About AGB Stars: Their Importance as Actors and Probes, ed. F. Kerschbaum, C. Charbonnel, & R. F. Wing, 479 Feast, M. & Whitelock, P. 2000a, in Astrophysics and Space Science Library, -Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Gio- -vannelli, 229 +Vol. 255, Astrophysics and Space Science Library, ed. F. Matteucci & F. Giovannelli, + 229 Feast, M. W. 1963, MNRAS, 125, 367 Feast, M. W. 1966, The Observatory, 86, 120 Feast, M. W. 1981, in Astrophysics and Space Science Library, Vol. 88, Physical @@ -595,7 +595,7 @@ Feast, M. W., Robertson, B. S. C., & Black, C. 1980, MNRAS, 190, 227 Feast, M. W. & Whitelock, P. A. 2000b, MNRAS, 317, 460 Feast, M. W., Whitelock, P. A., & Menzies, J. W. 2006, MNRAS, 369, 791 Gaia Collaboration, Brown, A. G. A., Vallenari, A., et al. 2021, A&A, 649, A1 -Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142 +Girardi, L., Marigo, P., Bressan, A., & Rosenfield, P. 2013, ApJ, 777, 142 Goudfrooij, P., Girardi, L., Kozhurina-Platais, V., et al. 2014, ApJ, 797, 35 Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022 Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128 @@ -624,8 +624,8 @@ Lebzelter, T. & Wood, P. R. 2016, A&A, 585, A111 Lloyd Evans, T. 1976, MNRAS, 174, 169 Lloyd Evans, T. 1983a, MNRAS, 204, 985 Lloyd Evans, T. 1983b, MNRAS, 204, 961 -Lloyd Evans, T. 
& Menzies, J. W. 1973, in Astrophysics and Space Science Li- -brary, Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in +Lloyd Evans, T. & Menzies, J. W. 1973, in Astrophysics and Space Science Library, + Vol. 36, IAU Colloq. 21: Variable Stars in Globular Clusters and in Related Systems, ed. J. D. Fernie, 151 Mackey, A. D. & Broby Nielsen, P. 2007, MNRAS, 379, 151 Marigo, P., Girardi, L., Bressan, A., et al. 2017, ApJ, 835, 77 @@ -646,10 +646,10 @@ Pastorelli, G., Marigo, P., Girardi, L., et al. 2020, MNRAS, 498, 3283 Pastorelli, G., Marigo, P., Girardi, L., et al. 2019, MNRAS, 485, 5666 Pérez, F. & Granger, B. E. 2007, Computing in Science and Engineering, 9, 21 Perren, G. I., Piatti, A. E., & Vázquez, R. A. 2017, A&A, 602, A89 -Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical So- -ciety of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis -Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Short- -ridge, 125 +Pineau, F.-X., Boch, T., Derrière, S., & Schaaff, A. 2020, in Astronomical Society + of the Pacific Conference Series, Vol. 522, Astronomical Data Analysis +Software and Systems XXVII, ed. P. Ballester, J. Ibsen, M. Solar, & K. Shortridge, + 125 Sakamoto, T., Matsunaga, N., Hasegawa, T., & Nakada, Y. 2012, ApJ, 761, L10 Skrutskie, M. F., Cutri, R. M., Stiening, R., et al. 2006, AJ, 131, 1163 Soszy ´ @@ -660,8 +660,8 @@ nski, M. K., et al. 2009, Acta Astron., 59, 239 Soszy ´ nski, I., Udalski, A., Szyma ´ nski, M. K., et al. 2011, Acta Astron., 61, 217 -Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Se- -ries, Vol. 351, Astronomical Data Analysis Software and Systems XV, ed. +Taylor, M. B. 2006, in Astronomical Society of the Pacific Conference Series, + Vol. 351, Astronomical Data Analysis Software and Systems XV, ed. C. Gabriel, C. Arviset, D. Ponz, & S. Enrique, 666 Trabucchi, M., Mowlavi, N., & Lebzelter, T. 2021a, A&A, 656, A66 Trabucchi, M., Wood, P. 
R., Montalbán, J., et al. 2017, ApJ, 847, 139 @@ -683,20 +683,20 @@ Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350 Article number, page 6 of 9 Trabucchi et al.: The period-age relation of LPVs Fig. A.1. Absolute- K - s Gaia -2MASS diagram for the stars with or with- -out a spectral type (left and right panels, respectively) in the selected + s Gaia -2MASS diagram for the stars with or without + a spectral type (left and right panels, respectively) in the selected sample. Symbol colors and shapes indicate the spectral type and host -cluster described in the legend, respectively, which also reports the num- -ber of sources displayed (i.e., having both optical and NIR photometry). +cluster described in the legend, respectively, which also reports the number + of sources displayed (i.e., having both optical and NIR photometry). The dashed line marks the separation between O- and C-rich sources according toLebzelter et al.(2018). An arrow marks the source MSX LMC 124 in NGC 1830 that, having W BP, RP − W J, K - s = 9. 73 mag, lies out- -side the plot area. Background dots are LPVs in the LMC from OGLE- -III (light gray) andMowlavi et al.(2018) (darker gray). -Appendix A: Classification of observed LPVs + s = 9. 73 mag, lies outside + the plot area. Background dots are LPVs in the LMC from OGLEIII + (light gray) andMowlavi et al.(2018) (darker gray). +Appendix A: Classification of observed LPVs Appendix A.1: Spectral type We adopted the spectral types provided byLebzelter & Wood (2007) andKamath et al.(2010) for 52 of the LPVs they studied @@ -706,19 +706,19 @@ reported byLloyd Evans(1983a). We also searched the SIMBAD astronomical database (Wenger et al.2000) for spectral type information, which we found for 26 more stars. 
We used the Gaia-2MASS diagram of -Lebzelter et al.(2018) to confirm the chemical type classification +Lebzelter et al.(2018) to confirm the chemical type classification taken from literature and to characterize the surface chemistry of sources of an unknown spectral type (see Fig.A.1). Among the -latter, we identified 13 C-rich stars and 106 O-rich sources. -Three of the sources without a spectral type lack Gaia pho- -tometry, so they cannot be classified with the Gaia-2MASS. Two +latter, we identified 13 C-rich stars and 106 O-rich sources. +Three of the sources without a spectral type lack Gaia photometry, + so they cannot be classified with the Gaia-2MASS. Two of them (LW5 and LW22 in 47 Tuc) have no match in Gaia EDR3, but they have NIR data and are probably O-rich based on their position in the J − K s versus K s color-magnitude diagram. The third source is one of the two stars in NGC 1903 from the -list ofGrady et al.(2019), which we identified with the 2MASS +list ofGrady et al.(2019), which we identified with the 2MASS source J05171633-6920298. It is likely C-rich according to the NIR color-magnitude diagram. Finally, the sources V138 in ω Cen, LW15 in NGC 2808, @@ -732,47 +732,47 @@ Lebzelter & Wood andKamath et al.(2010) with the catalogs from OGLE-III, ASAS-SN, and Gaia DR2. Combining these data sets, we found at least one period for each of the 176 sources in our sample. -In order to identify the pulsation mode most likely respon- -sible for periods in a given source, we assumed that the second -overtone mode is associated with sequence A, the first overtone +In order to identify the pulsation mode most likely responsible + for periods in a given source, we assumed that the second +overtone mode is associated with sequence A, the first overtone mode with sequences B and C , and the fundamental mode with -sequence C (e.g.,Trabucchi et al.2017). 
We excluded long sec- -ondary periods on sequence D as they are not due to stellar pul- -sation (Soszy ´ +sequence C (e.g.,Trabucchi et al.2017). We excluded long secondary + periods on sequence D as they are not due to stellar pulsation + (Soszy ´ nski et al.2021, and references therein), and we used the pattern of PL sequences in the LMC as a reference to -guide the mode identification (cf.Trabucchi et al.2021a). -We performed this classification separately for periods com- -ing from each distinct data set. If two or more periods from dif- -ferent data sets were assigned to the same pulsation mode, we +guide the mode identification (cf.Trabucchi et al.2021a). +We performed this classification separately for periods coming + from each distinct data set. If two or more periods from different + data sets were assigned to the same pulsation mode, we retained only one of those periods, with priority to the values from Lebzelter & Wood andKamath et al.(2010). If the latter authors do not provide this information, we adopted the period from OGLE-III if available, and otherwise from ASAS-SN or from Gaia DR2. -For some sources, the periods reported in di fferent catalogs +For some sources, the periods reported in di fferent catalogs were assigned to the same mode through this procedure. In most cases, these periods are reasonably similar to each other. Only -in a few cases were they significantly di ff erent, but this did not +in a few cases were they significantly di ff erent, but this did not alter our conclusions. -When available, the variability type was taken from OGLE- -III or ASAS-SN. We note that we are only interested in whether -a star is classified as a Mira or semi-regular variable. In many +When available, the variability type was taken from OGLEIII + or ASAS-SN. We note that we are only interested in whether +a star is classified as a Mira or semi-regular variable. 
In many cases, this type is not given or the star is simply considered, for -instance, as an LPV or AGB in SIMBAD, in which case we con- -sidered the variability type as undetermined. +instance, as an LPV or AGB in SIMBAD, in which case we considered + the variability type as undetermined. Appendix B: Fitting relations We obtained analytic expressions for the PA relations separately for O- and C-rich stars, proceeding as follows. For each bin of log( τ/ yr), we modeled the period distribution with a Gaussian -kernel density estimator (KDE) and identified the peak of the +kernel density estimator (KDE) and identified the peak of the distribution. To describe the boundaries of the PA relation, we -adopted, at each age, the values of the period at which the dis- -tribution equals 25% of its maximum. We selected this arbitrary +adopted, at each age, the values of the period at which the distribution + equals 25% of its maximum. We selected this arbitrary value upon visual inspection of the PA plane. We modeled the -central trend of the PA relation, as well as its short- and long- -period edges, with linear or quadratic functions in the form +central trend of the PA relation, as well as its short- and longperiod + edges, with linear or quadratic functions in the form log( τ/ yr) = a 0 + a 1 ( P/ ˜ @@ -783,19 +783,19 @@ P) 2 (where ˜ P = 350 days) and employed a Lenvenberg-Marquardt nonlinear regression algorithm 3 - to derive the best-fit coeffi cients, -which are listed in TableB.1. We remark that these best-fit ex- -pressions are only valid in the intervals 8 . 0 ≤ log( τ/ yr) ≤ 10 . 3 + to derive the best-fit coeffi cients, +which are listed in TableB.1. We remark that these best-fit expressions + are only valid in the intervals 8 . 0 ≤ log( τ/ yr) ≤ 10 . 
3 and 20 < P/days < 700 for O-rich composition, and within 3 We made use of the Python library SciPy to perform Gaussian KDE -modeling and best-fit, respectively, by means of the gaussian_kde +modeling and best-fit, respectively, by means of the gaussian_kde tool from the stats module and the curve_fit function from the optimize module. Article number, page 7 of 9 A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Fig. A.2. Similar to Fig.1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). -Table B.1. Best-fit coe ffi cients for the PA relation and its boundaries in +Fig. A.2. Similar to Fig.1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). +Table B.1. Best-fit coe ffi cients for the PA relation and its boundaries in the form given in Eq.B.1. Sp. type relation a 0 a @@ -822,10 +822,10 @@ P) + b 2 ( P/ ˜ P)2 . (B.2) -The resulting best-fit lines are displayed in Fig.B.1, and the co- -e fficients are given in TableB.2. -We remark that both the PA and the period-initial mass rela- -tions depend on model assumptions, in particular mass loss and +The resulting best-fit lines are displayed in Fig.B.1, and the coe + fficients are given in TableB.2. +We remark that both the PA and the period-initial mass relations + depend on model assumptions, in particular mass loss and mixing, as well as on the properties of the population of LPVs, namely the star-formation history and age-metallicity relation. Appendix C: The shape of the period distribution @@ -838,7 +838,7 @@ initial masses M M . The relation between period and initial mass is displayed in -panel (a) of Fig.C.1, where isochrone portions undergoing Table B.2. Best-fit coefficients for the period-initial mass relation and +panel (a) of Fig.C.1, where isochrone portions undergoing Table B.2. 
Best-fit coefficients for the period-initial mass relation and its boundaries in the form given in Eq.B.2. Sp. type relation b 0 b @@ -851,8 +851,8 @@ C-rich center -0.0304 0.2885 lower edge -0.0131 0.5752 upper edge -0.2245 -0.2720 0.2343 DFMP are indicated by solid lines. Panel (b) shows the period -distributions for a few di ff erent cases. -It is instructive, to begin with, to ignore the effect of thermal +distributions for a few di ff erent cases. +It is instructive, to begin with, to ignore the effect of thermal pulses and consider only the quiescent evolution (green lines in Fig.C.1). The smallest initial mass corresponds to a star that just entered the TP-AGB, when the FM has a period of ∼ 240 days @@ -865,12 +865,12 @@ evolved (quiescent) model with dominant FM has P FM 360 days (green circle and horizontal line), corresponding to a sharp cut in the period distribution shown in panel (b) of Fig.C.1. -As a star evolves along the AGB it expands, and its period be- -comes longer in response to the increase in radius. Models with +As a star evolves along the AGB it expands, and its period becomes + longer in response to the increase in radius. Models with a higher initial mass are more evolved, hence they have a larger radius and a longer period. The rate at which a period increases -with radius is not fixed, but rather decreases with evolution. Ac- -cording to the prescription ofTrabucchi et al.(2021b), a period +with radius is not fixed, but rather decreases with evolution. According + to the prescription ofTrabucchi et al.(2021b), a period grows with radius as a broken power-law with exponent α 1. 8 if R < R b , and with α 1. 25 at larger radii. @@ -882,23 +882,23 @@ the gray dotted line in Fig.C.1. The isochrone reaches it at Article number, page 8 of 9 Trabucchi et al.: The period-age relation of LPVs Fig. B.1. Similar to Fig.2, but showing initial mass M -i in place of age. 
The best-fit lines to the most populated band and edges of the theoretical +i in place of age. The best-fit lines to the most populated band and edges of the theoretical P FM – M i relation are shown. -Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows +Fig. C.1. Period distribution at fixed age and metallicity. Panel (a) shows period as a function of initial mass (current mass on the top axis) on the TP-AGB for a ∼ 200 Myr old isochrone with Z i = 0. 006. Red lines show full thermal pulses, while blue lines ignore luminosity spikes and green lines show only the quiescent evolution. The same color code is used for the period distributions (normalized to their maximum) on -panel (b). Solid lines indicate that the FM is dominant. Circles indi- -cate the earliest onset of DFMP accounting for (red) or ignoring (green) +panel (b). Solid lines indicate that the FM is dominant. Circles indicate + the earliest onset of DFMP accounting for (red) or ignoring (green) luminosity spikes, and the shortest period of the dominant FM (blue). Gray lines mark the critical values of periods at which the FM becomes dominant (solid line), less sensitive to radius (dotted line, which occurs -at the vertical line for this specific isochrone), and independent of radius +at the vertical line for this specific isochrone), and independent of radius (dashed line). M i 3. 8524 M @@ -907,10 +907,10 @@ FM 420 days. In models with a smaller initial mass, the period is still increasing at a relatively large rate as the envelope expands, while in more massive models the period has already become less sensitive to -changes in radius. This is reflected by a slight inflection of the +changes in radius. This is reflected by a slight inflection of the green curve, which corresponds to the maximum in the period -distribution shown in panel (b) of Fig.C.1. The period distri- -bution of the full TP-AGB range is roughly symmetric around +distribution shown in panel (b) of Fig.C.1. 
The period distribution + of the full TP-AGB range is roughly symmetric around this maximum, while limiting the selection to DFMP, produces a distribution skewed toward short periods, as found in Sect.3. If the luminosity dips following thermal pulses are taken @@ -921,12 +921,12 @@ dom ,0 is lowered, so that the shortest period associated with DFMP does not correspond to the least evolved model (green circle), but rather to the luminosity dip of a thermal pulse (blue circle). -To be precise, the earliest occurrence of DFMP is on the left- -most luminosity spike (red circle), whose duration is so short that +To be precise, the earliest occurrence of DFMP is on the leftmost + luminosity spike (red circle), whose duration is so short that it is unlikely to be observed. Indeed, the inclusion of luminosity spikes alters the period distribution at long periods very little. Luminosity spikes are relevant only for relatively massive and young TP-AGB stars, and they give rise to the poorly populated portion of the PA relation at the longest periods, as seen in panel (a) of Fig.2. - Article number, page 9 of 9 + Article number, page 9 of \ No newline at end of file diff --git a/read/results/playa/2201.00214.txt b/read/results/playa/2201.00214.txt index 0661679..91eaf9e 100644 --- a/read/results/playa/2201.00214.txt +++ b/read/results/playa/2201.00214.txt @@ -12,37 +12,37 @@ N. Fathalian1 e-mail: narges_fathalian@alum.sharif.edu January 4, 2022 Abstract -Here, we study the temperature structure of flaring and non-fl aring coronal loops, using extracted +Here, we study the temperature structure of flaring and non-fl aring coronal loops, using extracted loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging -Assembly (AIA)/ Solar Dynamic Observatory (SDO). 
We use dat a for loops of X2.1-class-flaring active -region (AR11283) during 22:10UT till 23:00UT, on 2011, Sept ember 6; and non-flaring active region +Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use dat a for loops of X2.1-class-flaring active +region (AR11283) during 22:10UT till 23:00UT, on 2011, Sept ember 6; and non-flaring active region (AR12194) during 08:00:00UT till 09:00:00UT on 2014, Octob er 26. By using spatially-synthesized -Gaussian DEM forward-fitting method, we calculate the peak t emperatures for each strip of the loops. +Gaussian DEM forward-fitting method, we calculate the peak t emperatures for each strip of the loops. We apply the Lomb-Scargle method to compute the oscillation s periods for the temperature series of each -strip. The periods of the temperature oscillations for the fl aring loops are ranged from 7 min to 28.4 +strip. The periods of the temperature oscillations for the fl aring loops are ranged from 7 min to 28.4 min. These temperature oscillations show very close behavi or to the slow-mode oscillation. We observe -that the temperature oscillations in the flaring loops are st arted at least around 10 minutes before the +that the temperature oscillations in the flaring loops are st arted at least around 10 minutes before the transverse oscillations and continue for a long time durati on even after the transverse oscillations are -ended. The temperature amplitudes are increased at the flari ng time (during 20 min) in the flaring loops. -The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min,but -their significances are less (below 0.5) in comparison with t he flaring ones (near to one). Hence the -detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the -flaring ones, and maybe they are just fluctuations. 
Based on ou r confined observations, it seems that the -flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the -non-flaring ones. More accurate commentary in this respect requires more extensive statistical research +ended. The temperature amplitudes are increased at the flari ng time (during 20 min) in the flaring loops. +The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min,but +their significances are less (below 0.5) in comparison with t he flaring ones (near to one). Hence the +detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the +flaring ones, and maybe they are just fluctuations. Based on ou r confined observations, it seems that the +flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the +non-flaring ones. More accurate commentary in this respect requires more extensive statistical research and broader observations. Coronal Loops,Temperature Analysis, Temperature Oscillations,Flaring and non-Flaring Active Regions I. I ntroduction Analyzing the thermal structure of coronal loops is of considerable interest, especially as these -magnetic loops have an essential role in heating the solar chromosphere and corona. Such anal- -ysis can help to describe how the process of solar flaring is correlated with the loop’s thermal +magnetic loops have an essential role in heating the solar chromosphere and corona. Such analysis + can help to describe how the process of solar flaring is correlated with the loop’s thermal structure. Detections of coronal waves have a historical preview and have been reported for several times (e.g., Aschwanden et al. ( 1999 ); Nakariakov et al. ( 1999 ); Wang et al. ( 2003 ); Wang & Solanki ( 2004 ); Berghmans & Clette ( 1999 ); De Moortel et al. ( 2000 ), Verwichte et al. ( 2004 ), De Moortel & Brady ( 2007 ), Ballai et al. ( 2011 )). 
Coronal seismology and MHD waves have been reviewed wide ly by -1 + De Moortel ( 2005 ), Nakariakov & Verwichte ( 2005 ), Aschwanden ( 2006 ), Banerjee et al. ( 2007 ) and De Moortel & Nakariakov ( 2012 ). Along with the development of the observations, transver se and longitudinal oscillations have also been studied theor etically (e.g., Gruszecki et al. ( 2006 ), @@ -50,17 +50,17 @@ Pascoe et al. ( 2007 ), Fathalian et al. ( 2010 ); Luna et al. ( 2010 ); Fathali seismology techniques help to elicit the information from observations of oscillatory phenomena and the results to be interpreted by using theoretical models (see for e.g., Roberts et al. ( 1984 ); -Goossens et al. ( 1992 )). Oscillatory patterns and processes which happen during solar flares, were +Goossens et al. ( 1992 )). Oscillatory patterns and processes which happen during solar flares, were interesting and subject of investigations from different a pproaches (e.g., Nakariakov et al. ( 2010 ), -Nisticò et al. ( 2013 ), Anfinogentov et al. ( 2013 ), Hindman & Jain ( 2014 ), Russell et al. ( 2015 )). As -we know the transverse loops oscillations usually occur in r esponse to a close filament or flare +Nisticò et al. ( 2013 ), Anfinogentov et al. ( 2013 ), Hindman & Jain ( 2014 ), Russell et al. ( 2015 )). As +we know the transverse loops oscillations usually occur in r esponse to a close filament or flare ( Wills-Davey & Thompson ( 1999 )). -Rapidly decaying long-period oscillations are mostly inte rpreted as global (or fundamen- -tal mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman ( 2014 ), and Wang +Rapidly decaying long-period oscillations are mostly inte rpreted as global (or fundamental + mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman ( 2014 ), and Wang ( 2011 ), also see Ofman & Wang ( 2002 ), and for slow-mode observed in fan-loops see Pant et al. ( 2017 )). 
They often occur in hot coronal loops of active regions, a ssociated with tiny (or micro-) -flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected -in intensity plots of solar and stellar flares are possibly ca used by standing slow-mode waves (see +flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected +in intensity plots of solar and stellar flares are possibly ca used by standing slow-mode waves (see reviews by Van Doorsselaere et al. ( 2016 ), and McLaughlin et al. ( 2018 )).Excitation, propagation, and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al. ( 2007 ); Wang et al. ( 2015 ); Jess et al. ( 2016 ); Nakariakov et al. ( 2017 ); Nisticò et al. ( 2017 ); Kolotkov @@ -68,125 +68,125 @@ et al. ( 2019 ); Krishna Prasad et al. ( 2019 ); Reale et al. ( 2019 ); Wang & O a complete overview of slow-mode magnetoacoustic waves in c oronal loops see the review by Wang et al. ( 2021 ). Investigating and comparing the thermal structures and osc illations of coronal loops in loops -of flaring and non-flaring active regions could help us in better understanding the loops’ material -oscillations and the flare impact on them. Several different methods have been developed to in- -vestigate the thermal structure of the coronal loops and loop strands. The thermal stability of the +of flaring and non-flaring active regions could help us in better understanding the loops’ material +oscillations and the flare impact on them. Several different methods have been developed to investigate + the thermal structure of the coronal loops and loop strands. The thermal stability of the coronal loops was the subject of research, done by Habbal & Rosner ( 1979 ) (and references cited -therein). McClymont & Craig ( 1985 ) stated that a pressure fluctuation must assist asymmetric +therein). 
McClymont & Craig ( 1985 ) stated that a pressure fluctuation must assist asymmetric coronal temperature perturbation. They concluded that coronal loops are impartially stable in the case of uniform heating. Van Doorsselaere et al. ( 2011 ) used spectroscopic line ratios to obtain the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona. -The dependence of coronal loop temperature on loop length and magnetic field strength is also +The dependence of coronal loop temperature on loop length and magnetic field strength is also a favorite topic. For instance, Dahlburg et al. ( 2018 ) probed the temperature properties of solar -coronal loops over a wide range of lengths and magnetic field strengths via numerical simula- -tions and observed a very high correlation between magnetic field strength and a maximum of +coronal loops over a wide range of lengths and magnetic field strengths via numerical simulations + and observed a very high correlation between magnetic field strength and a maximum of the temperature. The effect of temperature inhomogeneity on the periods and the damping times -of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al. -( 2012 )). Fathalian ( 2019 ) estimated the loop temperature using the intensity ratios and the AIA re- -sponse functions in different wavelengths. Different emission measure (DEM) computations and +of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al. +( 2012 )). Fathalian ( 2019 ) estimated the loop temperature using the intensity ratios and the AIA response + functions in different wavelengths. Different emission measure (DEM) computations and methods have been developed to estimate the temperature in the corona, which led to various discussions. Schmelz et al. ( 2010 ) analyzed a coronal loop, which was observed on 2010 August 3, by AIA. 
They took some differential emission measure (DEM ) curves, claiming a multithermal -rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). Af- -ter that, Aschwanden & Boerner ( 2011 ) criticized the method of background subtraction which +rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). After + that, Aschwanden & Boerner ( 2011 ) criticized the method of background subtraction which Schmelz et al. had applied. They claimed that the background subtraction method caused their inferred result of a multithermal loop. Aschwanden & Boerner ( 2011 ) analyzed a set of hundred -loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian -DEM model. In this regard, some attention was paid to the instrumental limitations and abil- -ity of AIA and Guennou et al. ( 2012a , b ) discussed on the accuracy of the differential emission -measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovemen- -tioned controversy of whether the cross-field temperatures of coronal loops are multithermal or +loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian +DEM model. In this regard, some attention was paid to the instrumental limitations and ability + of AIA and Guennou et al. ( 2012a , b ) discussed on the accuracy of the differential emission +measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovementioned + controversy of whether the cross-field temperatures of coronal loops are multithermal or isothermal, continued by Schmelz et al. ( 2013 ) (similar to Schmelz et al. ( 2011 )). They analyzed -twelve loops to understand the cross-field temperature distributions of them and reveal the loops’ +twelve loops to understand the cross-field temperature distributions of them and reveal the loops’ substructure. Based on their achievements, the warmer loop s entail broader DEMs. 
Thereafter, Schmelz et al. ( 2014 ) found indications of a relationship between the DEM weighted-temperature -and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have -narrower DEM widths. This could imply that fewer strands are seen emitting in the later cool- -ing phase, which they claim could potentially resolve the ab ovementioned controversy. In this +and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have +narrower DEM widths. This could imply that fewer strands are seen emitting in the later cooling + phase, which they claim could potentially resolve the ab ovementioned controversy. In this subject, Aschwanden et al. ( 2015 ) (as well as 2013 ( Aschwanden, 2013 )) developed a method to -extract the loop temperature which is based on Gaussian fit for Differential Emission Measure, -named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter). -This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and -non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows: +extract the loop temperature which is based on Gaussian fit for Differential Emission Measure, +named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter). +This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and +non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows: In section - II , data, we introduce the considered flaring and non-flaring ac tive regions and describe -the data employed and the time and properties of the flare, occ urred in the active region. In + II , data, we introduce the considered flaring and non-flaring ac tive regions and describe +the data employed and the time and properties of the flare, occ urred in the active region. 
In section III , we explain the method we use to analyze the time-series of te mperatures in different -strips of the loops. Section IV is specified to our results, obtained related to flaring and non- -flaring regions. In section V we briefly state a summary of this work. +strips of the loops. Section IV is specified to our results, obtained related to flaring and nonflaring + regions. In section V we briefly state a summary of this work. II. Data -We investigate the thermal structure and treatment of loops in a flaring region to see if it follows -the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time. -For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops +We investigate the thermal structure and treatment of loops in a flaring region to see if it follows +the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time. +For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops of it have been analyzed by Jain et al. ( 2015 ). They analyzed intensity variations in the wavelength 171 in two coronal loops of this region and detected obvious transverse oscillation with periods -of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see -the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active +of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see +the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active region, extract its loops and analyze their thermal treatme nt. Then we compare the temperature -treatment of the loops at the flaring region with the loops of the non-flaring region to see the +treatment of the loops at the flaring region with the loops of the non-flaring region to see the differences. 
The temperature analysis done here uses EUV images from the A IA onboard the SDO. AIA has ten different wavelength channels, three in white light and UV, and the other seven in EUV -channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temper- -atures (in order of T = 10 4.7 +channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temperatures + (in order of T = 10 4.7 K), not the corona, is not taken into account (Aschwanden et a l. 2015). Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 ). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK . -The two below data sets are finally selected to study thermal variations and coronal loops -oscillations in flaring or non-flaring active regions. A few d istinct loops are visible in the regions. +The two below data sets are finally selected to study thermal variations and coronal loops +oscillations in flaring or non-flaring active regions. A few d istinct loops are visible in the regions. Finally, these loops are chosen: -– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely -happening around the loops with the specification we are looking for. So this selected LOS -X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA +– Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely +happening around the loops with the specification we are looking for. So this selected LOS +X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA AR 11283, in the time period of 22:10UT till 23:00UT of 2011 Se ptember 6 with the cadence -of 12 sec. This period of time is selected since no other flare is happening during it. A +of 12 sec. This period of time is selected since no other flare is happening during it. 
A few distinct loops are visible and follow-able here during this period. Loop shapes in our -active region change permanently; therefore, it is difficult or impossible to follow a loop +active region change permanently; therefore, it is difficult or impossible to follow a loop over a very long time. Hence, it is not useful to extend the time interval of this region -to the time before the flare. The transverse oscillations of two loops in this region were +to the time before the flare. The transverse oscillations of two loops in this region were analyzed before by Jain et al. ( 2015 ). We mark these loops by A and B in Figure 1 b. They detected fundamental mode oscillation with periods of roughly 2 minutes and decay time of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any) -or thermal fluctuations in this condition. Figure 1 a (left) displays AR 11283 and the area, -indicated by the white box is featured in a zoom-in view in Figure 1 .b (right) and the five +or thermal fluctuations in this condition. Figure 1 a (left) displays AR 11283 and the area, +indicated by the white box is featured in a zoom-in view in Figure 1 .b (right) and the five selected parts of the center of the three chosen loops are shown by red lines (the movie of the region is available in this link). As it is clear in the mov ie, these three loops oscillate -together and their oscillations decay simultaneously. The center of figure 1 .a is coordinated +together and their oscillations decay simultaneously. The center of figure 1 .a is coordinated at (230, 165) arcsec and its width and height are 450 ′′ × 456 ′′ - /750 × 775 pixels. The flare -occurring in this active region is an X2.1 class flare located close to the disk center at latitude + /750 × 775 pixels. The flare +occurring in this active region is an X2.1 class flare located close to the disk center at latitude 14 ◦ north and longitude 18 ◦ - west (269.9 arcsec, 129.9 arcsec). 
This flare initiates at 22 :12UT, + west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22 :12UT, ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection (CME) which occurs from 2011 September 6, 21:36:05T to 2011 S eptember 7, 02:24:05T, with the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for more details look at LASCO CME catalogue.) 1 -– Three loops of non-flaring active region 12194: As a blind te st, we select three loops of the -non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till -09:00:00UT of 2014 October 26. The center of figure 2 .a is coordinated at (0, -264) arcsec +– Three loops of non-flaring active region 12194: As a blind te st, we select three loops of the +non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till +09:00:00UT of 2014 October 26. The center of figure 2 .a is coordinated at (0, -264) arcsec and its width and height are 615 ′′ × 615 ′′ /1025 × 1025 pixels. We consider the images of the selected area with the cadence of 12 sec in the same six wavelengths mentioned above. These loops are relatively motionless and do not show any tra nsversal oscillation (see the region’s movie in the link). We select the loops in such a way that they do not have any -crossing over the neighbor loops (in our perspective) during this time. In figure +crossing over the neighbor loops (in our perspective) during this time. In figure 2 the -selected loops are distinguished in red in the mentioned active region. The size of the final -cut of non-flaring region (represented in the right) is 351 × 401 pixels. +selected loops are distinguished in red in the mentioned active region. The size of the final +cut of non-flaring region (represented in the right) is 351 × 401 pixels. The data set are primarily downloaded at level 1 with a pixel r esolution of 0.6 arcsec. We use the standard aia _ pre p . 
pro subroutine available in SDO package SolarSoftWare library to adjust the screen scale between the four arms of the AIA. This pre-processing step increases the data -level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series. +level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series. We also used drot _ ma p . pro subroutine to correct the differential rotation effect. Ac cording to the movie made by pre-processed images, the most obvious loops ( marked in the abovementioned -figures) are selected in each region (with obvious transversal oscillations in the case of the flaring +figures) are selected in each region (with obvious transversal oscillations in the case of the flaring active region). III. Temperature A nalysis Method We extract the selected loop segment pixels, for each loop, a nd calculate the normal vectors @@ -195,20 +195,20 @@ considered box with the thickness of 15 to 40 pixels (macro-p ixels, depending o empty area around each loop and the distance to the neighbor loop). The area around the loop is needed for calculations of background subtraction. The selected loop segment is cut in 1 - Based on data on these WebSites: https://solarflare.njit.e du/webapp.html, and https://www.swpc.noaa.gov/ + Based on data on these WebSites: https://solarflare.njit.e du/webapp.html, and https://www.swpc.noaa.gov/ all wavelengths and at the same considered box from the image s set. These loop images are necessary entrances for our thermal analysis process. Then the loop is divided into different strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we -use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden +use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden et al. ( 2015 ). 
-The images in the above six wavelength filters are considered to calculate the temperature in +The images in the above six wavelength filters are considered to calculate the temperature in each strip of the loop. The DEM function is considered a single-Gaussian function relative to the -temperature determined by the forward fitting method. To obtain the temperature for each loop, -we divided the loop into narrow strips, and then the intensity flux was averaged over each strip. +temperature determined by the forward fitting method. To obtain the temperature for each loop, +we divided the loop into narrow strips, and then the intensity flux was averaged over each strip. The number of each strip is displayed with the index i. One of the usual methods to subtract -the background from observed data is fitting a single-Gaussian cospatial function with a linear -function on the flux profile. The DEM for each strip is consider ed to be single-Gaussian DEM +the background from observed data is fitting a single-Gaussian cospatial function with a linear +function on the flux profile. The DEM for each strip is consider ed to be single-Gaussian DEM in terms of the logarithm of the temperature, which has three free parameters ( Aschwanden & Boerner , 2011 ): D E M @@ -223,7 +223,7 @@ In which, T p , i is the DEM peak temperature, E M p , i is the peak EM function, and σ T , i is the -logarithmic width of the temperature for that strip. To calc ulate the background-subtracted fluxes +logarithmic width of the temperature for that strip. To calc ulate the background-subtracted fluxes (for each strip) we use Eq.6 of Aschwanden & Boerner ( 2011 ) (in below): F 0 λ = Z @@ -236,74 +236,74 @@ k ) R λ ( T k ) . (2) Here, R - λ ( T ) is the instrumental temperature response function of each wavelength filter λ , which + λ ( T ) is the instrumental temperature response function of each wavelength filter λ , which is obtained by the code aia _ get _res ponse . pro in the SSW package. 
As time has passed, the AIA response functions calibration has partly changed. Here, we use the updated calibration of the -temperature response functions, for each of the AIA tempera ture filters, according to the CHI- -ANTI Version 2019 code available in the Solar SoftWare (SSW) . After forward-fitting the Gaussian -DEM to the background-subtracted observed fluxes in multiple wavelengths, the three-fitting pa- -rameters, temperature width ( σ +temperature response functions, for each of the AIA tempera ture filters, according to the CHIANTI + Version 2019 code available in the Solar SoftWare (SSW) . After forward-fitting the Gaussian +DEM to the background-subtracted observed fluxes in multiple wavelengths, the three-fitting parameters, + temperature width ( σ T , i ), peak of temperature ( T p , i ), and peak emission measure ( E M p , i ) are found by minimizing χ 2 i . -Our data sample is uneven because of omitting some damaged images in between. There- -fore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is -developed to use the technique periodogram, in the case wher e the observation times are un- -evenly spaced ( +Our data sample is uneven because of omitting some damaged images in between. Therefore + to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is +developed to use the technique periodogram, in the case wher e the observation times are unevenly + spaced ( Scargle , 1982 ). The Lomb-Scargle periodogram method is useful in cases where -the periodicity of data treatment is not immediately appare nt. This method allows efficient com- -putation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in +the periodicity of data treatment is not immediately appare nt. 
This method allows efficient computation + of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in an intuitive means of determining the period of oscillation ( VanderPlas, 2018 ). Therefore we use -Lomb-Scargle Periodogram to evaluate and estimate the effic ient periods of temperature oscilla- -tions in our loops. We select the first period related to the highest power frequency, which is -obtained by this method.We considered the achieved periods with the highest significances and -amplitudes. The most significant (highest) periods observe d in temperature (minute) for flaring -and non-flaring loops are listed in Tables 1 and 2, respective ly. To estimate the significance of +Lomb-Scargle Periodogram to evaluate and estimate the effic ient periods of temperature oscillations + in our loops. We select the first period related to the highest power frequency, which is +obtained by this method.We considered the achieved periods with the highest significances and +amplitudes. The most significant (highest) periods observe d in temperature (minute) for flaring +and non-flaring loops are listed in Tables 1 and 2, respective ly. To estimate the significance of the periods, we computed the probability values (p-values) . In the Lomb-Scargle method, the -significance returned here is the false alarm probability of the null hypothesis, i.e., as the data +significance returned here is the false alarm probability of the null hypothesis, i.e., as the data is composed of independent Gaussian random variables. Accordingly, low probability values -(p-value less than 0.05) indicate a high degree of significance in the associated periodic signal. +(p-value less than 0.05) indicate a high degree of significance in the associated periodic signal. IV. R esults i. Temperature Analysis of Flaring Active Region Loops Thenceforth the temperature time-series of different strips of the selected loops are calculated -using the method described in section 3. 
In the following figures, the vertical axis shows the +using the method described in section 3. In the following figures, the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time duration. To be comparable -by eyes, all the forthcoming figures (which show the loops temperature oscillations) have been co- -scaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature +by eyes, all the forthcoming figures (which show the loops temperature oscillations) have been coscaled + in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 1 2, and 6 strips, respectively. Each strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’ temperature oscillations are presented here. Figure 3 displays the time-series of temperature -oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the +oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the errors for each point (temperature) but removed in the prese ntation to avoid overcrowding of the -figures. As we observe in Figures 3 and 4 ), the temperature oscillations are started and increase -around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended +figures. As we observe in Figures 3 and 4 ), the temperature oscillations are started and increase +around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended (22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain et al. ( 2015 ). As Jain et al. reported, LoopA and B have a transverse oscillation with periods -of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time -(23:20) and decaying after the flare ended (22:24). 
So as we ob serve, the temperature oscillations in -these flaring loops happen before the start of their transver se oscillations and are continuing even +of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time +(23:20) and decaying after the flare ended (22:24). So as we ob serve, the temperature oscillations in +these flaring loops happen before the start of their transver se oscillations and are continuing even in the time interval after the transverse oscillations decay. Although the temperature oscillations do not decay as rapid as the transverse oscillations do, and c onversely, the loop temperature increases at the end of the oscillating mode (see Fig. 4 , the temperature map of the loop A, for instance) We calculate the temperature oscillations periods, using L omb-Scargle method. We consider -the thermal oscillations periods with the highest significa nces. As this method shows, the most +the thermal oscillations periods with the highest significa nces. As this method shows, the most powerful period in the range of data time-series (listed in Table 1 ) are from 7 to 28.4 minutes -observed in the strips of the marked loops of this flaring region. These loops of flaring region +observed in the strips of the marked loops of this flaring region. These loops of flaring region also show some short periods in temperature oscillations which some are less than 10 minutes -(listed in Table 1 ). These short periods are more frequently observed in the loops of the flaring -active region. Such short periods are very scarce for the loops of the non-flaring active region +(listed in Table 1 ). These short periods are more frequently observed in the loops of the flaring +active region. Such short periods are very scarce for the loops of the non-flaring active region (compare Tables1 and 2 ). -The first column in Table 1 is the number of every strip along the loop. 
The second column is -the period of the most powerful frequency observed for the loop strips, calculated by the Lomb- -Scargle method. The third column shows the maximum of log ( T ) minus its minimum in each +The first column in Table 1 is the number of every strip along the loop. The second column is +the period of the most powerful frequency observed for the loop strips, calculated by the LombScargle + method. The third column shows the maximum of log ( T ) minus its minimum in each strip. The columns of Table 2 are exactly the same as Table 1 ; the only difference is that Table 2 is -for the non-flaring loops. +for the non-flaring loops. The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop marked in Figure 1 .b. The mean of the parameter (Max(log T )-Min(log T )) for the strips of loop A is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided @@ -313,38 +313,38 @@ strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± of (Max(log T )-Min(log T )) is 0.81 through this loop segment. The loops C1 and C2, divided into 12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22, and 6.14 ± 0.25 (log), and the mean (Max(log T )-Min(log T )) of 1.48, 0.88, respectively. -We observe that despite the temperature oscillations, the flaring loops show a temperature -rise at the end of the considered time interval (figure 3 ). As their temperature maps also show, -the oscillations follow with a relatively sensible rise in the final temperature of the loop segments -(Figures 4 ). 
Although in the case of the transverse oscillations, the loops oscillate as the flare -occurs and then the oscillations decay and stop, in the case of temperature oscillations, the tem- -peratures of the various strips of the loops oscillate and at the end of the flare occurrence, they +We observe that despite the temperature oscillations, the flaring loops show a temperature +rise at the end of the considered time interval (figure 3 ). As their temperature maps also show, +the oscillations follow with a relatively sensible rise in the final temperature of the loop segments +(Figures 4 ). Although in the case of the transverse oscillations, the loops oscillate as the flare +occurs and then the oscillations decay and stop, in the case of temperature oscillations, the temperatures + of the various strips of the loops oscillate and at the end of the flare occurrence, they get to a relatively higher value of temperature in average. Figure - 4 shows the temperature maps of the flaring loops A, B1, B2, C1, a nd C2, respectively + 4 shows the temperature maps of the flaring loops A, B1, B2, C1, a nd C2, respectively as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each separated grid part on the map is standing for one strip. Figure 4 shows that the temperature for most of the strips increased, bypassing a few oscillations. Before the end of the time duration, some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at the early times of the duration and becomes hotter at the midd le and end times with a swing -to lower temperatures again (see Fig. 4 ). There are some temperature fluctuations at the middle +to lower temperatures again (see Fig. 4 ). 
There are some temperature fluctuations at the middle times (the red and green stripes) while at the end the strips temperatures are smoother with less -fluctuations. The temperature map of the loop segment B2 (Fig. +fluctuations. The temperature map of the loop segment B2 (Fig. 4 ) shows that at the beginning of -the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end +the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end times this pattern is reversed in this loop segment. In loop segment C1 (Fig. 4 ), the temperature -fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time +fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time (23:00) the temperature is much higher than the beginning. T he temperature is increasing after -the flare time (22:24) for the loop C2 either (see Fig. 4 ). This happens with some oscillations in -the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation -in most of the flaring loops’ strips after the flare time. According to these temperature maps, -the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20 +the flare time (22:24) for the loop C2 either (see Fig. 4 ). This happens with some oscillations in +the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation +in most of the flaring loops’ strips after the flare time. According to these temperature maps, +the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20 minutes after that. -We expect the flaring loops to cool down as a result of heat cond uction and radiative cooling. +We expect the flaring loops to cool down as a result of heat cond uction and radiative cooling. Hence this relative temperature increase should be scrutinized. 
As we probed, this temperature rise is also followed in intensity time-series. As the intensity time-series show, the related intensity -in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the +in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the authors also checked the wavelength of Fe XV I I I which has a peak formation temperature of 7 × 10 6 ◦ K ( Ugarte-Urra & Warren ( 2014 )). By using the method developed by Warren et al. ( 2012 ) @@ -357,17 +357,17 @@ This warm contribution is calculated from a weighted combination of the emission analysis is done directly and it has not gone through any othe r process like the thermal analysis. For this purpose, we applied the formulation (1) used by Li et al. ( 2015 ). Plots in Figure 5 show the intensity map, and the mean intensity variation of the wavelength Fe XV I I I , for Loop A of -the flaring region, respectively. As these plots show, this intensity is also higher at the end of -the time duration in respect of the flare time. It seems to us that the expected cooling has not -occurred in these flaring loops yet, even after the flare occur rence in the probed duration due to +the flaring region, respectively. As these plots show, this intensity is also higher at the end of +the time duration in respect of the flare time. It seems to us that the expected cooling has not +occurred in these flaring loops yet, even after the flare occur rence in the probed duration due to some plausible reasons. We consider that the mentioned simultaneous CME (see section II ) which -this flare is associated with could cause this increase in temperature. We can be sure that the -source of this CME is AR 11283 ( Romano et al. ( 2015 )). This CME is in our flare region, hence -the loops receive energy even after the flare occurrence and it is probably the reason why the +this flare is associated with could cause this increase in temperature. 
We can be sure that the +source of this CME is AR 11283 ( Romano et al. ( 2015 )). This CME is in our flare region, hence +the loops receive energy even after the flare occurrence and it is probably the reason why the expected cooling does not occur. The thermal oscillations periods obtained the Lomb-Scargle method, do not have the same -significance in all strips of the loops, but for most strips of the flaring loops, the significances are +significance in all strips of the loops, but for most strips of the flaring loops, the significances are very near to one. To be assured about these oscillations, we p robed the intensity time-series for each strip of the loops and we observed that this loop’s intensities shows intensity oscillations too (i.e., alongside the loop). The most probable dominant p eriods observed in intensity, for @@ -375,151 +375,151 @@ wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A , 16.7, and 18.2 F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods are in the same order of the observed thermal oscillation per iods. The intensity in this time series has not passed any thermal process but still shows oscillation periods close to thermal ones. So -we think these results confirm the observation of thermal osc illations. +we think these results confirm the observation of thermal osc illations. ii. Temperature Analysis of non-Flaring Active Region Loop s -The temperature time-series for different strips of the selected loops of the non-flaring active -region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. +The temperature time-series for different strips of the selected loops of the non-flaring active +region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. 6 ), the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time -duration. 
Figure 6 displays the time-series of temperature variations for the first two strips of -the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the -logarithm of temperature (like the flaring loops range). The most powerful periods, observed in -most of these non-flaring loops’ strips (listed in Table 2 ) are from 8.5 min. to 30 min. Comparing -the periods of the loops in the flaring region (Table 1 ) with the non-flaring one (Table 2 ), we see -that the temperature periods of the flaring loops have lower values on average and have more -diversity than the non-flaring ones. As Tables - 1 and 2 show, the mean temperatures of nonf- -loops are lower in comparison with the f-loops, a fact we also expected from common sense. -The parameter (Max(log T )-Min(log T )) in nonf-loops’ strips is less than that for the flaring loop s’ +duration. Figure 6 displays the time-series of temperature variations for the first two strips of +the non-flaring Loops A, and B. These figures are all co-scaled in the range of 5.7 to 6.9 for the +logarithm of temperature (like the flaring loops range). The most powerful periods, observed in +most of these non-flaring loops’ strips (listed in Table 2 ) are from 8.5 min. to 30 min. Comparing +the periods of the loops in the flaring region (Table 1 ) with the non-flaring one (Table 2 ), we see +that the temperature periods of the flaring loops have lower values on average and have more +diversity than the non-flaring ones. As Tables + 1 and 2 show, the mean temperatures of nonfloops + are lower in comparison with the f-loops, a fact we also expected from common sense. +The parameter (Max(log T )-Min(log T )) in nonf-loops’ strips is less than that for the flaring loop s’ strips. Nonf-loop A, divided into 11 strips, has the length of 19.91 ( Mm) which is the length of the selected part of the loop marked in Figure 2 b. The mean of (Max(log T )-Min(log T )) for the strips of nonf-loop A is 0.81. 
Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10. Nonf-Loop B, divided into 6 strips, has the length of 11.11 (M m), and the mean temperature (log), -and the mean of (Max(log T )-Min(log T )) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonf- -loop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of +and the mean of (Max(log T )-Min(log T )) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonfloop + C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of 5.82 ± 0.12, and the mean (Max(log T )-Min(log T )) of 0.56. -The first highest period observed for the temperature oscillations of these non-flaring loops’ -strips is reported in Table 2 . As we observe the temperature periods in these non-flaring loops -are mostly longer than those of the flaring loops (compare the values listed in Table +The first highest period observed for the temperature oscillations of these non-flaring loops’ +strips is reported in Table 2 . As we observe the temperature periods in these non-flaring loops +are mostly longer than those of the flaring loops (compare the values listed in Table 1 and Table 2 ). -Therefore the temperature oscillations of these loops are a little slower than the flaring ones. +Therefore the temperature oscillations of these loops are a little slower than the flaring ones. Figure - 7 shows the temperature maps of the non-flaring loops A, B, and C , respectively as a + 7 shows the temperature maps of the non-flaring loops A, B, and C , respectively as a time series. In each plot, the vertical axis is the distance a long the loop in Mm, and the horizontal axis is the time. The color bar in the left shows the colors considered for the temperature range. Each separated colored part in the map is one strip. These color maps are plotted totally at the -same color range of the loops of the flaring region either. 
-As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature -fluctuations and are smoother in comparison with the flaring ones (Fig. 4 ). Furthermore, that -much increase in the temperatures of the strips, which was obvious in the loops of the flaring +same color range of the loops of the flaring region either. +As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature +fluctuations and are smoother in comparison with the flaring ones (Fig. 4 ). Furthermore, that +much increase in the temperatures of the strips, which was obvious in the loops of the flaring region toward the end times, is not observed here. The temper atures are also totally lower in the -nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the -non-flaring loops have relatively more similar temperature fluctuations. -As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the -flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes, +nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the +non-flaring loops have relatively more similar temperature fluctuations. +As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the +flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes, and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of -the flaring active region, and shorter temperature periods ( less than 10 minutes, nearer to the -transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison -with the non-flaring ones. 
And figure +the flaring active region, and shorter temperature periods ( less than 10 minutes, nearer to the +transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison +with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature -range, or the difference between maximum and minimum of the temperature value (max(log( T ) )- -min(log ( T ) )), is much higher on average for the loops’ strips of the flaring AR in comparison with -the loops’ strips of the non-flaring one. +range, or the difference between maximum and minimum of the temperature value (max(log( T ) )min(log + ( T ) )), is much higher on average for the loops’ strips of the flaring AR in comparison with +the loops’ strips of the non-flaring one. V. Summery -We reported the temperature oscillations of coronal loops of a flaring active region. We selected -the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This -region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been +We reported the temperature oscillations of coronal loops of a flaring active region. We selected +the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This +region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been analyzed before by Jain et al. ( 2015 ). They analyzed intensity variations in the wavelength 171 in two coronal loops of this region and detected obvious transverse oscillation with periods of roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure. 1 b) -at the flare time. We were curious to know if the temperature va riations follow the transverse +at the flare time. We were curious to know if the temperature va riations follow the transverse oscillations of the loops, or there is any relation or correlation between them. 
We also wanted to -investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal -properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three +investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal +properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three segments of its loops and analyzed their thermal treatment. Then we compared the temperature -treatment of the loops at the flaring region with the loops of the non-flaring region to see the -differences. We were eager to observe the probable discrepa ncies between flaring and non-flaring +treatment of the loops at the flaring region with the loops of the non-flaring region to see the +differences. We were eager to observe the probable discrepa ncies between flaring and non-flaring loops in this respect. -Here we used data of three loops of the flaring active region (A R11283) around the time of the -Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring -active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures -1 and 2 ). To calculate the time series of the loop temperature value s, we first extracted the loop +Here we used data of three loops of the flaring active region (A R11283) around the time of the +Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring +active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures +1 and 2 ). To calculate the time series of the loop temperature value s, we first extracted the loop pixels in each image and then displayed the loop straightly f or all the images in the time series of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian -DEM forward-fitting method founded by Aschwanden et al. ( 2015 ). 
We calculated the peak +DEM forward-fitting method founded by Aschwanden et al. ( 2015 ). We calculated the peak temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze temperature oscillations of the time-series for each strip of the loops. We observed temperature oscillations which are following the transverse loop oscillations -observed by Jain et al. ( 2015 ) for the flaring loops. Furthermore, the temperature oscillations in -these flaring loops happen before the transverse oscillations start and continue even in the time +observed by Jain et al. ( 2015 ) for the flaring loops. Furthermore, the temperature oscillations in +these flaring loops happen before the transverse oscillations start and continue even in the time duration after the transverse oscillations decay. As obser ved, the temperature oscillations do not decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase -at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures +at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the -flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in -the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly +flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in +the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly 2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of roughly 10 to 28.5 minutes in different segments of this loop . And as the transverse oscillation -decays in this interval, no special definite decay is observe d in its temperature oscillations. 
-The temperature periods of the flaring loops are rather shorter than the temperature periods -of the non-flaring loops. The loops of the flaring region show some short temperature oscillations +decays in this interval, no special definite decay is observe d in its temperature oscillations. +The temperature periods of the flaring loops are rather shorter than the temperature periods +of the non-flaring loops. The loops of the flaring region show some short temperature oscillations periods in which some are less than 10 minutes (Table 1 ). These kind of short periods are more -frequently observed for the loops of the flaring active region and in the case of the non-flaring -ones, are very scarce. We observed that the periods of the flar ing loops have more diversity -than those of the non-flaring ones. Based on our confined obser vations, the non-flaring loops’ +frequently observed for the loops of the flaring active region and in the case of the non-flaring +ones, are very scarce. We observed that the periods of the flar ing loops have more diversity +than those of the non-flaring ones. Based on our confined obser vations, the non-flaring loops’ periods are longer and their temperatures’ values are totally lower. So our research showed that -thermal structures of the flaring loops differ from the non-flaring ones in the ways described -above. As temperature maps show, the temperature fluctuations are increasing at the flaring time -and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’ -temperature. Conversely, it seems that different strips of the non-flaring loops have relatively -more similar temperature fluctuations. The temperatures ar e either higher in average in the flar- -ing loops’ segments as expected. 
The significances of the per iods, obtained by the Lomb-Scargle -method, are calculated for each strip of each loop and the results show that these significances -for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of -the non-flaring region are less than 0.5. Hence the detected p eriods in the flaring loops’ strips -have high significances (near to one) and are oscillations. Whereas the detected periods in the -non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe -they are just fluctuations. +thermal structures of the flaring loops differ from the non-flaring ones in the ways described +above. As temperature maps show, the temperature fluctuations are increasing at the flaring time +and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’ +temperature. Conversely, it seems that different strips of the non-flaring loops have relatively +more similar temperature fluctuations. The temperatures ar e either higher in average in the flaring + loops’ segments as expected. The significances of the per iods, obtained by the Lomb-Scargle +method, are calculated for each strip of each loop and the results show that these significances +for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of +the non-flaring region are less than 0.5. Hence the detected p eriods in the flaring loops’ strips +have high significances (near to one) and are oscillations. Whereas the detected periods in the +non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe +they are just fluctuations. 
Using this method for the coronal loops showed that the oscillation modes obtained for the -temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of +temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These kind of oscillations often occur in hot coronal loops (log ( T ) > 6) of active regions especially the -ones associated with small (or micro-) flares ( Wang et al. ( 2021 )). The loops of our flaring active +ones associated with small (or micro-) flares ( Wang et al. ( 2021 )). The loops of our flaring active region are also hot loops with the mean temperature above this range. They also show intensity -oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring -loops. The temperature of the non-flaring loops are lower (log ( T ) < 6) and as discussed above, -we believe that the observed oscillation-like periods in non-flaring loops should be more probably -related to the high amplitude fluctuations. -Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes -of the fluctuations show a discrepancy. Mean of the parameter (Max(log T )-Min(log T )) in the -FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respec- -tively. And for non-flaring region, mean of (Max(log T )-Min(log T )), are 0.81, 0.62, and 0.56, for -nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T )- -Min(log T )) for these non-flaring loops show a difference from the flaring ones and are lower. -Loops of the non-flaring active region 12194 have a relatively uniform temperature at the +oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring +loops. 
The temperature of the non-flaring loops are lower (log ( T ) < 6) and as discussed above, +we believe that the observed oscillation-like periods in non-flaring loops should be more probably +related to the high amplitude fluctuations. +Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes +of the fluctuations show a discrepancy. Mean of the parameter (Max(log T )-Min(log T )) in the +FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively. + And for non-flaring region, mean of (Max(log T )-Min(log T )), are 0.81, 0.62, and 0.56, for +nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T )Min(log + T )) for these non-flaring loops show a difference from the flaring ones and are lower. +Loops of the non-flaring active region 12194 have a relatively uniform temperature at the beginning of the time interval, which rises slightly at its e nd. As the Solar Monitor reports in the -neighborhood of this region, the flaring active region 12192 exists of which between its multiple -flares, there is a c 4.6 class flare occurring at 9:44UT. Therefore, it could be a p ossible suggestion +neighborhood of this region, the flaring active region 12192 exists of which between its multiple +flares, there is a c 4.6 class flare occurring at 9:44UT. Therefore, it could be a p ossible suggestion that the abovementioned slight temperature rise in the loop s of AR 12194 (in the time interval -8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions +8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions exist in the AR 12192. -Hence as our study shows, the temperature of coronal loops of flaring AR changes in an -oscillatory manner. 
Compared with these non-flaring loops, the flaring loops show higher tem- -peratures on average and higher oscillation periods with higher peaks and deeper valleys. More -accurate commentary in this respect requires more extensive statistical research and broader ob- -servations. +Hence as our study shows, the temperature of coronal loops of flaring AR changes in an +oscillatory manner. Compared with these non-flaring loops, the flaring loops show higher temperatures + on average and higher oscillation periods with higher peaks and deeper valleys. More +accurate commentary in this respect requires more extensive statistical research and broader observations. + arcsecarcsec 79 154 229 304 379 454−6825118211304397 a arcsecarcsec - + 114.6 171.2 227.8 284.4 341171.4206.3241.2276.1311 Loop B1 Loop ALoop C2 Loop C1b Loop B2 -Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view +Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are the same loops studied by Jain et al. ( 2015) (see Fig.3a in Jain et al. ( 2015)). @@ -542,9 +542,9 @@ LogT LogT F−LoopB1 22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 timeLogT -Figure 3: From up to down: The time-series of the temperature oscillat ions for the first 3 strips of Loop A (strip 1 to -3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the -logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1. +Figure 3: From up to down: The time-series of the temperature oscillat ions for the first 3 strips of Loop A (strip 1 to +3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the +logarithm of the temperature. 
The red lines mark the initial and final time of the flare x2.1. 22:10 22:20 22:30 22:40 22:50 23:000 11213242 F−loopA Time Loop Length(Mm) 5.866.26.46.66.8 @@ -560,19 +560,19 @@ Time Loop Length(Mm) 22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11 F−loopC2 Time Loop Length(Mm) 5.866.26.46.66.8 -Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical +Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows the colors considered for the temperature range. -Table 1: The properties observed for the loop segments of the flaring A R. +Table 1: The properties observed for the loop segments of the flaring A R. FLoopA (Strip Number) The highest Temp.’s period -observed Max(log(T))- -Min(log(T)) FLoopB2 +observed Max(log(T))Min(log(T)) + FLoopB2 (Strip Number) The highest Temp.’s period -observed Max(log(T))- -Min(log(T)) +observed Max(log(T))Min(log(T)) + 1 9.94 1.09 1 18.07 0.68 2 16.57 0.79 2 24.85 0.83 3 8.46 0.65 3 24.85 0.85 @@ -581,8 +581,8 @@ Min(log(T)) 6 24.85 0.76 6 8.28 0.93 7 22.09 0.58 7 18.07 0.84 8 18.07 1.55 8 28.4 0.73 -9 18.07 1.6 FLoopC1 - - -10 12.42 1.57 1 28.4 1.46 +9 18.07 1.6 FLoopC1 - 10 + 12.42 1.57 1 28.4 1.46 11 12.42 1.42 2 22.09 1.34 12 24.85 1.56 3 16.57 1.36 13 19.88 1.6 4 28.04 1.49 @@ -594,8 +594,8 @@ Min(log(T)) 19 8.64 0.95 10 16.57 1.6 20 16.57 1.54 11 16.57 1.6 21 7.36 1.18 12 9.46 1.13 -22 7.36 1.51 FLoopC1 - - -23 18.07 1.58 1 18.07 0.88 +22 7.36 1.51 FLoopC1 - 23 + 18.07 1.58 1 18.07 0.88 24 22.09 1.33 2 28.4 0.8 25 24.85 0.72 3 15.29 0.87 FLoopB1 - - 4 16.57 0.93 @@ -610,12 +610,12 @@ FLoopB1 - - 4 16.57 0.93 9 11.04 1.6 10 18.07 1.6 11 18.07 1.6 -Table 2: The properties observed for the loop segments of the non flari ng AR. 
+Table 2: The properties observed for the loop segments of the non flari ng AR. Nonf-LoopA (Strip Number) The highest Temp.’s period -observed Max(log(T))- -Min(log(T)) +observed Max(log(T))Min(log(T)) + 1 24 0.61 2 30 0.95 3 30 0.81 @@ -630,8 +630,8 @@ Min(log(T)) Nonf-LoopB (Strip Number) The highest Temp.’s period -observed Max(log(T))- -Min(log(T)) +observed Max(log(T))Min(log(T)) + 1 26.66 0.36 2 26.66 0.64 3 10.43 0.45 @@ -641,8 +641,8 @@ Min(log(T)) Nonf-LoopC (Strip Number) The highest Temp.’s period -observed Max(log(T))- -Min(log(T)) +observed Max(log(T))Min(log(T)) + 1 26.66 0.76 2 26.66 0.75 3 26.66 0.26 @@ -653,8 +653,8 @@ Time Loop Length(Mm) 00.020.040.060.080.10.120.140.160.180.2 22:10 22:20 22:30 22:40 22:50 23:0000.10.20.30.40.50.60.70.80.91 Int−Fe−LoopA TimeNormalized Intensity Fe XVIII -Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XV I I I, and mean intensity of Fe -XV I I I (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and +Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XV I I I, and mean intensity of Fe +XV I I I (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors considered for the Intensity range. VI. acknowledgements @@ -669,8 +669,8 @@ LogT NonF−LoopA LogT NonF−LoopB 8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 timeLogT -Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the non- -flaring Loops A and B. Horizontal axis is the time and the verti cal axis is the logarithm of the temperature. +Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the nonflaring + Loops A and B. 
Horizontal axis is the time and the verti cal axis is the logarithm of the temperature. 8:10 8:20 8:30 8:40 8:50 9:000 5 101520 NonF−loopA Time Loop Length(Mm) 5.866.26.46.66.8 @@ -680,20 +680,20 @@ Time Loop Length(Mm) 8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10 NonF−loopC Time Loop Length(Mm) 5.866.26.46.66.8 -Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis +Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis is the distance along the loop in Mm, and the horizontal axis i s the time. The color-bar in the left shows the colors considered for the temperature range. 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 3000.050.10.150.20.250.30.350.4 Temp. Period (min)Percentage of Temp. Periods -Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and non- -flaring (red bars) ARs. The horizontal axis shows the temperature periods in minute. +Figure 8: Hisogram of the temperature periods percentages for the loops’ strips of the flaring (blue bars) and nonflaring + (red bars) ARs. The horizontal axis shows the temperature periods in minute. 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.3 1.4 1.5 1.6 1.7024681012 max(log(T))−min(log(T))Number -Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) -and non-flaring (red bars) ARs. +Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) +and non-flaring (red bars) ARs. R eferences Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 28 0 -Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F. +Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F. 2013, ApJ, 773, 156 Aschwanden, M., B. P. S. C. M. A. 
2013, Solar Physics, 283, 5 Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of London Series A, 364, @@ -763,4 +763,4 @@ Wang, T., & Ofman, L. 2019, ApJ, 886, 2 Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13 Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217 Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141 -Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467 +Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467 \ No newline at end of file diff --git a/read/results/playa/GeoTopo-book.txt b/read/results/playa/GeoTopo-book.txt index 4a00ee3..2e39a2a 100644 --- a/read/results/playa/GeoTopo-book.txt +++ b/read/results/playa/GeoTopo-book.txt @@ -1,12 +1,12 @@ Einführung in die Geometrie und Topologie -0. Auflage, 31. Dezember 2016 Martin Thoma +0. Auflage, 31. Dezember 2016 Martin Thoma Vorwort Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen und Tutorien. -Das Skript ist kostenlos übermartin-thoma.com/geotopoverfügbar. Wer es gerne in A5 (Schwarz- -Weiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). +Das Skript ist kostenlos übermartin-thoma.com/geotopoverfügbar. Wer es gerne in A5 (SchwarzWeiß, + Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). Danksagungen An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen gut strukturierten Tafelanschrieb danken, der als Vorlage für dieses Skript diente. Tatsächlich @@ -15,30 +15,30 @@ Abschnitte konnten direkt mit LA T EX umgesetzt werden. Vielen Dank für die Erlaubnis, Ihre Inhalte in diesem Skript einbauen zu dürfen! -Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsauf- -gaben und Lösungen zu benutzen. 
+Vielen Dank auch an Frau Lenz und Frau Randecker, die es mir erlaubt haben, ihre Übungsaufgaben + und Lösungen zu benutzen. Jérôme Urhausen hat durch viele Verbesserungsvorschläge und Beweise zu einer erheblichen Qualitätssteigerung am Skript beigetragen und meine Tutorin Sarah hat mir viele Fragen per E-Mail und nach dem Tutorium beantwortet. Danke! Was ist Topologie? -Die Kugeloberfläche S 2 - lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche -oder der Oberfläche einer Pyramide verformen, aber nicht zumR 2 +Die Kugeloberfläche S 2 + lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche +oder der Oberfläche einer Pyramide verformen, aber nicht zumR 2 oder zu einem Torus T 2 . Für den R 2 - müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein + müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein Loch machen. Erforderliche Vorkenntnisse Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \ , ∅ , R , P (M )) -und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Wider- -spruchsbeweisen sollte bekannt sein und der Umgang mit komplexen Zahlen C , deren Betrag, +und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Widerspruchsbeweisen + sollte bekannt sein und der Umgang mit komplexen Zahlen C , deren Betrag, Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem in „Analysis I“ vermittelt. -Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, +Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, der Spektralsatz und der pro jektive RaumP (R) aus „Lineare Algebra I“ bekannt sind. In „Lineare -Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. -iii +Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. 
+ (a) S 2 (b)Würfel (c)Pyramide y @@ -49,7 +49,7 @@ Abbildung 0.1:Beispiele für verschiedene Formen Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und Zahlentheorie“ gehört zu haben. Inhaltsverzeichnis -1 Topologische Grundbegriffe2 +1 Topologische Grundbegriffe2 1.1 Topologische Räume. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .2 1.2 Metrische Räume. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6 1.3 Stetigkeit. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .9 @@ -59,7 +59,7 @@ Inhaltsverzeichnis Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .22 2 Mannigfaltigkeiten und Simplizialkomplexe24 2.1 Topologische Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . . . .24 -2.2 Differenzierbare Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . .29 +2.2 Differenzierbare Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . .29 2.3 Simplizialkomplex. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .34 Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
.43 3 Fundamentalgruppe und Überlagerungen44 @@ -81,13 +81,13 @@ Inhaltsverzeichnis Lösungen der Übungsaufgaben99 Bildquellen 105 Abkürzungsverzeichnis106 -Ergänzende Definitionen und Sätze107 +Ergänzende Definitionen und Sätze107 Symbolverzeichnis108 -2 Inhaltsverzeichnis + Inhaltsverzeichnis Stichwortverzeichnis111 -1 Top ologische Grundb egriffe +1 Top ologische Grundb egriffe 1.1 Topologische Räume -Definition 1 +Definition 1 Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P (X ) mit folgenden Eigenschaften (i) ∅, X ∈ T @@ -97,32 +97,32 @@ mit folgenden Eigenschaften 1 ∩ U 2 ∈ T (iii)Ist I eine Menge und U - i ∈ T für jedes i ∈ I , so ist + i ∈ T für jedes i ∈ I , so ist i ∈ I U i ∈ T -Die Elemente von T heißen offene Teilmengen von X . -A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. -Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0 , 1) . Auch gibt es -Mengen, die sowohl abgeschlossen als auch offen sind. -Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) +Die Elemente von T heißen offene Teilmengen von X . +A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. +Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0 , 1) . Auch gibt es +Mengen, die sowohl abgeschlossen als auch offen sind. +Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) Betrachte ∅ und X mit der trivialen Topologie T triv = { ∅ , X } . -Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem X C +Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem X C = X \ X = ∅ ∈ T und -X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. +X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. 
Beispiel 1 (Topologien) 1) X = R n mit der von der euklidischen Metrik erzeugten Topologie T Euklid : U ⊆ R n - offen ⇔ für jedes x ∈ U gibt es r > 0 , + offen ⇔ für jedes x ∈ U gibt es r > 0 , sodass B r (x ) = { y ∈ Rn | d(x, y ) < r } ⊆ U Diese Topologie wird auch „Standardtopologie des R n “ genannt. Sie beinhaltet unter -anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedli- -chem Mittelpunkt (vgl.Definition 1.ii). +anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedlichem + Mittelpunkt (vgl.Definition 1.ii). 2)Jeder metrische Raum ( X, d) ist auch ein topologischer Raum. 3)Für eine Menge X heißt T Diskret = P (X ) diskrete Topologie . @@ -131,9 +131,9 @@ Z := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie Beobachtungen: • U ∈ T Z ⇔ ∃f ∈ R [X ] , sodass R \ U = V (f ) = { x ∈ R | f (x ) = 0 } -• Es gibt keine disjunkten offenen Mengen in T +• Es gibt keine disjunkten offenen Mengen in T Z . -4 1.1. TOPOLOGISCHE RÄUME + 1.1. TOPOLOGISCHE RÄUME 5) X := Rn , T Z = { U ⊆ R n @@ -148,7 +148,7 @@ R n r )} 6) X := { 0 , 1 } , T = { ∅ , { 0 , 1 } , { 0 } } heißt Sierpińskiraum. ∅ , { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. -Definition 2 +Definition 2 Sei ( X, T) ein topologischer Raum und x ∈ X . Eine Teilmenge U ⊆ X heißt Umgebung von x , wenn es ein U 0 ∈ T gibt mit x ∈ U @@ -156,14 +156,14 @@ Eine Teilmenge U ⊆ X heißt Umgebung von x , wenn es ein U U 0 ⊆ U . Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. -Definition 3 +Definition 3 Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge. a) M ◦ - := { x ∈ M | M ist Umgebung von x } = + := { x ∈ M | M ist Umgebung von x } = U ⊆M -U ∈ T U heißt Inneres oder offener +U ∈ T U heißt Inneres oder offener Kern von M . -b) M := +b) M := M ⊆A A abgeschlossenA heißt abgeschlossene Hülle oder Abschluss von M . c) ∂ M := @@ -176,7 +176,7 @@ Beispiel 2 2)Sei X = R und M = (a, b). 
Dann gilt: M = [a, b] 3)Sei X = R , T = T Z und M = (a, b) . Dann gilt: M = R -Definition 4 +Definition 4 Sei ( X, T) ein topologischer Raum. a) B ⊆ T heißt Basis der Topologie T , wenn jedes U ∈ T Vereinigung von Elementen aus B ist. @@ -195,7 +195,7 @@ B = { B ist eine abzählbare Basis von T. 3) Sei (X, T) ein topologischer Raum mitX = { 0, 1, 2 } und T = { ∅ , { 0 } , { 0 , 1 } , { 0, 2 } , X }. Dann ist S = { ∅ , { 0 , 1 } , { 0, 2 } } eine Subbasis von T , da gilt: -5 1.1. TOPOLOGISCHE RÄUME + 1.1. TOPOLOGISCHE RÄUME •S ⊆ T •∅ , { 0, 1 } und { 0 , 2 } ∈ S •{ 0 } = { 0 , 1 } ∩ { 0 , 2 } @@ -205,7 +205,7 @@ aus S erzeugt werden kann. Bemerkung 2 Sei X eine Menge und S ⊆ P ( X ) . Dann gibt es genau eine Topologie T auf X , für die S Subbasis ist. -Definition 5 +Definition 5 Sei ( X, T) ein topologischer Raum und Y ⊆ X . T Y := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . @@ -213,13 +213,13 @@ T Y heißt Teilraumtopologie und (Y , T Y ) heißt ein Teilraum von ( X, T) . Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. -Definition 6 +Definition 6 Seien X 1 , X 2 topologische Räume. U ⊆ X 1 × X -2 sei offen, wenn es zu jedem x = ( x +2 sei offen, wenn es zu jedem x = ( x 1 , x 2 ) ∈ U Umgebungen U i um x @@ -229,13 +229,13 @@ i = 1, 2 gibt, sodass U 2 ⊆ U gilt. T = { U ⊆ X 1 × X -2 | U offen } ist eine Topologie auf X +2 | U offen } ist eine Topologie auf X 1 × X 2 . Sie heißt Produkttopologie. B = { U 1 × U 2 | U - i offen in X + i offen in X i , i = 1, 2 } ist eine Basis von T. U x @@ -270,24 +270,24 @@ R 2 1 × U 2 (SieheAbbildung 1.2) -6 1.1. TOPOLOGISCHE RÄUME + 1.1. TOPOLOGISCHE RÄUME U 1 = R \ NU 2 = R \ N Abbildung 1.2:Zariski-Topologie auf R 2 -Definition 7 +Definition 7 Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/ ∼ sei die Menge der Äquivalenzklassen, π : X → X , x → [ x] ∼ . T -X := +X := U ⊆ - X + X π − 1 (U ) ∈ T -X +X (X , T X ) heißt Quotiententopologie . 
Beispiel 5 @@ -318,15 +318,15 @@ X = R n+1 Ursprungsgerade X = P n ( R ) -7 1.2. METRISCHE RÄUME + 1.2. METRISCHE RÄUME Also für n = 1: −4 −2 2 4 6 8 −4−224 1.2 Metrische Räume -Definition 8 +Definition 8 Sei X eine Menge. Eine Abbildung d : X × X → R + 0 heißt Metrik , wenn gilt: -(i)Definitheit: d(x, y ) = 0 ⇔ x = y ∀x, y ∈ X +(i)Definitheit: d(x, y ) = 0 ⇔ x = y ∀x, y ∈ X (ii)Symmetrie: d(x, y ) = d( y, x ) ∀x, y ∈ X (iii)Dreiecksungleichung: d(x, z ) ≤ d( x, y ) + d(y, z ) ∀x, y, z ∈ X Das Paar (X, d) heißt ein metrischer Raum . @@ -337,7 +337,7 @@ r (x ) := { y ∈ X | d(x, y ) < r } für x ∈ X, r ∈ R + B = { B r ( x ) ⊆ P ( X ) | x ∈ X, r ∈ R + } ist Basis einer Topologie auf X . -Definition 9 +Definition 9 Seien ( X, d X ) und (Y , d Y ) metrische Räume und ϕ : X → Y eine Abbildung mit @@ -353,15 +353,15 @@ Y ( ϕ( x Dann heißt ϕ eine Isometrie von X nach Y . Beispiel 8 (Skalarprodukt erzeugt Metrik) Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt·, ·. Dann wird V -durch d(x, y ) := +durch d(x, y ) := x − y, x − y zum metrischen Raum. Beispiel 9 (diskrete Metrik) Sei X eine Menge. Dann heißt - d(x, y ) = + d(x, y ) = 0 falls x = y 1 falls x = y die diskrete Metrik. Die Metrik d induziert die diskrete Topologie . -8 1.2. METRISCHE RÄUME + 1.2. METRISCHE RÄUME Beispiel 10 X = R 2 und d ((x @@ -381,38 +381,38 @@ r (a) B r (0) (b)Euklidische Topologie Abbildung 1.3:Veranschaulichungen zur Metrik d ausBeispiel 10 -9 1.2. METRISCHE RÄUME + 1.2. METRISCHE RÄUME Beispiel 11 (SNCF-Metrik1 ) X = R 2 −4 −2 2 4 6 8 −4−224 -Definition 10 -Ein topologischer Raum X heißt hausdorffsch , wenn es für je zwei Punkte x = y in X +Definition 10 +Ein topologischer Raum X heißt hausdorffsch , wenn es für je zwei Punkte x = y in X Umgebungen U x um x und U y um y gibt, sodass U x ∩ U y = ∅ . 
Bemerkung 4 (Trennungseigenschaft) -Metrische Räume sind hausdorffsch, wegen +Metrische Räume sind hausdorffsch, wegen d( x, y ) > 0 ⇒ ∃ε > 0 : B ε ( x) ∩ B ε (y ) = ∅ -Beispiel 12 (Topologische Räume und Hausdorff-Räume) +Beispiel 12 (Topologische Räume und Hausdorff-Räume) 1) (R , T -Z ) ist ein topologischer Raum, der nicht hausdorffsch ist. +Z ) ist ein topologischer Raum, der nicht hausdorffsch ist. 2) (R , T -Euklid ) ist ein topologischer Hausdorff-Raum. -Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) +Euklid ) ist ein topologischer Hausdorff-Raum. +Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) Seien X, X 1 , X -2 Hausdorff-Räume. -a)Jeder Teilraum von X ist hausdorffsch. +2 Hausdorff-Räume. +a)Jeder Teilraum von X ist hausdorffsch. b) X 1 × X -2 ist hausdorffsch (vgl.Abbildung 1.4). -Definition 11 +2 ist hausdorffsch (vgl.Abbildung 1.4). +Definition 11 Sei X ein topologischer Raum und ( x ) n∈N eine Folge in X . x ∈ X heißt Grenzwert oder Limes von ( x @@ -422,10 +422,10 @@ n ) , wenn es für jede Umgebung U von x ein n n ≥ n 0 . Bemerkung 6 -Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. +Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. Beweis: Sei ( x n ) eine konvergierende Folge und x und y Grenzwerte der Folge. -Da X hausdorffsch ist, gibt es Umgebungen U +Da X hausdorffsch ist, gibt es Umgebungen U x von x und U y von y mit U x ∩ U @@ -437,10 +437,10 @@ x = y . Da (x x ∩ U y für alle n ≥ n 0 -⇒ x = y +⇒ x = y 1 Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. -10 1.3. STETIGKEIT + 1.3. STETIGKEIT (x 1 , y 1 ) (x @@ -458,11 +458,11 @@ U 2 Abbildung 1.4:Wenn X 1 , X -2 hausdorffsch sind, dann auch X +2 hausdorffsch sind, dann auch X 1 × X 2 1.3 Stetigkeit -Definition 12 +Definition 12 Seien ( X, T X ) , (Y , T Y ) topologische Räume und f : X → Y eine Abbildung. @@ -482,10 +482,10 @@ alle y ∈ X mit d(x, y ) < δ gilt d Y (f ( x ), f (y )) < ε. 
Beweis: „ ⇒“: Sei x ∈ X, ε > 0 gegeben und U := B ε (f ( x )). -Dann ist U offen in Y . +Dann ist U offen in Y . Def. 12.a =====⇒ f −1 - ( U ) ist offen in X . Dann ist x ∈ f − 1 + ( U ) ist offen in X . Dann ist x ∈ f − 1 ( U ). ⇒ ∃δ > 0, sodass B δ (x ) ⊆ f − 1 @@ -494,7 +494,7 @@ Def. 12.a δ ( x )) ⊆ U ⇒ { y ∈ X | d X ( x, y ) < δ } ⇒ Beh. -„ ⇐“: Sei U ⊆ Y offen, X ∈ f −1 +„ ⇐“: Sei U ⊆ Y offen, X ∈ f −1 (U ). Dann gibt es ε > 0 , sodass B ε (f ( x )) ⊆ U @@ -506,7 +506,7 @@ Vor. δ (x ) ⊆ f − 1 (B ε ( f (x ))) ⊆ f −1 - (U ) + (U ) Bemerkung 8 Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: f ist stetig @@ -517,7 +517,7 @@ Beispiel 13 (Stetige Abbildungen und Homöomorphismen) X : X → X ist Homöomorphismus. 2 Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. -11 1.3. STETIGKEIT + 1.3. STETIGKEIT 2) Ist ( Y , T Y ) trivialer topologischer Raum, d. h. T Y = T @@ -533,25 +533,25 @@ R g Abbildung 1.5:Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist. Die Umkehrabbildung g ist nicht stetig, da g − 1 - (U ) nicht offen ist (vgl.Abbildung 1.5). + (U ) nicht offen ist (vgl.Abbildung 1.5). Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) Seien X, Y , Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. Dann ist g ◦ f : X → Z stetig. X f - + g ◦f Y g Z -Beweis: Sei U ⊆ Z offen ⇒ ( g ◦ f ) − 1 +Beweis: Sei U ⊆ Z offen ⇒ ( g ◦ f ) − 1 ( U ) = f −1 ( g −1 ( U )) . g − 1 - ( U ) ist offen in Y weil g stetig + ( U ) ist offen in Y weil g stetig ist, f − 1 ( g − 1 - ( U )) ist offen in X , weil f stetig ist. + ( U )) ist offen in X , weil f stetig ist. Bemerkung 10 a)Für jeden topologischen Raum X ist Homöo(X ) := { f : X → X | f ist Homöomorphismus } @@ -570,18 +570,18 @@ Y : X × Y → Y die Pro jektionen Wird X × Y mit der Produkttopologie versehen, so sind π X und π Y stetig. 
-Beweis: Sei U ⊆ X offen +Beweis: Sei U ⊆ X offen ⇒ π − 1 -X ( U ) = U × Y ist offen in X × Y . +X ( U ) = U × Y ist offen in X × Y . Bemerkung 12 Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/ ∼ der Bahnenraum versehen mit der Quotiententopologie, π : X → X , x → [x ] ∼ . Dann ist π stetig. -12 1.4. ZUSAMMENHANG -Beweis: Nach Definition ist U ⊆ X offen ⇔ π − 1 - ( U ) ⊆ X offen. + 1.4. ZUSAMMENHANG +Beweis: Nach Definition ist U ⊆ X offen ⇔ π − 1 + ( U ) ⊆ X offen. Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. Beispiel 14 (Stereographische Pro jektion) R n @@ -589,19 +589,19 @@ R n \ { N } sind homöomorph für beliebiges N ∈ S n . Es gilt: S n - = - x ∈ R n+1 + = + x ∈ R n+1 - x = 1 -= - x ∈ R n+1 + x = 1 += + x ∈ R n+1 n+1 i =1 x 2 -i = 1 +i = 1 O. B. d. A. sei N =    @@ -623,7 +623,7 @@ f : S n \ { N } → R n P → genau ein Punkt - + L P ∩ H wobei R n @@ -641,7 +641,7 @@ wobei R n x n+1   - ∈ R n+1 + ∈ R n+1 @@ -674,8 +674,8 @@ und H in genau einem Punkt ˆ P . Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. 1.4 Zusammenhang -Definition 13 -a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen +Definition 13 +a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen U 1 , U 2 von X gibt mit U @@ -685,7 +685,7 @@ U 2 = X . b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit der Teilraumtopologie zusammenhängend ist. -13 1.4. ZUSAMMENHANG + 1.4. ZUSAMMENHANG x yz N ˆ @@ -721,14 +721,14 @@ betrachten wir V Rn V . Somit gilt U 1 ∩ [ x, y ] ∈ T - V wegen der Definition der Teilraumtopologie. + V wegen der Definition der Teilraumtopologie. Dann gibt es z ∈ [ x, y ] mit z ∈ ∂ ( U 1 ∩ [ x, y ]) , aber z /∈ U 1 ⇒ z ∈ U 2 . In jeder Umgebung von z liegt ein Punkt von U 1 ⇒ Widerspruch zu U -2 offen. +2 offen. 
2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R < 0 ∪ R >0 @@ -742,9 +742,9 @@ Umgebung von z liegt ein Punkt von U 5) { x } ist zusammenhängend für jedes x ∈ X , wobei X ein topologischer Raum ist. 6) R mit Zariski-Topologie ist zusammenhängend. Bemerkung 14 -Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammen- -hängend. -14 1.4. ZUSAMMENHANG +Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammenhängend. + + 1.4. ZUSAMMENHANG Beweis: durch Widerspruch Annahme: A = A 1 ∪ A @@ -755,14 +755,14 @@ Annahme: A = A 2 = ∅ ⇒ A = (A ∩ A 1 ) - + abgeschlossen ˙ ∪ ( A ∩ A 2 ) - + abgeschlossen - + disjunkt Wäre A ∩ A 1 = ∅ @@ -780,7 +780,7 @@ Wäre A ∩ A ⇒ A ∩ A 1 = ∅ und analog A ∩ A 2 = ∅ -⇒ Widerspruch zu A ist zusammenhängend. +⇒ Widerspruch zu A ist zusammenhängend. Bemerkung 15 Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend. Ist A ∩ B = ∅ , dann ist A ∪ B zusammenhängend. @@ -788,12 +788,12 @@ Beweis: Sei A ∪ B = U 1 ˙ ∪ U 2 , U -i = ∅ offen +i = ∅ offen o. B. d. A. ======⇒ A = (A ∩ U 1 ) ˙ ∪ (A ∩ U - 2 ) offen + 2 ) offen A zhgd. ====⇒ A ∩ U 1 = ∅ @@ -803,17 +803,17 @@ A ∩B =∅ B = (B ∩ U 1 ) - + =U 1 ∪ ( B ∩ U 2 ) - + = ∅ ist unerlaubte Zerlegung. - -Definition 14 + +Definition 14 Sei X ein topologischer Raum. -Für x ∈ X sei Z (x ) ⊆ X definiert durch -Z ( x) := +Für x ∈ X sei Z (x ) ⊆ X definiert durch +Z ( x) := A ⊆X zhgd. x ∈ AA Z ( x ) heißt Zusammenhangskomponente . @@ -823,7 +823,7 @@ a) Z ( x) ist die größte zusammenhängende Teilmenge von X , die x enthält. b) Z ( x) ist abgeschlossen. c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. Beweis: -15 1.5. KOMPAKTHEIT + 1.5. KOMPAKTHEIT a)Sei Z ( x) = A 1 ˙ ∪ A @@ -835,23 +835,23 @@ O. B. d. A. sei x ∈ A die auch x enthält. ⇒ A = (A ∩ A 1 ) - + x ∪ ( A ∩ A 2 ) - + y ist unerlaubte Zerlegung. b)NachBemerkung 14ist Z (x ) zusammenhängend ⇒ Z (x ) ⊆ Z (x ) ⇒ Z ( x ) = Z (x ) c)Ist Z (y ) ∩ Z ( x) = ∅ Bem. 
15 =====⇒ Z (y ) ∪ Z (x ) ist zusammenhängend. ⇒ Z (x ) ∪ Z ( y ) ⊆ Z (x ) ⇒ Z ( y ) ⊆ Z (x ) ⊆ Z (y ) ⇒ Z ( x ) ⊆ Z (y ) - + Bemerkung 17 Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f ( A ) ⊆ Y zusammenhängend. Beweis: Sei f (A ) = U 1 ∪ U 2 , U -i = ∅ , offen, disjunkt. +i = ∅ , offen, disjunkt. ⇒ f −1 (f ( A)) = f −1 (U @@ -861,25 +861,25 @@ i = ∅ , offen, disjunkt. ⇒ A = (A ∩ f − 1 ( U 1 )) - + = ∅ ∪ ( A ∩ f −1 (U 2 )) - -=∅ + +=∅ 1.5 Kompaktheit -Definition 15 +Definition 15 Sei X eine Menge und U ⊆ P (X ) . U heißt eine Überdeckung von X , wenn gilt: ∀x ∈ X : ∃M ∈ U : x ∈ M -Definition 16 -Ein topologischer Raum X heißt kompakt , wenn jede offene Überdeckung von X +Definition 16 +Ein topologischer Raum X heißt kompakt , wenn jede offene Überdeckung von X U = { U i } i ∈I mit U -i offen in X +i offen in X eine endliche Teilüberdeckung - + i ∈J ⊆IU i = X mit |J | ∈ N besitzt. @@ -887,11 +887,11 @@ Bemerkung 18 Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. Beweis: Sei ( U i ) -i ∈ J eine offene Überdeckung von I . +i ∈ J eine offene Überdeckung von I . Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in einem der U i enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle -16 1.5. KOMPAKTHEIT + 1.5. KOMPAKTHEIT der Länge δ unterteilen und alle U i in die endliche Überdeckung aufnehmen, die Teilintervalle enthalten. @@ -907,7 +907,7 @@ Sei x n ) hat einen Häufungspunkt x ∈ [0 , 1] . Dann gibt es i ∈ J mit x ∈ U i . Da U - i offen ist, gibt es ein ε > 0 , sodass ( x − ε, x + ε ) ⊆ U + i offen ist, gibt es ein ε > 0 , sodass ( x − ε, x + ε ) ⊆ U i . Dann gibt es n 0 , sodass gilt: 1 @@ -932,14 +932,14 @@ U j 1 , . . . , U j -d ist endliche Teilüberdeckung von U . +d ist endliche Teilüberdeckung von U . Beispiel 16 (Kompakte Räume) 1) R ist nicht kompakt. 2) (0, 1) ist nicht kompakt. 
U n = (1 /n, 1 − 1 -/n ) ⇒ +/n ) ⇒ n ∈N U n = (0, 1) 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. @@ -947,16 +947,16 @@ Bemerkung 19 Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. Beweis: Sei (V i ) -i ∈I offene Überdeckung von A. -Dann gibt es für jedes i ∈ I eine offene Teilmenge U +i ∈I offene Überdeckung von A. +Dann gibt es für jedes i ∈ I eine offene Teilmenge U i ⊆ X mit V i = U i ∩ A . -⇒ A ⊆ +⇒ A ⊆ i ∈ I U i ⇒ U = { U -i | i ∈ I } ∪ { X \ A } ist offene Überdeckung von X +i | i ∈ I } ∪ { X \ A } ist offene Überdeckung von X X kompakt =======⇒ es gibt i 1 , . . . , i @@ -979,25 +979,25 @@ j =1 (U i j ∩ A ) - + = V i j ∪ ((X \ A ) ∩ A ) - + = ∅ = A ⇒ V i 1 , . . . , V i n überdecken A . - + Bemerkung 20 Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie kompakt. Beweis: Sei ( W i ) - i ∈I eine offene Überdeckung von X × Y . Für jedes ( x, y ) ∈ X × Y gibt es -offene Teilmengen U + i ∈I eine offene Überdeckung von X × Y . Für jedes ( x, y ) ∈ X × Y gibt es +offene Teilmengen U x,y von X und V x,y von Y sowie ein i ∈ I , sodass U x,y × V @@ -1008,7 +1008,7 @@ x,y von Y sowie ein i ∈ I , sodass U 0 , da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4 Sogar für unendlich viele. -17 1.5. KOMPAKTHEIT + 1.5. KOMPAKTHEIT W i x @@ -1018,7 +1018,7 @@ y U x,y YX Abbildung 1.7:Die blaue Umgebung ist Schnitt vieler Umgebungen -Die offenen Mengen U +Die offenen Mengen U x 0 ,y × V x @@ -1029,7 +1029,7 @@ kompakt ist, ist auch { x 0 } × Y kompakt. Also gibt es y 1 , . . . , y m (x - 0 ) mit + 0 ) mit m (x 0 ) i =1 U @@ -1043,43 +1043,43 @@ V 0 } × Y . Sei U x - 0 := + 0 := m (x ) i =1 U x 0 ,y i . Da X kompakt ist, gibt es x 1 , . . . 
, x - n ∈ X mit + n ∈ X mit n j =1 U x j = X -⇒ +⇒ k -j =1 +j =1 m (x j ) -i =1 +i =1 U x j ,y i × V x j ,y - i + i + - Ein grün-oranges Kästchen⊇ X × Y -⇒ -j +⇒ +j i W i ( x j , y -i ) = X × Y +i ) = X × Y Bemerkung 21 -Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. -Beweis: z. Z.: Komplement ist offen +Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. +Beweis: z. Z.: Komplement ist offen Ist X = K , so ist K abgeschlossen in X . Andernfalls sei y ∈ X \ K . Für jedes x ∈ K seien U x bzw. V @@ -1092,7 +1092,7 @@ Kx y Da K kompakt ist, gibt es endlich viele x 1 , . . . , x - n ∈ K , sodass + n ∈ K , sodass m i =1 U x @@ -1102,29 +1102,29 @@ Sei V := n i =1 V x i -18 1.6. WEGE UND KNOTEN -⇒ V ∩ + 1.6. WEGE UND KNOTEN +⇒ V ∩ n i =1 U x - i + i = ∅ ⇒ V ∩ K = ∅ ⇒ V ist Überdeckung von y , die ganz in X \ K enthalten ist. -⇒ X \ K ist offen -Damit ist K abgeschlossen. +⇒ X \ K ist offen +Damit ist K abgeschlossen. Bemerkung 22 Seien X, Y topologische Räume, f : X → Y stetig. Ist K ⊆ X kompakt, so ist f ( K ) ⊆ Y kompakt. Beweis: Sei (V i ) -i ∈ I offene Überdeckung von f ( K ) +i ∈ I offene Überdeckung von f ( K ) f stetig ====⇒ (f − 1 ( V i )) -i ∈ I ist offene Überdeckung von K +i ∈ I ist offene Überdeckung von K Kompakt =====⇒ es gibt i 1 , . . . , i @@ -1143,7 +1143,7 @@ i i n )) überdecken f (K ). Es gilt: f (f −1 - ( V )) = V ∩ f ( X ) + ( V )) = V ∩ f ( X ) Satz 1.1 (Heine-Borel) Eine Teilmenge von R n oder Cn @@ -1154,14 +1154,14 @@ Beweis: „ ⇒“: Sei K ⊆ R n ) kompakt. Da R n und C n - hausdorffsch sind, ist K nachBemerkung 21abgeschlossen. Nach Vorausset- -zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist + hausdorffsch sind, ist K nachBemerkung 21abgeschlossen. Nach Voraussetzung + kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist beschränkt. „ ⇐“ Sei A ⊆ R n (oder Cn ) beschränkt und abgeschlossen. 
Dann gibt es einen Würfel W = [−N , N ] × · · · × [−N , N ] - + n mal mit A ⊆ W bzw. „Polyzylinder“ Z = { (z 1 , . . . , z @@ -1172,15 +1172,15 @@ NachBemerkung 20undBemerkung 18istW kompakt, also ist A nachBemerkung 19auch kompakt. Genauso ist Z kompakt, weil { z ∈ C z | ≤ 1 } homöomorph zu - - (x, y ) ∈ R 2 - ( x, y ) ≤ 1 -ist. + (x, y ) ∈ R 2 + + ( x, y ) ≤ 1 +ist. 1.6 Wege und Knoten -Definition 17 +Definition 17 Sei X ein topologischer Raum. -19 1.6. WEGE UND KNOTEN + 1.6. WEGE UND KNOTEN a)Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X . b) γ heißt geschlossen , wenn γ (1) = γ (0) gilt. c) γ heißt einfach , wenn γ | @@ -1189,7 +1189,7 @@ Beispiel 17 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form ∀x ∈ [0, 1] : γ ( x) = c, c ∈ X Denn γ ([0, 1]) ist zusammenhängend für jeden Weg γ . -Definition 18 +Definition 18 Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ (0) = x und γ (1) = y . Bemerkung 23 @@ -1210,19 +1210,19 @@ ein Weg von x nach y . Dann ist C := γ ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. C = (C ∩ A 1 ) - + x ∪ ( C ∩ A 2 ) - + y ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch -b)Sei X = - ( x, y ) ∈ R 2 +b)Sei X = + ( x, y ) ∈ R 2 x 2 + y 2 = 1 ∨ y = 1 + 2 · e − 1 -10 x +10 x . Abbildung 1.8averanschaulicht diesen Raum. Sei U @@ -1230,7 +1230,7 @@ Sei U 2 = X, U 1 = U 2 = ∅, U -i offen. X = C ∪ S . Dann ist C ⊆ U +i offen. X = C ∪ S . Dann ist C ⊆ U 1 oder C ⊆ U 2 , weil C und S zusammenhängend sind. @@ -1244,15 +1244,15 @@ Sei y ∈ C = U 1 enthalten ist. Aber: B ε ( y ) ∩ S = ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht -wegzusammenhängend. +wegzusammenhängend. Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0 , 1] → [0 , 1] × [0 , 1] . Ein Beispiel ist die inAbbil- -dung 1.9dargestellte Hilbert-Kurve. -Definition 19 +Es gibt stetige, surjektive Abbildungen [0 , 1] → [0 , 1] × [0 , 1] . 
Ein Beispiel ist die inAbbildung + 1.9dargestellte Hilbert-Kurve. +Definition 19 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : [0, 1] → C ⊆ X bzw. γ : S 1 → C ⊆ X , wobei C := Bild γ . -20 1.6. WEGE UND KNOTEN + 1.6. WEGE UND KNOTEN (a)Spirale S mit Kreis C 0. 1 1 −101 X @@ -1273,21 +1273,21 @@ Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. außen innen Jordankurve -Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die be- -schränkte äußeres genannt. +Abbildung 1.10: Die unbeschränkte Zusammenhangskomponente wird häufig inneres, die beschränkte + äußeres genannt. Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebraische Topologie: Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265) nachgelesen werden. Idee: Ersetze Weg C durch Polygonzug. -21 1.6. WEGE UND KNOTEN -Definition 20 + 1.6. WEGE UND KNOTEN +Definition 20 Eine geschlossene Jordankurve in R 3 heißt Knoten. Beispiel 19 (Knoten) (a)Trivialer Knoten (b)Kleeblattknoten (c)Achterknoten (d) 6 2 -Knoten Abbildung 1.11:Beispiele für verschiedene Knoten -Definition 21 +Definition 21 Zwei Knoten γ 1 , γ 2 : S 1 @@ -1308,7 +1308,7 @@ z : S 1 ein Knoten. Die Abbildung H heißt Isotopie zwischen γ 1 und γ 2 . -Definition 22 +Definition 22 Sei γ : [0, 1] → R3 ein Knoten, E eine Ebene und π : R 3 → E eine Pro jektion auf E . @@ -1332,7 +1332,7 @@ Ist (π | Satz 1.3 (Satz von Reidemeister) Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. -22 1.6. WEGE UND KNOTEN + 1.6. WEGE UND KNOTEN (a) Ω 1 (b) Ω 2 @@ -1340,14 +1340,14 @@ durch endlich viele „Reidemeister-Züge“ ineinander überführt werden könn 3 Abbildung 1.12:Reidemeister-Züge Beweis: Durch sorgfältige Fallunterscheidung. 
5 -Definition 23 +Definition 23 Ein Knotendiagramm heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben auftreten. Abbildung 1.13:Ein 3-gefärber Kleeblattknoten 5 Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177. -23 1.6. WEGE UND KNOTEN + 1.6. WEGE UND KNOTEN Übungsaufgaben Aufgabe 1 (Sierpińskiraum) Es sei X := { 0, 1 } und T @@ -1355,7 +1355,7 @@ X := { ∅ , { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. (a)Beweisen Sie, dass (X, T X ) ein topologischer Raum ist. (b)Ist (X, T -X ) hausdorffsch? +X ) hausdorffsch? (c)Ist T X von einer Metrik erzeugt? Aufgabe 2 @@ -1365,14 +1365,14 @@ versehen. Zeigen Sie: (a)Jedes U a,b und jede einelementige Teilmenge von Z ist abgeschlossen. -(b) { − 1, 1 } ist nicht offen. +(b) { − 1, 1 } ist nicht offen. (c)Es gibt unendlich viele Primzahlen. Aufgabe 3 (Cantorsches Diskontinuum) Für jedes i ∈ N sei P - i := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := + i := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := i ∈N P i . -(a)Wie sehen die offenen Mengen von P aus? +(a)Wie sehen die offenen Mengen von P aus? (b)Was können Sie über den Zusammenhang von P sagen? Aufgabe 4 (Kompaktheit) (a)Ist GL @@ -1382,33 +1382,33 @@ Aufgabe 4 (Kompaktheit) n ( R) = { A ∈ R n× n | det(A) = 1 } kompakt? (c)Ist P (R ) kompakt? -Aufgabe 5 (Begriffe) -Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. +Aufgabe 5 (Begriffe) +Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, begründen Sie warum. 1)Ein Homomorphismus, der zugleich ein Homöomorphismus ist, 2)ein Homomorphismus, der kein Homöomorphismus ist, -24 1.6. WEGE UND KNOTEN + 1.6. 
WEGE UND KNOTEN 3)ein Homöomorphismus, der kein Homomorphismus ist -Aufgabe 6 (Begriffe) -Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“. +Aufgabe 6 (Begriffe) +Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“. 2 Mannigfaltigkeiten und Simplizialkomplexe 2.1 Topologische Mannigfaltigkeiten -Definition 24 +Definition 24 Sei ( X, T) ein topologischer Raum und n ∈ N. a) Eine n -dimensionale Karte auf X ist ein Paar ( U, ϕ) , wobei U ∈ T und ϕ : U → V -Homöomorphismus von U auf eine offene Teilmenge V ⊆ R n +Homöomorphismus von U auf eine offene Teilmenge V ⊆ R n . b) Ein n -dimensionaler Atlas A auf X ist eine Familie ( U i , ϕ i ) i ∈ I von Karten auf X , -sodass +sodass i ∈ I U i = X . c) X - heißt (topologische) n -dimensionale Mannigfaltigkeit , wenn X hausdorffsch ist, + heißt (topologische) n -dimensionale Mannigfaltigkeit , wenn X hausdorffsch ist, eine abzählbare Basis der Topologie hat und einen n -dimensionalen Atlas besitzt. Anschaulich ist also ein n -dimensionale Mannigfaltigkeit lokal dem Rn ähnlich. @@ -1416,15 +1416,15 @@ Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) Jede n -dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R . Beweis: Sei (X, T ) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ R n , wobei -V offen und ϕ ein Homöomorphismus ist, eine Karte auf X . -Da jede offene Teilmenge des R n +V offen und ϕ ein Homöomorphismus ist, eine Karte auf X . +Da jede offene Teilmenge des R n genauso mächtig ist wie der R n , ϕ als Homöomorphismus insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig sind, ist U genauso mächtig wie der R n . Da jede Mannigfaltigkeit mindestens eine Karte hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn - . + . Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können beliebig viele Elemente haben. 
Bemerkung 25 @@ -1434,8 +1434,8 @@ b) Für n = m sind Rn nicht homöomorph. Zum Beweis benutzt man den „Satz von der Gebietstreue“ (Brouwer): Ist U ⊆ R n - offen und f : U → R n - stetig und injektiv, so ist f (U ) offen. + offen und f : U → R n + stetig und injektiv, so ist f (U ) offen. Ist n < m und R m homöomorph zu R n , so wäre @@ -1449,10 +1449,10 @@ f : R n 2 , . . . , x n , 0, . . . , 0) eine stetige injektive Abbildung. Also müsste f (R n - ) offen sein ⇒ Widerspruch -26 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN + ) offen sein ⇒ Widerspruch + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN Beispiel 20 (Mannigfaltigkeiten) -1) Jede offene Teilmenge U ⊆ R n +1) Jede offene Teilmenge U ⊆ R n ist eine n -dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte. 2) C n @@ -1478,7 +1478,7 @@ i := { ( x n ) ∈ P n (R ) | x i = 0 } ∀i ∈ 0, . . . , n . Dann ist P n - (R ) = + (R ) = n i =0 U i @@ -1487,7 +1487,7 @@ und die Abbildung i → R n ( x 0 : · · · : x - n ) → + n ) → x 0 x @@ -1498,7 +1498,7 @@ x i , . . . , x n x - i + i (y 1 : · · · : y i −1 : 1 : y @@ -1519,12 +1519,12 @@ Umgebung: B 1 (0, 1) → { (1 : u : v ) | ( u, v ) < 1 } = V 1 Umgebung: B -1 (0, 1) → - (w : z : 1) +1 (0, 1) → + (w : z : 1) w 2 + z 2 - < 1 + < 1 = V 2 V @@ -1547,10 +1547,10 @@ c ) 2 c < 1 ⇒ Widerspruch 4) S n - = - x ∈ R n+1 + = + x ∈ R n+1 - x = 1 + x = 1 ist n -dimensionale Mannigfaltigkeit. Karten: D @@ -1561,7 +1561,7 @@ D i > 0 } → B 1 (0, . . . , 0 - + ∈ Rn ) C i := {( x @@ -1581,20 +1581,20 @@ i < 0 } → B 1 , . . . , x n ) → (x 1 , . . . , x - i − 1 , -1 − + i − 1 , +1 − n k =1 x 2 k , x i , . . . , x n ), oder − -1 − +1 − n k =1 x2 k für C i S n - = + = n+1 i =1 (C i ∪ D @@ -1602,34 +1602,34 @@ i ) Als kompakte Mannigfaltigkeit wird S n auch „ geschlossene Mannigfaltigkeit“ genannt. 
5) [0, 1] ist keine Mannigfaltigkeit, denn: -Es gibt keine Umgebung von 0 in [0 , 1] , die homöomorph zu einem offenem Intervall +Es gibt keine Umgebung von 0 in [0 , 1] , die homöomorph zu einem offenem Intervall ist. 1 x i wird rausgenommen -27 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN 6) V - 1 = - ( x, y ) ∈ R 2 + 1 = + ( x, y ) ∈ R 2 - x · y = 0 + x · y = 0 ist keine Mannigfaltigkeit. Das Problem ist (0 , 0) . Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 Zusammenhangskomponenten. Jeder R n - zerfällt jedoch in höchstens zwei Zusammen- -hangskomponenten, wenn man einen Punkt entfernt. + zerfällt jedoch in höchstens zwei Zusammenhangskomponenten, + wenn man einen Punkt entfernt. 7) V - 2 = - ( x, y ) ∈ R 2 + 2 = + ( x, y ) ∈ R 2 x 3 - = y 2 + = y 2 ist eine Mannigfaltigkeit. 8) X = (R \ { 0 }) ∪ (0 1 , 0 2 ) -U ⊆ X offen ⇔ -U offen in R \ { 0 } , falls 0 +U ⊆ X offen ⇔ +U offen in R \ { 0 } , falls 0 1 /∈ U, 0 2 ∈ U ∃ε > 0 : (−ε, ε) ⊆ U falls 0 @@ -1637,31 +1637,31 @@ U offen in R \ { 0 } , falls 0 2 ∈ U Insbesondere sind ( R \ { 0 }) ∪ { 0 1 } und ( R \ { 0 } ) ∪ { 0 -2 } offen und homöomorph +2 } offen und homöomorph zu R . -Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0 +Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0 1 und 0 2 . 9) GL n ( R ) ist eine Mannigfaltigkeit der Dimension n 2 - , weil offene Teilmengen von R n2 + , weil offene Teilmengen von R n2 eine Mannigfaltigkeit bilden. -Definition 25 -Seien X, Y n -dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Ho- -möomorphismus Z = (X ˙ +Definition 25 +Seien X, Y n -dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Homöomorphismus + Z = (X ˙ ∪ Y )/ ∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation und der von ∼ induzierten Quotiententopologie. Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n -dimensionalen -Karten. 
Falls Z hausdorffsch ist, ist Z eine n -dimensionale Mannigfaltigkeit. +Karten. Falls Z hausdorffsch ist, ist Z eine n -dimensionale Mannigfaltigkeit. Bemerkung 26 Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit der Dimension n + m. -Beweis: Produkte von Karten sind Karten. +Beweis: Produkte von Karten sind Karten. Beispiel 21 Mannigfaltigkeiten mit Dimension 1: -1)Offene Intervalle, R , (0, 1) sind alle homöomorph +1)Offene Intervalle, R , (0, 1) sind alle homöomorph 2) S 1 Mannigfaltigkeiten mit Dimension 2: 1) R 2 @@ -1672,11 +1672,11 @@ Mannigfaltigkeiten mit Dimension 2: 4)oder mehr Henkel, wie z.B. der Zweifachtorus inAbbildung 2.1 Bemerkung 27 Sei n ∈ N , F : R n - → R stetig differenzierbar und X = V ( F ) := { x ∈ R n + → R stetig differenzierbar und X = V ( F ) := { x ∈ R n | F ( x) = 0 } das „vanishing set“ . Dann gilt: -28 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN Abbildung 2.1:Durch Verklebung zweier Tori entsteht ein Zweifachtorus. a) X ist abgeschlossen in R n b)Ist grad(F )(X ) = 0 ∀x ∈ X , so ist X eine Mannigfaltigkeit der Dimension n − 1. @@ -1689,23 +1689,23 @@ a) ε = 1 2 F ( y ) . Folgt B δ ( y ) ∩ V (F ) = ∅ ⇒ Rn - \ V (F ) ist offen. + \ V (F ) ist offen. b) Sei x ∈ X mit grad( F )( x ) = 0 , also o. B. d. A. ∂F ∂X 1 ( x ) = 0 , x = ( x 1 , . . . , x n ) , -x +x := ( x 2 , . . . , x n ) ∈ R n− 1 . Der Satz von der impliziten Funktion liefert nun: Es -gibt Umgebungen U von x - und differenzierbare Funktionen g : U → R , sodass +gibt Umgebungen U von x + und differenzierbare Funktionen g : U → R , sodass G : U → R n - , u → (g (u), u) eine stetige Abbildung auf eine offene UmgebungV von x + , u → (g (u), u) eine stetige Abbildung auf eine offene UmgebungV von x in X ist. - + Beispiel 22 1) F : R 3 @@ -1753,16 +1753,16 @@ a = 2 Abbildung 2.2:Rechts ist die Neilsche Parabel für verschiedene Parameter a. 
Daher istBemerkung 27.bnicht anwendbar, aberV (F ) ist trotzdem eine 1-dimensionale topologische Mannigfaltigkeit. -29 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN -Definition 26 -Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n -dimensionale + 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN +Definition 26 +Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n -dimensionale Mannigfaltigkeit mit Rand , wenn es einen Atlas (U i , ϕ i ) gibt, wobei U i ⊆ X -i offen und +i offen und ϕ -i ein Homöomorphismus auf eine offene Teilmenge von +i ein Homöomorphismus auf eine offene Teilmenge von R n + , 0 := { (x 1 , . . . , x @@ -1782,13 +1782,13 @@ Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. = (c)Sphäre mit einem Loch Abbildung 2.3:Beispiele für Mannigfaltigkeiten mit Rand -Definition 27 +Definition 27 Sei X eine n -dimensionale Mannigfaltigkeit mit Rand und Atlas A . Dann heißt -∂ X := +∂ X := ( U,ϕ )∈A { x ∈ U | ϕ( x ) = 0 } Rand von X . ∂ X ist eine Mannigfaltigkeit der Dimension n − 1 . -Definition 28 +Definition 28 Sei X eine n -dimensionale Mannigfaltigkeit mit Atlas (U i , ϕ i ) @@ -1808,7 +1808,7 @@ j (U i ∩ U j ) Kartenwechsel oder Übergangsfunktion. -30 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN R n R nU i U @@ -1820,23 +1820,23 @@ i V i ϕ j Abbildung 2.4:Kartenwechsel -2.2 Differenzierbare Mannigfaltigkeiten -Definition 29 +2.2 Differenzierbare Mannigfaltigkeiten +Definition 29 Sei X eine n -dimensionale Mannigfaltigkeit mit Atlas (U i , ϕ i ) i ∈I . -a) X heißt differenzierbare Mannigfaltigkeit der Klasse C k - , wenn jede Karten- -wechselabbildung ϕ - ij , i, j ∈ I k -mal stetig differenzierbar ist. -b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig- -faltigkeit der Klasse C ∞ +a) X heißt differenzierbare Mannigfaltigkeit der Klasse C k + , wenn jede Kartenwechselabbildung + ϕ + ij , i, j ∈ I k -mal stetig differenzierbar ist. 
+b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannigfaltigkeit + der Klasse C ∞ ist. -Differenzierbare Mannigfaltigkeiten der Klasse C ∞ +Differenzierbare Mannigfaltigkeiten der Klasse C ∞ werden auch glatt genannt. -Definition 30 -Sei X eine differenzierbare Mannigfaltigkeit der Klasse C k +Definition 30 +Sei X eine differenzierbare Mannigfaltigkeit der Klasse C k ( k ∈ N ∪ { ∞ }) mit Atlas A = (U i , ϕ @@ -1847,73 +1847,73 @@ i und ϕ i ◦ ϕ− 1 (i ∈ I mit U -i ∩ U = ∅ ) differenzierbar von Klasse C k +i ∩ U = ∅ ) differenzierbar von Klasse C k sind. b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der Klasse C k . Er heißt C k -Struktur auf X . Eine C ∞ - -Struktur heißt auch differenzierbare Struktur auf X . + -Struktur heißt auch differenzierbare Struktur auf X . Bemerkung 28 Für n ≥ 4 gibt es auf S n - mehrere verschiedene differenzierbare Strukturen, die sogenannten + mehrere verschiedene differenzierbare Strukturen, die sogenannten „exotische Sphären“ . -Definition 31 -Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m , x ∈ X . -a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C k +Definition 31 +Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m , x ∈ X . +a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C k ), wenn es Karten ( U, ϕ) von X mit x ∈ U und ( V , ψ ) von Y mit f ( U ) ⊆ V gibt, sodass ψ ◦ f ◦ ϕ −1 - stetig differenzierbar von Klasse C k + stetig differenzierbar von Klasse C k in ϕ (x ) ist. -b) f heißt differenzierbar (von Klasse C k - ), wenn f in jedem x ∈ X differenzierbar ist. -c) f heißt Diffeomorphismus , wenn f differenzierbar von Klasse C ∞ +b) f heißt differenzierbar (von Klasse C k + ), wenn f in jedem x ∈ X differenzierbar ist. 
+c) f heißt Diffeomorphismus , wenn f differenzierbar von Klasse C ∞ ist und es eine -differenzierbare Abbildung g : Y → X von Klasse C ∞ +differenzierbare Abbildung g : Y → X von Klasse C ∞ gibt mit g ◦ f = id X und f ◦ g = id Y . -31 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Bemerkung 29 -Die Bedingung inDefinition 31.ahängt nicht von den gewählten Karten ab. -Beweis: Seien (U +Die Bedingung inDefinition 31.ahängt nicht von den gewählten Karten ab. +Beweis: Seien (U , ϕ - ) und (V - , ψ - ) Karten von X bzw. Y um x bzw. f (x ) mit f (U - ) ⊆ V + ) und (V + , ψ + ) Karten von X bzw. Y um x bzw. f (x ) mit f (U + ) ⊆ V . -⇒ ψ - ◦ f ◦ (ϕ +⇒ ψ + ◦ f ◦ (ϕ )− 1 -= ψ += ψ ◦ (ψ −1 ◦ ψ ) ◦ f ◦ ( ϕ− 1 ◦ ϕ) ◦ (ϕ )−1 -ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 - differenzierbar ist. +ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 + differenzierbar ist. Beispiel 23 f : R → R , x → x 3 - ist kein Diffeomorphismus, aber Homöomorphismus, da mitg (x ) := 3√ + ist kein Diffeomorphismus, aber Homöomorphismus, da mitg (x ) := 3√ x gilt: f ◦ g = id R , g ◦ f = id R Bemerkung 30 Sei X eine glatte Mannigfaltigkeit. Dann ist -Diffeo(X ) := { f : X → X | f ist Diffeomorphismus } +Diffeo(X ) := { f : X → X | f ist Diffeomorphismus } eine Untergruppe von Homöo( X ). -Definition 32 +Definition 32 S ⊆ R 3 heißt reguläre Fläche : ⇔ ∀ s ∈ S ∃ Umgebung V ( s ) ⊆ R 3 ∃U ⊆ R 2 - offen: -∃ differenzierbare Abbildung F : U → V ∩ S : Rg(J + offen: +∃ differenzierbare Abbildung F : U → V ∩ S : Rg(J F (u)) = 2 ∀u ∈ U . F heißt (lokale) reguläre Parametrisierung von S . F (u, v ) = ( x (u, v ), y (u, v ), z (u, v )) @@ -1930,16 +1930,16 @@ F (u, v ) =  ∂v ( p )   Beispiel 24 -1)Rotationsflächen: Sei r : R → R - >0 eine differenzierbare Funktion. +1)Rotationsflächen: Sei r : R → R + >0 eine differenzierbare Funktion. 
F : R 2 → R 3 (u, v ) → (r ( u) cos(u), r ( v ) sin(u), v ) J F ( u, v ) =  - −r ( v ) sin u r + −r ( v ) sin u r ( v ) cos u -r (v ) cos u r +r (v ) cos u r ( v ) sin u 0 1   @@ -1967,18 +1967,18 @@ R 2 ( v ) sin2 (u) + sin 2 ( v )) -=R 2 +=R 2 cos 2 (v )(cos 2 (u) + sin 2 ( u)) + sin 2 (v ) -=R 2 +=R 2 cos 2 (v ) + sin 2 - ( v ) + ( v ) =R 2 -32 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN N S v u (a)Kugelkoordinaten −1 @@ -1995,7 +1995,7 @@ u (a)Kugelkoordinaten −1 xy sin x cos x (c)Sinus und Kosinus haben keine gemeinsame Nullstelle -33 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Die Jacobi-Matrix J F ( u, v ) =  @@ -2006,10 +2006,10 @@ R cos v cos u −R sin v sin u hat Rang 2 für cos v = 0. In N und S ist cos v = 0. Bemerkung 31 Jede reguläre Fläche S ⊆ R 3 - ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. + ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. Beweis: S ⊆ R3 - ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von + ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von regulären Flächen folgt direkt, dass Karten(U i , F i ) und (U @@ -2022,10 +2022,10 @@ U i ∩ U j = ∅ existieren, wobei F i und F -j nach Definition differenzierbare Abbildungen sind. +j nach Definition differenzierbare Abbildungen sind. z.Z.: F − 1 j ◦ F -i ist ein Diffeomorphismus. +i ist ein Diffeomorphismus. U i U jS @@ -2038,9 +2038,9 @@ j ◦ F i Abbildung 2.5:Reguläre Fläche S zum Beweis vonBemerkung 31 Idee: - Finde differenzierbare Funktion + Finde differenzierbare Funktion F −1 -j in Umgebung W von s, sodass +j in Umgebung W von s, sodass F − 1 j | S ∩ W = F − 1 @@ -2058,18 +2058,18 @@ Da Rg(J F j ( v 0 )) = 2 ist, ist o. B. d. A. -det +det ∂x ∂u ∂x ∂v ∂y ∂u ∂y -∂v +∂v (v 0 ) = 0 und F j (u, v ) = (x ( u, v ) , y ( u, v ) , z ( u, v )). 
-Definiere +Definiere F j : U j × R → R3 @@ -2077,14 +2077,14 @@ j : U F j ( u, v, t ) := (x (u, v ), y (u, v ), z (u, v ) + t) -Offensichtlich: +Offensichtlich: F j | U j ×{ 0 } = F j J - + F j =   ∂x @@ -2098,25 +2098,25 @@ F ∂v 1  ⇒ det J - + F j ( v 0 , 0) = 0 Analysis II ======⇒ Es gibt Umgebungen W von F -j von +j von F j (v 0 , 0) = F j (v - 0 ) = s , sodass + 0 ) = s , sodass F j auf W eine -differenzierbar Inverse F −1 +differenzierbar Inverse F −1 j hat. -34 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN + 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Weiter gilt: - + F j −1 | @@ -2132,15 +2132,15 @@ j ◦ F i | F −1 i ( W ∩ S ) -ist differenzierbar. -Definition 33 +ist differenzierbar. +Definition 33 Sei G eine Mannigfaltigkeit und ( G, ◦) eine Gruppe. a) G heißt topologische Gruppe , wenn die Abbildungen ◦ : G × G → G und ι : G → G -definiert durch +definiert durch g ◦ h := g · h und ι (g ) := g −1 stetig sind. -b) Ist G eine differenzierbare Mannigfaltigkeit, so heißtG Lie-Gruppe, wenn (G, ◦ ) und -(G, ι) differenzierbar sind. +b) Ist G eine differenzierbare Mannigfaltigkeit, so heißtG Lie-Gruppe, wenn (G, ◦ ) und +(G, ι) differenzierbar sind. Beispiel 25 (Lie-Gruppen) 1)Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. 2) GL @@ -2150,11 +2150,11 @@ Beispiel 25 (Lie-Gruppen) 4) (R > 0 , · ) 5) (R n - , +), denn A · B ( i, j ) = + , +), denn A · B ( i, j ) = n k =1 a ik b -kj ist nach allen Variablen differenzierbar +kj ist nach allen Variablen differenzierbar (A −1 )(i, j ) = det(A ij ) @@ -2177,12 +2177,12 @@ n1 . . . a nn    ∈ R (n− 1)×( n−1) -ist differenzierbar. +ist differenzierbar. det A ij kann 0 werden, da: - + 1 1 -−1 0 +−1 0 6) SL n ( R) = { A ∈ GL n ( R) | det(A) = 1 } @@ -2191,10 +2191,10 @@ Ist G eine Lie-Gruppe und g ∈ G , so ist die Abbildung l g : G → G h → g · h -ein Diffeomorphismus. -35 2.3. SIMPLIZIALKOMPLEX +ein Diffeomorphismus. + 2.3. SIMPLIZIALKOMPLEX 2.3 Simplizialkomplex -Definition 34 +Definition 34 Seien v 0 , . . . 
, v k ∈ Rn @@ -2203,7 +2203,7 @@ a) v 0 , . . . , v k sind in allgemeiner Lage ⇔ - es gibt keinen (k − 1)-dimensionalen affinen Untervektorraum, derv + es gibt keinen (k − 1)-dimensionalen affinen Untervektorraum, derv 0 , . . . , v k enthält ⇔ v @@ -2213,23 +2213,23 @@ a) v 0 sind linear unabhängig. b) conv (v 0 , . . . , v - k ) := - + k ) := + k i =0 λ i v - i + i λ - i ≥ 0, + i ≥ 0, k i =0 λ - i = 1 + i = 1 heißt die konvexe Hülle von v 0 , . . . , v k . -Definition 35 +Definition 35 a) Sei ∆ n = conv ( e @@ -2283,7 +2283,7 @@ e 3 (d)3-Simplex ∆ 3 Abbildung 2.6:Beispiele für k -Simplexe -Definition 36 +Definition 36 a) Eine endliche Menge K von Simplizes im Rn heißt (endlicher) Simplizialkomplex , @@ -2296,26 +2296,26 @@ wenn gilt: 2 leer oder ein Teilsimplex von ∆ 1 und von ∆ 2 . -b) |K | := +b) |K | := ∆ ∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K . c)Ist d = max { k ∈ N 0 | K enthält k -Simplex }, so heißt d die Dimension von K . -36 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX (a)1D Simplizialkomplex (b) 2D Simplizialkomplex (ohne untere Fläche!) (c)2D Simplizialkomplex (d)1D Simplizialkomplex (e)2D Simplizialkomplex P -(f ) P ist kein Teilsimplex, da Eigen- -schaftPunkt b.iiverletzt ist P +(f ) P ist kein Teilsimplex, da EigenschaftPunkt + b.iiverletzt ist P (g)Simplizialkomplex Abbildung 2.7:Beispiele für Simplizialkomplexe -Definition 37 +Definition 37 Seien K, L Simplizialkomplexe. Eine stetige Abbildung f : |K | → |L| heißt simplizial, wenn für jedes ∆ ∈ K gilt: a) f (∆) ∈ L b) f | -∆ : ∆ → f (∆) ist eine affine Abbildung. +∆ : ∆ → f (∆) ist eine affine Abbildung. Beispiel 26 (Simpliziale Abbildungen) 1) ϕ( e 1 ) := b @@ -2323,7 +2323,7 @@ Beispiel 26 (Simpliziale Abbildungen) 2 ) := b 2 ϕ ist eine eindeutig bestimmte lineare Abbildung -37 2.3. SIMPLIZIALKOMPLEX + 2.3. 
SIMPLIZIALKOMPLEX 0 e 2e 1 @@ -2354,7 +2354,7 @@ bbb b b b bAbbildung 2.8:Abbildung eines Torus auf eine Sphäre -Definition 38 +Definition 38 Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei a n ( K ) die Anzahl der n -Simplizes in K . @@ -2372,16 +2372,16 @@ Beispiel 27 ) = 3 − 3 + 1 = 1 χ (∆3 ) = 4 − 6 + 4 − 1 = 1 -2) χ (Oktaeder-Oberfläche ) = 6 − 12 + 8 = 2 +2) χ (Oktaeder-Oberfläche ) = 6 − 12 + 8 = 2 χ (Rand des Tetraeders) = 2 χ (Ikosaeder ) = 12 − 30 + 20 = 2 3) χ (Würfel) = 8 − 12 + 6 = 2 -χ (Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 +χ (Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 Bemerkung 33 χ (∆n ) = 1 für jedes n ∈ N 0 -38 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX Beweis: ∆n ist die konvexe Hülle von (e 0 , . . . , e @@ -2389,45 +2389,45 @@ Beweis: ∆n . Jede (k + 1)-elementige Teilmenge von { e 0 , . . . , e - n } definiert ein k -Simplex. + n } definiert ein k -Simplex. ⇒ a k (∆n - ) = + ) = n+1 -k +1 +k +1 , k = 0, . . . , n ⇒ χ (∆n - ) = + ) = n -k =0 ( −1)k +k =0 ( −1)k n +1 -k +1 +k +1 f ( x) = (x + 1) n+1 Binomischer Lehrsatz -= += n +1 -k =0 +k =0 n+1 -k +k xk -⇒ 0 = +⇒ 0 = n +1 -k =0 +k =0 n+1 -k +k (−1)k = χ (∆n ) − 1 ⇒ χ (∆n - ) = 1 -Definition 39 + ) = 1 +Definition 39 a)Ein 1D-Simplizialkomplex heißt Graph. b)Ein Graph, der homöomorph zu S 1 ist, heißt Kreis. c)Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. -(a) Dies wird häufig auch als -Multigraph bezeichnet. (b) Planare Einbettung des Te- -traeders +(a) Dies wird häufig auch als +Multigraph bezeichnet. (b) Planare Einbettung des Tetraeders + (c) K 5 (d) K 3 ,3 @@ -2446,7 +2446,7 @@ Beweis: a)Siehe „Algorithmus von Kruskal“. 2 T wird „Spannbaum“ genannt. -39 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX b) χ (Γ) = a 0 (Γ) − a 1 (Γ) @@ -2463,20 +2463,20 @@ Sei ∆ ein n -Simplex und x ∈ ∆ ◦ ⊆ R n . Sei K der Simplizialkomplex, der aus ∆ durch „Unterteilung“ in x entsteht. Dann ist χ ( K ) = χ (∆) = 1. 
-(a) K (b) ∆, das aus K durch Unter- -teilung entsteht +(a) K (b) ∆, das aus K durch Unterteilung + entsteht Abbildung 2.10:Beispiel fürBemerkung 36. Beweis: χ( K ) = χ (∆) − (−1)n - + n-Simplex + n -k =0 ( −1)k +k =0 ( −1)k n + 1 -k +k + - -(1+(−1))n +1 = χ (∆) -Definition 40 +(1+(−1))n +1 = χ (∆) +Definition 40 Sei X ein topologischer Raum, K ein Simplizialkomplex und h : | K | → X ein Homöomorphismus von der geometrischen Realisierung |K | auf X . Dann heißt h eine @@ -2499,14 +2499,14 @@ Beweis: . Erhalte Triangulierung von S 2 . -40 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX (a) Die beiden markierten Dreiecke schneiden sich im Mittelpunkt und in einer Seite. (b) Die beiden markierten Dreiecke schneiden sich im Mittelpunkt und außen. Abbildung 2.11:Fehlerhafte Triangulierungen (a)Einfache Triangulierung (b)Minimale Triangulierung Abbildung 2.12:Triangulierungen des Torus -41 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX 3) Sind P 1 und P 2 konvexe Polygone und T @@ -2544,10 +2544,10 @@ n (K ) der R -Vektorraum mit Basis A C n (K ) =   - + σ ∈ A n (K ) c -σ · σ +σ · σ @@ -2577,7 +2577,7 @@ i σ ∈ C n− 1 ( K ) und d n : C n ( K ) → C - n− 1 ( K ) die dadurch definierte lineare + n− 1 ( K ) die dadurch definierte lineare Abbildung. Dann gilt: d n− 1 ◦ d @@ -2602,7 +2602,7 @@ d 1 − e 2 + e 3 ) = (c − b) − (c − a) + ( b − a ) -42 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX = 0 Sei a < b < c < d . Dann gilt für Tetraeder: d @@ -2641,19 +2641,19 @@ i =0 (−1)i n−1 j =0 ∂ i (∂ j σ )(−1)j -= += 0 ≤i ≤ j ≤n− 1(−1)i + j ∂ j (∂ - i ( σ )) + + i ( σ )) + 0≤ j d( P, A ) = d(P, B ) = d( P, C ) + d(C, B ) 2. Fall : Q und B liegen auf verschieden Halbebenen bzgl. P A. Dann liegen A und Q in derselben Halbebene bzgl. P B . -Tausche A und B ⇒ Fall 1 +Tausche A und B ⇒ Fall 1 Bemerkung 63 Sei (X, d, G ) eine Geometrie, die§1-§3erfüllt, P, Q ∈ X mit P = Q und ϕ eine Isometrie mit ϕ( P ) = P und ϕ(Q ) = Q . 
@@ -4603,19 +4603,19 @@ P,Q∈ Fix( ϕ) ⇒ d(P, S ) = d( ϕ( P ), ϕ(S )) = d(P, ϕ (S )) 3(i ) ⇒ ϕ(S ) = S - + Proposition 4.2 -In einer Geometrie, die§1-§3erfüllt, gibt es zu P, P - , Q, Q - mit d( P, Q ) = d( P +In einer Geometrie, die§1-§3erfüllt, gibt es zu P, P + , Q, Q + mit d( P, Q ) = d( P , Q ) -höchstens zwei Isometrien mit ϕ( P ) = P +höchstens zwei Isometrien mit ϕ( P ) = P und ϕ( Q) = Q -70 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Aus den Axiomen folgt, dass es in der Situation von§4höchstens zwei Isometrien mit ϕ -i (P ) = P +i (P ) = P und ϕ i (Q) = Q gibt. @@ -4623,7 +4623,7 @@ Beweis: Seien ϕ 1 , ϕ 2 , ϕ 3 Isometrien mit ϕ -i (P ) = P +i (P ) = P , ϕ i ( Q) = Q mit i = 1, 2, 3. @@ -4646,14 +4646,14 @@ Nun zu den Beweisen der Teilaussagen: 1 (R ), ϕ 2 (R ), ϕ 3 (R ) liegen zwei in der selben -Halbebene bzgl. P +Halbebene bzgl. P Q = ϕ i ( P Q). O. B. d. A. seien ϕ 1 (R ) und ϕ 2 (R ) in der selben Halbebene. -Es gilt: d(P +Es gilt: d(P , ϕ 1 ( R )) = d(ϕ 1 (P ) , ϕ @@ -4662,7 +4662,7 @@ Es gilt: d(P = d(ϕ 2 (P ) , ϕ 2 (R )) -= d(P += d(P , ϕ 2 ( R )) und analog d( Q @@ -4682,113 +4682,113 @@ Ist R /∈ AB , so ist AB ∩ P R = ∅ oder AB ∈ RQ = ∅ nachSatz 4.1. Der S C ist dann Fixpunkt von ϕ nachBemerkung 63 ⇒ ϕ( A ) = A. Bemerkung 64 (SWS-Kongruenzsatz) -Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A - B - C +Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A + B + C Dreiecke, für die gilt: -(i) d(A, B ) = d( A - , B +(i) d(A, B ) = d( A + , B ) (ii) ∠C AB ∼ -= ∠ C - A - B -71 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -(iii) d(A, C ) = d(A - , C += ∠ C + A + B + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE +(iii) d(A, C ) = d(A + , C ) -Dann ist AB C kongruent zu A - B - C +Dann ist AB C kongruent zu A + B + C . -Beweis: Sei ϕ die Isometrie mit ϕ( A - ) = A , ϕ( A +Beweis: Sei ϕ die Isometrie mit ϕ( A + ) = A , ϕ( A C + ) = AC + - und ϕ( A + und ϕ( A B + ) = AB + . 
Diese Isometrie existiert wegenPunkt §4. -⇒ C ∈ ϕ( A +⇒ C ∈ ϕ( A C + - ) und B ∈ ϕ (A + ) und B ∈ ϕ (A B + ). d( A - , C - ) = d(ϕ (A - ) , ϕ( C - )) = d( A, ϕ( C + , C + ) = d(ϕ (A + ) , ϕ( C + )) = d( A, ϕ( C )) 3(i ) -==⇒ ϕ( C +==⇒ ϕ( C ) = C -d(A - , B - ) = d(ϕ( A - ), ϕ(B - )) = d(A, ϕ(B +d(A + , B + ) = d(ϕ( A + ), ϕ(B + )) = d(A, ϕ(B )) 3(i ) -==⇒ ϕ( B +==⇒ ϕ( B ) = B -Also gilt insbesondere ϕ( A - B - C - ) = AB C . +Also gilt insbesondere ϕ( A + B + C + ) = AB C . Bemerkung 65 (WSW-Kongruenzsatz) -Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A - B - C +Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A + B + C Dreiecke, für die gilt: -(i) d(A, B ) = d( A - , B +(i) d(A, B ) = d( A + , B ) (ii) ∠ C AB ∼ -= ∠ C - A - B += ∠ C + A + B (iii) ∠ AB C ∼ -= ∠ A - B - C -Dann ist AB C kongruent zu A - B - C += ∠ A + B + C +Dann ist AB C kongruent zu A + B + C . -Beweis: Sei ϕ die Isometrie mit ϕ(A - ) = A , ϕ(B - ) = B und ϕ(C +Beweis: Sei ϕ die Isometrie mit ϕ(A + ) = A , ϕ(B + ) = B und ϕ(C ) liegt in der selben Halbebene bzgl. AB wie C . Diese Isometrie existiert wegen§4. -Aus ∠ C AB = ∠C - A - B - = ∠ ϕ( C - )ϕ (A - ) ϕ(B - ) = ∠ ϕ( C - )AB folgt, dass ϕ (C +Aus ∠ C AB = ∠C + A + B + = ∠ ϕ( C + )ϕ (A + ) ϕ(B + ) = ∠ ϕ( C + )AB folgt, dass ϕ (C ) ∈ AC + . Analog folgt aus ∠ AB C = ∠ A - B - C - = ∠ ϕ ( A - ) ϕ( B - ) ϕ( C - ) = ∠ AB ϕ ( C - ) , dass ϕ ( C + B + C + = ∠ ϕ ( A + ) ϕ( B + ) ϕ( C + ) = ∠ AB ϕ ( C + ) , dass ϕ ( C ) ∈ B C + . -Dann gilt ϕ (C - ) ∈ AC ∩ B C = { C } ⇒ ϕ( C +Dann gilt ϕ (C + ) ∈ AC ∩ B C = { C } ⇒ ϕ( C ) = C . -Es gilt also ϕ (A - B - C - ) = AB C . -Definition 61 +Es gilt also ϕ (A + B + C + ) = AB C . +Definition 61 a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P . Man schreibt: ∠ R 1 P R @@ -4798,17 +4798,17 @@ Man schreibt: ∠ R b) Zwei Winkel sind gleich , wenn es eine Isometrie gibt, die den einen Winkel auf den anderen abbildet. 
-c) ∠ R -1 P - R +c) ∠ R +1 P + R 2 heißt kleiner als ∠ R 1 P R -2 , wenn es eine Isometrie ϕ gibt, mit ϕ ( P +2 , wenn es eine Isometrie ϕ gibt, mit ϕ ( P ) = P , -ϕ(P +ϕ(P R + 1 ) = P R + -1 und ϕ (R +1 und ϕ (R 2 ) liegt in der gleichen Halbebene bzgl. P R 1 wie R 2 und in @@ -4818,9 +4818,9 @@ der gleichen Halbebene bzgl. P R d)Im Dreieck P QR gibt es Innenwinkel und Außenwinkel. Bemerkung 66 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. -Beweis: Zeige ∠ P RQ < ∠ RQP +Beweis: Zeige ∠ P RQ < ∠ RQP . -Sei M der Mittelpunkt der Strecke QR und P +Sei M der Mittelpunkt der Strecke QR und P ∈ P Q+ \ P Q. Sei A ∈ M P − mit d(P, M ) = @@ -4832,61 +4832,61 @@ d( M , A ). 2 P R 1 . Also sind insbesondere alle Winkel ≤ 180◦ . -72 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE P - R + R 1 R -1R +1R 2R 2 -(a) ∠R -1 P - R +(a) ∠R +1 P + R 2 ist kleiner als ∠ R 1 P R 2 , -vgl.Definition 61.c P +vgl.Definition 61.c P Q R -(b) InnenwinkelundAußenwin- -kelin P QR , vgl.Definiti- -on 61.d -Abbildung 4.6:Situation ausDefinition 61 +(b) InnenwinkelundAußenwinkelin + P QR , vgl.Definition + 61.d +Abbildung 4.6:Situation ausDefinition 61 Q M A P R (a) Parallelogramm AQPR α βR Q P -(b) Innen- und Außenwin- -kel von P QR +(b) Innen- und Außenwinkel + von P QR Abbildung 4.7:Situation ausBemerkung 66 Es gilt: d( Q, M ) = d( M , R ) und d( P, M ) = d( M , A ) sowie ∠P M R = ∠ AM Q ⇒ M RQ ist kongruent zu AM Q , denn eine der beiden Isometrien, die∠ P M R auf ∠ AM Q abbildet, bildet R auf Q und P auf A ab. ⇒ ∠M QA = ∠ M RP = ∠ QRP = ∠ P RQ. -Noch zu zeigen: ∠ M QA < ∠ RQP +Noch zu zeigen: ∠ M QA < ∠ RQP , denn A liegt in der selben Halbebene bzgl. P Q wie M . Proposition 4.3 (Existenz der Parallelen) Sei (X, d, G ) eine Geometrie mit den Axiomen§1-§4. Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. 
-Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P +Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P ∈ f mit -d( P, P +d( P, P ) = d(P, Q ) abbildet und die Halbebenen bzgl. f erhält. -73 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Q hf gP Abbildung 4.8:Situation ausProposition 4.3 Annahme: ϕ(g ) ∩ g = ∅ ⇒ Es gibt einen Schnittpunkt { R } = ϕ( g ) ∩ g . -Dann ist ∠RQP = ∠ RQP - < ∠ RP P - nachBemerkung 66und ∠RQP = ∠ RP P +Dann ist ∠RQP = ∠ RQP + < ∠ RP P + nachBemerkung 66und ∠RQP = ∠ RP P , weil -ϕ( ∠ RQP ) = ∠ RP P +ϕ( ∠ RQP ) = ∠ RP P . ⇒ Widerspruch -⇒ ϕ (g ) ∩ g = ∅ +⇒ ϕ (g ) ∩ g = ∅ Folgerung 4.4 Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π . D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ (QP + @@ -4901,7 +4901,7 @@ Dreiecke mit drei 90◦ Proposition 4.5 In einer Geometrie mit den Axiomen§1-§4ist in jedem Dreieck die Summe der Innenwinkel ≤ π . -74 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE + 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Sei im Folgenden „ IWS“ die „Innenwinkelsumme“. Beweis: Sei ein Dreieck mit IWS() = π + ε αβ @@ -4912,19 +4912,19 @@ Beweis: Sei ein Dreieck mit IWS() = π + ε α 2 βγ M -A BC A +A BC A α (b)Situation ausProposition 4.5 Abbildung 4.10:Situation ausProposition 4.5 Sei α ein Innenwinkel von . -Beh.: Es gibt ein Dreieck +Beh.: Es gibt ein Dreieck mit IWS( - ) = IWS( ) und einem Innenwinkel α + ) = IWS( ) und einem Innenwinkel α ≤ α 2 . -Dann gibt es für jedes n ein +Dann gibt es für jedes n ein n mit IWS ( -n ) = IWS() und Innenwinkel α +n ) = IWS() und Innenwinkel α ≤ α 2 n . Für α @@ -4936,16 +4936,16 @@ der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C . Sei M der Mittelpunkt der Strecke B C . Sei außerdem α 1 = ∠ C AM und α 2 = ∠ B AM . -Sei weiter A +Sei weiter A ∈ M A − - mit d(A + mit d(A , M ) = d( A, M ). Die Situation ist inAbbildung 4.10bskizziert. -⇒ ( M A - C ) und ( M AB ) sind kongruent. 
⇒ ∠ AB M = ∠ A - C M und ∠ M A +⇒ ( M A + C ) und ( M AB ) sind kongruent. ⇒ ∠ AB M = ∠ A + C M und ∠ M A C = -∠M AB . ⇒ α + β + γ = IWS(AB C ) = IWS(AA +∠M AB . ⇒ α + β + γ = IWS(AB C ) = IWS(AA C ) und α 1 + α 2 = α , also o. B. d. A. @@ -4954,24 +4954,24 @@ Die Situation ist inAbbildung 4.10bskizziert. 2 Bemerkung 67 In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π . -α -α -α ββ +α +α +α ββ γ A BC g Abbildung 4.11:Situation ausBemerkung 67 Beweis: Sei g eine Parallele von AB durch C . -• Es gilt α +• Es gilt α = α wegenProposition 4.3. -• Es gilt β +• Es gilt β = β wegenProposition 4.3. -• Es gilt α - = α +• Es gilt α + = α wegenAufgabe 8. -75 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -⇒ IWS(AB C ) = γ + α - + β + 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +⇒ IWS(AB C ) = γ + α + + β = π Aus der Eigenschaft, dass die Innenwinkelsumme von Dreiecken in der euklidischen Ebene gleich π ist, folgen direkt die Kongruenzsätze SWW und WWS über den Kongruenzsatz WSW. @@ -4988,21 +4988,21 @@ xy Abbildung 4.12:Strahlensatz Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. A - B C + B C B C cb a c b - a -Abbildung 4.13:Die Dreiecke AB C und AB - C + a +Abbildung 4.13:Die Dreiecke AB C und AB + C sind ähnlich. 4.2.1 Flächeninhalt -Definition 62 -„Simplizialkomplexe“ in euklidischer Ebene ( X, d) heißen flächengleich , wenn sie sich in +Definition 62 +„Simplizialkomplexe“ in euklidischer Ebene ( X, d) heißen flächengleich , wenn sie sich in kongruente Dreiecke zerlegen lassen. -76 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -(a)Zwei kongruente Dreiecke (b) Zwei weitere kongruente Drei- -ecke + 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE +(a)Zwei kongruente Dreiecke (b) Zwei weitere kongruente Dreiecke + Abbildung 4.14:Flächengleichheit Der Flächeninhalt eines Dreiecks ist 1 /2 · Grundseite · Höhe. @@ -5053,12 +5053,12 @@ Beweis: (a + b) · (a + b) = a 2 = c2 + 4 · ( 1 2 · a · b) -77 4.2. 
WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE + 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE cb a A BC · -(a) a, b sind Katheten und c ist die Hypo- -tenuse b a baba +(a) a, b sind Katheten und c ist die Hypotenuse + b a baba b a · ··· @@ -5072,7 +5072,7 @@ d = euklidischer Abstand, G = Menge der üblichen Geraden. Beweis: (i) (R 2 , d -Euklid ) ist offensichtlich eine euklidische Ebene. +Euklid ) ist offensichtlich eine euklidische Ebene. (ii) Sei (X, d) eine euklidische Ebene und g 1 , g 2 Geraden in X , die sich in einem Punkt 0 @@ -5095,7 +5095,7 @@ Sei h : X → R2 eine Abbildung mit h ( P ) := ( x P , y P ) Dadurch wird h auf dem -Quadranten definiert, in dem P liegt, d. h. +Quadranten definiert, in dem P liegt, d. h. ∀Q ∈ X mit P Q ∩ g 1 = ∅ = P Q ∩ g 2 @@ -5105,7 +5105,7 @@ Im Folgenden werden zwei Aussagen gezeigt: (ii) h ist eine Isometrie Da jede Isometrie injektiv ist, folgt aus(i)und(ii), dass h bijektiv ist. Nun zu den Beweisen der Teilaussagen: -78 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE · g 1g @@ -5125,10 +5125,10 @@ P P (b)Schritt 2 Abbildung 4.18:Beweis zuSatz 4.8 (i) Sei ( x, y ) ∈ R 2 - , z. B. x ≥ 0 , y ≥ 0 . Sei P + , z. B. x ≥ 0 , y ≥ 0 . Sei P ∈ g -1 mit d(0 , P - ) = x und P +1 mit d(0 , P + ) = x und P auf der gleichen Seite von g 2 wie P . @@ -5156,13 +5156,13 @@ h( Q) = (x Q , y Q ) 4.3 Hyperbolische Geometrie -Definition 63 +Definition 63 Sei - H := { z ∈ C | (z ) > 0 } = - ( x, y ) ∈ R 2 + H := { z ∈ C | (z ) > 0 } = + ( x, y ) ∈ R 2 - y > 0 -79 4.3. HYPERBOLISCHE GEOMETRIE + y > 0 + 4.3. HYPERBOLISCHE GEOMETRIE die obere Halbebene bzw. Poincaré-Halbebene und G = G 1 ∪ G 2 mit @@ -5182,7 +5182,7 @@ a). . . die Inzidenzaxiome§1 b). . . das Anordnungsaxiom§3 (ii) c). . . nicht das Parallelenaxiom§5 Beweis: -a)Offensichtlich sind§1 (iii)und§1 (ii)erfüllt. Für§1 (i)gilt: +a)Offensichtlich sind§1 (iii)und§1 (ii)erfüllt. 
Für§1 (i)gilt: Gegeben z 1 , z 2 ∈ H @@ -5201,8 +5201,8 @@ Fall 2 (z 2 ) Betrachte nun z 1 und z - 2 als Punkte in der euklidischen Ebene. Die Mittelsenkrech- -te zu diesen Punkten schneidet diex -Achse. Alle Punkte auf der Mittelsenkrechten + 2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte + zu diesen Punkten schneidet diex -Achse. Alle Punkte auf der Mittelsenkrechten zu z 1 und z 2 sind gleich weit von z @@ -5230,18 +5230,18 @@ b)Sei g ∈ G 1 ˙ ∪ G 2 eine hyperbolische Gerade. -80 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE Es existieren disjunkte Zerlegungen von H \ g : Fall 1: g = { z ∈ H z − m| = r } ∈ G 1 Dann gilt: H = { z ∈ H z − m | < r } - + =:H 1 (Kreisinneres) ˙ ∪ { z ∈ H z − m | > r } - + =: H 2 (Kreisäußeres) Da r > 0 ist H @@ -5252,11 +5252,11 @@ Fall 2: g = { z ∈ H | z = x } ∈ G Die disjunkte Zerlegung ist: H = { z ∈ H | (z ) < x } - + =: H 1 (Links) ˙ ∪ { z ∈ H | ( z ) > x } - + =: H 2 (Rechts) Zu zeigen: ∀A ∈ H @@ -5272,7 +5272,7 @@ von m der kleiner ist als r und alle Punkte in H 2 haben einen Abstand von m der größer ist als r . Da man jede Strecke von A nach B insbesondere auch als stetige Abbildung f : R → R -> 0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g = ∅ +> 0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g = ∅ „ ⇒ “: A ∈ H i , B ∈ H j mit i, j ∈ { 1 , 2 } : AB ∩ g = ∅ ⇒ i = j @@ -5286,18 +5286,18 @@ c)SieheAbbildung 4.21. xy −5 −4 −3 −2 −1 0 1 2 3 4 5 6012345 Abbildung 4.21:Hyperbolische Geraden erfüllen§5nicht. -81 4.3. HYPERBOLISCHE GEOMETRIE -Definition 64 -Es seien a, b, c, d ∈ R mit ad − bc = 0 und σ : C → C eine Abbildung definiert durch + 4.3. HYPERBOLISCHE GEOMETRIE +Definition 64 +Es seien a, b, c, d ∈ R mit ad − bc = 0 und σ : C → C eine Abbildung definiert durch σ (z ) := az + b cz + d σ heißt Möbiustransformation. 
Proposition 4.9 a)Die Gruppe SL 2 (R ) operiert auf H durch die Möbiustransformation -σ (z ) := +σ (z ) := a b -c d +c d ◦ z := az + b cz + d b)Die Gruppe PSL @@ -5319,29 +5319,29 @@ d) SL 2 (R ) wird von den Matrizen λ 0 -0 λ− 1 +0 λ− 1 + - =: A - λ , + λ , 1 t -0 1 +0 1 + - =: B -t und +t und 0 1 -−1 0 +−1 0 + - =: C mit t, λ ∈ R × erzeugt. e) PSL 2 ( R) operiert auf G . Beweis: -a)Sei z = x + i y ∈ H, d. h. y > 0 und σ = +a)Sei z = x + i y ∈ H, d. h. y > 0 und σ = a b -c d +c d ∈ SL 2 ( R ) ⇒ σ (z ) = a(x + i y ) + b @@ -5374,45 +5374,45 @@ SL Die Abbildung bildet also nach H ab. Außerdem gilt: 1 0 -0 1 +0 1 ◦ z = x + i y 1 = x + i y = z -82 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE und - + a b -c d - ◦ -a +c d + ◦ +a b c - d - ◦ z - = + d + ◦ z + = a b -c d +c d ◦ a z + b c z + d = a a z + b -c +c z +d + b c a z + b -c +c z +d + d = a(a z + b - )+b (c - z +d + )+b (c + z +d ) c z + d c (a - z +b - )+ d(c + z +b + )+ d(c z + d ) c @@ -5422,20 +5422,20 @@ c ) + b( c z + d ) -c(a +c(a z + b ) + d(c z + d ) = ( aa + bc - )z + ab + )z + ab + bd (ca + db )z + cb + dd -= += aa + bc ab @@ -5443,22 +5443,22 @@ aa ca + db cb - + dd + + dd ◦ z -= += a b -c d - · +c d + · a b c - d + d ◦ z b)Es gilt σ (z ) = (−σ )(z ) für alle σ ∈ SL 2 ( R ) und z ∈ H. -c)Ansatz: σ = +c)Ansatz: σ = a b -c d +c d σ (x 0 ) = ax 0 + b @@ -5535,9 +5535,9 @@ Matrizen der Form A λ , B t und C die Einheitsmatrix zu generieren. Sei also - M = + M = a b -c d +c d ∈ SL 2 ( R ) beliebig. @@ -5545,16 +5545,16 @@ Fall 1: a = 0 Da M ∈ SL 2 (R ) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c = 0. Es folgt: - + 0 1 -−1 0 - · +−1 0 + · a b -c d - = +c d + = c d −a −b -83 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE Gehe zu Fall 2. Fall 2: a = 0 Nun wird in M durch M · A @@ -5562,24 +5562,24 @@ Nun wird in M durch M · A a an der Stelle von a eine 1 erzeugt: a b -c d - · +c d + · 1 a 0 -0 a - = +0 a + = 1 ab c -a ad +a ad Gehe zu Fall 3. 
Fall 3: a = 1 - + 1 b c d - · + · 1 −b -0 1 - = +0 1 + = 1 0 c d − bc Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M @@ -5589,20 +5589,20 @@ Fall 4: a = 1, b = 0, d = 1 A − 1 C B -c C +c C 1 0 c 1 - = + = 1 0 -0 1 +0 1 Daher erzeugen Matrizen der Form A λ , B t und C die Gruppe SL -2 R . +2 R . e)Es genügt die Aussage für Matrizen ausProposition 4.9 (d)zu zeigen. -• σ = +• σ = λ 0 -0 λ −1 +0 λ −1 , also σ ( z ) = λ 2 z . Daraus ergeben sich die Situationen, die in Abbildung 4.22aundAbbildung 4.22bdargestellt sind. @@ -5622,12 +5622,12 @@ m + 1 x (b)Fall 2 (Strahlensatz) Abbildung 4.22:Beweis vonProposition 4.9 (e)für eine Diagonalmatrix -• Offensichtlich gilt die Aussage für σ = +• Offensichtlich gilt die Aussage für σ = 1 a -0 1 -• Sei nun σ = +0 1 +• Sei nun σ = 0 1 -−1 0 +−1 0 , also σ (z ) = − 1 z Bemerkung 69 @@ -5637,7 +5637,7 @@ Zu hyperbolischen Geraden g 2 ( R) mit σ (g 1 ) = g 2 . -84 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE · xy −1 0 101 z = r · eiϕ @@ -5653,7 +5653,7 @@ Beweis: NachProposition 4.9 (c)gibt es σ mit σ ( a σ (g 1 ) := g 2 wegen dem Inzidenzaxiom§1und ist eindeutig bestimmt. -Definition 65 +Definition 65 Seien z 1 , z 2 , z @@ -5712,7 +5712,7 @@ DV( z 2 ,z 3 ,z 4 ) -d) DV ist auch wohldefiniert, wenn eines der z +d) DV ist auch wohldefiniert, wenn eines der z i = ∞ oder wenn zwei der z i gleich sind. e) DV(0 , 1, ∞, z @@ -5771,7 +5771,7 @@ Annahme: DV( z 4 )(z 3 − z 2 ) -85 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE ⇔ z 1 z 3 − z @@ -5820,7 +5820,7 @@ Annahme: DV( z 2 = z 4 Alle z - i sind paarweise verschieden ⇒ Widerspruch + i sind paarweise verschieden ⇒ Widerspruch b) DV( z 1 , z 4 , z @@ -5917,7 +5917,7 @@ Bem. 70.f 4 ) ∈ R ∪ { ∞ } Behauptung folgt, weil σ − 1 (R ∪ ∞ ) ein Kreis oder eine Gerade in C ist. -Definition 66 +Definition 66 Für z 1 , z 2 ∈ H sei g @@ -5983,7 +5983,7 @@ Außerdem gilt: ln 1 x = ln x − 1 = (−1) · ln x = − ln x -86 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. 
HYPERBOLISCHE GEOMETRIE Da der ln im Betrag steht, folgt direkt: 1 2 | ln DV(a @@ -5996,8 +5996,8 @@ Da der ln im Betrag steht, folgt direkt: 1 , a 1 , z 2 )| -Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x -Achse im Doppelver- -hältnis genutzt werden. +Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x -Achse im Doppelverhältnis + genutzt werden. Beh.: Die hyperbolische Metrik ist eine Metrik auf H. Beweis: WegenBemerkung 70.fist d( z @@ -6060,7 +6060,7 @@ Satz 4.10 Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome§1-§4sind erfüllt, aber Axiom§5ist verletzt. -87 4.3. HYPERBOLISCHE GEOMETRIE + 4.3. HYPERBOLISCHE GEOMETRIE Übungsaufgaben Aufgabe 8 Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels @@ -6078,7 +6078,7 @@ Zeigen Sie: (b)Der Winkel ∠ P QR ist gleich seinem Scheitelwinkel. Aufgabe 9 Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von -Punkten ist definiert durch d( P, Y ) := inf d(P, y ) |y ∈ Y . +Punkten ist definiert durch d( P, Y ) := inf d(P, y ) |y ∈ Y . Zeigen Sie: (a) Ist AB C ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die Winkel ∠ AB C und ∠ B C A gleich. @@ -6093,132 +6093,132 @@ Zeigen Sie: f g ∧ g h ⇒ f h Aufgabe 11 Beweise den Kongruenzsatz S S S . 5 Krümmung -Definition 67 +Definition 67 Sei f : [a, b] → R n eine eine Funktion aus C ∞ . Dann heißt f Kurve . 5.1 Krümmung von Kurven -Definition 68 +Definition 68 Sei γ : I = [a, b] → R n eine Kurve. a)Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: - γ + γ (t ) 2 = 1 ∀t ∈ I -Dabei ist γ - (t) = (γ -1 (t ), γ -2 (t ), . . . , γ +Dabei ist γ + (t) = (γ +1 (t ), γ +2 (t ), . . . , γ n (t )). -b) l (γ ) = +b) l (γ ) = b -a γ +a γ ( t) dt heißt Länge von γ . Bemerkung 71 (Eigenschaften von Kurven I) Sei γ : I = [a, b] → R n eine C ∞ -Funktion. 
a)Ist γ durch Bogenlänge parametrisiert, so ist l (γ ) = b − a. -b)Ist γ durch Bogenlänge parametrisiert, so ist γ - (t ) orthogonal zu γ +b)Ist γ durch Bogenlänge parametrisiert, so ist γ + (t ) orthogonal zu γ ( t) für alle t ∈ I . Beweis: -a) l (γ ) = +a) l (γ ) = b -a γ - (t ) dt = +a γ + (t ) dt = b a 1dt = b − a. b) Im Folgenden wird die Aussage nur fürγ : [a, b] → R 2 bewiesen. Allerdings funktioniert der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. -1 = γ - (t) = γ +1 = γ + (t) = γ ( t) 2 - = γ - ( t) , γ + = γ + ( t) , γ ( t) ⇒ 0 = d -dt γ - ( t) , γ +dt γ + ( t) , γ ( t) = d -dt (γ -1 (t) γ -1 ( t) + γ -2 (t )γ +dt (γ +1 (t) γ +1 ( t) + γ +2 (t )γ 2 (t)) -= 2 · (γ -1 ( t) · γ -1 (t ) + γ -2 ( t) · γ += 2 · (γ +1 ( t) · γ +1 (t ) + γ +2 ( t) · γ 2 ( t)) -= 2 · γ - ( t) , γ += 2 · γ + ( t) , γ ( t) -Definition 69 +Definition 69 Sei γ : I → R 2 eine durch Bogenlänge parametrisierte Kurve. a)Für t ∈ I sei n ( t) Normalenvektor an γ in t wenn gilt: - n (t) , γ - ( t) = 0, n ( t) = 1 und det((γ + n (t) , γ + ( t) = 0, n ( t) = 1 und det((γ (t ), n(t ))) = +1 -89 5.1. KRÜMMUNG VON KURVEN + 5.1. KRÜMMUNG VON KURVEN b)Seit κ : I → R so, dass gilt: - γ + γ ( t) = κ( t) · n ( t) Dann heißt κ (t ) Krümmung von γ in t . -Da n (t ) und γ +Da n (t ) und γ ( t) nachBemerkung 71.blinear abhängig sind, existiert κ (t) . Beispiel 45 Gegeben sei ein Kreis mit Radius r , d. h. mit Umfang 2πr . 
Es gilt: -γ (t ) = +γ (t ) = r · cos t r , r · sin t -r +r für t ∈ [0, 2 πr ] ist parametrisiert durch Bogenlänge, da gilt: -γ - (t ) = +γ + (t ) = (r · 1 r )(− sin t r ) , r 1 r cos t -r -= +r += − sin t r , cos t -r +r Der Normalenvektor von γ in t ist -n (t) = +n (t) = − cos t r , − sin t -r +r da gilt: - n (t ), γ - (t ) = + n (t ), γ + (t ) = − cos t r − sin t -r - , +r + , − sin t r cos t -r +r = (− cos t r ) · (− sin t r ) + ( − sin t r ) · (cos t r ) = 0 - n (t ) = + n (t ) = (− cos t r , − sin t -r ) +r ) @@ -6227,8 +6227,8 @@ r ) 2 + ( − sin t r )2 = 1 -det(γ -1 ( t), n(t )) = +det(γ +1 ( t), n(t )) = @@ -6237,7 +6237,7 @@ r − cos t r cos t r − sin t -r +r @@ -6249,59 +6249,59 @@ r = 1 Die Krümmung ist für jedes t konstant 1 r , da gilt: -γ - (t ) = +γ + (t ) = − 1 r cos t r , − 1 r sin t -r +r = 1 -r · +r · − cos t r , − sin t -r +r ⇒ κ (t ) = 1 r -90 5.2. TANGENTIALEBENE -Definition 70 + 5.2. TANGENTIALEBENE +Definition 70 Sei γ : I → R 3 eine durch Bogenlänge parametrisierte Kurve. -a)Für t ∈ I heißt κ( t) := γ +a)Für t ∈ I heißt κ( t) := γ (t ) die Krümmung von γ in t . -b)Ist für t ∈ I die Ableitung γ - ( t) = 0, so heißt γ +b)Ist für t ∈ I die Ableitung γ + ( t) = 0, so heißt γ ( t) - γ + γ ( t) Normalenvektor an γ in t. -c) b(t ) sei ein Vektor, der γ +c) b(t ) sei ein Vektor, der γ (t), n(t) zu einer orientierten Orthonormalbasis vonR 3 ergänzt. Also gilt: - det(γ + det(γ (t ), n(t) , b( t)) = 1 b(t ) heißt Binormalenvektor, die Orthonormalbasis - γ - (t ), n(t ), b( t) + γ + (t ), n(t ), b( t) heißt begleitendes Dreibein. Bemerkung 72 (Eigenschaften von Kurven II) Sei γ : I → R 3 durch Bogenlänge parametrisierte Kurve. -a) n (t ) ist orthogonal zu γ +a) n (t ) ist orthogonal zu γ ( t) . -b) b(t ) ausDefinition 70.cist eindeutig. +b) b(t ) ausDefinition 70.cist eindeutig. 5.2 Tangentialebene -Erinnerung Sie sich anDefinition 32„reguläre Fläche“. +Erinnerung Sie sich anDefinition 32„reguläre Fläche“. 
Äquivalent dazu ist: S ist lokal von der Form -V (f ) = - x ∈ R 3 +V (f ) = + x ∈ R 3 - f ( x) = 0 + f ( x) = 0 für eine C ∞ -Funktion f : R 3 → R . -Definition 71 +Definition 71 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S , F : U → V ∩ S eine lokale Parametrisierung um s ∈ V : @@ -6324,7 +6324,7 @@ und D p F : R 2 → R 3 die durch J -F (p ) definierte lineare Abbildung. +F (p ) definierte lineare Abbildung. Dann heißt T s S := Bild(D p F ) die Tangentialebene an s ∈ S . @@ -6337,11 +6337,11 @@ s S = ˜u, ˜v , wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix J F (p ) sind. c) T s S hängt nicht von der gewählten Parametrisierung ab. -91 5.2. TANGENTIALEBENE + 5.2. TANGENTIALEBENE d) Sei S = V ( f ) eine reguläre Fläche in R 3 , also f : V → R eine C ∞ -Funktion, V ⊆ R 3 -offen, grad(f )(x ) = 0 für alle x ∈ S . +offen, grad(f )(x ) = 0 für alle x ∈ S . Dann ist T s S = (grad(f )(s ))⊥ für jedes s ∈ S . @@ -6357,24 +6357,24 @@ c) T s S = { x ∈ R 3 |∃parametrisierte Kurve γ : [ −ε, + ε ] → S für ein ε > 0 mit γ (0) = -s und γ +s und γ (0) = x } Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. d) Sei x ∈ T -s S, γ : [ −ε, + ε ] → S eine parametrisierte Kurve mit ε > 0 und γ +s S, γ : [ −ε, + ε ] → S eine parametrisierte Kurve mit ε > 0 und γ (0) = s, -sodass γ +sodass γ (0) = x gilt. Da γ ( t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 ⇒ 0 = (f ◦ γ ) - (0) = grad(f )(γ (0)), γ + (0) = grad(f )(γ (0)), γ (0) ⇒ T s S ⊆ grad(f )(s) ⊥ dim=2 ====⇒ T s S = (grad(f )(s ))⊥ -Definition 72 +Definition 72 a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 ist eine Abbildung n : S → S 2 ⊆ @@ -6384,7 +6384,7 @@ s S ⊥ für jedes s ∈ S . b) S heißt orientierbar , wenn es ein stetiges Normalenfeld auf S gibt. Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. -Im Folgenden werden diese Begriffe jedoch synonym benutzt. +Im Folgenden werden diese Begriffe jedoch synonym benutzt. 
Bemerkung 74 (Eigenschaften von Normalenfeldern) a)Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C ∞ ). @@ -6392,7 +6392,7 @@ b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 von s und eine lokale Parametrisierung F : U → V von S um s, sodass auf F (U ) = V ∩ S ein stetiges Normalenfeld existiert. -c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas vonS aus lokalen +c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas vonS aus lokalen Parametrisierungen F i : U i → V @@ -6405,11 +6405,11 @@ j ∩ S gilt: i → V j - + F j ◦ F − 1 i - + ∈ R3 ×3 ) > 0 Beweis: Wird hier nicht geführt. Beispiel 46 (Normalenfelder) @@ -6420,9 +6420,9 @@ S 2 ist ein stetiges Normalenfeld. Auch n 2 = −id S 2 ist ein stetiges Normalenfeld. -2) S = Möbiusband (vgl.Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma- -lenfeld, aber kein stetiges Normalenfeld. -92 5.3. GAUSS-KRÜMMUNG +2) S = Möbiusband (vgl.Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, + aber kein stetiges Normalenfeld. + 5.3. GAUSS-KRÜMMUNG Abbildung 5.1:Möbiusband 5.3 Gauß-Krümmung Bemerkung 75 @@ -6434,13 +6434,13 @@ Dann gibt es eine Umgebung V ⊆ R 3 von s , sodass C := (s + E ) ∩ S ∩ V das Bild einer durch Bogenlänge parametrisierten Kurveγ : [−ε, ε] → S enthält mit γ (0) = s -und γ +und γ (0) = x . Beweis: „Satz über implizite Funktionen“ 1 -Definition 73 +Definition 73 In der Situation ausBemerkung 75heißt die Krümmung κ γ (0) der Kurve γ in der Ebene -(s + E ) im Punkt s die Normalkrümmung von S in s in Richtung x = γ +(s + E ) im Punkt s die Normalkrümmung von S in s in Richtung x = γ (0). Man schreibt: κ Nor ( s, x) := κ @@ -6481,12 +6481,12 @@ x 3 (x, z -Ebene) 1 Siehe z. B. https://github.com/MartinThoma/LaTeX- examples/tree/master/documents/Analysis%20II -93 5.3. GAUSS-KRÜMMUNG + 5.3. 
GAUSS-KRÜMMUNG V ∩ E -2 ∩ S = - (1, 0 , z ) ∈ R 3 +2 ∩ S = + (1, 0 , z ) ∈ R 3 - z ∈ R + z ∈ R ist eine Gerade ⇒ κ Nor (s, x @@ -6517,15 +6517,15 @@ Nor ( s, x − Y 2 − Z ) Abbildung 5.2:Beispiele für reguläre Flächen -Definition 74 +Definition 74 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S . γ : [ −ε, ε] → S eine nach Bogenlänge parametrisierte Kurve ( ε > 0 ) mit γ (0) = s und -γ +γ (0) = 0. -Sei n (0) := γ +Sei n (0) := γ (0) -γ +γ (0) . Zerlege n (0) = n (0)t + n (0)⊥ @@ -6537,21 +6537,21 @@ s S )⊥ Dann ist n (0) ⊥ = n (0) , n( s) · n (s ) κ - Nor (s, γ ) := γ + Nor (s, γ ) := γ (0), n(s ) die Normalkrümmung . Bemerkung 76 Sei γ ( t) = γ (−t) , t ∈ [ −ε, ε]. Dann ist κ Nor (s, γ ) = κ Nor ( s, γ ). -Beweis: γ - (0) = γ - (0) , da γ - (0) = −γ +Beweis: γ + (0) = γ + (0) , da γ + (0) = −γ (0). Es gilt: κ -Nor ( s, γ ) hängt nur von |γ +Nor ( s, γ ) hängt nur von |γ (0)| ab und ist gleich κ - Nor (s, γ + Nor (s, γ (0)). Bemerkung 77 Sei S eine reguläre Fläche und n = n ( s) ein Normalenvektor an S in s . @@ -6566,21 +6566,21 @@ s S → R , x → κ Nor ( s, x) eine glatte Funktion und Bild κn Nor (s ) ist ein abgeschlossenes Intervall. -Definition 75 +Definition 75 Sei S eine reguläre Fläche und n = n ( s) ein Normalenvektor an S in s . -94 5.3. GAUSS-KRÜMMUNG + 5.3. GAUSS-KRÜMMUNG a) κn -1 (s ) : = min +1 (s ) : = min κn -Nor ( s, x) +Nor ( s, x) x ∈ T 1 -s S +s S und κn -2 (s ) : = max +2 (s ) : = max κ n -Nor ( s, x) +Nor ( s, x) x ∈ T 1 s S heißen Hauptkrümmungen von S in s. @@ -6627,7 +6627,7 @@ s 3 ) < 0 Bemerkung 79 Sei S eine reguläre Fläche, s ∈ S ein Punkt. -95 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM a)Ist K ( s) > 0 , so liegt S in einer Umgebung von s ganz auf einer Seite von T s S + s . b)Ist K ( s) < 0 , so schneidet jede Umgebung von s in S beide Seiten von T @@ -6638,19 +6638,19 @@ Sei S ⊆ R 3 s S die Tangentialebene an S in s und F : U → V eine lokale Parametrisierung von S um s . 
Weiter sei p := F −1 (s ). -Definition 76 +Definition 76 Sei I S ∈ R2 ×2 - definiert als + definiert als I - S : = + S : = g 1, 1 (s ) g 1, 2 (s ) g 1, 2 (s ) g 2, 2 (s ) - = + = E ( s) F ( s) F ( s ) G (s ) mit g @@ -6684,14 +6684,14 @@ c) Bzgl. der Basis { D p F ( e 1 ) , D p F ( e -2 ) } hat das Standardskalarprodukt ausBemer- -kung 80.adie Darstellungsmatrix I +2 ) } hat das Standardskalarprodukt ausBemerkung + 80.adie Darstellungsmatrix I S . d) g -i,j (s ) ist eine differenzierbare Funktion von s . +i,j (s ) ist eine differenzierbare Funktion von s . Bemerkung 81 det(I - S ) = + S ) = ∂ F @@ -6760,7 +6760,7 @@ z 1 + z 2 2 + z 2 3 -96 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM det(I S ) = g 1 , 1 g @@ -6796,7 +6796,7 @@ y 2 y 3  - + −   x 1 @@ -6826,46 +6826,46 @@ y 2 + x 3 y 3 ) 2 -Definition 77 +Definition 77 a) - Das Differential d A = + Das Differential d A = det(I )d u 1 d u -2 heißt Flächenelement von S bzgl. der Para- -metrisierung F . +2 heißt Flächenelement von S bzgl. der Parametrisierung + F . b)Für eine Funktion f : V → R heißt -V f dA := +V f dA := U f ( F (u 1 , u 2 ) - + =: s ) det I (s )du 1 du 2 der Wert des Integrals von f über V , falls das Integral rechts existiert. Bemerkung 82 -a) +a) V f dA ist unabhängig von der gewählten Parametrisierung. -b)Sei f : S → R eine Funktion, die im Sinne vonDefinition 77.blokal integrierbar ist. -Dann ist - S f dA wohldefiniert, falls (z. B.) S kompakt ist. +b)Sei f : S → R eine Funktion, die im Sinne vonDefinition 77.blokal integrierbar ist. +Dann ist + S f dA wohldefiniert, falls (z. B.) S kompakt ist. Etwa: - + S f dA = n -i =1 +i =1 V if dA -− -i = j +− +i = j V i ∩V jf dA -+ -i,j,k ++ +i,j,k V i ∩V j ∩V @@ -6887,14 +6887,14 @@ n (s ) S 2 d s n ( x ) = d dt n (s „+“ tx - + Soll auf Fläche S bleiben) t =0 Die Abbildung d s n heißt Weingarten-Abbildung -97 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM + 5.4. 
ERSTE UND ZWEITE FUNDAMENTALFORM b) T n(s ) S 2 = T @@ -6906,7 +6906,7 @@ d) d s n ist selbstadjungiert bzgl. des Skalarproduktes I S . Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. -98 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM + 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM Beweis: a)Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. b) T @@ -6919,7 +6919,7 @@ s n ein Homomorphismus. d)Zu zeigen: ∀x, y ∈ I s S : x, d s n ( y ) = d -s n ( x ), y +s n ( x ), y Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. Sei x @@ -6953,11 +6953,11 @@ Bew.: 0 = ∂ F j ) , n( p + te j ) ⇒ 0 = d -dt +dt ∂ F ∂ u ( p + te j ), n( p + te - j ) + j ) @@ -6967,12 +6967,12 @@ dt ∂ F ∂ u i (p + te j ) - + ∂ 2 F ∂u j ∂u -i ( p) +i ( p) t=0 , n( s) + x @@ -6980,14 +6980,14 @@ i ( p) s n D p F (e j ) - + x - j -Definition 78 + j +Definition 78 Die durch −d -s n definierte symmetrische Bilinearform aufT -s S heißt zweite Fundamental- -form von S in s bzgl. F . +s n definierte symmetrische Bilinearform aufT +s S heißt zweite Fundamentalform + von S in s bzgl. F . Man schreibt: I I s ( x, y ) = −d s n ( x) , y = I @@ -7006,49 +7006,49 @@ i,j (s ) = ∂ 2 F ∂ u i ∂ u -j ( p) , n( s) +j ( p) , n( s) Proposition 5.2 Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mitγ (0) = s. Dann gilt: κ Nor (s, γ ) = I I - s (γ - (0), γ + s (γ + (0), γ (0)) -Beweis: NachDefinition 74ist κ -Nor (s, γ ) = γ +Beweis: NachDefinition 74ist κ +Nor (s, γ ) = γ (0), n(s ) . Nach Voraussetzung gilt -n (γ (t )) ⊥ γ - ( t) ⇔ γ +n (γ (t )) ⊥ γ + ( t) ⇔ γ (0), n( s) = 0 Die Ableitung nach t ergibt 0 = d -dt (n ( γ (t )), γ +dt (n ( γ (t )), γ ( t)) -= += d dt n ( γ (t )) -t=0 , γ - (0) - + n ( s) , γ - (0) -99 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM +t=0 , γ + (0) + + n ( s) , γ + (0) + 5.4. 
ERSTE UND ZWEITE FUNDAMENTALFORM = d -s n ( γ - (0)), γ +s n ( γ + (0)), γ (0) + κ Nor ( s, γ ) = −I I - s ( γ - (0), γ + s ( γ + (0), γ (0)) + κ Nor ( s, γ ) Folgerung 5.3 -Die beiden Definitionen von Normalkrümmung inAbschnitt 5.1stimmen überein: +Die beiden Definitionen von Normalkrümmung inAbschnitt 5.1stimmen überein: κ Nor (s, γ ) = κ - Nor ( s, γ + Nor ( s, γ (0)) Satz 5.4 Sei S ⊆ R 3 @@ -7107,34 +7107,34 @@ I I 2 Prop. 5. 2 =====⇒ λ - 1 = min + 1 = min κ -Nor (s, x) +Nor (s, x) x ∈ T 1 -s S +s S λ -2 = max +2 = max κ -Nor ( s, x) +Nor ( s, x) x ∈ T 1 -s S +s S Satz 5.5 (Satz von Gauß-Bonnet) Sei S ⊆ R 3 eine kompakte orientierbare reguläre Fläche. Dann gilt: S K (s )dA = 2πχ( S ) Dabei ist χ ( S ) die Euler-Charakteristik von S . -Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von -Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. +Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von +Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. Lösungen der Übungsaufgab en Lösung zu Aufgabe1 Teilaufgabe a) Es gilt: (i) ∅ , X ∈ T X . (ii) T -X ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alleU +X ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alleU 1 , U 2 ∈ T @@ -7147,19 +7147,19 @@ X . X abgeschlossen, d. h. es gilt für eine beliebige Indexmenge I und alle U i ∈ T -X für alle i ∈ I : +X für alle i ∈ I : i ∈I U i ∈ T X Also ist ( X, T X ) ein topologischer Raum. Teilaufgabe b) Wähle x = 1 , y = 0 . Dann gilt x = y und die einzige Umgebung von x -ist X . Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. +ist X . Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. (X, T -X ) ist also nicht hausdorffsch. -Teilaufgabe c) Nach Bemerkung4sind metrische Räume hausdorffsch. Da(X, T +X ) ist also nicht hausdorffsch. 
+Teilaufgabe c) Nach Bemerkung4sind metrische Räume hausdorffsch. Da(X, T X ) nach -(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass(X, T +(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass(X, T X ) kein metrischer Raum sein kann. Lösung zu Aufgabe2 @@ -7169,49 +7169,49 @@ Sei a ∈ Z beliebig. Dann gilt: Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. Teilaufgabe b) -Beh.: { − 1, 1 } ist nicht offen +Beh.: { − 1, 1 } ist nicht offen Bew.: durch Widerspruch -Annahme: { − 1, 1 } ist offen. -Dann gibt es T ⊆ B, sodass +Annahme: { − 1, 1 } ist offen. +Dann gibt es T ⊆ B, sodass M ∈ T M = { − 1, 1 } . Aber alle U ∈ B haben unendlich viele Elemente. Auch endlich viele Schnitte von Elementen inB haben unendlich viele Elemente -⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { − 1 , 1 } ist -nicht offen. +⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { − 1 , 1 } ist +nicht offen. Teilaufgabe c) Beh.: Es gibt unendlich viele Primzahlen. -101 Lösungen der Übungsaufgaben + Lösungen der Übungsaufgaben Bew.: durch Widerspruch Annahme: Es gibt nur endlich viele Primzahlen p ∈ P Dann ist Z \ { − 1 , +1 } FS d. Arithmetik -= += p∈ P U 0,p -endlich. Das ist ein Widerspruch zu | Z| ist unendlich und | { −1, 1 } | ist endlich. +endlich. Das ist ein Widerspruch zu | Z| ist unendlich und | { −1, 1 } | ist endlich. Lösung zu Aufgabe3 -(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form +(a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form j ∈ J U -j × +j × i ∈ N,i = j P i wobei J ⊆ N endlich und U j ⊆ P - j offen ist. -Beweis: Nach Definition der Produkttopologie bilden Mengen der Form + j offen ist. +Beweis: Nach Definition der Produkttopologie bilden Mengen der Form i ∈ J U -j × +j × i ∈N \J P i wobei J ⊆ N endlich und U j ⊆ P -j offen ∀j ∈ J eine Basis der Topologie. 
-Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen -Form. +j offen ∀j ∈ J eine Basis der Topologie. +Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen +Form. (b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. -Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangs- -komponente Z ⊆ P . Da Z zusammenhängend ist und ∀i ∈ I : p +Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangskomponente + Z ⊆ P . Da Z zusammenhängend ist und ∀i ∈ I : p i : P → P i ist stetig, ist p @@ -7228,15 +7228,15 @@ i ( Z ) ⊆ { z alle i ∈ N . Dann gilt also: p i (x ) - + = x i = z i = p i (y ) - + = y i ∀i ∈ N -Somit folgt: x = y +Somit folgt: x = y Lösung zu Aufgabe4 (a) Beh.: GL n ( R ) ist nicht kompakt. @@ -7245,29 +7245,29 @@ Bew.: det : GL n ( R)) = R \ { 0 } nicht kompakt. 22 ⇒ GL - n ( R) ist nicht kompakt. + n ( R) ist nicht kompakt. (b) Beh.: SL 1 ( R ) ist nicht kompakt, für n > 1 ist SL n ( R ) kompakt. Bew.: Für SL 1 (R ) gilt: SL -1 (R) = - A ∈ R1 ×1 +1 (R) = + A ∈ R1 ×1 - det A = 1 - = - 1 + det A = 1 + = + 1 ∼ = { 1 }. 22 ⇒ SL 1 (R) ist kompakt. -102 Lösungen der Übungsaufgaben + Lösungen der Übungsaufgaben SL n (R ) ⊆ GL n (R ) lässt sich mit einer Teilmenge des R n2 - identifizieren. NachSatz 1.1 -sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere + identifizieren. NachSatz 1.1 +sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere nun für für n ∈ N ≥ 2 , m ∈ N : A @@ -7279,23 +7279,23 @@ Dann gilt: det A m ∈ SL n (R ), und A m ist unbeschränkt, da A - m + m ∞ = m −−−−→ -m →∞ ∞. +m →∞ ∞. (c) Beh.: P ( R) ist kompakt. Bew.: P (R ) ∼ = S n / - x ∼− x . Per Definition der Quotiententopologie ist die Klassenabbil- -dung stetig. Da S n + x ∼− x . Per Definition der Quotiententopologie ist die Klassenabbildung + stetig. 
Da S n als abgeschlossene und beschränkte Teilmenge desR n+1 kompakt ist 22 -⇒ P (R ) ist kompakt. +⇒ P (R ) ist kompakt. Lösung zu Aufgabe5 -Die Definition von Homöomorphismus kann aufSeite 9nachgelesen werden. -Definition 79 +Die Definition von Homöomorphismus kann aufSeite 9nachgelesen werden. +Definition 79 Seien (G, ∗) und ( H, ◦) Gruppen und ϕ : G → H eine Abbildung. ϕ heißt Homomorphismus , wenn ∀g @@ -7316,23 +7316,23 @@ Gruppenhomomorphismus und ein Homöomorphismus. 2) Sei G = ( Z, +) und H = ( Z/3 Z, +) . Dann ist ϕ 2 : G → H, x → x mod 3 ein Gruppenhomomorphismus. Jedoch ist ϕ -2 nicht injektiv, also sicher kein Homöomor- -phismus. +2 nicht injektiv, also sicher kein Homöomorphismus. + 3) Sei X ein topologischer Raum. Dann ist id X ein Homöomorphismus. Da keine -Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup- -penhomomorphismus. -Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten +Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Gruppenhomomorphismus. + +Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten verwendet. Lösung zu Aufgabe6 -Die Definition einer Isotopie kann aufSeite 20nachgelesen werden, die einer Isometrie auf +Die Definition einer Isotopie kann aufSeite 20nachgelesen werden, die einer Isometrie auf Seite 6. -Definition 80 +Definition 80 Seien (G, ∗) und ( H, ◦) Gruppen und ϕ : G → H eine Abbildung. ϕ heißt Isomorphismus , wenn ϕ ein bijektiver Homomorphismus ist. -Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen +Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. -103 Lösungen der Übungsaufgaben + Lösungen der Übungsaufgaben Lösung zu Aufgabe7 (a) Vor.: Sei M eine topologische Mannigfaltigkeit. 
Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend @@ -7342,11 +7342,11 @@ direkt ausBemerkung 23. Z := { z ∈ M | ∃Weg von x nach z } Es gilt: (i) Z = ∅ , da M lokal wegzusammenhängend ist -(ii) Z ist offen, da M lokal wegzusammenhängend ist +(ii) Z ist offen, da M lokal wegzusammenhängend ist (iii) Z C - := { ˜z ∈ M | Weg von x nach ˜z } ist offen + := { ˜z ∈ M | Weg von x nach ˜z } ist offen Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ Z C - eine offene und + eine offene und wegzusammenhängende Umgebung U ˜z ⊆ M . Es gilt sogar U @@ -7362,7 +7362,7 @@ Es gilt sogar U 1 (1) = z . Dann wäre aber γ : [0, 1] → M , -γ ( x ) = +γ ( x ) = γ 1 (2x) falls 0 ≤ x ≤ 1 2 @@ -7372,11 +7372,11 @@ Dann wäre aber ein stetiger Weg von ˜z nach x ⇒ Widerspruch. Da M zusammenhängend ist und M = Z -offen ∪ Z C +offen ∪ Z C -offen , sowie Z = ∅ folgt Z C +offen , sowie Z = ∅ folgt Z C = ∅. -Also ist M = Z wegzusammenhängend. +Also ist M = Z wegzusammenhängend. (b) Beh.: X ist wegzusammenhängend. Beweis: X := (R \ { 0 }) ∪ { 0 1 , 0 @@ -7398,10 +7398,10 @@ Weg γ 2 . Damit existiert ein (nicht einfacher) Weg γ von 0 1 nach 0 -2 . +2 . Lösung zu Aufgabe9 Vor.: Sei ( X, d) eine absolute Ebene, A, B , C ∈ X und AB C ein Dreieck. -104 Lösungen der Übungsaufgaben + Lösungen der Übungsaufgaben (a) Beh.: AB ∼ = AC ⇒ ∠ AB C ∼ = ∠ AC B @@ -7410,30 +7410,30 @@ Bew.: Sei AB ∼ ⇒ ∃ Isometrie ϕ mit ϕ( B ) = C und ϕ (C ) = B und ϕ (A ) = A . ⇒ ϕ (∠ AB C ) = ∠ AC B ⇒ ∠ AB C ∼ -= ∠AC B += ∠AC B (b) Beh.: - Der längeren Seite von AB C liegt der größere Winkel gegenüber und umge- -kehrt. -Bew.: Sei d(A, C ) > d(A, B ). Nach§3 (i)gibt es C + Der längeren Seite von AB C liegt der größere Winkel gegenüber und umgekehrt. + +Bew.: Sei d(A, C ) > d(A, B ). Nach§3 (i)gibt es C ∈ AC + - mit d(A, C + mit d(A, C ) = d(A, B ) -⇒ C +⇒ C liegt zwischen A und C . -Es gilt AB C - < AB C und ausAufgabe 9 (a)folgt: AB C - = AC +Es gilt AB C + < AB C und ausAufgabe 9 (a)folgt: AB C + = AC B . 
-∠ B C +∠ B C A ist ein nicht anliegender Außenwinkel zu ∠ B C A Bem. 66 -=====⇒ B C +=====⇒ B C A > B C A -⇒ B C A < B C - A = AB C +⇒ B C A < B C + A = AB C < AB C Sei umgekehrt AB C > B C A, kann wegen 1. Teil vonAufgabe 9 (b)nicht d(A, B ) > d( A, C ) gelten. WegenAufgabe 9 (a)kann nicht d(A, B ) = d(A, C ) gelten. -⇒ d(A, B ) < d( A, C ) +⇒ d(A, B ) < d( A, C ) (c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g Beh.: ∃! Lot Bew.: @@ -7452,53 +7452,53 @@ F g Abbildung 5.4:Zwei Lote zu einer Geraden g durch einen Punkt P NachFolgerung 4.4ist die Summe von zwei Innenwinkeln immer < π -⇒ G gibt es nicht. +⇒ G gibt es nicht. Lösung zu Aufgabe10 Sei f h und o. B. d. A. f g . f ∦ h ⇒ f ∩ h = ∅ , sei also x ∈ f ∩ h. Mit Axiom§5folgt: Es gibt höchstens eine Parallele zu g durch x , da x /∈ g . Diese ist f , da x ∈ f und f g . Da aber x ∈ h, kann h nicht -105 Lösungen der Übungsaufgaben -parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zug durch x (f = h). ⇒ g ∦ h + Lösungen der Übungsaufgaben +parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zug durch x (f = h). ⇒ g ∦ h Lösung zu Aufgabe11 -Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A - B - C +Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A + B + C Dreiecke, für die gilt: - d(A, B ) = d (A - , B + d(A, B ) = d (A + , B ) -d(A, C ) = d (A - , C +d(A, C ) = d (A + , C ) -d(B , C ) = d(B - , C +d(B , C ) = d(B + , C ) -Sei ϕ die Isometrie mit ϕ( A) = A - , ϕ( B ) = B - und ϕ ( C +Sei ϕ die Isometrie mit ϕ( A) = A + , ϕ( B ) = B + und ϕ ( C ) liegt in der selben Halbebene bzgl. AB wie C . Diese Isometrie existiert wegen§4. -Es gilt d( A, C ) = d ( A - , C - ) = d( ϕ( A - ) , ϕ( C - )) = d( A, ϕ( C - )) und d( B , C ) = d( B - , C +Es gilt d( A, C ) = d ( A + , C + ) = d( ϕ( A + ) , ϕ( C + )) = d( A, ϕ( C + )) und d( B , C ) = d( B + , C ) = -d( ϕ(B - ) , ϕ(C - )) = d(B , ϕ(C +d( ϕ(B + ) , ϕ(C + )) = d(B , ϕ(C )). Bem. 62 =====⇒ C = ϕ (C ) . 
-Es gilt also ϕ (A - B - C - ) = AB C . +Es gilt also ϕ (A + B + C + ) = AB C . Bildquellen Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. -Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. +Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. Abb.0.1a S 2 : Tom Bombadil,tex.stackexchange.com/a/42865 Abb.0.1bWürfel: Jan Hlavacek,tex.stackexchange.com/a/12069 @@ -7533,7 +7533,7 @@ bzgl. bezüglich bzw. beziehungsweise ca. circa d. h. das heißt -Def. Definition +Def. Definition etc. et cetera ex. existieren Hom. Homomorphismus @@ -7545,12 +7545,12 @@ vgl. vergleiche z. B. zum Beispiel zhgd. zusammenhängend z. z. zu zeigen -Ergänzende Definitionen und Sätze +Ergänzende Definitionen und Sätze Da dieses Skript in die Geometrie und Topologie einführen soll, sollten soweit wie möglich alle -benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet, +benötigten Begriffe definiert und erklärt werden. Die folgenden Begriffe wurden zwar verwendet, aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ sowie „Lineare Algebra -und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. -Definition 81 +und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. +Definition 81 Sei D ⊆ R und x 0 ∈ R. x 0 heißt ein Häufungspunkt von D : ⇔ ∃ Folge x @@ -7559,14 +7559,14 @@ Sei D ⊆ R und x mit x n → x 0 . -Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra +Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra entnommen: -Definition 82 -Es seien V und W K-Vektorräume und A( V ) und A ( W ) die zugehörigen affinen Räume. 
-Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ + µ = 1 +Definition 82 +Es seien V und W K-Vektorräume und A( V ) und A ( W ) die zugehörigen affinen Räume. +Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ + µ = 1 gilt: f (λa + µb ) = λf ( a) + µf ( b) -Definition 83 +Definition 83 Sei V ein Vektorraum und S ⊆ V eine Teilmenge. S heißt eine Orthonormalbasis von V , wenn gilt: (i) S ist eine Basis von V @@ -7587,7 +7587,7 @@ f ( b) < y 0 ∈ [a, b] mit f ( x 0 ) = y 0 . -Definition 84 +Definition 84 Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f (v ) = λv . Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f . @@ -7596,14 +7596,14 @@ Sei x, y ∈ R. Dann gilt: ( x + y )n = n -k =0 +k =0 n -k +k x n− k y k ∀n ∈ N 0 -Definition 85 +Definition 85 Seien a, b ∈ R 3 Vektoren. a × b :=  @@ -7652,7 +7652,7 @@ M ◦ A × B Kreuzprodukt A ⊆ B Teilmengenbeziehung A B echte Teilmengenbeziehung -A \ B Differenzmenge +A \ B Differenzmenge A ∪ B Vereinigung A ˙ ∪ B Disjunkte Vereinigung @@ -7669,8 +7669,8 @@ isometrisch |K | Geometrische Realisierung des Simplizialkomplexes K Gruppen -Sei X ein topologischer Raum und K ein Kör- -per. +Sei X ein topologischer Raum und K ein Körper. + Homöo (X ) Homöomorphismengruppe Iso( X ) Isometriengruppe GL @@ -7722,34 +7722,34 @@ f − 1 (M ) Urbild von M Rg(M ) Rang von M χ (K ) Euler-Charakteristik von K -110 Symbolverzeichnis + Symbolverzeichnis ∆ k Standard-Simplex X # Y Verklebung von X und Y d -n Lineare Abbildung ausBemer- -kung 37 +n Lineare Abbildung ausBemerkung + 37 A ∼ = B A ist isometrisch zu B f -∗ Abbildung zwischen Fundamental- -gruppen (vgl.Seite 49) -111 Symbolverzeichnis +∗ Abbildung zwischen Fundamentalgruppen + (vgl.Seite 49) + Symbolverzeichnis Zahlenmengen N = { 1, 2, 3, . . . } Natürliche Zahlen Z = N ∪ { 0, −1 , −2 , . . . 
} Ganze Zahlen -Q = Z ∪ +Q = Z ∪ 1 2 , 1 3 , 2 -3 - = +3 + = z -n mit z ∈ Z und n ∈ Z \ { 0 } +n mit z ∈ Z und n ∈ Z \ { 0 } Rationale Zahlen R = Q ∪ √ 2, − 3√ - 3 , . . . + 3 , . . . Reele Zahlen R + Echt positive reele Zahlen @@ -7771,7 +7771,7 @@ f : S 1 π 1 (X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X Fix(f ) Menge der Fixpunkte der Abbildung f - · + · 2 2-Norm; Euklidische Norm κ Krümmung κ @@ -7791,10 +7791,10 @@ s n ( x) Weingarten-Abbildung von Vanishing Set Stichwortverzeichnis Abbildung -affine,107 -differenzierbare,29 +affine,107 +differenzierbare,29 homotope,50 -offene,53 +offene,53 simpliziale,35 stetige,9 Abschluss,3 @@ -7819,7 +7819,7 @@ Decktransformation,59 Decktransformationsgruppe,59 Deformationsretrakt,47 dicht,3 -Diffeomorphismus,29 +Diffeomorphismus,29 Dimension,34 diskret,53 Doppelverhältnis,83 @@ -7867,7 +7867,7 @@ Halbgerade,65 Halbraum,28 Hauptkrümmung,92 Hilbert-Kurve,19,19 -113 Stichwortverzeichnis + Stichwortverzeichnis Homöomorphismengruppe,10 Homöomorphismus,9 Homologiegruppe,41 @@ -7887,7 +7887,7 @@ geschlossene,19 Karte,24 Kartenwechsel,28 Kern -offener,3 +offener,3 Kleeblattknoten,20 Klumpentopologie, siehe triviale Topologie Knoten,20, 17–21 @@ -7920,13 +7920,13 @@ Lotfußpunkt,86 Möbiusband,91 Möbiustransformation,80 Mannigfaltigkeit,24 -differenzierbare,29 +differenzierbare,29 geschlossene,25 glatte,29 mit Rand,28 Menge abgeschlossene,2 -offene,2 +offene,2 zusammenhängende,11 Metrik,6 diskrete,6 @@ -7955,11 +7955,11 @@ Punkt,34 Quotiententopologie,5,10,11 Rand,3,28 Raum -hausdorffscher,8 +hausdorffscher,8 kompakter,14 metrischer,6 pro jektiver,5,22,25,52 -114 Stichwortverzeichnis + Stichwortverzeichnis topologischer,2 zusammenhängender,11 Realisierung @@ -7973,7 +7973,7 @@ Sierpińskiraum,3,22 Simplex,34 Simplizialkomplex,34 Simplizialkomplexe -flächengleiche,74 +flächengleiche,74 Sphäre exotische,29 Standard-Simplex,34 @@ -7982,7 +7982,7 @@ sternförmig,48 Stetigkeit, 9–11 Strecke,65 Struktur -differenzierbare,29 
+differenzierbare,29 Subbasis,3 Tangentialebene,89, 89–90 Teilraum,4 @@ -8019,4 +8019,4 @@ Weingarten-Abbildung,95 Winkel,70 Zusammenhang, 11–14 Zusammenhangskomponente,13 -Zwischenwertsatz,107 +Zwischenwertsatz,107 \ No newline at end of file From a4fdbbad66e8f8edb0331aa599c742f78d0e525d Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Thu, 20 Feb 2025 09:37:58 -0500 Subject: [PATCH 13/18] fix: update release dates --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8db2045..3f3393b 100644 --- a/README.md +++ b/README.md @@ -25,15 +25,15 @@ This benchmark is about reading pure PDF files - notscanned documents and not do ## Libraries | Name | Last PyPI Release | License | Version | Dependencies | | -----------: | :---------------- | ------------------------------: | ----------------------: | :-------------------------------------------------------- | -| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | -| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | -| pdfplumber | 2023-07-29 | MIT | 0.11.5 | pdfminer.six | +| Borb | 2024-08-03 | AGPL/Commercial | 2.1.16 | | +| pypdfium2 | 2024-12-19 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | +| pdfminer.six | 2024-07-06 | MIT/X | 20231228 | | +| pdfplumber | 2025-01-01 | MIT | 0.11.5 | pdfminer.six | | pdfrw | 2017-09-18 | MIT | 0.4 | | -| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | +| pdftotext | 2025-02-03 | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | | playa | 2025-02-18 | MIT | 0.3.0rc1.dev41+g4a84b70 | | -| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | -| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | +| PyMuPDF | 2025-02-06 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | +| pypdf | 2025-02-09 | BSD 3-Clause | 5.3.0 | | | Tika 
| 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | From 75e22bdf1c306b3607ef746d0b8d71788f628005 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Thu, 20 Feb 2025 22:45:13 -0500 Subject: [PATCH 14/18] chore: update for playa 0.3.0 --- README.md | 28 +++++++++++++-------------- cache.json | 56 +++++++++++++++++++++++++++--------------------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 3f3393b..615fc3e 100644 --- a/README.md +++ b/README.md @@ -23,18 +23,18 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 14 | [1601.03642](https://arxiv.org/pdf/1601.03642.pdf) | 1004.9KiB | 8 | ## Libraries -| Name | Last PyPI Release | License | Version | Dependencies | -| -----------: | :---------------- | ------------------------------: | ----------------------: | :-------------------------------------------------------- | -| Borb | 2024-08-03 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2024-12-19 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | -| pdfminer.six | 2024-07-06 | MIT/X | 20231228 | | -| pdfplumber | 2025-01-01 | MIT | 0.11.5 | pdfminer.six | -| pdfrw | 2017-09-18 | MIT | 0.4 | | -| pdftotext | 2025-02-03 | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| playa | 2025-02-18 | MIT | 0.3.0rc1.dev41+g4a84b70 | | -| PyMuPDF | 2025-02-06 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | -| pypdf | 2025-02-09 | BSD 3-Clause | 5.3.0 | | -| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | +| Name | Last PyPI Release | License | Version | Dependencies | +| -----------: | :---------------- | ------------------------------: | -------: | :-------------------------------------------------------- | +| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | +| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | +| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | +| pdfplumber | 2023-07-29 | MIT | 0.11.5 | 
pdfminer.six | +| pdfrw | 2017-09-18 | MIT | 0.4 | | +| pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | +| playa | 2025-02-18 | MIT | 0.3.0 | | +| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | +| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | +| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | ## Text Extraction Speed @@ -44,7 +44,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.1s | 0.8s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | | 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | -| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.4s | 16.9s | 5.1s | 4.3s | 2.2s | 0.7s | 1.1s | 0.5s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.5s | 17.2s | 5.3s | 4.4s | 2.2s | 0.7s | 1.1s | 0.6s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | | 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | | 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | | 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 13.0s | 86.4s | 22.7s | 23.4s | 14.2s | 4.2s | 7.1s | 3.3s | 3.2s | 2.9s | 4.4s | 3.3s | 3.5s | 1.9s | 1.7s | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 
](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.6s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s 
| 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | diff --git a/cache.json b/cache.json index 2553089..a328460 100644 --- a/cache.json +++ b/cache.json @@ -162,46 +162,46 @@ }, "pdfrw": { "2201.00214": { - "watermark": 0.06520223617553711 + "watermark": 0.06627368927001953 }, "GeoTopo-book": { - "watermark": 0.537128210067749 + "watermark": 0.5515298843383789 }, "2201.00151": { - "watermark": 0.0580286979675293 + "watermark": 0.05949521064758301 }, "1707.09725": { - "watermark": 0.42675042152404785 + "watermark": 0.4222135543823242 }, "2201.00021": { - "watermark": 0.11134600639343262 + "watermark": 0.11372518539428711 }, "2201.00037": { - "watermark": 0.08258700370788574 + "watermark": 0.08260774612426758 }, "2201.00069": { - "watermark": 0.1533362865447998 + "watermark": 0.1553647518157959 }, "2201.00178": { - "watermark": 0.1181187629699707 + "watermark": 0.11800670623779297 }, "2201.00201": { - "watermark": 0.08050227165222168 + "watermark": 0.08152651786804199 }, "1602.06541": { - "watermark": 0.14638280868530273 + "watermark": 0.1436610221862793 }, "2201.00200": { - "watermark": 0.058241844177246094 + "watermark": 0.059192657470703125 }, "2201.00022": { - "watermark": 0.15881800651550293 + "watermark": 0.1749415397644043 }, "2201.00029": { - "watermark": 0.018030166625976562 + "watermark": 0.019024372100830078 }, "1601.03642": { - "watermark": 0.07268810272216797 + "watermark": 0.0719459056854248 } }, "pdftotext": { @@ -482,46 +482,46 @@ }, "playa": { "2201.00214": { - "read": 16.891082525253296 + "read": 17.163233280181885 }, "GeoTopo-book": { - "read": 5.135345220565796 + "read": 5.275846719741821 }, "2201.00151": { - "read": 4.301593780517578 + "read": 4.383682727813721 }, "1707.09725": { - "read": 2.1891415119171143 + "read": 2.212519884109497 }, "2201.00021": { - "read": 0.6821308135986328 + "read": 0.7033224105834961 }, "2201.00037": { - "read": 1.0525555610656738 + "read": 1.0794103145599365 }, "2201.00069": { - 
"read": 0.547914981842041 + "read": 0.5540194511413574 }, "2201.00178": { - "read": 0.5721349716186523 + "read": 0.5791630744934082 }, "2201.00201": { - "read": 0.3648381233215332 + "read": 0.3816349506378174 }, "1602.06541": { - "read": 0.6639382839202881 + "read": 0.6758975982666016 }, "2201.00200": { - "read": 0.506413459777832 + "read": 0.521315336227417 }, "2201.00022": { - "read": 0.5782179832458496 + "read": 0.5967402458190918 }, "2201.00029": { - "read": 0.42621445655822754 + "read": 0.4316384792327881 }, "1601.03642": { - "read": 0.22521281242370605 + "read": 0.22159337997436523 } } }, From f4bb730a2dad47ff51c9aabf02724b87e3cf9a1e Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Thu, 20 Feb 2025 22:48:39 -0500 Subject: [PATCH 15/18] fix: update release dates --- README.md | 16 ++++++++-------- benchmark.py | 14 +++++++------- cache.json | 28 ++++++++++++++-------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 615fc3e..84305e7 100644 --- a/README.md +++ b/README.md @@ -25,15 +25,15 @@ This benchmark is about reading pure PDF files - notscanned documents and not do ## Libraries | Name | Last PyPI Release | License | Version | Dependencies | | -----------: | :---------------- | ------------------------------: | -------: | :-------------------------------------------------------- | -| Borb | 2023-06-23 | AGPL/Commercial | 2.1.16 | | -| pypdfium2 | 2023-07-04 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | -| pdfminer.six | 2022-11-05 | MIT/X | 20231228 | | -| pdfplumber | 2023-07-29 | MIT | 0.11.5 | pdfminer.six | +| Borb | 2024-08-03 | AGPL/Commercial | 2.1.16 | | +| pypdfium2 | 2024-12-19 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | +| pdfminer.six | 2024-07-06 | MIT/X | 20231228 | | +| pdfplumber | 2025-01-01 | MIT | 0.11.5 | pdfminer.six | | pdfrw | 2017-09-18 | MIT | 0.4 | | | pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config 
python3-dev | -| playa | 2025-02-18 | MIT | 0.3.0 | | -| PyMuPDF | 2023-08-24 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | -| pypdf | 2023-08-26 | BSD 3-Clause | 5.3.0 | | +| playa | 2025-02-20 | MIT | 0.3.0 | | +| PyMuPDF | 2025-02-06 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | +| pypdf | 2025-02-09 | BSD 3-Clause | 5.3.0 | | | Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.6s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.0s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | diff --git a/benchmark.py b/benchmark.py index a31bb10..c609257 100644 --- a/benchmark.py +++ b/benchmark.py @@ -174,7 +174,7 @@ def write_single_result( version=pypdf.__version__, watermarking_function=pypdf_watermarking, license="BSD 3-Clause", - last_release_date="2023-08-26", + last_release_date="2025-02-09", image_extraction_function=pypdf_image_extraction, ), "pdfminer": Library( @@ -184,7 +184,7 @@ def write_single_result( text_extraction_function=lambda n: pdfminder_extract_text(BytesIO(n)), version=pdfminer.__version__, license="MIT/X", - last_release_date="2022-11-05", + last_release_date="2024-07-06", image_extraction_function=pdfminer_image_extraction, ), "pdfplumber": Library( @@ -194,7 +194,7 @@ def write_single_result( text_extraction_function=pdfplubmer_get_text, version=pdfplumber.__version__, license="MIT", - last_release_date="2023-07-29", + last_release_date="2025-01-01", dependencies="pdfminer.six", ), "pymupdf": Library( @@ -207,7 +207,7 @@ def write_single_result( image_extraction_function=pymupdf_image_extraction, dependencies="MuPDF", license="GNU AFFERO GPL 3.0 / Commerical", - last_release_date="2023-08-24", + last_release_date="2025-02-06", ), "pdftotext": Library( "pdftotext", @@ -228,7 +228,7 @@ def write_single_result( version="2.1.16", watermarking_function=None, 
license="AGPL/Commercial", - last_release_date="2023-06-23", + last_release_date="2024-08-03", ), "pdfium": Library( "pypdfium2", @@ -239,7 +239,7 @@ def write_single_result( watermarking_function=None, image_extraction_function=pdfium_image_extraction, license="Apache-2.0 or BSD-3-Clause", - last_release_date="2023-07-04", + last_release_date="2024-12-19", dependencies="PDFium (Foxit/Google)", ), "pdfrw": Library( @@ -260,7 +260,7 @@ def write_single_result( text_extraction_function=playa_get_text, version=playa.__version__, license="MIT", - last_release_date="2025-02-18", + last_release_date="2025-02-20", ), } main(docs, libraries) diff --git a/cache.json b/cache.json index a328460..39bd3f8 100644 --- a/cache.json +++ b/cache.json @@ -162,46 +162,46 @@ }, "pdfrw": { "2201.00214": { - "watermark": 0.06627368927001953 + "watermark": 0.06554722785949707 }, "GeoTopo-book": { - "watermark": 0.5515298843383789 + "watermark": 0.548457145690918 }, "2201.00151": { - "watermark": 0.05949521064758301 + "watermark": 0.05731678009033203 }, "1707.09725": { - "watermark": 0.4222135543823242 + "watermark": 0.4208219051361084 }, "2201.00021": { - "watermark": 0.11372518539428711 + "watermark": 0.11202788352966309 }, "2201.00037": { - "watermark": 0.08260774612426758 + "watermark": 0.07846570014953613 }, "2201.00069": { - "watermark": 0.1553647518157959 + "watermark": 0.15108752250671387 }, "2201.00178": { - "watermark": 0.11800670623779297 + "watermark": 0.11293959617614746 }, "2201.00201": { - "watermark": 0.08152651786804199 + "watermark": 0.09079337120056152 }, "1602.06541": { - "watermark": 0.1436610221862793 + "watermark": 0.13688302040100098 }, "2201.00200": { - "watermark": 0.059192657470703125 + "watermark": 0.06020212173461914 }, "2201.00022": { - "watermark": 0.1749415397644043 + "watermark": 0.15983247756958008 }, "2201.00029": { - "watermark": 0.019024372100830078 + "watermark": 0.017145156860351562 }, "1601.03642": { - "watermark": 0.0719459056854248 + "watermark": 
0.048897504806518555 } }, "pdftotext": { From 3df099c8848a2685b3060c2d1db5bd77fbfcff66 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 21 Feb 2025 09:04:26 -0500 Subject: [PATCH 16/18] fix: outpath no longer used --- pdf_benchmark/library_code.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pdf_benchmark/library_code.py b/pdf_benchmark/library_code.py index 91274e5..ab3a8cc 100644 --- a/pdf_benchmark/library_code.py +++ b/pdf_benchmark/library_code.py @@ -22,13 +22,11 @@ def playa_get_text(data: bytes) -> str: path = os.path.join(tempdir, "pdf.pdf") with open(path, "wb") as outfh: outfh.write(data) - outpath = os.path.join(tempdir, "pdf.txt") texts = [] - with open(outpath, "wt") as outfh: - with playa.open(path, max_workers=2) as pdf: - pages = pdf.pages - page_labels = [page.label for page in pages] - texts = list(pages.map(playa.Page.extract_text)) + with playa.open(path, max_workers=2) as pdf: + pages = pdf.pages + page_labels = [page.label for page in pages] + texts = list(pages.map(playa.Page.extract_text)) return postprocess(texts, page_labels) From c928275c3052680a8e98bc5350aec4b49968cbf7 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Tue, 1 Apr 2025 19:24:10 -0400 Subject: [PATCH 17/18] chore: update to 0.4.1 --- README.md | 6 +++--- cache.json | 56 +++++++++++++++++++++++++++--------------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 84305e7..19d7c46 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | pdfplumber | 2025-01-01 | MIT | 0.11.5 | pdfminer.six | | pdfrw | 2017-09-18 | MIT | 0.4 | | | pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| playa | 2025-02-20 | MIT | 0.3.0 | | +| playa | 2025-03-20 | MIT | 0.4.1 | | | PyMuPDF | 2025-02-06 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | | pypdf | 
2025-02-09 | BSD 3-Clause | 5.3.0 | | | Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | @@ -44,7 +44,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.1s | 0.8s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | | 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | -| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.5s | 17.2s | 5.3s | 4.4s | 2.2s | 0.7s | 1.1s | 0.6s | 0.6s | 0.4s | 0.7s | 0.5s | 0.6s | 0.4s | 0.2s | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.5s | 17.0s | 5.4s | 4.3s | 2.3s | 0.7s | 1.1s | 0.6s | 0.7s | 0.4s | 0.6s | 0.5s | 0.5s | 0.4s | 0.2s | | 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | | 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | | 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 13.0s | 86.4s | 22.7s | 23.4s | 14.2s | 4.2s | 7.1s | 3.3s | 3.2s | 2.9s | 4.4s | 3.3s | 3.5s | 1.9s | 1.7s | @@ -66,7 +66,7 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 
](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.5s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.0s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.6s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | | 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | diff --git a/cache.json b/cache.json index 39bd3f8..f67e23e 100644 --- a/cache.json +++ b/cache.json @@ -162,46 +162,46 @@ }, "pdfrw": { "2201.00214": { - "watermark": 0.06554722785949707 + "watermark": 0.06554293632507324 }, 
"GeoTopo-book": { - "watermark": 0.548457145690918 + "watermark": 0.561650276184082 }, "2201.00151": { - "watermark": 0.05731678009033203 + "watermark": 0.05933380126953125 }, "1707.09725": { - "watermark": 0.4208219051361084 + "watermark": 0.42210912704467773 }, "2201.00021": { - "watermark": 0.11202788352966309 + "watermark": 0.11514902114868164 }, "2201.00037": { - "watermark": 0.07846570014953613 + "watermark": 0.08163809776306152 }, "2201.00069": { - "watermark": 0.15108752250671387 + "watermark": 0.15467286109924316 }, "2201.00178": { - "watermark": 0.11293959617614746 + "watermark": 0.1228017807006836 }, "2201.00201": { - "watermark": 0.09079337120056152 + "watermark": 0.08412480354309082 }, "1602.06541": { - "watermark": 0.13688302040100098 + "watermark": 0.1428825855255127 }, "2201.00200": { - "watermark": 0.06020212173461914 + "watermark": 0.058417558670043945 }, "2201.00022": { - "watermark": 0.15983247756958008 + "watermark": 0.17146778106689453 }, "2201.00029": { - "watermark": 0.017145156860351562 + "watermark": 0.018665313720703125 }, "1601.03642": { - "watermark": 0.048897504806518555 + "watermark": 0.07572460174560547 } }, "pdftotext": { @@ -482,46 +482,46 @@ }, "playa": { "2201.00214": { - "read": 17.163233280181885 + "read": 17.036799907684326 }, "GeoTopo-book": { - "read": 5.275846719741821 + "read": 5.419198036193848 }, "2201.00151": { - "read": 4.383682727813721 + "read": 4.287257194519043 }, "1707.09725": { - "read": 2.212519884109497 + "read": 2.2673816680908203 }, "2201.00021": { - "read": 0.7033224105834961 + "read": 0.6923933029174805 }, "2201.00037": { - "read": 1.0794103145599365 + "read": 1.1081407070159912 }, "2201.00069": { - "read": 0.5540194511413574 + "read": 0.5646071434020996 }, "2201.00178": { - "read": 0.5791630744934082 + "read": 0.6794044971466064 }, "2201.00201": { - "read": 0.3816349506378174 + "read": 0.3750267028808594 }, "1602.06541": { - "read": 0.6758975982666016 + "read": 0.6215255260467529 }, "2201.00200": { - 
"read": 0.521315336227417 + "read": 0.5109498500823975 }, "2201.00022": { - "read": 0.5967402458190918 + "read": 0.5388374328613281 }, "2201.00029": { - "read": 0.4316384792327881 + "read": 0.4311091899871826 }, "1601.03642": { - "read": 0.22159337997436523 + "read": 0.2250828742980957 } } }, From 055d287ae163dd1f5ec2114dacbd2380b9558329 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 14 May 2025 23:12:29 -0400 Subject: [PATCH 18/18] chore: update for playa 0.5.0 --- README.md | 48 +- cache.json | 548 +- read/results/pdfminer/1602.06541.txt | 4 +- read/results/pdfminer/1707.09725.txt | 24 +- read/results/pdfminer/GeoTopo-book.txt | 72 +- read/results/pdfplumber/1601.03642.txt | 59 +- read/results/pdfplumber/1602.06541.txt | 236 +- read/results/pdfplumber/1707.09725.txt | 708 +- read/results/pdfplumber/2201.00021.txt | 246 +- read/results/pdfplumber/2201.00022.txt | 344 +- read/results/pdfplumber/2201.00029.txt | 12 +- read/results/pdfplumber/2201.00037.txt | 2045 +++-- read/results/pdfplumber/2201.00069.txt | 72 +- read/results/pdfplumber/2201.00151.txt | 573 +- read/results/pdfplumber/2201.00178.txt | 844 +- read/results/pdfplumber/2201.00200.txt | 247 +- read/results/pdfplumber/2201.00201.txt | 131 +- read/results/pdfplumber/2201.00214.txt | 217 +- read/results/pdfplumber/GeoTopo-book.txt | 9465 +++++++++++++++++----- read/results/playa/1601.03642.txt | 121 +- read/results/playa/1602.06541.txt | 1042 ++- read/results/playa/1707.09725.txt | 2620 +++--- read/results/playa/2201.00021.txt | 666 +- read/results/playa/2201.00022.txt | 1082 +-- read/results/playa/2201.00029.txt | 478 +- read/results/playa/2201.00037.txt | 2440 +++--- read/results/playa/2201.00069.txt | Bin 55732 -> 55547 bytes read/results/playa/2201.00151.txt | 816 +- read/results/playa/2201.00178.txt | 1348 +-- read/results/playa/2201.00200.txt | 346 +- read/results/playa/2201.00201.txt | 482 +- read/results/playa/2201.00214.txt | 543 +- read/results/playa/GeoTopo-book.txt | 8535 
+++++++++---------- read/results/pypdf/2201.00151.txt | 140 +- read/results/pypdf/2201.00214.txt | 30 +- read/results/tika/2201.00022.txt | 31 +- read/results/tika/2201.00069.txt | 229 +- read/results/tika/2201.00178.txt | 93 +- requirements/dev.txt | 12 +- requirements/main.txt | 51 +- 40 files changed, 22654 insertions(+), 14346 deletions(-) diff --git a/README.md b/README.md index 19d7c46..f6a9d1d 100644 --- a/README.md +++ b/README.md @@ -27,48 +27,48 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | -----------: | :---------------- | ------------------------------: | -------: | :-------------------------------------------------------- | | Borb | 2024-08-03 | AGPL/Commercial | 2.1.16 | | | pypdfium2 | 2024-12-19 | Apache-2.0 or BSD-3-Clause | 4.30.1 | PDFium (Foxit/Google) | -| pdfminer.six | 2024-07-06 | MIT/X | 20231228 | | -| pdfplumber | 2025-01-01 | MIT | 0.11.5 | pdfminer.six | +| pdfminer.six | 2024-07-06 | MIT/X | 20250327 | | +| pdfplumber | 2025-01-01 | MIT | 0.11.6 | pdfminer.six | | pdfrw | 2017-09-18 | MIT | 0.4 | | | pdftotext | - | GPL | 0.86.1 | build-essential libpoppler-cpp-dev pkg-config python3-dev | -| playa | 2025-03-20 | MIT | 0.4.1 | | -| PyMuPDF | 2025-02-06 | GNU AFFERO GPL 3.0 / Commerical | 1.25.3 | MuPDF | -| pypdf | 2025-02-09 | BSD 3-Clause | 5.3.0 | | -| Tika | 2023-01-01 | Apache v2 | 2.6.0 | Apache Tika | +| playa | 2025-02-20 | MIT | 0.5.0 | | +| PyMuPDF | 2025-02-06 | GNU AFFERO GPL 3.0 / Commerical | 1.25.5 | MuPDF | +| pypdf | 2025-02-09 | BSD 3-Clause | 5.5.0 | | +| Tika | 2023-01-01 | Apache v2 | 3.1.0 | Apache Tika | ## Text Extraction Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 
](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.1s | 0.8s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | -| 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.0s | +| 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 0.2s | 1.0s | 0.3s | 0.2s | 0.2s | 0.0s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | +| 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.3s | 1.3s | 0.4s | 0.7s | 0.3s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | | 3 | [pdftotext ](https://poppler.freedesktop.org/) | 0.3s | 1.0s | 1.1s | 0.3s | 0.8s | 0.1s | 0.3s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 
-| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.5s | 17.0s | 5.4s | 4.3s | 2.3s | 0.7s | 1.1s | 0.6s | 0.7s | 0.4s | 0.6s | 0.5s | 0.5s | 0.4s | 0.2s | -| 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.7s | 8.1s | 8.1s | 3.9s | 1.2s | 2.0s | 0.8s | 1.0s | 0.8s | 1.0s | 0.9s | 0.8s | 0.6s | 0.4s | -| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.0s | 55.9s | 23.7s | 16.8s | 8.9s | 2.3s | 4.0s | 1.8s | 2.2s | 1.5s | 2.7s | 1.8s | 2.0s | 1.1s | 0.9s | -| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 13.0s | 86.4s | 22.7s | 23.4s | 14.2s | 4.2s | 7.1s | 3.3s | 3.2s | 2.9s | 4.4s | 3.3s | 3.5s | 1.9s | 1.7s | -| 8 | [Tika ](https://pypi.org/project/tika/) | 24.4s | 17.8s | 100.1s | 0.6s | 23.4s | 47.3s | 48.3s | 31.5s | 34.5s | 0.1s | 13.2s | 0.1s | 24.2s | 0.1s | 0.1s | -| 9 | [Borb ](https://pypi.org/project/borb/) | 50.5s | 188.4s | 149.1s | 2.3s | 113.6s | 28.4s | 11.7s | 112.3s | 23.7s | 27.1s | 8.4s | 5.7s | 27.7s | 4.9s | 2.9s | +| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 2.5s | 17.1s | 5.5s | 4.4s | 2.4s | 0.7s | 1.2s | 0.6s | 0.6s | 0.4s | 0.7s | 0.6s | 0.6s | 0.5s | 0.3s | +| 5 | [pypdf ](https://pypi.org/project/pypdf/) | 4.1s | 28.6s | 8.0s | 8.2s | 4.0s | 1.2s | 1.8s | 0.9s | 0.8s | 0.6s | 1.0s | 0.9s | 0.8s | 0.7s | 0.4s | +| 6 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 9.5s | 60.6s | 24.4s | 18.6s | 9.1s | 2.4s | 4.1s | 1.8s | 2.1s | 1.4s | 2.6s | 1.8s | 2.0s | 1.1s | 0.9s | +| 7 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 13.0s | 86.8s | 22.2s | 24.0s | 14.3s | 4.0s | 7.2s | 3.3s | 3.2s | 2.9s | 4.4s | 3.2s | 3.6s | 1.8s | 1.7s | +| 8 | [Tika ](https://pypi.org/project/tika/) | 23.7s | 14.1s | 100.1s | 0.6s | 23.4s | 47.5s | 48.3s | 26.2s | 34.6s | 0.1s | 13.2s | 0.1s | 24.0s | 0.1s | 0.1s | +| 9 | [Borb ](https://pypi.org/project/borb/) | 53.5s | 189.5s | 151.8s | 2.3s | 128.6s | 34.0s | 11.8s | 118.7s | 25.8s | 31.9s | 8.4s | 5.8s | 32.3s | 5.0s | 2.9s | ## Image 
Extraction Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.6s | 0.3s | 0.7s | 0.0s | 2.2s | 0.6s | 0.0s | 3.3s | 0.5s | 0.5s | 0.1s | 0.0s | 0.4s | 0.3s | 0.0s | -| 2 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 1.3s | 1.5s | 2.3s | 0.0s | 4.3s | 1.2s | 0.2s | 5.7s | 0.9s | 0.9s | 0.3s | 0.1s | 0.7s | 0.3s | 0.0s | -| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 5.2s | 24.6s | 7.0s | 6.6s | 18.9s | 1.7s | 0.7s | 7.6s | 1.5s | 
1.5s | 0.9s | 0.2s | 1.3s | 0.3s | 0.2s | -| 4 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 12.3s | 69.2s | 24.6s | 20.6s | 36.6s | 2.6s | 4.1s | 2.4s | 2.3s | 1.5s | 2.7s | 2.0s | 2.1s | 1.1s | 0.9s | +| 1 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.6s | 0.3s | 0.7s | 0.0s | 2.3s | 0.6s | 0.0s | 3.3s | 0.5s | 0.5s | 0.1s | 0.0s | 0.4s | 0.3s | 0.0s | +| 2 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 1.3s | 1.6s | 2.3s | 0.1s | 4.3s | 1.1s | 0.2s | 5.7s | 0.9s | 0.8s | 0.3s | 0.0s | 0.7s | 0.3s | 0.0s | +| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 5.3s | 24.7s | 7.0s | 6.7s | 19.1s | 1.6s | 0.7s | 7.7s | 1.5s | 1.6s | 0.8s | 0.2s | 1.3s | 0.3s | 0.3s | +| 4 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 12.2s | 72.4s | 25.7s | 21.0s | 30.1s | 2.7s | 4.3s | 2.4s | 2.3s | 1.5s | 2.7s | 2.0s | 2.1s | 1.1s | 0.9s | ## Watermarking Speed | # | Library | Average | [ 1 ](https://arxiv.org/pdf/2201.00214.pdf) | [ 2 ](https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf) | [ 3 ](https://arxiv.org/pdf/2201.00151.pdf) | [ 4 ](https://arxiv.org/pdf/1707.09725.pdf) | [ 5 ](https://arxiv.org/pdf/2201.00021.pdf) | [ 6 ](https://arxiv.org/pdf/2201.00037.pdf) | [ 7 ](https://arxiv.org/pdf/2201.00069.pdf) | [ 8 ](https://arxiv.org/pdf/2201.00178.pdf) | [ 9 ](https://arxiv.org/pdf/2201.00201.pdf) | [ 10 ](https://arxiv.org/pdf/1602.06541.pdf) | [ 11 ](https://arxiv.org/pdf/2201.00200.pdf) | [ 12 ](https://arxiv.org/pdf/2201.00022.pdf) | [ 13 ](https://arxiv.org/pdf/2201.00029.pdf) | [ 14 ](https://arxiv.org/pdf/1601.03642.pdf) | | :- | :--------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | -| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.2s | 0.1s | 0.6s | 0.1s | 0.4s | 0.1s | 0.1s | 0.2s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | +| 1 | [pdfrw ](https://pypi.org/project/pdfrw/) | 0.1s | 0.1s | 0.6s | 0.1s | 0.4s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.2s | 0.0s | 0.1s | | 2 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 0.2s | 0.5s | 0.7s | 0.2s | 0.5s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.1s | 0.0s | 0.1s | 0.0s | 0.0s | -| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.7s | 0.3s | 0.4s | 0.5s | 0.4s | 0.2s | 0.5s | 0.2s | 0.6s | 0.1s | 0.1s | +| 3 | [pypdf ](https://pypi.org/project/pypdf/) | 0.6s | 0.7s | 2.3s | 0.5s | 1.8s | 0.4s | 0.6s | 0.3s | 0.5s | 0.2s | 0.6s | 0.2s | 0.6s | 0.1s | 0.1s | ## Watermarking File Size @@ -85,10 +85,10 @@ This benchmark is about reading pure PDF files - notscanned documents and not do | :- | :-------------------------------------------------------- | :------ | :---------------------------------------------- | :------------------------------------------------------------------------------------------ | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | 
:---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | :---------------------------------------------- | | 1 | [pypdfium2 ](https://pypi.org/project/pypdfium2/) | 97% | 99% | 97% | 94% | 99% | 98% | 96% | 99% | 99% | 99% | 99% | 98% | 78% | 99% | 99% | | 2 | [pypdf ](https://pypi.org/project/pypdf/) | 96% | 99% | 95% | 93% | 98% | 99% | 96% | 97% | 99% | 99% | 99% | 99% | 78% | 100% | 99% | -| 3 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 97% | 98% | 95% | 99% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | -| 4 | [playa ](https://pypi.org/project/playa-pdf/) | 96% | 98% | 93% | 93% | 98% | 98% | 95% | 97% | 97% | 98% | 99% | 98% | 77% | 96% | 99% | -| 5 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 89% | 89% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | +| 3 | [playa ](https://pypi.org/project/playa-pdf/) | 96% | 99% | 95% | 94% | 98% | 98% | 96% | 98% | 98% | 99% | 99% | 99% | 78% | 98% | 99% | +| 4 | [PyMuPDF ](https://pypi.org/project/PyMuPDF/) | 96% | 98% | 96% | 93% | 97% | 98% | 95% | 99% | 98% | 98% | 98% | 97% | 77% | 98% | 99% | +| 5 | [pdfplumber ](https://pypi.org/project/pdfplumber/) | 93% | 96% | 88% | 88% | 98% | 92% | 94% | 93% | 95% | 93% | 97% | 94% | 76% | 99% | 98% | | 6 | [pdftotext ](https://poppler.freedesktop.org/) | 92% | 96% | 94% | 91% | 95% | 92% | 96% | 96% | 96% | 97% | 83% | 94% | 77% | 96% | 79% | | 7 | [pdfminer.six ](https://pypi.org/project/pdfminer.six/) | 89% | 95% | 79% | 86% | 92% | 86% | 93% | 95% | 93% | 92% | 92% | 93% | 71% | 98% | 86% | -| 8 | [Tika ](https://pypi.org/project/tika/) | 83% | 99% | 0% | 92% | 95% | 77% | 86% | 81% | 82% | 98% | 88% | 98% | 67% | 98% | 96% | +| 8 | [Tika ](https://pypi.org/project/tika/) | 83% | 99% | 0% | 92% | 95% | 77% | 86% | 82% | 82% | 98% | 88% | 98% | 67% | 98% | 96% | | 9 | [Borb ](https://pypi.org/project/borb/) | 45% | 
70% | 79% | 0% | 40% | 48% | 92% | 0% | 64% | 51% | 41% | 55% | 41% | 0% | 53% | diff --git a/cache.json b/cache.json index f67e23e..91973c0 100644 --- a/cache.json +++ b/cache.json @@ -2,526 +2,526 @@ "benchmark_times": { "borb": { "2201.00214": { - "read": 188.4488205909729 + "read": 189.48876190185547 }, "GeoTopo-book": { - "read": 149.14154720306396 + "read": 151.83953523635864 }, "2201.00151": { - "read": 2.308486223220825 + "read": 2.2889041900634766 }, "1707.09725": { - "read": 113.59780859947205 + "read": 128.60176134109497 }, "2201.00021": { - "read": 28.409748315811157 + "read": 33.952091693878174 }, "2201.00037": { - "read": 11.665522575378418 + "read": 11.81212306022644 }, "2201.00069": { - "read": 112.33900618553162 + "read": 118.73726058006287 }, "2201.00178": { - "read": 23.70449709892273 + "read": 25.80863618850708 }, "2201.00201": { - "read": 27.119436264038086 + "read": 31.945479154586792 }, "1602.06541": { - "read": 8.40593433380127 + "read": 8.378407716751099 }, "2201.00200": { - "read": 5.670783281326294 + "read": 5.813374042510986 }, "2201.00022": { - "read": 27.747946977615356 + "read": 32.266849517822266 }, "2201.00029": { - "read": 4.8552985191345215 + "read": 5.010681629180908 }, "1601.03642": { - "read": 2.92044734954834 + "read": 2.859659194946289 } }, "pdfium": { "2201.00214": { - "read": 0.7582509517669678, - "image_extraction": 1.5329885482788086 + "read": 1.0257675647735596, + "image_extraction": 1.6196339130401611 }, "GeoTopo-book": { - "read": 0.31653857231140137, - "image_extraction": 2.299833297729492 + "read": 0.3268585205078125, + "image_extraction": 2.269947052001953 }, "2201.00151": { - "read": 0.22646355628967285, - "image_extraction": 0.027804136276245117 + "read": 0.22581076622009277, + "image_extraction": 0.05360603332519531 }, "1707.09725": { - "read": 0.22682547569274902, - "image_extraction": 4.282248497009277 + "read": 0.22593140602111816, + "image_extraction": 4.305531740188599 }, "2201.00021": { - "read": 
0.04979705810546875, - "image_extraction": 1.1562087535858154 + "read": 0.04949045181274414, + "image_extraction": 1.0980193614959717 }, "2201.00037": { - "read": 0.11386513710021973, - "image_extraction": 0.22445988655090332 + "read": 0.11467123031616211, + "image_extraction": 0.22275733947753906 }, "2201.00069": { - "read": 0.05641007423400879, - "image_extraction": 5.704399824142456 + "read": 0.0616908073425293, + "image_extraction": 5.682236671447754 }, "2201.00178": { - "read": 0.0558314323425293, - "image_extraction": 0.8747310638427734 + "read": 0.05685162544250488, + "image_extraction": 0.8708248138427734 }, "2201.00201": { - "read": 0.040203094482421875, - "image_extraction": 0.853954553604126 + "read": 0.04024791717529297, + "image_extraction": 0.8477842807769775 }, "1602.06541": { - "read": 0.06276416778564453, - "image_extraction": 0.33101916313171387 + "read": 0.06260371208190918, + "image_extraction": 0.3279297351837158 }, "2201.00200": { - "read": 0.037505149841308594, - "image_extraction": 0.051642656326293945 + "read": 0.03628039360046387, + "image_extraction": 0.04786324501037598 }, "2201.00022": { - "read": 0.054618120193481445, - "image_extraction": 0.6855518817901611 + "read": 0.054438114166259766, + "image_extraction": 0.686147928237915 }, "2201.00029": { - "read": 0.024730443954467773, - "image_extraction": 0.3229193687438965 + "read": 0.02482748031616211, + "image_extraction": 0.3170650005340576 }, "1601.03642": { - "read": 0.02708148956298828, - "image_extraction": 0.03443098068237305 + "read": 0.027425289154052734, + "image_extraction": 0.03217649459838867 } }, "pdfminer": { "2201.00214": { - "read": 55.93182134628296, - "image_extraction": 69.16859841346741 + "read": 60.60245084762573, + "image_extraction": 72.40467977523804 }, "GeoTopo-book": { - "read": 23.7318594455719, - "image_extraction": 24.565200805664062 + "read": 24.425355911254883, + "image_extraction": 25.683483839035034 }, "2201.00151": { - "read": 16.848540544509888, - 
"image_extraction": 20.575064420700073 + "read": 18.612955808639526, + "image_extraction": 20.973793506622314 }, "1707.09725": { - "read": 8.933068037033081, - "image_extraction": 36.61323118209839 + "read": 9.125925540924072, + "image_extraction": 30.07633948326111 }, "2201.00021": { - "read": 2.2727904319763184, - "image_extraction": 2.606733798980713 + "read": 2.3772432804107666, + "image_extraction": 2.6542723178863525 }, "2201.00037": { - "read": 3.968254566192627, - "image_extraction": 4.147768974304199 + "read": 4.093456506729126, + "image_extraction": 4.28325629234314 }, "2201.00069": { - "read": 1.8122689723968506, - "image_extraction": 2.4438936710357666 + "read": 1.8156311511993408, + "image_extraction": 2.380213737487793 }, "2201.00178": { - "read": 2.186378002166748, - "image_extraction": 2.2748231887817383 + "read": 2.0833513736724854, + "image_extraction": 2.259970188140869 }, "2201.00201": { - "read": 1.4637563228607178, - "image_extraction": 1.5430595874786377 + "read": 1.3995592594146729, + "image_extraction": 1.5048816204071045 }, "1602.06541": { - "read": 2.655424118041992, - "image_extraction": 2.690824270248413 + "read": 2.6256520748138428, + "image_extraction": 2.7129485607147217 }, "2201.00200": { - "read": 1.847485065460205, - "image_extraction": 1.9874897003173828 + "read": 1.8143031597137451, + "image_extraction": 1.967012882232666 }, "2201.00022": { - "read": 1.9943366050720215, - "image_extraction": 2.0818636417388916 + "read": 1.9715628623962402, + "image_extraction": 2.0709922313690186 }, "2201.00029": { - "read": 1.0527269840240479, - "image_extraction": 1.1150856018066406 + "read": 1.0807011127471924, + "image_extraction": 1.1380336284637451 }, "1601.03642": { - "read": 0.9015827178955078, - "image_extraction": 0.9357635974884033 + "read": 0.932868480682373, + "image_extraction": 0.9043912887573242 } }, - "pdfrw": { + "pdfplumber": { "2201.00214": { - "watermark": 0.06554293632507324 + "read": 86.757169008255 }, "GeoTopo-book": { - 
"watermark": 0.561650276184082 + "read": 22.199340105056763 }, "2201.00151": { - "watermark": 0.05933380126953125 + "read": 24.04423952102661 }, "1707.09725": { - "watermark": 0.42210912704467773 + "read": 14.33962893486023 }, "2201.00021": { - "watermark": 0.11514902114868164 + "read": 4.038215637207031 }, "2201.00037": { - "watermark": 0.08163809776306152 + "read": 7.1929755210876465 }, "2201.00069": { - "watermark": 0.15467286109924316 + "read": 3.2600326538085938 }, "2201.00178": { - "watermark": 0.1228017807006836 + "read": 3.171274185180664 }, "2201.00201": { - "watermark": 0.08412480354309082 + "read": 2.876185178756714 }, "1602.06541": { - "watermark": 0.1428825855255127 + "read": 4.379940986633301 }, "2201.00200": { - "watermark": 0.058417558670043945 + "read": 3.1631577014923096 }, "2201.00022": { - "watermark": 0.17146778106689453 + "read": 3.5800487995147705 }, "2201.00029": { - "watermark": 0.018665313720703125 + "read": 1.8037359714508057 }, "1601.03642": { - "watermark": 0.07572460174560547 + "read": 1.7201035022735596 } }, - "pdftotext": { + "pdfrw": { "2201.00214": { - "read": 0.9697160720825195 + "watermark": 0.06628918647766113 }, "GeoTopo-book": { - "read": 1.0543584823608398 + "watermark": 0.5555062294006348 }, "2201.00151": { - "read": 0.3154182434082031 + "watermark": 0.05751514434814453 }, "1707.09725": { - "read": 0.7857511043548584 + "watermark": 0.4335949420928955 }, "2201.00021": { - "read": 0.0954442024230957 + "watermark": 0.11416792869567871 }, "2201.00037": { - "read": 0.25443029403686523 + "watermark": 0.08194208145141602 }, "2201.00069": { - "read": 0.21236205101013184 + "watermark": 0.12613558769226074 }, "2201.00178": { - "read": 0.14896798133850098 + "watermark": 0.11217474937438965 }, "2201.00201": { - "read": 0.06666803359985352 + "watermark": 0.08137369155883789 }, "1602.06541": { - "read": 0.1229856014251709 + "watermark": 0.13975119590759277 }, "2201.00200": { - "read": 0.07712578773498535 + "watermark": 0.05994772911071777 
}, "2201.00022": { - "read": 0.11791729927062988 + "watermark": 0.16331195831298828 }, "2201.00029": { - "read": 0.04947781562805176 + "watermark": 0.02135634422302246 }, "1601.03642": { - "read": 0.05305290222167969 + "watermark": 0.05025434494018555 } }, - "pymupdf": { + "pdftotext": { "2201.00214": { - "read": 1.2650783061981201, - "watermark": 0.48745298385620117, - "image_extraction": 0.2923619747161865 + "read": 0.983544111251831 }, "GeoTopo-book": { - "read": 0.4023463726043701, - "watermark": 0.6608211994171143, - "image_extraction": 0.6644651889801025 + "read": 1.0938339233398438 }, "2201.00151": { - "read": 0.6669011116027832, - "watermark": 0.2024550437927246, - "image_extraction": 0.0034143924713134766 + "read": 0.32129335403442383 }, "1707.09725": { - "read": 0.2984461784362793, - "watermark": 0.5301051139831543, - "image_extraction": 2.2000043392181396 + "read": 0.828188419342041 }, "2201.00021": { - "read": 0.12205195426940918, - "watermark": 0.08205199241638184, - "image_extraction": 0.613532543182373 + "read": 0.09987092018127441 }, "2201.00037": { - "read": 0.16483521461486816, - "watermark": 0.13814139366149902, - "image_extraction": 0.0038840770721435547 + "read": 0.25408291816711426 }, "2201.00069": { - "read": 0.06914305686950684, - "watermark": 0.09702491760253906, - "image_extraction": 3.283721923828125 + "read": 0.2126762866973877 }, "2201.00178": { - "read": 0.0685114860534668, - "watermark": 0.1016242504119873, - "image_extraction": 0.4525175094604492 + "read": 0.14757943153381348 }, "2201.00201": { - "read": 0.05706334114074707, - "watermark": 0.06307482719421387, - "image_extraction": 0.4692685604095459 + "read": 0.07124114036560059 }, "1602.06541": { - "read": 0.0886693000793457, - "watermark": 0.11176776885986328, - "image_extraction": 0.08822441101074219 + "read": 0.12359809875488281 }, "2201.00200": { - "read": 0.0522770881652832, - "watermark": 0.04518556594848633, - "image_extraction": 0.003078937530517578 + "read": 
0.07798433303833008 }, "2201.00022": { - "read": 0.07638287544250488, - "watermark": 0.10881543159484863, - "image_extraction": 0.38236427307128906 + "read": 0.1180119514465332 }, "2201.00029": { - "read": 0.0361933708190918, - "watermark": 0.03627157211303711, - "image_extraction": 0.2511467933654785 + "read": 0.049498558044433594 }, "1601.03642": { - "read": 0.03939104080200195, - "watermark": 0.04615473747253418, - "image_extraction": 0.003738880157470703 + "read": 0.05458521842956543 } }, - "pypdf": { + "playa": { "2201.00214": { - "read": 28.712388277053833, - "watermark": 0.7213070392608643, - "image_extraction": 24.60779356956482 + "read": 17.097771167755127 }, "GeoTopo-book": { - "read": 8.068076133728027, - "watermark": 2.343616008758545, - "image_extraction": 6.996605157852173 + "read": 5.519521951675415 }, "2201.00151": { - "read": 8.089233875274658, - "watermark": 0.4587695598602295, - "image_extraction": 6.647898435592651 + "read": 4.4114460945129395 }, "1707.09725": { - "read": 3.891724109649658, - "watermark": 1.7476551532745361, - "image_extraction": 18.940555572509766 + "read": 2.4427313804626465 }, "2201.00021": { - "read": 1.1562883853912354, - "watermark": 0.30209898948669434, - "image_extraction": 1.6896071434020996 + "read": 0.7357511520385742 }, "2201.00037": { - "read": 2.0014760494232178, - "watermark": 0.3821859359741211, - "image_extraction": 0.749286413192749 + "read": 1.1612508296966553 }, "2201.00069": { - "read": 0.7707874774932861, - "watermark": 0.46254944801330566, - "image_extraction": 7.584951162338257 + "read": 0.6288008689880371 }, "2201.00178": { - "read": 0.9634733200073242, - "watermark": 0.3837471008300781, - "image_extraction": 1.4831831455230713 + "read": 0.6197876930236816 }, "2201.00201": { - "read": 0.7547926902770996, - "watermark": 0.21503210067749023, - "image_extraction": 1.460836410522461 + "read": 0.4411795139312744 }, "1602.06541": { - "read": 0.967684268951416, - "watermark": 0.5418281555175781, - 
"image_extraction": 0.8519599437713623 + "read": 0.678673505783081 }, "2201.00200": { - "read": 0.8903443813323975, - "watermark": 0.1604924201965332, - "image_extraction": 0.18567204475402832 + "read": 0.5687770843505859 }, "2201.00022": { - "read": 0.8163042068481445, - "watermark": 0.610785722732544, - "image_extraction": 1.2961516380310059 + "read": 0.5955770015716553 }, "2201.00029": { - "read": 0.6088814735412598, - "watermark": 0.07402253150939941, - "image_extraction": 0.26814889907836914 + "read": 0.47310757637023926 }, "1601.03642": { - "read": 0.35593676567077637, - "watermark": 0.12810969352722168, - "image_extraction": 0.21191930770874023 + "read": 0.27886438369750977 } }, - "tika": { + "pymupdf": { "2201.00214": { - "read": 17.824857473373413 + "read": 1.2560763359069824, + "watermark": 0.4893491268157959, + "image_extraction": 0.2912435531616211 }, "GeoTopo-book": { - "read": 100.1345567703247 + "read": 0.41425490379333496, + "watermark": 0.6550765037536621, + "image_extraction": 0.667384147644043 }, "2201.00151": { - "read": 0.6399288177490234 + "read": 0.6849265098571777, + "watermark": 0.20680928230285645, + "image_extraction": 0.0033898353576660156 }, "1707.09725": { - "read": 23.35584807395935 + "read": 0.31463146209716797, + "watermark": 0.5394175052642822, + "image_extraction": 2.269129753112793 }, "2201.00021": { - "read": 47.338046073913574 + "read": 0.12572789192199707, + "watermark": 0.08519124984741211, + "image_extraction": 0.6188168525695801 }, "2201.00037": { - "read": 48.305400133132935 + "read": 0.2286970615386963, + "watermark": 0.14116668701171875, + "image_extraction": 0.003799915313720703 }, "2201.00069": { - "read": 31.513932704925537 + "read": 0.0737600326538086, + "watermark": 0.10507345199584961, + "image_extraction": 3.3033792972564697 }, "2201.00178": { - "read": 34.47823882102966 + "read": 0.07315778732299805, + "watermark": 0.10161709785461426, + "image_extraction": 0.45417189598083496 }, "2201.00201": { - "read": 
0.11622309684753418 + "read": 0.061261653900146484, + "watermark": 0.06329631805419922, + "image_extraction": 0.47312164306640625 }, "1602.06541": { - "read": 13.186578750610352 + "read": 0.0915529727935791, + "watermark": 0.10732769966125488, + "image_extraction": 0.0887451171875 }, "2201.00200": { - "read": 0.12495112419128418 + "read": 0.06952333450317383, + "watermark": 0.046387434005737305, + "image_extraction": 0.003120899200439453 }, "2201.00022": { - "read": 24.17951250076294 + "read": 0.09096360206604004, + "watermark": 0.10521316528320312, + "image_extraction": 0.3852880001068115 }, "2201.00029": { - "read": 0.08729672431945801 + "read": 0.06443285942077637, + "watermark": 0.03647136688232422, + "image_extraction": 0.25585365295410156 }, "1601.03642": { - "read": 0.07596778869628906 + "read": 0.06111550331115723, + "watermark": 0.048032283782958984, + "image_extraction": 0.003945112228393555 } }, - "pdfplumber": { + "pypdf": { "2201.00214": { - "read": 86.39272856712341 + "read": 28.589249849319458, + "watermark": 0.7176375389099121, + "image_extraction": 24.738558769226074 }, "GeoTopo-book": { - "read": 22.65720844268799 + "read": 7.980882167816162, + "watermark": 2.3296380043029785, + "image_extraction": 7.01096773147583 }, "2201.00151": { - "read": 23.390413761138916 + "read": 8.237623691558838, + "watermark": 0.45946359634399414, + "image_extraction": 6.705368518829346 }, "1707.09725": { - "read": 14.161987543106079 + "read": 3.9532535076141357, + "watermark": 1.8308773040771484, + "image_extraction": 19.12919783592224 }, "2201.00021": { - "read": 4.160851240158081 + "read": 1.1517627239227295, + "watermark": 0.35332202911376953, + "image_extraction": 1.6376030445098877 }, "2201.00037": { - "read": 7.055023908615112 + "read": 1.8098814487457275, + "watermark": 0.598846435546875, + "image_extraction": 0.6979629993438721 }, "2201.00069": { - "read": 3.3328192234039307 + "read": 0.9310543537139893, + "watermark": 0.32545042037963867, + 
"image_extraction": 7.657184362411499 }, "2201.00178": { - "read": 3.220952272415161 + "read": 0.8240063190460205, + "watermark": 0.48301267623901367, + "image_extraction": 1.478360891342163 }, "2201.00201": { - "read": 2.854520559310913 + "read": 0.621835470199585, + "watermark": 0.21535801887512207, + "image_extraction": 1.6354153156280518 }, "1602.06541": { - "read": 4.377838373184204 + "read": 0.9688084125518799, + "watermark": 0.5814881324768066, + "image_extraction": 0.8102591037750244 }, "2201.00200": { - "read": 3.310704469680786 + "read": 0.9108552932739258, + "watermark": 0.15725207328796387, + "image_extraction": 0.18536925315856934 }, "2201.00022": { - "read": 3.518846035003662 + "read": 0.8084313869476318, + "watermark": 0.6330957412719727, + "image_extraction": 1.3017487525939941 }, "2201.00029": { - "read": 1.9474315643310547 + "read": 0.6995418071746826, + "watermark": 0.07724857330322266, + "image_extraction": 0.26739954948425293 }, "1601.03642": { - "read": 1.671983242034912 + "read": 0.35368847846984863, + "watermark": 0.12895679473876953, + "image_extraction": 0.3331313133239746 } }, - "playa": { + "tika": { "2201.00214": { - "read": 17.036799907684326 + "read": 14.126012086868286 }, "GeoTopo-book": { - "read": 5.419198036193848 + "read": 100.12433242797852 }, "2201.00151": { - "read": 4.287257194519043 + "read": 0.5522549152374268 }, "1707.09725": { - "read": 2.2673816680908203 + "read": 23.391923666000366 }, "2201.00021": { - "read": 0.6923933029174805 + "read": 47.53182625770569 }, "2201.00037": { - "read": 1.1081407070159912 + "read": 48.28274869918823 }, "2201.00069": { - "read": 0.5646071434020996 + "read": 26.16471815109253 }, "2201.00178": { - "read": 0.6794044971466064 + "read": 34.607691526412964 }, "2201.00201": { - "read": 0.3750267028808594 + "read": 0.09984779357910156 }, "1602.06541": { - "read": 0.6215255260467529 + "read": 13.2386314868927 }, "2201.00200": { - "read": 0.5109498500823975 + "read": 0.13814258575439453 }, 
"2201.00022": { - "read": 0.5388374328613281 + "read": 24.02240300178528 }, "2201.00029": { - "read": 0.4311091899871826 + "read": 0.09504485130310059 }, "1601.03642": { - "read": 0.2250828742980957 + "read": 0.08772063255310059 } } }, @@ -560,9 +560,9 @@ }, "pdfminer": { "2201.00214": 0.9487280293804596, - "GeoTopo-book": 0.7883106543377503, + "GeoTopo-book": 0.7883172614741182, "2201.00151": 0.8602045202371076, - "1707.09725": 0.9190023540909966, + "1707.09725": 0.9189694626000253, "2201.00021": 0.8588197275011207, "2201.00037": 0.9301479087658201, "2201.00069": 0.9540472289854548, @@ -574,6 +574,22 @@ "2201.00029": 0.975523516322736, "1601.03642": 0.8623963054819123 }, + "pdfplumber": { + "2201.00214": 0.9617839460759947, + "GeoTopo-book": 0.881156947575813, + "2201.00151": 0.8834078325527807, + "1707.09725": 0.9778034125495448, + "2201.00021": 0.9165505666048686, + "2201.00037": 0.9398528606089066, + "2201.00069": 0.931573476258142, + "2201.00178": 0.9505142881280757, + "2201.00201": 0.931133252859218, + "1602.06541": 0.9735710510150145, + "2201.00200": 0.9366531687427314, + "2201.00022": 0.7638770612371294, + "2201.00029": 0.9926702855215138, + "1601.03642": 0.9819316802496966 + }, "pdfrw": {}, "pdftotext": { "2201.00214": 0.9600762653108389, @@ -591,6 +607,22 @@ "2201.00029": 0.9649219467401285, "1601.03642": 0.7867700010287713 }, + "playa": { + "2201.00214": 0.9892356749444391, + "GeoTopo-book": 0.9539066558356663, + "2201.00151": 0.9364750771229096, + "1707.09725": 0.9849325894160281, + "2201.00021": 0.9833233125534002, + "2201.00037": 0.9599506720927364, + "2201.00069": 0.9795408772361502, + "2201.00178": 0.9805815986016209, + "2201.00201": 0.9875833300845971, + "1602.06541": 0.991808318903064, + "2201.00200": 0.9857781717094396, + "2201.00022": 0.7775499398315283, + "2201.00029": 0.9772836250299688, + "1601.03642": 0.9931662087912088 + }, "pymupdf": { "2201.00214": 0.9780473882293753, "GeoTopo-book": 0.957868684569868, @@ -608,9 +640,9 @@ "1601.03642": 
0.9884500360936372 }, "pypdf": { - "2201.00214": 0.9876438905903474, + "2201.00214": 0.9875784744753969, "GeoTopo-book": 0.9519678772970627, - "2201.00151": 0.9317966019515546, + "2201.00151": 0.9316025356320911, "1707.09725": 0.9834021823012359, "2201.00021": 0.9852542946602353, "2201.00037": 0.9643816837117355, @@ -630,52 +662,21 @@ "1707.09725": 0.9520265054911324, "2201.00021": 0.770705041657062, "2201.00037": 0.8572065203619317, - "2201.00069": 0.8050052291240113, - "2201.00178": 0.8168859839727433, + "2201.00069": 0.8213549890969246, + "2201.00178": 0.8169214856278629, "2201.00201": 0.981721720946443, "1602.06541": 0.8827184830564161, "2201.00200": 0.9774490203918432, - "2201.00022": 0.6698799418093457, + "2201.00022": 0.6698371085569961, "2201.00029": 0.9828859664925239, "1601.03642": 0.9551993153165015 - }, - "pdfplumber": { - "2201.00214": 0.9624093076027349, - "GeoTopo-book": 0.8932082690274208, - "2201.00151": 0.8857353838250874, - "1707.09725": 0.977952891119146, - "2201.00021": 0.9174005666220104, - "2201.00037": 0.9432015121388418, - "2201.00069": 0.9320623652220378, - "2201.00178": 0.9530470165622914, - "2201.00201": 0.9316913879761284, - "1602.06541": 0.9741434157570039, - "2201.00200": 0.9378122018297131, - "2201.00022": 0.7645679514756893, - "2201.00029": 0.9927616243405717, - "1601.03642": 0.982476230133944 - }, - "playa": { - "2201.00214": 0.9849718101175701, - "GeoTopo-book": 0.9322408377322038, - "2201.00151": 0.931934317626386, - "1707.09725": 0.9802441595024175, - "2201.00021": 0.9786885590106452, - "2201.00037": 0.9520637063559613, - "2201.00069": 0.9722151461115761, - "2201.00178": 0.9684549898853675, - "2201.00201": 0.9835579567396774, - "1602.06541": 0.9856410256410256, - "2201.00200": 0.9821904061015588, - "2201.00022": 0.7724464182274571, - "2201.00029": 0.9566110247308358, - "1601.03642": 0.9919385269802065 } }, "watermarking_result_file_size": { "borb": {}, "pdfium": {}, "pdfminer": {}, + "pdfplumber": {}, "pdfrw": { "2201.00214": 
2515466.0, "GeoTopo-book": 5738184.0, @@ -693,6 +694,7 @@ "1601.03642": 1026759.0 }, "pdftotext": {}, + "playa": {}, "pymupdf": { "2201.00214": 2716298.0, "GeoTopo-book": 6857999.0, @@ -704,7 +706,7 @@ "2201.00178": 2521012.0, "2201.00201": 1402129.0, "1602.06541": 3166433.0, - "2201.00200": 342826.0, + "2201.00200": 342836.0, "2201.00022": 1302162.0, "2201.00029": 935908.0, "1601.03642": 1092594.0 @@ -725,8 +727,6 @@ "2201.00029": 830154.0, "1601.03642": 1014378.0 }, - "tika": {}, - "pdfplumber": {}, - "playa": {} + "tika": {} } } \ No newline at end of file diff --git a/read/results/pdfminer/1602.06541.txt b/read/results/pdfminer/1602.06541.txt index 202d224..48f4936 100644 --- a/read/results/pdfminer/1602.06541.txt +++ b/read/results/pdfminer/1602.06541.txt @@ -1969,12 +1969,12 @@ J. 13 -30, - 10, no. +30, + Analysis 1699–1712, diff --git a/read/results/pdfminer/1707.09725.txt b/read/results/pdfminer/1707.09725.txt index 87a0bfc..8e21522 100644 --- a/read/results/pdfminer/1707.09725.txt +++ b/read/results/pdfminer/1707.09725.txt @@ -6211,18 +6211,18 @@ Softsign Softmax +88.41 % σ = 0.36 + 81.46 % σ = 5.08 88.19 % σ = 0.31 -88.41 % σ = 0.36 - 87.92 % σ = 0.40 -79.67 % σ = 4.85 - 84.70 % σ = 0.15 +79.67 % σ = 4.85 + 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 @@ -6248,10 +6248,10 @@ ELU 84.46 % σ = 0.23 -88.61 % σ = 0.41 - 88.00 % σ = 0.47 +88.61 % σ = 0.41 + Softplus ReLU @@ -6324,16 +6324,16 @@ ELU 75.5 -83.2 +80.1 78.8 -80.1 - -67.2 +83.2 68.9 +67.2 + Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on STL-10. For LReLU, α = 0.3 was chosen. @@ -8596,11 +8596,11 @@ Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3- S. E. Fahlman, “An empirical study of learning speed in back-propagation http://repository.cmu.edu/cgi/ +[Online]. Available: + networks,” viewcontent.cgi?article=2799&context=compsci -[Online]. Available: - 1988. L. Fei-Fei, R. Fergus, and P. 
Perona, diff --git a/read/results/pdfminer/GeoTopo-book.txt b/read/results/pdfminer/GeoTopo-book.txt index 151eec7..05b93d1 100644 --- a/read/results/pdfminer/GeoTopo-book.txt +++ b/read/results/pdfminer/GeoTopo-book.txt @@ -51,10 +51,10 @@ in „Analysis I“ vermittelt. ), Mengenschreibweisen ( , -∩ +∪ , -∪ +∩ P @@ -273,10 +273,10 @@ sind als Komplement offener Mengen abgeschlossen. X = -\ - ∅ +\ + T und (cid:4) @@ -447,10 +447,10 @@ sind dort alle abgeschlossenen Mengen. { -{ - } +{ + , 0 @@ -1385,10 +1385,10 @@ x = y Ux -∩ - ∈ +∩ + Uy = ∅ Uy für alle n @@ -2183,10 +2183,10 @@ R ∩ X, wobei X ein topologischer Raum ist. -∩ - ∪ +∩ + 6) R mit Zariski-Topologie ist zusammenhängend. Bemerkung 14 @@ -7537,10 +7537,10 @@ R) → -(cid:55)→ - → +(cid:55)→ + × I @@ -8855,10 +8855,10 @@ z Abbildungen. -} - (cid:107) +} + ∈ 1) f1 := idR ist eine offene und stetige Abbildung. 2) g(x) := e2πix ist eine offene, aber keine stetige Abbildung (vgl. Abbildung 1.5). @@ -9034,10 +9034,10 @@ Vi Vj = -∃ - ∀ +∃ + ∈ ∈ @@ -9079,10 +9079,10 @@ yi ∩ -{ - } +{ + . ∅ @@ -10188,10 +10188,10 @@ von y. ⊆ -U ein Homöomorphismus. Dann ist W := f −1(V ) - Y +U ein Homöomorphismus. Dann ist W := f −1(V ) + × ∈ @@ -10234,10 +10234,10 @@ f (y0) = g(y0), so ist (g−1 − -1 - } +1 + schon Fix(f ) = Y , also f = idY . = @@ -11440,6 +11440,8 @@ Diese Teilmengen Hi heißen Halbebenen bzgl. g. für alle A +gilt: AB + Hi, B ∈ { @@ -11455,8 +11457,6 @@ Hi, B H2 in zwei nichtleere Teilmengen H1, H2, sodass = -gilt: AB - = j. g @@ -12447,10 +12447,10 @@ g = (cid:4) -} - { +} + ⇒ ⇒ @@ -13660,18 +13660,18 @@ Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 1 − -1 - -0 - -3 - 2 +1 + 5 +0 + 4 +3 + x 6 @@ -17773,10 +17773,10 @@ phismus. H, x -(cid:55)→ - → +(cid:55)→ + 3) Sei X ein topologischer Raum. Dann ist idX ein Homöomorphismus. Da keine Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grup- penhomomorphismus. 
@@ -18038,10 +18038,10 @@ R \ { -} - 0 +} + 0 ) diff --git a/read/results/pdfplumber/1601.03642.txt b/read/results/pdfplumber/1601.03642.txt index 8c74671..87845f6 100644 --- a/read/results/pdfplumber/1601.03642.txt +++ b/read/results/pdfplumber/1601.03642.txt @@ -110,14 +110,20 @@ An important group of machine learning algorithms was inspired by biological neurons and are thus called artificial neural networks. Those networks are based on mathematical functions called artificial neurons which take n ∈ N num- -bers x 1,...,x +bers x +1 +,...,x n ∈ R as input, multiply them with weights -w 1,...,w +w +1 +,...,w n ∈ R, add them and apply a so called activation function ϕ as visualized in Figure 1(a). One example of such -an activation function is the sigmoid function ϕ(x)= 1 1+e−x. +an activation function is the sigmoid function ϕ(x)= 1 +1+e−x +. Those functions act as building blocks for more complex systems as they can be chained and grouped in layers as visualized in Figure 1(b). The interesting question is how @@ -127,11 +133,40 @@ are learned. This is usually done by an optimization technique called gradient descent. The gradient descent algorithm takes a function which has to be derivable, starts at any point of the surface of this error function and -arXiv:1601.03642v1 -[cs.CV] -12 -Jan -2016 +a +r +X +i +v +: +1 +6 +0 +1 +. +0 +3 +6 +4 +2 +v +1 +[ +c +s +. +C +V +] +1 +2 +J +a +n +2 +0 +1 +6 2 makes a step in the direction which goes downwards. Hence it tries to find a minimum of this high-dimensional function. @@ -370,9 +405,13 @@ Recurrent neural networks — LSTM networks, to be exact music. Instead of taking notes directly or MIDI files, Nayebi and Vitelli took raw audio waveforms as input. Those audio waveformsarefeaturevectorsgivenfortimesteps0,1,...,t− -1,t. The network is given those feature vectors X 1,...,X +1,t. The network is given those feature vectors X +1 +,...,X t -and has to predict the following feature vector X t+1. 
This +and has to predict the following feature vector X +t+1 +. This means it continues the music. As the input is continuous, the problem was modeled as a regression task. Discrete Fourier Transformation (DFT) was used on chunks of length N of the diff --git a/read/results/pdfplumber/1602.06541.txt b/read/results/pdfplumber/1602.06541.txt index a5be9a3..47fcea8 100644 --- a/read/results/pdfplumber/1602.06541.txt +++ b/read/results/pdfplumber/1602.06541.txt @@ -94,11 +94,39 @@ tion algorithms, there is a publication about multiple class affiliation segmentation [LRAL08]. Similarly, recent publications in pixel-level object segmentation used layered models [YHRF12]. -arXiv:1602.06541v2 -[cs.CV] -11 -May -2016 +a +r +X +i +v +: +1 +6 +0 +2 +. +0 +6 +5 +4 +1 +v +2 +[ +c s +. +C +V +] +1 +1 +M +a +y +2 +0 +1 +6 2 C. Input Data The available data which can be used for the @@ -178,10 +206,14 @@ ij 0 with i,j ∈1,...,k be the number of pixels which belong to class i and were labeled as class j. -(n ij) is called a confusion matrix. Let t +(n +ij +) is called a confusion matrix. Let t i -=(cid:80)k -j=1n += +(cid:80)k +j=1 +n ij be the total number of pixels of class i. One way to compare segmentation algorithms is by @@ -189,8 +221,12 @@ the pixel-wise accuracy of the predicted segmentation as done in many publications [SWRC06], [CP08], [LSD14]. This is also called per-pixel rate and de- fined as -(cid:80)k i=1nii -(cid:80)k i=1ti +(cid:80)k +i=1 +nii +(cid:80)k +i=1 +ti . 
Taking the pixel-wise classification accuracy has two major drawbacks: P1 Taskslikesegmentingimagesforautonomouscars @@ -209,18 +245,28 @@ general “car” and the more specific “wheel of a car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: -• mean accuracy: 1 k ·(cid:80)k i=1 nii ti ∈[0,1] +• mean accuracy: 1 k · (cid:80)k i=1 nii ti ∈[0,1] • mean intersection over union: 1 k -·(cid:80)k +· (cid:80)k i=1 nii -ti−nii+(cid:80)k j=1nji +ti−nii+(cid:80)k j=1 nji ∈[0,1] • frequency weighted intersection over union: -((cid:80)k i=1t i)−1(cid:80)k i=1t i· nii -ti−nii+(cid:80)k j=1nji +( (cid:80)k +i=1 +t +i +) −1(cid:80)k +i=1 +t +i +· nii +ti−nii+(cid:80)k +j=1 +nji ∈[0,1] Another problem might be pixels which cannot be assigned to one of the known classes. For this reason, @@ -670,7 +716,7 @@ Forestsforsemanticsegmentationisgivenby[SCZ08]. D. SVMs SVMs are well-studied binary classifiers which can be described by five central ideas. For those ideas, the -training data is represented as (x i,y i) where x i is the +training data is represented as (x i ,y i ) where x i is the feature vector and y i ∈ {−1,1} the binary label for training example i∈{1,...,m}. 1) If data is linearly separable, it can be separated @@ -680,8 +726,15 @@ maximizes the distance to the next datapoints minimize w,b 1 -2(cid:107)w(cid:107)2 -s.t. ∀m i=1y i·((cid:104)w,x i(cid:105)+b) +2 +(cid:107)w(cid:107)2 +s.t. ∀m +i=1 +y +i +·((cid:104)w,x +i +(cid:105)+b) (cid:124) (cid:123)(cid:122) (cid:125) sgn appliedtothisgivestheclassification ≥1 @@ -692,15 +745,17 @@ duction ofslackvariables to relaxthe requirement of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter -C ∈ R+ 0. The bigger C, the more errors are +C ∈ R+ +0 +. The bigger C, the more errors are accepted. 
The new optimization problem is: -minimize -w +minimize w 1 -2(cid:107)w(cid:107)2+C· +2 (cid:107)w(cid:107)2+C· m (cid:88) -i=1ξ i -s.t. ∀m i=1y i·((cid:104)w,x i(cid:105)+b)≥1−ξ i +i=1 +ξ i +s.t. ∀m i=1 y i ·((cid:104)w,x i (cid:105)+b)≥1−ξ i Note that 0 ≤ ξ i ≤ 1 means that the data point @@ -711,10 +766,17 @@ misclassified. An SVM with C >0 is also called a soft-margin SVM. 3) The primal problem is to find the normal vector w and the bias b. The dual problem is to express -w as a linear combination of the training data x i: +w as a linear combination of the training data x +i +: w= m (cid:88) -i=1α iy ix +i=1 +α +i +y +i +x i where y i @@ -741,7 +803,10 @@ maximize αi m (cid:88) -i=1α i− +i=1 +α +i +− 1 2 m @@ -749,37 +814,85 @@ m i=1 m (cid:88) -j=1α iα jy iy j(cid:104)x i,x j(cid:105) -s.t. ∀m i=10≤α +j=1 +α +i +α +j +y +i +y +j +(cid:104)x +i +,x +j +(cid:105) +s.t. ∀m +i=1 +0≤α i ≤C s.t. m (cid:88) -i=1α iy i =0 +i=1 +α i y i =0 8 4) Not every dataset is linearly separable. This prob- lem is approached by transforming the feature vectors x with a non-linear mapping Φ into a higher dimensional (probably ∞-dimensional) space. As the feature vectors x are only used -within scalar product (cid:104)x i,x j(cid:105), it is not necessary +within scalar product (cid:104)x +i +,x +j +(cid:105), it is not necessary to do the transformation. It is enough to do the calculation -K(x i,x j)=(cid:104)x i,x j(cid:105) +K(x +i +,x +j +)=(cid:104)x +i +,x +j +(cid:105) This function K is called a kernel. The idea of never explicitly transforming the vectors x i to the higher dimensional space is called the kernel trick. 
Common kernels include the polynomial kernel -K P(x i,x j)=((cid:104)x i,x j(cid:105)+r)p +K +P +(x +i +,x +j +)=((cid:104)x +i +,x +j +(cid:105)+r)p of degree p and coefficient r, the Gaussian radial basis function (RBF) kernel -K Gauss(x i,x -j)=e−γ(cid:107)xi−xj(cid:107)2 +K Gauss (x i ,x j )=e +−γ(cid:107)xi−xj(cid:107)2 2σ2 and the sigmoid kernel -K tanh(x i,x j)=tanh(γ(cid:104)x i,x j(cid:105)−r) +K +tanh +(x +i +,x +j +)=tanh(γ(cid:104)x +i +,x +j +(cid:105)−r) where the parameter γ determines how much influence single training examples have. 5) ThedescribedSVMscanonlydistinguishbetween @@ -885,16 +998,37 @@ typically live on 0,...,255 or [0,1]. The probability of x,y can be expressed as P(x,y)= 1 -Ze−E(x,y) -where Z = (cid:80) x,ye−E(x,y) is a normalization term +Z +e−E(x,y) +where Z = (cid:80) +x,y +e−E(x,y) is a normalization term called the partition function and E is called the energy function. A common choice for the energy function is -E(x,y)=(cid:88) -c∈Cψ c(x,y) +E(x,y)= +(cid:88) +c∈C +ψ +c +(x,y) where ψ is called a clique potential. One choice for -cliques of size two x,y=(x 1,x 2) is [KP06] -ψ c(x 1,x 2)=wδ(x 1,x -2)=(cid:40) +cliques of size two x,y=(x +1 +,x +2 +) is [KP06] +ψ +c +(x +1 +,x +2 +)=wδ(x +1 +,x +2 +)= +(cid:40) +w if x 1 (cid:54)=x @@ -921,7 +1055,8 @@ not have to be estimated. Another advantage of CRFs compared to MRFs is that no distribution assumption about x has to be made. A CRF has the partition function Z: -Z(x)=(cid:88) +Z(x)= +(cid:88) y P(x,y) and joint probability distribution @@ -929,7 +1064,12 @@ P(y|x)= 1 Z(x) (cid:89) -c∈Cψ c(y c|x) +c∈C +ψ +c +(y +c +|x) The simplest way to define the clique potentials ψ is the count of the class y c given x added with a positive smoothing constant to prevent the complete term from @@ -990,10 +1130,12 @@ clever regularization called dropout training, which set the output of neurons while training randomly to zero. 
Another contribution was the usage of an activation function called rectified linear unit: -ϕ ReLU(x)=max(0,x) +ϕ ReLU (x)=max(0,x) Those are much faster to train than the commonly used sigmoid activation functions -ϕ Sigmoid(x)= +ϕ +Sigmoid +(x)= 1 e−x+1 Krizhevsky et al. implemented those ideas and partici- diff --git a/read/results/pdfplumber/1707.09725.txt b/read/results/pdfplumber/1707.09725.txt index 5dfd974..a8aac29 100644 --- a/read/results/pdfplumber/1707.09725.txt +++ b/read/results/pdfplumber/1707.09725.txt @@ -12,11 +12,40 @@ Second reviewer: Prof. Dr.–Ing. J. M. Zöllner Advisor: Dipl.–Inform. Michael Weber Research Period: 03. May 2017 – 03. August 2017 KIT–UniversityoftheStateofBaden-WuerttembergandNationalResearchCenteroftheHelmholtzAssociation www.kit.edu -arXiv:1707.09725v1 -[cs.CV] -31 -Jul -2017 +a +r +X +i +v +: +1 +7 +0 +7 +. +0 +9 +7 +2 +5 +v +1 +[ +c +s +. +C +V +] +3 +1 +J +u +l +2 +0 +1 +7 Analysis and Optimization of Convolutional Neural Network Architectures @@ -236,7 +265,7 @@ This chapter introduces linear image filters in Section 2.1, then standard layer CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. 2.1. Linear Image Filters -A linear image filter (also called a filter bank or a kernel) is an element F ∈ Rkw×k h×d, +A linear image filter (also called a filter bank or a kernel) is an element F ∈ Rkw×k h ×d, where k w represents the filter’s width, k @@ -251,12 +280,25 @@ I(cid:48)(x,y) = ix=1−(cid:100)kw 2 (cid:101) -(cid:98)kh 2 (cid:99) (cid:88) -iy=1−(cid:100)kh +(cid:98) kh 2 (cid:99) (cid:88) +iy=1−(cid:100) kh 2 (cid:101) d (cid:88) -ic=1I(x+i x,y+i y,i c)·F(i x,i y,i c) +ic=1 +I(x+i +x +,y+i +y +,i +c +)·F(i +x +,i +y +,i +c +) This procedure is explained by Figure 2.1. It is essentially a discrete convolution. 
I ∈ R7×7 Filterkernel @@ -288,7 +330,9 @@ outputimage,k2 multiplicationsandk2 additionsoftheproductshavetobecalculated. One important detail is how boundaries are treated. There are four common ways of boundary treatment: • don’t compute: The image I(cid:48) will be smaller than the original image. I(cid:48) ∈ -R(w−kw+1)×(h−k h+1)×d3, to be exact. +R(w−kw+1)×(h−k +h ++1)×d3 , to be exact. • zero padding: The image I is padded by zeros where the filter would access elements which do not exist. This will result in edges being detected at the border if the border pixels are not black, but doesn’t need any computation. @@ -337,10 +381,16 @@ w ×k h are hyperparameters of convolutional -layers. Sometimes, it is denoted as n@k w×k h. Although the filter depth is usually omitted +layers. Sometimes, it is denoted as n@k +w +×k +h +. Although the filter depth is usually omitted in the notation, the filters are of dimension k w -×k h×d(i−1), where d(i−1) is the number of +×k +h +×d(i−1), where d(i−1) is the number of feature maps of the input layer (i−1). Another hyperparameter of convolution layers is the stride s ∈ N ≥1 @@ -348,14 +398,20 @@ and the padding. Padding (usually zero-padding [SCL12, SEZ+13, HZRS15a]) is used to make sure that the size of the feature maps doesn’t change. 
The hyperparameters of convolutional layers are -• the number of filters n ∈ N ≥1, -• k w,k +• the number of filters n ∈ N +≥1 +, +• k +w +,k h ∈ N ≥1 of the filter size k w -×k h×d(i−1), +×k +h +×d(i−1), • the activation function of the layer (see Table B.3) and • the stride s ∈ N ≥1 @@ -376,7 +432,8 @@ This is easier to see when the filtering operation is denoted formally: o(i)(x) = b+ k (cid:88) -j=1w +j=1 +w ij ·x j @@ -386,12 +443,27 @@ o(x,y,z)(I) = b+ ix=1−(cid:100)kw 2 (cid:101) -(cid:98)kh 2 (cid:99) (cid:88) -iy=1−(cid:100)kh +(cid:98) kh 2 (cid:99) (cid:88) +iy=1−(cid:100) kh 2 (cid:101) d (cid:88) -ic=1F z(i x,i y,i c)·I(x+i x,y+i y,i c) [2.2] +ic=1 +F +z +(i +x +,i +y +,i +c +)·I(x+i +x +,y+i +y +,i +c +) [2.2] with a bias b ∈ R, x ∈ {1,...,w}, y ∈ {1,...,h} and z ∈ {1,...,d} One can see that most weights of the equivalent MLP are zero and many weights are equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. @@ -405,8 +477,24 @@ See Figure 2.2 for a visualization of the application of a convolutional layer. n feature maps n filters of size k×k×3 -widthw widthw -heighth heighth +wi +d t +h +w +wi +d t +h +w +h +e i +g +h t +h +h +e i +g +h t +h neural network data @@ -426,13 +514,21 @@ w ×k h and SAME padding after d(i−1) feature -maps of size s x×s +maps of size s +x +×s y -has n·d(i−1)·(k w·k h) parameters if no bias is used. In contrast, a fully +has n·d(i−1)·(k +w +·k +h +) parameters if no bias is used. In contrast, a fully connected layer which produces the same output size and does not use a bias would have n·d(i−1) ·(s x -×s y)2 parameters. This means a convolutional layer has drastically fewer +×s +y +)2 parameters. This means a convolutional layer has drastically fewer parameters. Onetheonehand, thismeansitcanlearnlesscomplexdecisionboundaries. On the other hand, it means fewer parameters have to be learned and hence the optimization procedure needs fewer examples and the optimization objective is simpler. 
@@ -457,7 +553,9 @@ omitted if a convolution layer without padding and a filter size equal to the fe size is applied. This was used by [LSD15]. 2.2.2. Pooling Layers Pooling summarizes a p×p area of the input feature map. Just like convolutional layers, -pooling can be used with a stride of s ∈ N >1. As s ≥ 2 is the usual choice, pooling layers +pooling can be used with a stride of s ∈ N +>1 +. As s ≥ 2 is the usual choice, pooling layers are sometimes also called subsampling layers. Typically, p ∈ {2,3,4,5} and s = 2 such as for AlexNet [KSH12] and VGG-16 [SZ14]. The type of summary for the set of activations A varies between the functions listed @@ -470,10 +568,14 @@ Name Definition Used by Max pooling max{a ∈ A} [BPL10, KSH12] Average / mean pooling 1 |A| -(cid:80) a∈Aa LeNet-5 [LBBH98] and [KSlB+10] +(cid:80) +a∈A +a LeNet-5 [LBBH98] and [KSlB+10] (cid:96) 2 -pooling (cid:112)(cid:80) a∈Aa2 [Le13] +pooling (cid:112)(cid:80) +a∈A +a2 [Le13] Stochastic pooling * [ZF13] Table 2.1.: Pooling types for a set A of activations a∈R. (*)Forstochasticpooling,eachofthep×pactivationvaluesa @@ -483,11 +585,15 @@ pickedwithprobabilityp i = ai (cid:80) -aj∈Aaj. Thisassumestheactivationsa +aj∈A +aj +. Thisassumestheactivationsa i arenon-negative. Pooling is applied for three reasons: To get local translational invariance, to get invariance -against minor local changes and, most important, for data reduction to 1 s2th of the data by +against minor local changes and, most important, for data reduction to 1 +s2 +th of the data by using strides of s > 1. See Figure 2.3 for a visualization of max pooling. 7 9 3 5 9 4 @@ -523,7 +629,8 @@ p2 1 p2 ... 1 -p2 +p2 +    @@ -531,7 +638,8 @@ for the dimension i and the zero matrix    -0 ... 0 + +0 ... 0 . . . @@ -541,8 +649,8 @@ for the dimension i and the zero matrix . . . -0 ... -0 +0 ... 
0 +    @@ -565,7 +673,9 @@ i where (cid:12) is the Hadamard product (A(cid:12)B) i,j -:= (A) i,j(B) +:= (A) +i,j +(B) i,j Hence every value of the input gets set to zero with a dropout probability of p. Typically, Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout prob- @@ -601,17 +711,21 @@ x(k)− ¯ x(k) s(cid:48)[x(k)]2+ε with ¯ x(k) = 1 m -(cid:80)m i=1x(k) +(cid:80)m +i=1 +x (k) i being the sample mean and s(cid:48)[x(k)]2 = 1 m -(cid:80)m i=1(x(k) +(cid:80)m +i=1 +(x (k) i − ¯ x(k)) the sample variance where m ∈ N ≥1 is the number of training samples per mini-batch, ε > 0 -being a small constant to prevent division by zero and x(k) +being a small constant to prevent division by zero and x (k) i is the activation of neuron k for training sample i. @@ -667,13 +781,23 @@ describes a traditional CNN. Note that this could be multiple layers. A residual visualized in Figure 2.4 is described by y = H(x)+x In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if -conv i(x i) describes the application of the convolutional layer i to the input x +conv +i +(x +i +) describes the application of the convolutional layer i to the input x i without the nonlinearity, then such a residual block is x i+2 -= conv i+1(ReLU(conv i(x i)))+x += conv +i+1 +(ReLU(conv +i +(x +i +)))+x i Figure 2.4.: ResNet module Image source: [HZRS15a] @@ -684,7 +808,9 @@ Image source: [HZRS15a] Two common ways to add more parameters to neural networks are increasing their depth by adding more layers or increasing their width by adding more neurons / filters. Inception blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+16] as -“ResNeXt block”: Increasing the cardinality C ∈ N ≥1. By cardinality, the authors describe +“ResNeXt block”: Increasing the cardinality C ∈ N +≥1 +. 
By cardinality, the authors describe the concept of having C small convolutional networks with the same topology but different weights. This concept is visualized in Figure 2.5. Please note that Figure 2.5 does not combine aggregation blocks with residual blocks as the authors did. @@ -707,7 +833,9 @@ the same topology, the learned weights are different. The outputs of the groups concatenated. The hyperparameters of an aggregation block are: • The topology of the group members. -• The cardinality C ∈ N ≥1. Note that a cardinality of C = 1 is equivalent in every +• The cardinality C ∈ N +≥1 +. Note that a cardinality of C = 1 is equivalent in every aspect to using the group network without an aggregation block. 12 2.3. CNN Blocks @@ -816,13 +944,23 @@ is the number of times items of class i were classified as class j. This means the correct classification is on the diagonal c ii -and all wrong classifications are of the diagonal. The sum (cid:80)K i=1(cid:80)K j=1c +and all wrong classifications are of the diagonal. The sum (cid:80)K +i=1 +(cid:80)K +j=1 +c ij is the -total number of samples which were evaluated and (cid:80) i=1cii -(cid:80)K i=1(cid:80)K j=1cij +total number of samples which were evaluated and (cid:80) i=1 cii +(cid:80)K +i=1 +(cid:80)K +j=1 +cij is the accuracy. -The sums r(i) = (cid:80)K j=1c +The sums r(i) = (cid:80)K +j=1 +c ij of each class i are worth being investigated as they show if the classes are skewed. If the number of samples of one class dominates the data set, then the @@ -833,7 +971,8 @@ An automatic criterion to check for this problem is accuracy ≤ max({r(i) | i = 1,...,k}) (cid:80)k -i=1r(i) +i=1 +r(i) +ε where ε is a small value to compensate the fact that some examples might be correct just by chance. @@ -849,15 +988,24 @@ r(k) If s(i) is much lower than s(j), it is an indicator that more or cleaner training data is necessary for s(i). 
The class-wise confusion -f confusability(k 1,k 2) = +f +confusability +(k +1 +,k +2 +) = c k1k2 (cid:80)K -j=1c +j=1 +c k1j indicates if class k 1 -gets often classified as class k 2. The highest values here can indicate +gets often classified as class k +2 +. The highest values here can indicate if two classes should be merged or a specialized model for separating those classes could improve the overall system. 2.5.3. Validation Curves: Accuracy, loss and other metrics @@ -899,32 +1047,53 @@ with (cid:96) or (cid:96) 2 regularization, as it was described in [NH92]: -E CE(W) = -−(cid:88) +E +CE +(W) = − +(cid:88) x∈X K (cid:88) -k=1[tx klog(ox k)+(1−tx k)log(1−ox k)] +k=1 +[tx +k +log(ox +k +)+(1−tx +k +)log(1−ox +k +)] (cid:124) (cid:123)(cid:122) (cid:125) cross-entropydataloss -+λ 1· ++λ +1 +· (cid:96)1 (cid:122) (cid:125)(cid:124) (cid:123) (cid:88) -w∈W|w|+λ 2· +w∈W +|w|+λ +2 +· (cid:96)2 (cid:122) (cid:125)(cid:124) (cid:123) (cid:88) -w∈Ww2 +w∈W +w2 (cid:124) (cid:123)(cid:122) (cid:125) modelcomplexityloss -where W are the weights, X is the training data set, K ∈ N ≥0 is the number of classes and +where W are the weights, X is the training data set, K ∈ N +≥ +0 is the number of classes and tx k indicates if the training example x is of class k. ox k is the output of the classification -algorithm which depends on the weights. λ 1,λ +algorithm which depends on the weights. λ +1 +,λ 2 ∈ [0,∞) weights the regularization and is typically smaller than 0.1. @@ -960,16 +1129,20 @@ the number of times the real class was i and j was predicted. This means the diagonal contains the number of correct predictions. For the following, let t i -= (cid:80)k j=1c += (cid:80)k +j=1 +c ij be the number of training samples for class i. The most common quality criterion is accuracy: accuracy(c) = (cid:80)k -i=1c +i=1 +c ii (cid:80)k -i=1t +i=1 +t i ∈ [0,1] One problem of accuracy as a quality criterion are skewed classes. 
If one class is by far @@ -986,7 +1159,9 @@ c ii t i ∈ [0,1] -For two-class problems there are many other metrics like precision, recall and F β-score. +For two-class problems there are many other metrics like precision, recall and F +β +-score. Quality criteria for semantic segmentation are explained in [Tho16]. Besides the quality of the classification result, several other quality criteria are important in practice: @@ -1076,11 +1251,19 @@ In [SVZ13], a gradient-based approach was used to generate image-specific class maps. The authors describe the problem as a ranking problem, where each pixel of the image I 0 -is assigned a score S c(I 0) for a class c of interest. CNNs are non-linear functions, -but they can be approximated by the first order Taylor expansion S c(I) ≈ wTI +b where +is assigned a score S +c +(I +0 +) for a class c of interest. CNNs are non-linear functions, +but they can be approximated by the first order Taylor expansion S +c +(I) ≈ wTI +b where w is the derivative of S c -at I 0. +at I +0 +. 21 2. Convolutional Neural Networks 2.5.6. Argmax Method @@ -1122,12 +1305,21 @@ If the set of learned filters changes with initialization, this might be an indi little capacity of that layer. Hence adding more filters to that layer could improve the performance. Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: -ρ k(W i,W j) = max +ρ +k +(W +i +,W +j +) = max (x,y)∈{−k,...,k}2\(0,0) -(cid:104)W i,T(W j,x,y)(cid:105) +(cid:104)W +i +,T(W +j +,x,y)(cid:105) f -(cid:107)W i(cid:107) 2(cid:107)W j(cid:107) -2 +(cid:107)W i (cid:107) 2 (cid:107)W j (cid:107) 2 ∈ [−1,1], where T(·,x,y) denotes the translation of the first operand by (x,y), with zero padding at the borders to keep the shape. (cid:104)·,·(cid:105) @@ -1135,21 +1327,33 @@ f denotes the flattened inner product, where the two operands are flattened into column vectors before applying the standard inner product. 
The closer the absolute value of the k-translation correlation to one, the more similar two filters -W i,W +W +i +,W j are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found this by comparing the averaged maximum k-translational correlation of the networks with Gaussian-distributed initialized filters. The averaged maximum k-translational correlation is defined as -¯ ρ k(W) = +¯ ρ +k +(W) = 1 N N (cid:88) i=1 N -max j=1,j(cid:54)=iρ k(W i,W j) +max +j=1,j(cid:54)=i +ρ +k +(W +i +,W +j +) where N is the number of filters in the layer W and W i denotes the ith filter. @@ -1225,12 +1429,16 @@ connected to the output nodes. 4. Correlation Maximization: Train the weights of the candidates by maximizing S, the correlation between candidates output value V with the networks residual error: S = (cid:88) -o∈O(cid:12) +o∈O +(cid:12) (cid:12) (cid:12) (cid:12) (cid:12) -(cid:12)(cid:88) p∈T(cid:0) V p− ¯ V(cid:1) (E p,o− ¯ E -o)(cid:12) +(cid:12) +(cid:88) +p∈T +(cid:0) V p − ¯ V (cid:1) (E p,o − ¯ E o ) +(cid:12) (cid:12) (cid:12) (cid:12) (cid:12) @@ -1262,27 +1470,37 @@ in Meiosis networks follows a normal distribution: w ij -∼ N(µ ij,σ2 ij) +∼ N(µ +ij +,σ2 +ij +) 28 3.2. Pruning approaches Hence every connection has two learned parameters: µ ij -and σ2 ij. +and σ2 +ij +. The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled weights: (cid:80) -iσ +i +σ ij (cid:80) -iµ +i +µ ij > 1 and (cid:80) -kσ +k +σ jk (cid:80) -kµ +k +µ jk > 1 The mean of the new nodes is sampled around the old mean, half the variance is assigned @@ -1291,12 +1509,20 @@ Hence Meiosis networks only change the number of neurons per layer. They do not layers or add skip connections. 3.1.3. 
Automatic Structure Optimization Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of on- -line handwriting recognition. It makes use of the confusion matrix C = (c ij) ∈ Nk×k +line handwriting recognition. It makes use of the confusion matrix C = (c +ij +) ∈ Nk×k ≥0 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix -S with s ij = s ji = c +S with s +i +j = s +j +i = c ij -·c ji. The maximum of S defines where the ASO algorithm adds +·c +ji +. The maximum of S defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. 3.2. Pruning approaches Pruning approaches start with a network which is bigger than necessary and prune it. The @@ -1314,7 +1540,9 @@ Damage (OBD) as introduced in [LDS+89]. For every single parameter k, OBD calcul the effect on the objective function of deleting k. The authors call the effect of the deletion 29 3. Topology Learning -of parameter k the saliency s k. The parameters with the lowest saliency are deleted, which +of parameter k the saliency s +k +. The parameters with the lowest saliency are deleted, which means they are set to 0 and are not updated anymore. A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights in a much better way. This requires, however, to calculate the inverse Hessian matrix @@ -1323,7 +1551,9 @@ A much simpler and computationally cheaper pruning criterion is the weight magni [HPTD15] prunes all weights w which are below a threshold θ: w ←  - w if w ≥ θ + + +w if w ≥ θ 0 otherwise 3.3. Genetic approaches The general idea of genetic algorithms (GAs) is to encode the solution space as genes, which @@ -1393,7 +1623,9 @@ predicts a pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C 0 predicts traffic sign, then another classifier has to predict if it is a speed limit, a -sign indicating danger or something else. 
If C 0, however, predicts road, then no other +sign indicating danger or something else. If C +0 +, however, predicts road, then no other classifier will become active. In this example, the problem has 17 classes. The hierarchical approach introduces 7 clusters of classes and thus uses 8 classifiers. @@ -1409,7 +1641,9 @@ children. Siblings are not affected. In the example from Figure 4.1, the classif which distinguishes traffic signs can be changed while the classification as pedestrian, four+-wheelers, traffic sign, street, other will not be affected. Also, the classification between speed limits, danger signs and other signs will not change. -• Faster training: Except for the root classifier C 0, each other classifier will have +• Faster training: Except for the root classifier C +0 +, each other classifier will have less than the total amount of training data. Depending on the combined classes, the models could also be simpler. Hence the training time is reduced. • Weighting of errors: In practice, some errors are more severe than others. For @@ -1457,7 +1691,8 @@ n i=1 n (cid:88) -j=1C +j=1 +C ij ·|i−j| [4.1] which punishes errors linearly with the distance to the diagonal. This method is called CMO @@ -1489,7 +1724,9 @@ Those will be moved to the corners of the confusion matrix by optimizing Equatio Once a permutation of the classes is found which has a low score Equation (4.1), the clusters can either be made by hand by deciding why classes should not be in one clusters. With such a permutation, only n−1 binary decisions have to be made and hence only the list of -classes has to be read. Alternatively, one can calculate the confusions C(cid:48) i,i+1+C(cid:48) +classes has to be read. Alternatively, one can calculate the confusions C(cid:48) +i,i+1 ++C(cid:48) i+1,i for each pair of classes which are neighbors in the confusion matrix. 
The higher this value, the @@ -1609,7 +1846,8 @@ use SAME padding, except for layer 11 which used VALID padding in order to decre the feature map size to 1×1. If the input feature map is bigger than 32×32, for eachpoweroftwotherearetwoConvolution + BN + ELUblocksandoneMax pooling block added. This is the framed part in the table. -32×32Input +32×32 +Input C32@3×3/1 BN+ELU C32@3×3/1 @@ -1752,7 +1990,40 @@ is reversed. For the baseline model, 21.8 changes in the relative order of accur in average for each pair of epochs (i,i+1). This means if one knows only the relative order of the validation accuracy of two models m and m(cid:48) in epoch i, it is doubtful if one can make any statement about the ordering of m and m(cid:48) in epoch i+1. -0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 +0 +1 +0 +2 +0 +3 +0 +4 +0 +5 +0 +6 +0 +7 +0 +8 +0 +9 +0 +1 +0 +0 +1 +1 +0 +1 +2 +0 +1 +3 +0 +1 +4 +0 0.2 0.3 0.4 @@ -1760,7 +2031,18 @@ make any statement about the ordering of m and m(cid:48) in epoch i+1. 0.6 0.7 epoch -validationaccuracy +v +ali +d a +ti +o +n +a c +c +u +r +a +c y maximum validation accuracy minimum validation accuracy 1.5 @@ -1770,7 +2052,9 @@ minimum validation accuracy 3.5 4 4.5 -loss +l +o s +s maximum validation accuracy minimum validation accuracy mean loss @@ -1991,11 +2275,17 @@ was increased. The detailed results are given in Table 5.8. As expected, the number of training epochs of the models with increased numbers of parameters is lower. The wall-clock time, however, is higher due to the increase in computation per forward- and backward-pass. -For m 9, m +For m +9 +, m 11 -and m 13, the filter weight range of the layer with increased capacity decreases +and m +13 +, the filter weight range of the layer with increased capacity decreases compared to Figure 5.6, the filter weights of the layer with increased capacity are more -concentrated around zero compared to Figure 5.2. 
For model m 13, the distribution of +concentrated around zero compared to Figure 5.2. For model m +13 +, the distribution of weight of the output layer changed to a more bell-shaped distribution. Except for this, the distribution of filter weights in other layers did not change for all three models compared to the baseline. @@ -2019,7 +2309,11 @@ m(cid:48) m 13 5982092 62.39% 0.66 63.77% 147.8 4485s -Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m 9, m 11, m +Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m +9 +, m +11 +, m 13 as well as their accuracies. 54 @@ -2058,7 +2352,13 @@ feature maps of layer i where i = 0 is the input layer and all filters are 3×3 a bias, then the number of parameters is Parameters = k -(cid:88) i=1(cid:0) (n i−1·32+1)·n i(cid:1) +(cid:88) +i=1 +(cid:0) (n +i−1 +·32+1)·n +i +(cid:1) Hence the width of one layer does not only influence the parameters in this layer, but also in the next layer. The number of possible subsequent layers of one feature map size is enormous, even if @@ -2136,7 +2436,9 @@ a GTX 980, m no-bn only needs 21ms per epoch. The number of epochs used for training, however, also increased noticeably from 149 epochs to 178 epochs in average. The standard -deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m no-bn. +deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m +no-bn +. The mean accuracy of m no-bn is 62.86% and hence 0.52 percentage points worse. The @@ -2298,10 +2600,11 @@ This contradicts [GBB11, SMGS14]. A key difference between the logistic− function and ELU is that ELU does neither suffers from the vanishing gradient problem nor is its range of values bound. 
For this reason, the S2ReLU activation function, defined as -S2ReLU(x) = -ReLU(x +S2ReLU(x) = ReLU( +x 2 -+1)−ReLU(−x ++1)−ReLU(− +x 2 +1) =  @@ -2310,8 +2613,8 @@ ReLU(x    - -−x +  +−x 2 +1 if x ≤ −2 x if −2 ≤ x ≤ 2 @@ -2430,8 +2733,12 @@ such as self-driving cars is that they increase the computation by a factor of n why they improve the test accuracy is by reducing the variance. The idea of label smoothing is to use the ensemble prediction of the training data as labels for another classifier. For every element x of the training set, the one-hot encoded target -t(x) is smoothed by the ensemble prediction y E(x) -t(cid:48)(x) = α·t(x)+(1−α)y E(x) +t(x) is smoothed by the ensemble prediction y +E +(x) +t(cid:48)(x) = α·t(x)+(1−α)y +E +(x) where α ∈ [0,1] is the smoothing factor. There are three reasons why label smoothing could be beneficial: • Training speed: The ensemble prediction contains more information about the @@ -2514,7 +2821,8 @@ power of two there are two Convolution + BN + ELU blocks and one Max pooling block added. This is the framed part in the table. 66 5.15. Optimized Classifier -32×32Input +32×32 +Input C69@3×3/1 BN+ELU C69@3×3/1 @@ -2888,7 +3196,8 @@ nin+nout Xavier/Glorot normal α = 0 β = (cid:16) 2 -(nin+nout)(cid:17)2 +(nin+nout) +(cid:17)2 γ = 0 [GB10] He α = 0 β = 2 nin @@ -2896,7 +3205,9 @@ nin Orthogonal — — γ = 0 [SMG13] LSUV — — γ = 0 [MM15] Table B.2.: Weight initialization schemes of the form w ∼α·U[−1,1]+β·N(0,1)+γ. -n in,n +n +in +,n out are the number of units in the previous layer and the next layer. Typically, biasesareinitializedwithconstant0andweightsbyoneoftheotherschemesto prevent @@ -2905,12 +3216,23 @@ all parameters. LSUV and Orthogonal initialization cannot be described with this simple pattern. B.4. 
Objective function For classification tasks, the cross-entropy -E CE(W) = -−(cid:88) +E +CE +(W) = − +(cid:88) x∈X K (cid:88) -k=1[tx klog(ox k)+(1−tx k)log(1−ox k)] +k=1 +[tx +k +log(ox +k +)+(1−tx +k +)log(1−ox +k +)] is by far the most commonly used objective function (e.g., used by [ZF14]). In this equation, X is the set of training examples, K is the number of classes, tx k @@ -2933,12 +3255,14 @@ Most relevant optimization techniques for CNNs are based on SGD, which updates t weights according to the rule w ji -← w ji+∆w +← w +ji ++∆w ji with ∆w ji -= -−η∂E += −η +∂E x ∂w ji @@ -2950,31 +3274,33 @@ lead to longer training times due to computational overhead and to more training to gradient noise. w ji -← w ji+∆w +← w +ji ++∆w ji with ∆w ji -= -−η∂E += −η +∂E B ∂w ji Nine variations which adjust the learning rate during training are: • Momentum: -w(t+1) +w (t+1) ji -← w(t) +← w (t) ji -+∆w(t+1) ++∆w (t+1) ji -with ∆w(t+1) +with ∆w (t+1) ji -= -−η∂E += −η +∂E B ∂w ji -+α∆w(t) ++α∆w (t) ji with α ∈ [0,1], typically 0.9 (e.g., [ZF14, MSM16]) • Adagrad [DHS11] @@ -2990,7 +3316,9 @@ decrease the learning rate when the algorithms improvement is below a threshold. training step, η(0) is the initial learning rate, k ∈ N ≥1 is the number of training steps -until the learning rate is decreased by 1 10th. +until the learning rate is decreased by 1 +10 +th. • NewbobScheduling[new00]: StartwithPerformanceScheduling,thenuseExponential Decay Scheduling. 
• Adam and AdaMax [KB14] @@ -3022,13 +3350,17 @@ CNNs have the following hyperparameters: Name Function ϕ(x) Range of Values ϕ(cid:48)(x) Used by Sign function†  - +1 if x ≥ 0 + + ++1 if x ≥ 0 −1 if x < 0 {−1,1} 0 [KS02] Heaviside step function†  - +1 if x > 0 + + ++1 if x > 0 0 if x < 0 {0,1} 0 [MP43] Logistic function 1 @@ -3041,14 +3373,18 @@ ex+e−x = tanh(x) [−1,1] sech2(x) [LBBH98, Tho14a] ReLU† max(0,x) [0,+∞)  - 1 if x > 0 + + +1 if x > 0 0 if x < 0 [KSH12] LReLU†2 (PReLU) ϕ(x) = max(αx,x) (−∞,+∞)  - 1 if x > 0 + + +1 if x > 0 α if x < 0 [MHN13, HZRS15b] Softplus log(ex+1) (0,+∞) ex @@ -3056,25 +3392,37 @@ ex+1 [DBB+01, GBB11] ELU  - x if x > 0 + + +x if x > 0 α(ex−1) if x ≤ 0 (−∞,+∞)  - 1 if x > 0 + + +1 if x > 0 αex otherwise [CUH15] Softmax‡ o(x) j = exj -(cid:80)K k=1exk +(cid:80)K +k=1 +exk [0,1]K o(x) j -· (cid:80)K k=1exk−exj -(cid:80)K k=1exk +· (cid:80)K k=1 exk−exj +(cid:80)K +k=1 +exk [KSH12, Tho14a] -Maxout‡ o(x) = max x∈xx (−∞,+∞) +Maxout‡ o(x) = max +x∈x +x (−∞,+∞)  - 1 if x + + +1 if x i = maxx 0 otherwise @@ -3099,19 +3447,33 @@ as it produces a probability distribution. See Figure B.1 for a plot of some of 2.0 x y -ϕ 1(x) = 1 +ϕ +1 +(x) = 1 1+e−x -ϕ 2(x) = tanh(x) -ϕ 3(x) = max(0,x) -ϕ 4(x) = log(ex+1) -ϕ 5(x) = max(x,ex−1) +ϕ +2 +(x) = tanh(x) +ϕ +3 +(x) = max(0,x) +ϕ +4 +(x) = log(ex+1) +ϕ +5 +(x) = max(x,ex−1) Figure B.1.: Activation functions plotted in [−2,+2]. tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, whereas tanh and the logistic function are always below 1. B.7. Regularization Regularization techniques aim to make the fitted function smoother and reduce overfitting. Regularization techniques are: -• (cid:96) 1, (cid:96) 2, and Orthogonality regularization: See Appendix B.4 +• (cid:96) +1 +, (cid:96) +2 +, and Orthogonality regularization: See Appendix B.4 • Max-norm regularization (e.g. 
used ins [SHK+14]) • Dropout (introduced in [SHK+14]), DropConnect (see [WZZ+13]), Stochastic Depth (see [HSL+16]) @@ -3131,15 +3493,25 @@ i filters of size n×m being applied to k i−1 feature maps -has k i·k i−1(n·m+1) parameters. The +1 is due to the bias. +has k +i +·k +i−1 +(n·m+1) parameters. The +1 is due to the bias. • A fully connected layer with n nodes after k feature maps of size m 1 × m 2 has -n·(k·m 1·m 2+1) parameters. +n·(k·m +1 +·m +2 ++1) parameters. • A dense block with a depth of L, a growth rate of n and 3×3 filters has L+n·32+ -32·n2(cid:80)L i=0(L−i) = L+9n+9n2L2−L +32·n2(cid:80)L +i=0 +(L−i) = L+9n+9n2L2−L 2 parameters. According to [HPTD15], AlexNet has 60 million parameters which is roughly the number @@ -3156,7 +3528,9 @@ simplicity, n • A fully connected layer with n nodes and k inputs has to calculate ϕ(W ·x+b) with W ∈ Rn×k, x ∈ Rk×1, b ∈ Rn×1. It hence needs about n·(k+(k−1)+1) = 2nk additions / multiplications before the non-linearity ϕ is calculated. The total number -of FLOPs is 2·n·k+n·n ϕ. +of FLOPs is 2·n·k+n·n +ϕ +. • In the following, biases are ignored. A convolutional layer with k i filters of size n×m @@ -3168,13 +3542,29 @@ filter maps of size w×h if padding is applied. For each element of each filter map, n·m·k i−1 multiplications and -(n·m·k i−1−1) additions have to be made. This results in (2nmk i−1−1)·(k i·w·h) -operations. The total number of FLOPs is (2·n·m·k i−1−1)·(k i·w·h)+k i·w·h·n ϕ. +(n·m·k +i−1 +−1) additions have to be made. This results in (2nmk +i−1 +−1)·(k +i +·w·h) +operations. The total number of FLOPs is (2·n·m·k +i−1 +−1)·(k +i +·w·h)+k +i +·w·h·n +ϕ +. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. 87 • Afullyconnectedlayerwithnnodesafterk featuremapsofsizew×hneeds2n(k·w·h) -FLOPs. The total number of FLOPs is 2n·(k·w·h)+n·n ϕ. +FLOPs. The total number of FLOPs is 2n·(k·w·h)+n·n +ϕ +. 
• As Dropout is only calculated during training, the number of FLOPs was set to 0. • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. For a feature map of size w×h a max pooling @@ -3479,8 +3869,12 @@ Asirra SVM 82.7% accuracy [Gol08] Graz-02 Optimal NBNN 78.98% accuracy [BMDP10] Table E.2.: An overview over state of the art results achieved in computer vision datasets. Algorithm 2 Create a classification dataset from a semantic segmentation dataset -Require: Semantic segmentation dataset (D S) -procedure CreateDataset(Annotated dataset D S) +Require: Semantic segmentation dataset (D +S +) +procedure CreateDataset(Annotated dataset D +S +) D C ← List @@ -3498,8 +3892,14 @@ if at least 50% of s are of one class then c I ← crop(x,(i,j),(i+w,j +h)) -D.append((c I,c L)) -return (D C) +D.append((c +I +,c +L +)) +return (D +C +) 98 F. List of Tables 2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 diff --git a/read/results/pdfplumber/2201.00021.txt b/read/results/pdfplumber/2201.00021.txt index 74f39a7..0393d19 100644 --- a/read/results/pdfplumber/2201.00021.txt +++ b/read/results/pdfplumber/2201.00021.txt @@ -17,16 +17,24 @@ Received13December2021/Accepted30December2021 ABSTRACT Context. Molecularmaserlinesaresignpostsofhigh-massstarformation,probingtheexcitationandkinematicsofverycompact regionsinthecloseenvironmentofyoungstellarobjectsandprovidingusefultargetsfortrigonometricparallaxmeasurements. -Aims.OnlyafewNH 3(9,6)masersareknownsofar,andtheiroriginisstillpoorlyunderstood.HereweaimtofindnewNH 3(9,6) +Aims.OnlyafewNH +3 +(9,6)masersareknownsofar,andtheiroriginisstillpoorlyunderstood.HereweaimtofindnewNH +3 +(9,6) maserstoprovideabetterobservationalbasisforstudyingtheirroleinhigh-massstar-formingregions. 
-Methods.WecarriedoutNH 3(9,6)observationstowardCepheusAandG34.26+0.15withtheEffelsberg100-metertelescope(beam +Methods.WecarriedoutNH +3 +(9,6)observationstowardCepheusAandG34.26+0.15withtheEffelsberg100-metertelescope(beam size49(cid:48)(cid:48))andtheKarlG.JanskyVeryLargeArray(JVLA;beamsizeabout1(cid:48)(cid:48) .2). Results.WediscoverednewNH 3 (9,6)masersinCepAandG34.26+0.15,whichincreasesthenumberofknownhigh-massstar- -formingregionshostingNH 3(9,6)masersfromfivetoseven.Long-termmonitoring(20months)atEffelsbergshowsthattheintensity +formingregionshostingNH 3 (9,6)masersfromfivetoseven.Long-termmonitoring(20months)atEffelsbergshowsthattheintensity ofthe(9,6)maserinG34.26+0.15isdecreasing,whiletheCepAmaserremainsstable.ComparedtotheEffelsbergdataandassuming -linearvariationsbetweentheepochsofobservation,theJVLAdataindicatenomissingflux.ThissuggeststhattheNH 3(9,6)emission +linearvariationsbetweentheepochsofobservation,theJVLAdataindicatenomissingflux.ThissuggeststhattheNH +3 +(9,6)emission arisesfromsinglecompactemissionregionsthatarenotresolvedbytheinterferometricmeasurements.AsJVLAimagingshows,the NH 3 @@ -41,7 +49,7 @@ baselineinterferometryobservationsareneededtoprovidemoreaccuratepositionsandcons Keywords. Masers–ISM:clouds–ISM:individualobjects:CepA,G34.26+0.15–ISM:Hiiregions–Radiolines:ISM 1. Introduction Since its discovery more than five decades ago (Cheung et al. -1968), ammonia (NH 3) has been a most valuable molecule for +1968), ammonia (NH 3 ) has been a most valuable molecule for investigating the physical properties of molecular clouds (e.g., Ho & Townes 1983). 
While thermally excited transitions in thecentimeter-wavelengthinversiontransitionsofammoniaare @@ -91,11 +99,45 @@ beenidentifiedasmasers,includingthe(5,3),(5,4),(6,1),(6,2), (6,4),(6,5),(7,3),(7,4),(7,5)(7,6),(8,3),(8,4),(8,5),(8,6),(9,3), (9,4),(9,5),(9,7),(9,8),(10,7),(10,8),(10,9),and(11,9)transi- Articlenumber,page1of10 -arXiv:2201.00021v3 -[astro-ph.GA] +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +0 +2 +1 +v +3 +[ +a +s +t +r +o +- +p +h +. +G +A +] 9 -Apr -2022 +A +p +r +2 +0 +2 +2 A&Aproofs:manuscriptno.mainArxiv tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; Henkel et al. 2013; Mei et al. 2020). Except for the NH @@ -104,8 +146,12 @@ Henkel et al. 2013; Mei et al. 2020). Except for the NH masersproposedtobeassociatedwithfoursupernovaremnants (McEwenetal.2016),almostalltheotherammoniamasersare detected in high-mass star-forming regions (HMSFRs). How- -ever, while many HMSFRs host water (H 2O), hydroxyl (OH), -or methanol (CH 3OH) masers, ammonia masers are quite rare +ever, while many HMSFRs host water (H +2 +O), hydroxyl (OH), +or methanol (CH +3 +OH) masers, ammonia masers are quite rare in these sources, and the role that the environment of a young high-mass star plays in their excitation remains unclear. There- fore, dedicated searches for ammonia masers in HMSFRs are @@ -128,22 +174,30 @@ HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. (3,3) masers arethoughttobecollisionallyexcited(e.g.,Floweretal.1990; Mangum & Wootten 1994); in contrast, the pumping mecha- -nismofNH 3(9,6)masersislesswellconstrained(Maddenetal. +nismofNH 3 (9,6)masersislesswellconstrained(Maddenetal. 1986).Brown&Cragg(1991)havestudiedortho-ammoniaand found that it could possibly pump the (6,3) inversion line, but theydidnotextendtheirmodeltothe(9,6)transitionduetothe factthatcollisionratesareonlyknownforinversionlevelsupto J = 6(e.g.,Danbyetal.1988). -NH 3(9,6)masersarefoundtobestronglyvariable,similarto -H 2Omasers(Maddenetal.1986;Pratapetal.1991;Henkeletal. 
+NH +3 +(9,6)masersarefoundtobestronglyvariable,similarto +H +2 +Omasers(Maddenetal.1986;Pratapetal.1991;Henkeletal. 2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) lineshowedsignificantvariationinlineshapewithinatimein- tervalofonlytwodays.Mappingofthe(9,6)masertowardW51 withverylongbaselineinterferometry(VLBI)suggeststhatthe -masers are closer to the H 2O masers than to the OH masers or +masers are closer to the H +2 +O masers than to the OH masers or to ultracompact (UC) Hii regions (Pratap et al. 1991). While Henkeletal.(2013)andGoddietal.(2015)showedthattheSiO -andNH 3masersinW51-IRS2areveryclosetoeachother,their +andNH +3 +masersinW51-IRS2areveryclosetoeachother,their positions,differingby0(cid:48)(cid:48) .065(∼0.015pc),donotfullycoincide. In this paper we report the discovery of NH 3 (9,6) masers in two HMSFRs, Cepheus A and G34.26+0.15. This increases @@ -178,8 +232,12 @@ ing was checked every 2 hours using 3C 286 or NGC 7027. Focus calibrations were done at the beginning of the observa- tionsandduringsunsetandsunrisetowardtheabovementioned pointingsources.Thesystemtemperatureswere100–130Kon -amain-beambrightnesstemperature,T MB,scale.Thisfluxden- -sitywascalibratedassumingaT MB/S ratioof1.95K/Jy,derived +amain-beambrightnesstemperature,T +MB +,scale.Thisfluxden- +sitywascalibratedassumingaT +MB +/S ratioof1.95K/Jy,derived fromcontinuumcrossscansofNGC7027(thefluxdensitywas adoptedfromOttetal.1994).Calibrationuncertaintiesareesti- matedtobe∼10%. @@ -254,11 +312,15 @@ TimesequenceofNH Effelsberg 100-meter telescope (after subtracting a first-order polyno- mialbaseline).AJVLAspectrumisinterspersed.Thesystemicveloc- ity from CO and HCO+ lines is indicated by a dashed blue line. 
The -two dashed red lines at LSR velocities, V LSR, of −0.90 km s−1 and +two dashed red lines at LSR velocities, V +LSR +, of −0.90 km s−1 and −0.28 km s−1 indicate the central velocities of the two major compo- nents.Right:NH 3 (9,6)spectrafromG34.26+0.15.Thesystemicve- locityfromC17Oisindicatedbyadashedblueline.Thethreedashed -redlinesatV LSR=54.1kms−1,55.8kms−1,and62.5kms−1showthe +redlinesatV +LSR +=54.1kms−1,55.8kms−1,and62.5kms−1showthe centralvelocitiesofthemainammoniaemissioncomponents. 3. Results The spectra from different epochs are shown in Figs. 1 and 2. @@ -279,7 +341,9 @@ radius3(cid:48)(cid:48) .5,thatcontainsallthedetectedNH Table A.1, the observed NH 3 (9,6) line parameters obtained by -Gaussianfitsarelisted.NH 3(8,5)and(10,7)emissionisnotde- +Gaussianfitsarelisted.NH +3 +(8,5)and(10,7)emissionisnotde- tected by our JVLA observations. The 3σ upper limits for the NH 3 @@ -291,7 +355,9 @@ Fig. 2. NH (9,6) line profiles emphasizing, in contrast to the spectra in Fig. 1, weaker features. Cep A spectra are presented on the left, G34.26+0.15spectraontheright.Thetwodashedredlinesintheleft -panelsindicateV LSR=1.48kms−1and2.89kms−1.Intherightpanels, +panelsindicateV +LSR +=1.48kms−1and2.89kms−1.Intherightpanels, thetwodashedredlinesreferto54.1kms−1and55.8kms−1. and 27.2 mJy beam−1, respectively. In G34.26+0.15, the corre- sponding3σupperlimitsfortheNH @@ -350,8 +416,12 @@ J2000 = 62◦01(cid:48)49(cid:48)(cid:48) .587, the peak position of the continuum map, is marked with a black cross. 
Slightly to the west of the cross is the black ellipsedenotingthepositionoftheNH 3 -(9,6)emissionwithapurplestaratitscenter.OH(Bartkiewiczetal.2005),H 2O(Sobolevetal.2018), -andCH 3OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarontheright-handsideindicates +(9,6)emissionwithapurplestaratitscenter.OH(Bartkiewiczetal.2005),H +2 +O(Sobolevetal.2018), +andCH +3 +OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarontheright-handsideindicates theLSRvelocityrangeofthemaserspots. Fig. 4. 1.36cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, 150,180,and200×5.0mJybeam−1.ThebackgroundimageistheSpitzer 4.5µmemission,takenfromGLIMPSE.Thereferencepositionis @@ -361,8 +431,14 @@ J2000 J2000 =01◦14(cid:48)58(cid:48)(cid:48) .201,thepeakposition,ismarkedbyablackcross.TheblackellipsesshowthepositionsofNH 3 -(9,6)emissionswithstarsattheircenter(i.e.,M1,M2,andM3).OH(Zhengetal.2000),H 2O(Imaietal.2011),andCH 3OH(Bartkiewiczetal. -2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicatesthevelocityrange(V LSR)ofmaserspots. +(9,6)emissionswithstarsattheircenter(i.e.,M1,M2,andM3).OH(Zhengetal.2000),H +2 +O(Imaietal.2011),andCH +3 +OH(Bartkiewiczetal. +2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicatesthevelocityrange(V +LSR +)ofmaserspots. InviewoftheconstancyofthefluxdensitiesobtainedatEf- felsberg and the similar JVLA flux density, measured in 2021 July,thereisnomissinginterferometricfluxdensityintheJVLA @@ -379,7 +455,9 @@ emissiontobecomposedoftwodifferentcomponents.Thespec- traofweakcomponentsonasmallerfluxdensityscalearepre- sentedinFig.2. 
Three different locations showing NH 3 (9,6) emission are -foundtowardG34.26+0.15(Fig.4).ThedeconvolvedNH 3(9,6) +foundtowardG34.26+0.15(Fig.4).ThedeconvolvedNH +3 +(9,6) componentsizesare(1(cid:48)(cid:48) .42±0(cid:48)(cid:48) .43)×(0(cid:48)(cid:48) .54±0(cid:48)(cid:48) .62)atP.A.=97◦ (M1),(0(cid:48)(cid:48) .42±0(cid:48)(cid:48) .27)×(0(cid:48)(cid:48) .15±0(cid:48)(cid:48) .27)atP.A.=150◦ (M2),and Articlenumber,page4of10 @@ -411,7 +489,7 @@ namedasHWsources.TheHW2objectisoneofthebestknown examplesofaprotostellarjetordisksystemdrivingapowerful outflow(e.g.,Rodriguezetal.1980;Güstenetal.1984;Torrelles et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). -TheobservedNH 3(9,6)emissionisslightlyoffset(−0(cid:48)(cid:48) .28,0(cid:48)(cid:48) .02) +TheobservedNH 3 (9,6)emissionisslightlyoffset(−0(cid:48)(cid:48) .28,0(cid:48)(cid:48) .02) fromthecenterofHW2(seeFig.3). G34.26+0.15isanHMSFRlocatedatadistanceof3.3kpc (Kuchar & Bania 1994). It hosts four radio continuum compo- @@ -422,7 +500,7 @@ etal.1986;Sewiloetal.2004;Sewiłoetal.2011).Components A and B are HC Hii regions, located to the east of component C.Anextendedring-likeHiiregion,calledcomponentD,islo- cated southeast of components A-C. One of the three observed -NH 3(9,6)emissionlinesources,M1,isclosetotheheadofcom- +NH 3 (9,6)emissionlinesources,M1,isclosetotheheadofcom- ponentC,whereasM2andM3originatefromanothercompact regioninthewestoftheHCHiicomponentA(seeFig.4). 4.2. NH @@ -461,7 +539,9 @@ the NH causeG34.26+0.15islocatedataboutfivetimesthedistanceto CepA,beamdilutioneffectsreducethelowermainbeambright- ness temperature limit to 400 K in G34.26+0.15 (M2) (see Ta- -bleA.3).WealsonotethattheluminosityoftheNH 3(9,6)emis- +bleA.3).WealsonotethattheluminosityoftheNH +3 +(9,6)emis- sioninG34.26+0.15ishigherthanorcomparabletothatinCep A,dependingontheepochofourobservations. 
Finally,thenon-detectionsofthe(8,5)and(10,7)linesalso @@ -496,7 +576,9 @@ show thermal emission toward Cep A over a velocity range of LSR ≤ −4 km s−1 (Brown et al. 1981; Güsten etal.1984;Torrellesetal.1985,1986,1993,1999).Anaverage -NH 3columndensityof∼5×1015cm−2wasestimatedforaregion +NH +3 +columndensityof∼5×1015cm−2wasestimatedforaregion of3(cid:48)(cid:48)aroundHW2(Torrellesetal.1999).ThishighNH 3 abun- @@ -573,11 +655,21 @@ To characterize the environment of NH 3 (9,6) masers, we can compare their positions with respect to those of other maser -species (i.e., OH, H 2O, and CH 3OH). Toward Cep A HW2, -manyCH 3OH(e.g.,Menten1991;Sugiyamaetal.2008;Sanna -et al. 2017) and H 2O maser spots (e.g., Torrelles et al. 1998, +species (i.e., OH, H +2 +O, and CH +3 +OH). Toward Cep A HW2, +manyCH +3 +OH(e.g.,Menten1991;Sugiyamaetal.2008;Sanna +et al. 2017) and H +2 +O maser spots (e.g., Torrelles et al. 1998, 2011;Sobolevetal.2018)aredetectedandareassociatedwith -its disk. Sobolev et al. (2018) also found that most of the H 2O +its disk. Sobolev et al. (2018) also found that most of the H +2 +O maserfluxisassociatedwiththecompactHiiregionHW3d.OH maser features close to the Hii regions are also seen in HW2 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These @@ -597,23 +689,31 @@ sizedregiontothewestofthepeakcontinuumpositionofHW2 (9,6) maser in Cep A isuniqueandnotrelatedtomaserspotsseeninothermolecular species. -In G34.26+0.15, OH (Zheng et al. 2000), H 2O (Imai et al. -2011),andCH 3OH(Bartkiewiczetal.2016)masershavebeen +In G34.26+0.15, OH (Zheng et al. 2000), H +2 +O (Imai et al. +2011),andCH 3 OH(Bartkiewiczetal.2016)masershavebeen detected east of source C (Fig. 4), and none of them coincides with the head of C. The NH 3 (9,6) maser M1 is also found slightly off the head of source C. This could suggest that M1 ispoweredbycontinuumsourceCorbyanoutflow.Nearcom- -ponent B, there are some OH and CH 3OH masers but no H 2O -or NH 3 masers. 
A group of H 2O masers, well-known tracers +ponent B, there are some OH and CH +3 +OH masers but no H +2 +O +or NH 3 masers. A group of H 2 O masers, well-known tracers of outflows, with a large velocity distribution of 43 km s−1 ≤ V LSR ≤54 km s−1, was found to the west of the centimeter- continuum source A and close to the peak of the millimeter- continuumemission(seedetailsinourFig.A.2andalsoinFig.5 -ofImaietal.2011).TheclosenessofNH 3(9,6)maserspotsM2 +ofImaietal.2011).TheclosenessofNH +3 +(9,6)maserspotsM2 andM3tothisgroupofwatermasersandtheirsimilarvelocities again suggest an association of NH 3 @@ -639,7 +739,9 @@ tons near 10 µm, which is used for radiative pumping (Henkel etal.2013).BothCepAandG34.26+0.15havesimilarkinetic temperatures of (cid:38)200 K (Henkel et al. 1987; Patel et al. 2005; Comito et al. 2007; Beuther et al. 2018). This suggests that -highkinetictemperaturesareneededtoexciteNH 3(9,6)masers. +highkinetictemperaturesareneededtoexciteNH +3 +(9,6)masers. However,itshouldbenotedthatthesilicatedustabsorptionfea- turemightdominateat10µm(seethespectralenergydistribu- tion of Cep A in De Buizer et al. 2017). Additionally, there is @@ -665,9 +767,13 @@ to, but not coincident with, the peaks of the radio continuum emission in Cep A and G34.26+0.15. Furthermore, the (9,6) masers show velocity offsets with respect to their systemic ve- locities. This indicates that the (9,6) masers are located at the -base of outflows, similar to the H 2O masers. This is supported +base of outflows, similar to the H +2 +O masers. This is supported by VLBI observations that show that (9,6) masers tend to be -closelyassociatedwithH 2Omasers(Pratapetal.1991).Theob- +closelyassociatedwithH +2 +Omasers(Pratapetal.1991).Theob- servedtimevariabilityinG34.26+0.15andW51-IRS2canalso beattributedtoepisodicmolecularoutflows.Thisindicatesthat collisional pumping could be the driver of the (9,6) maser. 
On @@ -678,7 +784,9 @@ toexplaintheNH Floweretal.1990;Mangum&Wootten1994).Collisionstendto pumpfromtheK=0leveltotheK=3levelwithparitychanges, thatis,theupperlevelofthe(3,3)metastabletransitionwillbe -overpopulated.NH 3(9,6)arisesfromtheorthospecies,soasim- +overpopulated.NH +3 +(9,6)arisesfromtheorthospecies,soasim- ilarmechanismmightalsooccurinthecaseofthe(9,6)transi- tion.Furthermeasurementsofcollisionalratesofammoniawill allowustotestthisscenario. @@ -842,12 +950,16 @@ Zheng,X.W.,Moran,J.M.,&Reid,M.J.2000,MNRAS,317,192 Articlenumber,page7of10 A&Aproofs:manuscriptno.mainArxiv AppendixA: -TableA.1.SummaryofNH 3(9,6)maserobservations. +TableA.1.SummaryofNH +3 +(9,6)maserobservations. Source Telescope Beam Epoch Channel S ν rms (cid:82) -S νdv V +S +ν +dv V LSR ∆V 1/2 @@ -875,8 +987,12 @@ Effelsberg 49(cid:48)(cid:48) 2021,Aug.11 0.07 0.08 13.92 0.06±0.007 54.10±0.0 0.12 0.10±0.006 55.85±0.02 0.75±0.06 Effelsberg 49(cid:48)(cid:48) 2021,Aug.12 0.07 0.16 27.40 0.09±0.008 55.83±0.02 0.56±0.05 Notes.ThespectralparametersareobtainedfromGaussianfitting.(a)TheJVLAspectrumtowardCepAisextractedfromtheEffelsberg-beam- -sizedregion(FWHM49(cid:48)(cid:48)).(b)ForG34.26+0.15,theJVLAbeamsamplestheNH 3(9,6)spectrumoveraregionofradius3(cid:48)(cid:48) .5,whichcontainsall -detectedNH 3(9,6)emissions. +sizedregion(FWHM49(cid:48)(cid:48)).(b)ForG34.26+0.15,theJVLAbeamsamplestheNH +3 +(9,6)spectrumoveraregionofradius3(cid:48)(cid:48) .5,whichcontainsall +detectedNH +3 +(9,6)emissions. TableA.2.1.36cmJVLAfluxdensitiesofindividualcontinuumsources. Source R.A. Dec. Size P.A. S ν @@ -892,7 +1008,9 @@ B 185318.649±0.005 +011500.071±0.180 (2.31±0.49)×(0.85±0.21) 17.4 597±110 C 185318.560±0.004 +011458.201±0.112 (2.03±0.30)×(1.34±0.20) 178.0 5070±660 Articlenumber,page8of10 Y.T.Yan(闫耀庭) etal.:Discoveryofammonia(9,6)masersintwohigh-massstar-formingregions -TableA.3.NH 3(9,6)maserpositionsderivedfromtheJVLAobservations. +TableA.3.NH +3 +(9,6)maserpositionsderivedfromtheJVLAobservations. 
Source R.A. Dec. S ν T @@ -910,14 +1028,24 @@ M2 185318.696±0.002 +011455.807±0.034 48.4 122.4 53.77±0.05 0.35±0.08 180.8 457.6 55.83±0.01 0.59±0.03 M3 185318.667±0.005 +011455.348±0.066 78.1 197.2 54.22±0.04 0.94±0.08 73.7 186.3 55.78±0.04 0.79±0.08 -Fig.A.1.CepheusA.Thegreyshadedareasmarkthe1.36cmJVLAcontinuummapofCepA.Thereferencepositionisα J2000=22h56m17s.972, -andδ J2000=62◦01(cid:48)49(cid:48)(cid:48) .587,thepeakpositionofthecontinuummap,ismarkedbyaredcross.Slightlytothewestofthecrossisthewhiteellipse +Fig.A.1.CepheusA.Thegreyshadedareasmarkthe1.36cmJVLAcontinuummapofCepA.Thereferencepositionisα +J2000 +=22h56m17s.972, +andδ +J2000 +=62◦01(cid:48)49(cid:48)(cid:48) .587,thepeakpositionofthecontinuummap,ismarkedbyaredcross.Slightlytothewestofthecrossisthewhiteellipse denotingthepositionoftheNH 3 (9,6)emissionwithapurplestaratitscenter.TheredcontoursshowtheNOrthernExtendedMillimeterArray (NOEMA)1.37mmcontinuum,takenfromBeutheretal.(2018).Contourlevelsare-5,5,10,20,40,80,100,150,and200×2.43mJybeam−1. -OH(Bartkiewiczetal.2005),H 2O(Sobolevetal.2018),andCH 3OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares, -respectively.Thecolorbarontheright-handsideindicatesthevelocityrange(V LSR)ofmaserspots. +OH(Bartkiewiczetal.2005),H +2 +O(Sobolevetal.2018),andCH +3 +OH(Sannaetal.2017)masersarepresentedasdiamonds,circles,andsquares, +respectively.Thecolorbarontheright-handsideindicatesthevelocityrange(V +LSR +)ofmaserspots. 
Articlenumber,page9of10 A&Aproofs:manuscriptno.mainArxiv Fig.A.2.1.36cmJVLAcontinuummapofG34.26+0.15presentedasgrayshadedareas.Thereferencepositionisα @@ -929,7 +1057,13 @@ J2000 3 (9,6)emissionwithstarsattheir center(i.e.,M1,M2,andM3).ThebluecontoursshowtheBerkeley-Illinois-MarylandAssociation(BIMA)array2.8mmcontinuum,takenfrom -Mookerjeaetal.(2007).Contourlevelsare-3,3,10,20,30,40,50,70,90,100,120,and140×20mJybeam−1.OH(Zhengetal.2000),H 2O(Imai -etal.2011),andCH 3OH(Bartkiewiczetal.2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicates -thevelocityrange(V LSR)ofmaserspots. +Mookerjeaetal.(2007).Contourlevelsare-3,3,10,20,30,40,50,70,90,100,120,and140×20mJybeam−1.OH(Zhengetal.2000),H +2 +O(Imai +etal.2011),andCH +3 +OH(Bartkiewiczetal.2016)masersarepresentedasdiamonds,circles,andsquares,respectively.Thecolorbarindicates +thevelocityrange(V +LSR +)ofmaserspots. Articlenumber,page10of10 diff --git a/read/results/pdfplumber/2201.00022.txt b/read/results/pdfplumber/2201.00022.txt index 4dbb0e7..9878b03 100644 --- a/read/results/pdfplumber/2201.00022.txt +++ b/read/results/pdfplumber/2201.00022.txt @@ -7,12 +7,16 @@ Sanaea C. Rose,1,2 Smadar Naoz,1,2 Re’em Sari,3 and Itai Linial3 3Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately -50−70 M (cid:12), the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +50−70 M +(cid:12) +, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding main-sequence stars. 
Considering dynamical processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite -efficient, forming IMBHs as massive as 104 M (cid:12). This upper limit assumes that (1) the BHs accrete a +efficient, forming IMBHs as massive as 104 M +(cid:12) +. This upper limit assumes that (1) the BHs accrete a substantial fraction of the stellar mass captured during each collision and (2) that the rate at which new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar disruptionsandstar-starcollisions. Wediscussdeviationsfromthesekeyassumptionsinthetext. Our @@ -27,7 +31,9 @@ respectively). The recently detected gravitational wave source GW190521 (The LIGO Scientific Collaboration et al. 2020a,b) produced an intermediate mass black hole of -approximately142M (cid:12). Thiseventmayhavealsohada +approximately142M +(cid:12) +. Thiseventmayhavealsohada 85M (cid:12) progenitor,whichfallswithinthepair-instability @@ -71,11 +77,45 @@ lated gas (e.g., Begelman et al. 2006; Yue et al. 2014; Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to sur- vive galaxy evolution and mergers to present day (e.g., -arXiv:2201.00022v2 -[astro-ph.GA] +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +0 +2 +2 +v +2 +[ +a +s +t +r +o +- +p +h +. +G +A +] 6 -Jul -2022 +J +u +l +2 +0 +2 +2 2 Rose et al. Rashkov&Madau2014),withsignificanteffectsontheir stellarandevendarkmattersurroundings(e.g.,Bertone @@ -131,7 +171,9 @@ peatedcollisionsbetweenBHsandmain sequence stars. During a collision, the BH can accrete some portion of the star’s mass. Over many collisions, it can grow ap- preciablyinsize. Wedemonstratethatthischannelcan -createIMBHswithmassesaslargeas104 M (cid:12),anupper +createIMBHswithmassesaslargeas104 M +(cid:12) +,anupper limitthatdependsonthedensityprofileofthesurround- ing stars and the efficiency of the accretion. 
The paper is structured as follows: we describe rele- @@ -168,7 +210,9 @@ our final results. Future work may address the particu- lars of the BH mass distribution, but we do not expect that it will significantly alter the outcome. The upper and lower limits of the BH mass distribution are 5 and -50M (cid:12), respectively. We select the upper limit to en- +50M +(cid:12) +, respectively. We select the upper limit to en- compass the range of upper bounds predicted by stellar evolution models, which vary between 40 and 125M (cid:12) @@ -176,7 +220,9 @@ dependingonthemetallicity(Hegeretal.2003;Woosley 2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; Belczynski et al. 2020b; Renzo et al. 2020). We assume that the orbits of the BHs follow a thermal eccentricity -distribution. We draw their semimajor axes, a •, from a +distribution. We draw their semimajor axes, a +• +, from a uniform distribution in log distance, dN/d(logr) being constant. While this distribution is not necessarily rep- resentative of actual conditions in the GN, we use it to @@ -199,22 +245,42 @@ butreserveamoredetailedexaminationofthedistribu- tion’s impact for future work. 2.2. Direct Collisions BHsintheGNcanundergodirectcollisionswithother -objects. The timescale for this process, t coll, can be es- +objects. The timescale for this process, t coll , can be es- timated using a simple rate calculation: t−1 coll = nσA, where n is the number density of objects, σ is the ve- locitydispersion, andAisthecross-section. Weusethe collision timescale from Rose et al. (2020): -t−1 coll=πn(a •)σ(a •) -×(cid:18) -f 1(e •)r2 +t−1 +coll +=πn(a +• +)σ(a +• +) +× +(cid:18) +f +1 +(e +• +)r2 +c ++f +2 +(e +• +)r c -+f 2(e •)r -c2G(m +2G(m BH -+m (cid:63)) -σ(a •)2 ++m +(cid:63) +) +σ(a +• +)2 (cid:19) . (1) where G is the gravitational constant and r @@ -223,8 +289,18 @@ is the sum of the radii of the interacting objects, a black hole with mass m BH -and a star with mass m (cid:63). Detailed in Rose -et al. 
(2020), f 1(e •) and f 2(e •) account for the effect of +and a star with mass m +(cid:63) +. Detailed in Rose +et al. (2020), f +1 +(e +• +) and f +2 +(e +• +) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and σ are simply evaluated at the semimajor axis of the orbit (see below). Note @@ -239,14 +315,20 @@ star as a function of distance from the SMBH in Figure 1.2 As this timescale depends on the density of surrounding stars, we adopt a density profile of the form: -ρ(r •)=ρ -0(cid:18) +ρ(r +• +)=ρ +0 +(cid:18) r • r -0(cid:19)−α +0 +(cid:19)−α , (2) -wherer •denotesthedistancefromtheSMBH.Weadopt +wherer +• +denotesthedistancefromtheSMBH.Weadopt a SMBH mass of 4×106 M (cid:12) such that our fiducial GN @@ -254,36 +336,39 @@ matches our own galactic center (e.g., Ghez et al. 2005; Genzel et al. 2003). In this case, the normalization in Eq.(2)isρ 0 -=1.35×106M (cid:12)/pc3 atr +=1.35×106M +(cid:12) +/pc3 atr 0 =0.25pc(Gen- zel et al. 2010). Additionally, in Eq. (2), α gives the slope of the power law. We assume that a uniform pop- ulation of solar mass stars account for most of the mass in the GN, making the stellar number density: -n(r •)= -ρ(r •) +n(r +• +)= ρ(r • ) 1M (cid:12) . (3) The collision timescale also depends on the velocity dis- persion, which we express as: -σ(r -•)=(cid:115) +σ(r • )= +(cid:115) GM +• r • -r -•(1+α), (4) +(1+α) , (4) where α is the slope of the density profile and M • de- notes the mass of the SMBH (Alexander 1999; Alexan- der&Pfuhl2014). Asmentionedabove,Eq.(1)depends -on the sum of the radii of the colliding objects, r c. We +on the sum of the radii of the colliding objects, r c . We take r c =1R (cid:12) because these interactions involve a BH and a star, and the former has a much smaller physi- calcross-section. Forexample,theSchwarzschildradius -of a 10M (cid:12) BH is only 30 km, or 4.31×10−5R (cid:12). For +of a 10M (cid:12) BH is only 30 km, or 4.31×10−5R (cid:12) . 
For this reason, direct collisions between compact objects are very rare and not included in our model. We note that direct collisions between BHs, via GW @@ -306,7 +391,9 @@ collisiontimescale(Roseetal.2020). We simulate the mass growth of a population of BHs with initial conditions detailed in Section 2.1. Over an increment ∆t of 106 yr, we calculate the probability of -a collision occurring, given by ∆t/t coll. This choice of +a collision occurring, given by ∆t/t +coll +. This choice of ∆t is motivated by our galactic center’s star formation timescale (e.g., Lu et al. 2009), allowing for regular re- plenishmentofthestellarpopulationintheGN.Wehave @@ -328,16 +415,18 @@ two objects experience a head on collision, with the BH passing through the star’s center. We begin by con- sidering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the -maximum impact parameter 1 R (cid:12). Qualitatively, one +maximum impact parameter 1 R (cid:12) . Qualitatively, one might expect that the BH could capture the entire star -(i.e.,∆m∼1M (cid:12))iftherelativevelocityissmallerthan +(i.e.,∆m∼1M (cid:12) )iftherelativevelocityissmallerthan theescapevelocityfromtheBHatthispoint. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity from the BH at the star’s surface. In this case, the BH captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length -approximately 1R (cid:12). For the purposes of this study, we +approximately 1R +(cid:12) +. For the purposes of this study, we assume that the BH accretes all of the material that it captures. The details of the accretion are uncertain, however, and it may be much less efficient than our re- @@ -345,13 +434,19 @@ sults imply. We discuss accretion in Section 2.5. 
To estimate ∆m, we begin with the Bondi-Hoyle ac- cretion rate, ˙ m, given by: ˙ m= -4πG2m2 BHρ +4πG2m2 +BH +ρ star -(c2 s+σ2)3/2 +(c2 +s ++σ2)3/2 , (5) 3Closer to the SMBH, ∆t may exceed the collision timescale by a factor of a few for steep density profiles. We include a safe- -guard in our code which takes the ratio t coll/∆t and rounds it +guard in our code which takes the ratio t +coll +/∆t and rounds it tothenearestinteger. Wetakethisintegertobethenumberof collisionsandincreasetheBHmassaccordingly. Figure 2. Weconsideranexamplethathighlightsthemass @@ -371,16 +466,18 @@ analytical predictions detailed in Section 2.4. wherec s isthespeedofsoundinthestarandρ star isits density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima et al. 1985; Edgar 2004, see latter for a review). We -approximate the density as 1M (cid:12)/(4πR3 (cid:12)/3) and take +approximate the density as 1M (cid:12) /(4πR3 (cid:12) /3) and take the conservative value of c s = 500 km s−1, which is consistent with the sound speed inside a 1 M (cid:12) star (Christensen-Dalsgaardetal.1996)andallowsustoset a lower limit on ∆m. To find ∆m, at each collision, we have: -∆m=min(˙ m×t (cid:63),cross,1 M (cid:12)) , (6) +∆m=min(˙ m×t (cid:63),cross ,1 M (cid:12) ) , (6) where t (cid:63),cross -∼R (cid:12)/σ is the crossing time of the BH in +∼R +(cid:12) +/σ is the crossing time of the BH in thestar. Wetaketheminimumbetween ˙ m×t (cid:63),cross and @@ -399,8 +496,12 @@ start with identical populations of 10M BHs (grey) and simulate growth through collisions using a statisti- cal approach. As the BHs grow, the collision timescale, -which depends on m BH, decreases. Simultaneously, -∆m, which also depends on m BH, increases. The re- +which depends on m +BH +, decreases. Simultaneously, +∆m, which also depends on m +BH +, increases. The re- sult is exponential growth (see discussion and details surrounding Eq. (8)). 
In Figure 2, however, the simula- tions assume α=1 for the stellar density profile, ensur- @@ -409,9 +510,13 @@ IMBH Formation in Galactic Nuclei 5 ulation time, 10 Gyr. Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: -m final(t +m +final +(t coll -→const.)=m initial+∆m +→const.)=m +initial ++∆m T t coll @@ -428,7 +533,9 @@ This equation is plotted in Figure 2 for both cases, accretion (blue), and the curves coincide with the cor- responding simulated results. The shaded regions rep- resent one standard deviation from Eq. (7), calculated -usingthesquarerootofthenumberofcollisions,T/t coll. +usingthesquarerootofthenumberofcollisions,T/t +coll +. As indicated by the results in red, in the absence of Bondi-Hoyle-Lyttletonaccretion, theBHsclosesttothe SMBH experience the most growth because they have @@ -443,15 +550,37 @@ star’s mass. Eq. 7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger index α in the density profile (see Figure 1), the growth -isveryefficientand∆mquicklyapproaches1M (cid:12). Con- -sequently, while we can now assume ∆m = 1M (cid:12), we +isveryefficientand∆mquicklyapproaches1M +(cid:12) +. Con- +sequently, while we can now assume ∆m = 1M +(cid:12) +, we cannolongerassumethecollisiontimescaleisconstant. The final mass grows exponentially as a result. For -∆m = 1M (cid:12), the general solution is reached by solving -the differential equation dm/dt=1M (cid:12)/t coll(m), which +∆m = 1M +(cid:12) +, the general solution is reached by solving +the differential equation dm/dt=1M +(cid:12) +/t +coll +(m), which gives: -m final(∆m→1M (cid:12))=−A+(m initial+A)eCT (8) -where A=σ2R star/G and C =2πGn starR star/σ. As an +m +final +(∆m→1M +(cid:12) +)=−A+(m +initial ++A)eCT (8) +where A=σ2R +star +/G and C =2πGn +star +R +star +/σ. As an example,weplotthiscurveinpurplefortheα=2case, in Figure 3, which agrees with the simulated masses. 2.5. 
Uncertainties in Accretion @@ -482,7 +611,7 @@ tivated inefficient accretion model. Several studies have invoked momentum-driven winds in BH accretion (e.g., Murray et al. 2005; Ostriker et al. 2010; Brennan et al. 2018). We thus estimate the fraction of captured mass -accreted to be approximately v esc/(cη), where v esc is +accreted to be approximately v esc /(cη), where v esc is the escape velocity from the BH at 1 R (cid:12) and η is the accretion efficiency at the ISCO. We take η to be 0.1 (e.g., Yu & Tremaine 2002). This expression for the @@ -506,26 +635,41 @@ characteristic timescale to merge a BH with an SMBH is given by: t GW -≈2.9×1012 -yr(cid:18) +≈2.9×1012 yr +(cid:18) M • 106 M -(cid:12)(cid:19)−1(cid:18) +(cid:12) +(cid:19)−1(cid:18) m BH 106 M -(cid:12)(cid:19)−1 -×(cid:18) -M •+m BH -2×106 M -(cid:12)(cid:19)−1(cid:18) +(cid:12) +(cid:19)−1 +× +(cid:18) +M • +m BH +2×106 M (cid:12) +(cid:19)−1(cid:18) a • -10−2 -pc(cid:19)4 -×f(e •)(1−e2 •)7/2 , (9) -where f(e •) is a function of e •. For all values of e •, -f(e •) is between 0.979 and 1.81 (Blaes et al. 2002). We +10−2 pc +(cid:19)4 +×f(e +• +)(1−e2 +• +)7/2 , (9) +where f(e +• +) is a function of e +• +. For all values of e +• +, +f(e +• +) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1×105M (cid:12) BH in Figure 1 in @@ -568,7 +712,9 @@ massiveenoughtomergewiththeSMBHthroughGWs. Following the method detailed in Section 2.6, when a given BH meets the criterion t GW -< t elapsed, we mark +< t +elapsed +, we mark 4For comparison, we also incrementally changed the semimajor axis and eccentricity from GW emission following the equations in Peters & Mathews (1963b). 
This method leads to a slight @@ -594,11 +740,16 @@ t relax =0.34 σ3 -G2ρ(cid:104)M ∗(cid:105)lnΛ -rlx, (10) +G2ρ(cid:104)M +∗ +(cid:105)lnΛ +rlx +, (10) where lnΛ rlx -is the Coulomb logarithm and (cid:104)M ∗(cid:105) is the +is the Coulomb logarithm and (cid:104)M +∗ +(cid:105) is the average mass of the surrounding objects, here assumed to be 1M (cid:12) @@ -632,7 +783,11 @@ Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, t seg -≈ (cid:104)M ∗(cid:105)/m BH×t +≈ (cid:104)M +∗ +(cid:105)/m +BH +×t relax (e.g., Spitzer 1987; Fregeau et al. 2002; Merritt 2006), which is typically an @@ -644,13 +799,18 @@ tum each time it orbits the SMBH. We apply a small instantaneous velocity kick to the BH, denoted as ∆v. We draw ∆v from a Guassian distribution with average of zero and a standard deviation of ∆v -rlx/√ +rlx +/ +√ 3, where ∆v rlx = v -•(cid:112) -P •/t +• +(cid:112) +P +• +/t rlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). The @@ -724,7 +884,9 @@ leads to a final distri- bution with an average of ∼ 200 M (cid:12) and a median of -∼45 M (cid:12), which lies within the mass gap. +∼45 M +(cid:12) +, which lies within the mass gap. 3. DISCUSSION AND PREDICTIONS We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass @@ -748,7 +910,9 @@ Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH will grow in mass. The increase may equal star’s en- tire mass if the relative velocity is smaller than the es- -cape velocity from the BH at 1 R (cid:12). However, near the +cape velocity from the BH at 1 R +(cid:12) +. However, near the SMBH, the velocity dispersion may be larger than the escapevelocityfromtheBHatthestar’sradius. 
Inthis limit, the BH captures a “tunnel” of material through @@ -768,7 +932,9 @@ However, the inclusion of relaxation processes in the simulations dampens the influence of the stellar density profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in -mass, but their maximum mass is smaller (∼ 104 M (cid:12)). +mass, but their maximum mass is smaller (∼ 104 M +(cid:12) +). Additionally, the final masses have no apparent depen- dence on distance from the SMBH (see Figure 4). Most simulations in our study assume that the BHs @@ -786,7 +952,9 @@ final mass distribution for momentum-driven winds in Figure 4. Importantly, we find that BHs within the mass gap still form naturally despite the substantially reduced accretion. About 5% of the BHs grow by 10 -to 100 M (cid:12). Furthermore, if we increase this ∆M esti- +to 100 M +(cid:12) +. Furthermore, if we increase this ∆M esti- mate by a factor of 2 (i.e., use η = 0.05), the simula- tionproducesa3.5×103 M (cid:12) @@ -876,15 +1044,23 @@ tions must already reflect ongoing processes like star- star collisions and replenishment. Sch¨odel et al. (2018) findtheobservedstellarmassenclosedwithin0.01pcof the Milky Way’s Galactic Center to be approximately -180 M (cid:12). This estimate is consistent to order of magni- +180 M +(cid:12) +. This estimate is consistent to order of magni- tude with our α = 1.25 case. In a simulation like those depictedinFigure4, whichincluderelaxation, α=1.25 -leads to a maximum IMBH mass of 140 M (cid:12). Further- +leads to a maximum IMBH mass of 140 M +(cid:12) +. Further- more, while the stellar mass within 0.01 pc may be a -few hundred M (cid:12), Do et al. (2019) and GRAVITY Col- +few hundred M +(cid:12) +, Do et al. (2019) and GRAVITY Col- laboration et al. (2020) set an upper limit on the mass enclosedwithintheorbitofS0-2tobeaboutafewthou- -sand M (cid:12), or 0.1% of the central mass. 
This upper limit +sand M +(cid:12) +, or 0.1% of the central mass. This upper limit canincludemassthatwaspreviouslyinstarsbutisnow inBHs. Inthatcase,the180M (cid:12) diff --git a/read/results/pdfplumber/2201.00029.txt b/read/results/pdfplumber/2201.00029.txt index 9884fb4..462334c 100644 --- a/read/results/pdfplumber/2201.00029.txt +++ b/read/results/pdfplumber/2201.00029.txt @@ -125,7 +125,9 @@ Q7 Re-bin 2 5.889 0.597 1.965 19.2 TABLE I: The table displays the various frequencies collected from Q7 and the information found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, and therefore was not used in any calculations determining the average period of rotation. The -values under corrected flux magnitude are relative to our significant frequency cutoff of 3 𝝈, thus +values under corrected flux magnitude are relative to our significant frequency cutoff of 3 +𝝈 +, thus negative numbers are under the cutoff. Q13 Significant Data Points @@ -150,7 +152,9 @@ TABLE II: The table displays the various frequencies collected from Q13 and the found through calculations to find period and SNR. The last two significant frequencies (11.641 µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in further detail in the Conclusions section of this paper. The values under corrected flux magnitude -are relative to our significant frequency cutoff of 3 𝝈, thus negative numbers are under the cutoff. +are relative to our significant frequency cutoff of 3 +𝝈 +, thus negative numbers are under the cutoff. 8 First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) 0.933 0.933 0.215 0.216 @@ -207,7 +211,9 @@ First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µ 15.881 16.823 TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) -above the cutoff of 3 𝝈. 
The minor shifting of significant frequencies between re-bins is a by- +above the cutoff of 3 +𝝈 +. The minor shifting of significant frequencies between re-bins is a by- product of the method, and we calculated for such errors when finding our average. Conclusions As our research used the long-cadence data from Kepler, much of the high-frequency diff --git a/read/results/pdfplumber/2201.00037.txt b/read/results/pdfplumber/2201.00037.txt index f149c3c..8709bfc 100644 --- a/read/results/pdfplumber/2201.00037.txt +++ b/read/results/pdfplumber/2201.00037.txt @@ -12,11 +12,46 @@ cores into a common precession motion. proaches that expected for a rigid planet. Corresponding author: Mathieu Dumberry, dumberry@ualberta.ca –1– -arXiv:2201.00037v1 -[astro-ph.EP] -31 -Dec -2021 +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +0 +3 +7 +v +1 +[ +a +s +t +r +o +- +p +h +. +E +P +] +3 +1 +D +e +c +2 +0 +2 +1 Confidential manuscript submitted to JGR-Planets Abstract We present a model of the Cassini state of Mercury that comprises an inner core, a fluid core @@ -55,7 +90,9 @@ its present-day orientation can be reconstructed from ephemerides data [Yseboodt 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is reconstructed by ephemerides data. The latest estimate is a retrograde precession period of 325,513 yr with an inclination angle of I =8.5330◦ between the orbit and Laplace plane normals [Ba- -land et al., 2017]. Measurements of the obliquity ε m, defined as the angle of misalignment be- +land et al., 2017]. 
Measurements of the obliquity ε +m +, defined as the angle of misalignment be- tween the spin-symmetry axis and the orbit normal, have been obtained by different techniques, including ground based radar observations [Margot et al., 2007, 2012], and stereo digital ter- rain images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Mar- @@ -70,12 +107,18 @@ descending node of orbit Ω p -ê 3I +ê +3 I -ê 3L +I +ê +3 +L ε m -I ê 3p +I ê +3 +p ascending node of orbit descending @@ -85,15 +128,23 @@ plane orbital direction S -ê 3I ê 3L +ê +3 +I ê +3 +L M ε m orbital plane Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded -rectangle) and the Cassini state of Mercury. The normal to the orbital plane (ˆ eI 3) is offset from the nor- -mal to the Laplace plane (ˆ eL 3) by an angle I = 8.5330◦. The symmetry axis of the mantle ˆ ep +rectangle) and the Cassini state of Mercury. The normal to the orbital plane (ˆ eI +3 +) is offset from the nor- +mal to the Laplace plane (ˆ eL +3 +) by an angle I = 8.5330◦. The symmetry axis of the mantle ˆ ep 3 is offset from ˆ eI @@ -137,7 +188,9 @@ sipation, and at equilibrium in the Cassini state, the spin axis of the fluid co symmetry axis of the inner core should both also precess about the normal to the Laplace plane in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek, 2016], although -their obliquity angles may be different than ε m. Whether the spin axis of the fluid core is brought +their obliquity angles may be different than ε +m +. 
Whether the spin axis of the fluid core is brought into an alignment with the mantle obliquity depends primarily on the pressure torque (also re- ferred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e, 1910]. The more flat- @@ -153,7 +206,9 @@ thermore, viscous and electromagnetic (EM) coupling at the CMB can further restr alignment between the mantle and core [Peale et al., 2014]. If an inner core is present, its obliquity angle is determined by the sum of the torques act- ing on it. This includes the gravitational torque from the Sun acting on its tilted figure, anal- -ogous to the torque applied on the tilted mantle that sets the obliquity ε m. In addition, the +ogous to the torque applied on the tilted mantle that sets the obliquity ε +m +. In addition, the tilt of the inner core also depends on the gravitational torque imposed by the mantle and the pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle grav- itational torque dominates, the inner core tilt is expected to remain closely aligned with the @@ -194,7 +249,21 @@ symmetry axis of the mantle and gravity field may differ. 2.1 The interior structure of Mercury Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid outer core, a solid mantle, and a thin crust. The outer radii of each of these layers, are denoted -by r s, r f, r m, and R, and their densities by ρ s, ρ f, ρ m, and ρ c, respectively. The inner core ra- +by r +s +, r +f +, r +m +, and R, and their densities by ρ +s +, ρ +f +, ρ +m +, and ρ +c +, respectively. The inner core ra- dius r s corresponds to the ICB radius, the fluid core radius r @@ -205,22 +274,34 @@ sure with depth are not negligible in the core of Mercury. However adopting unif simplifies the analytical expressions of the model while still capturing the first order rotational dynamics. 
Uniform densities were also adopted by Peale et al. [2016] and following the same strategy facilitates comparisons between our results. -We build our interior model as detailed in Peale et al. [2016]. We first specify r s, ρ +We build our interior model as detailed in Peale et al. [2016]. We first specify r +s +, ρ s (or a density contrast at the ICB), the crustal density ρ c -and crustal thickness h=R−r m. The -three unknowns r f, ρ +and crustal thickness h=R−r +m +. The +three unknowns r +f +, ρ f and ρ m are then solved such that the interior model is consistent with the known mass M and chosen values of the moments of inertia of the whole planet C and that -of the mantle and crust C m. +of the mantle and crust C +m +. Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) -by (cid:15) i, defined as the difference between the mean equatorial and polar radii, divided by the mean -spherical radius. Likewise, we denote the equatorial flattening by the variable ξ i, defined as the +by (cid:15) +i +, defined as the difference between the mean equatorial and polar radii, divided by the mean +spherical radius. Likewise, we denote the equatorial flattening by the variable ξ +i +, defined as the difference between the maximum and minimum equatorial radii, divided by the mean spher- ical radius. As above, we use the subscript i = s, f, m and r, to denote the polar or equa- torial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. @@ -230,7 +311,9 @@ drostatic equilibrium with the imposed gravitational potential induced by the fl CrMB and surface. The flattenings at all interior boundaries are specified such that they are consistent with the observed degree 2 spherical harmonic coefficients of gravity J 2 -and C 22; their +and C +22 +; their numerical values are given in Table 1. 
Specifically, J 2 and C @@ -245,11 +328,35 @@ MR2 15 1 MR2 -(cid:2) (ρ s−ρ f)r5 s(cid:15) s+(ρ +(cid:2) (ρ +s +−ρ +f +)r5 +s +(cid:15) +s ++(ρ f -−ρ m)r5 f(cid:15) +−ρ +m +)r5 +f +(cid:15) f -+(ρ m−ρ c)r5 m(cid:15) m+ρ cR5(cid:15) r(cid:3) , (1a) ++(ρ +m +−ρ +c +)r5 +m +(cid:15) +m ++ρ +c +R5(cid:15) +r +(cid:3) , (1a) C 22 = B−A @@ -258,11 +365,35 @@ C 15 1 4MR2 -(cid:2) (ρ s−ρ f)r5 sξ s+(ρ +(cid:2) (ρ +s +−ρ +f +)r5 +s +ξ +s ++(ρ f -−ρ m)r5 fξ +−ρ +m +)r5 +f +ξ f -+(ρ m−ρ c)r5 mξ m+ρ cR5ξ r(cid:3) . (1b) ++(ρ +m +−ρ +c +)r5 +m +ξ +m ++ρ +c +R5ξ +r +(cid:3) . (1b) where ¯ A is the mean equatorial moment of inertia defined below. The same procedure was used in Peale et al. [2016] and the mathematical details are given in Equations (18-20) of Dumberry –5– @@ -275,7 +406,9 @@ o orbit precession rate, Ω p 2π/325,513 yr−1 Baland et al. [2017] -Poincar´e number, δω =Ω p/Ω +Poincar´e number, δω =Ω +p +/Ω o 4.9327×10−7 orbital eccentricity, e @@ -309,7 +442,9 @@ are calculated from (cid:15) r =(¯ a−c)/R and ξ r -=(a−b)/R, where ¯ a= 1 2(a+b) and where +=(a−b)/R, where ¯ a= 1 +2 +(a+b) and where a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semimajor, intermediate and semiminor axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J 2 @@ -326,26 +461,45 @@ ments of inertia of the fluid core (C f > B f -> A f) and solid inner core (C +> A +f +) and solid inner core (C s > B s -> A s) +> A +s +) along with the mean equatorial moments of inertia ¯ A= 1 -2(A+B), ¯ A +2 +(A+B), ¯ A f = 1 -2(A +2 +(A f -+B f), ¯ A ++B +f +), ¯ A s = 1 -2(A s+B s). (2) -From these, we define the polar (e, e f, e s) and equatorial (γ, γ s) dynamical ellipticities of the +2 +(A +s ++B +s +). 
(2) +From these, we define the polar (e, e +f +, e +s +) and equatorial (γ, γ +s +) dynamical ellipticities of the whole planet (no subscript), fluid core (subscript f) and solid inner core (subscript s), which enter our rotational model, e= @@ -363,7 +517,9 @@ f e s = -C s− ¯ A +C +s +− ¯ A s ¯ A s @@ -374,7 +530,9 @@ B−A γ s = -B s−A +B +s +−A s ¯ A s @@ -387,10 +545,14 @@ by e= MR2 ¯ A -J 2, γ = +J +2 +, γ = 4MR2 ¯ A -C 22. (4) +C +22 +. (4) –6– Confidential manuscript submitted to JGR-Planets θ @@ -401,37 +563,69 @@ m Ω Ω s -Ω -f -ê 3p -ê 3s -ê 3I +Ω f +ê +3 +p +ê 3 s +ê +3 +I I ε m θ p -ê 3L -ê 1p -ê 2p -Cassini -plane -ωΩ ot -ê 3I +ê +3 +L +ê +1 +p +ê +2 +p +Cassini plane +ωΩ +o +t +ê 3 I I ε m -ê 3p +ê 3 p ê 1 -ê 2p -ê 3L +ê +2 +p +ê 3 L a) b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) -in a frame attached to the rotating mantle. The orbit normal (ˆ eI 3) is tilted by an angle I = 8.533◦ from -the Laplace normal (ˆ eL 3) and the symmetry axis of Mercury’s mantle (ˆ ep 3) is tilted by an obliquity ε +in a frame attached to the rotating mantle. The orbit normal (ˆ eI +3 +) is tilted by an angle I = 8.533◦ from +the Laplace normal (ˆ eL +3 +) and the symmetry axis of Mercury’s mantle (ˆ ep +3 +) is tilted by an obliquity ε +m +with respect to ˆ eI +3 +. Shown in (a) are the orientations of the symmetry axis of the inner core (ˆ es +3 +), the +rotation rate vectors of the mantle (Ω), fluid core (Ω +f +) and inner core (Ω +f +) and angles θ +p +, θ +n +, θ m -with respect to ˆ eI 3. Shown in (a) are the orientations of the symmetry axis of the inner core (ˆ es 3), the -rotation rate vectors of the mantle (Ω), fluid core (Ω f) and inner core (Ω f) and angles θ p, θ n, θ m, θ +, θ f and θ s @@ -448,7 +642,9 @@ mantle (b), the Cassini plane is rotating at frequency ωΩ o = −Ω o -− Ω pcosI in the longitudinal direc- +− Ω +p +cosI in the longitudinal direc- tion. 
The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration. –7– @@ -463,8 +659,14 @@ o day−1, with Ω o =1.5n. Mercury’s rotational state is also characterized by a Cassini state whereby -the orientations of the orbit normal (ˆ eI 3) and of the mantle symmetry axis (ˆ ep 3) are both copla- -nar with, and precess about, the normal to the Laplace plane (ˆ eL 3). The orientation of the Laplace +the orientations of the orbit normal (ˆ eI +3 +) and of the mantle symmetry axis (ˆ ep +3 +) are both copla- +nar with, and precess about, the normal to the Laplace plane (ˆ eL +3 +). The orientation of the Laplace plane varies on long timescales, but it can be taken as invariable in inertial space for our present purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle between ˆ eL 3 @@ -483,7 +685,9 @@ and ˆ ep 3 is θ p -= I +ε m. The precession of ˆ eI += I +ε +m +. The precession of ˆ eI 3 and ˆ ep 3 @@ -496,7 +700,9 @@ of the mantle are expected to remain in close alignment, but they do not coincid define the rotation rate vector of the mantle by Ω, and its misalignment from ˆ ep 3 by an angle -θ m. Note that θ +θ +m +. Note that θ m (cid:28) ε m @@ -518,9 +724,13 @@ metry axis of the inner core is defined by unit vector ˆ es and its misalignment from ˆ ep 3 by an -angle θ n. The rotation vectors of the fluid core and inner core are defined as Ω +angle θ +n +. The rotation vectors of the fluid core and inner core are defined as Ω f -and Ω s, re- +and Ω +s +, re- spectively, and their misalignment from the rotation vector of the mantle Ω are defined by an- gles θ f @@ -529,34 +739,61 @@ s (see Figure 2a). The rotation and symmetry axes of the inner core remain in close alignment, so θ n -≈θ s. To be formal in our definition of the different angles of misalignment, +≈θ +s +. 
To be formal in our definition of the different angles of misalignment, for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. -At equilibrium in the Cassini state, the three orientation vectors (ˆ eI 3, ˆ ep 3, ˆ es 3) and three -rotation vectors (Ω, Ω f, Ω s) are forced to precess about ˆ eL +At equilibrium in the Cassini state, the three orientation vectors (ˆ eI +3 +, ˆ ep +3 +, ˆ es +3 +) and three +rotation vectors (Ω, Ω +f +, Ω +s +) are forced to precess about ˆ eL 3 at the same frequency. If we ne- glect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed -in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω p. Viewed -in the frame attached to the mantle rotating at sidereal frequency Ω o, the Cassini plane is ro- +in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω +p +. Viewed +in the frame attached to the mantle rotating at sidereal frequency Ω +o +, the Cassini plane is ro- tating in a retrograde direction at frequency ωΩ o (see Figure 2b), where ω, expressed in cycles per Mercury day, is equal to -ω =−1−δωcos(θ p). (5) -The factor δω = Ω p/Ω +ω =−1−δωcos(θ +p +). (5) +The factor δω = Ω +p +/Ω o = 4.933×10−7 is the Poincar´e number, expressing the ratio of the forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal as seen in the mantle frame is expressed as d -dtˆ eL +dt +ˆ eL 3 +Ω׈ eL 3 =0, (6) or equivalently, by Equation (19e) of Stys and Dumberry [2018], -ωsin(θ p)+sin(θ m+θ p)=0. (7) +ωsin(θ +p +)+sin(θ +m ++θ +p +)=0. (7) –8– Confidential manuscript submitted to JGR-Planets This expresses a formal connection between θ @@ -564,8 +801,14 @@ p and θ m which is independent of the interior struc- -ture of Mercury. Using Equation (5) and cos(θ m)→1, this connection can be rewritten as -sin(θ m)=δω sin(θ p). (8) +ture of Mercury. 
Using Equation (5) and cos(θ +m +)→1, this connection can be rewritten as +sin(θ +m +)=δω sin(θ +p +). (8) and thus the relative amplitudes of θ m and θ @@ -587,45 +830,91 @@ ods of Mercury, the gravitational solar torque that is relevant to the Cassini s torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, point- ing in the same direction as the vector connecting the Sun to the descending node of Mercury’s orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque -is periodic, rotating at frequency ωΩ o. Setting the equatorial directions ˆ ep +is periodic, rotating at frequency ωΩ +o +. Setting the equatorial directions ˆ ep 1 and ˆ ep 2 to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial components of this periodic applied torque in a compact form as -Γ 1(t)+iΓ 2(t)=−i˜ Γ(ω) exp[iωΩ ot], (9) -where ˜ Γ(ω) represents the amplitude of the torque at frequency ωΩ o. In response to this torque, -the axes defining all angles (θ p, ε m, θ m, θ f, θ s, θ n) as viewed in the mantle frame are also ro- +Γ +1 +(t)+iΓ +2 +(t)=−i˜ Γ(ω) exp[iωΩ +o +t], (9) +where ˜ Γ(ω) represents the amplitude of the torque at frequency ωΩ +o +. In response to this torque, +the axes defining all angles (θ +p +, ε +m +, θ +m +, θ +f +, θ +s +, θ +n +) as viewed in the mantle frame are also ro- tating at frequency ωΩ o (see Figure 2). The longitudinal direction of each of these angles at a specific time t can then also be written in the equatorial complex plane and is proportional -to exp[iωΩ ot]. For instance, the two equatorial time-dependent components θ +to exp[iωΩ +o +t]. 
For instance, the two equatorial time-dependent components θ m1 and θ m2 of the -angle θ m, as seen in the mantle frame, can be written as -θ m1(t)+iθ m2(t)= ˜ m exp[iωΩ ot], (10a) +angle θ +m +, as seen in the mantle frame, can be written as +θ +m1 +(t)+iθ +m2 +(t)= ˜ m exp[iωΩ +o +t], (10a) where ˜ m≡ ˜ m(ω)=Re[˜ m]+iIm[˜ m], (10b) -is the amplitude at frequency ωΩ o. Equivalent definitions apply for all other angles, with the +is the amplitude at frequency ωΩ +o +. Equivalent definitions apply for all other angles, with the connection as follows: θ m ⇔ ˜ m, θ f -⇔ ˜ m f, θ +⇔ ˜ m +f +, θ +s +⇔ ˜ m s -⇔ ˜ m s, θ +, θ n -⇔ ˜ n s, θ +⇔ ˜ n +s +, θ p ⇔ ˜ p, ε m -⇔ ˜ ε m. (11) -The notation ˜ m, ˜ m f, ˜ m s, ˜ n +⇔ ˜ ε +m +. (11) +The notation ˜ m, ˜ m +f +, ˜ m +s +, ˜ n s follows that introduced in the original model of Mathews et al. [1991]. Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase re- @@ -636,9 +925,13 @@ at the boundaries of the fluid core. In the absence of dissipation, all tilded v real. We concentrate our analysis in this work on the real part of the solutions, which corre- sponds to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ ε m -corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε m, +corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε +m +, though we keep the tilde notation in the presentation of our results to emphasize that it rep- -resents the real part of the solution from our system. Furthermore, since ˜ m (cid:28) ˜ ε m, we often +resents the real part of the solution from our system. Furthermore, since ˜ m (cid:28) ˜ ε +m +, we often refer to ˜ ε m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is @@ -649,8 +942,8 @@ rived can found in Mathews et al. [1991] and in Dumberry and Wieczorek [2016]. 
T tions describe, respectively, the time rate of change of the angular momenta of the whole of Mer- cury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three equations are -(ω−e)˜ -m+(1+ω)(cid:34) +(ω−e)˜ m+(1+ω) +(cid:34) ¯ A f ¯ A @@ -660,59 +953,96 @@ f ¯ A s ¯ A -˜ m s+α 3e +˜ m +s ++α +3 +e s ¯ A s ¯ A ˜ n -s(cid:35) +s +(cid:35) = 1 iΩ2 o -¯ -A(cid:16) +¯ A +(cid:16) ˜ Γ -sun(cid:17) +sun +(cid:17) , (12a) -ω˜ m+(1+ω+e f) ˜ m +ω˜ m+(1+ω+e f -−ωα 1e +) ˜ m +f +−ωα +1 +e s ¯ A s ¯ A -f˜ n +f +˜ n s = 1 iΩ2 o ¯ A -f(cid:16) -−˜ Γ cmb−˜ Γ -icb(cid:17) +f +(cid:16) +−˜ Γ +cmb +−˜ Γ +icb +(cid:17) , (12b) -(ω−α 3e s)˜ m+α 1e s˜ m +(ω−α +3 +e +s +)˜ m+α +1 +e +s +˜ m f -+(1+ω) ˜ m s+(1+ω−α 2)e s˜ n ++(1+ω) ˜ m +s ++(1+ω−α +2 +)e +s +˜ n s = 1 iΩ2 o ¯ A -s(cid:16) -˜ Γs sun+˜ Γ -icb(cid:17) +s +(cid:16) +˜ Γs +sun ++˜ Γ +icb +(cid:17) , (12c) and a fourth equation consists of a kinematic relation that expresses the change in the orien- tation of the inner core figure as a result of its own rotation, -˜ m s+ω˜ n +˜ m +s ++ω˜ n s =0. (12d) -In these equations, the parameters α 1, α +In these equations, the parameters α +1 +, α 2 and α 3 @@ -727,9 +1057,17 @@ f s , α 3 -=1−α 1, α +=1−α +1 +, α 2 -=α 1−α 3α g, (13a) +=α +1 +−α +3 +α +g +, (13a) where the parameter α g is a measure of the ratio of the gravitational to inertial torque applied @@ -740,26 +1078,53 @@ g 8πG 5Ω2 o -[ρ c((cid:15) r−(cid:15) m)+ρ m((cid:15) m−(cid:15) f)+ρ f(cid:15) f] , (13b) +[ρ +c +((cid:15) +r +−(cid:15) +m +)+ρ +m +((cid:15) +m +−(cid:15) +f +)+ρ +f +(cid:15) +f +] , (13b) where G is the gravitational constant. ˜ Γ sun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. 
For a small mantle obliquity ˜ ε m -and a small inner core tilt ˜ n s, it is given by +and a small inner core tilt ˜ n +s +, it is given by ˜ Γ sun =−iΩ2 o -¯ -A(cid:18) -φ m˜ ε m+ +¯ A +(cid:18) +φ +m +˜ ε +m ++ ¯ A s ¯ A -α 3φ s˜ n -s(cid:19) +α +3 +φ +s +˜ n +s +(cid:19) , (14) where –10– @@ -767,56 +1132,95 @@ Confidential manuscript submitted to JGR-Planets φ m = -3 2n2 +3 +2 +n2 Ω2 o (cid:20) -G 210e+ +G +210 +e+ 1 -2G -201γ(cid:21) -, (15a) -φ +2 +G +201 +γ +(cid:21) +, (15a) +φ s = -3 2n2 +3 +2 +n2 Ω2 o (cid:20) -G 210e s+ +G +210 +e +s ++ 1 -2G 201γ -s(cid:21) +2 +G +201 +γ +s +(cid:21) , (15b) and where G 210 and G 201 -are functions of the orbital eccentricity e c, +are functions of the orbital eccentricity e +c +, G 210 = 1 -(1−e2 c)3/2 +(1−e2 +c +)3/2 , (16a) G 201 = 7 -2e c− +2 +e +c +− 123 16 e3 c + 489 -128e5 c. (16b) -The gravitational torque by the Sun acting on the inner core alone, ˜ Γs sun, is +128 +e5 +c +. (16b) +The gravitational torque by the Sun acting on the inner core alone, ˜ Γs +sun +, is ˜ Γs sun =−iΩ2 o -¯ A sα 3φ s(˜ ε m+˜ n s). (17) +¯ A +s +α +3 +φ +s +(˜ ε +m ++˜ n +s +). (17) ˜ Γ cmb and ˜ Γ @@ -833,16 +1237,26 @@ lar velocities at each boundary [e.g Buffett, 1992; Buffett et al., 2002], icb =iΩ2 o -¯ A sK icb(˜ m +¯ A +s +K +icb +(˜ m f -− ˜ m s), (18a) +− ˜ m +s +), (18a) ˜ Γ cmb =iΩ2 o -¯ A fK +¯ A +f +K cmb -˜ m f. (18b) +˜ m +f +. (18b) Specific expressions for K icb and K @@ -852,7 +1266,9 @@ effects of viscous and EM coupling, respectively. A fifth equation is required to connect this interior model to the obliquity of the mantle, and this is provided by Equation (7). For small angles θ m -and θ p, this gives [e.g. Mathews et al., +and θ +p +, this gives [e.g. Mathews et al., 1991; Dumberry and Wieczorek, 2016; Baland et al., 2019] ˜ m+(1+ω)˜ p=0. 
(19) For Mercury, it is more convenient to connect the internal model with ˜ ε @@ -864,8 +1280,12 @@ p m ≈ 2 arcmin and thus the latter obeys more strictly the condition of small angles assumed in our framework. Furthermore, the external torques act- -ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ ε m. Writ- -ten in terms of ˜ ε m, and with the approximation of ˜ ε +ing on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ ε +m +. Writ- +ten in terms of ˜ ε +m +, and with the approximation of ˜ ε m (cid:28)1 and ˜ m(cid:28)1, Equation (7) becomes ˜ m+(1+ω)˜ ε @@ -874,19 +1294,34 @@ m Likewise, the frequency ω from Equation (5) can be written simply in terms of I, ω =−1−δωcosI. (21) The set of four Equations (12) with the addition of Equation (20) form a linear system -of equations for the five rotational variables ˜ m, ˜ m f, ˜ m s, ˜ n +of equations for the five rotational variables ˜ m, ˜ m +f +, ˜ m +s +, ˜ n s -and ˜ ε m. It captures the response +and ˜ ε +m +. It captures the response of Mercury, in the frequency domain, when subject to a periodic solar torque applied at fre- quency ω. 
The system can be written in a matrix form as –11– Confidential manuscript submitted to JGR-Planets M·x =y, (22a) where the solution (x) and forcing (y) vectors are -xT =[˜ m, ˜ m f, ˜ m s,˜ n s,˜ ε m] , (22b) +xT =[˜ m, ˜ m +f +, ˜ m +s +,˜ n +s +,˜ ε +m +] , (22b) yT =[0,0,0,0,−(1+ω)tanI] , (22c) and the elements of matrix M are -M= +M= +     @@ -898,27 +1333,37 @@ M= ¯ A ¯ As ¯ A -α 3(cid:0) (1+ω)e s+φ s(cid:1) φ +α +3 +(cid:0) (1+ω)e +s ++φ +s +(cid:1) φ m ω 1+ω+e f -+K cmb+ ¯ As -¯ -AfK ++K +cmb ++ ¯ As +¯ Af +K icb − ¯ As -¯ -AfK +¯ Af +K icb -−ωe sα +−ωe +s +α 1 ¯ As ¯ Af 0 -ω−α 3e s α 1e s−K icb 1+ω+K icb (1+ω−α 2)e s+α 3φ s α 3φ s +ω−α 3 e s α 1 e s −K icb 1+ω+K icb (1+ω−α 2 )e s +α 3 φ s α 3 φ s 0 0 1 ω 0 -1 0 0 0 -(1+ω) +1 0 0 0 (1+ω) +     @@ -964,7 +1409,9 @@ Confidential manuscript submitted to JGR-Planets 2.3.1 The Cassini state of a single-body, rigid Mercury For a rigid planet with no fluid and solid cores, our system of equations reduces to Equa- tions (12a) and (20), -(ω−e)˜ m+φ m˜ ε +(ω−e)˜ m+φ +m +˜ ε m =0, (23a) ˜ m+(1+ω)˜ ε @@ -972,38 +1419,81 @@ m =−(1+ω)tanI. (23b) Using Equation (21), δω (cid:28)1, and the approximation ¯ A(1+e+δωcosI)=C+ ¯ AδωcosI ≈ C, these can be written as -C˜ m= ¯ Aφ m˜ ε m, (24a) -˜ -m=δω(cid:0) +C˜ m= ¯ Aφ +m +˜ ε +m +, (24a) +˜ m=δω +(cid:0) sinI+cosI ˜ ε -m(cid:1) +m +(cid:1) . (24b) -Equation (24b) gives a direct relationship between ˜ m and ˜ ε m. For I = 8.5330◦, δω = +Equation (24b) gives a direct relationship between ˜ m and ˜ ε +m +. For I = 8.5330◦, δω = 4.9327×10−7 and taking ˜ ε m =2.04 arcmin, this gives ˜ m=2.52×10−4 arcmin, much smaller -than ˜ ε m: the offset of the rotation axis of the mantle with respect to its symmetry axis is very +than ˜ ε +m +: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. 
Substituting Equation (24b) in Equation (24a) gives -CΩ p(cid:0) sinI+cosI ˜ ε m(cid:1) = ¯ AΩ oφ m˜ ε m, (25) -and isolating for ˜ ε m, +CΩ +p +(cid:0) sinI+cosI ˜ ε +m +(cid:1) = ¯ AΩ +o +φ +m +˜ ε +m +, (25) +and isolating for ˜ ε +m +, ˜ ε m = -CΩ psinI -−CΩ pcosI+ ¯ AΩ oφ +CΩ +p +sinI +−CΩ +p +cosI+ ¯ AΩ +o +φ m . (26) Upon using Equations (4), (15a), and Ω o -= 3 2n, we can write += 3 +2 +n, we can write ˜ ε m = -CΩ psinI -−CΩ pcosI+nMR2(G 210J 2+2G 201C -22). (27) +CΩ +p +sinI +−CΩ +p +cosI+nMR2(G +210 +J +2 ++2G +201 +C +22 +) +. (27) This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 -[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ Ω is equal to −Ω p]. +[see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ Ω is equal to −Ω +p +]. Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized mo- ment of inertia ˆ C, @@ -1013,7 +1503,14 @@ MR2 = n Ω -pG 210J 2+2G 201C +p +G +210 +J +2 ++2G +201 +C 22 cosI+sinI/˜ ε m @@ -1028,16 +1525,28 @@ cession of Mercury. As seen in the inertial frame, its frequency is given by Confidential manuscript submitted to JGR-Planets ω fp -=nMR2 +=n +MR2 C (cid:16) -G 210J 2+2G 201C -22(cid:17) +G +210 +J +2 ++2G +201 +C +22 +(cid:17) , (29) which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical com- ponent. Note that in Peale [2005] it was assumed that only the mantle was involved in the solid- -body precession and hence C was replaced by C m. Using C = 0.346 · MR2 [Margot et al., -2012] and the numerical values for n, J 2, C +body precession and hence C was replaced by C +m +. 
Using C = 0.346 · MR2 [Margot et al., +2012] and the numerical values for n, J +2 +, C 22 and e c @@ -1065,29 +1574,43 @@ the free precession period is much shorter than the forcing period of 325 kyr. U ˜ ε m = -Ω psinI -−Ω pcosI+ω +Ω +p +sinI +−Ω +p +cosI+ω fp . (30) The obliquity of Mercury is thus determined by how the forcing frequency Ω p compares with -the free precession frequency ω fp. Because ω +the free precession frequency ω fp ->Ω p, Mercury occupies Cassini state 1 [Peale, +. Because ω +fp +>Ω +p +, Mercury occupies Cassini state 1 [Peale, 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant amplification if Ω p -≈ ω fp. Since ω +≈ ω +fp +. Since ω fp -(cid:29) Ω p, resonant amplification is minimal and the re- +(cid:29) Ω +p +, resonant amplification is minimal and the re- sulting obliquity, ˜ ε m ≈2 arcmin, is much smaller than the inclination angle I ≈8.5◦. 2.3.2 The misalignment of the fluid and solid cores With ω =−1−δωcosI and δω (cid:28)1, Equation (12d) gives ˜ n s -≈ ˜ m s; as for the mantle, +≈ ˜ m +s +; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between ˜ m and ˜ ε m @@ -1095,14 +1618,42 @@ of Equation (24b) is independent of the interior structure, so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equa- tion (12a), and setting ˜ n s -= ˜ m s, the angular momentum equation of the whole planet becomes -CΩ p(cid:0) sinI+cosI ˜ ε m(cid:1) +(¯ A fcosIΩ p)˜ m += ˜ m +s +, the angular momentum equation of the whole planet becomes +CΩ +p +(cid:0) sinI+cosI ˜ ε +m +(cid:1) +(¯ A +f +cosIΩ +p +)˜ m f -+ ¯ A s(cosIΩ p−Ω oα 3φ s)˜ n ++ ¯ A s -= ¯ AΩ oφ m˜ ε m. (31) +(cosIΩ +p +−Ω +o +α +3 +φ +s +)˜ n +s += ¯ AΩ +o +φ +m +˜ ε +m +. (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modifi- -cation of the mantle obliquity ˜ ε m. 
Approximate analytical solutions of ˜ n +cation of the mantle obliquity ˜ ε +m +. Approximate analytical solutions of ˜ n s and ˜ m f @@ -1113,64 +1664,117 @@ s κλ s (cid:18) -1+ Ω o(K icb−α 1e s) +1+ Ω o (K icb −α 1 e s ) λ f (cid:19) -(cid:0) sinI+cosI ˜ ε m(cid:1) − Ω oα 3φ s +(cid:0) sinI+cosI ˜ ε +m +(cid:1) − Ω o α 3 φ s κλ s -˜ ε m, (32a) +˜ ε +m +, (32a) ˜ m f ≈ Ω p λ -f(cid:0) sinI+cosI ˜ ε m(cid:1) + Ω o +f +(cid:0) sinI+cosI ˜ ε +m +(cid:1) + Ω o λ f ¯ A s ¯ A -f(cid:0) K icb−α 1e s(cid:1) ˜ n s, (32b) +f +(cid:0) K +icb +−α +1 +e +s +(cid:1) ˜ n +s +, (32b) where κ=1− ¯ A s ¯ A f -Ω2 o(cid:0) K icb−α 1e s(cid:1)2 -λ sλ +Ω2 +o +(cid:0) K +icb +−α +1 +e +s +(cid:1)2 +λ +s +λ f , (33a) λ f = ¯ σ f -−Ω pcosI, (33b) +−Ω +p +cosI, (33b) λ s -= ¯ σ s−Ω pcosI, (33c) += ¯ σ +s +−Ω +p +cosI, (33c) –14– Confidential manuscript submitted to JGR-Planets and where we have introduced the frequencies ¯ σ f =Ω -o(cid:18) +o +(cid:18) e f -+K cmb+ ++K +cmb ++ ¯ A s ¯ A -fK -icb(cid:19) +f +K +icb +(cid:19) , (33d) ¯ σ s =Ω -o(cid:16) -e sα 3α g−e sα 1+α 3φ s+K -icb(cid:17) +o +(cid:16) +e +s +α +3 +α +g +−e +s +α +1 ++α +3 +φ +s ++K +icb +(cid:17) . (33e) These solutions are good approximations for all the results that we present in section 3. For an observed mantle obliquity ˜ ε @@ -1178,7 +1782,9 @@ m and for a chosen set of interior model parameters, they pro- vide useful predictions of ˜ n s -and ˜ m f. +and ˜ m +f +. In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯ σ s (cid:29) @@ -1186,7 +1792,9 @@ s p and ¯ σ f -(cid:29) Ω p, so that ˜ n +(cid:29) Ω +p +, so that ˜ n s → 0, ˜ m f @@ -1210,20 +1818,40 @@ s f =λ s -=−Ω pcosI, ˜ m +=−Ω +p +cosI, ˜ m f = ˜ n s -=−(tanI+˜ ε m). (34) +=−(tanI+˜ ε +m +). (34) Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C m = C− ¯ A f -− ¯ A s, we obtain -C mΩ p(cid:0) sinI+cosI ˜ ε m(cid:1) = ¯ AΩ oφ m˜ ε m. 
(35) +− ¯ A +s +, we obtain +C +m +Ω +p +(cid:0) sinI+cosI ˜ ε +m +(cid:1) = ¯ AΩ +o +φ +m +˜ ε +m +. (35) which describes, as expected, a forced precession of the mantle alone. If this was the case for -Mercury, taking C m/C =0.431, the obliquity should be ˜ ε +Mercury, taking C +m +/C =0.431, the obliquity should be ˜ ε m ≈0.88 arcmin, substantially smaller than the observed obliquity of ˜ ε @@ -1242,31 +1870,43 @@ p (and thus λ s → 0) resonant amplifica- -tion leads to large amplitudes for ˜ m f, ˜ n +tion leads to large amplitudes for ˜ m +f +, ˜ n s -and the mantle obliquity ˜ ε m. The frequencies ¯ σ +and the mantle obliquity ˜ ε +m +. The frequencies ¯ σ f and ¯ σ s are closely related to the FCN and FICN frequencies ω fcn -and ω ficn, respectively. Hence, +and ω +ficn +, respectively. Hence, just as a large mantle obliquity can result from resonant amplification when the forcing frequency approaches the free precession frequency, a large mantle obliquity can likewise result from res- onant amplification when the forcing frequency approaches the FCN or FICN frequencies. These frequencies depend on the interior density structure and are not known. However, we will show that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not ex- -pect an important amplification effect. Furthermore, since ω fcn,ω +pect an important amplification effect. Furthermore, since ω +fcn +,ω ficn -(cid:29) Ω p, then ¯ σ +(cid:29) Ω +p +, then ¯ σ f (cid:29) Ω p and ¯ σ s -(cid:29)Ω p, and we are in the strong coupling limit. The mantle obliquity should be close +(cid:29)Ω +p +, and we are in the strong coupling limit. The mantle obliquity should be close to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜ m f and @@ -1279,7 +1919,9 @@ gles that we have adopted. 
3 Results 3.1 Geodetic constraints and interior density structure All our interior models are constrained to match the mass M of Mercury and specific choices -of ˆ C = C/MR2 and C m/C. The choice of ˆ C is determined from Equation (28). For the pa- +of ˆ C = C/MR2 and C +m +/C. The choice of ˆ C is determined from Equation (28). For the pa- rameters listed in Table 1, and an observed obliquity of ε m =2.04 arcmin [Margot et al., 2012], @@ -1291,12 +1933,19 @@ perfectly aligned with the mantle, which is not strictly correct. Hence, we make timating ˆ C from Equation (28), or conversely in predicting ε m based on a given choice for ˆ C. -Part of the objective of our study is to estimate how large this error is. The ratio C m/C is ob- -tained from the amplitude of the 88-day longitudinal mantle libration φ o, which is given by +Part of the objective of our study is to estimate how large this error is. The ratio C +m +/C is ob- +tained from the amplitude of the 88-day longitudinal mantle libration φ +o +, which is given by φ o -=6·f(e c)C -22MR2 +=6·f(e +c +)C +22 +MR2 C C C @@ -1305,12 +1954,16 @@ m 1+ζ , (36) where -f(e c)=1−11e2 +f(e +c +)=1−11e2 c + 959 48 -e4 c, (37) +e4 +c +, (37) and where ζ is a correction that takes into account the entrainment of the inner core in the li- bration [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini, 2015]; this cor- rection is small and, to simplify, we neglect it here. Taking the observed libration amplitude @@ -1319,9 +1972,13 @@ to be 38.5 arcsec [Margot et al., 2012], ˆ C = C/MR2 = 0.3455 and C and e c from Table 1, -this corresponds to a ratio C m/C =0.4269, or equivalently ˆ C +this corresponds to a ratio C +m +/C =0.4269, or equivalently ˆ C m -=C m/MR2 =0.1475. +=C +m +/MR2 =0.1475. 
For all results presented in our study, the crustal density is set at ρ c =2974 kg m−3 [Sori, @@ -1341,19 +1998,35 @@ ICB is expected to be small, although since density increases with depth, the co the mean densities of the fluid and solid cores is larger. It is these mean densities that enter our Mercury model with uniform density layers. To capture this other end-member core com- position scenario, in section 3.5 we present results where we instead prescribe a fixed density -contrast between the fluid and solid core; specifically, we set the numerical value of α 3. -For a given choice of inner core radius r s, the densities of the mantle (ρ m) and fluid core -(ρ f) and the radius of the CMB (r f) are determined such that the interior model matches M, +contrast between the fluid and solid core; specifically, we set the numerical value of α +3 +. +For a given choice of inner core radius r +s +, the densities of the mantle (ρ +m +) and fluid core +(ρ +f +) and the radius of the CMB (r +f +) are determined such that the interior model matches M, ˆ C = 0.3455 and ˆ C m -= 0.1475. Figure 3a shows how ρ m, ρ += 0.1475. Figure 3a shows how ρ +m +, ρ f and r f vary as a function of in- ner core radius r s -for each of the two inner core density scenarios: a fixed ρ s, or a fixed α 3. When +for each of the two inner core density scenarios: a fixed ρ +s +, or a fixed α +3 +. When the inner core is small, its presence has a limited influence on the resulting density structure, and we find ρ m @@ -1372,7 +2045,9 @@ m approaches 4000 kg m−3 and ρ f is reduced to below 5000 kg m−3. -Figure 3a illustrates that when adopting a fixed ρ s, there is a limit in the possible inner core +Figure 3a illustrates that when adopting a fixed ρ +s +, there is a limit in the possible inner core size, as otherwise ρ m gets unreasonably large and ρ @@ -1381,7 +2056,9 @@ gets inappropriately small (as it would require an excessively large concentration of light elements). 
When adopting instead a fixed den- sity contrast, with α 3 -=0.1, the changes in r f, ρ +=0.1, the changes in r +f +, ρ m and ρ f @@ -1389,7 +2066,9 @@ with inner core radius are more mod- est, allowing larger possible inner core sizes. Different assumptions on ρ c and h would alter the -numerical values shown on Figure 3a but not their trends with r s. +numerical values shown on Figure 3a but not their trends with r +s +. Figure 3b shows how the FCN and FICN periods vary with r s for each of the two inner @@ -1404,23 +2083,20 @@ Confidential manuscript submitted to JGR-Planets 200 400 600 -800 -1000 +800 1000 1200 1400 -period -(yr) +p +eri o d ( yr) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 3000 4000 -5000 -6000 +5000 6000 7000 8000 -density -(kg/m -3) +d e n +sit y ( k g/ m 3) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 2000 @@ -1429,13 +2105,13 @@ Inner core radius (km) 2060 2080 2100 -Fluid -core -radius -(km) +Fl +ui d c +or e r a di u s ( k +m) fluid core density -CMB -radius +C M B ra +di us FICN FCNint mantle density @@ -1449,13 +2125,17 @@ scenario where the density contrast between the fluid and solid cores is set to 3 =0.1. 0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small in- -ner core, increasing to approximately 600 yr at the largest r s. The FICN period is shorter, close +ner core, increasing to approximately 600 yr at the largest r +s +. The FICN period is shorter, close to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the largest r s under the fixed ρ s -(fixed α 3) scenario. This confirms that the FCN and FICN peri- +(fixed α +3 +) scenario. 
This confirms that the FCN and FICN peri- ods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away from it that we do not expect large ˜ m f @@ -1474,74 +2154,138 @@ are given by ω fcn ≈−Ω -o(cid:18) ¯ A -¯ A m+ ¯ A -s(cid:19)(cid:16) +o +(cid:18) ¯ A +¯ A +m ++ ¯ A +s +(cid:19)(cid:16) e f +φ -m(cid:17) +m +(cid:17) +Ω o -e fφ +e +f +φ m (e f +φ -m), (38a) +m +) +, (38a) ω ficn ≈Ω -o(cid:18) ¯ A+ ¯ A +o +(cid:18) ¯ A+ ¯ A s ¯ A− ¯ A -s(cid:19)(cid:16) -e sα 1−e sα 3α g−α 3φ -s(cid:17) +s +(cid:19)(cid:16) +e +s +α +1 +−e +s +α +3 +α +g +−α +3 +φ +s +(cid:17) . (38b) -The expression of the FICN frequency involves the inertial torque (term e sα 1) and the grav- -itational torque from the rest of Mercury (e sα 3α g) and the Sun (α 3φ s) acting on the inner core. +The expression of the FICN frequency involves the inertial torque (term e +s +α +1 +) and the grav- +itational torque from the rest of Mercury (e +s +α +3 +α +g +) and the Sun (α +3 +φ +s +) acting on the inner core. For both of our inner core density scenarios (and our choices of ρ s =8800 kg m−3 and α 3 = -0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α 3α +0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α +3 +α g -(cid:29)α 1; +(cid:29)α +1 +; the gravitational torque dominates the inertial torque, in large part because of the slow rota- tion rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion is retrograde). This is also the case for the Moon [e.g. Dumberry and Wieczorek, 2016; Stys and Dumberry, 2018], but it is different for Earth, where α 1 ->α 3α +>α +3 +α g because of its faster rotation and the FICN mode is prograde [Mathews et al., 1991]. 
Note also that our approximate expres- –17– Confidential manuscript submitted to JGR-Planets -sion for the FICN differs by a factor (¯ A+ ¯ A s)/(¯ A− ¯ A s) compared to that given in Dumberry +sion for the FICN differs by a factor (¯ A+ ¯ A +s +)/(¯ A− ¯ A +s +) compared to that given in Dumberry and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. First, it -involves the external torque from the Sun captured by the parameter φ m. If we set φ +involves the external torque from the Sun captured by the parameter φ +m +. If we set φ m = 0, we obtain the FCN frequency for a decoupled model in which only interior torques contribute, ω fcn,int ≈−Ω -o(cid:18) ¯ A -¯ A m+ ¯ A -s(cid:19) -e f. (38c) -This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ A/(¯ A m+ -¯ A s) rather than ¯ A/¯ A m. This is because of the relatively thin mantle of Mercury; for the largest +o +(cid:18) ¯ A +¯ A +m ++ ¯ A +s +(cid:19) +e +f +. (38c) +This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ A/(¯ A +m ++ +¯ A +s +) rather than ¯ A/¯ A +m +. This is because of the relatively thin mantle of Mercury; for the largest r s considered, the moment of inertia of the inner core can get close to 40% of that of the man- tle and is not negligible. The period of the FCN when only interior torques contribute is shown in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr -at the largest r s. Hence, the influence of the solar torque reduces the FCN period by a factor +at the largest r +s +. Hence, the influence of the solar torque reduces the FCN period by a factor of approximately 3. We note that the FICN period, in contrast, is not altered substantially when the external torque is set to zero. 3.2 Gravitational and inertial coupling @@ -1550,18 +2294,24 @@ librium Cassini state. 
We assume a fixed inner core density scenario in this sec s = 8800 kg m−3. Viscous and EM coupling are set to zero in order to isolate the influence of grav- -itational and inertial coupling. Figure 4 shows how ˜ ε m, ˜ m +itational and inertial coupling. Figure 4 shows how ˜ ε +m +, ˜ m f and ˜ n s vary as functions of inner core radius. We show calculations for three different choices of crustal thickness, but let us con- -centrate first on the case for h=26 km. For small r s, we retrieve an obliquity of ˜ ε +centrate first on the case for h=26 km. For small r +s +, we retrieve an obliquity of ˜ ε m =2.0494 arcmin (Figure 4a). ˜ ε m -decreases with r s, but not substantially; at the largest r +decreases with r +s +, but not substantially; at the largest r s (1500 km), ˜ ε @@ -1575,16 +2325,24 @@ occurs for small inner cores. The deviation of ˜ ε m from that of a rigid planet is due to the misalignments of the fluid -core (˜ m f) and solid inner core (˜ n s) with respect to the mantle (Figure 4b). The misalignment +core (˜ m +f +) and solid inner core (˜ n +s +) with respect to the mantle (Figure 4b). The misalignment of the fluid core spin axis from the mantle is significant: ˜ m f is approximately 4.02 arcmin for a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin -at the largest r s. Recall that ˜ m +at the largest r +s +. Recall that ˜ m f is measured with respect to the mantle rotation axis (which coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with -respect to the orbit normal is ˜ ε m+˜ m +respect to the orbit normal is ˜ ε +m ++˜ m f ≈6 arcmin. The reason why the obliquity of the spin axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), @@ -1596,10 +2354,14 @@ case for Mercury, the resonant amplification is very weak but remains present an f is larger than zero. 
-In contrast to ˜ m f, the misalignment of the inner core with respect to the mantle is much +In contrast to ˜ m +f +, the misalignment of the inner core with respect to the mantle is much smaller; ˜ n s -is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ ε m. +is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ ε +m +. Physically, this is because the gravitational torque acting on the inner core when it is tilted from the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner core must remain in close alignment with the mantle. Presented differently, since the FICN pe- @@ -1613,9 +2375,13 @@ Confidential manuscript submitted to JGR-Planets 2.046 2.048 2.050 -Obliquity -angle -(arcmin) +O +bli q +uit +y +a n gl e ( +ar c mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 1.5 @@ -1625,26 +2391,32 @@ Inner core radius (km) 3.5 4.0 4.5 -Obliquity -angle -(arcmin) +O +bli q +uit +y +a n gl e ( +ar c mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) crustal thickness 16 km -36 km -26 km +36 km 26 km crustal thickness 16 km -36 km -26 km +36 km 26 km ε m ε g -for a rigid planet -ε -m +for a rigid planet ε m m f -n s(x100) +n +s +(x100) a b -Figure 4. a) Obliquity of the mantle (˜ ε m, solid lines) and of the principal moment of inertia (˜ ε g, +Figure 4. a) Obliquity of the mantle (˜ ε +m +, solid lines) and of the principal moment of inertia (˜ ε +g +, dashed line) b) ˜ m f (solid lines) and ˜ n @@ -1665,14 +2437,22 @@ is given by ˜ ε m = -C(cid:48)Ω psinI -−C(cid:48)Ω pcosI+ ¯ AΩ oφ +C(cid:48)Ω +p +sinI +−C(cid:48)Ω +p +cosI+ ¯ AΩ +o +φ m , (39) which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced by C(cid:48). 
The latter represents an effective moment of inertia that accounts for the coupling of the core to the mantle, -C(cid:48) =C+ ¯ A cχ, (40) +C(cid:48) =C+ ¯ A +c +χ, (40) where ¯ A c = ¯ A @@ -1681,28 +2461,44 @@ f s and χ= -Ω pcosI +Ω +p +cosI ¯ A c (cid:18) ¯ A f (¯ σ f -−Ω pcosI) +−Ω +p +cosI) + ¯ A s -(¯ σ s−Ω -pcosI)(cid:19) +(¯ σ +s +−Ω +p +cosI) +(cid:19) − ¯ A s ¯ A c -Ω oα 3φ +Ω +o +α +3 +φ +s +(¯ σ s -(¯ σ s−Ω -pcosI). (41) +−Ω +p +cosI) +. (41) The frequencies ¯ σ f and ¯ σ @@ -1711,13 +2507,21 @@ are given in Equations (33d-33e) and closely approximate the FCN and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then how the core is entrained to precess with the mantle, with the coupling between the two ex- pressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit -of ¯ σ f,¯ σ +of ¯ σ +f +,¯ σ s -→ 0, then χ = −1, C(cid:48) = C m, the core is fully decoupled from the mantle and we -retrieve Equation (35). If instead ¯ σ f,¯ σ +→ 0, then χ = −1, C(cid:48) = C +m +, the core is fully decoupled from the mantle and we +retrieve Equation (35). If instead ¯ σ +f +,¯ σ s → ∞, then χ = 0, C(cid:48) = C and we retrieve the pre- -diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω p, +diction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω +p +, as is the case here, resonant amplification is weak, χ is small and positive, C(cid:48) > C and this leads to a slightly larger ˜ ε m @@ -1725,7 +2529,9 @@ compared to a rigid planet. Because the inner core core is grav- itationally locked to the mantle, deviations from a rigid planet are dominantly caused by the misalignment of the fluid core. 
In Equation (41), ¯ σ s -(cid:29) ¯ σ f, so to a good approximation +(cid:29) ¯ σ +f +, so to a good approximation –19– Confidential manuscript submitted to JGR-Planets χ≈ @@ -1733,42 +2539,68 @@ Confidential manuscript submitted to JGR-Planets f ¯ A c -Ω ocosI +Ω +o +cosI (¯ σ f −Ω -pcosI). (42) +p +cosI) +. (42) For a small inner core, χ≈7.55×10−3. As the inner core grows, ¯ A f decreases, and the com- -bination ¯ A cχ also decreases. This implies that C(cid:48) decreases with inner core size and, consequently, +bination ¯ A +c +χ also decreases. This implies that C(cid:48) decreases with inner core size and, consequently, ˜ ε m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the prediction for a rigid planet. -The specific predictions of ˜ ε m, ˜ m +The specific predictions of ˜ ε +m +, ˜ m f and ˜ n s on Figure 4 depend sensitively on the assumed -interior density model and on the dynamical ellipticities of the inner core (e s) and fluid core -(e f). Hence, it depends on the choices we have made for the inner core density ρ s, the crustal +interior density model and on the dynamical ellipticities of the inner core (e +s +) and fluid core +(e +f +). Hence, it depends on the choices we have made for the inner core density ρ +s +, the crustal density ρ c -and its thickness h. Changing ρ s, ρ +and its thickness h. Changing ρ +s +, ρ c -and/or h requires a different combination of ρ f, +and/or h requires a different combination of ρ +f +, ρ m and r f -in order to match M, ˆ C and ˆ C m. In turn, this leads to different ellipticities at in- +in order to match M, ˆ C and ˆ C +m +. In turn, this leads to different ellipticities at in- terior boundary in order to match J 2 -and C 22, and thus different predictions for ˜ ε m, ˜ m +and C +22 +, and thus different predictions for ˜ ε +m +, ˜ m f and -˜ n s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal +˜ n +s +. 
To illustrate this, we show on Figure 4 two additional predictions computed with crustal thicknesses changed to h=16 and 36 km. The change in ˜ ε m remains modest, ∼0.025%, but @@ -1778,7 +2610,9 @@ and ˜ n s are more substantial, ∼5% and ∼10%, respectively. We also show on Figure 4a (only for h=26 km) the obliquity of the principal moment -of inertia of the whole planet, which we denote by ˜ ε g. A difference between ˜ ε +of inertia of the whole planet, which we denote by ˜ ε +g +. A difference between ˜ ε g and ˜ ε m @@ -1787,20 +2621,50 @@ if the inner core is misaligned with the mantle. As seen in the mantle frame, a (with ˜ n s assumed small) leads to an off-diagonal component of the moment of inertia tensor -of (C s−¯ A s)α 3˜ n +of (C +s +−¯ A +s +)α +3 +˜ n +s += ¯ A +s +e s -= ¯ A se sα 3˜ n s. The angle by which the mantle frame must be rotated so that -the moment of inertia of the whole planet is purely diagonal is (¯ A se sα 3˜ n s)/(¯ Ae), and hence a +α +3 +˜ n +s +. The angle by which the mantle frame must be rotated so that +the moment of inertia of the whole planet is purely diagonal is (¯ A +s +e +s +α +3 +˜ n +s +)/(¯ Ae), and hence a good approximation of ˜ ε g is ˜ ε g -= ˜ ε m+ -¯ A se += ˜ ε +m ++ +¯ A +s +e s ¯ Ae -α 3˜ n s. (43) +α +3 +˜ n +s +. (43) Since the inner core is gravitationally forced into a close alignment with the mantle, the dif- ference between ˜ ε g @@ -1826,25 +2690,28 @@ based on them are given in Mathews and Guo [2005], K cmb -= -πρ fr4 -f += πρ f r4 f ¯ A f (cid:114) ν 2Ω -o(cid:16) 0.195−1.976i(cid:17) -, (44a) +o +(cid:16) 0.195−1.976i (cid:17) , (44a) K icb = -πρ fr4 +πρ +f +r4 s ¯ A s (cid:114) ν 2Ω -o(cid:16) 0.195−1.976i(cid:17) +o +(cid:16) +0.195−1.976i +(cid:17) , (44b) where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary inte- rior is not well known but based on theoretical and experimental studies it is expected to be @@ -1854,9 +2721,17 @@ of the order of 10−6 m2 s−1 [e.g. 
Gans, 1972; de Wijs et al., 1998; Alf`e et Confidential manuscript submitted to JGR-Planets The above parameterizations are valid only under the assumption that the flow in the bound- ary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds -number Re=r f∆u f/ν, associated with the differential velocity ∆u +number Re=r +f +∆u +f +/ν, associated with the differential velocity ∆u f -=r fΩ o˜ m +=r +f +Ω +o +˜ m f at the CMB. For r @@ -1874,12 +2749,14 @@ pendent of the fluid viscosity and proportional to the square of the differentia coupling constant K cmb should be in the form -K -cmb -=f -cmb(cid:12) -(cid:12)˜ m -f(cid:12) (cid:12)(cid:16) 0.195−1.976i(cid:17) +K cmb =f cmb +(cid:12) +(cid:12)˜ m f +(cid:12) +(cid:12) +(cid:16) +0.195−1.976i +(cid:17) , (45) where f cmb @@ -1887,7 +2764,9 @@ is a numerical factor that depends among other things on surface roughness. In- corporating a viscous coupling of this form in our rotational model is more challenging not only because f cmb -is not known but also because the viscous torque is no longer linear in ˜ m f. One +is not known but also because the viscous torque is no longer linear in ˜ m +f +. One strategy is to find solutions through an iterative process. The simpler alternative strategy that we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν represents an effective turbulent viscosity. @@ -1905,9 +2784,15 @@ and K cmb is (cid:12) -(cid:12) (cid:12)Im[K -cmb](cid:12) -(cid:12) (cid:12)= +(cid:12) +(cid:12) +Im[K +cmb +] +(cid:12) +(cid:12) +(cid:12) += K C L @@ -1924,9 +2809,13 @@ fL is the moment of inertia of the lunar core and Ω L = 2.66 × 10−6 s−1 the lunar -rotation rate. With C fL/C +rotation rate. With C +fL +/C L -∼7×10−4 [e.g. Williams et al., 2014], this gives |Im[K cmb]|∼ +∼7×10−4 [e.g. Williams et al., 2014], this gives |Im[K +cmb +]|∼ 9×10−5. 
In order to match this amplitude in Equation (44a), with lunar parameters and as- suming a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 m2 s−1, about 500 times larger than the laminar viscosity. Note that the differential velocity at the @@ -1936,7 +2825,9 @@ cmb is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mer- cury should be smaller. Thus, ν ≈5×10−4 m2 s−1 gives a conservative upper bound for the possible effective turbulent viscosity that can be expected for Mercury. -Figure 5 shows how ˜ ε m, ˜ m +Figure 5 shows how ˜ ε +m +, ˜ m f and ˜ n s @@ -1965,8 +2856,7 @@ cosity that we have identified above (i.e ν ≈ 5×10−4 m2 s−1), the influe Confidential manuscript submitted to JGR-Planets ε m ε g -m -f +m f n s 2.038 @@ -1976,9 +2866,15 @@ s 2.046 2.048 2.050 -Obliquity -angle -(arcmin) +O +bli q +uit +y a n +gl e +( +ar c +mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 0.0 @@ -1991,9 +2887,15 @@ Inner core radius (km) 3.5 4.0 4.5 -Obliquity -angle -(arcmin) +O +bli q +uit +y a n +gl e +( +ar c +mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) kinematic viscosity: 0.01 m2 s-1 0.00001 m2 s-1 0.0001 m2 s-1 0.0005 m2 s-1 0.001 m2 s-1 @@ -2001,7 +2903,11 @@ a b for a rigid planet ε m -Figure 5. a) Obliquity of the mantle (˜ ε m, solid lines) and gravity field (˜ ε g, dashed lines) b) ˜ m +Figure 5. a) Obliquity of the mantle (˜ ε +m +, solid lines) and gravity field (˜ ε +g +, dashed lines) b) ˜ m f (solid lines) and ˜ n s @@ -2014,7 +2920,9 @@ arcmin. The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the in- ner core with the fluid core spin axis. 
The viscous coupling strength is inversely proportional -to r s, so a larger viscosity results in a larger inner core radius at which viscous coupling is of +to r +s +, so a larger viscosity results in a larger inner core radius at which viscous coupling is of a similar magnitude to gravitational coupling. Taking again an upper bound of ν =5×10−4 m2 s−1, Figure 5 indicates that ˜ n s @@ -2025,7 +2933,9 @@ fraction of 1 arcmin. The larger inner core tilt observed with increasing effective viscosity results in a larger offset between the obliquity of the principal moment of inertia ˜ ε g -and that of the mantle ˜ ε m, +and that of the mantle ˜ ε +m +, though it remains limited. For the upper bound of ν = 5×10−4 m2 s−1, and for r s = 1500 @@ -2038,7 +2948,9 @@ The conclusion that emerges from Figure 5 is that the larger the inner core is, the misalignments of both the fluid core and inner core are with respect to the mantle. This implies that the larger the inner core is, the more we approach a planet precessing as a rigid body, although the misalignment of the spin axis of the fluid core remains important, approx- -imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ ε m, ˜ m +imately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ ε +m +, ˜ m f and ˜ n s @@ -2069,50 +2981,88 @@ by B r = √ -3(cid:10) Bd r(cid:11) cosθ, where (cid:10) Bd r(cid:11) is the r.m.s. strength of the field, the coupling constant +3 (cid:10) Bd +r +(cid:11) cosθ, where (cid:10) Bd +r +(cid:11) is the r.m.s. 
strength of the field, the coupling constant K cmb can be written is the form K cmb -=3(1−i)F cmb(cid:10) Bd r(cid:11)2 , (47) +=3(1−i)F +cmb +(cid:10) Bd +r +(cid:11)2 , (47) where F cmb = 1 -Ω oρ fr +Ω +o +ρ +f +r f (cid:18) 1 -σ mδ +σ +m +δ m + 1 -σ fδ -f(cid:19)−1 +σ +f +δ +f +(cid:19)−1 , (48) -and where σ m, δ +and where σ +m +, δ m = (cid:112) -2/(σ mµΩ o) and σ f, δ +2/(σ +m +µΩ +o +) and σ +f +, δ f = (cid:112) -2/(σ fµΩ o) are the electrical conductivi- +2/(σ +f +µΩ +o +) are the electrical conductivi- ties and magnetic skin depths in the mantle and fluid core, respectively, with µ=4π×10−7 -N A−2 the magnetic permeability of free space. The r.m.s. field strength (cid:10) Bd r(cid:11) is connected to +N A−2 the magnetic permeability of free space. The r.m.s. field strength (cid:10) Bd +r +(cid:11) is connected to the Gauss coefficient g0 1 of the surface magnetic field by -(cid:10) Bd r(cid:11) = 2 √ -3(cid:18) -R +(cid:10) Bd r -f(cid:19)3 -(cid:12) (cid:12)g0 1(cid:12) (cid:12) . (49) -We can readily build an estimate of the amplitude of K cmb. The electrical conductivity +(cid:11) = 2 √ +3 +(cid:18) +R +r f +(cid:19)3 +(cid:12) (cid:12)g0 +1 +(cid:12) (cid:12) . (49) +We can readily build an estimate of the amplitude of K +cmb +. The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding to the CMB of Mercury is in the range of σ m @@ -2120,11 +3070,19 @@ m trast, the electrical conductivity of Fe in planetary cores is expected to be close σ f ∼ 106 S -m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ mδ m)−1 (cid:29)(σ fδ f)−1. Tak- +m−1 [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ +m +δ +m +)−1 (cid:29)(σ +f +δ +f +)−1. 
Tak- ing σ m = 1 S m−1, (cid:12) -(cid:12)g0 -1(cid:12) +(cid:12)g0 1 +(cid:12) (cid:12) = 190 nT for Mercury’s dipole field [Anderson et al., 2012], r f = 2000 km, ρ f @@ -2141,7 +3099,9 @@ EM coupling can be enhanced if strongly stratified pockets of core fluid are tra CMB cavities [Buffett, 2010; Glane and Buffett, 2018], in which case the effective σ m could be -closer to σ f. Likewise, σ +closer to σ +f +. Likewise, σ m can be increased if a more electrically conducting layer has formed at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction @@ -2162,7 +3122,9 @@ and fluid core to be similar, and because the radial magnetic field is likely mu coupling can be much larger and dominate viscous coupling. We assume that the magnetic field morphology at the ICB is dominantly comprised of small spatial scales for example as predicted by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in -terms of an equivalent uniform radial magnetic field (cid:104)B r(cid:105) capturing its r.m.s. strength [Buf- +terms of an equivalent uniform radial magnetic field (cid:104)B +r +(cid:105) capturing its r.m.s. strength [Buf- fett et al., 2002; Dumberry and Koot, 2012]. Assuming an electrical conductivity σ equal in the fluid and solid core, the coupling constant K icb @@ -2171,50 +3133,78 @@ K icb = 5 -4(1−i)F icb(cid:104)B r(cid:105)2 , (50) +4 +(1−i)F +icb +(cid:104)B +r +(cid:105)2 , (50) where F icb = σδ -Ω oρ sr +Ω +o +ρ +s +r s , (51) and where δ = (cid:112) -2/(σµΩ o) is the magnetic skin depth. As F +2/(σµΩ +o +) is the magnetic skin depth. As F icb is inversely proportional to -r s, K +r +s +, K icb is inversely proportional to inner core size. Note that computing the EM coupling based -on the r.m.s. strength (cid:104)B r(cid:105) rather than a true field morphology tends to overestimate the strength +on the r.m.s. 
strength (cid:104)B +r +(cid:105) rather than a true field morphology tends to overestimate the strength of the coupling [Koot and Dumberry, 2013]. However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are -absorbed in the range of possible (cid:104)B r(cid:105) values. +absorbed in the range of possible (cid:104)B +r +(cid:105) values. The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. -When (cid:104)B r(cid:105) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +When (cid:104)B +r +(cid:105) is sufficiently large, this is no longer the case. EM coupling then enters a ’strong field’ regime [Buffett et al., 2002; Dumberry and Koot, 2012; Koot and Dumberry, 2013] in which K icb -increases linearly with (cid:104)B r(cid:105) instead of quadratically. A good approximation of K +increases linearly with (cid:104)B +r +(cid:105) instead of quadratically. A good approximation of K icb cal- culated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], KE icb -=(0.175−i0.138)(cid:104)B r(cid:105) , (52) -where (cid:104)B r(cid:105) is in units of Tesla. The superscript E emphasizes that the numerical factors are +=(0.175−i0.138)(cid:104)B +r +(cid:105) , (52) +where (cid:104)B +r +(cid:105) is in units of Tesla. The superscript E emphasizes that the numerical factors are appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. 
To adapt these numerical factors to Mercury, we write, K icb -=(0.175−i0.138)F +=(0.175−i0.138) +F icb FE icb -(cid:104)B r(cid:105) , (53) +(cid:104)B +r +(cid:105) , (53) where FE icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumb- @@ -2228,9 +3218,17 @@ s km, σ =5×105 S m−1, which gives FE icb =90.36 T−2. -To compute F icb, we assume an electrical conductivity of σ =106 S m−1 in the core of +To compute F +icb +, we assume an electrical conductivity of σ =106 S m−1 in the core of Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and -strong field regime occurs when (cid:104)B r(cid:105) ≈ 1.53 mT for the real part of K icb. (cid:104)B r(cid:105) at the ICB +strong field regime occurs when (cid:104)B +r +(cid:105) ≈ 1.53 mT for the real part of K +icb +. (cid:104)B +r +(cid:105) at the ICB of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geom- etry inside the core could be dominated by small length scales, yet only the weaker lower har- monics of the field would penetrate through a thermally stratified layer in the upper region of @@ -2238,24 +3236,36 @@ monics of the field would penetrate through a thermally stratified layer in the Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. If so, the field strength inside the core can exceed the sur- face field strength by a factor 1000. Taking a surface field strength equal to ∼300 nT [e.g An- -derson et al., 2012], (cid:104)B r(cid:105) at the ICB could be as large as 0.3 mT, corresponding to approxi- +derson et al., 2012], (cid:104)B +r +(cid:105) at the ICB could be as large as 0.3 mT, corresponding to approxi- mately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mer- cury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of Mercury remains in the weak field regime. 
-Figure 6 shows how ˜ ε m, ˜ m +Figure 6 shows how ˜ ε +m +, ˜ m f and ˜ n s vary as functions of inner core radius for different choices -of (cid:104)B r(cid:105). The larger (cid:104)B r(cid:105) is, the stronger is the EM coupling at the ICB, and the smaller is the +of (cid:104)B +r +(cid:105). The larger (cid:104)B +r +(cid:105) is, the stronger is the EM coupling at the ICB, and the smaller is the differential rotation between the fluid core and inner core. The inner core and fluid core are vir- -tually locked into a common precession motion when (cid:104)B r(cid:105)>0.3 mT. Further increasing (cid:104)B r(cid:105) +tually locked into a common precession motion when (cid:104)B +r +(cid:105)>0.3 mT. Further increasing (cid:104)B +r +(cid:105) above 1 mT does not change the solution as EM coupling already dominates all other torques on the inner core. This is the case even when EM coupling transitions into the strong field regime. EM coupling at the CMB is included in these calculations, with σ m = 1 S m−1 and (cid:12) -(cid:12)g0 -1(cid:12) +(cid:12)g0 1 +(cid:12) (cid:12) = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core we retrieved the solutions of ˜ ε @@ -2278,7 +3288,9 @@ pulls the fluid core towards an alignment with the inner core, and gravitational inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the greater is the reduction in ˜ ε m -and ˜ m f. +and ˜ m +f +. 
When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are locked into a common precession motion, a good approximation of ˜ ε m @@ -2286,20 +3298,80 @@ is given by the same pre- diction as Equations (39-40) involving the effective moment of inertia C(cid:48), except χ is now given by χ= -¯ A cΩ pcosI− ¯ A sΩ oα 3φ +¯ A +c +Ω +p +cosI− ¯ A +s +Ω +o +α +3 +φ s -¯ A fΩ o(e +¯ A +f +Ω +o +(e f -+K cmb)+ ¯ A sΩ oe sα 3α g− ¯ A cΩ pcosI ++K +cmb +)+ ¯ A +s +Ω +o +e +s +α +3 +α +g +− ¯ A +c +Ω +p +cosI . (54) -For a small inner core, ¯ A cΩ pcosI > ¯ A sΩ oα 3φ +For a small inner core, ¯ A +c +Ω +p +cosI > ¯ A +s +Ω +o +α +3 +φ +s +and χ is positive. Because ¯ A s -and χ is positive. Because ¯ A sΩ oα 3φ +Ω +o +α +3 +φ s increases -with inner core size, χ gets smaller, and so do C(cid:48) and ˜ ε m. The mantle obliquity drops from 2.049 +with inner core size, χ gets smaller, and so do C(cid:48) and ˜ ε +m +. The mantle obliquity drops from 2.049 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 -arcmin. For an inner core larger than ≈1000 km, ¯ A cΩ pcosI < ¯ A sΩ oα 3φ s, so χ becomes neg- +arcmin. For an inner core larger than ≈1000 km, ¯ A +c +Ω +p +cosI < ¯ A +s +Ω +o +α +3 +φ +s +, so χ becomes neg- ative, C(cid:48) becomes smaller than the moment of inertia of a rigid Mercury C, and ˜ ε m becomes @@ -2332,9 +3404,16 @@ Confidential manuscript submitted to JGR-Planets 2.046 2.048 2.050 -Obliquity -angle -(arcmin) +O +bli q +uit +y a n +gl e +( +ar +c +mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 0.0 @@ -2347,9 +3426,16 @@ Inner core radius (km) 3.5 4.0 4.5 -Obliquity -angle -(arcmin) +O +bli q +uit +y a n +gl e +( +ar +c +mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) Br at ICB: 1 mT 0.01 mT 0.03 mT 0.1 mT 0.3 mT @@ -2361,10 +3447,12 @@ m f n s a b -for a rigid planet -ε +for a rigid planet ε m +Figure 6. a) Obliquity of the mantle (˜ ε m -Figure 6. 
a) Obliquity of the mantle (˜ ε m, solid lines) and gravity field (˜ ε g, dashed lines) b) ˜ m +, solid lines) and gravity field (˜ ε +g +, dashed lines) b) ˜ m f (solid lines) and ˜ n s @@ -2380,10 +3468,16 @@ icb purely from EM coupling. We choose an effective viscosity at the CMB of ν = 10−4 m2 s−1, which we believe to be a representative value given the comparison with the Moon (see section 3.3). We take a radial -field strength at the ICB of (cid:104)B r(cid:105)=0.3 mT, approximately the field strength expected under +field strength at the ICB of (cid:104)B +r +(cid:105)=0.3 mT, approximately the field strength expected under the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representa- -tive’ coupling model, although the uncertainty on ν and (cid:104)B r(cid:105) obviously remains high. -Figure 7 shows how ˜ ε m, ˜ m +tive’ coupling model, although the uncertainty on ν and (cid:104)B +r +(cid:105) obviously remains high. +Figure 7 shows how ˜ ε +m +, ˜ m f and ˜ n s @@ -2397,9 +3491,13 @@ cores and for different choices of α 3 = 0.2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller -α 3, the point at which the orientation of the co-precessing fluid and inner cores begins to be +α +3 +, the point at which the orientation of the co-precessing fluid and inner cores begins to be pulled into an alignment with the mantle is pushed to a larger inner core radius. 
However, the -general behaviour of ˜ ε m, ˜ m +general behaviour of ˜ ε +m +, ˜ m f and ˜ n s @@ -2424,9 +3522,16 @@ Confidential manuscript submitted to JGR-Planets 2.046 2.048 2.050 -Obliquity -angle -(arcmin) +O +bli q +uit y a +n +gl e +( +ar +c +mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) 0.0 @@ -2439,9 +3544,16 @@ Inner core radius (km) 3.5 4.0 4.5 -Obliquity -angle -(arcmin) +O +bli q +uit y a +n +gl e +( +ar +c +mi +n) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) for a rigid planet @@ -2455,9 +3567,12 @@ n s ε m -ε +ε g +Figure 7. a) Obliquity of the mantle (˜ ε +m +, solid lines) and gravity field (˜ ε g -Figure 7. a) Obliquity of the mantle (˜ ε m, solid lines) and gravity field (˜ ε g, dashed lines) b) ˜ m +, dashed lines) b) ˜ m f (solid lines) and ˜ n s @@ -2465,20 +3580,34 @@ s 8800 kg m−3 (black lines) and for different choices of α 3 (coloured lines). -i(cid:48) m, i(cid:48) +i(cid:48) +m +, i(cid:48) f -and i(cid:48) s; these represent the obliquities with respect to the orbital plane and are connected +and i(cid:48) +s +; these represent the obliquities with respect to the orbital plane and are connected to our variables by: i(cid:48) m -= ˜ ε m, i(cid:48) += ˜ ε +m +, i(cid:48) f -= ˜ ε m+˜ m+˜ m += ˜ ε +m ++˜ m+˜ m f -≈ ˜ ε m+˜ m +≈ ˜ ε +m ++˜ m f and i(cid:48) s -= ˜ ε m+˜ n s. To summarize += ˜ ε +m ++˜ n +s +. To summarize their results, i(cid:48) f and i(cid:48) @@ -2486,7 +3615,9 @@ s vary substantially for different inner core sizes, are always of compara- ble amplitude, and i(cid:48) s -is always larger than i(cid:48) f. Furthermore, they find that as the inner core +is always larger than i(cid:48) +f +. 
Furthermore, they find that as the inner core size is increased, the mantle obliquity i(cid:48) m gets progressively larger and is displaced further away @@ -2536,9 +3667,15 @@ Our results suggest then that the presence and size of an inner core leads to on est changes of the mantle obliquity ε m compared to the obliquity predicted on the basis of an -entirely rigid planet (εr m). Let us denote this difference as ∆ε +entirely rigid planet (εr +m +). Let us denote this difference as ∆ε m -=ε m−εr m. The largest ∆ε +=ε +m +−εr +m +. The largest ∆ε m occurs for a small or no inner core, and is ∆ε m @@ -2568,8 +3705,12 @@ necessary in order to properly tie Mercury’s obliquity to its interior structu the possibility of further constraining the interior structure of Mercury on the basis of its obliq- uity. Obliquity measurements based on tracking topographic features reflect the orientation of -the spin-symmetry axis of the mantle (ε m). Measurements based on tracking the gravity field -of Mercury reflect instead the orientation of the principal moment of the whole planet (ε g). These +the spin-symmetry axis of the mantle (ε +m +). Measurements based on tracking the gravity field +of Mercury reflect instead the orientation of the principal moment of the whole planet (ε +g +). These two orientations do not coincide when an inner core is present and is misaligned from the man- tle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we –28– diff --git a/read/results/pdfplumber/2201.00069.txt b/read/results/pdfplumber/2201.00069.txt index e23a0fa..f38f17d 100644 --- a/read/results/pdfplumber/2201.00069.txt +++ b/read/results/pdfplumber/2201.00069.txt @@ -35,11 +35,45 @@ M. Zacharias,24,1 D. Zargaryan,12,14 A.A. Zdziarski,34 A. Zech,24 S.J. Zhu,20 S. Zouari,22 N. Żywucka,1 AcceptedXXX.ReceivedYYY;inoriginalformZZZ MNRAS000,1–15(2021) -arXiv:2201.00069v1 -[astro-ph.HE] -31 -Dec -2021 +a +r +X i +v +: +2 +2 +0 +1 +. 
+0 +0 +0 +6 +9 +v +1 +[ +a +s +t +r +o +- +p +h +. +H +E +] +3 +1 +D +e +c +2 +0 +2 +1 MNRAS000,1–15(2021) Preprint4January2022 CompiledusingMNRASLATEXstylefilev3.0 ABSTRACT Wereportonasearchforpersistentradioemissionfromtheone-offFastRadioBurst(FRB) @@ -261,15 +295,15 @@ hancedMulti-ElementRemote-LinkedInterferometerNetwork,e- MERLIN array in the United Kingdom (project code: CY10003) on 13 January, 2021 (see Section 3.1.2). Six antennas were used including the 75-m Lovell telescope and the target pointing cen- -trewasR.A.=12ℎ 15𝑚 55𝑠 .12,Dec.= −13◦01(cid:48)15. (cid:48)(cid:48)7.1407+2827 +trewasR.A.=12 ℎ 15 𝑚 55 𝑠 .12,Dec.= −13◦01(cid:48)15. (cid:48)(cid:48)7.1407+2827 was used as the bandpass calibrator, 1331+3030 as the flux cal- ibrator and 1216−1033 as the phase calibrator. The angular sep- aration between the target and the phase calibrator is 2.47◦. The data reduction was done following standard e-MERLIN calibra- tion procedures6 with additional flagging of bad visibilities fol- lowed by imaging. We found two confusing sources in the field, -at R.A. = 12ℎ 15𝑚 44𝑠 .669, Dec. = −12◦57(cid:48)59. (cid:48)(cid:48)56 and R.A. = -12ℎ 15𝑚 37𝑠 .216,Dec.= −13◦09(cid:48)33. (cid:48)(cid:48)44at4.1(cid:48) and9.4(cid:48) fromthe +at R.A. = 12 ℎ 15 𝑚 44 𝑠 .669, Dec. = −12◦57(cid:48)59. (cid:48)(cid:48)56 and R.A. = +12 ℎ 15 𝑚 37 𝑠 .216,Dec.= −13◦09(cid:48)33. (cid:48)(cid:48)44at4.1(cid:48) and9.4(cid:48) fromthe pointingcentre,respectively.Theyhadapparentfluxdensitiesof4 and 1.3mJy without primary beam correction. We used these for self-calibration of the field and then subtracted them before final @@ -326,14 +360,17 @@ thesameacceptanceandunderthesameobservationconditions. 3 RESULTS 3.1 MeerKAT ThetheoreticalthermalnoiseoftheMeerKATcanbecalculatedas -𝑆 rms= +𝑆 rms = 1 𝜂 𝑐 SEFD √︃ -𝑛 pol×𝑁(𝑁−1)×Δ𝜈×𝑡 -int. (1) +𝑛 +pol +×𝑁(𝑁−1)×Δ𝜈×𝑡 +int +. 
(1) The system equivalent flux density (SEFD) of MeerKAT at the 1.28GHzis443Jyand𝜂 𝑐isthecorrelatorefficiency.Weused𝑛 pol @@ -383,7 +420,7 @@ dio source is offset by 1. (cid:48)(cid:48)68 from the localisation region of FR 3.1.2 e-MERLINdetectionofcompactemissiontowards FRB20190714 Compact persistent emission was detected in the 1.51GHz e- -MERLINimageatR.A.=12ℎ 15𝑚 55𝑠 .116,Dec.=−13◦01(cid:48)14. (cid:48)(cid:48)48 +MERLINimageatR.A.=12 ℎ 15 𝑚 55 𝑠 .116,Dec.=−13◦01(cid:48)14. (cid:48)(cid:48)48 at 86𝜇Jybeam−1 by e-MERLIN. The stochastic position uncer- tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa- ration between phase-calibrator and target, and antenna position @@ -441,8 +478,7 @@ arespatiallycoincidentwithstarscataloguedintheSDSScatalogue ThisgalaxyisalsodetectedbytheMeerKATradioobservations.We usetheNASA/IPACExtragalacticDatabase(NED)8 tosearchfor knowngalaxiesintheFRB20171019Auncertaintyregions.Wefind -multiplegalaxieswithunknownredshifts,thereforewecannotdraw -conclusionsonthehostgalaxyfromourobservations.Usinga50(cid:48)(cid:48) +multiplegalaxieswithunknownredshifts,thereforewecannotdraw conclusionsonthehostgalaxyfromourobservations.Usinga50(cid:48)(cid:48) circularONregioncentredonthepositionofFRB20171019Aand a50(cid:48)(cid:48) OFFregionthatdoesnotcontainanyofthedetectedsources, weruntheuvotsourcetoolwitha5𝜎backgroundthresholdand @@ -454,7 +490,9 @@ the Wolf 1561 star. As we consider this source unrelated to the FRB,weusetheonlineSwift-XRTdataproductsgenerator(Evans et al. 2007) (Evans et al. 2009) to derive upper limits in the 0.3- 10 keV range on the count rate of 0.001885 counts.s−1. Using -WebPIMMS9(v4.11a)andassumingaweightedaverage𝑁 H=5.12× +WebPIMMS9(v4.11a)andassumingaweightedaverage𝑁 +H +=5.12× 1020 cm−2 from the direction of the source estimated from the NASA’s HEASARC 10 online tools (HI4PI Collaboration et al. 
2016)andapowerlawmodelwithaphotonindex=2,thisupper @@ -520,8 +558,8 @@ IntheimageinFigure3onecanseethatthepersistentradio source lies at the edge of the optical extent of the host galaxy as seen in PanSTARRS observations (Heintz et al. 2020). Our derived 1283MHz peak position with MeerKAT places it just -1. (cid:48)(cid:48)68awayfromthepositionofFRB20190714A(𝛼 𝐽2000,𝛿 𝐽2000 -= 12ℎ 15𝑚 55𝑠 .12, -13◦01(cid:48)15. (cid:48)(cid:48)70; Heintz et al. 2020). The posi- +1. (cid:48)(cid:48)68awayfromthepositionofFRB20190714A(𝛼 𝐽2000 ,𝛿 𝐽2000 += 12 ℎ 15 𝑚 55 𝑠 .12, -13◦01(cid:48)15. (cid:48)(cid:48)70; Heintz et al. 2020). The posi- tionaluncertaintyontheFRBpositionis0. (cid:48)(cid:48)283.Similarly,thepeak 1.51GHze-MERLINpositionofthepersistentradiosourceissepa- ratedfromthepositionofFRB20190714Aby0. (cid:48)(cid:48)53.Thepersistent diff --git a/read/results/pdfplumber/2201.00151.txt b/read/results/pdfplumber/2201.00151.txt index f5a51ce..8caed3a 100644 --- a/read/results/pdfplumber/2201.00151.txt +++ b/read/results/pdfplumber/2201.00151.txt @@ -1,8 +1,41 @@ -arXiv:2201.00151v1 -[astro-ph.GA] +a +r +X +i +v +: +2 +2 +0 +1 . +0 +0 +1 +5 +1 +v 1 -Jan -2022 +[ +a +s +t +r +o +- +p +h +. +G +A +] +1 +J +a +n +2 +0 +2 +2 Astronomy&Astrophysicsmanuscriptno.Populations4 ©ESO2022 January4,2022 Multiple stellar populations in Schwarzschild modeling @@ -90,13 +123,17 @@ A&Aproofs:manuscriptno.Populations4 Table1.PropertiesoftheIllustrisgalaxyusedtocreatemockdata. 
Property Value SubhaloID 16960 -Numberofstellarparticles(N ⋆) 70446 -Numberofdarkmatterparticles(N DM) 78448 -Stellarmass(M ⋆) 5.74×1010M⊙ -Darkmattermass(M DM) 4.91×1011M⊙ +Numberofstellarparticles(N ⋆ ) 70446 +Numberofdarkmatterparticles(N +DM +) 78448 +Stellarmass(M ⋆ ) 5.74×1010M⊙ +Darkmattermass(M DM ) 4.91×1011M⊙ Meanmassofstellarparticles 815808M⊙ Stellarhalf-massradius 9.99kpc -Stellarhalf-numberradius(r 1/2) 9.6kpc +Stellarhalf-numberradius(r +1/2 +) 9.6kpc Axisratioc/awithinr 1/2 0.907 @@ -149,10 +186,16 @@ data, we decided to use a galaxy from the Illustris project ulationfollowstheformationandevolutionofgalaxiesfromthe early Universe to the present by solving gravity and hydrody- namics, as well as modeling of star formation, galactic winds, -SFR -[M⊙ -yr --1] +S +F +R +[ +M +⊙ +y +r - +1 +] t [Gyr] 0 4 @@ -165,8 +208,12 @@ thesimulatedgalaxyfromtheIllustrisprojectusedtocreatemockdata. Theblackandgrayverticalarrowsindicatethelastmergerswhichthe galaxyunderwent,wetanddry,respectively. t -[Gyr] -Z [Z ⊙] +[ +G y +r ] +Z [Z +⊙ +] 0 2 4 @@ -178,9 +225,9 @@ Z [Z ⊙] 2 4 6 -N -[10 -2] +N [ +1 0 +2 ] Fig. 2. Number of stars as a function of their metallicity and time of formation(theageoftheUniverse)inthesimulatedgalaxy.Thevertical lineindicatestheappliedsplitintostellarpopulations. 
@@ -229,7 +276,9 @@ minor 7.7 log( Σ) -[M⊙ +[ +M +⊙ /kpc 2] -80 @@ -245,7 +294,8 @@ POPULATION II POPULATION II 80 160 V -[km/s] +[k +m/s] -80 -40 0 @@ -265,7 +315,8 @@ POPULATION II 60 90 σ -[km/s] +[k +m/s] -80 -40 0 @@ -285,7 +336,9 @@ minor 7.7 log( Σ) -[M⊙ +[ +M +⊙ /kpc 2] -80 @@ -301,7 +354,8 @@ POPULATION II POPULATION II 80 160 V -[km/s] +[k +m/s] -80 -40 0 @@ -321,7 +375,8 @@ POPULATION II 60 90 σ -[km/s] +[k +m/s] Fig.3.Mapsoftheprojectedstellardensity,meanstellarvelocity,andstellarvelocitydispersion(inrows)fortwostellarpopulations:themetal- richpopulationI(left-handsidepanels)andthemetal-poorpopulationII(right-handside),andobservationsalongtheprincipalaxesdetermined forallstars(incolumns,alongthemajor,theintermediate,andtheminoraxis,respectively). @@ -331,7 +386,7 @@ forallstars(incolumns,alongthemajor,theintermediate,andtheminoraxis,respectively 0.5 1 1 10 100 -β(r) +β ( r ) r [kpc] -1 -0.5 @@ -339,7 +394,7 @@ r [kpc] 0.5 1 0 10 20 30 40 50 -β(r) +β ( r ) r [kpc] all stars pop I @@ -350,8 +405,7 @@ pop II 100 120 1 10 100 -σr -(r) +σr ( r ) r [kpc] 40 60 @@ -359,8 +413,7 @@ r [kpc] 100 120 0 10 20 30 40 50 -σr -(r) +σr ( r ) r [kpc] 40 60 @@ -368,8 +421,7 @@ r [kpc] 100 120 1 10 100 -σt -(r) +σt ( r ) r [kpc] 40 60 @@ -377,8 +429,7 @@ r [kpc] 100 120 0 10 20 30 40 50 -σt -(r) +σt ( r ) r [kpc] Fig.4.Profilesofthevelocityanisotropyparameter,radialvelocitydispersion,andtangentialvelocitydispersion(inconsecutivecolumns)calcu- latedfromallstars(inred),includingonlypopulationI(inorange),andonlypopulationII(inblue).Theupperrowshowstheprofilesusingthe @@ -391,7 +442,9 @@ nents,anddetailsontheshapeofthestellarcomponent:theaxis ratios minor to major (shortest to longest) c/a, intermediate to majorb/a,andthetriaxialityparameterT =(a2−b2)/(a2−c2). 
Wedistinguishbetweenthehalf-massradiusprovidedintheIl- -lustris database and the half-numberradius r 1/2, which we use +lustris database and the half-numberradius r +1/2 +, which we use forfurthercalculationsinthispaper.Thedifferencebetweenthe twocomesfromasmallgradientinthestellarmass-to-lightratio withthedistancefromthegalacticcenter.Sinceinourapproach @@ -401,15 +454,22 @@ needed),theapplicationofthehalf-numberradiusismoreself- consistent. Articlenumber,page3of12 A&Aproofs:manuscriptno.Populations4 -10-3 -10-1 -101 -103 +10 +-3 +10 -1 +10 1 +10 +3 10 100 -n⋆ -(R) -[kpc --2] +n +⋆ ( +R +) +[ +k +p +c - 2 +] R [kpc] major 10 100 @@ -423,7 +483,9 @@ pop I pop II Fig.5.Surfacenumberdensityprofilesofthestellardatasamplesforthesimulatedgalaxyobservedalongdifferentlinesofsight(fromtheleftto theright).Differentlinesshowprofilesforallavailablestars(inred),themetal-richpopulationI(inorange),andthemetal-poorpopulationII(in -blue).Thinverticallinesindicater 0(seetext)andtheouterboundaryofthespectroscopicdata. +blue).Thinverticallinesindicater +0 +(seetext)andtheouterboundaryofthespectroscopicdata. 2.2.Splittingthestarsintopopulations Our chosen galaxy shows a complex formation history under- goingmultiplemergerswhichresultinextendedstar formation @@ -461,7 +523,7 @@ distancesfromthecenter. 
The velocity anisotropy parameter β(r) = 1 − (σ2 θ + -σ2 φ)/(2σ2 r),whereσ iarevelocitydispersionsinsphericalcoordi- +σ2 φ )/(2σ2 r ),whereσ i arevelocitydispersionsinsphericalcoordi- nates(Binney&Tremaine2008),describestheorbitalstructure ofgalaxies.Itisoneofthemostimportantdynamicalproperties of bound systems which cannot be inferred directly from ob- @@ -470,7 +532,11 @@ profiles of the anisotropy parameter β as well as the radial σ r andtangentialσ t -=[(σ2 θ+σ2 φ)/2]1/2velocitydispersionsforour +=[(σ2 +θ ++σ2 +φ +)/2]1/2velocitydispersionsforour simulated galaxy are presented in the consecutive columns of Fig.4.Throughoutthepaperweusered,orange,andbluecolors to indicate values calculated or recoveredfor all stars, popula- @@ -512,20 +578,28 @@ I(R)=I 0   1 -p1+(R/R c)2 +p +1+(R/R c )2 − 1 -p1+(R t/R c)2 -2 +p +1+(R t /R c )2 + +2 , (1) Articlenumber,page4of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling -10-3 -10-2 -10-1 -100 +10 +-3 +10 +-2 +10 +-1 +10 +0 0 10 20 30 40 -M(R) +M( +R) R [kpc] major 0 10 20 30 40 @@ -534,14 +608,21 @@ intermediate 0 10 20 30 40 50 R [kpc] minor -369 +3 +6 +9 12 0 10 20 30 40 -m2 -(R)[10 -3(km -s --1) +m +2 ( +R)[ +1 +0 +3( +k +m +s - +1) 2] R [kpc] 0 10 20 30 40 @@ -549,27 +630,43 @@ R [kpc] 0 10 20 30 40 50 R [kpc] -10 --505 +-5 +0 +5 10 0 10 20 30 40 -m3 -(R)[10 -4(km -s --1) +m +3 ( +R)[ +1 +0 +4( +k +m +s - +1) 3] R [kpc] 0 10 20 30 40 R [kpc] 0 10 20 30 40 50 R [kpc] -01234 +0 +1 +2 +3 +4 0 10 20 30 40 -m4 -(R)[10 -8(km -s --1) +m +4 ( +R)[ +1 +0 +8( +k +m +s - +1) 4] R [kpc] 0 10 20 30 40 @@ -583,7 +680,11 @@ Fig.6.ObservablesusedinourSchwarzschildmodelingschemeofthesimulatedgalaxy.Inrows 3rd,and4thvelocitymoment.Incolumns:mockdatafromthesimulatedgalaxyalongthemajor,intermediate,andminoraxis.Inredwepresent thevaluesobtainedforallstarswhereasinorangeandbluethoseforpopulationsIandII,respectively.Forclarityofthefigure,ineachpanelwe indicateonlytheerrorbarsforoneofthedatasets. 
-where I 0, R c, and R +where I +0 +, R +c +, and R t are the modelparameters.The profile can beanalyticallydeprojectedtoobtainthe3Ddensity @@ -591,23 +692,33 @@ beanalyticallydeprojectedtoobtainthe3Ddensity ρ 0 z2 -"1 +" +1 z -arccos(z)− p1−z2 #, (2) +arccos(z)− +p +1−z2 +# +, (2) where ρ 0 = I 0 -πR c[1+(R t/R c)2]3/2 -(3) -and -z= sr2+R2 c -R2 +πR c -+R2 +[1+(R t +/R +c +)2]3/2 +(3) +and +z= +s +r2+R2 c +R2 c +R2 t . (4) 3. Schwarzschildmodeling In this section we briefly present our modeling method and its @@ -621,9 +732,8 @@ namely we model the total mass profile with the mass-to-light ratioΥvaryingwithradius: logΥ(r)= ( -log(Υ 0) r≤r 0 -a(logr−logr 0)c+log(Υ 0) r>r -0 +log(Υ 0 ) r≤r 0 +a(logr−logr 0 )c+log(Υ 0 ) r>r 0 (5) Articlenumber,page5of12 A&Aproofs:manuscriptno.Populations4 @@ -693,11 +803,15 @@ c 2 Fig.7.Absolutevaluesofχ2obtainedfromthefitsofthreedatasets:allstars(topleftpanel),populationI(bottomleft),andpopulationII(bottom right)fortheobservationsalongthemajoraxisofthesimulatedgalaxy.Theresultsforthemodelingoftwopopulations(topright)wereobtained -asanalgebraicsumofvaluesforpopulationsIandII.Toavoidlargenumbersinthefigure,Υ 0wasdividedbythemeanmassofastellarparticle. +asanalgebraicsumofvaluesforpopulationsIandII.Toavoidlargenumbersinthefigure,Υ +0 +wasdividedbythemeanmassofastellarparticle. where r is the distance from the center of the galaxy, r 0 is a -constant,while Υ 0, a,andc are the parametersof a model.We +constant,while Υ +0 +, a,andc are the parametersof a model.We haveassumedlogr 0 =0.33whichcorrespondstothreesoftening @@ -721,7 +835,9 @@ were obtainedwith a simple transformationof velocities given by Eq.12, 13, and 15 in Kowalczyketal. (2018). 
In or- dertosmoothoutthenumericalartifacts,thethree-dimensional χ2 spaces were then interpolated with 12-order polynomials -(∼ a4c4Υ4 0)thatwerefurtherusedto determinetheglobalmin- +(∼ a4c4Υ4 +0 +)thatwerefurtherusedto determinetheglobalmin- imums(identifiedas the best-fitting models)and 1, 2, 3σ con- fidence levels which for three parameterscorrespondto ∆χ2 = 3.53, 8.02, 14.2(Pressetal.1992). @@ -749,9 +865,11 @@ K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling 1010 10 100 ALL -Υ(r) -[M⊙ -/L⊙ ] +Υ(r) [ +M +⊙ / +L +⊙ ] r [kpc] major 10 100 @@ -773,8 +891,10 @@ data ALL νtot (r) -[M⊙ -kpc +[ +M ⊙ +k +pc -3] r [kpc] 10 100 @@ -789,8 +909,9 @@ r [kpc] 10 100 ALL Mtot -(r) -[M⊙ ] +(r) [ +M +⊙ ] r [kpc] 10 100 ALL @@ -800,7 +921,8 @@ ALL r [kpc] -2 -1 -01 +0 +1 0 10 20 30 40 ALL β(r) @@ -818,9 +940,11 @@ r [kpc] 1010 10 100 POPULATIONS -Υ(r) -[M⊙ -/L⊙ ] +Υ(r) [ +M +⊙ / +L +⊙ ] r [kpc] major 10 100 @@ -842,8 +966,10 @@ data POPULATIONS νtot (r) -[M⊙ -kpc +[ +M ⊙ +k +pc -3] r [kpc] 10 100 @@ -858,8 +984,9 @@ r [kpc] 10 100 POPULATIONS Mtot -(r) -[M⊙ ] +(r) [ +M +⊙ ] r [kpc] 10 100 POPULATIONS @@ -869,7 +996,8 @@ POPULATIONS r [kpc] -2 -1 -01 +0 +1 0 10 20 30 40 POPULATIONS β(r) @@ -898,7 +1026,9 @@ elswhereasthecoloredareasofdecreasingintensitycorrespond to1,2,and3σconfidenceregionsobtainedasextremevaluesal- lowedbythemodelswithχ2withinagivenregion.Ineachpanel thetruevaluesfromthesimulationarepresentedwithblacklines -whilethinverticallinesmarkthevaluesofr 0andtheouterrange +whilethinverticallinesmarkthevaluesofr +0 +andtheouterrange ofthedatasetsbeyondwhichthereliabilityofresultsdropssig- nificantly. The true mass-to-light ratio profile was obtained by dividingthetotalmassbythefittedKingprofiles,thereforethe @@ -922,7 +1052,9 @@ sightmeasurements. The main strength of the two populations method comes from tracingtheunderlyinggravitationalpotentialatdifferentscales. 
AscanbeseeninthebottompanelsofFig.7,populationI,which -is more concentrated, is also more sensitive to Υ 0, but gives +is more concentrated, is also more sensitive to Υ +0 +, but gives weaker constraints on a or c. On the other hand, population II attemptsto reproducethe totalmass contentatlargerdistances aswell,thereforeshowingstrongercouplingbetweentheparam- @@ -949,14 +1081,22 @@ ingfromthemajortotheminoraxis. Articlenumber,page7of12 A&Aproofs:manuscriptno.Populations4 -1 -01 +0 +1 0 10 20 30 40 -POP +P +O +P I + -POP -II -β(r) +P +O +P +I +I +β +( +r ) r [kpc] major 0 10 20 30 40 @@ -967,11 +1107,15 @@ r [kpc] minor 50 60 70 80 -1 -01 +0 +1 0 10 20 30 40 -POP +P +O +P I -β(r) +β ( +r ) r [kpc] 0 10 20 30 40 r [kpc] @@ -979,11 +1123,15 @@ r [kpc] r [kpc] 50 60 70 80 -1 -01 +0 +1 0 10 20 30 40 -POP -II -β(r) +P +O +P I +I +β ( +r ) r [kpc] 0 10 20 30 40 r [kpc] @@ -1048,21 +1196,28 @@ Articlenumber,page8of12 K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling Table2.PropertiesofthedatasamplesfortheFornaxdSph. Property ALL POPI POPII -Numberofstars(N phot) 65797 14882 49205 -Numberofstars(N spec) 3286 1136 1151 +Numberofstars(N phot ) 65797 14882 49205 Numberofstars(N spec ) 3286 1136 1151 Starswithin1.8kpc 3268 1134 1130 -Fittednormalization(N 0)[×104] 6.95 1.81 5.45 -Sérsicradius(R S)[kpc] 0.454 0.429 0.420 +Fittednormalization(N 0 )[×104] 6.95 1.81 5.45 +Sérsicradius(R +S +)[kpc] 0.454 0.429 0.420 Sérsicparameter(m) 0.808 0.807 0.898 -102 -103 -104 -105 +10 +2 +10 +3 +10 4 +10 +5 0.2 0.5 2 0.1 1 -n⋆ -(R) -[kpc --2] +n +⋆ ( +R ) +[ +k +p +c - 2 ] R [kpc] all stars popI @@ -1081,10 +1236,12 @@ tionintroducedinprevioussections.Thinverticallinesindicate the innermost data point for the light profile for all stars and the outerboundaryof the kinematic sample. The former,set at logr = −0.16,isalsousedastheminimumofthemass-to-light -ratio profile (r +ratio profile (r 0 in Eq.5). 
The fitted parameters of the profiles, +thatisthenormalizationN 0 -in Eq.5). The fitted parameters of the profiles, -thatisthenormalizationN 0,theSérsicradiusR S,andtheSérsic +,theSérsicradiusR +S +,andtheSérsic parameterm,areincludedinthesecondpartofTable2. Figure12presentstheprofilesoftheobservablesusedinthe Schwarzschildmodeling:thefractionofstarsandthe 2nd,3rd, @@ -1110,12 +1267,11 @@ arepresentedintheconsecutiverowsofFig.14.Theanisotropy profileforthepopulationsisbasedonthefitofallstarsbutusing 0 0.05 -0.1 -0.15 +0.1 0.15 0.2 0.25 0 0.4 0.8 1.2 1.6 -M(R) +M ( R ) R [kpc] all stars pop I @@ -1127,34 +1283,57 @@ pop II 160 200 0 0.4 0.8 1.2 1.6 -m2 -(R)[(km -s --1) -2] +m +2 ( +R ) [ +( +k +m +s - 1 ) +2 +] R [kpc] -16 -8 -08 +0 +8 16 0 0.4 0.8 1.2 1.6 -m3 -(R)[10 -2(km -s --1) -3] +m +3 +( +R +) +[ +1 +0 +2 +( +k +m +s - +1 +) +3 +] R [kpc] -048 +0 +4 +8 12 16 0 0.4 0.8 1.2 1.6 -m4 -(R)[10 -4(km -s --1) -4] +m +4 ( +R ) [ +1 +0 4 ( +k +m +s - +1 ) +4 +] R [kpc] Fig. 12. Observables of the Fornax dSph used in our Schwarzschild modelingscheme.Inrows:thefractionofthetotalnumberofstars,the @@ -1210,15 +1389,29 @@ c χ 2- χ -2min +2 +mi +n Fig.13.Valuesofχ2relativetothefittedminimumwithintherangeof3σconfidencelevelforallstars(leftpanel)andforthepopulations(right panel)fortheFornaxdSph. (Kowalczyketal.2019),weobtainedhigherestimatesoftheen- closed total mass at larger radii. In particular, for the mass en- -closedwithin 1.8kpcwe get M all(< 1.8kpc) = 3.87+1.48 −1.56×108 -M⊙ fromthefitforallstarsand M pops(< 1.8 kpc)= 4.71+0.87 −1.13× +closedwithin 1.8kpcwe get M +all +(< 1.8kpc) = 3.87+1.48 +−1.56 +×108 +M⊙ fromthefitforallstarsand M +pops +(< 1.8 kpc)= 4.71+0.87 +−1.13 +× 108 M⊙ from the fit of populations, while previously we had -M old(<1.8 kpc)=3.7+1.4 −1.3×108M⊙. +M +old +(<1.8 kpc)=3.7+1.4 +−1.3 +×108M⊙. 
Interestingly, despite the significant shift of the position of χ2 min @@ -1296,27 +1489,29 @@ K.Kowalczyk&E.L.Łokas:MultiplestellarpopulationsinSchwarzschildmodeling 103 105 0.1 1 -Υ(r) -[M⊙ -/L⊙ ] +Υ(r) [ M +⊙ / L ⊙ ] r [kpc] ALL 0.1 1 r [kpc] POPULATIONS 3σ -2σ 1σ -best model +2σ 1σ best model K19 104 106 108 0.1 1 -νtot +νt +ot (r) -[M⊙ -kpc --3] +[ +M +⊙ +k +p c - +3] r [kpc] 0.1 1 r [kpc] @@ -1324,15 +1519,18 @@ r [kpc] 107 109 0.1 1 -Mtot (r) -[M⊙ ] +Mt ot (r) +[ +M +⊙ ] r [kpc] 0.1 1 r [kpc] -3 -2 -1 -01 +0 +1 0 0.4 0.8 1.2 1.6 β(r) r [kpc] @@ -1368,30 +1566,39 @@ nonspherical object. In both cases, a simpler approach (much smaller data samples or using one stellar population) resulted -2 -1 -01 +0 +1 0 0.4 0.8 1.2 1.6 -POP +P +O +P I ++ P O P I -+ -POP -II -β(r) +I +β +( r ) r [kpc] -2 -1 -01 +0 +1 0 0.4 0.8 1.2 1.6 -POP +P +O +P I -β(r) +β ( +r ) r [kpc] -2 -1 -01 +0 +1 0 0.4 0.8 1.2 1.6 -POP -II -β(r) +P +O P I +I +β ( r ) r [kpc] best model 1σ diff --git a/read/results/pdfplumber/2201.00178.txt b/read/results/pdfplumber/2201.00178.txt index 11d5958..8340a8f 100644 --- a/read/results/pdfplumber/2201.00178.txt +++ b/read/results/pdfplumber/2201.00178.txt @@ -41,11 +41,44 @@ et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formu validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and comparing with previous time-distance studies (Langfellner et al. 2018). prasad.subramanian@tifr.res.in -arXiv:2201.00178v1 -[astro-ph.SR] +a +r +X +i +v +: +2 +2 +0 +1 +. +0 +0 +1 +7 +8 +v +1 +[ +a +s +t +r +o +- +p +h . +S +R +] 1 -Jan -2022 +J +a +n +2 +0 +2 +2 2 Mani et al. Normal-modecouplingreferstotheconceptofexpressingsolar-oscillationeigenfunctionsasalinearweightedcombi- nationofmodel-eigenfunctions(e.g.,ModelSChristensen-Dalsgaard2021). 
Themodeleigenfunctionsformacomplete @@ -60,7 +93,11 @@ In this study, we extend the spectral analysis of H21 and develop the method to at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is reworked,primarilytoimagesteadyflows. Measurementsarethenconstructed,andinversionstoinferdivergenceflow and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order -coupling (p 2-p 2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. +coupling (p +2 +-p +2 +), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. 1.1. Forward problem In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a @@ -77,12 +114,26 @@ that are small when compared to the solar radius. When imaging steady, near-surf of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the horizontal wavenumber qR (cid:12) -≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(q x,q y)| is the vector horizontal +≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(q +x +,q +y +)| is the vector horizontal wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow perturbationdescribedinahorizontalFourierdomain. Supergranularvelocitiesaresubsonic(300-400m/s,seeRincon -&Rieutord2018), permittingustomodeltheflowvectoruuu=(u x,u y,u z)intheCartesiandomainlikeso(Unnoetal. +&Rieutord2018), permittingustomodeltheflowvectoruuu=(u +x +,u +y +,u +z +)intheCartesiandomainlikeso(Unnoetal. 
1989; Woodard 2006) -uσ =∇×[∇×(P e z)]+∇×(T e z), (1) +uσ =∇×[∇×(P e +z +)]+∇×(T e +z +), (1) where P = Pσ(x) and T = Tσ(x) are poloidal and toroidal scalar functions, varying with position x and temporal frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for @@ -90,26 +141,54 @@ example), here we only consider the frequency bin σ = 0, denoting the temporall of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using vector calculus results in -u=−∇2Pe z+∇(∂ zP)+∇ hT×e z, (2) +u=−∇2Pe +z ++∇(∂ +z +P)+∇ +h +T×e +z +, (2) where ∇ h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a -functionofhorizontalwavenumberq anddepthze z. HencethepoloidalandtoroidalflowsaredescribedbyP q(z)and -T q(z), respectively. Furthermore, we parametrize the flow along e +functionofhorizontalwavenumberq anddepthze +z +. HencethepoloidalandtoroidalflowsaredescribedbyP +q +(z)and +T +q +(z), respectively. Furthermore, we parametrize the flow along e z using basis functions f(z) (Chebyshev, B-spline, etc). This is expressed as P ≡P -q(z)=(cid:88) +q +(z)= +(cid:88) +j +f +j +(z)P +qj +, T ≡T +q +(z)= +(cid:88) j -f j(z)P qj, T ≡T -q(z)=(cid:88) +f j -f j(z)T qj. (3) +(z)T +qj +. 
(3) The flow coefficients P qj -and T qj, represented by the discrete indices q and j, become ideal candidates for inversions, +and T +qj +, represented by the discrete indices q and j, become ideal candidates for inversions, where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be exploited to expedite inversions. Note that P qj @@ -125,7 +204,13 @@ To infer flows from wavefields φ scattered by a perturbation of length scale q, Imaging near-surface flows using mode-coupling analysis 3 φω∗ k -φω k+q, wherek istheoscillationmodewavenumber(k x,k y)andω isthetemporalfrequency. Relateφω∗ +φω +k+q +, wherek istheoscillationmodewavenumber(k +x +,k +y +)andω isthetemporalfrequency. Relateφω∗ k φω k+q @@ -137,13 +222,21 @@ qj (see eq A7) (cid:104)φω∗ k -φω k+q(cid:105)=Hω +φω +k+q +(cid:105)=Hω kk(cid:48)nn(cid:48) (cid:88) j -C qj,kP +C +qj,k +P qj -+D qj,kT qj. (4) ++D +qj,k +T +qj +. (4) TheweightfactorHω (seeeqA8)isafunctionoffrequency,capturinginformationabouttheextentofcouplingbetween thetwomodes[n,k]and[n(cid:48),k(cid:48)],wherenandn(cid:48) aretheradialordersofthemodes,andk =|k|andk(cid:48) =|k(cid:48)|=|k+q|. Thespectralprofileofthemode(seeeqA9)isapproximatedusingaLorentzian(Andersonetal.1990). Themorethe @@ -162,7 +255,9 @@ D qj,k =D −qj,−k -(see eq A6). The kernels, as flows, are expressed on the basis f j(z). +(see eq A6). The kernels, as flows, are expressed on the basis f +j +(z). 1.2. Least-squares of cross-correlation Even though φω∗ k @@ -175,19 +270,25 @@ k φω k+q (seeWoodard2006,2014,2016)results -in the B-coefficients B k,q, according to +in the B-coefficients B +k,q +, according to B k,q = (cid:80) ω -Hω∗ kk(cid:48)nn(cid:48)φω∗ +Hω∗ +kk(cid:48)nn(cid:48) +φω∗ k φω k+q (cid:80) ω -|Hω kk(cid:48)nn(cid:48)|2 +|Hω +kk(cid:48)nn(cid:48) +|2 . 
(5) Multiplying eq 4 on both sides by Hω∗ kk(cid:48)nn(cid:48) @@ -195,24 +296,47 @@ and substituting by eq 5 on the left-hand-side results in a concisely defined forward problem (compare with eq 4) B k,q -=(cid:88) += +(cid:88) j -C qj,kP +C +qj,k +P qj -+D qj,kT qj. (6) ++D +qj,k +T +qj +. (6) In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. -Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω nk, -|ω|∈(cid:16) -ω nk−(cid:15)Γ nk/2,ω nk+(cid:15)Γ -nk/2(cid:17) +Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω +nk +, +|ω|∈ +(cid:16) +ω +nk +−(cid:15)Γ +nk +/2,ω +nk ++(cid:15)Γ +nk +/2 +(cid:17) or -|ω|∈(cid:16) +|ω|∈ +(cid:16) ω n(cid:48)k(cid:48) -−(cid:15)Γ n(cid:48)k(cid:48)/2,ω +−(cid:15)Γ +n(cid:48)k(cid:48) +/2,ω n(cid:48)k(cid:48) +(cid:15)Γ -n(cid:48)k(cid:48)/2(cid:17) +n(cid:48)k(cid:48) +/2 +(cid:17) . (7) Summing over ±ω guarantees that the parity B k,q @@ -224,11 +348,18 @@ Taking the complex conjugate on both sides of eq 6 and considering the negative −k, B∗ −k,−q -=(cid:88) += +(cid:88) j -C −qj,−kP∗ +C +−qj,−k +P∗ −qj -+D −qj,−kT∗ −qj. (8) ++D +−qj,−k +T∗ +−qj +. (8) Substituting parity and symmetry relations for all terms in the above results in eq 6. As B k,q is constructed by a @@ -261,7 +392,9 @@ in our case. We use the same noise model as in H21, which was motivated by the above discussion, G k,q -≡(cid:104)|B k,q|2(cid:105), (9) +≡(cid:104)|B +k,q +|2(cid:105), (9) where, unlike H21, we again sum over ±ω. G k,q is real, with the symmetry relation G @@ -281,7 +414,9 @@ wavefieldφω k (andsubsequently,thecross-correlationφω∗ k -φω k+q)isobtainedbycomputingthe3Dspatialandtemporal +φω +k+q +)isobtainedbycomputingthe3Dspatialandtemporal Fourier transform of the Dopplercube. 
The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon @@ -291,9 +426,17 @@ Maximum signal can be extracted from the weighted summation of the cross correla profiles of the two modes [n,k] and [n(cid:48),k(cid:48)] closely align in ω space. This implies that their mode frequencies should be sufficiently close (|ω nk -−ω n(cid:48)k(cid:48)| ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over +−ω +n(cid:48)k(cid:48) +| ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over ±ω is significant only over a few linewidths ((cid:15), the summation parameter; see eq 7). We have empirically found and -tabulated δ in Table 1 for the radial order couplings n-n(cid:48) ∈ f-f, p 1-p 1, and p 2-p +tabulated δ in Table 1 for the radial order couplings n-n(cid:48) ∈ f-f, p +1 +-p +1 +, and p +2 +-p 2 (the signal strength depends only weakly on (cid:15); we set it to 3 line widths). @@ -302,8 +445,12 @@ nk and ω n(cid:48)k become spaced -farther apart with increasing wavenumber kR (cid:12). It is also known that mode linewidth Γ grows with radial orders for -a given kR (cid:12). Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +farther apart with increasing wavenumber kR +(cid:12) +. It is also known that mode linewidth Γ grows with radial orders for +a given kR +(cid:12) +. 
Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of observation set the total number of modes within a range of kR (cid:12) (and ω/2π) that can be clearly observed, thereby @@ -325,16 +472,22 @@ modes f-f [400,1000] 5240 4 [1000,1500] 7784 1.1 [1500,2000] 10940 0.4 -p 1-p +p +1 +-p 1 [400,1000] 5240 4.5 [1000,1750] 12852 2 -p 2-p +p +2 +-p 2 [200,1000] 5886 3 [1000,1300] 4280 3 Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different -ranges of kR (cid:12). +ranges of kR +(cid:12) +. 3. INVERSION The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements B @@ -343,15 +496,25 @@ from the linear relation in eq 6. We describe inversion using regularized-least- leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas SOLA gives better localization. For total number of modes M, RLS scales as MxJ where J is the number of basis -functions f j(z) (J (cid:28) M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000, +functions f +j +(z) (J (cid:28) M; see eq 3 and section 3.1), whereas SOLA scales as M2 (see Appendix B). For M > 5000, computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is -presenteveninp 1-p 1, andp 2-p +presenteveninp +1 +-p +1 +, andp +2 +-p 2 (seeFigure3), andpossiblyotherhigherorderself-andcross-couplings. Sinceweare interested in only surface flows, we leave higher order coupling to future work. 
-It bears mentioning that the slopes of the ridges in the kR (cid:12)-ν spectrum (Figure 1) increase with radial order. This +It bears mentioning that the slopes of the ridges in the kR +(cid:12) +-ν spectrum (Figure 1) increase with radial order. This limitsustolow-to-intermediatekR (cid:12) (<1000)forthesehigherradialordersifwearetoremainundertheacousticcut- @@ -368,20 +531,36 @@ KU=B, (10) with the aim to minimize the misfit (cid:80) k -||KU−B|| 2, with |||| +||KU−B|| +2 +, with |||| 2 denoting the L 2 norm. Here, K is the matrix formed -by the sensitivity kernels: {C qj,k,D qj,k}. U is a vector composed of the flow coefficients: {P qj,T qj} and B is a vector -composed of computed B-coefficients: {B k,q}. The least-squares problem is solved simultaneously for poloidal and -toroidal flow. We use B-spline basis functions as our f j(z), comprising 11 knots spaced uniformly in acoustic radius, +by the sensitivity kernels: {C +qj,k +,D +qj,k +}. U is a vector composed of the flow coefficients: {P +qj +,T +qj +} and B is a vector +composed of computed B-coefficients: {B +k,q +}. The least-squares problem is solved simultaneously for poloidal and +toroidal flow. We use B-spline basis functions as our f +j +(z), comprising 11 knots spaced uniformly in acoustic radius, for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M) and 11 basis functionsforeachpoloidalandtoroidal,thedimensionsofK,UandBarethusM×22,22×1,andM×1respectively. -Normalizingbothsidesofeq10bythenoisecovarianceΛ(adiagonalmatrixwiththeentriesG k,q;seeeq9;dimension +Normalizingbothsidesofeq10bythenoisecovarianceΛ(adiagonalmatrixwiththeentriesG +k,q +;seeeq9;dimension M ×M) and pre-multiplying by K(cid:124), -(K(cid:124) Λ−1K)U=(K(cid:124) Λ−1)B, (11) -U=(K(cid:124) Λ−1K)−1K(cid:124) Λ−1B. (12) +(K (cid:124) Λ−1K)U=(K (cid:124) Λ−1)B, (11) +U=(K (cid:124) Λ−1K)−1K (cid:124) Λ−1B. (12) 6 Mani et al. Figure 2. 
Left: Averagingkernelforpoloidalflow(seesectionB.2,eqB17,andleftpanelofFigure8)forqR (cid:12) @@ -395,14 +574,16 @@ diamond. Since the least-squares problem is typically ill-posed, we restate the minimization as (cid:80) k -||KU−B|| 2+λ||U|| +||KU−B|| +2 ++λ||U|| 2 with the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. Including this regularization makes the problem better conditioned and is now defined as -U=(K(cid:124) Λ−1K+λI)−1K(cid:124) Λ−1B, (13) +U=(K (cid:124) Λ−1K+λI)−1K (cid:124) Λ−1B, (13) where I is the identity matrix for L 1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed @@ -426,22 +607,42 @@ LCT is an effective method (see Rieutord et al. 2001) to produce surface horizon Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are ob- tained and Postel projected. The horizontal flows are deduced by tracking the proper motions of granules between -consecutive intensity images, which we denote as I 1,I 2. The LCT method selects a patch in two images each +consecutive intensity images, which we denote as I +1 +,I +2 +. The LCT method selects a patch in two images each (I 1 -= I 1e(x−xij)2/2sigma2,I += I +1 +e(x−xij)2/2sigma2,I 2 -= I 2e(x−xij)2/2sigma2) that observe the same granule at the grid point x += I +2 +e(x−xij)2/2sigma2) that observe the same granule at the grid point x ij -= (x i,y j). += (x +i +,y +j +). A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance moved by granules are usually in sub-pixel regime. 
The convention for the direction of x is the same as described in -section 1.1. The two patches I 1,I +section 1.1. The two patches I +1 +,I 2 are then cross correlated for different values of position shifts ∆x, C -ij(∆x,∆y)=(cid:90) -dxI∗ 1(−x)I 2(∆x−x). (14) +ij +(∆x,∆y)= +(cid:90) +dxI∗ +1 +(−x)I +2 +(∆x−x). (14) The shift ∆x = (∆x,∆y) that maximizes the cross-correlation C ij is taken to be the proper motion of the granule. @@ -451,20 +652,36 @@ x = ∆x/∆t and v y = ∆y/∆t. This exercise is repeated for all grid points in the -images I 1,I +images I +1 +,I 2 and for each consecutive pair of images in the cube. In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing v x -and v y. FLCT +and v +y +. FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the Imaging near-surface flows using mode-coupling analysis 7 -Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p 1-p 1, and p 2-p +Figure 3. Top: Inverted poloidal flow power-spectrum for the three couplings f-f, p +1 +-p +1 +, and p 2 -as a function of q xR +-p +2 +as a function of q +x +R (cid:12) and -q yR (cid:12). Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the +q +y +R +(cid:12) +. Bottom: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the mean. Total power appears to increase through the radial orders. Power is in units of m2/s4. dominant length scale of the velocity field in the images. The Postel-projected intensity images are fed as input to the FLCT code. v @@ -475,14 +692,50 @@ are then computed for consecutive pairs of images and are averaged over the enti 5. 
MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by substituting P and T from eq 3 into eq 2 as below - -uuu(q,z)=−∇2Pe z+∇(∂ zP)+∇ hT×e z, -=−(0, 0, ∂2 xP +∂2 yP +∂2 zP)+(∂ x∂ zP, ∂ y∂ zP, ∂2 zP)+(∂ yT, −∂ xT, 0). (15) -Setting ∂2 x+∂2 +uuu(q,z)=−∇2Pe +z ++∇(∂ +z +P)+∇ +h +T×e +z +, +=−(0, 0, ∂2 +x +P +∂2 +y +P +∂2 +z +P)+(∂ +x +∂ +z +P, ∂ +y +∂ +z +P, ∂2 +z +P)+(∂ +y +T, −∂ +x +T, 0). (15) +Setting ∂2 +x ++∂2 y =q2, div is given by, -∇ h·uuu(q,z)=q2∂ zP, (16) +∇ +h +·uuu(q,z)=q2∂ +z +P, (16) and curl is given by, -(cid:104) ∇×uuu(q,z)(cid:105) +(cid:104) +∇×uuu(q,z) +(cid:105) z =q2T. (17) We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The @@ -512,22 +765,40 @@ smoothing depends on the length scale qR (cid:12) to be compared with mode-coupling. div and curl are then simply computed by -div =∂ xv x+∂ yv y, (18) -curl=∂ xv y−∂ yv x. (19) +div =∂ +x +v +x ++∂ +y +v +y +, (18) +curl=∂ +x +v +y +−∂ +y +v +x +. (19) We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, and transform back to real space. Condensing all of the above, the following sequence of operations to compare flows at desired length scales are performed for mode-coupling (M-C) and for LCT - -M-C: φ(x,y;t) 3DFFT =====⇒φω k,B +M-C: φ(x,y;t) 3DFFT =====⇒φω +k +,B k,q inversion ======⇒P,T ∇h· ===⇒ ∇× eqns 16, 17 Filter, =====⇒ 2DFFT div,curl -LCT: I 1,I 2 +LCT: I 1 ,I 2 FLCT -====⇒v x,v y +====⇒v x ,v y smooth, ======⇒ ∇h· ∇× @@ -550,11 +821,17 @@ couplings are used. Since vortical flows are imaged at a region near the equator vorticity is something other than Coriolis force. 
Nevertheless, there is also a very good agreement (87%) between the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to -insufficientmodesforthep 2-p +insufficientmodesforthep +2 +-p 2 case(seeTable1),weareunabletoinfervorticalflowswithconvictionotherthannear the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished -throughmode-couplinghelioseismology-usingf-forp 1-p 1alonetoseismicallyinfernear-surfacedivergenceandvortical +throughmode-couplinghelioseismology-usingf-forp +1 +-p +1 +alonetoseismicallyinfernear-surfacedivergenceandvortical flows at different scales (qR (cid:12) = 100,150) can yield extremely good agreement with LCT. As the length scale of the @@ -574,7 +851,9 @@ plots are 0.51 for divergence and 0.01 for vorticity. The vorticity flow maps ar values. For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward -a precise statement on them. H21 reported a 60% greater amplitude for p 1-p +a precise statement on them. H21 reported a 60% greater amplitude for p +1 +-p 1 over f-f coupling (Figure 3 reflects a similar conclusion), another element to consider when combining different radial orders. The choice of regularization @@ -588,7 +867,9 @@ Thus, theamplitudesofthemode-couplingflows(andthecorrelationcoefficient)dependup • Smoothing applied to LCT flows (indirectly; see below paragraph), • The depth at which flows are inferred. Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close -to the surface (within ±0.5 Mm from z=0). 
For a desired comparison length scale qR (cid:12), we first fix the coupling(s) +to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR +(cid:12) +, we first fix the coupling(s) and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and 10 Mani et al. (a)qR(cid:12)=100,f-f (b)qR(cid:12)=150,p1-p1 @@ -596,7 +877,9 @@ Figure6. Real-spacedivergenceflows(leftcolumn,inunitsof10−5s−1)andradialvort for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around qR (cid:12) -=100, and using (b) p 1-p +=100, and using (b) p +1 +-p 1 coupling (bottom row), bandpass filtered around qR (cid:12) @@ -613,7 +896,9 @@ for LCT (top row), and mode-coupling inversion through RLS using all the couplin vorticity maps for LCT for different values of smoothing. These flow maps are then compared with those obtained from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired -qR (cid:12). +qR +(cid:12) +. Ithasbeenshown(seeDeRosa&Toomre2004;Langfellneretal.2015)thatline-of-sightvelocityfromDopplergrams and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes fordivergenceflowsowingtothemulti-stepprocessinvolvedinobtainingthem. Forexample, therehasbeenahistory @@ -624,10 +909,14 @@ Coupling qR (cid:12) div curl f-f 100 0.97 0.87 -+ p 1-p ++ p +1 +-p 1 150 0.95 0.76 -+ p 2-p ++ p +2 +-p 2 200 0.92 0.76 250 0.85 0.65 @@ -635,13 +924,17 @@ f-f 100 0.96 0.85 150 0.93 0.76 200 0.89 0.69 250 0.77 0.58 -p 1-p +p +1 +-p 1 100 0.95 0.83 150 0.95 0.75 200 0.92 0.75 250 0.85 0.61 -p 2-p +p +2 +-p 2 100 0.94 0.7 150 0.91 0.39 @@ -662,11 +955,29 @@ applications to investigate other depth- and time-varying features such as giant Hanson et al. 
2020), emerging active regions, meridional flows and Rossby waves. APPENDIX A. DERIVATION OF THE FORWARD MODEL -As described in section 1.1, we seek to describe the flow u as a function of q along e z. To that end, substituting +As described in section 1.1, we seek to describe the flow u as a function of q along e +z +. To that end, substituting eq 3 into eq 2, -uσ q(z)=(cid:88) +uσ +q +(z)= (cid:88) j -(cid:8) q2f je z+iqf(cid:48) j(cid:9) Pσ jq+iq×e zf jTσ jq. (A1) +(cid:8) q2f +j +e +z ++iqf(cid:48) +j +(cid:9) Pσ +jq ++iq×e +z +f +j +Tσ +jq +. (A1) For flows in the anelastic limit (u (cid:28) speed of sound), we can denote the flow perturbation operator as δLσ = −2iωρuσ·∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, δLσ @@ -675,35 +986,99 @@ q q ·k+uσ q -·e z∂ z), (A2) -=−2iωρ(cid:80) +·e +z +∂ +z +), (A2) +=−2iωρ (cid:80) +j +(cid:8) −k·qf(cid:48) j -(cid:8) −k·qf(cid:48) jPσ jq−k·(q×e z)f jTσ jq+q2f jPσ jq∂ z(cid:9) . (A3) +Pσ +jq +−k·(q×e +z +)f +j +Tσ +jq ++q2f +j +Pσ +jq +∂ +z +(cid:9) . (A3) 12 Mani et al. Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) ξ k -≡ξ nk(z)=iˆ kH nk(z)e z+ˆ zV nk(z), (A4) +≡ξ +nk +(z)=iˆ kH +nk +(z)e +z ++ˆ zV +nk +(z), (A4) where H and V are real-valued functions; n and n(cid:48) are dropped for compactness of notation. 
Then the coupling of two modes ξ k and ξ k(cid:48) -(k(cid:48) = k+q), by the flow perturbation operator δLσ q, denoted by coupling integral Λk k(cid:48)(σ), is +(k(cid:48) = k+q), by the flow perturbation operator δLσ +q +, denoted by coupling integral Λk +k(cid:48) +(σ), is given by -Λk k(cid:48)(σ)≡(cid:90) dx(δLσ qξ k)·ξ∗ +Λk +k(cid:48) +(σ)≡ (cid:90) dx(δLσ +q +ξ +k +)·ξ∗ k(cid:48) -=(cid:90) -dx(cid:34) -−2iωρ(cid:88) += (cid:90) dx +(cid:34) +−2iωρ (cid:88) +j +(cid:110) q2f j -(cid:110) q2f jPσ jq(ˆ k·ˆ k(cid:48) H(cid:48) kH∗ +Pσ +jq +(ˆ k·ˆ k (cid:48) H(cid:48) +k +H∗ +k(cid:48) ++V(cid:48) +k +V∗ k(cid:48) -+V(cid:48) kV∗ k(cid:48)) -−(cid:2) k·qf(cid:48) jPσ jq+k·(q×e z)f jTσ jq(cid:3) (ˆ k·ˆ k(cid:48) H kH∗ +) +− (cid:2) k·qf(cid:48) +j +Pσ +jq ++k·(q×e +z +)f +j +Tσ +jq +(cid:3) (ˆ k·ˆ k (cid:48) H +k +H∗ k(cid:48) -+V kV∗ -k(cid:48))(cid:111)(cid:35) ++V +k +V∗ +k(cid:48) +) (cid:111) +(cid:35) (A5) We desire to linearly relate the coupling integral in the above equation to the flows P and T, through poloidal and toroidal sensitivity kernels, C @@ -713,37 +1088,77 @@ qj,k respectively. Hence, they are given by C qj,k -=(cid:90) dzρ(cid:104) q2f j(ˆ k·ˆ k(cid:48) H(cid:48) kH∗ += (cid:90) dzρ (cid:104) q2f +j +(ˆ k·ˆ k (cid:48) H(cid:48) +k +H∗ k(cid:48) -+V(cid:48) kV∗ k(cid:48)) -−k·qf(cid:48) j(ˆ k·ˆ k(cid:48) H kH∗ ++V(cid:48) +k +V∗ k(cid:48) -+V kV∗ k(cid:48))(cid:105) , +) +−k·qf(cid:48) +j +(ˆ k·ˆ k (cid:48) H +k +H∗ +k(cid:48) ++V +k +V∗ +k(cid:48) +) (cid:105) , D qj,k -=k·(q×e z) +=k·(q×e +z +) (cid:90) -dzρf j(ˆ k·ˆ k(cid:48) H kH∗ +dzρf +j +(ˆ k·ˆ k (cid:48) H +k +H∗ k(cid:48) -+V kV∗ k(cid:48)). (A6) ++V +k +V∗ +k(cid:48) +). (A6) Note the symmetry C qj,k = C −qj,−k and D qj,k -= D −qj,−k. This coupling integral contributes to the cross-spectral += D +−qj,−k +. 
This coupling integral contributes to the cross-spectral measurement between modes k and k+q From eq 8 of Woodard (2014), we write the first-order effect of flow on wavefield cross-correlation as (cid:104)φω∗ k -φω+σ k+q(cid:105)=Hω kk(cid:48)σΛk k(cid:48)(σ), (A7) +φω+σ +k+q +(cid:105)=Hω +kk(cid:48)σ +Λk +k(cid:48) +(σ), (A7) where the function H is given by Hω kk(cid:48)σ -=−2iω(N k|Rω k|2Rω+σ +=−2iω(N +k +|Rω +k +|2Rω+σ +k(cid:48) ++N k(cid:48) -+N k(cid:48)|Rω+σ +|Rω+σ k(cid:48) |2Rω∗ k @@ -754,8 +1169,12 @@ Rω k = 1 -ω2 nk−ω2−iωγ -nk/2, (A9) +ω2 +nk +−ω2−iωγ +nk +/2 +, (A9) whereω nk istheresonantfrequencyofthemode,andγ @@ -776,12 +1195,13 @@ N k = 1 Q -Q -(cid:88) +Q (cid:88) k (cid:80) ω -|φω k|2 +|φω +k +|2 (cid:80) ω Rω @@ -792,13 +1212,21 @@ Q Q (cid:80) k -on the right-hand-side implies average over all [k x,k y] (Q terms in all) such that k = |k| is constant. -This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω nk. +on the right-hand-side implies average over all [k +x +,k +y +] (Q terms in all) such that k = |k| is constant. +This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω +nk +. Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve to establish the parity Bσ k,q -= B∗−σ −k,−q. This allows for obtaining Pσ += B∗−σ +−k,−q +. This allows for obtaining Pσ q = P∗−σ −q @@ -806,31 +1234,59 @@ q the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ k,q -=G−σ −k,−q. +=G−σ +−k,−q +. Imaging near-surface flows using mode-coupling analysis 13 B. 
SOLA INVERSIONS SubtractiveOptimallyLocalizedAverages(SOLA,Pijpers&Thompson1994)aimstoobtainasetofweightfactors -for the mode q and depth z o, which we will call α k,zo. A linear weighted sum of the measurements B +for the mode q and depth z +o +, which we will call α +k,zo +. A linear weighted sum of the measurements B k,q in the fashion (cid:80) k -α k,zoB +α +k,zo +B k,q -allows for an average value of the flow P q(z) to be estimated at the depth z o. To obtain the coefficients -α k,zo, it is assumed that a set of sensitivity kernels K k,q(z) for the mode q can be summed up coherently to give an -’averaging kernel’ thatis localized atthe depth z o. Conventionally, a Gaussiancenteredat z +allows for an average value of the flow P +q +(z) to be estimated at the depth z +o +. To obtain the coefficients +α +k,zo +, it is assumed that a set of sensitivity kernels K +k,q +(z) for the mode q can be summed up coherently to give an +’averaging kernel’ thatis localized atthe depth z +o +. Conventionally, a Gaussiancenteredat z o and awidth ∆is chosen which the averaging kernel should resemble after performing inversion. B.1. Kernels in the integral form -Since the kernels in eq A6 are manifest as coefficients on a basis f j(z), we first derive kernels that can be expressed +Since the kernels in eq A6 are manifest as coefficients on a basis f +j +(z), we first derive kernels that can be expressed as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: -P ≡ P q(z), p ≡ P qj, F ≡ f j(z), B ≡ B +P ≡ P +q +(z), p ≡ P +qj +, F ≡ f +j +(z), B ≡ B k,q C ≡ C qj,k -and K ≡ K k,q(z), we write (assume only poloidal flow for +and K ≡ K +k,q +(z), we write (assume only poloidal flow for simplicity, the same derivations hold true for toroidal flow as well) P =Fp (B11) The size of P is thus the same as the length of the radial grid z. 
@@ -844,43 +1300,78 @@ B =Cp where K =(FTF)−1FTC, i.e., K -k,q(z)=(cid:88) +k,q +(z)= +(cid:88) j,j(cid:48) (cid:104)(cid:90) -dzf j(z)f -j(cid:48)(z)(cid:105)−1 -f j(cid:48)(z)C +dzf +j +(z)f +j(cid:48) +(z) +(cid:105)−1 +f +j(cid:48) +(z)C qj(cid:48),k (B14) B.2. Obtaining the coefficients α Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z o -T(z,z o)= +T(z,z +o +)= 1 √ 2π∆2 -exp(cid:16)z−z +exp +(cid:16)z−z o 2∆2 (cid:17) . (B15) This can be achieved by solving the optimization problem -minimizeX -=(cid:90) +minimizeX = +(cid:90) dz (cid:104) -T(z,z o)−Θ q(z,z -o)(cid:105)2 +T(z,z +o +)−Θ +q +(z,z +o +) +(cid:105)2 , (B16) where we introduce the averaging kernel for mode q thus -Θ q(z,z -o)=(cid:88) +Θ +q +(z,z +o +)= +(cid:88) k -α k,zoK k,q(z). (B17) +α +k,zo +K +k,q +(z). (B17) As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 and B14. 14 Mani et al. -Figure 8. Left: Kernel K k,q(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p 1-p 1, and p 2-p 2. qR +Figure 8. Left: Kernel K +k,q +(z) (eq B14) shown vs depth z for the three radial order couplings f-f, p +1 +-p +1 +, and p +2 +-p +2 +. qR (cid:12) = [−112,−45] and kR @@ -896,9 +1387,20 @@ Setting ∂X ∂α →0 gives us the matrix problem to be solved A{α}=v, -{α}=(cid:104) A+µI(cid:105)−1 +{α}= +(cid:104) +A+µI +(cid:105)−1 v, (B18) -where the square matrix A=(cid:82) dzK k,q(z)K k(cid:48),q(z) and v =(cid:82) dzK k,q(z)T(z,z o). Here, k(cid:48) is just a dummy index for +where the square matrix A= (cid:82) dzK +k,q +(z)K +k(cid:48),q +(z) and v = (cid:82) dzK +k,q +(z)T(z,z +o +). Here, k(cid:48) is just a dummy index for denotingelementsinthematrixA,(k(cid:48) (cid:54)=k+q). InthelastlineofeqB18,weintroduceregularizationusinganIdentity matrix I, with the regularization parameter µ - purpose being the same as that described in section 3.1. 
Obtaining α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α @@ -908,17 +1410,35 @@ k on both sides (cid:88) k -α k,zoBσ +α +k,zo +Bσ k,q -=(cid:88) += +(cid:88) k α k,zo (cid:90) -dzK k,q(z)Pσ q(z), -=(cid:90) -dzΘ q(z,z o)Pσ q(z), -≈(cid:104)Pσ q(z o)(cid:105) (B19) +dzK +k,q +(z)Pσ +q +(z), += +(cid:90) +dzΘ +q +(z,z +o +)Pσ +q +(z), +≈(cid:104)Pσ +q +(z +o +)(cid:105) (B19) Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Di- vergence flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. REFERENCES @@ -942,9 +1462,15 @@ Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 —. 2021, Living Reviews in Solar Physics, 18, 2, doi: 10.1007/s41116-020-00028-3 Imaging near-surface flows using mode-coupling analysis 15 -Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of q xR +Figure 9. Left: Poloidal flow power-spectrum for f-f as a function of q +x +R +(cid:12) +and q +y +R (cid:12) -and q yR (cid:12). Right: Corresponding power-spectrum +. Right: Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1−σ error around the mean. Power is in units of m2/s4. Figure 10. Real-space divergence flows (in units of 10−5s−1) for mode-coupling inversion through SOLA using f-f coupling, andLCT,bandpassfilteredaroundqR diff --git a/read/results/pdfplumber/2201.00200.txt b/read/results/pdfplumber/2201.00200.txt index 1d9e0e0..33b7ed3 100644 --- a/read/results/pdfplumber/2201.00200.txt +++ b/read/results/pdfplumber/2201.00200.txt @@ -83,11 +83,44 @@ solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot To reinvigorate the debate, Buldgen et al. (2019b) recently highlightedonceagainhowthetransitionofthetemperaturegra- 1 -arXiv:2201.00200v1 -[astro-ph.SR] +a +r +X i +v +: +2 +2 +0 +1 +. +0 +0 +2 +0 +0 +v 1 -Jan -2022 +[ +a +s +t +r +o +- +p +h +. 
+S +R +] +1 +J +a +n +2 +0 +2 +2 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem dientjustbelowtheconvectiveenvelopecansignificantlyimpact the disagreement between solar models and helioseismic con- @@ -161,17 +194,17 @@ dynamical simulations, as mentioned above. An exploration of theimpactofthisheatingonstellarevolutionmodelsmayreveal thatheatingisanecessaryaspectofmodelsforovershooting. Fig.1. Radial profile of the temperature departure ∆T/T 0 from -theinitialprofileT 0andofthesub-adiabaticity(∇−∇ ad)closeto +theinitialprofileT 0 andofthesub-adiabaticity(∇−∇ ad )closeto theconvectiveboundarypredictedby2Dhydrodynamicalsimu- lations(B21)ofsolar-likemodels.Thelowerpanelcorresponds to the model with a realistic stellar luminosity and the upper panel to a model with luminosity enhanced by a factor of ten. The dash-dotted red lines show ∆T/T 0 (in %), the relative dif- ferencebetweenthetimeandspaceaveragesofthetemperature, -T,andtheinitialtemperature,T 0.Thesolidbluelinesshowthe -time and space averages of the sub-adiabaticity (∇−∇ ad). The +T,andtheinitialtemperature,T 0 .Thesolidbluelinesshowthe +time and space averages of the sub-adiabaticity (∇−∇ ad ). The dashedblacklinesshowtheinitialprofileofthesub-adiabaticity, -(∇−∇ ad) init.Theconvectiveboundaryisindicatedbythevertical +(∇−∇ ad ) init .Theconvectiveboundaryisindicatedbythevertical solidline(seedetailsinB21) The behaviour of the thermal profile below the convective boundary found in the simulations of B21 is illustrated in Fig. @@ -180,14 +213,18 @@ boundary found in the simulations of B21 is illustrated in Fig. cialenhancementintheluminositybyafactoroftenbecausethe featuresareintensifiedinthese‘boosted’models(upperpanel). 
Thefigureshowsthelocalheatingintheovershootinglayerand -itsimpactonthesub-adiabaticity(∇−∇ ad),with∇ = dlogT +itsimpactonthesub-adiabaticity(∇−∇ +ad +),with∇ = dlogT dlogP the 2 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem temperature gradient and ∇ ad -= dlogT dlogP| += dlogT +dlogP +| S the adiabatic gradient. Theinitialstratificationbelowtheconvectiveboundary(located @@ -221,25 +258,33 @@ definedby A= 1 Γ -1dlnP +1 +dlnP dlnr − dlnρ -dlnr, (1) +dlnr +, (1) with Γ 1 -= (∂lnP/∂lnρ) ad. Starting from a reference evolu- += (∂lnP/∂lnρ) +ad +. Starting from a reference evolu- tionary model, Buldgen et al. (2020) used an inversion pro- cedure to iteratively reconstruct a solar model. Successive in- versions of the Ledoux discriminant allowed them to obtain a model-independentprofileforthisquantity.Theirreconstruction method also gives solar structures that are in excellent agree- mentwithotherstructuralinversions,namelytheentropy,S,the -squareofthespeedofsound,c2 s,andthedensity,ρ.Toillustrate +squareofthespeedofsound,c2 +s +,andthedensity,ρ.Toillustrate the convergence of their reconstruction procedure, they show (rightpanelsoftheirFigs.3-6)thesuccessiveiterationsthatcon- verge to an excellent level of agreement for the four structural -inversions(A,S,c2 s,ρ)startingfromtheinitialreferencemodel +inversions(A,S,c2 +s +,ρ)startingfromtheinitialreferencemodel adoptedintheirwork.Thedifferencesfoundbetweentherecon- structedmodelandthereferencemodelareusefulastheyindi- catethemodificationsofthereferencemodelthatarerequiredto @@ -250,19 +295,33 @@ analysisinSect.3.2. The first concerns the Ledoux discriminant. The major dis- crepancy between the Sun and the reference model occurs just below the convective boundary, with a large positive bump for -thequantity(A Sun-A ref). +thequantity(A +Sun +-A +ref +). 
Thesecondconcernsthespeedofsound.Thesamepositive bumpatthesamelocationasfortheLedouxdiscriminant,A,is observed for the quantity (c2 s,Sun -− c2 s,ref)/c2 s,ref. The corrections +− c2 +s,ref +)/c2 +s,ref +. The corrections appliedtoAduringthereconstructionprocedurealsoreducethe discrepancyinthespeedofsoundintheradiativeregion. Thethirdconcernstheentropy.Largediscrepanciesareob- servedinboththeradiativeregionandtheconvectivezone.The -1 Lesssub-adiabaticmeansthat|∇−∇ ad|decreasescomparedtothe +1 Lesssub-adiabaticmeansthat|∇−∇ +ad +|decreasescomparedtothe initialprofile. -entropydiscrepancy(S Sun−S ref)/S +entropydiscrepancy(S +Sun +−S +ref +)/S ref hastwopositivepeaksin theradiativezone,onejustbelowtheovershootingregionanda @@ -270,7 +329,7 @@ larger peak deeper at ∼ 40% of the stellar radius. This discrep- ancyisnegativeintheconvectivezone.Thecorrectionsapplied toAhelpreducetheseentropydiscrepanciesinbothregions. The fourth concerns the density. The quantity (ρ Sun − -ρ ref)/ρ ref has a negative peak in the radiative region, at ∼ 35% +ρ ref )/ρ ref has a negative peak in the radiative region, at ∼ 35% ofthestellarradius,andispositiveintheconvectivezone. 
Importantly,Buldgenetal.(2020)mentionthattheirrecon- struction procedure gives similar Ledoux discriminant profiles @@ -317,31 +376,69 @@ aturegradientintheovershootinglayerthatqualitativelyrepro- ducesthebehaviourdisplayedinFig.1.Wedefineanovershoot- inglengthd ov -= α ovH P,CB,with H += α +ov +H +P,CB +,with H P,CB thepressurescaleheight -attheconvectiveboundaryandα ovafreeparameter.Wealsode- +attheconvectiveboundaryandα +ov +afreeparameter.Wealsode- finetworadiallocations,r ov -=r CB−d ovandr +=r +CB +−d +ov +andr mid -=r CB−d ov/2, -withr CBtheradiallocationoftheconvectiveboundary.Thetem- +=r +CB +−d +ov +/2, +withr CB theradiallocationoftheconvectiveboundary.Thetem- peraturegradientismodifiedasfollows.Forr mid -≤ r < r CB,we +≤ r < r +CB +,we use -∇=g(r)∇ ad+(1−g(r))∇ rad, (2) +∇=g(r)∇ +ad ++(1−g(r))∇ +rad +, (2) with -g(r)= sin{[(r−r mid)/(r CB−r mid)]a×π/2}. (3) +g(r)= sin{[(r−r +mid +)/(r +CB +−r +mid +)]a×π/2}. (3) 3 Baraffeetal.:Localheatingduetoconvectiveovershootingandthesolarmodellingproblem Forr ov -≤r 0, -sodass B r(x) = {y +sodass B +r +(x) = +{ +y ∈ Rn | @@ -157,14 +193,20 @@ chem Mittelpunkt (vgl. Definition 1.ii). 2) Jeder metrische Raum (X,d) ist auch ein topologischer Raum. 3) Für eine Menge X heißt T Diskret -= P(X) diskrete Topologie. += +P +(X) diskrete Topologie. 4) X := R,T Z -:= {U +:= +{ +U ⊆ R | -R \U endlich +R +\ +U endlich }∪{∅} heißt Zariski-Topologie Beobachtungen: @@ -173,32 +215,73 @@ U ∈ T Z -⇔ -∃f +⇔ ∃ +f ∈ -R[X], sodass R \U = V(f) = {x +R[X], sodass R +\ +U = V(f) = +{ +x ∈ R | f(x) = 0 } • -Es gibt keine disjunkten offenen Mengen in T Z. +Es gibt keine disjunkten offenen Mengen in T +Z +. 
4 1.1.TOPOLOGISCHERÄUME 5) X := Rn,T Z -= {U += +{ +U ⊆ -Rn |Es gibt Polynome f 1,...,f +Rn +| +Es gibt Polynome f +1 +,...,f r ∈ -R[X 1,...,X n] sodass -Rn \U = V(f 1,...,f r) +R[X +1 +,...,X +n +] sodass +Rn +\ +U = V(f +1 +,...,f +r +) +} +6) X := +{ +0,1 +} +,T = +{∅ +, +{ +0,1 } -6) X := {0,1 },T = {∅, {0,1 }, {0 +, +{ +0 }} heißt Sierpińskiraum. -∅, {0,1 }, {1 +∅ +, +{ +0,1 +} +, +{ +1 } sind dort alle abgeschlossenen Mengen. Definition 2 @@ -224,7 +307,9 @@ Definition 3 Sei (X,T) ein topologischer Raum und M ⊆ X eine Teilmenge. -a) M◦ := {x +a) M◦ := +{ +x ∈ M | @@ -233,14 +318,17 @@ M ist Umgebung von x = (cid:91) U⊆M -U∈TU heißt Inneres oder offener +U∈T +U heißt Inneres oder offener Kern von M. b) M := (cid:92) M⊆A Aabgeschlossen A heißt abgeschlossene Hülle oder Abschluss von M. -c) ∂M := M \M◦ heißt Rand von M. +c) ∂M := M +\ +M◦ heißt Rand von M. d) M heißt dicht in X, wenn M = X ist. Beispiel 2 1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M◦ = @@ -267,7 +355,9 @@ S ist. Beispiel 3 (Basis und Subbasis) 1) Jede Basis ist auch eine Subbasis, z.B. -S = {(a,b) +S = +{ +(a,b) | a,b ∈ @@ -276,40 +366,88 @@ R,a < b ist für R mit der Standardtopologie sowohl Basis als auch Subbasis. 2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist -B = {B r(x) +B = +{ +B +r +(x) | r ∈ -Q >0,x +Q +>0 +,x ∈ Qn } ist eine abzählbare Basis von T. -3) Sei(X,T)eintopologischerRaummitX = {0,1,2 }undT = {∅, {0 }, {0,1 }, {0,2 },X }. +3) Sei(X,T)eintopologischerRaummitX = +{ +0,1,2 +} +undT = +{∅ +, +{ +0 +} +, +{ +0,1 +} +, +{ +0,2 +} +,X +} +. 
Dann ist S -= {∅, {0,1 }, {0,2 += +{∅ +, +{ +0,1 +} +, +{ +0,2 }} eine Subbasis von T, da gilt: 5 1.1.TOPOLOGISCHERÄUME • S ⊆ T -• -∅, {0,1 +• ∅ +, +{ +0,1 } -und {0,2 +und +{ +0,2 } ∈ S -• -{0 +• { +0 } -= {0,1 }∩{0,2 += +{ +0,1 +}∩{ +0,2 } • -X = {0,1 }∪{0,2 +X = +{ +0,1 +}∪{ +0,2 } Allerings ist S -keine Basis von (X,T), da {0 +keine Basis von (X,T), da +{ +0 } nicht als Vereinigung von Elementen aus @@ -317,8 +455,8 @@ S erzeugt werden kann. Bemerkung 2 Sei X eine Menge und -S ⊆ -P(X). Dann gibt es genau eine Topologie T auf X, für die +S ⊆ P +(X). Dann gibt es genau eine Topologie T auf X, für die S Subbasis ist. Definition 5 @@ -327,7 +465,11 @@ Sei (X,T) ein topologischer Raum und Y X. T Y -:= {U ∩Y +:= +{ +U +∩ +Y | U ∈ @@ -336,19 +478,28 @@ T ist eine Topologie auf Y. T Y -heißt Teilraumtopologie und (Y,T Y) heißt ein Teilraum von (X,T). +heißt Teilraumtopologie und (Y,T +Y +) heißt ein Teilraum von (X,T). Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. Definition 6 -Seien X 1,X +Seien X +1 +,X 2 topologische Räume. U ⊆ X 1 -×X +× +X +2 +sei offen, wenn es zu jedem x = (x +1 +,x 2 -sei offen, wenn es zu jedem x = (x 1,x 2) +) ∈ U Umgebungen U i @@ -357,28 +508,42 @@ i mit i = 1,2 gibt, sodass U 1 -×U +× +U 2 ⊆ U gilt. -T = {U +T = +{ +U ⊆ X 1 -×X +× +X 2 | -U offen }isteineTopologieaufX +U offen +} +isteineTopologieaufX 1 -×X 2.SieheißtProdukttopologie. -B = {U +× +X +2 +.SieheißtProdukttopologie. +B = +{ +U 1 -×U +× +U 2 | U i -offen in X i,i = 1,2 +offen in X +i +,i = 1,2 } ist eine Basis von T. U @@ -395,11 +560,18 @@ X 1 X 2 -Abbildung 1.1: Zu x = (x 1,x 2) gibt es Umgebungen U 1,U +Abbildung 1.1: Zu x = (x +1 +,x +2 +) gibt es Umgebungen U +1 +,U 2 mit U 1 -×U +× +U 2 ⊆ U @@ -410,7 +582,9 @@ Beispiel 4 (Produkttopologien) 2 = R mit euklidischer Topologie. 
⇒ -Die Produkttopologie auf R ×R = R2 stimmt mit der euklidischen Topologie auf +Die Produkttopologie auf R +× +R = R2 stimmt mit der euklidischen Topologie auf R2 überein. 2) X 1 @@ -418,13 +592,16 @@ R2 überein. 2 = R mit Zariski-Topologie. T Produkttopologie auf R2: U 1 -×U +× +U 2 (Siehe Abbildung 1.2) 6 1.1.TOPOLOGISCHERÄUME U 1 -= R \N += R +\ +N U 2 = @@ -442,15 +619,21 @@ der Äquivalenzklassen, π : X → X, x (cid:55)→ -[x] ∼. +[x] +∼ +. T X := (cid:8) U ⊆ X (cid:12) (cid:12) π−1(U) ∈ T X (cid:9) -(X,T X) heißt Quotiententopologie. +(X,T +X +) heißt Quotiententopologie. Beispiel 5 X = R,a ∼ b : ⇔ -a −b +a +− +b ∈ Z R @@ -464,65 +647,99 @@ a ∼ 1, d. h. [0] = [1] Beispiel 6 -Sei X = R2 und (x 1,y 1) +Sei X = R2 und (x +1 +,y +1 +) ∼ -(x 2,y 2) +(x +2 +,y +2 +) ⇔ x 1 -−x +− +x 2 ∈ Z und y 1 -−y +− +y 2 ∈ Z. Dann ist X/ ∼ ein Torus. Beispiel 7 (Projektiver Raum) -X = Rn+1 \{0 }, x +X = Rn+1 +\{ +0 +} +, x ∼ y -⇔ -∃λ +⇔ ∃ +λ ∈ R× mit y = λx ⇔ x und y liegen auf der gleichen Ursprungsgerade -X = Pn(R) +X = +P +n(R) 7 1.2.METRISCHERÄUME Also für n = 1: -−4 −2 2 4 6 8 -−4 -−2 -24 +− +4 +− +2 2 4 6 8 +− +4 +− +2 +2 +4 1.2 Metrische Räume Definition 8 -Sei X eine Menge. Eine Abbildung d : X ×X +Sei X eine Menge. Eine Abbildung d : X +× +X → R+ 0 heißt Metrik, wenn gilt: (i) Definitheit: d(x,y) = 0 ⇔ -x = y ∀x,y +x = y +∀ +x,y ∈ X -(ii) Symmetrie: d(x,y) = d(y,x) ∀x,y +(ii) Symmetrie: d(x,y) = d(y,x) +∀ +x,y ∈ X (iii) Dreiecksungleichung: d(x,z) ≤ -d(x,y)+d(y,z) ∀x,y,z +d(x,y)+d(y,z) +∀ +x,y,z ∈ X Das Paar (X,d) heißt ein metrischer Raum. Bemerkung 3 Sei (X,d) ein metrischer Raum und -B r(x) := {y +B +r +(x) := +{ +y ∈ X | @@ -533,9 +750,13 @@ für x X,r ∈ R+ -B = {B r(x) -⊆ -P(X) +B = +{ +B +r +(x) +⊆ P +(X) | x ∈ @@ -545,19 +766,47 @@ R+ } ist Basis einer Topologie auf X. 
Definition 9 -Seien (X,d X) und (Y,d Y) metrische Räume und ϕ : X +Seien (X,d +X +) und (Y,d +Y +) metrische Räume und ϕ : X → Y eine Abbildung mit -∀x 1,x +∀ +x +1 +,x 2 ∈ -X : d X(x 1,x 2) = d Y(ϕ(x 1),ϕ(x 2)) +X : d +X +(x +1 +,x +2 +) = d +Y +(ϕ(x +1 +),ϕ(x +2 +)) Dann heißt ϕ eine Isometrie von X nach Y. Beispiel 8 (Skalarprodukt erzeugt Metrik) -Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt (cid:104)·, ·(cid:105). Dann wird V +Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt +(cid:104)· +, +·(cid:105) +. Dann wird V durch d(x,y) := (cid:112) -(cid:104)x −y,x −y +(cid:104) +x +− +y,x +− +y (cid:105) zum metrischen Raum. Beispiel 9 (diskrete Metrik) @@ -565,68 +814,116 @@ Sei X eine Menge. Dann heißt d(x,y) = (cid:40) 0 falls x = y -1 falls x (cid:54)= y +1 falls x +(cid:54) += y die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. 8 1.2.METRISCHERÄUME Beispiel 10 -X = R2 und d((x 1,y 1),(x 2,y 2)) := max( (cid:107)x +X = R2 und d((x 1 -−x +,y +1 +),(x +2 +,y +2 +)) := max( +(cid:107) +x +1 +− +x 2 -(cid:107), (cid:107)y +(cid:107) +, +(cid:107) +y 1 -−y +− +y 2 -(cid:107)) ist Metrik. +(cid:107) +) ist Metrik. Beobachtung: d erzeugt die euklidische Topologie. -B r(0) = +B +r +(0) = r r r r -(a) B r(0) (b) Euklidische Topologie +(a) B +r +(0) (b) Euklidische Topologie Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10 9 1.2.METRISCHERÄUME Beispiel 11 (SNCF-Metrik1) X = R2 -−4 −2 2 4 6 8 -−4 -−2 -24 +− +4 +− +2 2 4 6 8 +− +4 +− +2 +2 +4 Definition 10 -Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x (cid:54)= y in X +Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x +(cid:54) += y in X Umgebungen U x um x und U y um y gibt, sodass U x -∩U +∩ +U y -= ∅. += +∅ +. 
Bemerkung 4 (Trennungseigenschaft) Metrische Räume sind hausdorffsch, wegen d(x,y) > 0 -⇒ -∃ε > 0 : B ε(x) ∩B ε(y) = +⇒ ∃ +ε > 0 : B +ε +(x) +∩ +B +ε +(y) = ∅ Beispiel 12 (Topologische Räume und Hausdorff-Räume) -1) (R,T Z) ist ein topologischer Raum, der nicht hausdorffsch ist. -2) (R,T Euklid) ist ein topologischer Hausdorff-Raum. +1) (R,T +Z +) ist ein topologischer Raum, der nicht hausdorffsch ist. +2) (R,T +Euklid +) ist ein topologischer Hausdorff-Raum. Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) -Seien X,X 1,X +Seien X,X +1 +,X 2 Hausdorff-Räume. a) Jeder Teilraum von X ist hausdorffsch. b) X 1 -×X +× +X 2 ist hausdorffsch (vgl. Abbildung 1.4). Definition 11 Sei X ein topologischer Raum und (x) n∈N eine Folge in X. x ∈ X heißt Grenzwert oder -Limes von (x n), wenn es für jede Umgebung U von x ein n +Limes von (x +n +), wenn es für jede Umgebung U von x ein n 0 gibt, sodass x n @@ -634,29 +931,39 @@ n U für alle n ≥ -n 0. +n +0 +. Bemerkung 6 Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. -Beweis: Sei (x n) eine konvergierende Folge und x und y Grenzwerte der Folge. +Beweis: Sei (x +n +) eine konvergierende Folge und x und y Grenzwerte der Folge. Da X hausdorffsch ist, gibt es Umgebungen U x von x und U y von y mit U x -∩U +∩ +U y = ∅ falls -x (cid:54)= y. Da (x n) gegen x und y konvergiert, existiert ein n +x +(cid:54) += y. Da (x +n +) gegen x und y konvergiert, existiert ein n 0 mit x n ∈ U x -∩U +∩ +U y für alle n ≥ @@ -666,48 +973,74 @@ n x = y (cid:4) 1Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. 10 1.3.STETIGKEIT -(x 1,y 1) (x 2,y 2) -x +(x 1 -x +,y +1 +) (x +2 +,y +2 +) +x +1 +x 2 U 1 -×X +× +X 2 U 2 -×X -2X +× +X +2 +X 1 X 2 -Abbildung 1.4: Wenn X 1,X +Abbildung 1.4: Wenn X +1 +,X 2 hausdorffsch sind, dann auch X 1 -×X +× +X 2 1.3 Stetigkeit Definition 12 -Seien (X,T X),(Y,T Y) topologische Räume und f : X +Seien (X,T +X +),(Y,T +Y +) topologische Räume und f : X → Y eine Abbildung. 
a) f heißt stetig : -⇔ -∀U +⇔ ∀ +U ∈ T Y : f−1(U) ∈ -T X. +T +X +. b) f heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : Y → -X gibt, sodass g ◦f = id +X gibt, sodass g +◦ +f = id X -und f ◦g = id Y. +und f +◦ +g = id +Y +. Bemerkung 72 Seien X,Y metrische Räume und f: X → @@ -719,50 +1052,74 @@ zu jedem x X und jedem ε > 0 gibt es δ(x,ε) > 0, sodass für alle y ∈ -X mit d(x,y) < δ gilt d Y(f(x),f(y)) < ε. -Beweis: „ ⇒“: Sei x +X mit d(x,y) < δ gilt d +Y +(f(x),f(y)) < ε. +Beweis: „ +⇒ +“: Sei x ∈ -X,ε > 0 gegeben und U := B ε(f(x)). +X,ε > 0 gegeben und U := B +ε +(f(x)). Dann ist U offen in Y. Def. 12.a ===== ⇒ f−1(U) ist offen in X. Dann ist x ∈ f−1(U). -⇒ -∃δ > 0, sodass B δ(x) +⇒ ∃ +δ > 0, sodass B +δ +(x) ⊆ f−1(U) ⇒ -f(B δ(x)) +f(B +δ +(x)) ⊆ U -⇒ -{y +⇒ { +y ∈ X | -d X(x,y) < δ +d +X +(x,y) < δ } ⇒ Beh. -„ ⇐“: Sei U +„ +⇐ +“: Sei U ⊆ Y offen, X ∈ f−1(U). -Dann gibt es ε > 0, sodass B ε(f(x)) +Dann gibt es ε > 0, sodass B +ε +(f(x)) ⊆ U Vor. == ⇒ -Es gibt δ > 0, sodass f(B δ(x)) +Es gibt δ > 0, sodass f(B +δ +(x)) ⊆ -B ε(f(x))) +B +ε +(f(x))) ⇒ -B δ(x) +B +δ +(x) ⊆ -f−1(B ε(f(x))) +f−1(B +ε +(f(x))) ⊆ f−1(U) (cid:4) Bemerkung 8 @@ -784,9 +1141,13 @@ X X ist Homöomorphismus. 2Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. 11 1.3.STETIGKEIT -2) Ist (Y,T Y) trivialer topologischer Raum, d. h. T +2) Ist (Y,T Y -= T triv, so ist jede Abbildung +) trivialer topologischer Raum, d. h. T +Y += T +triv +, so ist jede Abbildung f : X → Y stetig. @@ -794,11 +1155,13 @@ Y stetig. → Y stetig für jeden topologischen Raum Y und jede Abbildung f. -4) Sei X = [0,1),Y = S1 = {z +4) Sei X = [0,1),Y = S1 = +{ +z ∈ C -| -(cid:107)z +| (cid:107) +z (cid:107) = 1 } @@ -816,7 +1179,9 @@ Seien X,Y,Z topologische Räume, f : X Y und g : Y → Z stetige Abbildungen. -Dann ist g ◦f : X +Dann ist g +◦ +f : X → Z stetig. X @@ -829,11 +1194,15 @@ Beweis: Sei U ⊆ Z offen ⇒ -(g ◦f)−1(U) = f−1(g−1(U)). 
g−1(U) ist offen in Y weil g stetig +(g +◦ +f)−1(U) = f−1(g−1(U)). g−1(U) ist offen in Y weil g stetig ist, f−1(g−1(U)) ist offen in X, weil f stetig ist. (cid:4) Bemerkung 10 a) Für jeden topologischen Raum X ist -Homöo(X) := {f : X +Homöo(X) := +{ +f : X → X | @@ -843,7 +1212,9 @@ eine Gruppe. b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. -c) Iso(X) := {f : X +c) Iso(X) := +{ +f : X → X | @@ -854,11 +1225,15 @@ jeden metrischen Raum X. Bemerkung 11 (Projektionen sind stetig) Seien X,Y topologische Räume. π X -: X ×Y +: X +× +Y → X und π Y -: X ×Y +: X +× +Y → Y die Projektionen π @@ -870,7 +1245,9 @@ Y : (x,y) (cid:55)→ y -Wird X ×Y mit der Produkttopologie versehen, so sind π +Wird X +× +Y mit der Produkttopologie versehen, so sind π X und π Y @@ -881,7 +1258,11 @@ X offen ⇒ π−1 X -(U) = U ×Y ist offen in X ×Y. (cid:4) +(U) = U +× +Y ist offen in X +× +Y. (cid:4) Bemerkung 12 Sei X ein topologischer Raum, ∼ @@ -892,7 +1273,9 @@ versehen mit der Quotiententopologie, π : X → X, x (cid:55)→ -[x] ∼. +[x] +∼ +. Dann ist π stetig. 12 1.4.ZUSAMMENHANG Beweis: Nach Definition ist U @@ -904,16 +1287,20 @@ X offen X offen. (cid:4) Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. Beispiel 14 (Stereographische Projektion) -Rn und Sn \{N +Rn und Sn +\{ +N } sind homöomorph für beliebiges N ∈ Sn. Es gilt: Sn = (cid:8) x ∈ -Rn+1 (cid:12) (cid:12) (cid:107)x +Rn+1 (cid:12) (cid:12) (cid:107) -= 1(cid:9) +x +(cid:107) += 1 (cid:9) = (cid:40) x @@ -926,27 +1313,33 @@ Rn+1 (cid:12) n+1 (cid:88) -i=1x2 +i=1 +x2 i -= -1(cid:41) += 1 +(cid:41) O. B. d. A. sei N =     -0 + +0 . . . 0 -1 +1 +    -. Die Gerade durch N und P schneidet die Ebene H in genau + +. Die Gerade durch N und P schneidet die Ebene H in genau einem Punkt ˆ P. P wird auf ˆ P abgebildet. 
-f :Sn \{N +f :Sn +\{ +N } → Rn P @@ -955,19 +1348,21 @@ genaueinPunkt (cid:122) (cid:125)(cid:124) (cid:123) L P -∩H +∩ +H wobei Rn = H =     - + +   x 1 . . . -x -n+1 +x n+1 +   ∈ Rn+1 (cid:12) (cid:12) @@ -975,8 +1370,8 @@ n+1 (cid:12) (cid:12) (cid:12) (cid:12) -x n+1 = -0 +x n+1 = 0 +    @@ -991,24 +1386,29 @@ x . . . x -n+1 - , so ist x n+1 < 1, also ist L P nicht parallel zu H. Also schneiden sich L P +n+1 + +  , so ist x n+1 < 1, also ist L P nicht parallel zu H. Also schneiden sich L P und H in genau einem Punkt ˆ P. Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. 1.4 Zusammenhang Definition 13 a) EinRaumX heißtzusammenhängend,wenneskeineoffenen,nichtleerenTeilmengen -U 1,U +U +1 +,U 2 von X gibt mit U 1 -∩U +∩ +U 2 = ∅ und U 1 -∪U +∪ +U 2 = X. b) Eine Teilmenge Y @@ -1027,28 +1427,38 @@ Abbildung 1.6: Visualisierung der stereographischen Projektion Bemerkung 13 X ist zusammenhängend ⇔ -Es gibt keine abgeschlossenen, nichtleeren Teilmengen A 1,A +Es gibt keine abgeschlossenen, nichtleeren Teilmengen A +1 +,A 2 mit A 1 -∩A +∩ +A 2 = ∅ und A 1 -∪A +∪ +A 2 = X. Beispiel 15 (Zusammenhang von Räumen) -1) (Rn,T Euklid) ist zusammenhängend, denn: +1) (Rn,T +Euklid +) ist zusammenhängend, denn: Annahme: Rn = U 1 -˙ ∪U +˙ +∪ +U 2 mit ∅ (cid:54) -= U 1,U += U +1 +,U 2 ∈ T @@ -1056,15 +1466,20 @@ Euklid existieren. Sei x ∈ -U 1,y +U +1 +,y ∈ U 2 und [x,y] die Strecke zwischen x und y. Sei V = [x,y]. Nun -betrachten wir V (cid:40) Rn als (metrischen) Teilraum mit der Teilraumtopologie T V. +betrachten wir V (cid:40) Rn als (metrischen) Teilraum mit der Teilraumtopologie T +V +. Somit gilt U 1 -∩[x,y] +∩ +[x,y] ∈ T V @@ -1083,26 +1498,51 @@ U ⇒ z ∈ -U 2. In jeder +U +2 +. In jeder Umgebung von z liegt ein Punkt von U 1 ⇒ Widerspruch zu U 2 offen. -2) R \{0 +2) R +\{ +0 } -ist nicht zusammenhängend, denn R \{0 +ist nicht zusammenhängend, denn R +\{ +0 } = R <0 -∪R +∪ +R >0 -3) R2 \{0 +3) R2 +\{ +0 } ist zusammenhängend. 
-4) Q (cid:40) R ist nicht zusammenhängend, da (Q ∩R <√ 2) ∪(Q ∩R >√ 2) = Q -5) {x +4) Q (cid:40) R ist nicht zusammenhängend, da (Q +∩ +R +< +√ +2 +) +∪ +(Q +∩ +R +> +√ +2 +) = Q +5) +{ +x } ist zusammenhängend für jedes x ∈ @@ -1117,28 +1557,45 @@ hängend. Beweis: durch Widerspruch Annahme: A = A 1 -∪A 2, A +∪ +A +2 +, A i abgeschlossen, A i -(cid:54)= ∅, A +(cid:54) += +∅ +, A 1 -∩A +∩ +A 2 = ∅ ⇒ -A = (A ∩A 1) +A = (A +∩ +A +1 +) (cid:124) (cid:123)(cid:122) (cid:125) abgeschlossen ˙ ∪ -(A ∩A 2) +(A +∩ +A +2 +) (cid:124) (cid:123)(cid:122) (cid:125) abgeschlossen (cid:124) (cid:123)(cid:122) (cid:125) disjunkt -Wäre A ∩A +Wäre A +∩ +A 1 = ∅ @@ -1147,7 +1604,9 @@ A ⊆ A = A 1 -˙ ∪A +˙ +∪ +A 2 ⇒ A @@ -1167,16 +1626,23 @@ A ⇒ Widerspruch zu A 1 -(cid:54)= +(cid:54) += ∅ ⇒ -A ∩A +A +∩ +A 1 -(cid:54)= +(cid:54) += ∅ -und analog A ∩A +und analog A +∩ +A 2 -(cid:54)= +(cid:54) += ∅ ⇒ Widerspruch zu A ist zusammenhängend. (cid:4) @@ -1184,22 +1650,49 @@ Bemerkung 15 Sei X ein topologischer Raum und A,B ⊆ X zusammenhängend. -Ist A ∩B (cid:54)= ∅, dann ist A ∪B zusammenhängend. -Beweis: Sei A ∪B = U +Ist A +∩ +B +(cid:54) += +∅ +, dann ist A +∪ +B zusammenhängend. +Beweis: Sei A +∪ +B = U 1 -˙ ∪U 2,U +˙ +∪ +U +2 +,U i -(cid:54)= +(cid:54) += ∅ offen o.B.d.A. ====== ⇒ -A = (A ∩U 1) ˙ ∪(A ∩U 2) offen +A = (A +∩ +U +1 +) ˙ +∪ +(A +∩ +U +2 +) offen Azhgd. ==== ⇒ -A ∩U +A +∩ +U 1 = ∅ @@ -1210,10 +1703,19 @@ U 1 ⊆ B -B = (B ∩U 1) +B = (B +∩ +U +1 +) (cid:124) (cid:123)(cid:122) (cid:125) =U1 -∪(B ∩U 2) +∪ +(B +∩ +U +2 +) (cid:124) (cid:123)(cid:122) (cid:125) =∅ ist unerlaubte Zerlegung. @@ -1228,7 +1730,8 @@ X definiert durch Z(x) := (cid:91) A⊆Xzhgd. -x∈AA +x∈A +A Z(x) heißt Zusammenhangskomponente. Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) Sei X ein topologischer Raum. Dann gilt: @@ -1239,11 +1742,14 @@ Beweis: 15 1.5.KOMPAKTHEIT a) Sei Z(x) = A 1 -˙ ∪A +˙ +∪ +A 2 mit A i -(cid:54)= +(cid:54) += ∅ abgeschlossen. O. B. d. A. 
sei x @@ -1252,13 +1758,24 @@ A 1 und y ∈ -A 2. y liegt in einer zusammehängenden Teilmenge A, +A +2 +. y liegt in einer zusammehängenden Teilmenge A, die auch x enthält. ⇒ -A = (A ∩A 1) +A = (A +∩ +A +1 +) (cid:124) (cid:123)(cid:122) (cid:125) (cid:51)x -∪(A ∩A 2) +∪ +(A +∩ +A +2 +) (cid:124) (cid:123)(cid:122) (cid:125) (cid:51)y ist unerlaubte Zerlegung. @@ -1269,14 +1786,22 @@ Z(x) Z(x) ⇒ Z(x) = Z(x) -c) Ist Z(y) ∩Z(x) (cid:54)= +c) Ist Z(y) +∩ +Z(x) +(cid:54) += ∅ Bem. 15 ===== ⇒ -Z(y) ∪Z(x) ist zusammenhängend. +Z(y) +∪ +Z(x) ist zusammenhängend. ⇒ -Z(x) ∪Z(y) +Z(x) +∪ +Z(y) ⊆ Z(x) ⇒ @@ -1300,35 +1825,61 @@ X zusammenhängend, so ist f(A) Y zusammenhängend. Beweis: Sei f(A) = U 1 -∪U 2,U +∪ +U +2 +,U i -(cid:54)= ∅, offen, disjunkt. +(cid:54) += +∅ +, offen, disjunkt. ⇒ -f−1(f(A)) = f−1(U 1) ∪f−1(U 2) +f−1(f(A)) = f−1(U +1 +) +∪ +f−1(U +2 +) ⇒ -A = (A ∩f−1(U 1)) +A = (A +∩ +f−1(U +1 +)) (cid:124) (cid:123)(cid:122) (cid:125) (cid:54)=∅ -∪(A ∩f−1(U 2)) +∪ +(A +∩ +f−1(U +2 +)) (cid:124) (cid:123)(cid:122) (cid:125) (cid:54)=∅ (cid:4) 1.5 Kompaktheit Definition 15 Sei X eine Menge und U -⊆ -P(X). +⊆ P +(X). U heißt eine Überdeckung von X, wenn gilt: -∀x +∀ +x ∈ -X : ∃M +X : +∃ +M ∈ U : x ∈ M Definition 16 Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X -U = {U +U = +{ +U i }i∈I mit U i @@ -1338,13 +1889,17 @@ eine endliche Teilüberdeckung i∈J⊆I U i -= X mit |J += X mit +| +J | ∈ N besitzt. Bemerkung 18 Das Einheitsintervall I := [0,1] ist kompakt bezüglich der euklidischen Topologie. -Beweis: Sei (U i) +Beweis: Sei (U +i +) i∈J eine offene Überdeckung von I. Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in @@ -1371,30 +1926,48 @@ für alle i J. Sei x n -der Mittelpunkt von I n. Die Folge (x n) hat einen Häufungspunkt x +der Mittelpunkt von I +n +. Die Folge (x +n +) hat einen Häufungspunkt x ∈ [0,1]. Dann gibt es i ∈ J mit x ∈ -U i. Da U +U i -offen ist, gibt es ein ε > 0, sodass (x −ε,x+ε) +. 
Da U +i +offen ist, gibt es ein ε > 0, sodass (x +− +ε,x+ε) ⊆ -U i. -Dann gibt es n 0, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n +U +i +. +Dann gibt es n +0 +, sodass gilt: 1/n0 < ε/2 und für unendlich viele3 n ≥ n 0 -: |x −x -n +: +| +x +− +x +n | < ε/2, also I n ⊆ -(x −ε,x+ε) +(x +− +ε,x+ε) ⊆ U i @@ -1403,7 +1976,9 @@ für mindestens ein n N.4 ⇒ Widerspruch -Dann überdecke [0,1] mit endlich vielen Intervallen I 1,...,I +Dann überdecke [0,1] mit endlich vielen Intervallen I +1 +,...,I d der Länge δ. Jedes I j @@ -1412,7 +1987,9 @@ U ij enthalten. ⇒ -U j1,...,U +U +j1 +,...,U j d ist endliche Teilüberdeckung von U. (cid:4) @@ -1421,10 +1998,13 @@ Beispiel 16 (Kompakte Räume) 2) (0,1) ist nicht kompakt. U n -= (1/n,1 −1/n) += (1/n,1 +− +1/n) ⇒ (cid:83) -n∈NU +n∈N +U n = (0,1) 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. @@ -1432,7 +2012,9 @@ Bemerkung 19 Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. -Beweis: Sei (V i) +Beweis: Sei (V +i +) i∈I offene Überdeckung von A. Dann gibt es für jedes i @@ -1444,56 +2026,86 @@ X mit V i = U i -∩A. +∩ +A. ⇒ A ⊆ (cid:91) -i∈IU +i∈I +U i ⇒ -U = {U +U = +{ +U i | i ∈ -I }∪{X \A +I +}∪{ +X +\ +A } ist offene Überdeckung von X X kompakt ======= ⇒ -es gibt i 1,...,i +es gibt i +1 +,...,i n ∈ I, sodass n (cid:91) -j=1U +j=1 +U ij -∪(X \A) = X +∪ +(X +\ +A) = X ⇒   n (cid:91) -j=1U ij ∪(X -\A) - ∩A = A +j=1 +U ij +∪ +(X +\ +A) + + +∩ +A = A ⇒ n (cid:91) -j=1(U +j=1 +(U ij -∩A) +∩ +A) (cid:124) (cid:123)(cid:122) (cid:125) =Vij -∪((X \A) ∩A) +∪ +((X +\ +A) +∩ +A) (cid:124) (cid:123)(cid:122) (cid:125) =∅ = A ⇒ -V i1,...,V +V +i1 +,...,V in überdecken A. (cid:4) @@ -1502,11 +2114,17 @@ Seien X,Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie kompakt. -Beweis: Sei (W i) +Beweis: Sei (W +i +) i∈I -eine offene Überdeckung von X ×Y. Für jedes (x,y) +eine offene Überdeckung von X +× +Y. 
Für jedes (x,y) ∈ -X ×Y gibt es +X +× +Y gibt es offene Teilmengen U x,y von X und V @@ -1515,11 +2133,16 @@ von Y sowie ein i ∈ I, sodass U x,y -×V +× +V x,y ⊆ -W i. -3Dies gilt nicht für alle n≥n 0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. +W +i +. +3Dies gilt nicht für alle n≥n +0 +, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4Sogar für unendlich viele. 17 1.5.KOMPAKTHEIT W @@ -1536,18 +2159,26 @@ X Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen Die offenen Mengen U x0,y -×V +× +V x0,y für festes x 0 und alle y ∈ -Y überdecken {x -0 -}×y. Da Y -kompakt ist, ist auch {x +Y überdecken +{ +x 0 -}×Y kompakt. Also gibt es y 1,...,y +}× +y. Da Y +kompakt ist, ist auch +{ +x +0 }× +Y kompakt. Also gibt es y +1 +,...,y m(x0) mit (cid:83)m(x0) @@ -1556,36 +2187,59 @@ U x0,yi × V x0,yi -⊇ -{x +⊇ { +x 0 -}×Y. +}× +Y. Sei U x0 := (cid:84)m(x) i=1 -U x0,yi. Da X kompakt ist, gibt es x 1,...,x +U +x0,yi +. Da X kompakt ist, gibt es x +1 +,...,x n ∈ X mit (cid:83)n -j=1U +j=1 +U xj = X ⇒ -(cid:83)k j=1(cid:83)m(xj) +(cid:83)k +j=1 +(cid:83)m(xj) i=1 (cid:0) U +xj,yi × +V xj,yi -×V -xj,yi(cid:1) +(cid:1) (cid:124) (cid:123)(cid:122) (cid:125) Eingrün-orangesKästchen ⊇ -X ×Y +X +× +Y ⇒ -(cid:83) j(cid:83) iW i(x j,y i) = X ×Y (cid:4) +(cid:83) +j +(cid:83) +i +W +i +(x +j +,y +i +) = X +× +Y (cid:4) Bemerkung 21 Sei X ein Hausdorffraum und K ⊆ @@ -1593,7 +2247,9 @@ X kompakt. Dann ist K abgeschlossen. Beweis: z. Z.: Komplement ist offen Ist X = K, so ist K abgeschlossen in X. Andernfalls sei y ∈ -X \K. Für jedes x +X +\ +K. Für jedes x ∈ K seien U @@ -1602,44 +2258,58 @@ bzw. V y Umgebungen von x bzw. von y, sodass U x -∩V +∩ +V y -= ∅. += +∅ +. X i K x y -Da K kompakt ist, gibt es endlich viele x 1,...,x -n -∈ +Da K kompakt ist, gibt es endlich viele x +1 +,...,x +n ∈ K, sodass (cid:83)m -i=1U -xi -⊇ +i=1 +U +xi ⊇ K. 
Sei V := n (cid:92) -i=1V +i=1 +V xi 18 1.6.WEGEUNDKNOTEN ⇒ V -∩(cid:32) n +∩ +(cid:32) n (cid:91) -i=1U -xi(cid:33) +i=1 +U +xi +(cid:33) = ∅ ⇒ -V ∩K = +V +∩ +K = ∅ ⇒ -V ist Überdeckung von y, die ganz in X \K enthalten ist. +V ist Überdeckung von y, die ganz in X +\ +K enthalten ist. ⇒ -X \K ist offen +X +\ +K ist offen Damit ist K abgeschlossen. (cid:4) Bemerkung 22 Seien X,Y topologische Räume, f : X @@ -1650,24 +2320,44 @@ Ist K X kompakt, so ist f(K) ⊆ Y kompakt. -Beweis: Sei (V i) +Beweis: Sei (V +i +) i∈I offene Überdeckung von f(K) f stetig ==== ⇒ -(f−1(V i)) +(f−1(V +i +)) i∈I ist offene Überdeckung von K Kompakt ===== ⇒ -es gibt i 1,...,i n, sodass f−1(V i1),...,f−1(V in) Überdeckung von K ist. +es gibt i +1 +,...,i +n +, sodass f−1(V +i1 +),...,f−1(V +in +) Überdeckung von K ist. ⇒ -f(f−1(V i1)),...,f(f−1(V in)) überdecken f(K). -Es gilt: f(f−1(V)) = V ∩f(X) (cid:4) +f(f−1(V +i1 +)),...,f(f−1(V +in +)) überdecken f(K). +Es gilt: f(f−1(V)) = V +∩ +f(X) (cid:4) Satz 1.1 (Heine-Borel) Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und abgeschlossen ist. -Beweis: „ ⇒“: Sei K +Beweis: „ +⇒ +“: Sei K ⊆ Rn (oder Cn) kompakt. Da Rn und Cn hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Vorausset- @@ -1675,16 +2365,30 @@ zung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist beschränkt. -„ ⇐“ Sei A +„ +⇐ +“ Sei A ⊆ Rn (oder Cn) beschränkt und abgeschlossen. -Dann gibt es einen Würfel W = [ −N,N] ×···×[ −N,N] +Dann gibt es einen Würfel W = [ +− +N,N] +×···× +[ +− +N,N] (cid:124) (cid:123)(cid:122) (cid:125) nmal mit A ⊆ W bzw. „Polyzylinder“ -Z = {(z 1,...,z n) +Z = +{ +(z +1 +,...,z +n +) ∈ Cn | @@ -1695,7 +2399,8 @@ N für i = 1,...,n } Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch kompakt. 
Genauso ist Z kompakt, weil -{z +{ +z ∈ C (cid:107) @@ -1706,9 +2411,11 @@ z homöomorph zu (cid:8) (x,y) ∈ -R2 (cid:12) (cid:12) (cid:107)(x,y) +R2 (cid:12) (cid:12) +(cid:107) +(x,y) (cid:107) ≤ -1(cid:9) +1 (cid:9) ist. (cid:4) 1.6 Wege und Knoten Definition 17 @@ -1719,11 +2426,13 @@ a) Ein Weg in X ist eine stetige Abbildung γ : [0,1] X. b) γ heißt geschlossen, wenn γ(1) = γ(0) gilt. c) γ heißt einfach, wenn γ -|[0,1) +| +[0,1) injektiv ist. Beispiel 17 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form -∀x +∀ +x ∈ [0,1] : γ(x) = c, c ∈ @@ -1745,28 +2454,44 @@ b) X ist wegzusammenhängend (cid:54)⇐ X ist zusammenhängend Beweis: -a) Sei X ein wegzusammenhängender topologischer Raum, A 1,A +a) Sei X ein wegzusammenhängender topologischer Raum, A +1 +,A 2 nichtleere, disjunkte, abgeschlossene Teilmengen von X mit A 1 -∪A +∪ +A 2 = X. Sei x ∈ -A 1,y +A +1 +,y ∈ -A 2,γ : [0,1] +A +2 +,γ : [0,1] → X ein Weg von x nach y. Dann ist C := γ([0,1]) ⊆ X zusammenhängend, weil γ stetig ist. -C = (C ∩A 1) +C = (C +∩ +A +1 +) (cid:124) (cid:123)(cid:122) (cid:125) (cid:51)x -∪(C ∩A 2) +∪ +(C +∩ +A +2 +) (cid:124) (cid:123)(cid:122) (cid:125) (cid:51)y ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen @@ -1778,29 +2503,39 @@ b) Sei X = ∈ R2 (cid:12) -(cid:12) -(cid:12) -x2+y2 = 1 ∨y = 1+2 ·e− 1 -10x(cid:111) +(cid:12) (cid:12) x2+y2 = 1 +∨ +y = 1+2 +· +e− 1 10 x +(cid:111) . Abbildung 1.8a veranschaulicht diesen Raum. Sei U 1 -∪U +∪ +U 2 = X,U 1 -(cid:54)= U +(cid:54) += U 2 -= ∅,U += +∅ +,U i -offen. X = C ∪S. Dann ist C +offen. X = C +∪ +S. Dann ist C ⊆ U 1 oder C ⊆ -U 2, +U +2 +, weil C und S zusammenhängend sind. Also ist C = U 1 @@ -1809,23 +2544,37 @@ und S = U (oder umgekehrt). Sei y ∈ -C = U 1,ε > 0 und B ε(y) +C = U +1 +,ε > 0 und B +ε +(y) ⊆ U 1 eine Umgebung von y, die in U 1 enthalten ist. 
-Aber: B ε(y) ∩S (cid:54)= +Aber: B +ε +(y) +∩ +S +(cid:54) += ∅ ⇒ Widerspruch ⇒ -X ∪S ist zusammenhängend, aber nicht +X +∪ +S ist zusammenhängend, aber nicht wegzusammenhängend. (cid:4) Beispiel 18 (Hilbert-Kurve) Es gibt stetige, surjektive Abbildungen [0,1] → -[0,1] ×[0,1]. Ein Beispiel ist die in Abbil- +[0,1] +× +[0,1]. Ein Beispiel ist die in Abbil- dung 1.9 dargestellte Hilbert-Kurve. Definition 19 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : @@ -1841,14 +2590,24 @@ X, wobei C := Bildγ. 20 1.6.WEGEUNDKNOTEN (a) Spirale S mit Kreis C 0.1 1 -−101 +− +1 +0 +1 X Y -{(x,sin(1 x)) +{ +(x,sin(1 +x +)) ∈ -X ×Y +X +× +Y } -( −1,1) +( +− +1,1) ⊆ Y (b) Sinus @@ -1875,22 +2634,36 @@ Idee: Ersetze Weg C durch Polygonzug. Definition 20 Eine geschlossene Jordankurve in R3 heißt Knoten. Beispiel 19 (Knoten) -(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6 2-Knoten +(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6 +2 +-Knoten Abbildung 1.11: Beispiele für verschiedene Knoten Definition 21 -Zwei Knoten γ 1,γ +Zwei Knoten γ +1 +,γ 2 : S1 → R3 heißen äquivalent, wenn es eine stetige Abbildung -H : S1 ×[0,1] +H : S1 +× +[0,1] → R3 gibt mit -H(z,0) = γ 1(z) ∀z +H(z,0) = γ +1 +(z) +∀ +z ∈ S1 -H(z,1) = γ 2(z) ∀z +H(z,1) = γ +2 +(z) +∀ +z ∈ S1 und für jedes feste t @@ -1905,7 +2678,9 @@ R3,z H(z,t) ein Knoten. Die Abbildung H heißt Isotopie zwischen γ 1 -und γ 2. +und γ +2 +. Definition 22 Sei γ : [0,1] → @@ -1913,22 +2688,36 @@ R3 ein Knoten, E eine Ebene und π : R3 → E eine Projektion auf E. 
π heißt Knotendiagramm von γ, wenn gilt: -(cid:12) (cid:12)π−1(x)(cid:12) -(cid:12) +(cid:12) (cid:12)π−1(x) (cid:12) (cid:12) ≤ -2 ∀x +2 +∀ +x ∈ π(γ) -Ist (π |γ([0,1]))−1(x) = {y 1,y +Ist (π +| +γ([0,1]) +)−1(x) = +{ +y +1 +,y 2 -}, so liegt y +} +, so liegt y 1 -über y 2, wenn gilt: -∃λ > 1 : (y +über y +2 +, wenn gilt: +∃ +λ > 1 : (y 1 -−x) = λ(y +− +x) = λ(y 2 -−x) +− +x) Satz 1.3 (Satz von Reidemeister) Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. @@ -1950,13 +2739,27 @@ Abbildung 1.13: Ein 3-gefärber Kleeblattknoten 23 1.6.WEGEUNDKNOTEN Übungsaufgaben Aufgabe 1 (Sierpińskiraum) -Es sei X := {0,1 +Es sei X := +{ +0,1 } und T X -:= {∅, {0 },X }. Dies ist der sogenannte Sierpińskiraum. -(a) Beweisen Sie, dass (X,T X) ein topologischer Raum ist. -(b) Ist (X,T X) hausdorffsch? +:= +{∅ +, +{ +0 +} +,X +} +. Dies ist der sogenannte Sierpińskiraum. +(a) Beweisen Sie, dass (X,T +X +) ein topologischer Raum ist. +(b) Ist (X,T +X +) hausdorffsch? (c) Ist T X von einer Metrik erzeugt? @@ -1967,13 +2770,19 @@ a,b ∈ Z,b ∈ -Z \{0 }) erzeugten Topologie +Z +\{ +0 +} +) erzeugten Topologie versehen. Zeigen Sie: (a) Jedes U a,b und jede einelementige Teilmenge von Z ist abgeschlossen. -(b) {−1,1 +(b) +{− +1,1 } ist nicht offen. (c) Es gibt unendlich viele Primzahlen. @@ -1982,27 +2791,45 @@ Für jedes i ∈ N sei P i -:= {0,1 +:= +{ +0,1 } -mit der diskreten Topologie. Weiter Sei P := (cid:81) i∈NP i. +mit der diskreten Topologie. Weiter Sei P := (cid:81) +i∈N +P +i +. (a) Wie sehen die offenen Mengen von P aus? (b) Was können Sie über den Zusammenhang von P sagen? Aufgabe 4 (Kompaktheit) -(a) Ist GL n(R) = {A +(a) Ist GL +n +(R) = +{ +A ∈ Rn×n | -det(A) (cid:54)= 0 +det(A) +(cid:54) += 0 } kompakt? -(b) Ist SL n(R) = {A +(b) Ist SL +n +(R) = +{ +A ∈ Rn×n | det(A) = 1 } kompakt? -(c) Ist P(R) kompakt? +(c) Ist +P +(R) kompakt? 
Aufgabe 5 (Begriffe) Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, @@ -2030,12 +2857,17 @@ Homöomorphismus von U auf eine offene Teilmenge V Rn. b) Ein n-dimensionaler Atlas A -auf X ist eine Familie (U i,ϕ i) +auf X ist eine Familie (U +i +,ϕ +i +) i∈I von Karten auf X, sodass (cid:83) -i∈IU +i∈I +U i = X. c) X heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, @@ -2062,8 +2894,12 @@ beliebig viele Elemente haben. Bemerkung 25 a) Es gibt surjektive, stetige Abbildungen [0,1] → -[0,1] ×[0,1] -b) Für n (cid:54)= m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz +[0,1] +× +[0,1] +b) Für n +(cid:54) += m sind Rn und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz von der Gebietstreue“ (Brouwer): Ist U ⊆ @@ -2075,9 +2911,19 @@ f : Rn → Rm → -Rn, (x 1,...,x n) +Rn, (x +1 +,...,x +n +) (cid:55)→ -(x 1,x 2,...,x n,0,...,0) +(x +1 +,x +2 +,...,x +n +,0,...,0) eine stetige injektive Abbildung. Also müsste f(Rn) offen sein ⇒ Widerspruch @@ -2088,32 +2934,68 @@ Beispiel 20 (Mannigfaltigkeiten) Rn ist eine n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte. 2) Cn ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: -(z 1,...,z n) +(z +1 +,...,z +n +) (cid:55)→ -( (cid:60)(z 1), (cid:61)(z 1),..., (cid:60)(z n), (cid:61)(z n)) -3) Pn(R) = (Rn+1 \{0 })/ +( +(cid:60) +(z +1 +), +(cid:61) +(z +1 +),..., +(cid:60) +(z +n +), +(cid:61) +(z +n +)) +3) +P +n(R) = (Rn+1 +\{ +0 +} +)/ ∼ = Sn/ ∼ -und Pn(C) sind Mannigfaltigkeiten der Dimension +und +P +n(C) sind Mannigfaltigkeiten der Dimension n bzw. 2n, da gilt: Sei U i -:= {(x +:= +{ +(x 0 : ··· -: x n) -∈ -Pn(R) +: x +n +) +∈ P +n(R) | x +i (cid:54) += 0 +} ∀ i -(cid:54)= 0 -} -∀i ∈ -0,...,n. Dann ist Pn(R) = (cid:83)n i=0U +0,...,n. 
Dann ist +P +n(R) = (cid:83)n +i=0 +U i und die Abbildung U @@ -2124,18 +3006,20 @@ Rn 0 : ··· -: x n) +: x +n +) (cid:55)→ (cid:18) x 0 -x -i,..., -(cid:1)(cid:1)(cid:1) x i -x -i,..., -x n -x -i(cid:19) +x i +,..., +(cid:1) +(cid:1) (cid:1) x i +x i +,..., x n +x i +(cid:19) (y 1 : @@ -2146,9 +3030,15 @@ i−1 i : ··· -: y n) +: y +n +) (cid:55) → -(y 1,...,y n) +(y +1 +,...,y +n +) ist bijektiv. Die U i @@ -2169,43 +3059,61 @@ U R2 y (cid:55)→ (0,1) -Umgebung: B 1(0,1) -→ -{(1 : u : v) -| -(cid:107)(u,v) +Umgebung: B +1 +(0,1) +→ { +(1 : u : v) +| (cid:107) +(u,v) (cid:107) < 1 } = V 1 -Umgebung: B 1(0,1) +Umgebung: B 1 (0,1) → (cid:8) (w : z : 1) (cid:12) -(cid:12) w2+z2 < -1(cid:9) +(cid:12) w2+z2 < 1 +(cid:9) = V 2 V 1 -∩V +∩ +V 2 -= ∅? += +∅ +? (a : b : c) ∈ V 1 -∩V +∩ +V 2 ⇒ -a (cid:54)= 0 und (b a)2+(c a)2 < 1 +a +(cid:54) += 0 und (b +a +)2+(c +a +)2 < 1 ⇒ c a < 1 ⇒ -c (cid:54)= 0 und (a c)2+(b c)2 < 1 +c +(cid:54) += 0 und (a +c +)2+(b +c +)2 < 1 ⇒ a c @@ -2214,47 +3122,106 @@ c Widerspruch 4) Sn = (cid:8) x ∈ -Rn+1 (cid:12) (cid:12) (cid:107)x +Rn+1 (cid:12) (cid:12) (cid:107) -= 1(cid:9) ist n-dimensionale Mannigfaltigkeit. +x +(cid:107) += 1 (cid:9) ist n-dimensionale Mannigfaltigkeit. 
Karten: D i -:= {(x 1,...,x n+1) +:= +{ +(x +1 +,...,x +n+1 +) ∈ -Sn |x +Sn +| +x i > 0 } → -B 1(0,...,0 +B +1 +(0,...,0 (cid:124) (cid:123)(cid:122) (cid:125) ∈Rn ) C i -:= {(x 1,...,x n+1) +:= +{ +(x +1 +,...,x +n+1 +) ∈ -Sn |x +Sn +| +x i < 0 } → -B 1(0,...,0) -(x 1,...,x n+1) +B +1 +(0,...,0) +(x +1 +,...,x +n+1 +) (cid:55)→ -(x 1,...,(cid:26)(cid:26) x i,...,x n+1)1 -(x 1,...,x n) +(x +1 +,...,(cid:26)(cid:26) x +i +,...,x +n+1 +)1 +(x +1 +,...,x +n +) (cid:55)→ -(x 1,...,x -i−1,(cid:113) -1 −(cid:80)n k=1x2 k,x i,...,x n), oder -−(cid:113) -1 −(cid:80)n k=1x2 +(x +1 +,...,x +i−1 +, +(cid:113) +1 +− +(cid:80)n +k=1 +x2 +k +,x +i +,...,x +n +), oder +− +(cid:113) +1 +− +(cid:80)n +k=1 +x2 k für C i -Sn = (cid:83)n+1 i=1(C +Sn = (cid:83)n+1 +i=1 +(C +i ∪ +D i -∪D i) +) Als kompakte Mannigfaltigkeit wird Sn auch „geschlossene Mannigfaltigkeit“ genannt. 5) [0,1] ist keine Mannigfaltigkeit, denn: Es gibt keine Umgebung von 0 in [0,1], die homöomorph zu einem offenem Intervall @@ -2265,20 +3232,36 @@ wird rausgenommen 27 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN 6) V 1 = (cid:8) (x,y) ∈ -R2 (cid:12) (cid:12) x ·y = 0(cid:9) ist keine Mannigfaltigkeit. +R2 (cid:12) (cid:12) x +· +y = 0 (cid:9) ist keine Mannigfaltigkeit. Das Problem ist (0,0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammen- hangskomponenten, wenn man einen Punkt entfernt. 7) V 2 = (cid:8) (x,y) ∈ -R2 (cid:12) (cid:12) x3 = y2(cid:9) ist eine Mannigfaltigkeit. -8) X = (R \{0 }) ∪(0 1,0 2) +R2 (cid:12) (cid:12) x3 = y2 (cid:9) ist eine Mannigfaltigkeit. 
+8) X = (R +\{ +0 +} +) +∪ +(0 +1 +,0 +2 +) U ⊆ X offen ⇔ (cid:40) -U offen in R \{0 }, falls 0 +U offen in R +\{ +0 +} +, falls 0 1 / ∈ @@ -2286,7 +3269,10 @@ U,0 2 ∈ U -∃ε > 0 : ( −ε,ε) +∃ +ε > 0 : ( +− +ε,ε) ⊆ U falls 0 1 @@ -2295,18 +3281,34 @@ U,0 2 ∈ U -Insbesondere sind (R \{0 }) ∪{0 +Insbesondere sind (R +\{ +0 +} +) +∪{ +0 1 } -und (R \{0 }) ∪{0 +und (R +\{ +0 +} +) +∪{ +0 2 } offen und homöomorph zu R. Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0 1 -und 0 2. -9) GL n(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 +und 0 +2 +. +9) GL +n +(R) ist eine Mannigfaltigkeit der Dimension n2, weil offene Teilmengen von Rn2 eine Mannigfaltigkeit bilden. Definition 25 Seien X,Y n-dimensionale Mannigfaltigkeiten, U @@ -2316,11 +3318,15 @@ X und V Y offen, Φ : U → V ein Ho- -möomorphismus Z = (X ˙ ∪Y)/ +möomorphismus Z = (X ˙ +∪ +Y)/ ∼ mit der von u ∼ -Φ(u) ∀u +Φ(u) +∀ +u ∈ U erzeugten Äquivalenzrelation und der von @@ -2329,7 +3335,9 @@ induzierten Quotiententopologie. Z heißtVerklebungvonX undY längsU undV.Z besitzteinenAtlasausn-dimensionalen Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. Bemerkung 26 -Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X ×Y eine Mannigfaltigkeit +Sind X,Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X +× +Y eine Mannigfaltigkeit der Dimension n+m. Beweis: Produkte von Karten sind Karten. (cid:4) Beispiel 21 @@ -2346,7 +3354,9 @@ Sei n ∈ N,F : Rn → -R stetig differenzierbar und X = V(F) := {x +R stetig differenzierbar und X = V(F) := +{ +x ∈ Rn | @@ -2358,22 +3368,58 @@ Dann gilt: 28 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. a) X ist abgeschlossen in Rn -b) Ist grad(F)(X) (cid:54)= 0 ∀x +b) Ist grad(F)(X) +(cid:54) += 0 +∀ +x ∈ -X, so ist X eine Mannigfaltigkeit der Dimension n −1. +X, so ist X eine Mannigfaltigkeit der Dimension n +− +1. 
Beweis: a) Sei y ∈ -Rn \V(F). Weil F stetig ist, gibt es δ > 0, sodass F(B δ(y)) +Rn +\ +V(F). Weil F stetig ist, gibt es δ > 0, sodass F(B +δ +(y)) ⊆ -B ε(F(y)) mit -ε = 1 2(cid:107)F(y) (cid:107). Folgt B δ(y) ∩V(F) = +B +ε +(F(y)) mit +ε = 1 +2(cid:107) +F(y) +(cid:107) +. Folgt B +δ +(y) +∩ +V(F) = ∅ ⇒ -Rn \V(F) ist offen. +Rn +\ +V(F) ist offen. b) Sei x ∈ -X mit grad(F)(x) (cid:54)= 0, also o. B. d. A. ∂F ∂X1(x) (cid:54)= 0, x = (x 1,...,x n), -x(cid:48) := (x 2,...,x n) +X mit grad(F)(x) +(cid:54) += 0, also o. B. d. A. ∂F +∂X1 +(x) +(cid:54) += 0, x = (x +1 +,...,x +n +), +x(cid:48) := (x +2 +,...,x +n +) ∈ Rn−1. Der Satz von der impliziten Funktion liefert nun: Es gibt Umgebungen U von x(cid:48) und differenzierbare Funktionen g : U @@ -2391,35 +3437,42 @@ Beispiel 22 → R, (x,y,z) (cid:55)→ -x2+y2+z2 −1,V(F) = S2,grad(F) = (2x,2y,2z) Bem. 27.b ====== +x2+y2+z2 +− +1,V(F) = S2,grad(F) = (2x,2y,2z) Bem. 27.b ====== ⇒ Sn ist n-dimensionale Mannigfaltigkeit in Rn+1 2) F : R2 → R, (x,y) (cid:55)→ -y2 −x3 Es gilt: grad(F) = ( −3x2,2y). Also: grad(0,0) = (0,0). -−5 −4 −3 −2 −1 0 1 2 3 4 5 -−4 -−2 +y2 +− +x3 Es gilt: grad(F) = ( +− +3x2,2y). Also: grad(0,0) = (0,0). +− 5 − 4 − 3 − 2 − 1 0 1 2 3 4 5 +− 4 +− 2 0 2 4 -−100 +− 100 0 100 x y z -−100 -0100 +− 100 0 +100 f(x,y) (a) F(x,y)=y2−x3 2 4 6 8 10 12 -−10 -−55 +− 10 +− +5 +5 10 x y -a= 1 -3 +a= 1 3 a=1 a=2 (b) y2−ax3 =0 Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. @@ -2428,7 +3481,11 @@ topologische Mannigfaltigkeit. 29 2.1.TOPOLOGISCHEMANNIGFALTIGKEITEN Definition 26 Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. 
X heißt n-dimensionale -Mannigfaltigkeit mit Rand, wenn es einen Atlas (U i,ϕ i) gibt, wobei U +Mannigfaltigkeit mit Rand, wenn es einen Atlas (U +i +,ϕ +i +) gibt, wobei U i ⊆ X @@ -2439,7 +3496,13 @@ i ein Homöomorphismus auf eine offene Teilmenge von Rn +,0 -:= {(x 1,...,x n) +:= +{ +(x +1 +,...,x +n +) ∈ Rn | @@ -2464,42 +3527,64 @@ Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. (c) Sphäre mit einem Loch Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand Definition 27 -Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt +Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas +A +. Dann heißt ∂X := (cid:91) -(U,ϕ)∈A{x +(U,ϕ)∈A +{ +x ∈ U | ϕ(x) = 0 } Rand von X. -∂X ist eine Mannigfaltigkeit der Dimension n −1. +∂X ist eine Mannigfaltigkeit der Dimension n +− +1. Definition 28 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U i,ϕ i) +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U +i +,ϕ +i +) i∈I Für i,j ∈ I mit U i -∩U +∩ +U j -(cid:54)= +(cid:54) += ∅ heißt ϕ ij := ϕ -j -◦ϕ−1 +j ◦ +ϕ−1 +i +ϕ i -ϕ i(U +(U i -∩U j) +∩ +U +j +) → -ϕ j(U +ϕ +j +(U i -∩U j) +∩ +U +j +) Kartenwechsel oder Übergangsfunktion. 30 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Rn Rn @@ -2513,9 +3598,17 @@ j Abbildung 2.4: Kartenwechsel 2.2 Differenzierbare Mannigfaltigkeiten Definition 29 -Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U i,ϕ i) i∈I. +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U +i +,ϕ +i +) +i∈I +. a) X heißt differenzierbare Mannigfaltigkeit der Klasse Ck, wenn jede Karten- -wechselabbildung ϕ ij, i,j +wechselabbildung ϕ +ij +, i,j ∈ I k-mal stetig differenzierbar ist. b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannig- @@ -2525,19 +3618,35 @@ Definition 30 Sei X eine differenzierbare Mannigfaltigkeit der Klasse Ck (k ∈ N -∪ -{∞}) mit Atlas +∪ {∞} +) mit Atlas +A += (U +i +,ϕ +i +) +i∈I +. +a) Eine Karte (U,ϕ) auf X heißt verträglich mit A -= (U i,ϕ i) i∈I. 
-a) Eine Karte (U,ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ϕ−1 +, wenn alle Kartenwechsel ϕ +◦ +ϕ−1 i und ϕ i -◦ϕ−1 (i +◦ +ϕ−1 (i ∈ I mit U i -∩U (cid:54)= ∅) differenzierbar von Klasse Ck sind. +∩ +U +(cid:54) += +∅ +) differenzierbar von Klasse Ck sind. b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der @@ -2560,7 +3669,11 @@ es Karten (U,ϕ) von X mit x U und (V,ψ) von Y mit f(U) ⊆ V gibt, sodass -ψ ◦f ◦ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. +ψ +◦ +f +◦ +ϕ−1 stetig differenzierbar von Klasse Ck in ϕ(x) ist. b) f heißt differenzierbar (von Klasse Ck), wenn f in jedem x ∈ X differenzierbar ist. @@ -2572,7 +3685,11 @@ X von Klasse C∞ gibt mit g f = id X und -f ◦g = id Y. +f +◦ +g = id +Y +. 31 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Bemerkung 29 Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. @@ -2580,19 +3697,45 @@ Beweis: Seien (U(cid:48),ϕ(cid:48)) und (V(cid:48),ψ(cid:48)) Karten von X bzw ⊆ V(cid:48). ⇒ -ψ(cid:48) ◦f ◦(ϕ(cid:48))−1 -= ψ(cid:48) ◦(ψ−1 ◦ψ) ◦f ◦(ϕ−1 ◦ϕ) ◦(ϕ(cid:48))−1 -ist genau dann differenzierbar, wenn ψ ◦f ◦ϕ−1 differenzierbar ist. +ψ(cid:48) +◦ +f +◦ +(ϕ(cid:48))−1 += ψ(cid:48) +◦ +(ψ−1 +◦ +ψ) +◦ +f +◦ +(ϕ−1 +◦ +ϕ) +◦ +(ϕ(cid:48))−1 +ist genau dann differenzierbar, wenn ψ +◦ +f +◦ +ϕ−1 differenzierbar ist. Beispiel 23 f : R → R, x (cid:55)→ x3 istkeinDiffeomorphismus,aberHomöomorphismus,damitg(x) := 3 √x -gilt: f ◦g = idR, g ◦f = idR +gilt: f +◦ +g = idR, g +◦ +f = idR Bemerkung 30 Sei X eine glatte Mannigfaltigkeit. Dann ist -Diffeo(X) := {f : X +Diffeo(X) := +{ +f : X → X | @@ -2603,29 +3746,47 @@ Definition 32 S ⊆ R3 heißt reguläre Fläche : -⇔ -∀s +⇔ ∀ +s ∈ S ∃ Umgebung V(s) ⊆ -R3 ∃U +R3 +∃ +U ⊆ R2 offen: ∃ differenzierbare Abbildung F : U → -V ∩S: Rg(J F(u)) = 2 ∀u +V +∩ +S: Rg(J +F +(u)) = 2 +∀ +u ∈ U. F heißt (lokale) reguläre Parametrisierung von S. 
F(u,v) = (x(u,v),y(u,v),z(u,v)) -J F(u,v) = - ∂x ∂u(p) ∂x ∂v(p) -∂y ∂u(p) ∂y ∂v(p) -∂z ∂u(p) ∂z -∂v(p) +J F (u,v) = + + +∂x +∂u +(p) ∂x +∂v +(p) +∂y ∂u (p) ∂y ∂v (p) +∂z +∂u +(p) ∂z +∂v +(p) +  Beispiel 24 1) Rotationsflächen: Sei r : R @@ -2638,8 +3799,11 @@ F : R2 R3 (u,v) (cid:55)→ (r(u)cos(u),r(v)sin(u),v) -J F(u,v) = - −r(v)sinu r(cid:48)(v)cosu +J F (u,v) = + + +− +r(v)sinu r(cid:48)(v)cosu r(v)cosu r(cid:48)(v)sinu 0 1  @@ -2655,22 +3819,27 @@ R3, (Rcosvcosu,Rcosvsinu,Rsinv) Es gilt: F(u,v) ∈ -S2 R, denn +S2 +R +, denn R2cos2(v)cos2(u)+R2cos2(v)sin2(u)+R2sin2(v) =R2(cos2(v)cos2(u)+cos2(v)sin2(u)+sin2(v)) -=R2(cid:0) cos2(v)(cos2(u)+sin2(u))+sin2(v)(cid:1) -=R2(cid:0) cos2(v)+sin2(v)(cid:1) +=R2(cid:0) cos2(v)(cos2(u)+sin2(u))+sin2(v) (cid:1) +=R2(cid:0) cos2(v)+sin2(v) (cid:1) =R2 32 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN N S v u (a) Kugelkoordinaten -−1 +− +1 0 1 -2 −2 -−1 +2 +− +2 − +1 0 1 2 0.6 @@ -2682,9 +3851,11 @@ v u π 3π 2 2π -−1 -−0.5 -0.5 +− +1 +− +0.5 +0.5 1 x y @@ -2693,14 +3864,22 @@ cosx (c) Sinus und Kosinus haben keine gemeinsame Nullstelle 33 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Die Jacobi-Matrix -J F(u,v) = +J F (u,v) =  -−Rcosvsinu −Rsinvcosu -Rcosvcosu −Rsinvsinu + +− +Rcosvsinu +− +Rsinvcosu +Rcosvcosu +− +Rsinvsinu 0 Rcosv   -hat Rang 2 für cosv (cid:54)= 0. In N und S ist cosv = 0. +hat Rang 2 für cosv +(cid:54) += 0. In N und S ist cosv = 0. Bemerkung 31 Jede reguläre Fläche S ⊆ @@ -2709,7 +3888,11 @@ Beweis: S ⊆ R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -regulären Flächen folgt direkt, dass Karten (U i,F i) und (U +regulären Flächen folgt direkt, dass Karten (U +i +,F +i +) und (U j ⊆ R2,F @@ -2719,9 +3902,11 @@ j R3) von S mit U i -∩U +∩ +U j -(cid:54)= +(cid:54) += ∅ existieren, wobei F i @@ -2729,8 +3914,8 @@ und F j nach Definition differenzierbare Abbildungen sind. z.Z.: F−1 -j -◦F +j ◦ +F i ist ein Diffeomorphismus. 
U i U j @@ -2747,108 +3932,151 @@ Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 Idee: Finde differenzierbare Funktion (cid:103) F−1 j in Umgebung W von s, sodass (cid:103) F−1 -j |S∩W +j | S∩W = F−1 j . Ausführung: Sei u 0 ∈ -U i, v +U +i +, v 0 ∈ U j -mit F i(u 0) = s = F j(v 0). -Da Rg(J Fj(v 0)) = 2 ist, ist o. B. d. A. -det(cid:18)∂x +mit F +i +(u +0 +) = s = F +j +(v +0 +). +Da Rg(J +Fj +(v +0 +)) = 2 ist, ist o. B. d. A. +det +(cid:18)∂x ∂u ∂x ∂v ∂y ∂u ∂y -∂v(cid:19) -(v 0) (cid:54)= 0 -und F j(u,v) = (x(u,v),y(u,v),z(u,v)). -Definiere (cid:102) F j : U j ×R +∂v +(cid:19) +(v +0 +) +(cid:54) += 0 +und F +j +(u,v) = (x(u,v),y(u,v),z(u,v)). +Definiere (cid:102) F j : U j +× +R → R3 durch -(cid:102) F j(u,v,t) := (x(u,v),y(u,v),z(u,v)+t) -Offensichtlich: (cid:102) F j |Uj×{0} = F j -J -(cid:102) Fj -= - ∂x -∂u +(cid:102) F j (u,v,t) := (x(u,v),y(u,v),z(u,v)+t) +Offensichtlich: (cid:102) F j +| +Uj×{0} = F j +J (cid:102) Fj = + + ∂x -∂v -0 -∂y ∂u -∂y +∂x ∂v 0 +∂y ∂u ∂y ∂v 0 ∂z ∂u ∂z ∂v -1 - -⇒ -detJ -(cid:102) -Fj(v 0,0) (cid:54)= 0 +1 + + ⇒ detJ (cid:102) Fj (v 0 ,0) (cid:54) = 0 AnalysisII ====== ⇒ -Es gibt Umgebungen W von F j von (cid:102) F j(v 0,0) = F j(v 0) = s, sodass (cid:102) F j auf W eine +Es gibt Umgebungen W von F j von (cid:102) F j (v 0 ,0) = F j (v 0 ) = s, sodass (cid:102) F j auf W eine differenzierbar Inverse F−1 j hat. 34 2.2.DIFFERENZIERBAREMANNIGFALTIGKEITEN Weiter gilt: -(cid:102) F -j−1 -|W∩S = F−1 j |W∩S +(cid:102) F j +−1 +| W∩S = F−1 j | W∩S ⇒ F−1 -j -◦F +j ◦ +F i |F−1 i (W∩S) = F−1 -j -◦F +j ◦ +F i |F−1 i (W∩S) ist differenzierbar. Definition 33 -Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. +Sei G eine Mannigfaltigkeit und (G, +◦ +) eine Gruppe. a) G heißt topologische Gruppe, wenn die Abbildungen ◦ -: G ×G +: G +× +G → G und ι : G → G definiert durch -g ◦h := g ·h und ι(g) := g−1 +g +◦ +h := g +· +h und ι(g) := g−1 stetig sind. 
-b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und +b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, +◦ +) und (G,ι) differenzierbar sind. Beispiel 25 (Lie-Gruppen) 1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. -2) GL n(R) -3) (R×, ·) -4) (R >0, ·) -5) (Rn,+), denn A ·B(i,j) = (cid:80)n k=1a ikb +2) GL +n +(R) +3) (R×, +· +) +4) (R +>0 +, +· +) +5) (Rn,+), denn A +· +B(i,j) = (cid:80)n +k=1 +a +ik +b kj ist nach allen Variablen differenzierbar (A−1)(i,j) = det(Aij) detA A ij =  - -a +  +a i1 ... a in @@ -2856,7 +4084,8 @@ in a n1 ... a -nn +nn +   ∈ R(n−1)×(n−1) ist differenzierbar. detA @@ -2864,11 +4093,18 @@ ij kann 0 werden, da: (cid:18) 1 1 -−1 -0(cid:19) -6) SL n(R) = {A +− +1 0 +(cid:19) +6) SL +n +(R) = +{ +A ∈ -GL n(R) +GL +n +(R) | det(A) = 1 } @@ -2883,76 +4119,139 @@ g G h (cid:55)→ -g ·h +g +· +h ein Diffeomorphismus. 35 2.3.SIMPLIZIALKOMPLEX 2.3 Simplizialkomplex Definition 34 -Seien v 0,...,v +Seien v +0 +,...,v k ∈ Rn Punkte. -a) v 0,...,v +a) v +0 +,...,v k sind in allgemeiner Lage -⇔esgibtkeinen(k −1)-dimensionalenaffinenUntervektorraum,derv 0,...,v +⇔ +esgibtkeinen(k +− +1)-dimensionalenaffinenUntervektorraum,derv +0 +,...,v k enthält ⇔ v 1 -−v 0,...,v +− +v +0 +,...,v k -−v +− +v 0 sind linear unabhängig. -b) conv(v 0,...,v k) := +b) conv(v +0 +,...,v +k +) := (cid:110) -(cid:80)k i=0λ iv +(cid:80)k +i=0 +λ +i +v i (cid:12) (cid:12) (cid:12) λ +i ≥ +0, (cid:80)k +i=0 +λ i -≥ -0,(cid:80)k i=0λ -i -= -1(cid:111) += 1 +(cid:111) heißt die konvexe Hülle von -v 0,...,v k. +v +0 +,...,v +k +. Definition 35 -a) Sei ∆n = conv(e 0,...,e n) +a) Sei ∆n = conv(e +0 +,...,e +n +) ⊆ Rn+1 die konvexe Hülle der Standard-Basisvektoren -e 0,...,e n. +e +0 +,...,e +n +. Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. 
-b) FürPunktev 0,...,v +b) FürPunktev +0 +,...,v +k +imRn inallgemeinerLageheißt∆(v +0 +,...,v +k +) = conv(v +0 +,...,v k -imRn inallgemeinerLageheißt∆(v 0,...,v k) = conv(v 0,...,v k) +) ein k-Simplex in Rn. -c) Ist ∆(v 0,...,v k) ein k-Simplex und I = {i 0,...,i +c) Ist ∆(v +0 +,...,v +k +) ein k-Simplex und I = +{ +i +0 +,...,i r -} ⊆ -{0,...,k }, so ist s +} ⊆ { +0,...,k +} +, so ist s i0,...,ir := -conv(v i0,...,v ir) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. +conv(v +i0 +,...,v +ir +) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. (a) 0-Simplex ∆0 1 2 3 -123 +1 +2 +3 e 0 e 1 (b) 1-Simplex ∆1 1 2 3 -123 +1 +2 +3 e 0 e 1 -e -2 +e 2 (c) 2-Simplex ∆2 e 0 e 1 e @@ -2970,27 +4269,39 @@ K und S ∆ Teilsimplex ist S ∈ K. -(ii) Für ∆ 1,∆ +(ii) Für ∆ +1 +,∆ 2 ∈ K ist ∆ 1 -∩∆ +∩ +∆ 2 leer oder ein Teilsimplex von ∆ 1 -und von ∆ 2. -b) |K +und von ∆ +2 +. +b) +| +K | := (cid:83) -∆∈K∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. -c) Ist d = max {k +∆∈K +∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K. +c) Ist d = max +{ +k ∈ N 0 | -K enthält k-Simplex }, so heißt d die Dimension von K. +K enthält k-Simplex +} +, so heißt d die Dimension von K. 36 2.3.SIMPLIZIALKOMPLEX (a) 1D Simplizialkomplex(b) 2D Simplizialkomplex (ohne untere Fläche!) @@ -3004,9 +4315,11 @@ P Abbildung 2.7: Beispiele für Simplizialkomplexe Definition 37 Seien K,L Simplizialkomplexe. Eine stetige Abbildung -f : |K -| → -|L +f : +| +K +| → | +L | heißt simplizial, wenn für jedes ∆ ∈ @@ -3015,12 +4328,19 @@ a) f(∆) ∈ L b) f -|∆ +| +∆ : ∆ → f(∆) ist eine affine Abbildung. Beispiel 26 (Simpliziale Abbildungen) -1) ϕ(e 1) := b 1, ϕ(e 2) := b +1) ϕ(e +1 +) := b +1 +, ϕ(e +2 +) := b 2 ϕ ist eine eindeutig bestimmte lineare Abbildung 37 2.3.SIMPLIZIALKOMPLEX @@ -3063,8 +4383,8 @@ b b b b +b b b -bb b b b b b b @@ -3076,48 +4396,98 @@ Abbildung 2.8: Abbildung eines Torus auf eine Sphäre Definition 38 Sei K ein endlicher Simplizialkomplex. 
Für n ≥ -0 sei a n(K) die Anzahl der n-Simplizes in +0 sei a +n +(K) die Anzahl der n-Simplizes in K. Dann heißt χ(K) := dimK (cid:88) n=0 -( −1)na n(K) +( +− +1)na +n +(K) Eulerzahl (oder Euler-Charakteristik) von K. Beispiel 27 -1) χ(∆1) = 2 −1 = 1 -χ(∆2) = 3 −3+1 = 1 -χ(∆3) = 4 −6+4 −1 = 1 -2) χ(Oktaeder-Oberfläche) = 6 −12+8 = 2 +1) χ(∆1) = 2 +− +1 = 1 +χ(∆2) = 3 +− +3+1 = 1 +χ(∆3) = 4 +− +6+4 +− +1 = 1 +2) χ(Oktaeder-Oberfläche) = 6 +− +12+8 = 2 χ(Rand des Tetraeders) = 2 -χ(Ikosaeder) = 12 −30+20 = 2 -3) χ(Würfel) = 8 −12+6 = 2 -χ(Würfel, unterteilt in Dreiecksflächen) = 8 −(12+6)+(6 ·2) = 2 +χ(Ikosaeder) = 12 +− +30+20 = 2 +3) χ(Würfel) = 8 +− +12+6 = 2 +χ(Würfel, unterteilt in Dreiecksflächen) = 8 +− +(12+6)+(6 +· +2) = 2 Bemerkung 33 χ(∆n) = 1 für jedes n ∈ N 0 38 2.3.SIMPLIZIALKOMPLEX -Beweis: ∆n ist die konvexe Hülle von (e 0,...,e n) in Rn+1. Jede (k+1)-elementige Teilmenge -von {e 0,...,e +Beweis: ∆n ist die konvexe Hülle von (e +0 +,...,e +n +) in Rn+1. Jede (k+1)-elementige Teilmenge +von +{ +e +0 +,...,e n } definiert ein k-Simplex. ⇒ -a k(∆n) = (cid:0)n+1 k+1(cid:1) , k = 0,...,n +a +k +(∆n) = (cid:0)n+1 +k+1 +(cid:1) , k = 0,...,n ⇒ -χ(∆n) = (cid:80)n k=0( −1)k(cid:0)n+1 k+1(cid:1) +χ(∆n) = (cid:80)n +k=0 +( +− +1)k(cid:0)n+1 +k+1 +(cid:1) f(x) = (x+1)n+1 Binomischer -Lehrsatz = (cid:80)n+1 k=0(cid:0)n+1 +Lehrsatz = (cid:80)n+1 +k=0 +(cid:0)n+1 k (cid:1) xk ⇒ -0 = (cid:80)n+1 k=0(cid:0)n+1 +0 = (cid:80)n+1 +k=0 +(cid:0)n+1 k -(cid:1) ( −1)k = χ(∆n) −1 +(cid:1) ( +− +1)k = χ(∆n) +− +1 ⇒ χ(∆n) = 1 (cid:4) Definition 39 @@ -3139,16 +4509,48 @@ Beweis: Induktion über die Anzahl der Ecken. Bemerkung 35 a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T, der alle Ecken von Γ enthält.2 -b) Ist n = a 1(Γ) −a 1(T), so ist χ(Γ) = 1 −n. +b) Ist n = a +1 +(Γ) +− +a +1 +(T), so ist χ(Γ) = 1 +− +n. Beweis: a) Siehe „Algorithmus von Kruskal“. 2T wird „Spannbaum“ genannt. 
39 2.3.SIMPLIZIALKOMPLEX -b) χ(Γ) = a 0(Γ) −a 1(Γ) -= a 0(Γ) −(n+a 1(T)) -= a 0(T) −a 1(T) −n -= χ(T) −n -= 1 −n +b) χ(Γ) = a +0 +(Γ) +− +a +1 +(Γ) += a +0 +(Γ) +− +(n+a +1 +(T)) += a +0 +(T) +− +a +1 +(T) +− +n += χ(T) +− +n += 1 +− +n Bemerkung 36 Sei ∆ ein n-Simplex und x ∈ @@ -3161,13 +4563,19 @@ teilung entsteht Abbildung 2.10: Beispiel für Bemerkung 36. Beweis: χ(K) = χ(∆) − -( −1)n +( +− +1)n (cid:124) (cid:123)(cid:122) (cid:125) -n-Simplex+ +n-Simplex ++ n (cid:88) -k=0( -−1)k(cid:18) +k=0 +( +− +1)k +(cid:18) n+1 k (cid:19) @@ -3176,10 +4584,14 @@ k = χ(∆) (cid:4) Definition 40 Sei X ein topologischer Raum, K ein Simplizialkomplex und -h : |K +h : +| +K | → X -ein Homöomorphismus von der geometrischen Realisierung |K +ein Homöomorphismus von der geometrischen Realisierung +| +K | auf X. Dann heißt h eine Triangulierung von X. @@ -3190,7 +4602,8 @@ in Beispiel 28. Satz 2.1 (Eulersche Polyederformel) Sei P ein konvexes Polyeder in R3, d. h. ∂P ist ein 2-dimensionaler Simplizialkomplex, sodass gilt: -∀x,y +∀ +x,y ∈ ∂P : [x,y] ⊆ @@ -3202,7 +4615,11 @@ Beweis: ∈ P und P ⊆ -B 1(0). Projeziere ∂P von 0 aus auf ∂B 1(0) = S2. +B +1 +(0). Projeziere ∂P von 0 aus auf ∂B +1 +(0) = S2. Erhalte Triangulierung von S2. 40 2.3.SIMPLIZIALKOMPLEX (a) DiebeidenmarkiertenDreieckeschneidensichim @@ -3217,7 +4634,9 @@ Abbildung 2.12: Triangulierungen des Torus 1 und P 2 -konvexe Polygone und T 1,T +konvexe Polygone und T +1 +,T 2 die zugehörigen Triangulierungen von S2, so gibt es eine Triangulierung T, die sowohl um T @@ -3236,7 +4655,15 @@ Abbildung 2.13: T ist eine Triangulierung, die für T und T 2 eine Verfeinerung ist. -NachBemerkung 36istχ(∂P 1) = χ(T 1) = χ(T) = χ(T 2) = χ(∂P 2) = 2,weilo.B.d.A. +NachBemerkung 36istχ(∂P +1 +) = χ(T +1 +) = χ(T) = χ(T +2 +) = χ(∂P +2 +) = 2,weilo.B.d.A. P 2 ein Tetraeder ist. @@ -3245,54 +4672,96 @@ Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung Sei A n die Menge der n-Simplizes in K, d. h. 
-A n(K) := {σ +A +n +(K) := +{ +σ ∈ K | dim(σ) = n } für n = 0,...,d = dim(K) -und C n(K) der R-Vektorraum mit Basis A n(K), d. h. -C n(K) = +und C +n +(K) der R-Vektorraum mit Basis A +n +(K), d. h. +C n (K) =    (cid:88) -σ∈An(K)c σ ·σ -(cid:12) -(cid:12) +σ∈An(K) +c σ +· +σ (cid:12) (cid:12) +(cid:12) (cid:12) (cid:12) (cid:12) c σ ∈ -R +R +   -Sei σ = ∆(x 0,...,x n) +Sei σ = ∆(x +0 +,...,x +n +) ∈ -A n(K), sodass x +A +n +(K), sodass x 0 < x 1 < ··· -< x n. -Für i = 0,...,n sei ∂ iσ := ∆(x 0,..., ˆ x i,...,x n) die i-te Seite von σ und d +< x +n +. +Für i = 0,...,n sei ∂ +i +σ := ∆(x +0 +,..., ˆ x +i +,...,x +n +) die i-te Seite von σ und d +σ += d +n +σ := +(cid:80) +i=0 +( +− +1)i∂ +i σ -= d nσ := -(cid:80) i=0( −1)i∂ iσ ∈ -C n−1(K) und d +C +n−1 +(K) und d +n +: C n -: C n(K) +(K) → -C n−1(K) die dadurch definierte lineare +C +n−1 +(K) die dadurch definierte lineare Abbildung. Dann gilt: d n−1 -◦d +◦ +d n = 0 a @@ -3308,102 +4777,235 @@ e Abbildung 2.14: Simplizialkomplex mit Totalordnung Beispiel 29 Sei a < b < c. Dann gilt: -d 2σ = e +d +2 +σ = e 1 -−e 2+e +− +e +2 ++e 3 -d 1(e +d 1 -−e 2+e 3) = (c −b) −(c −a)+(b −a) +(e +1 +− +e +2 ++e +3 +) = (c +− +b) +− +(c +− +a)+(b +− +a) 42 2.3.SIMPLIZIALKOMPLEX = 0 Sei a < b < c < d. Dann gilt für Tetraeder: -d 3(∆(a,b,c,d)) = ∆(b,c,d) −∆(a,c,d)+∆(a,b,d) −∆(a,b,c),wobei: -d 2( ∆(b,c,d)) = ∆(c,d) −∆(b,d)+∆(b,c) -d 2( −∆(a,c,d)) = −∆(c,d)+∆(a,d) −∆(a,c) -d 2( ∆(a,b,d)) = ∆(b,d) −∆(a,d)+∆(a,b) -d 2( −∆(a,b,c)) = −∆(b,c)+∆(a,c) −∆(a,b) +d +3 +(∆(a,b,c,d)) = ∆(b,c,d) +− +∆(a,c,d)+∆(a,b,d) +− +∆(a,b,c),wobei: +d +2 +( ∆(b,c,d)) = ∆(c,d) +− +∆(b,d)+∆(b,c) +d +2 +( +− +∆(a,c,d)) = +− +∆(c,d)+∆(a,d) +− +∆(a,c) +d +2 +( ∆(a,b,d)) = ∆(b,d) +− +∆(a,d)+∆(a,b) +d +2 +( +− +∆(a,b,c)) = +− +∆(b,c)+∆(a,c) +− +∆(a,b) ⇒ -d 2(d 3(∆(a,b,c,d))) = 0 +d +2 +(d +3 +(∆(a,b,c,d))) = 0 Beweis: Sei σ ∈ -A n. Dann gilt: -d n−1(d nσ) = d n−1( +A +n +. 
Dann gilt: +d +n−1 +(d +n +σ) = d +n−1 +( n (cid:88) -i=0( −1)i∂ iσ) +i=0 +( +− +1)i∂ +i +σ) = n (cid:88) -i=0( −1)id n−1(∂ iσ) +i=0 +( +− +1)id +n−1 +(∂ +i +σ) = n (cid:88) -i=0( -−1)in−1 +i=0 +( +− +1)i +n−1 (cid:88) -j=0∂ i(∂ jσ)( −1)j +j=0 +∂ +i +(∂ +j +σ)( +− +1)j = (cid:88) 0≤i≤j≤n−1 -( −1)i+j∂ j(∂ -i(σ))+(cid:88) +( +− +1)i+j∂ +j +(∂ +i +(σ))+ +(cid:88) 0≤j 0 +| (cid:61) +(z) > 0 } = (cid:8) (x,y) ∈ -R2 (cid:12) (cid:12) y > 0(cid:9) +R2 (cid:12) (cid:12) y > 0 (cid:9) 79 4.3.HYPERBOLISCHEGEOMETRIE die obere Halbebene bzw. Poincaré-Halbebene und G = G 1 -∪G +∪ +G 2 mit G 1 -= {g += +{ +g 1 ⊆ H -| -∃m +| ∃ +m ∈ R,r ∈ @@ -6532,28 +9950,38 @@ R >0 : g 1 -= {z += +{ +z ∈ H : | -z −m +z +− +m | = r }} G 2 -= {g += +{ +g 2 ⊆ H -| -∃x +| ∃ +x ∈ R : g 2 -= {z += +{ +z ∈ -H : (cid:60)(z) = x +H : +(cid:60) +(z) = x }} Die Elemente aus G heißen hyperbolische Geraden. Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) @@ -6563,26 +9991,54 @@ b) ...das Anordnungsaxiom §3 (ii) c) ...nicht das Parallelenaxiom §5 Beweis: a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt: -Gegeben z 1,z +Gegeben z +1 +,z 2 ∈ H Existenz: -Fall 1 (cid:60)(z 1) = (cid:60)(z 2) +Fall 1 +(cid:60) +(z +1 +) = +(cid:60) +(z +2 +) ⇒ z 1 und z 2 liegen auf -g = {z +g = +{ +z ∈ C -| -(cid:60)(z) = (cid:60)(z 1) ∧H +| (cid:60) +(z) = +(cid:60) +(z +1 +) +∧ +H } Siehe Abbildung 4.20a. -Fall 2 (cid:60)(z 1) (cid:54)= (cid:60)(z 2) +Fall 2 +(cid:60) +(z +1 +) +(cid:54) += +(cid:60) +(z +2 +) Betrachtenunz 1 undz @@ -6605,20 +10061,30 @@ und z (vgl. Abbildung 4.20b) x y -−1 0 1 2 3 4 5 -01234 +− +1 0 1 2 3 4 5 0 +1 +2 +3 +4 Z 1 Z 2 -(cid:60)(Z 1) +(cid:60) +(Z +1 +) (a) Fall 1 x y -−1 0 1 2 3 4 5 -01234 -Z +− +1 0 1 2 3 4 5 0 1 +2 +3 +4 +Z 1 Z 2 (b) Fall 2 @@ -6628,37 +10094,53 @@ b) Sei g ∈ G 1 -˙ ∪G +˙ +∪ +G 2 eine hyperbolische Gerade. 
80 4.3.HYPERBOLISCHEGEOMETRIE -Es existieren disjunkte Zerlegungen von H \g: -Fall 1: g = {z +Es existieren disjunkte Zerlegungen von H +\ +g: +Fall 1: g = +{ +z ∈ H (cid:107) -z −m +z +− +m | = r } ∈ G 1 Dann gilt: -H = {z +H = +{ +z ∈ H (cid:107) -z −m +z +− +m | < r } (cid:124) (cid:123)(cid:122) (cid:125) =:H1 (Kreisinneres) -˙ ∪{z +˙ +∪{ +z ∈ H (cid:107) -z −m +z +− +m | > r } @@ -6671,51 +10153,75 @@ nicht leer, da r R ist H 2 nicht leer. -Fall 2: g = {z +Fall 2: g = +{ +z ∈ H -| -(cid:60)z = x +| (cid:60) +z = x } ∈ G 2 Die disjunkte Zerlegung ist: -H = {z +H = +{ +z ∈ H -| -(cid:60)(z) < x +| (cid:60) +(z) < x } (cid:124) (cid:123)(cid:122) (cid:125) =:H1 (Links) -˙ ∪{z +˙ +∪{ +z ∈ H -| -(cid:60)(z) > x +| (cid:60) +(z) > x } (cid:124) (cid:123)(cid:122) (cid:125) =:H2 (Rechts) -Zu zeigen: ∀A +Zu zeigen: +∀ +A ∈ -H i, B +H +i +, B ∈ H j mit i,j -∈ -{1,2 +∈ { +1,2 } -gilt: AB ∩g (cid:54)= +gilt: AB +∩ +g +(cid:54) += ∅ ⇔ -i (cid:54)= j -„ ⇐“: A +i +(cid:54) += j +„ +⇐ +“: A ∈ -H 1,B +H +1 +,B ∈ H 2 -: AB ∩g (cid:54)= +: AB +∩ +g +(cid:54) += ∅ Da dH stetig ist, folgt diese Richtung direkt. Alle Punkte in H 1 @@ -6730,21 +10236,35 @@ R >0 auffassen kann, greift der Zwischenwertsatz ⇒ -AB ∩g (cid:54)= +AB +∩ +g +(cid:54) += ∅ -„ ⇒“: A +„ +⇒ +“: A ∈ -H i,B +H +i +,B ∈ H j mit i,j -∈ -{1,2 +∈ { +1,2 } -: AB ∩g (cid:54)= +: AB +∩ +g +(cid:54) += ∅ ⇒ -i (cid:54)= j +i +(cid:54) += j Sei h die Gerade, die durch A und B geht. Da A,B / ∈ @@ -6755,19 +10275,41 @@ unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens eine schneiden. Sei C dieser Punkt. Aus A,B / ∈ -g folgt: C (cid:54)= A und C (cid:54)= B. Also liegt C zwischen A und B. Daraus folgt, +g folgt: C +(cid:54) += A und C +(cid:54) += B. Also liegt C zwischen A und B. Daraus folgt, dass A und B bzgl. g in verschiedenen Halbebenen liegen. c) Siehe Abbildung 4.21. 
x y -−5 −4 −3 −2 −1 0 1 2 3 4 5 6 -012345 +− +5 +− +4 +− +3 +− +2 +− +1 0 1 2 3 4 5 6 +0 +1 +2 +3 +4 +5 Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 81 4.3.HYPERBOLISCHEGEOMETRIE Definition 64 Es seien a,b,c,d ∈ -R mit ad −bc (cid:54)= 0 und σ : C +R mit ad +− +bc +(cid:54) += 0 und σ : C → C eine Abbildung definiert durch σ(z) := @@ -6775,19 +10317,30 @@ az+b cz+d σ heißt Möbiustransformation. Proposition 4.9 -a) Die Gruppe SL 2(R) operiert auf H durch die Möbiustransformation +a) Die Gruppe SL +2 +(R) operiert auf H durch die Möbiustransformation σ(z) := (cid:18) a b -c -d(cid:19) -◦z := +c d +(cid:19) +◦ +z := az+b cz+d -b) Die Gruppe PSL 2(R) = SL 2(R)/ +b) Die Gruppe PSL +2 +(R) = SL +2 +(R)/ (±I) operiert durch σ auf H. -c) PSL 2(R) operiert auf R ∪{∞}. Diese Gruppenoperation ist 3-fach transitiv, d. h. +c) PSL +2 +(R) operiert auf R +∪{∞} +. Diese Gruppenoperation ist 3-fach transitiv, d. h. zu x 0 < x @@ -6797,44 +10350,62 @@ zu x ∈ R gibt es genau ein σ ∈ -PSL 2(R) mit σ(x 0) = 0, σ(x 1) = 1, -σ(x ∞) = ∞. -d) SL 2(R) wird von den Matrizen +PSL +2 +(R) mit σ(x +0 +) = 0, σ(x +1 +) = 1, +σ(x +∞ +) = +∞ +. +d) SL +2 +(R) wird von den Matrizen (cid:18) λ 0 -0 -λ−1(cid:19) +0 λ−1 +(cid:19) (cid:124) (cid:123)(cid:122) (cid:125) =:A λ -,(cid:18) +, +(cid:18) 1 t -0 -1(cid:19) +0 1 +(cid:19) (cid:124) (cid:123)(cid:122) (cid:125) =:Bt und (cid:18) 0 1 -−1 -0(cid:19) +− +1 0 +(cid:19) (cid:124) (cid:123)(cid:122) (cid:125) =:C mit t,λ ∈ R× erzeugt. -e) PSL 2(R) operiert auf G. +e) PSL +2 +(R) operiert auf G. Beweis: a) Sei z = x+iy ∈ H, d. h. 
y > 0 und σ = (cid:18) a b -c -d(cid:19) +c d +(cid:19) ∈ -SL 2(R) +SL +2 +(R) ⇒ σ(z) = a(x+iy)+b @@ -6842,18 +10413,27 @@ c(x+iy)+d = (ax+b)+iay (cx+d)+icy · -(cx+d) −icy -(cx+d) −icy +(cx+d) +− +icy +(cx+d) +− +icy = (ax+b)(cx+d)+aycy (cx+d)2+(cy)2 -+iay(cx+d) −(ax+b)cy ++i +ay(cx+d) +− +(ax+b)cy (cx+d)2+(cy)2 = axcx+axd+bcx+bd+aycy (cx+d)2+(cy)2 +i -(ad −bc)y +(ad +− +bc)y (cx+d)2+(cy)2 SL2(R) = @@ -6862,31 +10442,37 @@ ac(x2+y2)+adx+bcx+bd +i y (cx+d)2+(cy)2 -⇒ -(cid:61)(σ(z)) = +⇒ (cid:61) +(σ(z)) = y (cx+d)2+(cy)2 > 0 Die Abbildung bildet also nach H ab. Außerdem gilt: (cid:18) 1 0 -0 -1(cid:19) -◦z = +0 1 +(cid:19) +◦ +z = x+iy 1 = x+iy = z 82 4.3.HYPERBOLISCHEGEOMETRIE und (cid:18) a b -c -d(cid:19) ◦(cid:18)(cid:18) a(cid:48) b(cid:48) -c(cid:48) -d(cid:48)(cid:19) ◦z(cid:19) +c d +(cid:19) +◦ +(cid:18)(cid:18) a(cid:48) b(cid:48) +c(cid:48) d(cid:48) +(cid:19) +◦ +z +(cid:19) = (cid:18) a b -c -d(cid:19) +c d +(cid:19) ◦ a(cid:48)z+b(cid:48) c(cid:48)z+d(cid:48) @@ -6910,53 +10496,85 @@ c(a(cid:48)z+b(cid:48))+d(c(cid:48)z+d(cid:48)) (ca(cid:48)+db(cid:48))z+cb(cid:48)+dd(cid:48) = (cid:18) aa(cid:48)+bc(cid:48) ab(cid:48)+bd(cid:48) -ca(cid:48)+db(cid:48) -cb(cid:48)+dd(cid:48)(cid:19) -◦z +ca(cid:48)+db(cid:48) cb(cid:48)+dd(cid:48) +(cid:19) +◦ +z = (cid:18)(cid:18) a b -c -d(cid:19) ·(cid:18) a(cid:48) b(cid:48) -c(cid:48) -d(cid:48)(cid:19)(cid:19) -◦z -b) Es gilt σ(z) = ( −σ)(z) für alle σ +c d +(cid:19) +· +(cid:18) a(cid:48) b(cid:48) +c(cid:48) d(cid:48) +(cid:19)(cid:19) +◦ +z +b) Es gilt σ(z) = ( +− +σ)(z) für alle σ ∈ -SL 2(R) und z +SL +2 +(R) und z ∈ H. c) Ansatz: σ = (cid:18) a b -c -d(cid:19) -σ(x 0) = ax0+b +c d +(cid:19) +σ(x +0 +) = ax0+b cx0+d ! 
= 0 ⇒ -ax 0+b = 0 +ax +0 ++b = 0 ⇒ -b = −ax +b = +− +ax 0 -σ(x ∞) = +σ(x +∞ +) = ∞ ⇒ -cx ∞+d = 0 +cx +∞ ++d = 0 ⇒ -d = −cx +d = +− +cx ∞ -σ(x 1) = 1 +σ(x +1 +) = 1 ⇒ -ax 1+b = cx 1+d -a(x +ax 1 -−x 0) = c(x ++b = cx 1 -−x ∞) -⇒ ++d +a(x +1 − +x +0 +) = c(x +1 − +x +∞ +) +⇒ c = a x1−x0 x1−x∞ -⇒ -−a2 ·x +⇒ − +a2 +· +x ∞ x1−x0 x1−x∞ @@ -6966,9 +10584,13 @@ x1−x0 x1−x∞ = 1 ⇒ -a2 x1−x0 x0−x∞(x -0 -−x ∞) = 1 +a2 x1−x0 +x0−x∞ +(x +0 − +x +∞ +) = 1 ⇒ a2 = x1−x∞ (x1−x∞)(x1−x0) @@ -6983,113 +10605,169 @@ t = B −t C−1 = C3 -Daher genügt es zu zeigen, dass man mit A λ, B +Daher genügt es zu zeigen, dass man mit A +λ +, B t -und C alle Matrizen aus SL 2(R) +und C alle Matrizen aus SL +2 +(R) erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit -Matrizen der Form A λ, B +Matrizen der Form A +λ +, B t und C die Einheitsmatrix zu generieren. Sei also M = (cid:18) a b -c -d(cid:19) +c d +(cid:19) ∈ -SL 2(R) +SL +2 +(R) beliebig. Fall 1: a = 0 Da M ∈ -SL 2(R) ist, gilt detM = 1 = ad −bc = −bc. Daher ist insbesondere c (cid:54)= 0. Es +SL +2 +(R) ist, gilt detM = 1 = ad +− +bc = +− +bc. Daher ist insbesondere c +(cid:54) += 0. Es folgt: (cid:18) 0 1 -−1 -0(cid:19) ·(cid:18) +− +1 0 +(cid:19) +· +(cid:18) a b -c -d(cid:19) +c d +(cid:19) = (cid:18) c d -−a -−b(cid:19) +− +a +− +b +(cid:19) 83 4.3.HYPERBOLISCHEGEOMETRIE Gehe zu Fall 2. -Fall 2: a (cid:54)= 0 -Nun wird in M durch M ·A +Fall 2: a +(cid:54) += 0 +Nun wird in M durch M +· +A 1 a an der Stelle von a eine 1 erzeugt: (cid:18) a b -c -d(cid:19) ·(cid:18)1 +c d +(cid:19) +· +(cid:18)1 a 0 -0 -a(cid:19) +0 a +(cid:19) = (cid:18) 1 ab c a -ad(cid:19) +ad +(cid:19) Gehe zu Fall 3. Fall 3: a = 1 (cid:18) 1 b -c -d(cid:19) ·(cid:18) -1 −b +c d +(cid:19) +· +(cid:18) +1 +− +b 0 1 (cid:19) = (cid:18) 1 0 c d -−bc(cid:19) -Da wir detM = 1 = ad −bc = d −bc wissen, gilt sogar M +− +bc +(cid:19) +Da wir detM = 1 = ad +− +bc = d +− +bc wissen, gilt sogar M 2,2 = 1. Gehe zu Fall 4. 
Fall 4: a = 1, b = 0, d = 1 -A −1CB -cC(cid:18) -1 0 +A +−1 +CB c -1(cid:19) +C +(cid:18) +1 0 +c 1 +(cid:19) = (cid:18) 1 0 -0 -1(cid:19) -Daher erzeugen Matrizen der Form A λ, B +0 1 +(cid:19) +Daher erzeugen Matrizen der Form A +λ +, B t -und C die Gruppe SL 2R. (cid:4) +und C die Gruppe SL +2 +R. (cid:4) e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. • σ = (cid:18) λ 0 -0 -λ−1(cid:19) +0 λ−1 +(cid:19) , also σ(z) = λ2z. Daraus ergeben sich die Situationen, die in Abbildung 4.22a und Abbildung 4.22b dargestellt sind. x y -−1 0 1 2 3 4 5 6 7 -0123 +− +1 0 1 2 3 4 5 6 7 +0 +1 +2 +3 m λ2m -m+irλ2m+iλ2r +m+ir +λ2m+iλ2r m+1 (a) Fall 1 x y -−1 0 1 2 3 4 -0123 +− +1 0 1 2 3 4 +0 +1 +2 +3 z x λ2z @@ -7100,48 +10778,85 @@ Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix Offensichtlich gilt die Aussage für σ = (cid:18) 1 a -0 -1(cid:19) +0 1 +(cid:19) • Sei nun σ = (cid:18) 0 1 -−1 -0(cid:19) -, also σ(z) = −1 +− +1 0 +(cid:19) +, also σ(z) = +− +1 z Bemerkung 69 -Zu hyperbolischen Geraden g 1,g +Zu hyperbolischen Geraden g +1 +,g 2 gibt es σ ∈ -PSL 2(R) mit σ(g 1) = g 2. +PSL +2 +(R) mit σ(g +1 +) = g +2 +. 84 4.3.HYPERBOLISCHEGEOMETRIE · x y -−1 0 1 -01 -z = r ·eiϕ +− +1 0 1 +0 +1 +z = r +· +eiϕ 1 z = 1 -r -·eiϕ +r · +eiϕ Abbildung 4.23: Inversion am Kreis -Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a 1) = b +Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a +1 +) = b 1 -und σ(a 2) = b 2. Dann existiert -σ(g 1) := g +und σ(a +2 +) = b +2 +. Dann existiert +σ(g +1 +) := g 2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. Definition 65 -Seien z 1,z 2,z 3,z +Seien z +1 +,z +2 +,z +3 +,z 4 ∈ C paarweise verschieden. Dann heißt -DV(z 1,z 2,z 3,z 4) := +DV(z +1 +,z +2 +,z +3 +,z +4 +) := z1−z4 z1−z2 z3−z4 @@ -7149,23 +10864,63 @@ z3−z2 = (z 1 -−z 4) ·(z +− +z +4 +) +· +(z 3 -−z 2) +− +z +2 +) (z +1 − +z +2 +) +· +(z +3 − +z +4 +) +Doppelverhältnis von z 1 -−z 2) ·(z -3 -−z 4) -Doppelverhältnis von z 1,...,z 4. 
+,...,z +4 +. Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a) DV(z 1,...,z 4) +a) DV(z +1 +,...,z +4 +) ∈ -C \{0,1 +C +\{ +0,1 } -b) DV(z 1,z 4,z 3,z 2) = 1 +b) DV(z +1 +,z +4 +,z +3 +,z +2 +) = 1 DV(z1,z2,z3,z4) -c) DV(z 3,z 2,z 1,z 4) = 1 +c) DV(z +3 +,z +2 +,z +1 +,z +4 +) = 1 DV(z1,z2,z3,z4) d) DV ist auch wohldefiniert, wenn eines der z i @@ -7174,84 +10929,218 @@ i oder wenn zwei der z i gleich sind. -e) DV(0,1, ∞,z 4) = z +e) DV(0,1, +∞ +,z +4 +) = z 4 (Der Fall z 4 -∈ -{0,1, +∈ { +0,1, ∞} ist zugelassen). f) Für σ ∈ -PSL 2(C) und z 1,...,z +PSL +2 +(C) und z +1 +,...,z 4 ∈ C ∪{∞} ist -DV(σ(z 1),σ(z 2),σ(z 3),σ(z 4)) = DV(z 1,z 2,z 3,z 4) +DV(σ(z +1 +),σ(z +2 +),σ(z +3 +),σ(z +4 +)) = DV(z +1 +,z +2 +,z +3 +,z +4 +) und für σ(z) = 1 z gilt -DV(σ(z 1),σ(z 2),σ(z 3),σ(z 4)) = DV(z 1,z 2,z 3,z 4) -g) DV(z 1,z 2,z 3,z 4) +DV(σ(z +1 +),σ(z +2 +),σ(z +3 +),σ(z +4 +)) = DV(z +1 +,z +2 +,z +3 +,z +4 +) +g) DV(z +1 +,z +2 +,z +3 +,z +4 +) ∈ R ∪{∞} ⇔ -z 1,...,z +z +1 +,...,z 4 liegen auf einer hyperbolischen Geraden. 
Beweis: -a) DV(z 1,...,z 4) (cid:54)= 0, da z +a) DV(z +1 +,...,z +4 +) +(cid:54) += 0, da z i paarweise verschieden -DV(z 1,...,z 4) (cid:54)= 1, da: -Annahme: DV(z 1,...,z 4) = 1 +DV(z +1 +,...,z +4 +) +(cid:54) += 1, da: +Annahme: DV(z +1 +,...,z +4 +) = 1 ⇔ (z 1 -−z 2)(z +− +z +2 +)(z 3 -−z 4) = (z +− +z +4 +) = (z 1 -−z 4)(z +− +z +4 +)(z 3 -−z 2) +− +z +2 +) 85 4.3.HYPERBOLISCHEGEOMETRIE ⇔ -z 1z +z +1 +z 3 -−z 2z +− +z +2 +z 3 -−z 1z 4+z 2z +− +z +1 +z +4 ++z +2 +z 4 -= z 1z += z +1 +z +3 +− +z 3 -−z 3z +z 4 -−z 1z 2+z 2z +− +z +1 +z +2 ++z +2 +z 4 ⇔ -z 2z 3+z 1z +z +2 +z +3 ++z +1 +z 4 -= z 3z 4+z 1z += z +3 +z +4 ++z +1 +z 2 ⇔ -z 2z +z +2 +z +3 +− +z 3 -−z 3z +z 4 -= z 1z += z +1 +z 2 -−z 1z +− +z +1 +z 4 ⇔ -z 3(z +z +3 +(z 2 -−z 4) = z 1(z +− +z +4 +) = z +1 +(z 2 -−z 4) +− +z +4 +) ⇔ z 3 @@ -7266,11 +11155,27 @@ i sind paarweise verschieden ⇒ Widerspruch (cid:4) -b) DV(z 1,z 4,z 3,z 2) = (z1−z2)·(z3−z4) +b) DV(z +1 +,z +4 +,z +3 +,z +2 +) = (z1−z2)·(z3−z4) (z1−z4)·(z3−z2) = 1 DV(z1,z2,z3,z4) -c) DV(z 3,z 2,z 1,z 4) = (z3−z4)·(z1−z2) +c) DV(z +3 +,z +2 +,z +1 +,z +4 +) = (z3−z4)·(z1−z2) (z3−z2)·(z1−z4) = 1 DV(z1,z2,z3,z4) @@ -7285,7 +11190,11 @@ oder z 3 = z 2 -In diesem Fall ist DV(z 1,...,z 4) = 0 +In diesem Fall ist DV(z +1 +,...,z +4 +) = 0 Fall 2 z 1 = z @@ -7294,7 +11203,11 @@ oder z 3 = z 4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z 1,...,z 4) = +Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z +1 +,...,z +4 +) = ∞ gilt. Fall 3 z @@ -7305,14 +11218,30 @@ oder z 2 = z 4 -Durch Einsetzen ergibt sich DV(z 1,...,z 4) = 1. +Durch Einsetzen ergibt sich DV(z +1 +,...,z +4 +) = 1. Im Fall, dass ein z i = ∞ -ist, ist entweder DV(0,1, ∞,z 4) = 0 oder DV(0,1, ∞,z 4) +ist, ist entweder DV(0,1, +∞ +,z +4 +) = 0 oder DV(0,1, +∞ +,z +4 +) ±∞ -e) DV(0,1, ∞,z 4) = +e) DV(0,1, +∞ +,z +4 +) = (0−z4)·(∞−1) (0−1)·(∞−z4) = @@ -7323,24 +11252,52 @@ z4·(∞−1) f) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. 
g) Sei σ ∈ -PSL 2(C) mit σ(z 1) = 0, σ(z 2) = 1, σ(z 3) = ∞. Ein solches σ existiert, da man +PSL +2 +(C) mit σ(z +1 +) = 0, σ(z +2 +) = 1, σ(z +3 +) = +∞ +. Ein solches σ existiert, da man drei Parameter von σ wählen darf. Bem. 70.f ⇒ -DV(z 1,...,z 4) = DV(0,1, ∞,σ(z 4)) +DV(z +1 +,...,z +4 +) = DV(0,1, +∞ +,σ(z +4 +)) ⇒ -DV(z 1,...,z 4) +DV(z +1 +,...,z +4 +) ∈ R ∪{∞} ⇔ -σ(z 4) +σ(z +4 +) ∈ R ∪{∞} -Behauptung folgt, weil σ−1(R ∪∞) ein Kreis oder eine Gerade in C ist. +Behauptung folgt, weil σ−1(R +∪∞ +) ein Kreis oder eine Gerade in C ist. Definition 66 -Für z 1,z +Für z +1 +,z 2 ∈ H sei g @@ -7349,16 +11306,36 @@ die eindeutige hyperbolische Gerade durch z 1 und z 2 -und a 1,a +und a +1 +,a 2 die „Schnittpunkte“ von g z1,z2 -mit R ∪{∞}. -Dann sei dH(z 1,z 2) := 1 2|lnDV(a 1,z 1,a 2,z 2) +mit R +∪{∞} +. +Dann sei dH(z +1 +,z +2 +) := 1 +2| +lnDV(a +1 +,z +1 +,a +2 +,z +2 +) | und heiße hyperbolische Metrik. -Beh.: Für z 1,z +Beh.: Für z +1 +,z 2 ∈ H sei g @@ -7367,44 +11344,120 @@ die eindeutige hyperbolische Gerade durch z 1 und z 2 -und a 1,a +und a +1 +,a 2 die „Schnittpunkte“ von g z1,z2 -mit R ∪{∞}. +mit R +∪{∞} +. Dann gilt: 1 -2|lnDV(a 1,z 1,a 2,z 2) +2| +lnDV(a +1 +,z +1 +,a +2 +,z +2 +) | = 1 -2|lnDV(a 2,z 1,a 1,z 2) +2| +lnDV(a +2 +,z +1 +,a +1 +,z +2 +) | Beweis: Wegen Bemerkung 70.c gilt: -DV(a 1,z 1,a 2,z 2) = +DV(a +1 +,z +1 +,a +2 +,z +2 +) = +1 +DV(a +2 +,z +1 +,a 1 -DV(a 2,z 1,a 1,z 2) +,z +2 +) Außerdem gilt: ln 1 x -= lnx−1 = ( −1) ·lnx = −lnx += lnx−1 = ( +− +1) +· +lnx = +− +lnx 86 4.3.HYPERBOLISCHEGEOMETRIE Da der ln im Betrag steht, folgt direkt: 1 -2|lnDV(a 1,z 1,a 2,z 2) +2| +lnDV(a +1 +,z +1 +,a +2 +,z +2 +) | = 1 -2|lnDV(a 2,z 1,a 1,z 2) +2| +lnDV(a +2 +,z +1 +,a +1 +,z +2 +) | Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelver- hältnis genutzt werden. (cid:4) Beh.: Die hyperbolische Metrik ist eine Metrik auf H. 
Beweis: Wegen Bemerkung 70.f ist -d(z 1,z 2) := d(σ(z 1),σ(z 2)) mit σ(a 1) = 0, σ(a 2) = +d(z +1 +,z +2 +) := d(σ(z +1 +),σ(z +2 +)) mit σ(a +1 +) = 0, σ(a +2 +) = ∞ -d. h. σ(g z1,z2) = iR (imaginäre Achse). +d. h. σ(g +z1,z2 +) = iR (imaginäre Achse). also gilt o. B. d. A. z 1 = ia und z @@ -7414,46 +11467,102 @@ also gilt o. B. d. A. z R und a < b. 2d(ia,ib) = | -lnDV(0,ia, ∞,ib) +lnDV(0,ia, +∞ +,ib) | = | ln -(0 −ib)( ∞−ia) -(0 −ia)( ∞−ib) | +(0 +− +ib)( +∞− +ia) +(0 +− +ia)( +∞− +ib) | = | ln b a | -= lnb −lna -Also: d(z 1,z 2) += lnb +− +lna +Also: d(z +1 +,z +2 +) ≥ -0, d(z 1,z 2) = 0 +0, d(z +1 +,z +2 +) = 0 ⇔ z 1 = z 2 -2d(z 2,z 1) = +2d(z +2 +,z +1 +) = | -lnDV(a 2,z 2,a 1,z 1) +lnDV(a +2 +,z +2 +,a +1 +,z +1 +) | = | -lnDV( ∞,ib,0,ia) +lnDV( +∞ +,ib,0,ia) | Bem. 70.b = | -lnDV(0,ib, ∞,ia) +lnDV(0,ib, +∞ +,ia) | -= 2d(z 1,z 2) -Liegen drei Punkte z 1,z 2,z += 2d(z +1 +,z +2 +) +Liegen drei Punkte z +1 +,z +2 +,z 3 ∈ -C auf einer hyperbolischen Geraden, so gilt d(z 1,z 3) = -d(z 1,z 2)+d(z 2,z 3) (wenn z +C auf einer hyperbolischen Geraden, so gilt d(z +1 +,z +3 +) = +d(z +1 +,z +2 +)+d(z +2 +,z +3 +) (wenn z 2 zwischen z 1 @@ -7482,13 +11591,19 @@ Aufgabe 9 Sei (X,d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von -Punkten ist definiert durch d(P,Y) := infd(P,y) |y +Punkten ist definiert durch d(P,Y) := infd(P,y) +| +y ∈ Y. Zeigen Sie: -(a) Ist (cid:52)ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die +(a) Ist +(cid:52) +ABC ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die Winkel ∠ABC und ∠BCA gleich. -(b) Ist (cid:52)ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel +(b) Ist +(cid:52) +ABC ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel gegenüber und umgekehrt. (c) Sind g eine Gerade und P / ∈ @@ -7503,7 +11618,9 @@ Seien f,g,h G und paarweise verschieden. 
Zeigen Sie: f (cid:107) -g ∧g +g +∧ +g (cid:107) h ⇒ @@ -7523,47 +11640,99 @@ Sei γ : I = [a,b] → Rn eine Kurve. a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: -(cid:107)γ(cid:48)(t) -(cid:107)2 -= 1 ∀t +(cid:107) +γ(cid:48)(t) +(cid:107) +2 += 1 +∀ +t ∈ I -Dabei ist γ(cid:48)(t) = (γ(cid:48) 1(t),γ(cid:48) 2(t),...,γ(cid:48) n(t)). +Dabei ist γ(cid:48)(t) = (γ(cid:48) +1 +(t),γ(cid:48) +2 +(t),...,γ(cid:48) +n +(t)). b) l(γ) = (cid:82)b -a -(cid:107)γ(cid:48)(t) (cid:107)dt heißt Länge von γ. +a (cid:107) +γ(cid:48)(t) +(cid:107) +dt heißt Länge von γ. Bemerkung 71 (Eigenschaften von Kurven I) Sei γ : I = [a,b] → Rn eine C∞-Funktion. -a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b −a. +a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ) = b +− +a. b) Ist γ durch Bogenlänge parametrisiert, so ist γ(cid:48)(t) orthogonal zu γ(cid:48)(cid:48)(t) für alle t ∈ I. Beweis: a) l(γ) = (cid:82)b +a (cid:107) +γ(cid:48)(t) +(cid:107) +dt = (cid:82)b a -(cid:107)γ(cid:48)(t) (cid:107)dt = (cid:82)b -a -1dt = b −a. +1dt = b +− +a. b) ImFolgendenwirddieAussagenurfürγ : [a,b] → R2 bewiesen.Allerdingsfunktioniert der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. 
-1 = (cid:107)γ(cid:48)(t) +1 = +(cid:107) +γ(cid:48)(t) +(cid:107) += +(cid:107) +γ(cid:48)(t) (cid:107) -= (cid:107)γ(cid:48)(t) (cid:107)2 = (cid:104)γ(cid:48)(t),γ(cid:48)(t) +2 = +(cid:104) +γ(cid:48)(t),γ(cid:48)(t) (cid:105) ⇒ 0 = d -dt(cid:104)γ(cid:48)(t),γ(cid:48)(t) +dt(cid:104) +γ(cid:48)(t),γ(cid:48)(t) (cid:105) = d -dt(γ(cid:48) 1(t)γ(cid:48) 1(t)+γ(cid:48) 2(t)γ(cid:48) 2(t)) -= 2 ·(γ(cid:48)(cid:48) 1(t) ·γ(cid:48) 1(t)+γ(cid:48)(cid:48) 2(t) ·γ(cid:48) 2(t)) -= 2 ·(cid:104)γ(cid:48)(cid:48)(t),γ(cid:48)(t) +dt +(γ(cid:48) +1 +(t)γ(cid:48) +1 +(t)+γ(cid:48) +2 +(t)γ(cid:48) +2 +(t)) += 2 +· +(γ(cid:48)(cid:48) +1 +(t) +· +γ(cid:48) +1 +(t)+γ(cid:48)(cid:48) +2 +(t) +· +γ(cid:48) +2 +(t)) += 2 +·(cid:104) +γ(cid:48)(cid:48)(t),γ(cid:48)(t) (cid:105) Definition 69 Sei γ : I @@ -7572,27 +11741,38 @@ R2 eine durch Bogenlänge parametrisierte Kurve. a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt: -(cid:104)n(t),γ(cid:48)(t) +(cid:104) +n(t),γ(cid:48)(t) (cid:105) -= 0, (cid:107)n(t) += 0, +(cid:107) +n(t) (cid:107) = 1 und det((γ(cid:48)(t),n(t))) = +1 89 5.1.KRÜMMUNGVONKURVEN b) Seit κ : I → R so, dass gilt: -γ(cid:48)(cid:48)(t) = κ(t) ·n(t) +γ(cid:48)(cid:48)(t) = κ(t) +· +n(t) Dann heißt κ(t) Krümmung von γ in t. Da n(t) und γ(cid:48)(cid:48)(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). Beispiel 45 Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. 
Es gilt: γ(t) = (cid:18) -r ·cos +r +· +cos t -r,r ·sin +r +,r +· +sin t -r(cid:19) +r +(cid:19) für t ∈ [0,2πr] @@ -7602,104 +11782,184 @@ ist parametrisiert durch Bogenlänge, da gilt: (r · 1 -r)( −sin -t r),r1 +r +)( +− +sin +t +r +),r +1 r cos t -r(cid:19) +r +(cid:19) = (cid:18) -−sin +− +sin t -r,cos +r +,cos t -r(cid:19) +r +(cid:19) Der Normalenvektor von γ in t ist n(t) = (cid:18) -−cos +− +cos t -r, −sin +r +, +− +sin t -r(cid:19) +r +(cid:19) da gilt: -(cid:104)n(t),γ(cid:48)(t) +(cid:104) +n(t),γ(cid:48)(t) (cid:105) = -(cid:28)(cid:18) −cos t +(cid:28)(cid:18) +− +cos t r -−sin t -r(cid:19) ,(cid:18) −sin t +− +sin t +r +(cid:19) +, +(cid:18) +− +sin t r cos t r (cid:19)(cid:29) -= ( −cos += ( +− +cos t -r) ·( −sin +r +) +· +( +− +sin t -r)+( −sin +r +)+( +− +sin t -r) ·(cos +r +) +· +(cos t -r) +r +) = 0 -(cid:107)n(t) +(cid:107) +n(t) (cid:107) = (cid:13) (cid:13) -(cid:13) (cid:13)( −cos +(cid:13) +(cid:13) +( +− +cos t -r, −sin +r +, +− +sin t -r)(cid:13) +r +) (cid:13) (cid:13) (cid:13) -= ( −cos +(cid:13) += ( +− +cos t -r)2+( −sin +r +)2+( +− +sin t -r)2 +r +)2 = 1 -det(γ(cid:48) 1(t),n(t)) = (cid:13) (cid:13) (cid:13) (cid:13)(cid:18) −sin t r −cos t r +det(γ(cid:48) +1 +(t),n(t)) = (cid:13) (cid:13) (cid:13) +(cid:13) +(cid:18) − sin t r − cos t r cos t +r − +sin t r -−sin t -r(cid:19)(cid:13) (cid:13) (cid:13) +(cid:19)(cid:13) (cid:13) (cid:13) (cid:13) -= ( −sin += ( +− +sin t -r)2 −( −cos +r +)2 +− +( +− +cos t -r) ·cos +r +) +· +cos t r = 1 -Die Krümmung ist für jedes t konstant 1 r, da gilt: +Die Krümmung ist für jedes t konstant 1 +r +, da gilt: γ(cid:48)(cid:48)(t) = (cid:18) -−1 +− +1 r cos t -r, -−1 +r +, +− +1 r sin t -r(cid:19) +r +(cid:19) = 1 -r -·(cid:18) -−cos +r · +(cid:18) +− +cos t -r, −sin +r +, +− +sin t -r(cid:19) +r +(cid:19) ⇒ κ(t) = 1 @@ -7711,19 +11971,23 @@ Sei γ : I R3 eine durch Bogenlänge parametrisierte Kurve. 
a) Für t ∈ -I heißt κ(t) := (cid:107)γ(cid:48)(cid:48)(t) +I heißt κ(t) := +(cid:107) +γ(cid:48)(cid:48)(t) (cid:107) die Krümmung von γ in t. b) Ist für t ∈ -I die Ableitung γ(cid:48)(cid:48)(t) (cid:54)= 0, so heißt γ(cid:48)(cid:48)(t) +I die Ableitung γ(cid:48)(cid:48)(t) +(cid:54) += 0, so heißt γ(cid:48)(cid:48)(t) (cid:107)γ(cid:48)(cid:48)(t)(cid:107) Normalenvektor an γ in t. c) b(t)seieinVektor,derγ(cid:48)(t),n(t)zueinerorientiertenOrthonormalbasisvonR3 ergänzt. Also gilt: det(γ(cid:48)(t),n(t),b(t)) = 1 b(t) heißt Binormalenvektor, die Orthonormalbasis -(cid:8) γ(cid:48)(t),n(t),b(t)(cid:9) +(cid:8) γ(cid:48)(t),n(t),b(t) (cid:9) heißt begleitendes Dreibein. Bemerkung 72 (Eigenschaften von Kurven II) Sei γ : I @@ -7736,7 +12000,7 @@ Erinnerung Sie sich an Definition 32 „reguläre Fläche“. Äquivalent dazu ist: S ist lokal von der Form V(f) = (cid:8) x ∈ -R3 (cid:12) (cid:12) f(x) = 0(cid:9) +R3 (cid:12) (cid:12) f(x) = 0 (cid:9) für eine C∞-Funktion f : R3 → R. @@ -7747,7 +12011,9 @@ R3 eine reguläre Fläche, s ∈ S, F : U → -V ∩S eine lokale Parametrisierung um +V +∩ +S eine lokale Parametrisierung um s ∈ V: @@ -7757,32 +12023,66 @@ V: Für p = F−1(s) ∈ U sei -J F(p) = - ∂x ∂u(p) ∂x ∂v(p) -∂y ∂u(p) ∂y ∂v(p) -∂z ∂u(p) ∂z -∂v(p) +J F (p) = + + +∂x +∂u +(p) ∂x +∂v +(p) +∂y ∂u (p) ∂y ∂v (p) +∂z +∂u +(p) ∂z +∂v +(p) +  -und D pF : R2 +und D +p +F : R2 → -R3 die durch J F(p) definierte lineare Abbildung. -Dann heißt T sS := Bild(D pF) die Tangentialebene an s +R3 die durch J +F +(p) definierte lineare Abbildung. +Dann heißt T +s +S := Bild(D +p +F) die Tangentialebene an s ∈ S. Bemerkung 73 (Eigenschaften der Tangentialebene) -a) T sS ist 2-dimensionaler Untervektorraum von R3. -b) T sS = (cid:104)˜ u,˜ v (cid:105), wobei ˜ u,˜ v die Spaltenvektoren der Jacobi-Matrix J F(p) sind. -c) T sS hängt nicht von der gewählten Parametrisierung ab. +a) T +s +S ist 2-dimensionaler Untervektorraum von R3. 
+b) T +s +S = +(cid:104) +˜ u,˜ v +(cid:105) +, wobei ˜ u,˜ v die Spaltenvektoren der Jacobi-Matrix J +F +(p) sind. +c) T +s +S hängt nicht von der gewählten Parametrisierung ab. 91 5.2.TANGENTIALEBENE d) Sei S = V(f) eine reguläre Fläche in R3, also f : V → R eine C∞-Funktion, V ⊆ R3 -offen, grad(f)(x) (cid:54)= 0 für alle x +offen, grad(f)(x) +(cid:54) += 0 für alle x ∈ S. -Dann ist T sS = (grad(f)(s))⊥ für jedes s +Dann ist T +s +S = (grad(f)(s))⊥ für jedes s ∈ S. Beweis: @@ -7794,11 +12094,23 @@ ist eine 3 × 1-Vektor multipliziert wird. Das ist eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein -Vektorraum ist. Da Rg(J F) = 2, ist auch dim(T sS) = 2. +Vektorraum ist. Da Rg(J +F +) = 2, ist auch dim(T +s +S) = 2. b) Hier kann man wie in Punkt a) argumentieren -c) T sS = {x +c) T +s +S = +{ +x ∈ -R3 |∃parametrisierte Kurve γ : [ −ε,+ε] +R3 +|∃ +parametrisierte Kurve γ : [ +− +ε,+ε] → S für ein ε > 0 mit γ(0) = s und γ(cid:48)(0) = x @@ -7806,24 +12118,40 @@ s und γ(cid:48)(0) = x Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. d) Sei x ∈ -T sS,γ : [ −ε,+ε] +T +s +S,γ : [ +− +ε,+ε] → S eine parametrisierte Kurve mit ε > 0 und γ(cid:48)(0) = s, sodass γ(cid:48)(0) = x gilt. Da γ(t) ∈ S für alle t ∈ -[ −ε,ε], ist f ◦γ = 0 +[ +− +ε,ε], ist f +◦ +γ = 0 ⇒ -0 = (f ◦γ)(cid:48)(0) = (cid:104)grad(f)(γ(0)),γ(cid:48)(0) +0 = (f +◦ +γ)(cid:48)(0) = +(cid:104) +grad(f)(γ(0)),γ(cid:48)(0) (cid:105) ⇒ -T sS +T +s +S ⊆ grad(f)(s)⊥ dim=2 ==== ⇒ -T sS = (grad(f)(s))⊥ +T +s +S = (grad(f)(s))⊥ Definition 72 a) Ein Normalenfeld auf der regulären Fläche S ⊆ @@ -7833,7 +12161,9 @@ S2 ⊆ R3 mit n(s) ∈ -T sS⊥ für jedes s +T +s +S⊥ für jedes s ∈ S. b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. @@ -7848,14 +12178,18 @@ S gibt es eine Umgebung V R3 von s und eine lokale Parametrisierung F : U → -V von S um s, sodass auf F(U) = V ∩S ein stetiges Normalenfeld existiert. 
+V von S um s, sodass auf F(U) = V +∩ +S ein stetiges Normalenfeld existiert. c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen Parametrisierungen F i : U i → -V i, i +V +i +, i ∈ I gibt, sodass für alle i,j ∈ @@ -7864,16 +12198,18 @@ s ∈ V i -∩V +∩ +V j -∩S gilt: +∩ +S gilt: det(D s Vi→Vj (cid:122) (cid:125)(cid:124) (cid:123) F -j -◦F−1 +j ◦ +F−1 i (cid:124) (cid:123)(cid:122) (cid:125) ∈R3×3 @@ -7887,7 +12223,9 @@ S2 ist ein stetiges Normalenfeld. Auch n 2 -= −id += +− +id S2 ist ein stetiges Normalenfeld. 2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Norma- @@ -7900,34 +12238,62 @@ Sei S eine reguläre Fläche, s ∈ S, n(s) ist ein Normalenvektor in s, x ∈ -T sS, (cid:107)x +T +s +S, +(cid:107) +x (cid:107) = 1. Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3. Dann gibt es eine Umgebung V ⊆ R3 von s, sodass -C := (s+E) ∩S ∩V -das Bild einer durch Bogenlänge parametrisierten Kurve γ : [ −ε,ε] +C := (s+E) +∩ +S +∩ +V +das Bild einer durch Bogenlänge parametrisierten Kurve γ : [ +− +ε,ε] → S enthält mit γ(0) = s und γ(cid:48)(0) = x. Beweis: „Satz über implizite Funktionen“1 Definition 73 -In der Situation aus Bemerkung 75 heißt die Krümmung κ γ(0) der Kurve γ in der Ebene +In der Situation aus Bemerkung 75 heißt die Krümmung κ +γ +(0) der Kurve γ in der Ebene (s+E) im Punkt s die Normalkrümmung von S in s in Richtung x = γ(cid:48)(0). -Man schreibt: κ Nor(s,x) := κ γ(0) +Man schreibt: κ +Nor +(s,x) := κ +γ +(0) Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. 
Beispiel 47 (Gauß-Krümmung) -1) S = S2 = V(X2+Y2+Z2 −1) ist die Kugel um den Ursprung mit Radius 1, n = id, +1) S = S2 = V(X2+Y2+Z2 +− +1) ist die Kugel um den Ursprung mit Radius 1, n = id, s = (0,0,1), x = (1,0,0) ⇒ -E = R ·x+R ·n(s) (x,z-Ebene) -C = E ∩S ist Kreislinie -κ Nor(s,x) = 1 +E = R +· +x+R +· +n(s) (x,z-Ebene) +C = E +∩ +S ist Kreislinie +κ +Nor +(s,x) = 1 r = 1 -2) S = V(X2+Z2 −1) +2) S = V(X2+Z2 +− +1) ⊆ R3 ist ein Zylinder (siehe Abbildung 5.2a). s = (1,0,0) x @@ -7936,56 +12302,110 @@ x ⇒ E 1 -= R ·e 1+R ·e += R +· +e +1 ++R +· +e 2 (x,y-Ebene) -S ∩E +S +∩ +E 1 -= V(X2+Y2 −1) ∩E, Kreislinie in E += V(X2+Y2 +− +1) +∩ +E, Kreislinie in E ⇒ -κ Nor(s,x 1) = ±1 +κ +Nor +(s,x +1 +) = +± +1 x 2 = (0,0,1),E 2 -= R ·e 1+R ·e += R +· +e +1 ++R +· +e 3 (x,z-Ebene) 1Siehe z. B. https://github.com/MartinThoma/LaTeX-examples/tree/master/documents/Analysis%20II 93 5.3.GAUSS-KRÜMMUNG -V ∩E 2 ∩S = (cid:8) (1,0,z) +V +∩ +E 2 +∩ +S = (cid:8) (1,0,z) ∈ R3 (cid:12) (cid:12) z ∈ R(cid:9) ist eine Gerade ⇒ -κ Nor(s,x 2) = 0 -3) S = V(X2 −Y2 −Z), s = (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) +κ +Nor +(s,x +2 +) = 0 +3) S = V(X2 +− +Y2 +− +Z), s = (0,0,0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) x 1 = (1,0,0), n(s) = (0,0,1) x 2 = (0,1,0) -κ Nor(s,x 1) = 2 -κ Nor(s,x 2) = −2 -−1.5 −1 −0.5 0 0.5 1 1.5 −1 +κ +Nor +(s,x +1 +) = 2 +κ +Nor +(s,x +2 +) = +− +2 +− 1.5 − 1 − 0.5 0 0.5 1 1.5 − 1 +0 +1 0 1 -012345 +2 +3 +4 +5 x y z (a) S =V(X2+Z2−1) -−2 −1.5 −1 −0.5 0 0.5 1 1.5 2 −2 −1 +− 2 − 1.5 − 1 − 0.5 0 0.5 1 1.5 2 − 2 − 1 0 1 2 -−2 -02 +− 2 +0 +2 x y z -−4 −2 -024 +− 4 − 2 +0 +2 +4 f(x,y) (b) S =V(X2−Y2−Z) Abbildung 5.2: Beispiele für reguläre Flächen @@ -7995,83 +12415,180 @@ Sei S R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S. -γ : [ −ε,ε] +γ : [ +− +ε,ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ(0) = s und -γ(cid:48)(cid:48)(0) (cid:54)= 0. +γ(cid:48)(cid:48)(0) +(cid:54) += 0. 
Sei n(0) := γ(cid:48)(cid:48)(0) -(cid:107)γ(cid:48)(cid:48)(0)(cid:107). Zerlege +(cid:107)γ(cid:48)(cid:48)(0)(cid:107) +. Zerlege n(0) = n(0)t+n(0)⊥ mit n(0)t ∈ -T sS und n(0)⊥ +T +s +S und n(0)⊥ ∈ -(T sS)⊥ -Dann ist n(0)⊥ = (cid:104)n(0),n(s) (cid:105)·n(s) -κ Nor(s,γ) := (cid:104)γ(cid:48)(cid:48)(0),n(s) +(T +s +S)⊥ +Dann ist n(0)⊥ = +(cid:104) +n(0),n(s) +(cid:105)· +n(s) +κ +Nor +(s,γ) := +(cid:104) +γ(cid:48)(cid:48)(0),n(s) (cid:105) die Normalkrümmung. Bemerkung 76 -Sei γ(t) = γ( −t), t +Sei γ(t) = γ( +− +t), t ∈ -[ −ε,ε]. Dann ist κ Nor(s,γ) = κ Nor(s,γ). -Beweis: γ(cid:48)(cid:48)(0) = γ(cid:48)(cid:48)(0), da γ(cid:48)(0) = −γ(cid:48)(0). -Es gilt: κ Nor(s,γ) hängt nur von |γ(cid:48)(0) +[ +− +ε,ε]. Dann ist κ +Nor +(s,γ) = κ +Nor +(s,γ). +Beweis: γ(cid:48)(cid:48)(0) = γ(cid:48)(cid:48)(0), da γ(cid:48)(0) = +− +γ(cid:48)(0). +Es gilt: κ +Nor +(s,γ) hängt nur von +| +γ(cid:48)(0) | -ab und ist gleich κ Nor(s,γ(cid:48)(0)). +ab und ist gleich κ +Nor +(s,γ(cid:48)(0)). Bemerkung 77 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. -Sei T1 sS = {x +Sei T1 s S = +{ +x ∈ -T sS -| -(cid:107)x +T s S +| (cid:107) +x (cid:107) = 1 } -∼= S1. Dann ist -κn Nor(s) : T1 sS +∼ = S1. Dann ist +κn +Nor +(s) : T1 +s +S → R, x (cid:55)→ -κ Nor(s,x) -eine glatte Funktion und Bildκn Nor(s) ist ein abgeschlossenes Intervall. +κ +Nor +(s,x) +eine glatte Funktion und Bildκn +Nor +(s) ist ein abgeschlossenes Intervall. Definition 75 Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. 94 5.3.GAUSS-KRÜMMUNG -a) κn 1(s) : = min(cid:8) κn Nor(s,x) (cid:12) (cid:12) x +a) κn +1 +(s) : = min (cid:8) κn +Nor +(s,x) (cid:12) (cid:12) x ∈ -T1 sS (cid:9) und -κn 2(s) : = max(cid:8) κn Nor(s,x) (cid:12) (cid:12) x +T1 +s +S (cid:9) und +κn +2 +(s) : = max (cid:8) κn +Nor +(s,x) (cid:12) (cid:12) x ∈ -T1 sS (cid:9) +T1 +s +S (cid:9) heißen Hauptkrümmungen von S in s. -b) K(s) := κn 1(s) ·κn 2(s) heißt Gauß-Krümmung von S in s. 
+b) K(s) := κn +1 +(s) +· +κn +2 +(s) heißt Gauß-Krümmung von S in s. Bemerkung 78 -Ersetzt man n durch −n, so gilt: -κ−n Nor(s,x) = −κn Nor(x) ∀x +Ersetzt man n durch +− +n, so gilt: +κ−n +Nor +(s,x) = +− +κn +Nor +(x) +∀ +x ∈ -T1 sS +T1 +s +S ⇒ κ−n 1 -(s) = −κn 2(s) +(s) = +− +κn +2 +(s) κ−n 2 -(s) = −κn 1(s) +(s) = +− +κn +1 +(s) und K−n(s) = Kn(s) =: K(s) Beispiel 48 -1) S = S2. Dann ist κ 1(s) = κ 2(s) = ±1 ∀s +1) S = S2. Dann ist κ +1 +(s) = κ +2 +(s) = +± +1 +∀ +s ∈ S2 ⇒ K(s) = 1 2) Zylinder: -κ 1(s) = 0,κ 2(s) = 1 +κ +1 +(s) = 0,κ +2 +(s) = 1 ⇒ K(s) = 0 3) Sattelpunkt auf hyperbolischem Paraboloid: -κ 1(s) < 0,κ 2(s) = 0 +κ +1 +(s) < 0,κ +2 +(s) = 0 → K(s) < 0 4) S = Torus. Siehe Abbildung 5.3 @@ -8081,20 +12598,32 @@ s 2 s 3 -Abbildung 5.3: K(s 1) > 0, K(s 2) = 0, K(s 3) < 0 +Abbildung 5.3: K(s +1 +) > 0, K(s +2 +) = 0, K(s +3 +) < 0 Bemerkung 79 Sei S eine reguläre Fläche, s ∈ S ein Punkt. 95 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T sS +s. -b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T sS +s. +a) Ist K(s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T +s +S +s. +b) Ist K(s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T +s +S +s. 5.4 Erste und zweite Fundamentalform Sei S ⊆ R3 eine reguläre Fläche, s ∈ -S, T sS die Tangentialebene an S in s und F : U +S, T +s +S die Tangentialebene an S in s und F : U → V eine lokale Parametrisierung von S um s. Weiter sei p := F−1(s). @@ -8107,108 +12636,185 @@ I S : = (cid:18) -g 1,1(s) g 1,2(s) -g 1,2(s) g -2,2(s)(cid:19) +g +1,1 +(s) g +1,2 +(s) +g +1,2 +(s) g +2,2 +(s) +(cid:19) = (cid:18) E(s) F(s) -F(s) -G(s)(cid:19) +F(s) G(s) +(cid:19) mit g i,j -= g s(D pF(e i),D pF(e j)) += g +s +(D +p +F(e +i +),D +p +F(e +j +)) = -(cid:104)∂F +(cid:104) +∂F ∂u -i(p), +i +(p), ∂F ∂u -j(p) +j +(p) (cid:105) i,j -∈ -{1,2 +∈ { +1,2 } Die Matrix I S heißt erste Fundamentalform von S bzgl. 
der Parametrisierung F. Bemerkung 80 -a) Die Einschränkung des Standardskalarproduktes des R3 auf T sS macht T sS zu einem +a) Die Einschränkung des Standardskalarproduktes des R3 auf T +s +S macht T +s +S zu einem euklidischen Vektorraum. -b) {D pF(e 1),D pF(e 2) +b) +{ +D +p +F(e +1 +),D +p +F(e +2 +) } -ist eine Basis von T sS. -c) Bzgl. der Basis {D pF(e 1),D pF(e 2) +ist eine Basis von T +s +S. +c) Bzgl. der Basis +{ +D +p +F(e +1 +),D +p +F(e +2 +) } hat das Standardskalarprodukt aus Bemer- -kung 80.a die Darstellungsmatrix I S. -d) g i,j(s) ist eine differenzierbare Funktion von s. +kung 80.a die Darstellungsmatrix I +S +. +d) g +i,j +(s) ist eine differenzierbare Funktion von s. Bemerkung 81 -det(I S) = +det(I S ) = +(cid:13) (cid:13) (cid:13) (cid:13) -(cid:13)∂F +∂F ∂u -1(p) +1 +(p) × ∂F ∂u -2(p)(cid:13) +2 +(p) +(cid:13) (cid:13) (cid:13) -(cid:13)2 -Beweis: Sei ∂F ∂u1(p) = +(cid:13) +2 +Beweis: Sei ∂F ∂u1 (p) =  -x + +x 1 x 2 x -3 -, ∂F ∂u2(p) = +3 + +, ∂F ∂u2 (p) =  -y + +y 1 y 2 -y3 +y3 +  -Dann ist ∂F ∂u1(p) -× -∂F ∂u2(p) = +Dann ist ∂F ∂u1 (p) × ∂F ∂u2 (p) =  -z + +z 1 z 2 z -3 +3 +  mit z 1 -= x 2y += x +2 +y +3 +− +x 3 -−x 3y +y 2 z 2 -= x 3y += x +3 +y 1 -−x 1y +− +x +1 +y 3 z 3 -= x 1y += x +1 +y +2 +− +x 2 -−x 2y +y 1 -⇒ -(cid:107)∂F +⇒ (cid:107) +∂F ∂u -1(p) +1 +(p) × ∂F ∂u -2(p) +2 +(p) (cid:107) = z2 1 @@ -8217,51 +12823,106 @@ z +z2 3 96 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -det(I S) = g 1,1g +det(I +S +) = g +1,1 +g 2,2 -−g2 +− +g2 1,2 = -(cid:42) x +(cid:42)  + +x 1 +x 2 x -2 +3 + +, + + x -3 , x 1 +x 2 x -2 -x -3 (cid:43)(cid:42) y +3 + + +(cid:43)(cid:42)  + +y 1 +y 2 y -2 +3 + +, + + +y +1 +y 2 +y +3 + + +(cid:43) +− +(cid:42)  + +x +1 +x 2 +x +3 + +, + + y -3 , y 1 +y 2 y +3 + + +(cid:43)2 += (x2 +1 ++x2 +2 ++x2 +3 +)(y2 +1 ++y2 2 ++y2 +3 +) +− +(x +1 y -3 (cid:43) −(cid:42) x 1 -x ++x 2 -x -3 , y -1 y 2 ++x +3 y -3 (cid:43)2 -= (x2 1+x2 2+x2 3)(y2 -1 -+y2 -2 -+y2 3) −(x 
1y 1+x 2y 2+x 3y 3)2 +3 +)2 Definition 77 a) Das Differential dA = (cid:112) -det(I)du 1du +det(I)du +1 +du 2 heißt Flächenelement von S bzgl. der Para- metrisierung F. @@ -8273,11 +12934,18 @@ V fdA := (cid:90) U -f(F(u 1,u 2) +f(F(u +1 +,u +2 +) (cid:124) (cid:123)(cid:122) (cid:125) =:s -)(cid:112) -detI(s)du 1du +) +(cid:112) +detI(s)du +1 +du 2 der Wert des Integrals von f über V, falls das Integral rechts existiert. Bemerkung 82 @@ -8290,26 +12958,33 @@ b) Sei f : S R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. Dann ist (cid:82) -SfdA wohldefiniert, falls (z. B.) S kompakt ist. +S +fdA wohldefiniert, falls (z. B.) S kompakt ist. Etwa: (cid:90) -SfdA = +S +fdA = n (cid:88) -i=1(cid:90) +i=1 +(cid:90) Vi fdA -−(cid:88) +− +(cid:88) i(cid:54)=j (cid:90) Vi∩Vj fdA -+(cid:88) -i,j,k(cid:90) ++ +(cid:88) +i,j,k +(cid:90) Vi∩Vj∩V k fdA -−... +− +... Beweis: a) Mit Transformationsformel. b) Ist dem Leser überlassen. @@ -8322,246 +12997,506 @@ S2. Dann gilt: a) n induziert für jedes s ∈ -S eine lineare Abbildung d sn : T sS +S eine lineare Abbildung d +s +n : T +s +S → -T n(s)S2 durch -d sn(x) = +T +n(s) +S2 durch +d +s +n(x) = d -dtn(s„+“tx +dt +n(s„+“tx (cid:124) (cid:123)(cid:122) (cid:125) SollaufFlächeS bleiben -)(cid:12) +) +(cid:12) (cid:12) (cid:12) t=0 -Die Abbildung d sn heißt Weingarten-Abbildung +Die Abbildung d +s +n heißt Weingarten-Abbildung 97 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -b) T n(s)S2 = T sS. -c) d sn ist ein Endomorphismus von T sS. -d) d sn ist selbstadjungiert bzgl. des Skalarproduktes I S. +b) T +n(s) +S2 = T +s +S. +c) d +s +n ist ein Endomorphismus von T +s +S. +d) d +s +n ist selbstadjungiert bzgl. des Skalarproduktes I +S +. Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. 98 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM Beweis: a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. -b) T n(S)S2 = (cid:104)n(s) (cid:105)⊥ = T sS -c) Wegen Proposition 5.1 (a) ist d sn ein Homomorphismus. 
-d) Zu zeigen: ∀x,y +b) T +n(S) +S2 = +(cid:104) +n(s) +(cid:105) +⊥ = T +s +S +c) Wegen Proposition 5.1 (a) ist d +s +n ein Homomorphismus. +d) Zu zeigen: +∀ +x,y ∈ -I sS : (cid:104)x,d sn(y) +I +s +S : +(cid:104) +x,d +s +n(y) (cid:105) -= (cid:104)d sn(x),y += +(cid:104) +d +s +n(x),y (cid:105) Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. Sei x i -= D pF(e i) = ∂F ∂ui(p) i = 1,2 -Beh.: (cid:104)x i,d sn(x j) += D +p +F(e +i +) = ∂F +∂ui +(p) i = 1,2 +Beh.: +(cid:104) +x +i +,d +s +n(x +j +) (cid:105) = (cid:104) ∂2F -∂ui∂uj(p),d sn(x i) +∂ui∂uj +(p),d +s +n(x +i +) (cid:105) ⇒ (cid:104) ∂2F -∂ui∂uj(p),d sn(x i) +∂ui∂uj +(p),d +s +n(x +i +) (cid:105) -= (cid:104)x j,d sn(x i) += +(cid:104) +x +j +,d +s +n(x +i +) (cid:105) Bew.: 0 = -(cid:104)∂F -∂u(p+te j),n(p+te j) +(cid:104) +∂F +∂u +(p+te +j +),n(p+te +j +) (cid:105) ⇒ 0 = d dt (cid:18) -(cid:104)∂F -∂u(p+te j),n(p+te j) -(cid:105)(cid:19)(cid:12) +(cid:104) +∂F +∂u +(p+te +j +),n(p+te +j +) +(cid:105) +(cid:19)(cid:12) (cid:12) (cid:12) t=0 = (cid:104) -d dt∂F -∂u -i(p+te j) +d +dt +∂F +∂u i +(p+te +j +) (cid:124) (cid:123)(cid:122) (cid:125) ∂2F -∂uj∂ui(p) +∂uj∂ui +(p) (cid:12) (cid:12) (cid:12) -t=0,n(s) (cid:105)+ (cid:104)x i,d snD pF(e j) +t=0 +,n(s) +(cid:105) ++ +(cid:104) +x +i +,d +s +nD +p +F(e +j +) (cid:124) (cid:123)(cid:122) (cid:125) xj (cid:105) Definition 78 -Die durch −d sn definierte symmetrische Bilinearform auf T sS heißt zweite Fundamental- +Die durch +− +d +s +n definierte symmetrische Bilinearform auf T +s +S heißt zweite Fundamental- form von S in s bzgl. F. 
-Man schreibt: II s(x,y) = (cid:104)−d sn(x),y +Man schreibt: II +s +(x,y) = +(cid:104)− +d +s +n(x),y (cid:105) -= I s( −d sn(x),y) += I +s +( +− +d +s +n(x),y) Bemerkung 83 -Bezüglich der Basis {x 1,x +Bezüglich der Basis +{ +x +1 +,x 2 } -von T sS hat II +von T +s +S hat II s die Darstellungsmatrix -(h(s) -i,j) +(h +(s) +i,j +) i,j=1,2 -mit h i,j(s) = +mit h +i,j +(s) = (cid:104) ∂2F -∂u i∂u -j(p),n(s) +∂u +i +∂u +j +(p),n(s) (cid:105) Proposition 5.2 -Sei γ : [ −ε,ε] +Sei γ : [ +− +ε,ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ(0) = s. Dann gilt: -κ Nor(s,γ) = II s(γ(cid:48)(0),γ(cid:48)(0)) -Beweis: Nach Definition 74 ist κ Nor(s,γ) = (cid:104)γ(cid:48)(cid:48)(0),n(s) (cid:105). Nach Voraussetzung gilt +κ +Nor +(s,γ) = II +s +(γ(cid:48)(0),γ(cid:48)(0)) +Beweis: Nach Definition 74 ist κ +Nor +(s,γ) = +(cid:104) +γ(cid:48)(cid:48)(0),n(s) +(cid:105) +. Nach Voraussetzung gilt n(γ(t)) ⊥ γ(cid:48)(t) -⇔ -(cid:104)γ(cid:48)(cid:48)(0),n(s) +⇔ (cid:104) +γ(cid:48)(cid:48)(0),n(s) (cid:105) = 0 Die Ableitung nach t ergibt 0 = d -dt( (cid:104)n(γ(t)),γ(cid:48)(t)) +dt +( +(cid:104) +n(γ(t)),γ(cid:48)(t)) = -(cid:28) d dtn(γ(t))(cid:12) +(cid:28) d +dt +n(γ(t)) (cid:12) (cid:12) -t=0,γ(cid:48)(0)(cid:29) -+ (cid:104)n(s),γ(cid:48)(cid:48)(0) +(cid:12) +t=0 +,γ(cid:48)(0) +(cid:29) ++ +(cid:104) +n(s),γ(cid:48)(cid:48)(0) (cid:105) 99 5.4.ERSTEUNDZWEITEFUNDAMENTALFORM -= (cid:104)d sn(γ(cid:48)(0)),γ(cid:48)(0) (cid:105)+κ Nor(s,γ) -= −II s(γ(cid:48)(0),γ(cid:48)(0))+κ Nor(s,γ) += +(cid:104) +d +s +n(γ(cid:48)(0)),γ(cid:48)(0) +(cid:105) ++κ +Nor +(s,γ) += +− +II +s +(γ(cid:48)(0),γ(cid:48)(0))+κ +Nor +(s,γ) Folgerung 5.3 Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: -κ Nor(s,γ) = κ Nor(s,γ(cid:48)(0)) +κ +Nor +(s,γ) = κ +Nor +(s,γ(cid:48)(0)) Satz 5.4 Sei S ⊆ R3 eine reguläre, orientierbare Fläche und s ∈ S. -a) Die Hauptkrümmungen κ 1(s),κ 2(s) sind die Eigenwerte von II s. 
-b) Für die Gauß-Krümmung gilt: K(s) = det(II s) +a) Die Hauptkrümmungen κ +1 +(s),κ +2 +(s) sind die Eigenwerte von II +s +. +b) Für die Gauß-Krümmung gilt: K(s) = det(II +s +) Beweis: a) II s -ist symmetrisch, I sS hat also eine Orthonormalbasis aus Eigenvektoren y 1,y +ist symmetrisch, I +s +S hat also eine Orthonormalbasis aus Eigenvektoren y +1 +,y 2 von -II s. Ist x +II +s +. Ist x ∈ -T sS, (cid:107)x +T +s +S, +(cid:107) +x (cid:107) = 1, so gibt es ϕ ∈ -[0,2π) mit x = cosϕ ·y 1+sinϕ ·y 2. -Seien λ 1,λ +[0,2π) mit x = cosϕ +· +y +1 ++sinϕ +· +y +2 +. +Seien λ +1 +,λ 2 -die Eigenwerte von II s, also II s(y i,y i) = λ i. Dann gilt: -II s(x,x) = cos2ϕλ 1+sin2ϕλ +die Eigenwerte von II +s +, also II +s +(y +i +,y +i +) = λ +i +. Dann gilt: +II +s +(x,x) = cos2ϕλ +1 ++sin2ϕλ 2 -= (1 −sin2ϕ)λ 1+sin2ϕλ += (1 +− +sin2ϕ)λ +1 ++sin2ϕλ 2 -= λ 1+sin2ϕ(λ += λ +1 ++sin2ϕ(λ 2 -−λ 1) +− +λ +1 +) ≥ λ 1 -= cos2ϕ+(1 −cos2ϕ)λ += cos2ϕ+(1 +− +cos2ϕ)λ 2 = λ 2 -−cos2ϕ(λ +− +cos2ϕ(λ 2 -−λ 1) +− +λ +1 +) ≤ λ 2 Prop. 5.2 ===== ⇒ -λ 1 = min(cid:8) κ Nor(s,x) (cid:12) (cid:12) x +λ 1 = min (cid:8) κ Nor (s,x) (cid:12) (cid:12) x ∈ -T1 sS (cid:9) -λ 2 = max(cid:8) κ Nor(s,x) (cid:12) (cid:12) x +T1 s S (cid:9) +λ 2 = max (cid:8) κ Nor (s,x) (cid:12) (cid:12) x ∈ -T1 sS (cid:9) +T1 s S (cid:9) Satz 5.5 (Satz von Gauß-Bonnet) Sei S ⊆ R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: (cid:90) -SK(s)dA = 2πχ(S) +S +K(s)dA = 2πχ(S) Dabei ist χ(S) die Euler-Charakteristik von S. Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. Lösungen der Übungsaufgaben Lösung zu Aufgabe 1 Teilaufgabe a) Es gilt: -(i) ∅,X +(i) +∅ +,X ∈ -T X. +T +X +. (ii) T X -ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U 1,U +ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U +1 +,U 2 ∈ T X : U 1 -∩U +∩ +U 2 ∈ -T X. +T +X +. 
(iii) Auch unter beliebigen Vereinigungen ist T X abgeschlossen, d. h. es gilt für eine beliebige Indexmenge I und alle U -i -∈ +i ∈ T X für alle i ∈ I : (cid:83) -i∈IU -i -∈ +i∈I +U +i ∈ T X -Also ist (X,T X) ein topologischer Raum. -Teilaufgabe b) Wähle x = 1,y = 0. Dann gilt x (cid:54)= y und die einzige Umgebung von x +Also ist (X,T +X +) ein topologischer Raum. +Teilaufgabe b) Wähle x = 1,y = 0. Dann gilt x +(cid:54) += y und die einzige Umgebung von x ist X. Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. -(X,T X) ist also nicht hausdorffsch. -Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,T X) nach -(b)nichthausdorffschist, liefertdieKontrapositionder Trennungseigenschaft, dass(X,T X) +(X,T +X +) ist also nicht hausdorffsch. +Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X,T +X +) nach +(b)nichthausdorffschist, liefertdieKontrapositionder Trennungseigenschaft, dass(X,T +X +) kein metrischer Raum sein kann. Lösung zu Aufgabe 2 Teilaufgabe a) -Beh.: ∀a +Beh.: +∀ +a ∈ -Z : {a +Z : +{ +a } ist abgeschlossen. Sei a @@ -8570,11 +13505,15 @@ Z beliebig. Dann gilt: Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. Teilaufgabe b) -Beh.: {−1,1 +Beh.: +{− +1,1 } ist nicht offen Bew.: durch Widerspruch -Annahme: {−1,1 +Annahme: +{− +1,1 } ist offen. Dann gibt es T @@ -8582,14 +13521,18 @@ Dann gibt es T B, sodass (cid:83) M∈T -M = {−1,1 }. Aber alle U +M = +{− +1,1 +} +. Aber alle U ∈ B haben unendlich viele Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente ⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein -⇒ -{−1,1 +⇒ {− +1,1 } ist nicht offen. (cid:4) @@ -8601,23 +13544,33 @@ Annahme: Es gibt nur endlich viele Primzahlen p ∈ P Dann ist -Z \{−1,+1 +Z +\{− +1,+1 } -FSd.Arithmetik = (cid:91) p∈PU +FSd.Arithmetik = (cid:91) +p∈P +U 0,p -endlich. Das ist ein Widerspruch zu |Z +endlich. 
Das ist ein Widerspruch zu +| +Z | -ist unendlich und |{−1,1 +ist unendlich und +|{− +1,1 }| ist endlich. (cid:4) Lösung zu Aufgabe 3 (a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form (cid:89) -j∈JU +j∈J +U j × (cid:89) -i∈N,i(cid:54)=jP +i∈N,i(cid:54)=j +P i wobei J ⊆ @@ -8629,11 +13582,13 @@ j offen ist. Beweis: Nach Definition der Produkttopologie bilden Mengen der Form (cid:89) -i∈JU +i∈J +U j × (cid:89) -i∈N\JP +i∈N\J +P i wobei J ⊆ @@ -8642,7 +13597,9 @@ j ⊆ P j -offen ∀j +offen +∀ +j ∈ J eine Basis der Topologie. Damit sind die offenen Mengen von P Vereinigungen von Mengen der obigen @@ -8653,7 +13610,9 @@ Beweis: Es seinen x,y P und x sowie y liegen in der gleichen Zusammenhangs- komponente Z ⊆ -P. Da Z zusammenhängend ist und ∀i +P. Da Z zusammenhängend ist und +∀ +i ∈ I : p i @@ -8662,7 +13621,9 @@ i P i ist -stetig, ist p i(Z) +stetig, ist p +i +(Z) ⊆ P i @@ -8671,108 +13632,182 @@ zusammenhängend für alle i N. Die zusammenhängenden Mengen von P i -sind genau {0 +sind genau +{ +0 +} +und +{ +1 } -und {1 }, d. h. für alle i +, d. h. für alle i ∈ N gilt entweder -p i(Z) -⊆ -{0 +p +i +(Z) +⊆ { +0 +} +oder p +i +(Z) +⊆ { +1 } -oder p i(Z) -⊆ -{1 }. Es sei z +. Es sei z i -∈ -{0,1 +∈ { +0,1 } -so, dass p i(Z) -⊆ -{z +so, dass p +i +(Z) +⊆ { +z i } für alle i ∈ N. Dann gilt also: -p i(x) +p +i +(x) (cid:124)(cid:123)(cid:122)(cid:125) =xi = z i -= p i(y) += p +i +(y) (cid:124)(cid:123)(cid:122)(cid:125) =yi -∀i +∀ +i ∈ N Somit folgt: x = y (cid:4) Lösung zu Aufgabe 4 -(a) Beh.: GL n(R) ist nicht kompakt. -Bew.: det : GL n(R) +(a) Beh.: GL +n +(R) ist nicht kompakt. +Bew.: det : GL +n +(R) → -R \{0 +R +\{ +0 } -ist stetig. Außerdem ist det(GL n(R)) = R \{0 +ist stetig. Außerdem ist det(GL +n +(R)) = R +\{ +0 } nicht kompakt. 22 ⇒ -GL n(R) ist nicht kompakt. (cid:4) -(b) Beh.: SL 1(R) ist nicht kompakt, für n > 1 ist SL n(R) kompakt. -Bew.: Für SL 1(R) gilt: SL 1(R) = (cid:8) A +GL +n +(R) ist nicht kompakt. 
(cid:4) +(b) Beh.: SL +1 +(R) ist nicht kompakt, für n > 1 ist SL +n +(R) kompakt. +Bew.: Für SL 1 (R) gilt: SL 1 (R) = (cid:8) A ∈ -R1×1 (cid:12) (cid:12) detA = 1(cid:9) = (cid:0) 1(cid:1) ∼= {1 }. 22 +R1×1 (cid:12) (cid:12) detA = 1 (cid:9) = (cid:0) 1 (cid:1) ∼ = +{ +1 +} +. 22 ⇒ -SL 1(R) +SL 1 (R) ist kompakt. 102 LösungenderÜbungsaufgaben -SL n(R) +SL +n +(R) ⊆ -GL n(R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1 +GL +n +(R) lässt sich mit einer Teilmenge des Rn2 identifizieren. Nach Satz 1.1 sinddiesegenaudannkompakt,wennsiebeschränktundabgeschlossensind.Definiere nun für für n ∈ -N ≥2,m +N +≥2 +,m ∈ N: A m -= diag n(m, += diag +n +(m, 1 -m,...,1) +m +,...,1) Dann gilt: detA m = 1, d. h. A m ∈ -SL n(R), und A +SL +n +(R), und A m -ist unbeschränkt, da (cid:107)A -m (cid:107)∞ +ist unbeschränkt, da +(cid:107) +A +m +(cid:107) +∞ = m -−−−−→ m→∞ -∞. (cid:4) -(c) Beh.: P(R) ist kompakt. -Bew.: P(R) ∼= Sn/ x∼−x. Per Definition der Quotiententopologie ist die Klassenabbil- +−−−−→ m→∞ ∞ +. (cid:4) +(c) Beh.: +P +(R) ist kompakt. +Bew.: +P +(R) ∼ = Sn/ x∼−x . Per Definition der Quotiententopologie ist die Klassenabbil- dung stetig. Da Sn als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt ist 22 -⇒ -P(R) ist kompakt. (cid:4) +⇒ P +(R) ist kompakt. (cid:4) Lösung zu Aufgabe 5 Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. Definition 79 -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G +Seien (G, +∗ +) und (H, +◦ +) Gruppen und ϕ : G → H eine Abbildung. ϕ heißt Homomorphismus, wenn -∀g 1,g +∀ +g +1 +,g 2 ∈ G : ϕ(g 1 -∗g 2) = ϕ(g 1) ◦ϕ(g 2) +∗ +g +2 +) = ϕ(g +1 +) +◦ +ϕ(g +2 +) gilt. Es folgt direkt: 1) Sei X = R mit der Standarttopologie und ϕ @@ -8803,7 +13838,11 @@ Lösung zu Aufgabe 6 Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf Seite 6. Definition 80 -Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G +Seien (G, +∗ +) und (H, +◦ +) Gruppen und ϕ : G → H eine Abbildung. 
ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. @@ -8815,19 +13854,29 @@ Lösung zu Aufgabe 7 Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend -Beweis: „ ⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung +Beweis: „ +⇒ +“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung direkt aus Bemerkung 23. -„ ⇐“: Seien x,y +„ +⇐ +“: Seien x,y ∈ M und -Z := {z +Z := +{ +z ∈ M -| -∃Weg von x nach z +| ∃ +Weg von x nach z } Es gilt: -(i) Z (cid:54)= ∅, da M lokal wegzusammenhängend ist +(i) Z +(cid:54) += +∅ +, da M lokal wegzusammenhängend ist (ii) Z ist offen, da M lokal wegzusammenhängend ist (iii) ZC := { @@ -8858,24 +13907,38 @@ Z, so gäbe es Wege γ : [0,1] → -M,γ 2(0) = z,γ 2(1) = x und γ +M,γ +2 +(0) = z,γ +2 +(1) = x und γ 1 : [0,1] → -M,γ 1(0) = ˜ z,γ 1(1) = z. +M,γ +1 +(0) = ˜ z,γ +1 +(1) = z. Dann wäre aber γ : [0,1] → M, γ(x) = (cid:40) -γ 1(2x) falls 0 +γ +1 +(2x) falls 0 ≤ x ≤ 1 2 -γ 2(2x −1) falls 1 +γ +2 +(2x +− +1) falls 1 2 < x ≤ @@ -8884,24 +13947,55 @@ ein stetiger Weg von ˜ z nach x ⇒ Widerspruch. DaM zusammenhängendistundM = Z -(cid:124)(cid:123)(cid:122)(cid:125) offen∪ +(cid:124)(cid:123)(cid:122)(cid:125) +offen +∪ ZC (cid:124)(cid:123)(cid:122)(cid:125) -offen,sowieZ (cid:54)= ∅folgtZC = ∅. +offen +,sowieZ +(cid:54) += +∅ +folgtZC = +∅ +. Also ist M = Z wegzusammenhängend. (cid:4) (b) Beh.: X ist wegzusammenhängend. -Beweis: X := (R \{0 }) ∪{0 1,0 +Beweis: X := (R +\{ +0 +} +) +∪{ +0 +1 +,0 2 } -und (R \{0 }) ∪{0 +und (R +\{ +0 +} +) +∪{ +0 2 } sind homöomorph zu R. Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte 0 1 -und 0 2. -Da (R \{0 }) ∪{0 +und 0 +2 +. +Da (R +\{ +0 +} +) +∪{ +0 1 } homöomorph zu R ist, exisitert ein Weg γ @@ -8911,35 +14005,60 @@ von 0 zu einem beliebigen Punkt a ∈ -R \{0 }. -Da (R \{0 }) ∪{0 +R +\{ +0 +} +. +Da (R +\{ +0 +} +) +∪{ +0 2 } ebenfalls homöomorph zu R ist, existiert außerdem ein Weg γ 2 -von a nach 0 2. 
Damit existiert ein (nicht einfacher) Weg γ von 0 +von a nach 0 +2 +. Damit existiert ein (nicht einfacher) Weg γ von 0 1 nach -0 2. (cid:4) +0 +2 +. (cid:4) Lösung zu Aufgabe 9 Vor.: Sei (X,d) eine absolute Ebene, A,B,C ∈ -X und (cid:52)ABC ein Dreieck. +X und +(cid:52) +ABC ein Dreieck. 104 LösungenderÜbungsaufgaben -(a) Beh.: AB ∼= AC +(a) Beh.: AB +∼ += AC ⇒ -∠ABC ∼= ∠ACB -Bew.: Sei AB ∼= AC. +∠ABC +∼ += ∠ACB +Bew.: Sei AB +∼ += AC. ⇒ ∃ Isometrie ϕ mit ϕ(B) = C und ϕ(C) = B und ϕ(A) = A. ⇒ ϕ(∠ABC) = ∠ACB ⇒ ∠ABC -∼= +∼ += ∠ACB (cid:4) -(b) Beh.: Der längeren Seite von (cid:52)ABC liegt der größere Winkel gegenüber und umge- +(b) Beh.: Der längeren Seite von +(cid:52) +ABC liegt der größere Winkel gegenüber und umge- kehrt. Bew.: Sei d(A,C) > d(A,B). Nach §3 (i) gibt es C(cid:48) ∈ @@ -8961,7 +14080,9 @@ d(A,B) < d(A,C) (cid:4) X und P / ∈ g -Beh.: ∃! Lot +Beh.: +∃ +! Lot Bew.: ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g. ⇒ @@ -8971,17 +14092,28 @@ Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl ϕ(P)P schneidet g in F. SeiA ∈ -g \{F }. Danngilt ϕ(∠AFP) = ∠AFϕ(P) = π +g +\{ +F +} +. Danngilt ϕ(∠AFP) = ∠AFϕ(P) = π ⇒ ∠AFP istrechterWinkel. Gäbe es nun G ∈ -g \{F }, so dass PG weiteres Lot von P auf g ist, wäre (cid:52)PFG +g +\{ +F +} +, so dass PG weiteres Lot von P auf g ist, wäre +(cid:52) +PFG ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). · · A -GP +G +P F g Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P @@ -8996,9 +14128,17 @@ h und o. B. d. A. f g. f ∦ h ⇒ -f ∩h (cid:54)= ∅, sei also x +f +∩ +h +(cid:54) += +∅ +, sei also x ∈ -f ∩h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele +f +∩ +h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele zu g durch x, da x / ∈ g. 
Diese ist f, da x @@ -9015,7 +14155,11 @@ parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f ⇒ g ∦ h (cid:4) Lösung zu Aufgabe 11 -Sei (X,d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem (cid:52)ABC und (cid:52)A(cid:48)B(cid:48)C(cid:48) +Sei (X,d,G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem +(cid:52) +ABC und +(cid:52) +A(cid:48)B(cid:48)C(cid:48) Dreiecke, für die gilt: d(A,B) = d(A(cid:48),B(cid:48)) d(A,C) = d(A(cid:48),C(cid:48)) @@ -9028,7 +14172,11 @@ Bem. 62 ===== ⇒ C = ϕ(C). -Es gilt also ϕ( (cid:52)A(cid:48)B(cid:48)C(cid:48)) = (cid:52)ABC. (cid:4) +Es gilt also ϕ( +(cid:52) +A(cid:48)B(cid:48)C(cid:48)) = +(cid:52) +ABC. (cid:4) Bildquellen Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. @@ -9040,7 +14188,9 @@ Abb. 1.11 Knoten von Jim.belk aus der „Blue knots“-Serie: – Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png – Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png – Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure-Eight_Knot.png -– 6 2-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png +– 6 +2 +-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3) Abb. 1.13 Kleeblattknoten,3-Färbung:Jim.belk,commons.wikimedia.org/wiki/File:Tricoloring. png @@ -9089,13 +14239,17 @@ heißt ein Häufungspunkt von D : ⇔ ∃ Folge x n -in D \{x +in D +\{ +x 0 } mit x n → -x 0. +x +0 +. Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra entnommen: Definition 82 @@ -9115,20 +14269,31 @@ Sei V ein Vektorraum und S V eine Teilmenge. 
S heißt eine Orthonormalbasis von V, wenn gilt: (i) S ist eine Basis von V -(ii) ∀v +(ii) +∀ +v ∈ -S : (cid:107)v +S : +(cid:107) +v (cid:107) = 1 -(iii) ∀v 1,v +(iii) +∀ +v +1 +,v 2 ∈ S : v 1 -(cid:54)= v +(cid:54) += v 2 -⇒ -(cid:104)v 1,v +⇒ (cid:104) +v +1 +,v 2 (cid:105) = 0 @@ -9146,18 +14311,24 @@ f(b) < y < f(a). Dann existiert ein x 0 ∈ -[a,b] mit f(x 0) = y 0. +[a,b] mit f(x +0 +) = y +0 +. Definition 84 Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. v ∈ -V \{0 +V +\{ +0 } heißt Eigenvektor : -⇔ -∃λ +⇔ ∃ +λ ∈ K : f(v) = λv. Wenn ein solches λ @@ -9170,10 +14341,14 @@ R. Dann gilt: (x+y)n = n (cid:88) -k=0(cid:18) +k=0 +(cid:18) +n +k +(cid:19) +xn−kyk +∀ n -k(cid:19) -xn−kyk ∀n ∈ N 0 @@ -9181,57 +14356,91 @@ Definition 85 Seien a,b ∈ R3 Vektoren. -a ×b := +a +× +b :=  -a + +a 1 b 3 a -3 +3 +  -× -a +× + + +a 1 b 3 a -3 +3 +  =  -a 2b + +a +2 +b +3 +− +a 3 -−a 3b +b +2 +a 3 b 1 +− +a 1 b 3 +a +1 +b 2 -a 3b 1 −a 1b 3 -a 1b +− +a 2 -−a 2b -1 +b +1 +  Symbolverzeichnis Mengenoperationen Seien A,B und M Mengen. AC Komplement von A -P(M) Potenzmenge von M +P +(M) Potenzmenge von M M Abschluss von M ∂M Rand der Menge M M◦ Inneres der Menge M -A ×B Kreuzprodukt +A +× +B Kreuzprodukt A ⊆ B Teilmengenbeziehung A (cid:40) B echte Teilmengenbeziehung -A \B Differenzmenge -A ∪B Vereinigung -A ˙ ∪B Disjunkte Vereinigung -A ∩B Schnitt +A +\ +B Differenzmenge +A +∪ +B Vereinigung +A ˙ +∪ +B Disjunkte Vereinigung +A +∩ +B Schnitt Geometrie AB Gerade durch die Punkte A und B AB Strecke mit Endpunkten A und B -(cid:52)ABC Dreieck mit Eckpunkten A,B,C -AB ∼= CD Die Strecken AB und CD sind +(cid:52) +ABC Dreieck mit Eckpunkten A,B,C +AB ∼ = CD Die Strecken AB und CD sind isometrisch -|K +| +K | Geometrische Realisierung des Simplizialkomplexes K @@ -9240,10 +14449,16 @@ Sei X ein topologischer Raum und K ein Kör- per. 
Homöo(X) Homöomorphismengruppe Iso(X) Isometriengruppe -GL n(K) Allgemeine lineare Gruppe (von +GL +n +(K) Allgemeine lineare Gruppe (von General Linear Group) -SL n(K) Spezielle lineare Gruppe -PSL n(K) Projektive lineare Gruppe +SL +n +(K) Spezielle lineare Gruppe +PSL +n +(K) Projektive lineare Gruppe Perm(X) Permutationsgruppe Sym(X) Symmetrische Gruppe Wege @@ -9253,7 +14468,8 @@ X ein Weg. [γ] Homotopieklasse von γ γ 1 -∗γ +∗ +γ 2 Zusammenhängen von Wegen γ @@ -9262,12 +14478,16 @@ Zusammenhängen von Wegen γ 2 Homotopie von Wegen -γ(x) Inverser Weg, also γ(x) := γ(1 −x) +γ(x) Inverser Weg, also γ(x) := γ(1 +− +x) C Bild eines Weges γ, also C := γ([0,1]) Weiteres B Basis einer Topologie -B δ(x) δ-Kugel um x +B +δ +(x) δ-Kugel um x S Subbasis einer Topologie T Topologie @@ -9275,7 +14495,8 @@ A Atlas P Projektiver Raum -(cid:104)·, +(cid:104)· +, ·(cid:105) Skalarprodukt X/ @@ -9284,23 +14505,30 @@ X modulo ∼ [x] ∼ Äquivalenzklassen von x bzgl. -∼ (cid:107)x +∼ +(cid:107) +x (cid:107) Norm von x -|x +| +x | Betrag von x -(cid:104)a +(cid:104) +a (cid:105) Erzeugnis von a Sn Sphäre Tn Torus -f ◦g Verkettung von f und g +f +◦ +g Verkettung von f und g π X Projektion auf X f -|U +| +U f eingeschränkt auf U f−1(M) Urbild von M Rg(M) Rang von M @@ -9312,20 +14540,36 @@ d n Lineare Abbildung aus Bemer- kung 37 -A ∼= B A ist isometrisch zu B +A +∼ += B A ist isometrisch zu B f ∗ Abbildung zwischen Fundamental- gruppen (vgl. Seite 49) 111 Symbolverzeichnis Zahlenmengen -N = {1,2,3,... +N = +{ +1,2,3,... } Natürliche Zahlen -Z = N ∪{0, −1, −2,... +Z = N +∪{ +0, +− +1, +− +2,... } Ganze Zahlen -Q = Z ∪(cid:8) 1 2, 1 3, 2 +Q = Z +∪ +(cid:8) 1 +2 +, 1 +3 +, 2 3 (cid:9) = (cid:8) z n @@ -9333,16 +14577,28 @@ mit z ∈ Z und n ∈ -Z \{0 }(cid:9) Rationale Zahlen -R = Q ∪(cid:8) √2, +Z +\{ +0 +} +(cid:9) Rationale Zahlen +R = Q +∪ +(cid:8) √2, − -3 √3,...(cid:9) Reele Zahlen +3 √3,... 
(cid:9) Reele Zahlen R + Echt positive reele Zahlen Rn +,0 -:= {(x 1,...,x n) +:= +{ +(x +1 +,...,x +n +) ∈ Rn | @@ -9352,35 +14608,46 @@ n 0 } Halbraum -R× = R \{0 +R× = R +\{ +0 } Einheitengruppe von R -C = {a+ib +C = +{ +a+ib | a,b ∈ R } Komplexe Zahlen -P = {2,3,5,7,... +P = +{ +2,3,5,7,... } Primzahlen -H = {z +H = +{ +z ∈ C -| -(cid:61)z > 0 +| (cid:61) +z > 0 } obere Halbebene I = [0,1] (cid:40) R Einheitsintervall f : S1 (cid:44) → R2 Einbettung der Kreislinie in die Ebene -π 1(X,x) Fundamentalgruppe im topologischen Raum X um x +π +1 +(X,x) Fundamentalgruppe im topologischen Raum X um x ∈ X Fix(f) Menge der Fixpunkte der Abbildung f -(cid:107)·(cid:107)2 +(cid:107)·(cid:107) +2 2-Norm; Euklidische Norm κ Krümmung κ @@ -9388,15 +14655,21 @@ Nor Normalenkrümmung V(f) Nullstellenmenge von f2 Krümmung -D pF : R2 +D +p +F : R2 → R3 Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) -T sS Tangentialebene an S +T +s +S Tangentialebene an S ⊆ R3 durch s ∈ S -d sn(x) Weingarten-Abbildung +d +s +n(x) Weingarten-Abbildung 2von Vanishing Set Stichwortverzeichnis Abbildung diff --git a/read/results/playa/1601.03642.txt b/read/results/playa/1601.03642.txt index 66e7c41..0b87099 100644 --- a/read/results/playa/1601.03642.txt +++ b/read/results/playa/1601.03642.txt @@ -2,7 +2,7 @@ Creativity in Machine Learning Martin Thoma E-Mail: info@martin-thoma.de -Abstract —Recent machine learning techniques can be modified +Abstract—Recent machine learning techniques can be modified to produce creative results. Those results did not exist before; it is not a trivial combination of the data which was fed into the machine learning system. The obtained results come in multiple @@ -11,7 +11,7 @@ This paper gives a high level overview of how they are created and gives some examples. It is meant to be a summary of the current work and give people who are new to machine learning some starting points. - I. I NTRODUCTION + I. 
INTRODUCTION According to [Gad06] creativity is “the ability to use your imagination to produce new ideas, make things etc.” and imagination is “the ability to form pictures or ideas in your @@ -25,7 +25,7 @@ learning, Section III gives examples of creativity with images, Section IV gives examples of machines producing textual content, and Section V gives examples of machine learning and music. A discussion follows in Section VI. -II. B ASICS OF M ACHINE L EARNING +II. BASICS OF MACHINE LEARNING The traditional approach of solving problems with software is to program machines to do so. The task is divided in as simple sub-tasks as possible, the subtasks are analyzed and the @@ -46,29 +46,28 @@ A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P , if its performance at tasks in T , as measured by P , improves with experience E . Σ ϕx - 0 +0 x - 1 +1 x - 2 +2 x - 3 +3 x n w - 0 +0 w 1 w 2 w 3 -w -n. +wn. . . (a) Example of an artificial neuron unit. x - i are the input signals and w +i are the input signals and w i are weights which have to get learned. Each input signal gets multiplied @@ -108,23 +107,23 @@ of time required to understand such a complex system from basic building blocks is a time-intensive and difficult task. An important group of machine learning algorithms was inspired by biological neurons and are thus called artificial -neural networks . Those networks are based on mathematical +neural networks. Those networks are based on mathematical functions called artificial neurons which take n ∈ N numbers x - 1 , . . . , x - n ∈ R as input, multiply them with weights +1, . . . , x +n ∈ R as input, multiply them with weights w - 1 , . . . , w - n ∈ R , add them and apply a so called activation +1, . . . , w +n ∈ R, add them and apply a so called activation function ϕ as visualized in Figure 1(a). One example of such -an activation function is the sigmoid function ϕ( x ) = 1 -1+e − x . 
+an activation function is the sigmoid function ϕ(x) = 1 +1+e−x . Those functions act as building blocks for more complex systems as they can be chained and grouped in layers as visualized in Figure 1(b). The interesting question is how the parameters w i are learned. This is usually done by an -optimization technique called gradient descent . The gradient +optimization technique called gradient descent. The gradient descent algorithm takes a function which has to be derivable, starts at any point of the surface of this error function andarXiv:1601.03642v1 [cs.CV] 12 Jan 2016 @@ -133,11 +132,11 @@ it tries to find a minimum of this high-dimensional function. There is, of course, a lot more to say about machine learning. The interested reader might want to read the introduction given by Mitchell [Mit97]. - III. I MAGE D ATA + III. IMAGE DATA Applying a simple neural network on image data directly can work, but the number of parameters gets extraordinary large. One would take one neuron per pixel and channel. This means -for 500 px × 500 px RGB images one would get 750 ,000 input +for 500 px × 500 px RGB images one would get 750,000 input signals. To approach this problem, so called Convolutional Neural Networks (CNNs) were introduced. Instead of learning the full connection between the input layer and the first @@ -177,7 +176,7 @@ might be chosen is because neural networks are structured in layers. Recent publications tend to have more and more layers [HZRS15]. The used jargon is to say they get “deeper”. As this technique as published by Google engineers, the -technique is called Google DeepDream . Fig. 2: Aurelia aurita +technique is called Google DeepDream. Fig. 2: Aurelia aurita Fig. 3: DeepDream impression of Aurelia aurita It has become famous in the internet [Red]. Usually, the images are generated in iterations and in each iteration it is zoomed @@ -207,7 +206,7 @@ This artistic style imitation can be seen itself as creative work. 
An example is given by Figure 4. The code which created this example is available under [Joh16]. Something similar was done by [SPB+ - 14], where the style of +14], where the style of a portrait photograph was transferred to another photograph. A demo can be seen on [Shi14]. C. Drawing Robots @@ -216,8 +215,8 @@ AIKON (Automatic IKONic drawing) which can automatically generated sketches for portraits [TL05]. AIKON takes a digital photograph, detects faces on them and sketches them with a pen-plotter. -Tresset and Leymaire use k -means clustering [KMN + - 02] to +Tresset and Leymaire use k-means clustering [KMN+ +02] to segment regions of the photograph with similar color which, in turn, will get a similar shading. Such a drawing robot could apply machine learning techniques @@ -226,7 +225,7 @@ could apply self-learning techniques to draw results most similar to the artists impression of the image. However, the system described in [TL05] seems not to be a machine learning computer program according to the definition by Tom -Mitchell [Mit97]. IV. T EXT D ATA +Mitchell [Mit97]. IV. TEXT DATA Digital text is the first form of natural communication which involved computers. It is used in the form of chats, websites, on collaborative projects like Wikipedia, in scientific literature. @@ -236,8 +235,8 @@ for education, in notes from conversations. This list could be continued and most of these kinds of texts are now available in digital form. This digital form can be used to teach machines to generate similar texts. -The most simple language model which is of use is ann -gram -model. This model makes use of sequences of the lengthn to +The most simple language model which is of use is an n-gram +model. This model makes use of sequences of the length n to model language. It can be used to get the probability of a third word, given the previous two words. This way, a complete text can be generated word by word. 
Refinements and extensions @@ -247,7 +246,7 @@ However, there are much more sophisticated models. One of those are character predictors based on Recurrent Neural Networks (RNNs). Those character predictors take a sequence of characters as input and predict the next character. In that -sense they are similar to the n -gram model, but operate on +sense they are similar to the n-gram model, but operate on a lower level. Using such a predictor, one can generate texts character by character. If the model is good, the text can have the correct punctuation. This would not be possible with a @@ -321,10 +320,10 @@ computing power to train RNNs and language models similar to the ones described before. Interesting results like the following were obtained by [VL15]: Human: what is the purpose of life ? -Machine : to serve the greater good . +Machine: to serve the greater good . Human: what is the purpose of living ? -Machine : to live forever . -V. A UDIO D ATA +Machine: to live forever . +V. AUDIO DATA Common machine learning tasks which involve audio data are speech recognition, speaker identification, identification of songs. This leads to some less-common, but interesting topics: @@ -365,15 +364,15 @@ Recurrent neural networks — LSTM networks, to be exact (GRU) to build a network which can be trained to generate music. Instead of taking notes directly or MIDI files, Nayebi and Vitelli took raw audio waveforms as input. Those audio -waveforms are feature vectors given for time steps0, 1 , . . . , t − -1 , t. The network is given those feature vectors X -1 , . . . , X - t +waveforms are feature vectors given for time steps 0, 1, . . . , t − +1, t. The network is given those feature vectors X +1, . . . , X +t and has to predict the following feature vector X -t +1 . This +t+1. This means it continues the music. As the input is continuous, the problem was modeled as a regression task. 
Discrete Fourier -Transformation (DFT) was used on chunks of lengthN of the +Transformation (DFT) was used on chunks of length N of the music to obtain features in the frequency domain. An implementation can be found at [VN15] and a demonstration can be found at [Vit15]. @@ -389,7 +388,7 @@ wrote a very good introduction into neural networks for music composition which explains those ideas [Joh15b]. Example compositions are available there, too. He also made the code for his Biaxial Recurrent Neural Network available under [Joh15a]. -VI. D ISCUSSION +VI. DISCUSSION What does these examples mean for our understanding of creativity? Does it influence how much we value art? Could we define art and creativity better after having those and similar @@ -404,38 +403,36 @@ algorithms which are purely based on machine learning don’t follow a central theme. They lack the ability to plan. Although clever algorithms were implemented for composing music, it seems as if there is still a lot of supervision involved. -R EFERENCES +REFERENCES [Cop87] D. Cope, “Experiments in music intelligence (emi),” 1987. [Online]. Available: http://hdl.handle.net/2027/spo.bbp2372.1987. 025 -[Cop05] ——, Computer models of musical creativity . MIT Press +[Cop05] ——, Computer models of musical creativity. MIT Press Cambridge, 2005. [Cop12] ——, “Emily howell fugue,” YouTube, Oct. 2012. [Online]. Available: https://www.youtube.com/watch?v=jLR- c uCwI [Cop13] ——, “The well-programmed clavier: Style in computer music composition,” XRDS: Crossroads, The ACM Magazine for -Students , vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: +Students, vol. 19, no. 4, pp. 16–20, 2013. [Online]. Available: http://dl.acm.org/citation.cfm?id=2460444 [Cur14] A. Curtis, “Now then,” BBC, Jul. 2014. [Online]. Available: http://www.bbc.co.uk/blogs/adamcurtis/entries/ 78691781- c9b7-30a0- 9a0a-3ff76e8bfe58 [Gad06] A. Gadsby, Ed., Dictionary of Contemporary English. Pearson Education Limited, 2006. -[GEB15] - L. A. 
Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of -artistic style,” arXiv preprint arXiv:1508.06576 , 2015. [Online]. +[GEB15] L. A. Gatys, A. S. Ecker, and M. Bethge, “A neural algorithm of +artistic style,” arXiv preprint arXiv:1508.06576, 2015. [Online]. Available: http://arxiv.org/abs/1508.06576 [goo15] “Inceptionism: Going deeper into neural networks,” Google Photos, Jun. 2015. [Online]. Available: https://goo.gl/Bydofw [HS97] S. Hochreiter and J. Schmidhuber, “Long short-term memory,” -Neural computation , vol. 9, no. 8, pp. 1735–1780, 1997. +Neural computation, vol. 9, no. 8, pp. 1735–1780, 1997. [Online]. Available: http://ieeexplore.ieee.org/xpl/freeabs all.jsp? arnumber=6795963 -[hut] - “50’000 euro prize for compressing human knowledge.” [Online]. +[hut] “50’000 euro prize for compressing human knowledge.” [Online]. Available: http://prize.hutter1.net/ [HZRS15] K. He, X. Zhang, S. Ren, and J. Sun, “Deep residual learning -for image recognition,” arXiv preprint arXiv:1512.03385 , 2015. +for image recognition,” arXiv preprint arXiv:1512.03385, 2015. [Online]. Available: http://arxiv.org/abs/1512.03385 [Joh15a] D. Johnson, “Biaxial recurrent neural network for music composition,” GitHub, Aug. 2015. [Online]. Available: https: //github.com/hexahedria/biaxial-rnn- music-composition @@ -452,20 +449,20 @@ https://github.com/karpathy/char-rnn ——, “The unreasonable effectiveness of recurrent neural networks,” Personal Blog, May 2015. [Online]. Available: http://karpathy.github.io/2015/05/21/rnn-effectiveness/ -[KMN + - 02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman, +[KMN+ +02] T. Kanungo, D. Mount, N. Netanyahu, C. Piatko, R. Silverman, and A. Wu, “An efficient k-means clustering algorithm: analysis and implementation,” Pattern Analysis and Machine Intelligence, IEEE Transactions on, vol. 24, no. 7, pp. 881–892, Jul 2002. -[Mit97] T. M. Mitchell, Machine learning , ser. McGraw Hill series in +[Mit97] T. M. 
Mitchell, Machine learning, ser. McGraw Hill series in computer science. McGraw-Hill, 1997. [MOT15] A. Mordvintsev, C. Olah, and M. Tyka, “Inceptionism: Going deeper into neural networks,” googleresearch.blogspot.co.uk, Jun. 2015. [Online]. Available: http://googleresearch.blogspot.de/ 2015/06/inceptionism-going- deeper-into- neural.html -[Nie15] M. A. Nielsen, Neural Networks and Deep Learning - . Determination Press, 2015. [Online]. Available: +[Nie15] M. A. Nielsen, Neural Networks and Deep Learning. + Determination Press, 2015. [Online]. Available: http://neuralnetworksanddeeplearning.com/chap6.html# introducing convolutional networks [NV15] A. Nayebi and M. Vitelli, “GRUV: Algorithmic music generation @@ -476,8 +473,8 @@ com/r/deepdream/ [Shi14] Y. Shih, “Style transfer for headshot portraits,” YouTube, Jun. 2014. [Online]. Available: https://www.youtube.com/watch?v= Hj5lGFzlubU -[SPB + - 14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, +[SPB+ +14] Y. Shih, S. Paris, C. Barnes, W. T. Freeman, and F. Durand, “Style transfer for headshot portraits,” ACM Transactions on Graphics (TOG), vol. 33, no. 4, p. 148, 2014. [Online]. Available: http://dl.acm.org/citation.cfm?id=2601137 @@ -502,8 +499,8 @@ Judgement to Calculation. W.H.Freeman & Co Ltd, 1976. networks,” in Computer Vision–ECCV 2014. Springer, 2014, pp. 818–833. -A PPENDIX A -A UTOM ATICALLY GENERATED T EXTS +APPENDIX A +AUTOM ATICALLY GENERATED TEXTS The following texts were generated by [Kar15a] and published by Karpathy on [Kar15b]. This is meant to be a copy for convenience and the case that the website is not reachable. A. Shakespeare @@ -550,13 +547,13 @@ C. Linux Code, 1 * Increment the size file of the new incorrect UI_FILTER group information * of the size generatively. -* / +*/ static int indicate_policy(void) { int error; if (fd == MARN_EPT) { / - * +* * The kernel blank will coeld it to userspace. */ if (ss->segment < mem_total) @@ -590,7 +587,7 @@ D. 
Linux Code, 2 * under the terms of the GNU General Public License version 2 as published by * the Free Software Foundation. * -* This program is distributed in the hope that it will be useful, +* This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * @@ -599,7 +596,7 @@ D. Linux Code, 2 * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -* / +*/ #include #include #include diff --git a/read/results/playa/1602.06541.txt b/read/results/playa/1602.06541.txt index 4cbf7c6..ab4a3ed 100644 --- a/read/results/playa/1602.06541.txt +++ b/read/results/playa/1602.06541.txt @@ -2,7 +2,7 @@ A Survey of Semantic Segmentation Martin Thoma info@martin-thoma.de -Abstract —This survey gives an overview over different +Abstract—This survey gives an overview over different techniques used for pixel-level semantic segmentation. Metrics and datasets for the evaluation of segmentation algorithms and traditional approaches for segmentation @@ -13,16 +13,16 @@ convolutional neural networks are mentioned and typical problematic situations for segmentation algorithms are examined. A taxonomy of segmentation algorithms is given. - I. I NTRODUCTION + I. INTRODUCTION Semantic segmentation is the task of clustering parts of images together which belong to the same object class. This type of algorithm has several usecases - such as detecting road signs [ MBLAGJ+ - 07], -detecting tumors [ MBVLG02 ], detecting medical instruments - in operations [WAH97 ], colon crypts segmentation - [ CRSS14], land use and land cover classification - [HDT02 ]. 
In contrast, non-semantic segmentation + such as detecting road signs [MBLAGJ+ +07], +detecting tumors [MBVLG02], detecting medical instruments + in operations [WAH97], colon crypts segmentation + [CRSS14], land use and land cover classification + [HDT02]. In contrast, non-semantic segmentation only clusters pixels together based on general characteristics of single objects. Hence the task of non-semantic segmentation is not well-defined, as many different @@ -49,7 +49,7 @@ brief, non-exhaustive summary of recently published semantic segmentation algorithms which are based on neural networks in Section VI. Finally, Section VII informs the reader about typical problematic cases for -segmentation algorithms. II. TAXONOMY OF S EGMENTATION A LGORITHMS +segmentation algorithms. II. TAXONOMY OF SEGMENTATION ALGORITHMS The computer vision community has published a wide range of segmentation algorithms so far. Those algorithms can be grouped by the kind of data they @@ -59,9 +59,9 @@ The following subsections will give four different criteria by which segmentation algorithms can be classified. This survey describes fixed-class (see Section II-A), -single-class affiliation (see Section II-B ) algorithms +single-class affiliation (see Section II-B) algorithms which work on grayscale or colored single pixel images -(see Section II-C ) in a completely automated, passive +(see Section II-C) in a completely automated, passive fashion (see Section II-D). A. Allowed classes Semantic segmentation is a classification task. As @@ -69,14 +69,14 @@ such, the classes on which the algorithm is trained is a central design decision. Most algorithms work with a fixed set of classes; some even only work on binary classes like foreground - vs background [ RM07], [ CS10 ] or street vs + vs background [RM07], [CS10] or street vs no street [BKTT15]. 
However, there are also unsupervised segmentation algorithms which do not distinguish classes at all (see Section V-B) as well as segmentation algorithms which are able to recognize when they don’t know a class. -For example, in [ GRC+ - 08 ] a void class was added +For example, in [GRC+ +08] a void class was added for classes which were not in the training set. Such a void class was also used in the MSRCv2 dataset (see Section III-B2) to make it possible to make more @@ -92,8 +92,7 @@ we simultaneously two labels to the coordinates of the glass: Glass and table. Although there is much more work being done on single class affiliation segmentation algorithms, there is a publication about multiple -class affiliation - segmentation [ LRAL08 ]. Similarly, +class affiliation segmentation [LRAL08]. Similarly, recent publications in pixel-level object segmentation used layered models [YHRF12].arXiv:1602.06541v2 [cs.CV] 11 May 2016 @@ -106,8 +105,8 @@ magnetic resonance (MR) imaging or ultrasonography whereas colored photographs are obviously widespread. • Excluding or including depth data: RGB-D, -sometimes also called range [ HJBJ + - 96 ] is available +sometimes also called range [HJBJ+ +96] is available in robotics, autonomous cars and recently also in consumer electronics such as Microsoft Kinect [Zha12]. @@ -118,7 +117,7 @@ stereo images was already tried in [BVZ01]. It can be seen as a more natural way of segmentation as most mammals have two eyes. It can also be seen as being related to having depth data. -Co-segmentation as in [ RMBK06], [ CXGS12 ] is +Co-segmentation as in [RMBK06], [CXGS12] is the problem of finding a consistent segmentation for multiple images. This problem can be seen in two ways: One the one hand, it can be seen @@ -129,15 +128,15 @@ of information to find a meaningful segmentation. This idea can be extended to time series such as videos. 
• 2D vs 3D - : Segmenting images is a 2D segmentation +: Segmenting images is a 2D segmentation task where the smallest unit is called a pixel. In 3D data, such as volumetric X-ray CT images -as they were used in [ HHR01 ], the smallest unit +as they were used in [HHR01], the smallest unit is called a voxel. D. Operation state The operation state of the classifying machine can -either be active as in [SUM + - 11 ], [SSA12] where robots +either be active as in [SUM+ +11], [SSA12] where robots can move objects to find a segmentation or passive, where the received image cannot be influenced. Among the passive algorithms, some segment in a completely @@ -145,12 +144,12 @@ automatic fashion, others work in an interactive mode. One example would be a system where the user clicks on the background or marks a coarse segmentation and the algorithm finds a fine-grained segmentation. [BJ00], -[ RKB04 ], [ PS07] describe systems which work in an -interactive mode. (a) Example Scene (b) Visualization of a found segmentation +[RKB04], [PS07] describe systems which work in an +interactive mode. (a) Example Scene (b) Visualization of a found segmentation Figure 1: An example of a scene and a possible visualization of a found segmentation. -III. E VALUATION AND D ATASETS +III. EVALUATION AND DATASETS A. Quality measures for evaluation A performance measure is a crucial part of any machine learning system. As users of a semantic @@ -172,24 +171,24 @@ meaningful information about the overall accuracy, there are a couple of metrics how accuracy can be defined. For this section, let k ∈ N be the number of classes, n - ij ∈ N - 0 with i, j ∈ 1 , . . . , k be the number of pixels +ij ∈ N +0 with i, j ∈ 1, . . . , k be the number of pixels which belong to class i and were labeled as class j . (n - ij ) is called a confusion matrix . Let t - i = - k -j =1 n - ij +ij ) is called a confusion matrix. Let t +i = +k +j=1 n +ij be the total number of pixels of class i. 
One way to compare segmentation algorithms is by the pixel-wise accuracy of the predicted segmentation -as done in many publications [ SWRC06], [ CP08], -[ LSD14 ]. This is also called per-pixel rate and defined +as done in many publications [SWRC06], [CP08], +[LSD14]. This is also called per-pixel rate and defined as - k +k i=1 n - ii +ii k i=1 t @@ -211,70 +210,70 @@ general “car” and the more specific “wheel of a car” Three accuracy metrics which do not suffer from problem P1 are used in [LSD14]: -• mean accuracy : 1 +• mean accuracy: 1 k · k -i =1 n - ii +i=1 n +ii t -i ∈ [0 , 1] +i ∈ [0, 1] • mean intersection over union: 1 k · k -i =1 n - ii +i=1 n +ii t - i − n -ii + - k -j =1 n - ji ∈ [0, 1] -• frequency weighted intersection over union : +i−n +ii+ +k +j=1 n +ji ∈ [0, 1] +• frequency weighted intersection over union: ( k -i =1 t -i ) −1 +i=1 t +i)−1 k -i =1 t +i=1 t i · n - ii +ii t -i − n - ii + +i−n +ii+ k -j =1 n -ji ∈ [0 , 1] +j=1 n +ji ∈ [0, 1] Another problem might be pixels which cannot be assigned to one of the known classes. For this reason, -[ SWRC06 ] makes use of a void class. This class gets +[SWRC06] makes use of a void class. This class gets completely ignored for all quality measures. Hence the -total number of pixels is assumed to bewidth · height − +total number of pixels is assumed to be width · height − number of void pixels. One way to deal with problem P1 and problem P2 -is giving the confusion matrix as done in [ SWRC06 ]. +is giving the confusion matrix as done in [SWRC06]. However, this approach is not feasible if many classes are given. The F -measure is useful for binary classification task such as the KITTI road segmentation -benchmark [ FKG13 ] or crypt segmentation as done -by [ CRSS14]. It is calculated as “the harmonic mean +benchmark [FKG13] or crypt segmentation as done +by [CRSS14]. 
It is calculated as “the harmonic mean of the precision and recall” [PH05]: F β = (1 + β )2 tp (1 + β 2 - ) · tp + β 2 +) · tp + β 2 · fn + fp where β = 1 is chosen in most cases and tp means -true positive , fn means false negative and fp means +true positive, fn means false negative and fp means false positive. Finally, it should be noted that a lot of other measures for the accuracy of segmentations were proposed for non-semantic segmentation. One of those accuracy measures is Normalized Probabilistic Rand (NPR) -index which was introduced in [ UPH05 ] and evaluated - in [ CSI + - 09 ] on dermoscopy images. Other +index which was introduced in [UPH05] and evaluated + in [CSI+ +09] on dermoscopy images. Other non-semantic segmentation measures were introduced in [MFTM01], but the reason for creating them seems to be to deal with the under-defined task description of nonsemantic @@ -291,10 +290,10 @@ pixel as street or no-street and thus makes a semantic segmentation, every image Most papers do not give exact values for the time their application needs. One reason might be that this is very hardware, implementation and in some cases even -data specific. For example, [HJBJ + - 96 ] notes that their +data specific. For example, [HJBJ+ +96] notes that their algorithm needs 10 s on a Sun SparcStation 20. The -fastest CPU ever produced for this system had200 MHz. +fastest CPU ever produced for this system had 200 MHz. Comparing this directly with results which were obtained using an Intel i7-4820K with 3.9 GHz would not be meaningful. @@ -332,22 +331,21 @@ the following, only the most widely used ones as well as three medical databases are described. An overview over the quantity and the kind of data is given by Table I. -1) PASCAL VOC: The PASCAL 1 - VOC 2 +1) PASCAL VOC: The PASCAL1 + VOC2 challenge was organized eight times with different datasets: -Once every year from 2005 to 2012 [ EVGW+ - b ]. +Once every year from 2005 to 2012 [EVGW+ +b]. 
1 - p - attern analysis, statistical modelling and computational l earning, +pattern analysis, statistical modelling and computational learning, an EU network of excellence 2 - V isual Object Classes +Visual Object Classes Beginning with 2007, a segmentation challenge was -added [EVGW + - a]. +added [EVGW+ +a]. The dataset consists of annotated photographs from www.flicker.com, a photo sharing website. There are multiple challenges for PASCAL VOC. The 2012 @@ -376,10 +374,10 @@ the object boundaries” [SWRC06]. consists of 165 images with pixel-level annotation of 5 classes: “healthy, adenomatous, moderately differentiated, moderately-to-poorly differentiated, and poorly -differentiated” [ CSM09]. This dataset is part of the +differentiated” [CSM09]. This dataset is part of the Gland Segmentation (GlaS) challenge. -The DIARETDB1 [KKV + - 14 ] is a dataset of 89 images +The DIARETDB1 [KKV+ +14] is a dataset of 89 images fundus images. Those images show the interior surface of the eye. Fundus images can be used to detect diabetic retinopathy. The images have four classes of @@ -387,11 +385,11 @@ coarse annotations: hard and soft exudates, hemorrhages and red small dots. 20 test and additionally 20 training retinal fundus images are available through the DRIVE data -set [ SAN+ - 04 ]. The vessels were annotated. Additionally, +set [SAN+ +04]. The vessels were annotated. Additionally, [AP11] added vascular features. -The Open-CAS Endoscopic Datasets [ MHMK+ - 14] +The Open-CAS Endoscopic Datasets [MHMK+ +14] are 60 images taken from laparoscopic adrenalectomies and 60 images taken from laparoscopic pancreatic resections. Those are from 3 surgical procedures each. @@ -401,14 +399,15 @@ images were labeled by anonymous untrained workers to which they refer to as knowledge workers (KWs). One crowd annotation was obtained for each image by a majority vote on a pixel basis of 10 segmentations -given by 10 different KWs. Training +given by 10 different KWs. 
Training Prediction PostprocessingWindow-wise ClassificationWindow extraction Data augmentation -Feature extraction Preprocessing Figure 2: A typical segmentation pipeline gets raw +Feature extractionPreprocessing +Figure 2: A typical segmentation pipeline gets raw pixel data, applies preprocessing techniques like scaling and feature extraction like HOG features. For training, data augmentation @@ -420,10 +419,10 @@ semantic segmentation can be refined by simple morphologic operations or by more complex approaches such as Markov Random Fields (MRFs). -IV. S EGMENTATION P IPELINE +IV. SEGMENTATION PIPELINE Typically, semantic segmentation is done with a classifier which operates on fixed-size feature inputs -and a sliding-window approach [ DT05 ], [ YBCK10], +and a sliding-window approach [DT05], [YBCK10], [SCZ08]. This means a classifier is trained on images of a fixed size. The trained classifier is then fed with rectangular regions of the image which are called windows. @@ -431,9 +430,9 @@ rectangular regions of the image which are called windows. 51 px × 51 px of the environment, it might only classify the center pixel or a subset of the complete window. This segmentation pipeline is visualized in Figure 2. -This approach was taken by [BKTT15 ] and a majority - of the VOC2007 participants [ EVGW + - a]. As this +This approach was taken by [BKTT15] and a majority + of the VOC2007 participants [EVGW+ +a]. As this approach has to apply the patch classifier 512 · 512 = 262 144 times for images of size 512 px × 512 px, there are techniques for speeding it up such as applying a @@ -447,14 +446,14 @@ Conditional Random Fields (CRFs) which take the information of the complete image and segment it in an holistic approach. -V. T RADITIONAL A PPROACHES +V. TRADITIONAL APPROACHES Image segmentation algorithms which use traditional approaches, hence don’t apply neural networks and make heavy use of domain knowledge, are wide-spread in the computer vision community. 
Features which can -be used for segmentation are described in SectionV-A , +be used for segmentation are described in Section V-A, a very brief overview of unsupervised, non-semantic -segmentation is given in SectionV-B, Random Decision +segmentation is given in Section V-B, Random Decision Forests are described in Section V-C, Markov Random Fields in Section V-E and Support Vector Machines (SVMs) in Section V-D. Postprocessing is covered in @@ -475,52 +474,52 @@ for the gray-value) are the most widely used features. A typical image is in the RGB color space, but depending on the classifier and the problem another color space might result in better segmentations. RGB, YcBcr, HSL, -Lab and YIQ are some examples used by [ CRSS14]. +Lab and YIQ are some examples used by [CRSS14]. No single color space has been proven to be superior -to all others in all contexts [ CJSW01]. However, the +to all others in all contexts [CJSW01]. However, the most common choices seem to be RGB and HSI. Reasons for choosing RGB is simplicity and the support by programming languages, whereas the choice of the HSI color space might make it simpler for the classifier to become invariant to illumination. One reason for choosing CIE-L*a*b* color space is that it -approximates human perception of brightness [ KP92 ]. +approximates human perception of brightness [KP92]. It follows that choosing the L*a*b color space helps algorithms to detect structures which are seen by humans. Another way of improving the structure within an image is histogram equalization, which can be applied to improve contrast [PAA+ - 87], [RM07]. +87], [RM07]. 2) Histogram of oriented Gradients: Histogram of oriented gradients (HOG) features interpret the image -as a discrete function I : N 2 +as a discrete function I : N2 → { 0, . . . , 255 } which -maps the position (x, y ) to a color. For each pixel, there -are two gradients: The partial derivative of x and y . +maps the position (x, y) to a color. 
For each pixel, there +are two gradients: The partial derivative of x and y. Now the original image is transformed to two feature maps of equal size which represents the gradient. These feature maps are splitted into patches and a histogram of the directions is calculated for each patch. HOG features -were proposed in [DT05 ] and are used in [BMBM10], +were proposed in [DT05] and are used in [BMBM10], [FGMR10] for segmentation tasks. 3) SIFT: Scale-invariant feature transform (SIFT) feature descriptors describe keypoints in an image. The image patch of the size 16 × 16 around the keypoint is taken. This patch is divided in 16 distinct parts of -the size 4 × 4 . For each of those parts a histogram of +the size 4 × 4. For each of those parts a histogram of 8 orientations is calculated similar as for HOG features. This results in a 128-dimensional feature vector for each keypoint. It should be emphasized that SIFT is a global feature for a complete image. -SIFT is described in detail in [Low04 ] and are used +SIFT is described in detail in [Low04] and are used in [PTN09]. 4) BOV: Bag-of-visual-words (BOV), also called -bag of keypoints , is based on vector quantization. +bag of keypoints, is based on vector quantization. Similar to HOG features, BOV features are histograms which count the number of occurrences of certain patterns within a patch of the image. BOV are described -in [ CDF+ - 04 ] and used in combination with SIFT +in [CDF+ +04] and used in combination with SIFT feature descriptors in [CP08]. 5) Poselets: Poselets rely on manually added extra keypoints such as “right shoulder”, “left shoulder”, @@ -551,7 +550,7 @@ contain much more information. A simple approach to deal with this is downsampling the high-resolution image to a low-resolution variant. Another way of doing dimensionality reduction is principal component -analysis (PCA), which is applied by [ COWR11 ]. The +analysis (PCA), which is applied by [COWR11]. 
The idea behind PCA is to find a hyperplane on which all feature vectors can be projected with a minimal loss @@ -577,8 +576,8 @@ consistent regions or region boundaries. 1) Clustering Algorithms: Clustering algorithms can directly be applied on the pixels, when one gives a feature vector per pixel. Two clustering algorithms are -k -means and the mean-shift algorithm. -The k -means algorithm is a general-purpose clustering +k-means and the mean-shift algorithm. +The k-means algorithm is a general-purpose clustering algorithm which requires the number of clusters to be given beforehand. Initially, it places the k centroids randomly in the feature space. Then it assigns each @@ -586,10 +585,10 @@ data point to the nearest centroid, moves the centroid to the center of the cluster and continues the process until a stopping criterion is reached. A faster variant is described in [Har75]. -k -means was applied by [CLP98] for medical image +k-means was applied by [CLP98] for medical image segmentation. Another clustering algorithm is the mean-shift algorithm - which was introduced by [ CM02] for segmentation + which was introduced by [CM02] for segmentation tasks. The algorithm finds the cluster centers by initializing centroids at random seed points and iteratively shifting them to the mean coordinate within @@ -602,8 +601,8 @@ points. 2) Graph Based Image Segmentation: Graph-based image segmentation algorithms typically interpret pixels as vertices and an edge weight is a measure of -dissimilarity such as the difference in color [ FH04 ], -[ Fel]. There are several different candidates for edges. The 4-neighborhood (north, east, south west) or an 8neighborhood +dissimilarity such as the difference in color [FH04], +[Fel]. There are several different candidates for edges. The 4-neighborhood (north, east, south west) or an 8neighborhood (north, north-east, east, south-east, south, south-west, west, north-west) are plausible choices. 
One way to cut the edges is by building a minimum @@ -611,12 +610,12 @@ spanning tree and removing edges above a threshold. This threshold can either be constant, adapted to the graph or adjusted by the user. After the edge-cutting step, the connected components are the segments. -A graph-based method which ranked 2 nd +A graph-based method which ranked 2nd in the Pascal VOC 2010 challenge [EVGW+ - 10 ] is described -in [ CS10 ]. The system makes heavy use of the multicue - contour detector globalPb [ MAFM08] and needs +10] is described +in [CS10]. The system makes heavy use of the multicue + contour detector globalPb [MAFM08] and needs about 10 GB of main memory [CS11]. 3) Random Walks: Random walks belong to the graph-based image segmentation algorithms. Random @@ -660,13 +659,13 @@ watersheds due to plateaus. C. Random Decision Forests Random Decision Forests were first proposed -in [ Ho95 ]. This type of classifier applies techniques -called ensemble learning , where multiple classifiers +in [Ho95]. This type of classifier applies techniques +called ensemble learning, where multiple classifiers are trained and a combination of their hypotheses is used. One ensemble learning technique is the random subspaces method where each classifier is trained on a random subspace of the feature space. Another -ensemble learning technique is bagging , which is +ensemble learning technique is bagging, which is training the trees on random subsets of the training set. In the case of Random Decision Forests, the classifiers are decision trees. A decision tree is a tree where each @@ -680,25 +679,25 @@ ordinal, interval, ratio) can be arbitrary. Another advantage for example, is the speed of training and classification. Decision trees were extensively studied in the past 20 years and a multitude of training algorithms have -been proposed (e.g. ID3 in [ Qui86], C4.5 in [ Qui93 ]). +been proposed (e.g. ID3 in [Qui86], C4.5 in [Qui93]). 
Possible training hyperparameters are the measure to evaluate the “goodness of split” [Min89], the number of decision trees being used, and if the depth of the trees is restricted. Typically in the context of classification, decision trees are trained by adding new nodes until each leaf contains only nodes of a single class or until it -is not possible to split further. This is called astopping +is not possible to split further. This is called a stopping criterion. There are two typical training modes: Central axis -projection and perceptron training . In training, for +projection and perceptron training. In training, for each node a hyperplane is searched which is optimal according to an error function. Random Decision Forests with texton features (see Section V-A6) are applied in [SJC08] for segmentation. In the [MSC] dataset, they report a per-pixel accuracy -rate of 66 .9 % for their best system. This system +rate of 66.9 % for their best system. This system requires 415 ms for the segmentation of 320 px × 213 px -images on a single 2 .7 GHz core. On the Pascal +images on a single 2.7 GHz core. On the Pascal VOC 2007 dataset, they report an average per-pixel accuracy for their best segmentation system of 42 %. An excellent introduction to Random Decision @@ -707,22 +706,22 @@ D. SVMs SVMs are well-studied binary classifiers which can be described by five central ideas. For those ideas, the training data is represented as (x - i , y -i ) where x - i is the +i, y +i) where x +i is the feature vector and y - i ∈ { −1, 1 } the binary label for +i ∈ { −1, 1 } the binary label for training example i ∈ { 1, . . . , m }. 1) If data is linearly separable, it can be separated by a hyperplane. There is one hyperplane which maximizes the distance to the next datapoints (support vectors). This hyperplane should be taken: minimize -w ,b 1 -2 w 2 +w,b 1 +2 w2 s.t. 
∀m -i=1 y -i · ( w , x - i + b) +i=1y +i · (w, x +i + b) sgn applied to this gives the classification≥ 1 2) Even if the underlying process which generates the @@ -732,39 +731,39 @@ noise can make the data not separable. The introduction of linear separability solves this problem. The trade-off between accepting some errors and a more complex model is weighted by a parameter -C ∈ R + +C ∈ R+ 0 . The bigger C , the more errors are accepted. The new optimization problem is: minimize w 1 -2 w 2 +2 w2 + C · m i=1 ξ - i +i s.t. ∀m -i=1 y -i · ( w , x - i + b) ≥ 1 − ξ - i +i=1y +i · (w, x +i + b) ≥ 1 − ξ +i Note that 0 ≤ ξ - i ≤ 1 means that the data point +i ≤ 1 means that the data point is within the margin, whereas ξ - i ≥ 1 means it is +i ≥ 1 means it is misclassified. An SVM with C > 0 is also called a soft-margin SVM. 3) The primal problem is to find the normal vector -w and the bias b . The dual problem is to express +w and the bias b. The dual problem is to express w as a linear combination of the training data x - i : +i: w = m -i =1 α -i y -i x - i +i=1 α +iy +ix +i where y -i ∈ { −1 , 1 } represents the class of the +i ∈ { −1, 1 } represents the class of the training example and α i are Lagrange multipliers. The usage of Lagrange multipliers is explained @@ -789,64 +788,64 @@ i − 1 i=1 m -j =1 α -i α +j=1 α +iα j y - i y - j x - i , x - j +iy +j x +i, x +j s.t. ∀m -i=1 0 ≤ α - i ≤ C +i=10 ≤ α +i ≤ C s.t. m i=1 α -i y - i = 0 +iy +i = 0 4) Not every dataset is linearly separable. This problem is approached by transforming the feature vectors x with a non-linear mapping Φ into -a higher dimensional (probably ∞ -dimensional) +a higher dimensional (probably ∞-dimensional) space. As the feature vectors x are only used within scalar product x - i , x -j , it is not necessary +i, x +j , it is not necessary to do the transformation. It is enough to do the calculation - K ( x -i , x - j ) = x - i , x - j -This function K is called a kernel . 
The idea of + K (x +i, x +j ) = x +i, x +j +This function K is called a kernel. The idea of never explicitly transforming the vectors x - i to the +i to the higher dimensional space is called the kernel trick. Common kernels include the polynomial kernel K - P (x - i , x - j ) = (x - i , x - j + r )p -of degree p and coefficient r , the Gaussian radial +P (x +i, x +j ) = (x +i, x +j + r)p +of degree p and coefficient r, the Gaussian radial basis function (RBF) kernel K - Gauss (x - i , x - j ) = e − γ x -i − x +Gauss(x +i, x +j ) = e −γx +i−x j 2 -2σ 2 +2σ2 and the sigmoid kernel K -tanh (x - i , x -j ) = tanh( γ x - i , x - j − r ) +tanh(x +i, x +j ) = tanh(γ x +i, x +j − r) where the parameter γ determines how much influence single training examples have. 5) The described SVMs can only distinguish between @@ -855,21 +854,21 @@ binary classifiers to multi-class classification is the one-vs-all and the one-vs-one strategy. In the one-vs-all strategy n classifiers have to be trained which can distinguish one of the n classes against -all other classes. In the one-vs-one strategy n 2 - − n +all other classes. In the one-vs-one strategy n2 +−n 2 classifiers are trained; one classifier for each pair of classes. A detailed description of SVMs can be found in [Bur98]. -SVMs are used by [YHRF12 ] on the 2009 and 2010 -PASCAL segmentation challenge [ EVGW + - 10 ]. They +SVMs are used by [YHRF12] on the 2009 and 2010 +PASCAL segmentation challenge [EVGW+ +10]. They did not hand their classifier in to the challenge itself, but calculated an average rank of 7 among the different categories. -[ FGMR10] also used an SVM based method with -HOG features and achieved the 7 th +[FGMR10] also used an SVM based method with +HOG features and achieved the 7th rank in the 2010 PASCAL segmentation challenge by mean accuracy. It needs about 2 s on a 2.8 GHz 8-core Intel processor. 
@@ -878,52 +877,52 @@ MRFs are undirected probabilistic graphical models which are wide-spread model in computer vision. The overall idea of MRFs is to assign a random variable for each feature and a random variable for each pixel which x - 1 x - 2 x - 3x - 4 x - 5 x - 6x - 7 x - 8 x - 9 +1 x +2 x +3x +4 x +5 x +6x +7 x +8 x +9 y - 1 y +1 y 2 y 3y 4 y - 5 y - 6y +5 y +6y 7 y 8 y 9 x - 1 x - 2 x - 3x - 4 x - 5 x - 6x - 7 x - 8 x - 9 +1 x +2 x +3x +4 x +5 x +6x +7 x +8 x +9 y - 1 y +1 y 2 y 3y 4 y - 5 y - 6y +5 y +6y 7 y 8 y 9 Figure 3: CRF with 4-neighborhood. Each node x - i -represents a pixel and each nodey +i +represents a pixel and each node y i represents a label. gets labeled as shown in Figure 3. For example, a MRF -which is trained on images of the size224 px × 224 pixel +which is trained on images of the size 224 px×224 pixel and gets the raw RGB values as features has 224 · 224 · 3 @@ -933,9 +932,9 @@ output = 200 704 random variables. Those random variables are conditionally independent, given their local neighborhood. These (in)dependencies can be expressed with a graph. -Let G = ( V , E ) be the associated undirected graph +Let G = (V , E ) be the associated undirected graph of an MRF and C be the set of all maximal cliques in -that graph. Nodes represent random variables x , y and +that graph. Nodes represent random variables x, y and edges represent conditional dependencies. Just like in he 4-neighborhood [SWRC06] and the 8-neighborhood are reasonable choices for constructing the graph. @@ -945,76 +944,76 @@ and edges represent pixel neighborhood in computer vision problems segmentation problems where MRFs are used. Accordingly, the random variables y live on 1, . . . , nr of classes and the random variables x -typically live on 0, . . . , 255 or [0 , 1]. -The probability of x , y can be expressed as -P ( x, y ) = 1 -Z e − E (x ,y ) +typically live on 0, . . . , 255 or [0, 1]. 
+The probability of x, y can be expressed as +P (x, y) = 1 +Z e−E(x,y) where Z = -x ,y e −E ( x,y ) +x,y e−E(x,y) is a normalization term called the partition function and E is called the energy function. A common choice for the energy function is -E ( x, y ) = +E (x, y) = c∈C ψ -c (x , y ) -where ψ is called a clique potential . One choice for +c(x, y) +where ψ is called a clique potential. One choice for cliques of size two x, y = (x - 1 , x -2 ) is [KP06] +1, x +2) is [KP06] ψ -c (x - 1 , x -2 ) = wδ (x - 1 , x -2 ) = - +w if x - 1 = x - 2 +c(x +1, x +2) = wδ(x +1, x +2) = ++w if x +1 = x +2 −w if x - 1 = x - 2 -According to [ Mur12], the most common way of +1 = x +2 +According to [Mur12], the most common way of inference over the posterior MRF in computer vision problems is Maximum A Posteriori (MAP) estimation. Detailed introductions to MRFs are given by -[ BKR11 ], [ Mur12]. MRFs are used by [ ZBS01] and +[BKR11], [Mur12]. MRFs are used by [ZBS01] and [MSB12] for image segmentation. F. Conditional Random Fields CRFs are MRFs where all clique potentials are -conditioned on input features [ Mur12]. This means, -instead of learning the distribution P ( y , x ), the task -is reformulated to learn the distribution P (y |x ) . One +conditioned on input features [Mur12]. This means, +instead of learning the distribution P (y, x), the task +is reformulated to learn the distribution P (y|x). One consequence of this reformulation is that CRFs need much less parameters as the distribution of x does not have to be estimated. Another advantage of CRFs compared to MRFs is that no distribution assumption about x has to be made. 
A CRF has the partition function Z : -Z (x ) = -y P ( x, y ) +Z (x) = +y P (x, y) and joint probability distribution -P ( y | x ) = 1 -Z ( x ) +P (y|x) = 1 +Z (x) c∈C ψ -c (y - c | x ) -The simplest way to define the clique potentialsψ is +c(y +c|x) +The simplest way to define the clique potentials ψ is the count of the class y - c given x added with a positive +c given x added with a positive smoothing constant to prevent the complete term from getting zero. -CRFs as described in [ LRKT09 ] have reached top -performance in PASCAL VOC 2010 [ VOC10] and -are also used in [ HZCP04], [ SWRC06 ] for semantic +CRFs as described in [LRKT09] have reached top +performance in PASCAL VOC 2010 [VOC10] and +are also used in [HZCP04], [SWRC06] for semantic segmentation. A method similar to CRFs was proposed -in [ GBVdW+ - 10]. The system of Gonfaus et.al. -ranked 1 st +in [GBVdW+ +10]. The system of Gonfaus et.al. +ranked 1st by mean accuracy in the segmentation task of the PASCAL VOC 2010 challenge [EVGW+ - 10]. +10]. An introduction to CRFs is given by [SM11]. G. Post-processing methods Post-processing refine a found segmentation and @@ -1027,11 +1026,11 @@ in otherwise filled regions. They were used in [CLP98] for biomedical image segmentation. Another way of refinement of the found segmentation is by adjusting the segmentation to match close edges. -This was used in [ BBMM11] with an ultra-metric +This was used in [BBMM11] with an ultra-metric contour map [AMFM09]. Active contour models are another example of a -post-processing method [KWT88]. VI. N EURAL N ETWORKS FOR S EM ANTIC -S EGM ENTATION +post-processing method [KWT88]. VI. NEURAL NETWORKS FOR SEM ANTIC +SEGM ENTATION Artificial neural networks are classifiers which are inspired by biologic neurons. Every single artificial neuron has some inputs which are weighted and sumed @@ -1040,7 +1039,7 @@ function to the weighted sum and gives an output. 
Those neurons can take either a feature vector as input or the output of other neurons. In this way, they build up feature hierarchies. -The parameters they learn are the weights w ∈ R . +The parameters they learn are the weights w ∈ R. They are learned by gradient descent. To do so, an error function — usually cross-entropy or mean squared error — is necessary. For the gradient descent algorithm, one @@ -1057,48 +1056,48 @@ CNNs are neural networks which learn image filters. They drastically reduce the number of parameters which have to be learned while being still general enough for the problem domain of images. This was shown by Alex -Krizhevsky et al. in [ KSH12 ]. One major idea was a +Krizhevsky et al. in [KSH12]. One major idea was a clever regularization called dropout training, which set the output of neurons while training randomly to zero. Another contribution was the usage of an activation -function called rectified linear unit : +function called rectified linear unit: ϕ -ReLU ( x ) = max(0 , x) +ReLU(x) = max(0, x) Those are much faster to train than the commonly used sigmoid activation functions ϕ -Sigmoid ( x ) = 1 -e − x +Sigmoid(x) = 1 +e−x + 1 Krizhevsky et al. implemented those ideas and participated in the ImageNet Large-Scale Visual Recognition Challenge (ILSVRC). The best other system, which used SIFT features and Fisher Vectors, had a performance - of about 25 .7 % while the network by Alex -Krizhevsky et al. got 17 .0 % error rate on the ILSVRC2010 + of about 25.7 % while the network by Alex +Krizhevsky et al. got 17.0 % error rate on the ILSVRC2010 dataset. As a preprocessing step, they downsampled all images to a fixed size of 256 px × 256 px before they fed the features into their network. This network is commonly known as AlexNet. Since AlexNet was developed, a lot of different neural networks have been proposed. 
One interesting -example is [PC13 ], where a recurrent CNN for semantic +example is [PC13], where a recurrent CNN for semantic segmentation is presented. -Another notable paper is [ LSD14 ]. The algorithm +Another notable paper is [LSD14]. The algorithm presented there makes use of a classifying network such as AlexNet, but applies the complete network as an image filter. This way, each pixel gets a probability distribution for each of the trained classes. By taking the most likely class, a semantic segmentation can be done with arbitrary image sizes. -A very recent publication by Dai et al. [ DHS15] +A very recent publication by Dai et al. [DHS15] showed that segmentation with much deeper networks is possible and achieves better results. More detailed explanations to neural networks for visual recognition is given by [LKJ15]. -VII. P OSSIBLE P ROBLEMS IN THE D ATA FOR -S EGMENTATION ALGORITHMS +VII. POSSIBLE PROBLEMS IN THE DATA FOR +SEGMENTATION ALGORITHMS Different segmentation workflows have different problems. However, there are a couple of special cases which should be tested. Those cases might not occur @@ -1109,7 +1108,7 @@ I am not aware of any systematic work which examined A. Lens Flare Lens flare is the effect of light getting scattered in the lens system of the camera. The testing data set of -the KITTI road evaluation benchmark [ FKG13] has a +the KITTI road evaluation benchmark [FKG13] has a couple of photos with this problem. Figure 4(a) shows an extreme example of lens flare. B. Vignetting @@ -1129,11 +1128,11 @@ are problems depends heavily on the problem domain and the used model. 1) Partial Occlusions: Segmentation systems which employ a model of the objects which should be -segmented might suffer from partial occlusions. (a)Lens Flare -Image by [Hus07] (b)Vignetting +segmented might suffer from partial occlusions. 
(a) Lens Flare +Image by [Hus07] (b) Vignetting Image by [Man12] -(c)Smoke by cauterization -Image by [GVSY13] (d)Camouflage +(c) Smoke by cauterization +Image by [GVSY13] (d) Camouflage Image by [Kaf07] (e) Transparency (f) Viewpoint Figure 4: Examples of images which might cause @@ -1160,8 +1159,8 @@ on photographs of professional photographers might not have photos from the point of view of a child. This is visualized in Figure 4(f). -VIII. D ISCUSSION -Ohta et al. wrote [ OKS78 ] 38 years ago. It is one +VIII. DISCUSSION +Ohta et al. wrote [OKS78] 38 years ago. It is one of the first papers mentioning semantic segmentation. In this time, a lot of work was done and many different directions have been explored. Different kinds @@ -1182,64 +1181,64 @@ A combination of different classifiers in an ensemble would be an interesting option to explore in order to improve accuracy. Another direction which is currently studied is combining classifiers such as neural networks -with CRFs [ZJRP + - 15]. R EFERENCES +with CRFs [ZJRP+ +15]. REFERENCES [AM98] M. S. Atkins and B. T. Mackiewich, “Fully automatic segmentation of the brain in mri,” Medical Imaging, IEEE Transactions -on , vol. 17, no. 1, pp. 98–107, Feb. 1998. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=668699 +on, vol. 17, no. 1, pp. 98–107, Feb. 1998. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=668699 [AMFM09] P. Arbelaez, M. Maire, C. Fowlkes, and J. Malik, “From contours to regions: An empirical evaluation,” in Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE -Conference on . IEEE, Jun. 2009, pp. 2294–2301. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=5206707 +Conference on. IEEE, Jun. 2009, pp. 2294–2301. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5206707 [AP11] G. Azzopardi and N. 
Petkov, “Detection of retinal vascular bifurcations by trainable v4-like filters,” in Computer Analysis of Images and -Patterns . Springer, 2011, pp. 451–459. [Online]. -Available: http://www. cs. rug. nl/~imaging/databases/ -retina_database/retinalfeatures_database. html +Patterns. Springer, 2011, pp. 451–459. [Online]. +Available: http://www.cs.rug.nl/~imaging/databases/ +retina_database/retinalfeatures_database.html [BBMM11] T. Brox, L. Bourdev, S. Maji, and J. Malik, “Object segmentation by alignment of poselet activations to image contours,” in Computer Vision and Pattern Recognition (CVPR), 2011 IEEE -Conference on . IEEE, Jun. 2011, pp. 2225–2232. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=5995659 +Conference on. IEEE, Jun. 2011, pp. 2225–2232. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5995659 [BJ00] Y. Boykov and M.-P. Jolly, “Interactive organ segmentation using graph cuts,” in Medical Image Computing and Computer-Assisted Intervention– -MICCAI 2000 . Springer, 2000, pp. 276– -286. [Online]. Available: http://link . springer . com/ -chapter/10 . 1007/978- 3-540- 40899-4_28 +MICCAI 2000. Springer, 2000, pp. 276– +286. [Online]. Available: http://link.springer.com/ +chapter/10.1007/978- 3-540- 40899-4_28 [BKR11] A. Blake, P. Kohli, and C. Rother, Markov random fields for vision and image processing. Mit Press, 2011. [BKTT15] S. Bittel, V. Kaiser, M. Teichmann, and M. Thoma, “Pixel-wise segmentation of street with neural networks,” arXiv preprint arXiv:1511.00513, 2015. -[Online]. Available: http://arxiv. org/abs/1511. 00513 +[Online]. Available: http://arxiv.org/abs/1511.00513 [BMBM10] L. Bourdev, S. Maji, T. Brox, and J. Malik, “Detecting people using mutually consistent poselet activations,” in Computer Vision–ECCV -2010 . Springer, 2010, pp. 168–181. [Online]. -Available: http://link. springer. com/chapter/10. 1007/ +2010. Springer, 2010, pp. 168–181. [Online]. 
+Available: http://link.springer.com/chapter/10.1007/ 978- 3-642- 15567-3_13#page- 1 [Bur98] C. J. Burges, “A tutorial on support vector machines for pattern recognition,” Data mining and knowledge -discovery , vol. 2, no. 2, pp. 121–167, 1998. +discovery, vol. 2, no. 2, pp. 121–167, 1998. [BVZ01] Y. Boykov, O. Veksler, and R. Zabih, “Fast approximate energy minimization via graph cuts,” Pattern Analysis and Machine Intelligence, IEEE -Transactions on , vol. 23, no. 11, pp. 1222–1239, -2001. [Online]. Available: http://ieeexplore. ieee. org/ -xpls/abs_all. jsp?arnumber=969114 -[CDF + - 04] G. Csurka, C. Dance, L. Fan, J. Willamowski, +Transactions on, vol. 23, no. 11, pp. 1222–1239, +2001. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=969114 +[CDF+ +04] G. Csurka, C. Dance, L. Fan, J. Willamowski, and C. Bray, “Visual categorization with bags of keypoints,” in Workshop on statistical learning in computer vision, ECCV, vol. 1, no. 1-22. Prague, @@ -1254,269 +1253,268 @@ knowledge-based morphological operations with biomedical applications,” Image Processing, IEEE Transactions on, vol. 7, no. 12, pp. 1673–1683, Dec. -1998. [Online]. Available: http://ieeexplore. ieee. org/ -xpls/abs_all. jsp?arnumber=730379 +1998. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=730379 [CM02] D. Comaniciu and P. Meer, “Mean shift: A robust approach toward feature space analysis,” Pattern Analysis and Machine Intelligence, IEEE Transactions on, vol. 24, no. 5, pp. 603–619, 2002. -[Online]. Available: http://ieeexplore . ieee . org/xpl/ -login . jsp?tp=&arnumber=1000236 +[Online]. Available: http://ieeexplore.ieee.org/xpl/ +login.jsp?tp=&arnumber=1000236 [COWR11] C. Chen, J. Ozolek, W. Wang, and G. K. Rohde, “A pixel classification system for segmenting biomedical images using intensity neighborhoods and dimension reduction,” in Biomedical Imaging: From Nano to Macro, 2011 IEEE International -Symposium on . IEEE, 2011, pp. 
1649–1652. -[Online]. Available: https://www . andrew . cmu . edu/ -user/gustavor/chen_isbi_11. pdf +Symposium on. IEEE, 2011, pp. 1649–1652. +[Online]. Available: https://www.andrew.cmu.edu/ +user/gustavor/chen_isbi_11.pdf [CP08] G. Csurka and F. Perronnin, “A simple high performance approach to semantic segmentation.” -in BMVC , 2008, pp. 1–10. [Online]. Available: - http://www . xrce . xerox . com/layout/set/print/ -content/download/16654/118653/file/2008-023 . pdf +in BMVC, 2008, pp. 1–10. [Online]. Available: + http://www.xrce.xerox.com/layout/set/print/ +content/download/16654/118653/file/2008-023.pdf [CRSS] A. Cohen, E. Rivlin, I. Shimshoni, and E. Sabo, “Colon crypt segmentation website.” [Online]. - Available: http://mis . haifa . ac . il/~ishimshoni/ -SegmentCrypt/Download. htm + Available: http://mis.haifa.ac.il/~ishimshoni/ +SegmentCrypt/Download.htm [CRSS14] ——, “Memory based active contour algorithm using pixel-level classified images for colon crypt segmentation,” Computerized Medical Imaging -and Graphics , Nov. 2014. [Online]. Available: -http://mis . haifa . ac . il/~ishimshoni/SegmentCrypt/ +and Graphics, Nov. 2014. [Online]. Available: +http://mis.haifa.ac.il/~ishimshoni/SegmentCrypt/ Active%20contour%20based%20on%20pixellevel%20classified%20image%20for%20colon% -20crypts%20segmentation. pdf +20crypts%20segmentation.pdf [CS10] J. Carreira and C. Sminchisescu, “Constrained parametric min-cuts for automatic object segmentation,” in Computer Vision and Pattern Recognition -(CVPR), 2010 IEEE Conference on . IEEE, 2010, +(CVPR), 2010 IEEE Conference on. IEEE, 2010, pp. 3241–3248. [CS11] ——, “Cpmc: Constrained parametric min-cuts for automatic object segmentation,” Feb. 2011. [Online]. -Available: http://www . maths . lth . se/matematiklth/ +Available: http://www.maths.lth.se/matematiklth/ personal/sminchis/code/cpmc/ [CSI+ - 09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V. +09] M. E. Celebi, G. Schaefer, H. Iyatomi, W. V. Stoecker, J. M. 
Malters, and J. M. Grichnik, “An improved objective evaluation measure for border detection in dermoscopy images,” Skin Research and Technology, vol. 15, no. 4, pp. 444–450, 2009. -[Online]. Available: http://arxiv. org/abs/1009. 1020 +[Online]. Available: http://arxiv.org/abs/1009.1020 [CSM09] L. P. Coelho, A. Shariff, and R. F. Murphy, “Nuclear segmentation in microscope cell images: a handsegmented dataset and comparison of algorithms,” in Biomedical Imaging: From Nano to Macro, -2009. ISBI’09. IEEE International Symposium on . +2009. ISBI’09. IEEE International Symposium on. IEEE, 2009, pp. 518–521. [Online]. Available: -http://murphylab. web. cmu. edu/data +http://murphylab.web.cmu.edu/data [CXGS12] M. D. Collins, J. Xu, L. Grady, and V. Singh, “Random walks based multi-image segmentation: Quasiconvexity results and gpu-based solutions,” in Computer Vision and Pattern Recognition -(CVPR), 2012 IEEE Conference on . IEEE, +(CVPR), 2012 IEEE Conference on. IEEE, 2012, pp. 1656–1663. [Online]. Available: http: -//pages. cs. wisc. edu/~jiaxu/pub/rwcoseg. pdf +//pages.cs.wisc.edu/~jiaxu/pub/rwcoseg.pdf [DHS15] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via multi-task network cascades,” arXiv preprint arXiv:1512.04412, 2015. [DT05] N. Dalal and B. Triggs, “Histograms of oriented gradients for human detection,” in Computer Vision and Pattern Recognition, 2005. CVPR -2005. IEEE Computer Society Conference on , vol. 1, June 2005, pp. 886–893 vol. 1. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=1467360 -[EVGW + - a] M. Everingham, L. Van Gool, C. K. I. +2005. IEEE Computer Society Conference on, vol. 1, June 2005, pp. 886–893 vol. 1. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=1467360 +[EVGW+ +a] M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, and A. 
Zisserman, “The PASCAL Visual Object Classes Challenge 2007 (VOC2007) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2007/workshop/index.html. -[Online]. Available: http://host . robots . ox . ac . uk: -8080/pascal/VOC/voc2007/index . html -[EVGW + - b] ——, “The PASCAL Visual Object Classes Challenge +[Online]. Available: http://host.robots.ox.ac.uk: +8080/pascal/VOC/voc2007/index.html +[EVGW+ +b] ——, “The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Results,” http://www.pascalnetwork.org/challenges/VOC/voc2012/workshop/index.html. -[Online]. Available: http://host . robots . ox . ac . uk: -8080/pascal/VOC/voc2012/index . html -[EVGW + - 10] M. Everingham, L. Van Gool, C. K. Williams, +[Online]. Available: http://host.robots.ox.ac.uk: +8080/pascal/VOC/voc2012/index.html +[EVGW+ +10] M. Everingham, L. Van Gool, C. K. Williams, J. Winn, and A. Zisserman, “The pascal visual object classes (voc) challenge,” International journal of computer vision, vol. 88, no. 2, pp. 303–338, 2010. -[EVGW + - 12] M. Everingham, L. Van Gool, C. K. I. Williams, +[EVGW+ +12] M. Everingham, L. Van Gool, C. K. I. Williams, J. Winn, and A. Zisserman, “Visual object classes challenge 2012 (voc2012),” 2012. [Online]. -Available: http://host . robots . ox . ac . uk:8080/pascal/ -VOC/voc2012/index. html +Available: http://host.robots.ox.ac.uk:8080/pascal/ +VOC/voc2012/index.html [Fel] P. F. Felzenszwalb, “Graph based image segmentation.” [Online]. Available: http: -//cs . brown. edu/~pff/segment/ +//cs.brown.edu/~pff/segment/ [FGMR10] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, and D. Ramanan, “Object detection with discriminatively trained part-based models,” Pattern Analysis -and Machine Intelligence, IEEE Transactions on , +and Machine Intelligence, IEEE Transactions on, vol. 32, no. 9, pp. 1627–1645, 2010. [FH04] P. F. Felzenszwalb and D. P. 
Huttenlocher, “Efficient graph-based image segmentation,” -International Journal of Computer Vision , +International Journal of Computer Vision, vol. 59, no. 2, pp. 167–181, 2004. [Online]. -Available: http://link . springer . com/article/10 . 1023/ -B:VISI . 0000022288 . 19776. 77 +Available: http://link.springer.com/article/10.1023/ +B:VISI.0000022288.19776.77 [FKG13] J. Fritsch, T. Kuehnl, and A. Geiger, “A new performance measure and evaluation benchmark for road detection algorithms,” in International Conference on Intelligent Transportation - Systems (ITSC) , 2013. [Online]. Available: -http://www . cvlibs. net/datasets/kitti/eval_road. php -[GBVdW + - 10] - J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. + Systems (ITSC), 2013. [Online]. Available: +http://www.cvlibs.net/datasets/kitti/eval_road.php +[GBVdW+ +10] J. M. Gonfaus, X. Boix, J. Van de Weijer, A. D. Bagdanov, J. Serrat, and J. Gonzalez, “Harmony potentials for joint classification and segmentation,” in Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on. IEEE, 2010, pp. 3280– 3287. -[GRC + - 08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and +[GRC+ +08] S. Gould, J. Rodgers, D. Cohen, G. Elidan, and D. Koller, “Multi-class segmentation with relative location prior,” International Journal of Computer -Vision , vol. 80, no. 3, pp. 300–316, Apr. 2008. +Vision, vol. 80, no. 3, pp. 300–316, Apr. 2008. [GVSY13] S. Giannarou, M. Visentini-Scarzanella, and G.Z. Yang, “Probabilistic tracking of affine-invariant anisotropic regions,” Pattern Analysis and Machine -Intelligence, IEEE Transactions on , vol. 35, no. 1, +Intelligence, IEEE Transactions on, vol. 35, no. 1, pp. 130–143, 2013. [Har75] J. A. Hartigan, Clustering algorithms. John Wiley & Sons, Inc., 1975. [HDT02] C. Huang, L. Davis, and J. Townshend, “An assessment of support vector machines for land cover classification,” International Journal of remote -sensing , vol. 23, no. 4, pp. 725–749, 2002. +sensing, vol. 23, no. 4, pp. 
725–749, 2002. [HHR01] S. Hu, E. Hoffman, and J. Reinhardt, “Automatic lung segmentation for accurate quantitation of volumetric x-ray ct images,” Medical Imaging, IEEE -Transactions on , vol. 20, no. 6, pp. 490–498, Jun. +Transactions on, vol. 20, no. 6, pp. 490–498, Jun. 2001. [HJBJ+ - 96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. +96] A. Hoover, G. Jean-Baptiste, X. Jiang, P. J. Flynn, H. Bunke, D. B. Goldgof, K. Bowyer, D. W. Eggert, A. Fitzgibbon, and R. B. Fisher, “An experimental comparison of range image segmentation algorithms,” Pattern Analysis and Machine Intelligence, IEEE Transactions -on , vol. 18, no. 7, pp. 673–689, Jul. 1996. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=506791 +on, vol. 18, no. 7, pp. 673–689, Jul. 1996. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=506791 [Ho95] T. K. Ho, “Random decision forests,” in Document Analysis and Recognition, 1995., Proceedings of the Third International Conference -on , vol. 1. IEEE, 1995, pp. 278–282. -[Online]. Available: http://ect . bell-labs . com/who/ -tkh/publications/papers/odt. pdf +on, vol. 1. IEEE, 1995, pp. 278–282. +[Online]. Available: http://ect.bell-labs.com/who/ +tkh/publications/papers/odt.pdf [Hus07] Hustvedt, “File:cctv lens flare.jpg,” Wikipedia Commons, Nov. 2007. [Online]. Available: - https://commons . wikimedia . org/wiki/File: -CCTV_Lens_flare. jpg + https://commons.wikimedia.org/wiki/File: +CCTV_Lens_flare.jpg [HZCP04] X. He, R. Zemel, and M. Carreira-Perpindn, “Multiscale conditional random fields for image labeling,” in Computer Vision and Pattern Recognition, 2004. CVPR 2004. Proceedings of the 2004 IEEE Computer Society Conference -on , vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. -[Online]. Available: http://ieeexplore . ieee . org/xpl/ -login . jsp?tp=&arnumber=1315232 +on, vol. 2, Jun. 2004, pp. II–695–II–702 Vol.2. +[Online]. 
Available: http://ieeexplore.ieee.org/xpl/ +login.jsp?tp=&arnumber=1315232 [JLD03] K. Jiang, Q.-M. Liao, and S.-Y. Dai, “A novel white blood cell segmentation scheme using scale-space filtering and watershed clustering,” in Machine Learning and Cybernetics, 2003 International -Conference on , vol. 5, Nov 2003, pp. 2820–2825 -Vol.5. [Online]. Available: http://ieeexplore. ieee. org/ -xpl/login. jsp?tp=&arnumber=1260033 +Conference on, vol. 5, Nov 2003, pp. 2820–2825 +Vol.5. [Online]. Available: http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=1260033 [Kaf07] L. Kaffer, “File:great male leopard in south afrikajd.jpg,” Wikipedia Commons, Jul. 2007. [Online]. -Available: https://commons. wikimedia. org/wiki/File: -Great_male_Leopard_in_South_Afrika-JD . JPG +Available: https://commons.wikimedia.org/wiki/File: +Great_male_Leopard_in_South_Afrika-JD.JPG [KKV+ - 14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen, +14] V. Kalesnykiene, J.-k. Kamarainen, R. Voutilainen, J. Pietilä, H. Kälviäinen, and H. Uusitalo, “Diaretdb1 diabetic retinopathy database and evaluation protocol,” 2014. [Online]. Available: -http://www2 . it. lut. fi/project/imageret/diaretdb1/ +http://www2.it.lut.fi/project/imageret/diaretdb1/ [KP92] J. M. Kasson and W. Plouffe, “An analysis of selected computer interchange color spaces,” ACM Transactions on Graphics (TOG), vol. 11, no. 4, pp. 373–405, 1992. [KP06] Z. Kato and T.-C. Pong, “A markov random field image segmentation model for color -textured images,” Image and Vision Computing , +textured images,” Image and Vision Computing, vol. 24, no. 10, pp. 1103–1114, 2006. [Online]. -Available: http://www . sciencedirect . com/science/ +Available: http://www.sciencedirect.com/science/ article/pii/S0262885606001223 [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification with deep convolutional neural networks,” in Advances in neural information -processing systems , 2012, pp. 1097–1105. +processing systems, 2012, pp. 
1097–1105. [KWT88] M. Kass, A. Witkin, and D. Terzopoulos, “Snakes: Active contour models,” International -journal of computer vision , vol. 1, no. 4, pp. +journal of computer vision, vol. 1, no. 4, pp. 321–331, Jan. 1988. [Online]. Available: http: -//link . springer. com/article/10. 1007/BF00133570 +//link.springer.com/article/10.1007/BF00133570 [LKJ15] F.-F. Li, A. Karpathy, and J. Johnson, “CS231n: Convolutional neural networks for visual recognition,” 2015. [Online]. Available: -http://cs231n . stanford. edu/ +http://cs231n.stanford.edu/ [Low04] D. Lowe, “Distinctive image features from scale- invariant keypoints,” International Journal of Computer Vision, vol. 60, no. 2, pp. 91–110, 2004. -[Online]. Available: http://dx . doi . org/10 . 1023/B% -3AVISI . 0000029664 . 99615. 94 +[Online]. Available: http://dx.doi.org/10.1023/B% +3AVISI.0000029664.99615.94 [LRAL08] A. Levin, A. Rav-Acha, and D. Lischinski, “Spectral matting,” Pattern Analysis and -Machine Intelligence, IEEE Transactions on , +Machine Intelligence, IEEE Transactions on, vol. 30, no. 10, pp. 1699–1712, 2008. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=4547428 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4547428 [LRKT09] L. Ladický, C. Russell, P. Kohli, and P. Torr, “Associative hierarchical crfs for object class image segmentation,” in Computer Vision, 2009 IEEE 12th -International Conference on , 2009, pp. 739–746. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=5459248 +International Conference on, 2009, pp. 739–746. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=5459248 [LSD14] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for semantic segmentation,” -arXiv preprint arXiv:1411.4038 , 2014. [Online]. -Available: http://arxiv. org/abs/1411. 4038 +arXiv preprint arXiv:1411.4038, 2014. [Online]. 
+Available: http://arxiv.org/abs/1411.4038 [MAFM08] M. Maire, P. Arbelaez, C. Fowlkes, and J. Malik, “Using contours to detect and localize junctions in natural images,” in Computer Vision and Pattern Recognition, 2008. CVPR 2008. -IEEE Conference on , June 2008, pp. 1–8. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=4587420 +IEEE Conference on, June 2008, pp. 1–8. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587420 [Man12] M. Manske, “File:randabschattung mikroskop kamera 6.jpg,” Wikipedia Commons, Dec. 2012. [Online]. Available: - https://commons . wikimedia . org/wiki/File: -Randabschattung_Mikroskop_Kamera_6. JPG -[MBLAGJ + - 07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez, + https://commons.wikimedia.org/wiki/File: +Randabschattung_Mikroskop_Kamera_6.JPG +[MBLAGJ+ +07] S. Maldonado-Bascon, S. Lafuente-Arroyo, P. GilJimenez, H. Gomez-Moreno, and F. LopezFerreras, “Road-sign detection and recognition based on support vector machines,” Intelligent -Transportation Systems, IEEE Transactions on , +Transportation Systems, IEEE Transactions on, vol. 8, no. 2, pp. 264–278, Jun. 2007. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=4220659 +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4220659 [MBVLG02] N. Moon, E. Bullitt, K. Van Leemput, and G. Gerig, -“Automatic brain and tumor segmentation,” inMedical +“Automatic brain and tumor segmentation,” in Medical Image Computing and Computer-Assisted Intervention—MICCAI - 2002 . Springer, 2002, pp. + 2002. Springer, 2002, pp. 372–379. [MFTM01] D. Martin, C. Fowlkes, D. Tal, and J. Malik, “A database of human segmented natural @@ -1524,22 +1522,22 @@ images and its application to evaluating segmentation algorithms and measuring ecological statistics,” in Computer Vision, 2001. ICCV 2001. Proceedings. Eighth IEEE International -Conference on , vol. 2. IEEE, 2001, pp. 416–423. 
-[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=937655 -[MHMK + - 14] L. Maier-Hein, S. Mersmann, D. Kondermann, +Conference on, vol. 2. IEEE, 2001, pp. 416–423. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=937655 +[MHMK+ +14] L. Maier-Hein, S. Mersmann, D. Kondermann, S. Bodenstedt, A. Sanchez, C. Stock, H. G. Kenngott, M. Eisenmann, and S. Speidel, “Can masses of non-experts train highly accurate image classifiers?” in Medical Image Computing and Computer-Assisted Intervention–MICCAI 2014. Springer, 2014, pp. 438–445. [Online]. Available: -http://opencas. webarchiv. kit. edu/?q=node/26 +http://opencas.webarchiv.kit.edu/?q=node/26 [Min89] J. Mingers, “An empirical comparison of selection measures for decision-tree induction,” Machine -Learning , vol. 3, no. 4, pp. 319–342, 1989. -[Online]. Available: http://dx . doi . org/10 . 1023/A% +Learning, vol. 3, no. 4, pp. 319–342, 1989. +[Online]. Available: http://dx.doi.org/10.1023/A% 3A1022645801436 [MSB12] G. Moser, S. B. Serpico, and J. A. Benediktsson, “Markov random field models for supervised land @@ -1547,15 +1545,15 @@ Learning , vol. 3, no. 4, pp. 319–342, 1989. cover classification from very high resolution multispectral remote sensing images,” in Advances in Radar and Remote Sensing (TyWRRS), 2012 -Tyrrhenian Workshop on . IEEE, 2012, pp. 235– -242. [Online]. Available: http://ieeexplore. ieee. org/ -xpl/login. jsp?tp=&arnumber=6381135 +Tyrrhenian Workshop on. IEEE, 2012, pp. 235– +242. [Online]. Available: http://ieeexplore.ieee.org/ +xpl/login.jsp?tp=&arnumber=6381135 [MSC] “Object class recognition image database.” -[Online]. Available: http://research . microsoft . com/ +[Online]. Available: http://research.microsoft.com/ vision/cambridge/recognition/ [MSR] “Image understanding - research data,” Microsoft Research. [Online]. Available: - http://research . microsoft . 
com/en-us/projects/ + http://research.microsoft.com/en-us/projects/ objectclassrecognition/ [Mur12] K. P. Murphy, Machine learning: a probabilistic perspective. MIT press, 2012. @@ -1564,31 +1562,31 @@ system for scenes containing objects with substructures,” in Proceedings of the Fourth International Joint Conference on Pattern Recognitions, 1978, pp. 752–754. -[PAA + - 87] S. M. Pizer, E. P. Amburn, J. D. Austin, +[PAA+ +87] S. M. Pizer, E. P. Amburn, J. D. Austin, R. Cromartie, A. Geselowitz, T. Greer, B. ter Haar Romeny, J. B. Zimmerman, and K. Zuiderveld, “Adaptive histogram equalization and its variations,” -Computer vision, graphics, and image processing , +Computer vision, graphics, and image processing, vol. 39, no. 3, pp. 355–368, 1987. [Online]. -Available: http://www . sciencedirect . com/science/ +Available: http://www.sciencedirect.com/science/ article/pii/S0734189X8780186X [PC13] P. H. Pinheiro and R. Collobert, “Recurrent convolutional neural networks for scene parsing,” -arXiv preprint arXiv:1306.2795 , 2013. [Online]. -Available: http://arxiv. org/abs/1306. 2795v1 +arXiv preprint arXiv:1306.2795, 2013. [Online]. +Available: http://arxiv.org/abs/1306.2795v1 [PH05] C. Pantofaru and M. Hebert, “A comparison of image segmentation algorithms,” -Robotics Institute , p. 336, 2005. [Online]. -Available: http://riweb-backend . ri . cmu . edu/ -pub _files/pub4/pantofaru _caroline _ 2005 _1/ -pantofaru_caroline_2005_1 . pdf +Robotics Institute, p. 336, 2005. [Online]. +Available: http://riweb-backend.ri.cmu.edu/ +pub_files/pub4/pantofaru_caroline_2005_1/ +pantofaru_caroline_2005_1.pdf [PS07] A. Protiere and G. Sapiro, “Interactive image segmentation via adaptive weighted distances,” Image Processing, IEEE Transactions -on , vol. 16, no. 4, pp. 1046–1057, 2007. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=4130436 +on, vol. 16, no. 4, pp. 1046–1057, 2007. +[Online]. 
Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4130436 [PTN09] N. Plath, M. Toussaint, and S. Nakajima, “Multiclass image segmentation using conditional random fields and global classification,” in Proceedings @@ -1597,161 +1595,159 @@ Machine Learning. ACM, 2009, pp. 817–824. [PXP00] D. L. Pham, C. Xu, and J. L. Prince, “A survey of current methods in medical image segmentation,” Annual Review of Biomedical -Engineering , vol. 2, no. 1, pp. 315–337, 2000, +Engineering, vol. 2, no. 1, pp. 315–337, 2000, pMID: 11701515. [Online]. Available: http:// -dx. doi. org/10. 1146/annurev . bioeng . 2. 1. 315 -[Qui86] - J. R. Quinlan, “Induction of decision trees,” -Machine learning , vol. 1, no. 1, pp. 81–106, -Aug. 1986. [Online]. Available: http://dx . doi . org/ -10 . 1023/A%3A1022643204877 +dx.doi.org/10.1146/annurev.bioeng.2.1.315 +[Qui86] J. R. Quinlan, “Induction of decision trees,” +Machine learning, vol. 1, no. 1, pp. 81–106, +Aug. 1986. [Online]. Available: http://dx.doi.org/ +10.1023/A%3A1022643204877 [Qui93] ——, C4.5: Programs for Machine Learning, P. Langley, Ed. Morgan Kaufmann Publishers, Inc., 1993. [RKB04] C. Rother, V. Kolmogorov, and A. Blake, “Grabcut: Interactive foreground extraction using iterated graph cuts,” ACM Transactions on Graphics (TOG), vol. 23, no. 3, pp. 309–314, 2004. [Online]. -Available: http://delivery. acm. org/10. 1145/1020000/ -1015720/p309- rother. pdf +Available: http://delivery.acm.org/10.1145/1020000/ +1015720/p309- rother.pdf [RM00] J. B. Roerdink and A. Meijster, “The watershed -transform: Definitions, algorithms and paralleliza- tion strategies,” Fundam. Inform. , vol. 41, no. 1-2, +transform: Definitions, algorithms and paralleliza- tion strategies,” Fundam. Inform., vol. 41, no. 1-2, pp. 187–228, 2000. [RM07] J. Reynolds and K. Murphy, “Figure-ground segmentation using a hierarchical conditional random field,” in Computer and Robot Vision, 2007. CRV ’07. Fourth Canadian -Conference on , May 2007, pp. 
175–182. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=4228537 +Conference on, May 2007, pp. 175–182. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4228537 [RMBK06] C. Rother, T. Minka, A. Blake, and V. Kolmogorov, “Cosegmentation of image pairs by histogram matching - incorporating a global constraint into mrfs,” in Computer Vision and Pattern Recognition, 2006 IEEE Computer Society -Conference on , vol. 1, June 2006, pp. 993– -1000. [Online]. Available: http://ieeexplore. ieee. org/ -xpls/abs_all. jsp?arnumber=1640859 -[SAN + - 04] J. Staal, M. D. Abràmoff, M. Niemeijer, +Conference on, vol. 1, June 2006, pp. 993– +1000. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=1640859 +[SAN+ +04] J. Staal, M. D. Abràmoff, M. Niemeijer, M. Viergever, B. Van Ginneken et al., “Ridge-based vessel segmentation in color images of the retina,” -Medical Imaging, IEEE Transactions on , vol. 23, +Medical Imaging, IEEE Transactions on, vol. 23, no. 4, pp. 501–509, 2004. [Online]. Available: -http://www . isi . uu. nl/Research/Databases/DRIVE/ +http://www.isi.uu.nl/Research/Databases/DRIVE/ [SCZ08] F. Schroff, A. Criminisi, and A. Zisserman, “Object class segmentation using random -forests.” in BMVC , 2008, pp. 1–10. [Online]. - Available: http://research. microsoft. com/pubs/ -72423/Criminisi_bmvc2008. pdf +forests.” in BMVC, 2008, pp. 1–10. [Online]. + Available: http://research.microsoft.com/pubs/ +72423/Criminisi_bmvc2008.pdf [SJC08] J. Shotton, M. Johnson, and R. Cipolla, “Semantic texton forests for image categorization and segmentation,” in Computer vision and pattern recognition, 2008. CVPR 2008. IEEE -Conference on . IEEE, Jun. 2008, pp. 1–8. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=4587503 +Conference on. IEEE, Jun. 2008, pp. 1–8. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=4587503 [SM11] C. Sutton and A. 
McCallum, “An introduction -to conditional random fields,” Machine Learning , +to conditional random fields,” Machine Learning, vol. 4, no. 4, pp. 267–373, 2011. [Online]. -Available: http://homepages . inf . ed . ac . uk/csutton/ -publications/crftutv2 . pdf +Available: http://homepages.inf.ed.ac.uk/csutton/ +publications/crftutv2.pdf [Smi02] L. I. Smith, “A tutorial on principal components -analysis,” Cornell University, USA , vol. 51, p. 52, +analysis,” Cornell University, USA, vol. 51, p. 52, 2002. [Smi04] B. T. Smith, “Lagrange multipliers tutorial in the context of support vector machines,” Memorial University of Newfoundland St. John’s, Newfoundland, -Canada , Jun. 2004. +Canada, Jun. 2004. [SSA12] D. Schiebener, J. Schill, and T. Asfour, “Discovery, segmentation and reactive grasping of unknown -objects.” in Humanoids , 2012, pp. 71–77. [Online]. - Available: http://h2t . anthropomatik . kit . edu/ -pdf/Schiebener2012. pdf +objects.” in Humanoids, 2012, pp. 71–77. [Online]. + Available: http://h2t.anthropomatik.kit.edu/ +pdf/Schiebener2012.pdf [SUM+ - 11] D. Schiebener, A. Ude, J. Morimotot, +11] D. Schiebener, A. Ude, J. Morimotot, T. Asfour, and R. Dillmann, “Segmentation and learning of unknown objects through physical interaction,” in Humanoid Robots (Humanoids), 2011 11th IEEE-RAS International Conference -on . IEEE, 2011, pp. 500–506. [Online]. -Available: http://ieeexplore. ieee. org/ielx5/6086637/ -6100798/06100843 . pdf +on. IEEE, 2011, pp. 500–506. [Online]. +Available: http://ieeexplore.ieee.org/ielx5/6086637/ +6100798/06100843.pdf [SWRC06] J. Shotton, J. Winn, C. Rother, and A. Criminisi, “Textonboost: Joint appearance, shape and context modeling for multi-class object recognition and -segmentation,” in Computer Vision–ECCV 2006 . +segmentation,” in Computer Vision–ECCV 2006. Springer, 2006, pp. 1–15. [Online]. Available: http: -//link . springer. com/chapter/10 . 1007/11744023_1 +//link.springer.com/chapter/10.1007/11744023_1 [TNL14] J. 
Tighe, M. Niethammer, and S. Lazebnik, “Scene parsing with object instances and occlusion ordering,” in Computer Vision and Pattern Recognition (CVPR), 2014 IEEE -Conference on . IEEE, 2014, pp. 3748–3755. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=6909874 +Conference on. IEEE, 2014, pp. 3748–3755. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=6909874 [UPH05] R. Unnikrishnan, C. Pantofaru, and M. Hebert, “A measure for objective evaluation of image segmentation algorithms,” in Computer Vision and Pattern Recognition-Workshops, 2005. CVPR Workshops. IEEE Computer Society -Conference on . IEEE, 2005, pp. 34–34. -[Online]. Available: http://repository . cmu . edu/cgi/ -viewcontent. cgi?article=1365&context=robotics +Conference on. IEEE, 2005, pp. 34–34. +[Online]. Available: http://repository.cmu.edu/cgi/ +viewcontent.cgi?article=1365&context=robotics [vdMPvdH09] L. J. van der Maaten, E. O. Postma, and H. J. van den Herik, “Dimensionality reduction: A comparative review,” Journal of Machine Learning Research, vol. 10, no. 1-41, pp. 66–71, 2009. [VOC10] “Voc2010 preliminary results,” 2010. [Online]. -Available: http://host . robots . ox . ac . uk/pascal/VOC/ -voc2010/results/index. html +Available: http://host.robots.ox.ac.uk/pascal/VOC/ +voc2010/results/index.html [WAH97] G.-Q. Wei, K. Arbter, and G. Hirzinger, “Automatic tracking of laparoscopic instruments by color -coding,” in CVRMed-MRCAS’97 , ser. Lecture +coding,” in CVRMed-MRCAS’97, ser. Lecture Notes in Computer Science, J. Troccaz, E. Grimson, and R. Mösges, Eds. Springer Berlin Heidelberg, 1997, vol. 1205, pp. 357–366. [Online]. Available: -http://dx . doi . org/10. 1007/BFb0029257 +http://dx.doi.org/10.1007/BFb0029257 [YBCK10] Z. Yin, R. Bise, M. Chen, and T. Kanade, “Cell segmentation in microscopy imagery using a bag of local bayesian classifiers,” in Biomedical Imaging: From Nano to Macro, 2010 IEEE -International Symposium on , Apr. 
2010, pp. 125– -128. [Online]. Available: http://ieeexplore. ieee. org/ -xpls/abs_all. jsp?arnumber=5490399 +International Symposium on, Apr. 2010, pp. 125– +128. [Online]. Available: http://ieeexplore.ieee.org/ +xpls/abs_all.jsp?arnumber=5490399 [YHRF12] Y. Yang, S. Hallman, D. Ramanan, and C. C. Fowlkes, “Layered object models for image segmentation,” Pattern Analysis and -Machine Intelligence, IEEE Transactions on , +Machine Intelligence, IEEE Transactions on, vol. 34, no. 9, pp. 1731–1743, Sep. 2012. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=6042883 -[ZBS01]Y. Zhang, M. Brady, and S. Smith, “Segmentation +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=6042883 +[ZBS01] Y. Zhang, M. Brady, and S. Smith, “Segmentation of brain MR images through a hidden Markov random field model and the expectationmaximization algorithm,” Medical Imaging, IEEE -Transactions on , vol. 20, no. 1, pp. 45–57, 2001. -[Online]. Available: http://ieeexplore . ieee . org/xpls/ -abs_all. jsp?arnumber=906424 +Transactions on, vol. 20, no. 1, pp. 45–57, 2001. +[Online]. Available: http://ieeexplore.ieee.org/xpls/ +abs_all.jsp?arnumber=906424 [ZGWX05] S.-C. Zhu, C.-E. Guo, Y. Wang, and Z. Xu, “What are textons?” International Journal of Computer -Vision , vol. 62, no. 1-2, pp. 121–143, 2005. -[Zha12] - Z. Zhang, “Microsoft kinect sensor and its effect,” -MultiMedia, IEEE , vol. 19, no. 2, pp. 4–10, Feb. +Vision, vol. 62, no. 1-2, pp. 121–143, 2005. +[Zha12] Z. Zhang, “Microsoft kinect sensor and its effect,” +MultiMedia, IEEE, vol. 19, no. 2, pp. 4–10, Feb. 2012. [ZJRP+ - 15] S. Zheng, S. Jayasumana, B. Romera-Paredes, +15] S. Zheng, S. Jayasumana, B. Romera-Paredes, V. Vineet, Z. Su, D. Du, C. Huang, and P. H. Torr, “Conditional random fields as recurrent neural networks,” in Proceedings of the IEEE International Conference on -Computer Vision , 2015, pp. 1529–1537. [Online]. -Available: http://www . robots . ox . ac . 
uk/~szheng/ -papers/CRFasRNN. pdf G LOSSARY +Computer Vision, 2015, pp. 1529–1537. [Online]. +Available: http://www.robots.ox.ac.uk/~szheng/ +papers/CRFasRNN.pdf GLOSSARY ACM active contour model. 6 BOV bag-of-visual-words. 5 CNN Convolution Neuronal Network. 5, 9 @@ -1768,22 +1764,22 @@ RBF radial basis function. 8 SIFT scale-invariant feature transform. 5 SVM Support Vector Machine. 4, 6–8 -A PPENDIX A +APPENDIX A TABLES Database Image Resolution (width × height) Number of Images Number of Classes Channels Data source -Colon Crypt DB (302 px − 1116 px ) × (349 px − 875 px) 389 2 3 [CRSS] -DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+ - 14] -KITTI Road (1226 px − 1242 px ) × (370 px − 376 px) 289 2 3 [FKG13] -MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR] -MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR] -Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK + - 14] -PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW + - 12] -Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] +Colon Crypt DB (302 px − 1116 px) × (349 px − 875 px) 389 2 3 [CRSS] +DIARETDB1 1500 px × 1500 px 89 4 3 [KKV+ +14] +KITTI Road (1226 px − 1242 px) × (370 px − 376 px) 289 2 3 [FKG13] +MSRCv1 (213 px − 320 px) × (213 px − 320 px) 240 9 3 [MSR] +MSRCv2 (213 px − 320 px) × (162 px − 320 px) 591 23 3 [MSR] +Open-CAS Endoscopic Datasets 640 px × 480 px 120 2 3 [MHMK+ +14] +PASCAL VOC 2012 (142 px − 500 px) × ( 71 px − 500 px) 2913 20 3 [EVGW+ +12] +Warwick-QU (567 px − 775 px) × (430 px − 522 px) 165 5 3 [CSM09] Table I: An overview over publicly available image databases with a semantic segmentation ground trouth. 
\ No newline at end of file diff --git a/read/results/playa/1707.09725.txt b/read/results/playa/1707.09725.txt index 5769be5..d1597f2 100644 --- a/read/results/playa/1707.09725.txt +++ b/read/results/playa/1707.09725.txt @@ -29,14 +29,14 @@ Affirmation Ich versichere wahrheitsgemäß, die Arbeit selbstständig angefertigt, alle benutzten Hilfsmittel vollständig und genau angegeben und alles kenntlich gemacht zu haben, was aus Arbeiten anderer unverändert oder mit Abänderungen entnommen wurde. -Karlsruhe, Martin Thoma +Karlsruhe, Martin Thoma August 2017 Abstract Convolutional Neural Networks (CNNs) dominate various computer vision tasks since Alex Krizhevsky showed that they can be trained effectively and reduced the top-5 error -from 26.2 % to 15. 3 % on the ImageNet large scale visual recognition challenge. Many +from 26.2 % to 15.3 % on the ImageNet large scale visual recognition challenge. Many aspects of CNNs are examined in various publications, but literature about the analysis and construction of neural network architectures is rare. This work is one step to close this gap. A comprehensive overview over existing techniques for CNN analysis and topology @@ -46,7 +46,7 @@ evaluated. Additionally, some results are confirmed and quantified for CIFAR-100 example, the positive impact of smaller batch sizes, averaging ensembles, data augmentation and test-time transformations on the accuracy. Other results, such as the positive impact of learned color transformation on the test accuracy could not be confirmed. A model which -has only one million learned parameters for an input size of32 × 32 × 3 and 100 classes and +has only one million learned parameters for an input size of 32 × 32 × 3 and 100 classes and which beats the state of the art on the benchmark dataset Asirra, GTSRB, HASYv2 and STL-10 was developed. 
@@ -54,7 +54,7 @@ Zusammenfassung Modelle welche auf Convolutional Neural Networks (CNNs) basieren sind in verschiedenen Aufgaben der Computer Vision dominant seit Alex Krizhevsky gezeigt hat dass diese effektiv trainiert werden können und er den Top-5 Fehler in dem ImageNet large scale visual -recognition challenge Benchmark von 26 .2 % auf 15.3 % drücken konnte. Viele Aspekte +recognition challenge Benchmark von 26.2 % auf 15.3 % drücken konnte. Viele Aspekte von CNNs wurden in verschiedenen Publikationen untersucht, aber es wurden vergleichsweise wenige Arbeiten über die Analyse und die Konstruktion von Neuronalen Netzen geschrieben. Diese Masterarbeit stellt einen Schritt dar um diese Lücke zu schließen. Eine @@ -187,28 +187,28 @@ applications which include one of the following six problems in computer vision • Classification:1 The algorithm is given an image and k possible classes. The task is to decide which of the k classes the image belongs to. For example, an image from -a self-driving cars on-board camera contains either paved road , unpaved road or -no road : Which of those given three classes is in the image? -• Localization: The algorithm is given an image and one class k . The task is to find -bounding boxes for all instances of k . +a self-driving cars on-board camera contains either paved road, unpaved road or +no road: Which of those given three classes is in the image? +• Localization: The algorithm is given an image and one class k. The task is to find +bounding boxes for all instances of k. • Detection: Given an image and k classes, find bounding boxes for all instances of those classes. -• Semantic Segmentation : Given an image and k classes, classify each pixel. +• Semantic Segmentation: Given an image and k classes, classify each pixel. • Instance segmentation: Given an image and k classes, classify each pixel as one of the k classes, but distinguish different instances of the classes. 
-• Content-based Image Retrieval : Given an image x and n images in a database, -find the top u images which are most similar to x . -There are many techniques to approach those problems, but since AlexNet [ KSH12] was +• Content-based Image Retrieval: Given an image x and n images in a database, +find the top u images which are most similar to x. +There are many techniques to approach those problems, but since AlexNet [KSH12] was published, all of those problems have high-quality solutions which make use of Convolutional -Neural Networks (CNNs) [HZRS15a, LAE + - 16, RFB15, DHS16, SKP15]. +Neural Networks (CNNs) [HZRS15a, LAE+ +16, RFB15, DHS16, SKP15]. Today, most neural networks are constructed by rules of thumb and gut feeling. The architectures evolved and got deeper, more hyperparameters were added. Although there are methods for analyzing CNNs, those methods are not enough to determine all steps in the development of network architectures without gut feeling. A detailed introduction to CNNs as well as nine methods for analysis of CNNs is given in Chapter 2. 1 - Classification is also called identification if the classes are humans. Another name is object recognition, +Classification is also called identification if the classes are humans. Another name is object recognition, although the classes can be humans and animals as well. 1. Introduction @@ -235,51 +235,52 @@ This chapter introduces linear image filters in Section 2.1, then standard layer CNNs are explained in Section 2.2. The layer block pattern is described in Section 2.3, transition layers in Section 2.4 and nine ways to analyze CNNs are described in Section 2.5. 2.1. 
Linear Image Filters -A linear image filter (also called a filter bank or a kernel ) is an element F ∈ R k - w × k -h × d - , +A linear image filter (also called a filter bank or a kernel ) is an element F ∈ Rk +w ×k +h×d +, where k - w represents the filter’s width, k +w represents the filter’s width, k h the filter’s height and d the number of input -channels. The filter F is convolved with the image I ∈ R w × h× d +channels. The filter F is convolved with the image I ∈ Rw×h×d to produce a new image I - . +. The output image I has only one channel. Each pixel I - ( x, y ) of the output image gets +(x, y) of the output image gets calculated by point-wise multiplication of one filter element with one element of the original image I : I - (x, y ) = k +(x, y) = k w 2 i - x =1− k +x=1− k w 2 k h 2 i - y =1− k - h +y =1− k +h 2 d i - c =1 I ( x + i -x , y + i +c=1 I (x + i +x, y + i y , i -c ) · F ( i -x , i +c) · F (i +x, i y , i -c ) -This procedure is explained by Figure 2.1. It is essentially a discrete convolution.I ∈ R 7 ×7 +c) +This procedure is explained by Figure 2.1. It is essentially a discrete convolution. +I ∈ R7×7 Filter kernel -F ∈ R3× 3 Result of point-wise +F ∈ R3×3 Result of point-wise multiplication I - ∈ R 7 ×7 + ∈ R7×7 104 116 116 112 58 47 47 109 97 114 116 105 110 45 116 104 111 109 97 46 100 @@ -298,8 +299,8 @@ multiplication I -408 596 -550 368 26 976 156 302 647 879 223 811 54 660 Figure 2.1.: Visualization of the application of a linear k × k × 1 image filter. For each pixel of the -output image, k 2 - multiplications and k 2 +output image, k2 + multiplications and k2 additions of the products have to be calculated. 2. Convolutional Neural Networks @@ -308,13 +309,13 @@ boundary treatment: • don’t compute: The image I will be smaller than the original image. I ∈ -R (w − k -w +1)× (h− k -h +1)× d +R(w−k +w +1)×(h−k +h+1)×d 3 , to be exact. 
• zero padding - : The image I is padded by zeros where the filter would access elements +: The image I is padded by zeros where the filter would access elements which do not exist. This will result in edges being detected at the border if the border pixels are not black, but doesn’t need any computation. • nearest: Repeat the pixel which is closest to the boundary. @@ -326,7 +327,7 @@ can be applied successively. While each pixel after one filtering operation with filter got influenced by 3 · 3 = 9 pixels of the original image, two successively applied 3 × 3 filters increase the area of the original image which influenced the output. The output is then influenced by 25 pixel. This is called the receptive field. The kind of pattern which is -detected by a filter is called a feature . The bigger the receptive field is, the more complex +detected by a filter is called a feature. The bigger the receptive field is, the more complex can features get as they are able to consider more of the original image. Instead of taking one 5 × 5 filter with 25 parameters, one might consider to take two successive 3 × 3 filters with 2 · (3 · 3) = 18 parameters. The 5 × 5 filter is a strict superset of possible filtering @@ -336,107 +337,107 @@ clear in Section 2.2. While the idea behind deep MLPs is that feature hierarchies capture the important parts of the input more easily, CNNs are inspired by the idea of translational invariance : Many features in an image are translationally invariant. For example, if a car is developed, one -could try to detect it by its parts [FGMR10 ]. But then there are many positions at which +could try to detect it by its parts [FGMR10]. But then there are many positions at which the wheels could be. Combining those, it is desirable to capture low-level, translationally invariant features at lower layers of an artificial neural network (ANN) and in higher layers high-level features which are combinations of the low-level features. 
Also, models should utilize the fact that the pixels of images are ordered. One way to use -this is by learning image filters in so called convolutional layers . -While MLPs vectorize the input, the input of a layer in a CNN arefeature maps. A feature -map is a matrix m ∈ R w ×h - , but typically the width equals the height (w = h). For an RGB +this is by learning image filters in so called convolutional layers. +While MLPs vectorize the input, the input of a layer in a CNN are feature maps. A feature +map is a matrix m ∈ Rw×h +, but typically the width equals the height (w = h). For an RGB 2.2. CNN Layer Types input image, the number of feature maps is d = 3. Each color channel is a feature map. -Since AlexNet [ KSH12] almost halved the error in the ImageNet challenge, CNNs are +Since AlexNet [KSH12] almost halved the error in the ImageNet challenge, CNNs are state-of-the-art in various computer vision tasks. Traditional CNNs have three important building tools: • Convolutional layers with a non-linear activation function as described in Section 2.2.1, • pooling layers as described in Section 2.2.2 and • normalization layers as described in Section 2.2.4. 2.2.1. Convolutional Layers -Convolutional layers take several feature maps as input and produce n feature maps 1 +Convolutional layers take several feature maps as input and produce n feature maps1 as output, where n is the number of filters in the convolution layer. The filter weights of the linear convolutions are the parameters which are adapted to the training data. The number n of filters as well as the filter’s size k w × k h are hyperparameters of convolutional -layers. Sometimes, it is denoted as n @k +layers. Sometimes, it is denoted as n@k +w × k +h. Although the filter depth is usually omitted +in the notation, the filters are of dimension k w × k - h . 
Although the filter depth is usually omitted -in the notation, the filters are of dimensionk - w × k - h × d(i − 1) - , where d(i − 1) +h × d(i−1) +, where d(i−1) is the number of -feature maps of the input layer ( i − 1). +feature maps of the input layer (i − 1). Another hyperparameter of convolution layers is the stride s ∈ N - ≥ 1 and the padding. +≥1 and the padding. Padding (usually zero-padding [SCL12, SEZ+ - 13, HZRS15a]) is used to make sure that the +13, HZRS15a]) is used to make sure that the size of the feature maps doesn’t change. The hyperparameters of convolutional layers are • the number of filters n ∈ N -≥ 1 , +≥1, • k - w , k +w , k h ∈ N - ≥1 of the filter size k +≥1 of the filter size k w × k -h × d( i −1) - , +h × d(i−1) +, • the activation function of the layer (see Table B.3) and • the stride s ∈ N - ≥ 1 +≥1 Typical choices are n ∈ { 32, 64, 128 }, k w = k -h = k ∈ { 1 , 3 , 5 , 11 } such as in [ KSH12, -SZ14, SLJ + - 15], rectified linear unit (ReLU) activation and s = 1. -The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH + - 89]. +h = k ∈ { 1, 3, 5, 11 } such as in [KSH12, +SZ14, SLJ+ +15], rectified linear unit (ReLU) activation and s = 1. +The concept of weight sharing is crucial for CNNs. This concept was introduced in [WHH+ +89]. With weight sharing, the filters can be learned with stochastic gradient descent (SGD) just like MLPs. In fact, every CNN has an equivalent MLP which computes the same function if only the flattened output is compared. 1 - also called activation maps or channels +also called activation maps or channels 2. Convolutional Neural Networks This is easier to see when the filtering operation is denoted formally: -o (i ) - (x ) = b + k - -j =1 w - ij · x - j with i ∈ { 1 , . . . , w } × { 1, . . . , h } × { 1, . . . , d } [2.1] -o (x,y,z ) - (I ) = b + k +o(i) +(x) = b + k + +j=1 w +ij · x +j with i ∈ { 1, . . . , w } × { 1, . . . , h } × { 1, . . . 
, d } [2.1] +o(x,y,z) +(I ) = b + k w 2 i - x =1− k +x=1− k w 2 k - h +h 2 i - y =1− k +y =1− k h 2 d i - c =1 F +c=1 F z (i - x , i +x, i +y , i +c) · I (x + i +x, y + i y , i -c ) · I (x + i - x , y + i - y , i -c ) [2.2] -with a bias b ∈ R , x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } +c) [2.2] +with a bias b ∈ R, x ∈ { 1, . . . , w } , y ∈ { 1, . . . , h } and z ∈ { 1, . . . , d } One can see that most weights of the equivalent MLP are zero and many weights are equivalent. Hence the advantage of CNNs compared to MLPs is the reduction of parameters. The effect of fewer parameters is that less training data is necessary to get suitable @@ -447,9 +448,12 @@ See Figure 2.2 for a visualization of the application of a convolutional layer. 3 feature maps (e.g. RGB) n feature mapsn filters of size k × k × 3 -width w width wheight h height hneural +width w + width w +heighth + heighthneural network -data apply +data apply . . . . . . . . .. . . @@ -461,19 +465,19 @@ s = 1 to input data of size width × height with three channels. 2.2. CNN Layer Types A convolutional layer with n filters of size k w × k -h and SAME padding after d(i − 1) +h and SAME padding after d(i−1) feature maps of size s - x × s - y has n · d(i − 1) +x × s +y has n · d(i−1) · (k w · k -h ) parameters if no bias is used. In contrast, a fully +h) parameters if no bias is used. In contrast, a fully connected layer which produces the same output size and does not use a bias would have -n · d( i −1) - · ( s +n · d(i−1) + · (s x × s -y ) 2 +y )2 parameters. This means a convolutional layer has drastically fewer parameters. One the one hand, this means it can learn less complex decision boundaries. On the other hand, it means fewer parameters have to be learned and hence the optimization @@ -482,15 +486,15 @@ It is particularly interesting to notice that even a convolutional layer of 1 × learn a linear combination of the d input feature maps. 
This can be used for dimensionality reduction, if there are fewer 1 × 1 filters in a convolutional layer than input feature maps. Another insight recently got important: Every fully connected layer has an equivalent -convolutional layer which has the same weights. 2 +convolutional layer which has the same weights.2 This way, one can use the complete classification network as a very complex non-linear image filter which can be used for semantic segmentation. A fully connected layer with d ∈ N - ≥1 inputs and n ∈ N - ≥1 nodes can be interpreted as a -convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1 . This will -produce an output shape 1 × 1 × n . Every single output is connected to all of the inputs. +≥1 inputs and n ∈ N +≥1 nodes can be interpreted as a +convolutional layer with an input of shape 1 × 1 × d and n filters of size 1 × 1. This will +produce an output shape 1 × 1 × n. Every single output is connected to all of the inputs. When a convolutional layer is followed by a fully connected layer, it is necessary to vectorize to feature maps. If the 1 × 1 convolutional filter layer is applied to the vectorized output, it is completely equivalent to a fully connected layer. However, the vectorization can be @@ -499,59 +503,59 @@ size is applied. This was used by [LSD15]. 2.2.2. Pooling Layers Pooling summarizes a p × p area of the input feature map. Just like convolutional layers, pooling can be used with a stride of s ∈ N - >1 . As s ≥ 2 is the usual choice, pooling layers +>1. As s ≥ 2 is the usual choice, pooling layers are sometimes also called subsampling layers. Typically, p ∈ { 2, 3, 4, 5 } and s = 2 such as for AlexNet [KSH12] and VGG-16 [SZ14]. 
The type of summary for the set of activations A varies between the functions listed -in Table 2.1, spatial pyramid pooling as introduced in [ HZRS14] and generalizing pooling +in Table 2.1, spatial pyramid pooling as introduced in [HZRS14] and generalizing pooling functions as introduced in [LGT16]. 2 - But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1 +But convolutional layers only have equivalent fully connected layers if the output feature map is 1 × 1 2. Convolutional Neural Networks Name Definition Used by Max pooling max { a ∈ A } [BPL10, KSH12] Average / mean pooling 1 -| A | -a∈ A a LeNet-5 [LBBH98] and [KSlB + - 10] +|A| +a∈A a LeNet-5 [LBBH98] and [KSlB+ +10] 2 pooling - a∈A a 2 +a∈A a2 [Le13] Stochastic pooling * [ZF13] -Table 2.1.: Pooling types for a set A of activations a ∈ R . -(*) For stochastic pooling, each of thep × p activation values a +Table 2.1.: Pooling types for a set A of activations a ∈ R. +(*) For stochastic pooling, each of the p × p activation values a i in the pooling region gets picked with probability p i = a i a - j ∈ A a +j ∈A a j . This assumes the activations a i are non-negative. Pooling is applied for three reasons: To get local translational invariance, to get invariance -against minor local changes and, most important, for data reduction to1 -s 2 th of the data by -using strides of s > 1 . +against minor local changes and, most important, for data reduction to 1 +s2 th of the data by +using strides of s > 1. See Figure 2.3 for a visualization of max pooling. 7 9 3 5 9 40 7 0 0 9 05 0 9 3 7 59 2 9 6 4 3 2 × 2 max pooling 9 5 99 9 72 2 -Figure 2.3.: 2 × 2 max pooling applied to a feature map of size6 × 4 with stride s = 2 and padding. +Figure 2.3.: 2 × 2 max pooling applied to a feature map of size 6 × 4 with stride s = 2 and padding. Average pooling of p × p areas with stride s can be replaced by a convolutional layer. 
If -the input of the pooling layer are d(i −1) +the input of the pooling layer are d(i−1) feature maps, the convolutional layer has to have -d(i −1) - filters of size p × p and stride s . The i th filter has the values +d(i−1) + filters of size p × p and stride s. The ith filter has the values     1 -p 2 . . . 1 -p 2 +p2 . . . 1 +p2 . . . . @@ -560,8 +564,8 @@ p 2 . . 1 -p 2 . . . 1 -p 2  +p2 . . . 1 +p2     @@ -569,7 +573,7 @@ for the dimension i and the zero matrix    - 0 . . . 0 +0 . . . 0 . . . . @@ -581,17 +585,17 @@ for the dimension i and the zero matrix    -for all other dimensions i = 1, . . . , d ( i −1) - . +for all other dimensions i = 1, . . . , d(i−1) +. 2.2. CNN Layer Types 2.2.3. Dropout Dropout is a technique used to prevent overfitting and co-adaptations of neurons by setting -the output of any neuron to zero with probabilityp. It was introduced in [HSK+ - 12] and is -well-described in [SHK + - 14]. -A Dropout layer can be implemented as follows: For an inputin of any shape s, a tensor of +the output of any neuron to zero with probability p. It was introduced in [HSK+ +12] and is +well-described in [SHK+ +14]. +A Dropout layer can be implemented as follows: For an input in of any shape s, a tensor of the same shape D ∈ { 0, 1 }s is sampled, where each element d i is sampled independently @@ -601,25 +605,25 @@ out = D in with d i ∼ B (1, p) where is the Hadamard product (A B ) - i,j := (A) - i,j ( B ) +i,j := (A) +i,j (B ) i,j Hence every value of the input gets set to zero with a dropout probability of p. Typically, -Dropout is used with p = 0. 5. Layers closer to the input usually have a lower dropout probability +Dropout is used with p = 0.5. Layers closer to the input usually have a lower dropout probability than later layers. In order to keep the expected output at the same value, the output of a dropout layer is multiplied with 1 -1− p when dropout is enabled [ Las17, tf-16b]. +1−p when dropout is enabled [Las17, tf-16b]. 
At inference time, dropout is disabled. Dropout is usually only applied after fully connected layers, but not after convolutional layers as it usually increases the test error as pointed out in [GG16]. Models which use Dropout can be interpreted as an ensemble of models with different numbers of neurons in each layer, but also with weight sharing. Conceptually similar are DropConnect and networks with stochastic depth. DropConnect - [ WZZ+ - 13] is a generalization of Dropout, which sets weights to zero in contrast to + [WZZ+ +13] is a generalization of Dropout, which sets weights to zero in contrast to setting the output of a neuron to zero. Networks with stochastic depth as introduced in [HSL+ - 16] dropout only complete layers. This can be done by having Residual networks +16] dropout only complete layers. This can be done by having Residual networks which have one identity connection and one residual feature connection. Hence the residual features can be dropped out and the identity connection remains. 2.2.4. Normalization Layers @@ -631,110 +635,110 @@ input features might drastically change over time. 2. Convolutional Neural Networks One way to approach this problem is by normalizing mini-batches as described in [IS15]. A -Batch Normalization layer with d-dimensional input x = (x (1) - , . . . , x ( d) - ) is first normalized +Batch Normalization layer with d-dimensional input x = (x(1) +, . . . 
, x(d) +) is first normalized point-wise to - ˆx( k ) - = x (k ) - − ¯x (k ) + ˆx(k) + = x(k) + − ¯x(k) s - [ x (k ) - ]2 +[x(k) +]2 + ε -with ¯x (k ) +with ¯x(k) = 1 m m -i =1 x (k ) +i=1 x(k) i being the sample mean and s - [ x ( k ) - ] 2 +[x(k) +]2 = 1 m m -i =1 ( x (k ) -i − ¯x (k ) - ) the +i=1(x(k) +i − ¯x(k) +) the sample variance where m ∈ N - ≥ 1 is the number of training samples per mini-batch, ε > 0 -being a small constant to prevent division by zero andx( k ) +≥1 is the number of training samples per mini-batch, ε > 0 +being a small constant to prevent division by zero and x(k) i is the activation of neuron k for training sample i. -Additionally, for each activation x (k ) - two parameters γ (k ) - , β (k ) +Additionally, for each activation x(k) + two parameters γ (k) +, β (k) are introduced which scale and shift the feature: - y (k ) - = γ ( k ) - · ˆx (k ) - + β (k ) + y(k) + = γ (k) + · ˆx(k) + + β (k) In the case of fully connected layers, this is applied to the activation, before the non-linearity is applied. If it is applied after the activation, it harms the training in early stages. For convolution, only one γ and one β is learned per feature map. -One important special case is γ (k ) +One important special case is γ (k) = s - [x (k ) - ] 2 - + ε and β (k ) - = ¯x ( k ) - , which would make the +[x(k) +]2 + + ε and β (k) + = ¯x(k) +, which would make the Batch Normalization layer an identity layer. -During evaluation time, 3 +During evaluation time,3 the expected value and the variance are calculated once for the complete dataset. An unbiased estimate of the empirical variance is used. The question where Batch Normalization layers (BN) should be applied and for which reasons is still open. For Dropout, it doesn’t matter if it is applied before or after the activation function. Considering this, the possible options for the order are: -1.CONV / FC → BN → activation function → Dropout → . . . -2.CONV / FC → activation function → BN → Dropout → . 
. . -3.CONV / FC → activation function → Dropout → BN → . . . -4.CONV / FC → Dropout → BN → activation function → . . . -The authors of [ IS15] suggest to use Batch Normalization before the activation function +1. CONV / FC → BN → activation function → Dropout → . . . +2. CONV / FC → activation function → BN → Dropout → . . . +3. CONV / FC → activation function → Dropout → BN → . . . +4. CONV / FC → Dropout → BN → activation function → . . . +The authors of [IS15] suggest to use Batch Normalization before the activation function as in Items 1 and 4. Batch Normalization after the activation lead to better results in -https://github. com/ducha- aiki/caffenet-benchmark/blob/master/batchnorm.md -Another normalization layer is Local Response Normalization as described in [ KSH12], +https://github.com/ducha- aiki/caffenet-benchmark/blob/master/batchnorm.md +Another normalization layer is Local Response Normalization as described in [KSH12], which includes - 2 normalization as described in [WWQ13 ]. Those two normalization layers, +2 normalization as described in [WWQ13]. Those two normalization layers, however, are superseded by Batch Normalization. 3 - also called inference time +also called inference time 2.3. CNN Blocks 2.3. CNN Blocks This section describes more complex building blocks than simple layers. CNN blocks act similar to a layer, but they are themselves composed of layers. 2.3.1. Residual Blocks -Residual blocks as introduced in [ HZRS15a] are a milestone in computer vision. They +Residual blocks as introduced in [HZRS15a] are a milestone in computer vision. They enabled the computer vision community to go from about 16 layers as in VGG 16-D (see Appendix D.3) to several hundred layers. The key idea of deep residual networks (ResNets) -as introduced in [ HZRS15a] is to add an identity connection which skips two layers. This +as introduced in [HZRS15a] is to add an identity connection which skips two layers. 
This identity connection adds the feature maps onto the other feature maps and thus requires the output of the input layer of the residual block to be of the same dimension as last layer of the residual block. Formally, it can be described as follows. If x i are the feature maps after layer i and x - 0 is +0 is the input image, H is a non-linear transformation of feature maps, then -y = H (x ) +y = H (x) describes a traditional CNN. Note that this could be multiple layers. A residual block as visualized in Figure 2.4 is described by -y = H (x ) + x -In [ HZRS15a], they only used residual skip connections to skip two layers. Hence, if +y = H (x) + x +In [HZRS15a], they only used residual skip connections to skip two layers. Hence, if conv - i (x - i ) describes the application of the convolutional layer i to the input x - i without the +i(x +i) describes the application of the convolutional layer i to the input x +i without the nonlinearity, then such a residual block is x - i +2 = conv - i +1 (ReLU(conv - i (x - i ))) + x - i +i+2 = conv + i+1(ReLU(conv + i(x +i))) + x +i Figure 2.4.: ResNet module Image source: [HZRS15a] [HM16] provides some insights why deep residual networks are successful. @@ -743,10 +747,10 @@ Image source: [HZRS15a] 2.3.2. Aggregation Blocks Two common ways to add more parameters to neural networks are increasing their depth by adding more layers or increasing their width by adding more neurons / filters. Inception -blocks [AM15] implicitly started a new idea which was explicitly described in [XGD + - 16] as +blocks [AM15] implicitly started a new idea which was explicitly described in [XGD+ +16] as “ResNeXt block”: Increasing the cardinality C ∈ N - ≥1 . By cardinality, the authors describe +≥1. By cardinality, the authors describe the concept of having C small convolutional networks with the same topology but different weights. This concept is visualized in Figure 2.5. 
Please note that Figure 2.5 does not combine aggregation blocks with residual blocks as the authors did. @@ -758,15 +762,15 @@ groups 4 @ 3 × 3 × 4 4 @ 1 × 1 × 256 4 @ 3 × 3 × 4 4 @ 1 × 1 × 256 4 @ 3 × 3 × 4 -Figure 2.5.: Aggregation block with a cardinality of C = 32 . Each of the 32 groups is a 2-layer -convolutional network. The first layer receives 256 feature maps and applies four1 × 1 +Figure 2.5.: Aggregation block with a cardinality of C = 32. Each of the 32 groups is a 2-layer +convolutional network. The first layer receives 256 feature maps and applies four 1 × 1 filters to it. The second layer applies four 3 × 3 filters. Although every group has the same topology, the learned weights are different. The outputs of the groups are concatenated. The hyperparameters of an aggregation block are: • The topology of the group members. • The cardinality C ∈ N - ≥1 . Note that a cardinality of C = 1 is equivalent in every +≥1. Note that a cardinality of C = 1 is equivalent in every aspect to using the group network without an aggregation block. 2.3. CNN Blocks @@ -774,30 +778,30 @@ aspect to using the group network without an aggregation block. Dense blocks are collections of convolutional layers which are introduced in [HLW16]. The idea is to connect each convolutional layer directly to subsequent convolutional layers. Traditional CNNs with L layers and one input layer have L connections between layers, -but dense blocks have L( L+1) +but dense blocks have L(L+1) 2 connections between layers. The input feature maps are concatenated in depth. According to the authors, this prevents features from being relearned and allows much fewer filters per convolutional layer. Where AlexNet and VGG-16 have several hundred filters per convolutional layer (see Tables D.2 and D.3), the authors used only on the order of 12 feature maps per layer. A dense block is visualized in Figure 2.6. 
- 256 -d in + 256-d in k @ 3 × 3 concatenate k @ 3 × 3 -concatenate256 -d -k -d -(256 + k )-d -k -d -(256 + L · k )-d out -Figure 2.6.: Dense block with L = 2 layers and a growth factor of k . +concatenate256-d +k-d +(256 + k)-d +k-d +(256 + L · k)-d out +Figure 2.6.: Dense block with L = 2 layers and a growth factor of k. Dense block have five hyperparameters: • The activation function being used. The authors use ReLU. • The size k w × k h of filters. The authors use k w = k - h = 3. +h = 3. • The number of layers L, where L = 2 is a simple convolutional layer. • The number k of filters added per layer (called growth rate in the paper) It might be necessary use 1 × 1 convolutions to reduce the number of L · k feature maps. @@ -808,9 +812,9 @@ Transition layers are used to overcome constraints imposed by resource limitatio architectural design choices. One constraint is the number of feature maps (see Appendix C.3 for details). In order to reduce the number of feature maps while still keeping as much relevant information as possible in the network, a convolutional layer i with k - i filters of +i filters of the shape 1 × 1 × k -i − 1 is added. The number of filters k +i−1 is added. The number of filters k i directly controls the number of generated feature maps. In order to reduce the dimensionality (width and height) of the feature maps, one typically @@ -822,20 +826,20 @@ network to have different input sizes. 2.5. Analysis Techniques 2.5. Analysis Techniques CNNs have dozens of hyperparameters and ways to tune them. Although there are -automatic methods like random search [ BB12], grid search [ LBOM98], gradient-based -hyperparameter optimization [ MDA15 ] and Hyperband [ LJD + - 16] some actions need a +automatic methods like random search [BB12], grid search [LBOM98], gradient-based +hyperparameter optimization [MDA15] and Hyperband [LJD+ +16] some actions need a manual investigation to improve the model’s quality. 
For this reason, analysis techniques which guide developers and researchers to the important hyperparameters are necessary. In the following, nine diagnostic techniques are explained. A machine learning developer has the following choices to improve the model’s quality: -(I1)Change the problem definition (e.g., the classes which are to be distinguished) -(I2)Get more training data -(I3)Clean the training data -(I4)Change the preprocessing (see Appendix B.1) -(I5)Augment the training data set (see Appendix B.2) -(I6)Change the training setup (see Appendices B.3 to B.5) -(I7)Change the model (see Appendices B.6 and B.7) +(I1) Change the problem definition (e.g., the classes which are to be distinguished) +(I2) Get more training data +(I3) Clean the training data +(I4) Change the preprocessing (see Appendix B.1) +(I5) Augment the training data set (see Appendix B.2) +(I6) Change the training setup (see Appendices B.3 to B.5) +(I7) Change the model (see Appendices B.6 and B.7) The preprocessing is usually not changed in modern architectures. However, this still leaves six very different ways to improve the classifier. Changing the training setup and the model each have too many possible choices to explore them completely. Thus, techniques are @@ -854,67 +858,67 @@ are not covered by the training set and thus indicate the need to collect more d 2. Convolutional Neural Networks 2.5.2. Confusion Matrices -A confusion matrix is a matrix ( c) - ij ∈ N K ×K -≥ 0 , where K ∈ N - ≥ 2 is the number of classes, +A confusion matrix is a matrix (c) +ij ∈ NK ×K +≥0 , where K ∈ N +≥2 is the number of classes, which contains all correct and wrong classifications. The item c ij is the number of times items of class i were classified as class j . This means the correct classification is on the diagonal c ii and all wrong classifications are of the diagonal. 
The sum K -i =1 - K -j =1 c +i=1 +K +j=1 c ij is the total number of samples which were evaluated and - i =1 c - ii +i=1 c +ii - K +K i=1 - K -j =1 c - ij is the accuracy. -The sums r ( i ) = K -j =1 c +j=1 c +ij is the accuracy. +The sums r(i) = +K +j=1 c ij of each class i are worth being investigated as they show if the classes are skewed. If the number of samples of one class dominates the data set, then the classifier can get a high accuracy by simply always prediction the most common class. If the accuracy of the classifier is close to the a priory probability of the most common class, techniques to deal with skewed classes might help. An automatic criterion to check for this problem is -accuracy ≤ max({ r (i ) | i = 1, . . . , k } ) +accuracy ≤ max({ r(i) | i = 1, . . . , k }) k -i =1 r (i ) + ε +i=1 r(i) + ε where ε is a small value to compensate the fact that some examples might be correct just by chance. Other values which should be checked are the class-wise sensitivities: -s (k ) = # correctly identified instances of class k +s(k) = # correctly identified instances of class k # instances of class k = c kk -r (k ) ∈ [0, 1] -If s ( i) is much lower than s( j ) , it is an indicator that more or cleaner training data is -necessary for s (i) . +r(k) ∈ [0, 1] +If s(i) is much lower than s(j ), it is an indicator that more or cleaner training data is +necessary for s(i). The class-wise confusion f -confusability (k -1 , k -2 ) = c +confusability(k +1, k +2) = c k -1 k - 2 +1k +2 K -j =1 c +j=1 c k - 1 j +1j indicates if class k 1 gets often classified as class k -2 . The highest values here can indicate +2. The highest values here can indicate if two classes should be merged or a specialized model for separating those classes could improve the overall system. 2.5.3. Validation Curves: Accuracy, loss and other metrics @@ -927,68 +931,67 @@ validation curves give an indicator if training longer improves the model’s pe 2.5. 
Analysis Techniques plotting the error on the training set as well as the error on a validation set, one can also estimate if overfitting might become a problem. See Figure 2.7 for an example. -10 20 30 40 50 60 70 80 90 1000 .20 .40 .60 .8 +10 20 30 40 50 60 70 80 90 1000.20.40.60.8 overfitting EpochsError Training set Validation set Figure 2.7.: A typical validation curve: In this case, the hyperparameter is the number of epochs -and the quality metric is the error (1 − accuracy ) . The longer the network is trained, +and the quality metric is the error (1 − accuracy). The longer the network is trained, the better it gets on the training set. At some point the network is fit too well to the training data and loses its capability to generalize. At this point the quality curve of the training set and the validation set diverge. While the classifier is still improving on the training set, it gets worse on the validation and the test set. When the epoch-loss validation curve has plateaus as in Figure 2.8, this means the optimization process did not improve for several epochs. Three possible ways to reduce the -problem of plateaus are(i)to change weight initialization if the plateau was at the beginning, -(ii)regularizing the model or(iii)changing the optimization algorithm. +problem of plateaus are (i) to change weight initialization if the plateau was at the beginning, +(ii) regularizing the model or (iii) changing the optimization algorithm. Loss functions The loss function (also called error function or cost function ) is a function which assigns a real value to a complex event like the predicted class of a feature vector. It is used to define the objective function. 
For classification problems the loss function is typically cross-entropy with - 1 or +1 or 2 regularization, as it was described in [NH92]: E C E (W ) = − -x ∈ X K +x∈X K -k =1 [ tx -k log(o x +k=1 [tx +k log(ox k ) + (1 − tx -k ) log(1 − o x +k ) log(1 − ox k )] - cross-entropy data loss + λ 1 · 1 -w ∈W |w | +λ - 2 · +w∈W |w| +λ +2 · 2 -w ∈ W w 2 +w∈W w2 model complexity loss where W are the weights, X is the training data set, K ∈ N - ≥ 0 is the number of classes and +≥0 is the number of classes and tx -k indicates if the training example x is of class k . o x +k indicates if the training example x is of class k. ox k is the output of the classification algorithm which depends on the weights. λ - 1 , λ +1, λ 2 ∈ [0, ∞) weights the regularization and is -typically smaller than 0 .1 . +typically smaller than 0.1. 2. Convolutional Neural Networks Figure 2.8.: Example for a validation curve (plotted loss function) with plateaus. The dark orange curve is smoothed, but the non-smoothed curve is also plotted in light orange. The data loss is positive whenever the classification is not correct, whereas the model complexity loss is higher for more complex models. The model complexity loss exists due -to the intuition of Occam’s razor: If two models explain the same data with an accuracy of +to the intuition of Occam’s razor : If two models explain the same data with an accuracy of 100 %, the simpler model is to be preferred. A reason to show the loss for the validation curve technique instead of other quality metrics is that it contains more information about the quality of the model. A reason against the @@ -1002,7 +1005,7 @@ The optimization process might also be stuck in a local minimum. • Loss being NAN might be due to too high learning rates. Another reason is division by zero or taking the logarithm of zero. In both cases, adding a small constant like -10 −7 +10−7 fixes the problem. 
• If the loss-epoch validation curve has a plateau at the beginning, the weight initialization might be bad. @@ -1014,30 +1017,30 @@ the confusion matrix c which denotes at c ij the number of times the real class was i and j was predicted. This means the diagonal contains the number of correct predictions. For the following, let t - i = +i = k -j =1 c +j=1 c ij be the number of training samples for class i. The most common quality criterion is accuracy: -accuracy( c) = +accuracy(c) = k -i =1 c +i=1 c ii - k -i =1 t +k +i=1 t i ∈ [0, 1] One problem of accuracy as a quality criterion are skewed classes. If one class is by far more common than all other classes, then the simplest way to achieve a high score is to always classify everything as the most common class. In order to fix this problem, one can use the mean accuracy: -mean-accuracy( c) = 1 +mean-accuracy(c) = 1 k · k -i =1 c +i=1 c ii t - i ∈ [0, 1] +i ∈ [0, 1] For two-class problems there are many other metrics like precision, recall and F β -score. Quality criteria for semantic segmentation are explained in [Tho16]. @@ -1046,12 +1049,11 @@ in practice: • Speed of evaluation for new images, • latency, • power consumption, -• - robustness against (non)random perturbations in the training data (see [ SZS+ - 13, -PMW + - 15]), -• robustness against (non)random perturbations in the training labels (see [ NDRT13 , +• robustness against (non)random perturbations in the training data (see [SZS+ +13, +PMW+ +15]), +• robustness against (non)random perturbations in the training labels (see [NDRT13, XXE12]), • model size As reducing the floating point accuracy allows to process more data on a given device [Har15], @@ -1073,9 +1075,9 @@ not help. Instead, the model or the training algorithm need to be adjusted. 
If the training set’s learning curve is significantly higher than the validation set’s learning curve, then removing features (e.g., by decreasing the images resolution), more training samples or more regularization will help. -10 20 30 40 50 60 70 80 90 1000 .20 .40 .6 +10 20 30 40 50 60 70 80 90 1000.20.40.6 avoidable biasvariance - human-le vel error + human-level error Training samplesError Validation set Training set @@ -1084,7 +1086,7 @@ architecture will make to fit the given training data. At the same time, it is e that the training data gets more similar to the true distribution of the data which should be captured by the test data. At some point, the error on the training and test set should be about the same. The term “avoidable bias” was coined by Andrew -Ng [ Ng16]. In some cases it is not possible to classify data correctly by the given +Ng [Ng16]. In some cases it is not possible to classify data correctly by the given features. If humans can classify the data given the features correctly, however, then the bias is avoidable by building a better classifier. The ma jor drawback of this analysis technique is its computational intensity. In order to @@ -1101,17 +1103,17 @@ well. However, it is not the desired solution. For classification problems in computer vision, there are two types of visualizations which help to diagnose such problems. Both color superpixels of the original image to convey information how the model used those superpixels: -• Correct class heatmap : The probability of the correct class is encoded to give a +• Correct class heatmap: The probability of the correct class is encoded to give a heat map which superpixels are important for the correct class. This can also be done by setting the opacity accordingly. • Most-likely class image - : Each of the most likely classes for all superpixels is +: Each of the most likely classes for all superpixels is represented by a color. 
The colored image thus gives clues why different predictions were assigned a high probability. Two methods to generate such images are explained in the following. Occlusion Sensitivity Analysis -Occlusion sensitivity analysis is described in [ ZF14]. The idea is to occlude a part of the -image by something. This could be a gray square as in [ ZF14] or a black superpixel as +Occlusion sensitivity analysis is described in [ZF14]. The idea is to occlude a part of the +image by something. This could be a gray square as in [ZF14] or a black superpixel as in [RSG16]. Then the classifier is run on the image again. This is done for each region (e.g., superpixel or position of the square) and the regions are then colored to generate either a correct class heatmap of the most-likely class image. It is important to note that the color @@ -1122,23 +1124,23 @@ Both visualizations are shown in Figure 2.10. One can see that the network makes predictions for this image of the class “Pomeranian”. However, the image of the class “Afghan Hound” gets confused with “Ice lolly”, which is a sign that this needs further investigation. Gradient-based approaches -In [ SVZ13], a gradient-based approach was used to generate image-specific class saliency -maps . The authors describe the problem as a ranking problem, where each pixel of the +In [SVZ13], a gradient-based approach was used to generate image-specific class saliency +maps. The authors describe the problem as a ranking problem, where each pixel of the image I - 0 is assigned a score S -c (I - 0 ) for a class c of interest. CNNs are non-linear functions, +0 is assigned a score S +c(I +0) for a class c of interest. CNNs are non-linear functions, but they can be approximated by the first order Taylor expansion S -c (I ) ≈ w T +c(I ) ≈ wT I + b where w is the derivative of S - c at I - 0 . +c at I +0. 2. Convolutional Neural Networks 2.5.6. 
Argmax Method The argmax method has two variants: -• Fixed class argmax : Propagate all elements of a given class through the network +• Fixed class argmax: Propagate all elements of a given class through the network and analyze which neurons are activated most often / have the highest activation. • Fixed neuron argmax: Propagate the data through the network and find the n data elements which cause the highest activation for a given neuron. @@ -1163,7 +1165,7 @@ learned features. This technique is called inversion in [MV16]. A key idea of feature map visualizations is to reconstruct a layers input, given its activation. This makes it possible find which inputs would cause neurons to activate with extremely high or low values. -More recent work like [ NYC16] tries to make the reconstructions appearance look more +More recent work like [NYC16] tries to make the reconstructions appearance look more natural. 2.5. Analysis Techniques @@ -1174,50 +1176,50 @@ initializations, the learned weights should still be comparable. If the set of learned filters changes with initialization, this might be an indicator for too little capacity of that layer. Hence adding more filters to that layer could improve the performance. -Filters can be compared with the k -translation correlation as introduced in [ZCZL16]: +Filters can be compared with the k-translation correlation as introduced in [ZCZL16]: ρ k (W - i , W - j ) = max -(x,y ) ∈{−k,...,k }2 - \(0 , 0) W - i , T (W - j , x, y ) - f - W +i, W +j) = max +(x,y)∈{−k,...,k}2 +\(0,0) W +i, T (W +j, x, y) +f +W i - 2 W - j - 2 ∈ [−1 , 1], -where T (·, x, y ) denotes the translation of the first operand by (x, y ), with zero padding at +2 W +j +2 ∈ [−1, 1], +where T (·, x, y) denotes the translation of the first operand by (x, y), with zero padding at the borders to keep the shape. ·, · f denotes the flattened inner product, where the two operands are flattened into column vectors before applying the standard inner product. 
The -closer the absolute value of the k -translation correlation to one, the more similar two filters +closer the absolute value of the k-translation correlation to one, the more similar two filters W - i , W +i, W j are. According to [ZCZL16], standard CNNs like AlexNet (see Appendix D.2) and VGG-16 (see Appendix D.3) have many filters which are highly correlated. They found -this by comparing the averaged maximum k -translational correlation of the networks with -Gaussian-distributed initialized filters. The averaged maximum k -translational correlation +this by comparing the averaged maximum k-translational correlation of the networks with +Gaussian-distributed initialized filters. The averaged maximum k-translational correlation is defined as ¯ρ -k (W ) = 1 +k (W) = 1 N N -i =1 N +i=1 N max -j =1,j = i ρ +j=1,j=i ρ k (W - i , W +i, W j ) where N is the number of filters in the layer W and W - i denotes the ith filter. +i denotes the ith filter. 2.5.9. Weight update tracking Andrej Karpathy proposed in the 5th lecture of CS231n to track weight updates to check if the learning rate is well-chosen. He suggests that the weight update should be in the order of 10−3 - . If the weight update is too high, then the learning rate has to be decreased. If the +. If the weight update is too high, then the learning rate has to be decreased. If the weight update is too low, then the learning rate has to be increased. The order of the weight updates as well as possible implications highly depend on the model and the training algorithm. See Appendix B.5 for a short overview of training algorithms @@ -1244,7 +1246,7 @@ a model which was originally created for another task is also a common technique 2.6. Accuracy boosting techniques Figure 2.10.: Occlusion sensitivity analysis by [ZF14]: The left column shows three example images, -where a gray square occluded a part of the image. This gray squares center(x, y ) was +where a gray square occluded a part of the image. 
This gray squares center (x, y) was moved over the complete image and the classifier was run on each of the occluded images. The probability of the correct class, depending on the gray squares position, is showed in the middle column. One can see that the predicted probability of the @@ -1254,7 +1256,7 @@ it always predicts the correct class if the head is visible. However, if the hea dog is occluded, it predicts other classes. 2. Convolutional Neural Networks -Figure 2.11.: Filter visualization from [ ZF14]: The filters themselves as well as the input feature +Figure 2.11.: Filter visualization from [ZF14]: The filters themselves as well as the input feature maps which caused the highest activation are displayed. 3. Topology Learning @@ -1277,29 +1279,29 @@ is similar to dense block described in Section 2.3.3. Cascade-Correlation works as follows: 1. Initialization: The number of input nodes and the number of output nodes are defined by the problem. Create a minimal, fully connected network for those. -2. Training : Train the network until the error no longer decreases. -3. Candidate Generation : Generate candidate nodes. Each candidate node is connected +2. Training: Train the network until the error no longer decreases. +3. Candidate Generation: Generate candidate nodes. Each candidate node is connected to all inputs. They are not connected to other candidate nodes and not connected to the output nodes. 3. Topology Learning -4. Correlation Maximization: Train the weights of the candidates by maximizingS , +4. Correlation Maximization: Train the weights of the candidates by maximizing S , the correlation between candidates output value V with the networks residual error: S = -o ∈ O +o∈O -p ∈ T - V - p − ¯ +p∈T +V +p − ¯ V (E p,o − ¯ E -o ) +o) @@ -1307,15 +1309,15 @@ o ) where O is the set of output nodes, T is the training set, V p is the candidate neurons -activation for a training pattern p . E +activation for a training pattern p. 
E p,o is the residual output error at node o for -pattern p . ¯ +pattern p. ¯ V and ¯ E o are averaged values over all elements of T . This step is finished when the correlation no longer increases. -5. Candidate selection : Keep the candidate node with the highest correlation, freeze +5. Candidate selection: Keep the candidate node with the highest correlation, freeze its incoming weights and add connections to the output nodes. 6. Continue: If the error is higher than desired, continue with step 2. One network with three hidden nodes trained by Cascade-Correlation is shown in Figure 3.1. @@ -1326,17 +1328,17 @@ right corner. The black squares represent frozen weights which are found by corr maximization whereas the white squares are trainable weights. 3.1.2. Meiosis Networks Meiosis Networks are introduced in [Han89]. In contrast to most MLPs and CNNs, where -weights are deterministic and fixed at prediction time, each weightw - ij in Meiosis networks +weights are deterministic and fixed at prediction time, each weight w +ij in Meiosis networks follows a normal distribution: w - ij ∼ N (µ - ij , σ 2 +ij ∼ N (µ +ij , σ2 ij ) 3.2. Pruning approaches Hence every connection has two learned parameters: µ - ij and σ 2 +ij and σ2 ij . The key idea of Meiosis networks is to allow neurons to perform Meiosis, which is cell division. A node j is splitted, when the random part dominates the value of the sampled @@ -1346,27 +1348,27 @@ i σ ij i µ - ij > 1 and - k σ - jk +ij > 1 and +k σ +jk k µ - jk > 1 +jk > 1 The mean of the new nodes is sampled around the old mean, half the variance is assigned to the new connections. Hence Meiosis networks only change the number of neurons per layer. They do not add layers or add skip connections. 3.1.3. Automatic Structure Optimization -Automatic Structure Optimization (ASO) was introduced in [ BM93] for the task of online - handwriting recognition. 
It makes use of the confusion matrix C = ( c -ij ) ∈ Nk × k -≥ 0 +Automatic Structure Optimization (ASO) was introduced in [BM93] for the task of online + handwriting recognition. It makes use of the confusion matrix C = (c +ij ) ∈ Nk×k +≥0 (see Section 2.5.2) to guide the topology learning. They define a confusion-symmetry matrix S with s - i j = s +ij = s j i = c ij · c -ji . The maximum of S defines where the ASO algorithm adds +ji. The maximum of S defines where the ASO algorithm adds more parameters. The details how the resources are added are not transferable to CNNs. 3.2. Pruning approaches Pruning approaches start with a network which is bigger than necessary and prune it. The @@ -1375,27 +1377,27 @@ model sharing, memory for easier deployment and FLOPs to reduce inference time a energy consumption. Especially for embedded systems, deployment is a challenge and low energy consumption is important. Pruning generally works as follows: -1.Train a given network until a reasonable solution is obtained, -2.prune weights according to a pruning criterion and -3.retrain the pruned network. +1. Train a given network until a reasonable solution is obtained, +2. prune weights according to a pruning criterion and +3. retrain the pruned network. This procedure can be repeated. -One family of pruning criterions uses the Hessian matrix . For example, Optimal Brain +One family of pruning criterions uses the Hessian matrix. For example, Optimal Brain Damage (OBD) as introduced in [LDS+ - 89]. For every single parameter k , OBD calculates -the effect on the ob jective function of deletingk . The authors call the effect of the deletion +89]. For every single parameter k, OBD calculates +the effect on the ob jective function of deleting k. The authors call the effect of the deletion 3. Topology Learning of parameter k the saliency s - k . The parameters with the lowest saliency are deleted, which +k . 
The parameters with the lowest saliency are deleted, which means they are set to 0 and are not updated anymore. -A follow-up method called Optimal Brain Surgeon [ HSW93] claims to choose the weights +A follow-up method called Optimal Brain Surgeon [HSW93] claims to choose the weights in a much better way. This requires, however, to calculate the inverse Hessian matrix H −1 - ∈ R n×n + ∈ Rn×n where n ∈ N is typically n > 106 - . -A much simpler and computationally cheaper pruning criterion is the weight magnitude . -[HPTD15] prunes all weights w which are below a threshold θ : +. +A much simpler and computationally cheaper pruning criterion is the weight magnitude. +[HPTD15] prunes all weights w which are below a threshold θ: w ←   w if w ≥ θ @@ -1408,17 +1410,17 @@ Commonly used techniques to generate neural networks by GAs are NEAT [SM02] and successors HyperNEAT [SDG09] and ES-HyperNEAT [RLS10]. The results, however, are of unacceptable quality: On MNIST (see Appendix E), where random chance gives 10 % accuracy, even simple topologies trained with SGD achieve -about 92 % accuracy [ TF-16a] and state of the art is 99. 79 % [ WZZ+ - 13], the HyperNEAT +about 92 % accuracy [TF-16a] and state of the art is 99.79 % [WZZ+ +13], the HyperNEAT algorithm achieves only 23.9 % accuracy [VH13]. -Kocmánek shows in [ Koc15] that HyperNEAT approaches can achieve 96 .47 % accuracy +Kocmánek shows in [Koc15] that HyperNEAT approaches can achieve 96.47 % accuracy on MNIST. Kocmánek mentions that HyperNEAT becomes slower with each hidden layer so that not more than three hidden layers could be trained. At the same time, VGG19 [SZ14] already has 19 hidden layers and ResNets are successfully trained with 1202 layers in [HZRS15a]. 
-[ LX17] shows that Genetic algorithms can achieve competitive results on MNIST and +[LX17] shows that Genetic algorithms can achieve competitive results on MNIST and SVHN, but the best results on CIFAR-10 were 7.10 % error whereas the state of the art is -at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves29.03 % error on CIFAR-100, +at 3.74 % [HLW16]. Similarly, the Genetic algorithm achieves 29.03 % error on CIFAR-100, but the state of the art is 17.18 % [HLW16]. 3.4. Reinforcement Learning Reinforcement learning is a sub-field of machine learning, which focuses on the question @@ -1429,23 +1431,23 @@ One can think of the search for good neural network topologies as a reinforcemen problem. The agent is a recurrent neural network which can generate bitstrings. Those variable-length bitstrings encode neural network topologies. In 2016, this approach was applied to construct neural networks for computer vision. -In [BGNR16], Q-learning with an ε -greedy exploration was applied. +In [BGNR16], Q-learning with an ε-greedy exploration was applied. In [ZL16], the REINFORCE algorithm from [Wil92] was used to train state of the art models for CIFAR-10 and the Penn Treebank dataset. A drawback of this method is that enormous amounts of computational resources were used to obtain those results. 3.5. Convolutional Neural Fabrics -Convolutional Neural Fabrics are introduced in [ SV16]. They side-step hard decisions +Convolutional Neural Fabrics are introduced in [SV16]. They side-step hard decisions about topologies by learning an ensemble of different CNN architectures. The idea is to define a single architecture as a trellis through a 3D grid of nodes. Each node represents a convolutional layer. One dimension is the index of the layer, the other two dimensions are the amount of filters and the feature size. 
Each node is connected to nine other nodes and thus represents nine possible choices of convolutional layers: • Resolution - :(i)convolution with stride=1 or(ii)convolution with stride=2 or -(iii)deconvolution (doubling the resolution) -• Channels:(i)half the number of filters than the layer before(ii)the same number -of filters as the layer before(iii)double the number of filters than the layer before -They always use ReLU as an activation function and they always use filters of size 3 × 3 . +: (i) convolution with stride=1 or (ii) convolution with stride=2 or +(iii) deconvolution (doubling the resolution) +• Channels: (i) half the number of filters than the layer before (ii) the same number +of filters as the layer before (iii) double the number of filters than the layer before +They always use ReLU as an activation function and they always use filters of size 3 × 3. They don’t use pooling at all. 3. Topology Learning @@ -1453,8 +1455,8 @@ They don’t use pooling at all. 4. Hierarchical Classification Designing a classifier for a new dataset is hard for two main reasons: Many design choices are not clearly superior to others and evaluating one design choice takes much time. Especially -CNNs are known to take several days [ KSH12, SLJ + - 15] or even weeks [ SZ14] to train. +CNNs are known to take several days [KSH12, SLJ+ +15] or even weeks [SZ14] to train. Additionally, some methods for analyzing a dataset become harder to use with more classes and more training samples. Examples are t-SNE, the manual inspection of errors and confusion matrices, and the argmax method. @@ -1463,15 +1465,15 @@ classifier distinguishes clusters of classes, whereas the leaf classifiers disti classes. Figure 4.1 gives an example for an hierarchy of classifiers. Figure 4.1.: Example for a hierarchy of classifiers. Each classifier is visualized by a rounded rectangle. 
The root classifier C - 0 has to distinguish six coarse classes (pedestrian, four+ - -wheelers, +0 has to distinguish six coarse classes (pedestrian, four+ +-wheelers, traffic signs, two-wheelers, street, other) or 17 fine-grained classes. If C - 0 predicts a +0 predicts a pedestrian, another classifier has to predict if it is an adult or a child. Similar, if C 0 predicts traffic sign, then another classifier has to predict if it is a speed limit, a sign indicating danger or something else. If C -0 , however, predicts road, then no other +0, however, predicts road, then no other classifier will become active. In this example, the problem has 17 classes. The hierarchical approach introduces 7 clusters of classes and thus uses 8 classifiers. @@ -1480,16 +1482,16 @@ Such a hierarchy of classifiers needs clusters of classes. 4. Hierarchical Classification 4.1. Advantages of classifier hierarchies Having a classifier hierarchy has five advantages: -• Division of labor : Different teams can work together. Instead of having a monolithic +• Division of labor: Different teams can work together. Instead of having a monolithic task, the solutions can be combined. -• Guarantees : Changing a classifier will only change the prediction of itself and its +• Guarantees: Changing a classifier will only change the prediction of itself and its children. Siblings are not affected. In the example from Figure 4.1, the classifier -which distinguishes traffic signs can be changed while the classification aspedestrian , -four + - -wheelers , traffic sign , street , other will not be affected. Also, the +which distinguishes traffic signs can be changed while the classification as pedestrian, +four+ +-wheelers, traffic sign, street, other will not be affected. Also, the classification between speed limits, danger signs and other signs will not change. 
-• Faster training : Except for the root classifier C - 0 , each other classifier will have +• Faster training: Except for the root classifier C +0, each other classifier will have less than the total amount of training data. Depending on the combined classes, the models could also be simpler. Hence the training time is reduced. • Weighting of errors: In practice, some errors are more severe than others. For @@ -1500,30 +1502,30 @@ classification is made the way it is made. 4.2. Clustering classes There are two ways to cluster classes: By similarity or by semantics. While semantic clustering needs either additional information or manual work, the similarity can be -automatically inferred from the data. As pointed out in [ XZY+ - 14], semantically similar +automatically inferred from the data. As pointed out in [XZY+ +14], semantically similar classes are often also visually similar. For example, in the ImageNet dataset most dogs are semantically and visually more similar to each other than to non-dogs. An example where this is obviously not the case are symbols: The summation symbol \sum is identical -in appearance to the Greek letter \Sigma , but semantically much closer to the addition -operator + . +in appearance to the Greek letter \Sigma, but semantically much closer to the addition +operator +. One approach to cluster classes by similarity is to train a classifier and examine its predictions. Each class is represented in the confusion matrix by one row. Those rows -can be directly with standard clustering algorithms such as k -means, DBSCAN [EKS+ - 96], -OPTICS [ ABKS99], CLARANS [ NH02], DIANA [ KR09], AHC (see [ HPK11]) or spectral -clustering as in [ XZY+ - 14]. Those clusterings, however, are hard to interpret and most of +can be directly with standard clustering algorithms such as k-means, DBSCAN [EKS+ +96], +OPTICS [ABKS99], CLARANS [NH02], DIANA [KR09], AHC (see [HPK11]) or spectral +clustering as in [XZY+ +14]. 
Those clusterings, however, are hard to interpret and most of them do not allow a human to improve the found clustering manually. -The confusion matrix ( c) - ij ∈ N k × k +The confusion matrix (c) +ij ∈ Nk×k states how often class i was present and class j was 4.2. Clustering classes predicted. The more often this confusion happens, the more similar those two classes are to the classifier. Based on the confusion matrix, the classes can be clustered as explained in the following. -[ HAE16] indicates that more classes make it easier to generalize, but the accuracy gains +[HAE16] indicates that more classes make it easier to generalize, but the accuracy gains diminish after a critical point of classes is reached. Hence a binary tree might not be a good choice. As an alternative, an approach which allows building arbitrary many clusters, is proposed. @@ -1536,15 +1538,15 @@ Hence the order of the classes is permutated in such a way that the highest erro to the diagonal. One possible ob jective function to be minimized is f (C ) = n -i =1 n +i=1 n -j =1 C - ij · | i − j | [4.1] +j=1 C +ij · |i − j | [4.1] which punishes errors linearly with the distance to the diagonal. This method is called CMO in the following. As pointed out by Tobias Ribizel (personal communication), this optimization problem -is a weighted version of Optimal Linear Arrangement problem . That problem is NPcomplete - [ GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, +is a weighted version of Optimal Linear Arrangement problem. That problem is NPcomplete + [GJ02, GJS76]. Simulated Annealing as described in Algorithm 1, however, produces reasonable clusterings as well as visually appealing confusion matrices. The algorithm works as follows: First, decide with probability 0.5 if only two random rows are swapped or a block is swapped. If two rows are swapped, choose both of them randomly. @@ -1558,8 +1560,8 @@ of dog breeds could be separated by car and bus due to random chance. 
Moving any class increases the score, but moving either one of the dog breed clusters or the vehicle cluster decreases the score. Hence it is beneficial to implement block moving. One advantage of permutating the classes in order to minimize Equation (4.1) in comparison -to spectral clustering as used in [ XZY+ - 14] is that the adjusted confusion matrix can be +to spectral clustering as used in [XZY+ +14] is that the adjusted confusion matrix can be 4. Hierarchical Classification split into many much smaller matrices along the diagonal. In the case of many classes (e.g., @@ -1571,13 +1573,13 @@ Once a permutation of the classes is found which has a low score Equation (4.1), can either be made by hand by deciding why classes should not be in one clusters. With such a permutation, only n − 1 binary decisions have to be made and hence only the list of classes has to be read. Alternatively, one can calculate the confusions C -i,i +1 + C -i +1,i for +i,i+1 + C +i+1,i for each pair of classes which are neighbors in the confusion matrix. The higher this value, the -more similar are the classes according to the classifier. Hence a thresholdθ can be applied. +more similar are the classes according to the classifier. Hence a threshold θ can be applied. θ can either be set automatically (e.g., such that 10 % of all pairs are above the threshold) or semi-automatically by asking the user for information if two classes belong to the same -cluster. Such an approach only needs log (n ) binary decisions from the user where n is the +cluster. Such an approach only needs log(n) binary decisions from the user where n is the number of classes. Please note that CMO only works if the classifier is neither too bad nor too good. A classifier which does not solve the task at all might just give almost uniform predictions whereas the @@ -1587,35 +1589,33 @@ the prediction of the class in contrast to using only the argmax in order to fin permutation. 5. 
Experimental Evaluation -All experiments are implemented using Keras 2.0 [ Cho15] with Tensorflow 1.0 [ AAB+ - 16] -and cuDNN 5.1 [CWV + - 14] as the backend. The experiments were run on different machines +All experiments are implemented using Keras 2.0 [Cho15] with Tensorflow 1.0 [AAB+ +16] +and cuDNN 5.1 [CWV+ +14] as the backend. The experiments were run on different machines with different Nvidia graphics processing units (GPUs), including the Titan Black, GeForce GTX 970 and GeForce 940MX. -The GTSRB [SSSI12], SVHN [NWC + - 11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], +The GTSRB [SSSI12], SVHN [NWC+ +11b], CIFAR-10 and CIFAR-100 [Kri], MNIST [YL98], HASYv2 [Tho17a], STL-10 [CLN10] dataset are used for the evaluation. Those datasets are used as their size is small enough to be trained within a day. Other classification datasets which were considered are listed in Appendix E. CIFAR-10 (Canadian Institute for Advanced Research 10) is a 10-class dataset of color images of the size 32 px × 32 px. Its ten classes are airplane, automobile, bird, cat, deer, -dog, frog, horse, ship, truck. The state of the art achieves an accuracy of96. 54 % [HLW16]. +dog, frog, horse, ship, truck. The state of the art achieves an accuracy of 96.54 % [HLW16]. According to [Kar11], human accuracy is at about 94 %. -CIFAR-100 is a 100-class dataset of color images of the size32 px × 32 px. Its 100 classes +CIFAR-100 is a 100-class dataset of color images of the size 32 px × 32 px. Its 100 classes are grouped to 20 superclasses. It includes animals, people, plants, outdoor scenes, vehicles and other items. CIFAR-100 is not a superset of CIFAR-10, as CIFAR-100 does not contain -the class airplane . The state of the art achieves an accuracy of 82.82 % [HLW16]. +the class airplane. The state of the art achieves an accuracy of 82.82 % [HLW16]. GTSRB (German Traffic Sign Recognition Benchmark) is a 43-class dataset of traffic signs. 
-The 51 839 images are in color and of a minimum size of25 px × 25 px up to 266 px × 232 px. -The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [ SL11]. +The 51 839 images are in color and of a minimum size of 25 px × 25 px up to 266 px × 232 px. +The state of the art achieves 99.46 % accuracy with an ensemble of 25 CNNs [SL11]. According to [SSSI], human performance is at 98.84 %. -HASYv2 - (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images +HASYv2 (Handwritten Symbols version 2) is a 369 class dataset of black-and-white images of the size 32 px × 32 px. The 369 classes contain the Latin and Greek letters, arrows, -mathematical symbols. The state of the art achieves an accuracy of 82 .00 % [Tho17a]. -STL-10 - (self-taught learning 10) is a 10-class dataset of color images of the size96 px × 96 px. +mathematical symbols. The state of the art achieves an accuracy of 82.00 % [Tho17a]. +STL-10 (self-taught learning 10) is a 10-class dataset of color images of the size 96 px × 96 px. Its ten classes are airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck. The state of the art achieves an accuracy of 74.80 % [ZMGL15]. It contains 100 000 unlabeled images for unsupervised training and 500 images per class for supervised training. @@ -1624,26 +1624,26 @@ the cropped digit format was used. It contains the 10 digits cropped from photos Street View. The images are in color and of size 32 px × 32 px. The state of the art 5. Experimental Evaluation -achieves an accuracy of 98. 41 % [ HLW16]. According to [ NWC + - 11a], human performance +achieves an accuracy of 98.41 % [HLW16]. According to [NWC+ +11a], human performance is at 98.0 %. -As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0 , 1] . +As a preprocessing step, the pixel-features were divided by 255 to obtain values in [0, 1]. For GTSRB, the training and test data was scaled to 32 px × 32 px. 5.1. 
Baseline Model and Training setup -The baseline model is trained with Adam [KB14], an initial learning rate of 10− 4 - , a batch +The baseline model is trained with Adam [KB14], an initial learning rate of 10−4 +, a batch size of 64 for at most 1000 epochs with data augmentation. The kind of data augmentation depends on the dataset: -• CIFAR-10 , CIFAR-100 and STL-10: Random width and height shift by at most +• CIFAR-10, CIFAR-100 and STL-10: Random width and height shift by at most ±3 pixels in either direction; Random horizontal flip. • GTSRB , MNIST: Random width and height shift by at most ±5 pixels in either direction; random rotation by at most ±15 degrees; random channel shift; random -zoom in [0.5 , 1 .5]; random shear by at most 6 degrees. +zoom in [0.5, 1.5]; random shear by at most 6 degrees. • HASYv2: Random width and height shift by at most ±5 pixels in either direction; random rotation by at most ±5 degree. • SVHN: No data augmentation. -If the dataset does not define a training/test set, a stratified67 % / 33 % split is applied. If +If the dataset does not define a training/test set, a stratified 67 % / 33 % split is applied. If the dataset does not define a validation set, the training set is split in a stratified manner into 90 % training set / 10 % test set. Early stopping [Pre98] with the validation accuracy as a stopping criterion and a patience of @@ -1652,14 +1652,14 @@ Early stopping [Pre98] with the validation accuracy as a stopping criterion and patience of 10 epochs. Kernel weights are initialized according to the uniform initialization scheme of He [HZRS15b] (see Appendix B.3). The architecture of the baseline model uses a pattern of -Conv-Block (n ) = (Convolution − Batch Normalization − Activation)n +Conv-Block(n) = (Convolution − Batch Normalization − Activation)n − Pooling The activation function is the Exponential Linear Unit (ELU) (see Table B.3), except for the last layer where softmax is used. 
Before the last two convolutional layer, a dropout layer with dropout probability 0.5 is applied. The architecture is given in detail in Table 5.1. Please note that the number of input- and output channels of the network depends on the dataset. If the input image is larger than 32 px × 32 px, for each power of two a -Conv-Block (2) is added at the input. For MNIST, the images are bilinearly upsampled to +Conv-Block(2) is added at the input. For MNIST, the images are bilinearly upsampled to 32 px × 32 px. 5.1. Baseline Model and Training setup @@ -1687,35 +1687,36 @@ Dropout 0.5 0 0 512 @ 1 × 1 Dropout 0.5 0 0 512 @ 1 × 1 15 Convolution k @ 1 × 1 × 512 / 1 k · (512 + 1) 1024 · k k @ 1 × 1 Global avg Pooling 1 × 1 0 k k @ 1 × 1 -16 BN + Softmax 2 k 7k k @ 1 × 1 +16 BN + Softmax 2k 7k k @ 1 × 1 - 515 k + 515k +892 512 1032k -+55 729 664 103 424+2 k -Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32 . All convolutional layers ++55 729 664 103 424+2k +Table 5.1.: Baseline architecture with 3 input channels of size 32 × 32. All convolutional layers use SAME padding, except for layer 11 which used VALID padding in order to decrease -the feature map size to 1 × 1 . If the input feature map is bigger than 32 × 32 , for +the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each power of two there are two Convolution + BN + ELU blocks and one Max pooling -block added. This is the framed part in the table.32 × 32Input -C 32@3 × 3 / 1 +block added. This is the framed part in the table. +32 × 32Input +C 32@3 × 3/1 BN + ELU -C 32@3 × 3 / 1 -BN + ELU 16 × 16max pooling 2 × 2 /2 -C 64@3 × 3 /1 +C 32@3 × 3/1 +BN + ELU 16 × 16max pooling 2 × 2/2 +C 64@3 × 3/1 BN + ELU -C 64@3 × 3 /1 -BN + ELU 8 × 8max pooling 2 × 2 /2 -C 64@3 × 3 /1 -BN + ELU 4 × 4max pooling 2 × 2 /2 -C 512@4 × 4 /1 (V) +C 64@3 × 3/1 +BN + ELU 8 × 8max pooling 2 × 2/2 +C 64@3 × 3/1 +BN + ELU 4 × 4max pooling 2 × 2/2 +C 512@4 × 4/1 (V) BN + ELU -Dropout, p = 0. 
5 1 × 1C 512@1 × 1 /1 +Dropout, p = 0.5 1 × 1C 512@1 × 1/1 BN + ELU -Dropout, p = 0. 5 -C k @1 × 1/ 1 +Dropout, p = 0.5 +C k@1 × 1/1 Global AVG pooling BN + Softmax -Figure 5.1.: Architecture of the baseline model. C 32@3 × 3/ 1 is a convolutional layer with 32 filters +Figure 5.1.: Architecture of the baseline model. C 32@3 × 3/1 is a convolutional layer with 32 filters of kernel size 3 × 3 with stride 1. 5. Experimental Evaluation @@ -1724,14 +1725,14 @@ The results for the baseline model evaluated on eight datasets are given in Tabl speed for inference for different GPUs is given in Table 5.3. Dataset Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set -Asirra 94.22 % σ = 3. 49 94.37 % σ = 3.47 97 .07 % 97. 37 % -CIFAR-10 91.23 % σ = 1. 10 85.84 % σ = 0.87 92 .36 % 86.75 % -CIFAR-100 76.64 % σ = 1.48 63. 38 % σ = 0.55 78 .30 % 64.70 % -GTSRB 100 .00 % σ = 0.00 99. 18 % σ = 0.11 100 .00 % 99.46 % -HASYv2 89.49 % σ = 0.42 85. 35 % σ = 0.10 89 .94 % 86.03 % -MNIST 99.93 % σ = 0.07 99. 53 % σ = 0.06 99 .99 % 99.58 % -STL-10 94.12 % σ = 0.87 75. 67 % σ = 0.34 96 .35 % 77.62 % -SVHN 99.02 % σ = 0.07 96. 28 % σ = 0.10 99 .42 % 97.20 % +Asirra 94.22 % σ = 3.49 94.37 % σ = 3.47 97.07 % 97.37 % +CIFAR-10 91.23 % σ = 1.10 85.84 % σ = 0.87 92.36 % 86.75 % +CIFAR-100 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 % +GTSRB 100.00 % σ = 0.00 99.18 % σ = 0.11 100.00 % 99.46 % +HASYv2 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % +MNIST 99.93 % σ = 0.07 99.53 % σ = 0.06 99.99 % 99.58 % +STL-10 94.12 % σ = 0.87 75.67 % σ = 0.34 96.35 % 77.62 % +SVHN 99.02 % σ = 0.07 96.28 % σ = 0.10 99.42 % 97.20 % Table 5.2.: Baseline model accuracy on eight datasets. The single model actuary is the 10 models used in the ensemble. The empirical standard deviation σ of the accuracy is also given. CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the @@ -1739,19 +1740,19 @@ models uses unlabeled data or data from other datasets. 
For HASYv2 no test time transformations are used. Network GPU Tensorflow Inference per Training 1 Image 128 images time / epoch -Baseline Default Intel i7-4930K 3 ms 244 ms 231. 0 s -Baseline Optimized Intel i7-4930K 2 ms 143 ms 149. 0 s -Baseline Default GeForce 940MX 4 ms 120 ms 145. 6 s -Baseline Default GTX 970 6 ms 32 ms 25.0 s-26. 3 s -Baseline Default GTX 980 3 ms 24 ms 20.5 s-21. 1 s -Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22. 1 s -Baseline Default GTX 1070 2 ms 15 ms 14 . 4 s- 14 .5 s -Baseline Default Titan Black 4 ms 25 ms 28.1 s-28. 1 s -Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24. 4 s +Baseline Default Intel i7-4930K 3 ms 244 ms 231.0 s +Baseline Optimized Intel i7-4930K 2 ms 143 ms 149.0 s +Baseline Default GeForce 940MX 4 ms 120 ms 145.6 s +Baseline Default GTX 970 6 ms 32 ms 25.0 s-26.3 s +Baseline Default GTX 980 3 ms 24 ms 20.5 s-21.1 s +Baseline Default GTX 980 Ti 5 ms 27 ms 22.0 s-22.1 s +Baseline Default GTX 1070 2 ms 15 ms 14.4 s-14.5 s +Baseline Default Titan Black 4 ms 25 ms 28.1 s-28.1 s +Baseline Optimized Titan Black 3 ms 22 ms 24.4 s-24.4 s DenseNet-40-12 Default GeForce 940MX 27 ms 2403 ms — Table 5.3.: Speed comparison of the baseline model on CIFAR-10. The baseline model is evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken from [Ma j17]. -Weights the baseline model can be found at [Tho17b ]. The optimized Tensorflow build +Weights the baseline model can be found at [Tho17b]. The optimized Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. 5.1. Baseline Model and Training setup @@ -1760,19 +1761,19 @@ The distribution of filter weights by layer is visualized in Figure 5.2 and the of bias weights by layer is shown in Figure 5.3. 
Although both figures only show the distribution for one specific model trained on CIFAR-100, the following observed patterns are consistent for 70 models (7 datasets and 10 models per dataset): -• The empiric [0 .5 − percentile, 99 .5 − percentile] interval which contains 99 % of the +• The empiric [0.5 − percentile, 99.5 − percentile] interval which contains 99 % of the filter weights is almost symmetric around zero. The same is true for the bias weights. • The farther a layer is from the input away, the smaller the 99-percentile interval is, except for the last layer (see Table A.1). -• The 99-percentile interval of the first layers filter weights is about[−0. 5, +0. 5], except -for MNIST and HASYv2 where it is in [ −0. 8, 0.8]. -• The 99-percentile interval of the first layers bias weights is always in [ −0 .2 , 0 .2]. +• The 99-percentile interval of the first layers filter weights is about [−0.5, +0.5], except +for MNIST and HASYv2 where it is in [−0.8, 0.8]. +• The 99-percentile interval of the first layers bias weights is always in [−0.2, 0.2]. • The distribution of filter weights of the last convolutional layer is not symmetric. In some cases the distribution is also not unimodal. • The bias weights of the last three layers are very close to zero. The absolute value of most of them is smaller than 10−2 - . +. Similarly, Figure 5.4 and Figure 5.5 show the distribution of the γ and the β parameter of Batch Normalization. It is expected that γ is close to 1 and β is close to 0. In those cases, the Batch Normalization layer equals the identity and thus is only relevant for the training. @@ -1781,8 +1782,7 @@ layers, some observations are also consistent through all models even for differ • γ of the last layer (layer 16) is bigger than 1.3. • The 99-percentile interval for β of the last layer is longer than the other 99-percentile intervals. 
-• - The 99-percentile interval for β of the fourth-last (layer 14 for STL-10, layer 10 for +• The 99-percentile interval for β of the fourth-last (layer 14 for STL-10, layer 10 for all other models) is more negative then all other layers. Finally, the distribution of filter weight ranges is plotted in Figure 5.6 for each convolutional layer. The ranges are calculated for each channel and filter separately. The smaller the @@ -1790,17 +1790,17 @@ values are, the less information is lost if the filters are replaced by smaller 5. Experimental Evaluation Figure 5.2.: Violin plots of the distribution of filter weights of a baseline model trained on CIFAR100. - The weights of the first layer are relatively evenly spread in the interval[−0. 4, +0.4]. -With every layer the interval which contains95 % of the weights and is centered around + The weights of the first layer are relatively evenly spread in the interval [−0.4, +0.4]. +With every layer the interval which contains 95 % of the weights and is centered around the mean becomes smaller, especially with layer 11 where the feature maps are of -size 1 × 1 . In contrast to the other layers, the last convolutional layer has a bimodal +size 1 × 1. In contrast to the other layers, the last convolutional layer has a bimodal distribution. This plot indicates that the network might benefit from bigger filters in the first layer, whereas the filters in layers 7 – 11 could potentially be smaller. Figure 5.3.: Violin plots of the distribution of bias weights of a baseline model trained on CIFAR-100. -While the first layers biases are in[− 0. 1, +0.1], after each max-pooling layer the interval +While the first layers biases are in [−0.1, +0.1], after each max-pooling layer the interval which contains 95 % of the weights and is centered around the mean becomes smaller. -In the last three convolutional layer, most bias weights are in [− 0. 005 , +0.005] . 
+In the last three convolutional layer, most bias weights are in [−0.005, +0.005]. 5.1. Baseline Model and Training setup Figure 5.4.: Violin plots of the distribution of the γ parameter of Batch Normalization layers of a @@ -1825,27 +1825,29 @@ the start are also better at the end. In order to check this hypothesis, the rel validation accuracies for the 10 CIFAR-100 models was examined. If the relative ordering stays approximately the same, then it can be considered to run the first few epochs many times and only train the best models to the end. For 10 models, there can be 102 - −10 +−10 2 = 45 pair-wise changes in the ordering at maximum if the relative order of validation accuracies is reversed. For the baseline model, 21.8 changes in the relative order of accuracies occurred in average for each pair of epochs (i, i + 1). This means if one knows only the relative order of the validation accuracy of two models m and m - in epoch i , it is doubtful if one can + in epoch i, it is doubtful if one can make any statement about the ordering of m and m - in epoch i + 1 . + in epoch i + 1. 0 - 10 20 30 40 50 60 70 80 90 - 100 110 120 130 1400 .20 .30 .40 .50 .60 .7 - epochvalidation accuracy + 10 20 + 30 40 50 60 70 80 90 + 100 110 120 + 130 1400.20.30.40.50.60.7 + epochvalidationaccuracy maximum validation accuracy -minimum validation accuracy 1 .5 +minimum validation accuracy 1.5 2 -2 .5 +2.5 3 -3 .5 +3.5 4 -4 .5 loss +4.5 loss maximum validation accuracy minimum validation accuracy mean loss @@ -1886,7 +1888,7 @@ elements are set to 0 in order to make other elements easier to see. Figure 5.11b shows a confusion matrix with random mistakes. The first image of Figure 5.12 shows one example of a classifier with only 97.13 % test accuracy where a good permutation was found. Please note that this is not the best classifier. 
-The confusion matrix which resulted from a baseline classifier with99.32 % test accuracy is +The confusion matrix which resulted from a baseline classifier with 99.32 % test accuracy is displayed in as the second image. Those results suggest that the ordering of classes is a valuable tool to make patterns easier to see. Humans, however, are good at finding patterns even if they come from random noise. @@ -1906,7 +1908,7 @@ to Equation (4.1). The diagonal elements are set to 0 in order to make other ele easier to see. The symbols next to the label on the vertical axis indicate the shape and the color of the signs. The second image shows the same, but with baseline model. -Best viewed in electronic form. +Best viewed in electronic form. Figure 5.13.: The first 50 entries of the confusion matrix of the HASYv2 dataset. The diagonal elements are set to 0 in order to make other elements easier to see. The top image shows arbitrary class ordering, the bottom image shows the optimized ordering. @@ -1914,17 +1916,17 @@ shows arbitrary class ordering, the bottom image shows the optimized ordering. 5.3. Spectral Clustering vs CMO This section evaluates the clustering quality of CMO in comparison to the clustering quality of spectral clustering. -The evaluated model achieves 70 .50 % training accuracy and 53.16 % test accuracy on +The evaluated model achieves 70.50 % training accuracy and 53.16 % test accuracy on CIFAR-100. Figure 5.14 shows the sorted confusion matrix. Figure 5.14.: The first 50 entries of the ordered confusion matrix of the CIFAR-100 dataset. The diagonal elements are set to 0 in order to make other elements easier to see. Best viewed in electronic form. CIFAR-100 has pre-defined coarse classes. Those are used as a ground truth for the clusters -which are to be found. The number of errors is determined by(i)Join alln clusters which -contain the classes of the coarse classC to a set M . The error is n .(ii)Within M , find the +which are to be found. 
The number of errors is determined by (i) Join all n clusters which +contain the classes of the coarse class C to a set M . The error is n. (ii) Within M , find the set of classes M − - which do not belong to C .(iii)The final error is n + | M − - |. As can be + which do not belong to C . (iii) The final error is n + |M − +|. As can be seen in Table 5.4, both clustering methods find reasonable clusters. CMO, however, has only half the error of spectral clustering. The results for the HASYv2 dataset are qualitatively similar (see Table 5.5). It should be @@ -1938,7 +1940,7 @@ fish aquarium fish, orchid + flatfish + ray + shark, trout 4 flowers orchid, aquarium fish + sunflower + poppy, tulip + rose, -train 5 +train 5 orchid, aquarium fish + sunflower, poppy, tulip, rose 2 people baby, boy, man + girl + woman 2 baby, boy, girl, woman, man 0 @@ -1948,34 +1950,34 @@ wardrobe + dinosaur + lizard + dinosaur + snake + turtle, crab 6 trees maple, oak, pine + willow, forest -+ palm 3 palm, willow, pine, maple, oak 0 ++ palm 3 palm, willow, pine, maple, oak 0 Total 24 12 Table 5.4.: Differences in spectral clustering and CMO. Classes in a cluster are separated by , whereas clusters are separated by +. 
Cluster Spectral clustering Errors CMO Errors -A A , A, A 0 A , A, A , Å 1 -B B , B 0 B , B 0 -C C , c, ⊂ and C , ξ , E and C 4 C , c, ⊂ , C and C 1 -D D , D , D , 1 D , D , D 0 -E E and E , ε 2 E and E , ε , , ∈ 4 +A A, A, A 0 A, A, A , Å 1 +B B , B 0 B, B 0 +C C , c, ⊂ and C , ξ, E and C 4 C , c, ⊂, C and C 1 +D D, D, D , 1 D, D, D 0 +E E and E , ε 2 E and E , ε, , ∈ 4 F F and F , F 1 F and F , F 1 -H H and H , κ and H 3 H and H , H 1 +H H and H , κ and H 3 H and H, H 1 K K , κ 0 K , κ 0 L L, and L, L 1 L, and L, L 1 -M M and M and M 2 M and µ , M and M 3 -N N and N , N and N 2 N and N , N and N , ℵ 3 -O O , O , 0, ◦, °, and o 1 O , O , 0, ◦, ° and and o 2 -P P , P and p, ρ and P and ℘ 3 P and P , P , ℘ and p , ρ 2 -Q Q, Q , Q, ι , , , , , Æ, 1 7 Q and Q , Q 1 -R R , R and R , R, k and 3 R and , R, R, R 1 -S S , s , S 0 S , s , S 0 +M M and M and M 2 M and µ, M and M 3 +N N and N, N and N 2 N and N, N and N , ℵ 3 +O O, O, 0, ◦, °, and o 1 O, O, 0, ◦, ° and and o 2 +P P , P and p, ρ and P and ℘ 3 P and P , P , ℘ and p, ρ 2 +Q Q, Q, Q, ι, , , , , Æ, 1 7 Q and Q, Q 1 +R R, R and R, R, k and 3 R and , R, R, R 1 +S S , s, S 0 S , s, S 0 T T , and T , τ 1 T , and T , τ 1 -U U , ∪ and u , U , A 1 U , u, U , A and ∪ 2 -V V , v , ∨ 0 V , v , ∨ 0 -W W , w , ω 0 W , w and ω 1 -X X , x , X , χ , × 0 X , x , X , χ, × 0 +U U , ∪ and u, U , A 1 U , u, U , A and ∪ 2 +V V , v, ∨ 0 V , v, ∨ 0 +W W , w, ω 0 W , w and ω 1 +X X , x, X , χ, × 0 X , x, X , χ, × 0 Y Y and y 1 Y , y 0 -Z Z , z , Z and Z, Z 1 Z , z , Z, Z , Z 0 +Z Z , z, Z and Z, Z 1 Z , z, Z, Z , Z 0 Total 34 25 Table 5.5.: Differences in spectral clustering and CMO. @@ -1995,9 +1997,9 @@ push the accuracy in the ful l column only to 63.50 % due to errors of the root where the root classifier does not predict the correct cluster. The leaf classifiers use the same topology as the root classifier. By initializing them with the root classifiers weights their performance can be pushed at about the inner accuracy. 
-They are, however, only useful if their accuracy is well above theinner accuracy of the root +They are, however, only useful if their accuracy is well above the inner accuracy of the root classifier. Hence, for CIFAR-100, building hierarchies of classifiers is not useful. -Cluster Classes accuracy +Cluster Classes accuracy root classifier leaf classifier cluster identified class identified | cluster class identified | cluster 1 3 69.67 % 84.27 % 72.98 % @@ -2015,19 +2017,19 @@ cluster identified class identified | cluster class identified | cluster 13 2 64.00 % 82.58 % 86.27 % 14 2 79.67 % 89.85 % 89.10 % Table 5.6.: Accuracies of the root classifier trained on the full set of 100 classes evaluated on -14 clusters of classes. Each class has 100 elements to test. The columncluster identified +14 clusters of classes. Each class has 100 elements to test. The column cluster identified gives the percentage that the root classifiers argmax prediction is within the correct -cluster, but not necessarily the correct class. The columnsclass identified | cluster only +cluster, but not necessarily the correct class. The columns class identified | cluster only consider data points where the root classifier correctly identified the cluster. 5. Experimental Evaluation 5.5. Increased width for faster learning More filters in one layer could simplify the optimization problem as each filter needs smaller updates. Hence a CNN N with n - i filters in layer i is expected to take more epochs than a +i filters in layer i is expected to take more epochs than a CNN N with 2 · n - i filters in layer i to achieve the same validation accuracy. +i filters in layer i to achieve the same validation accuracy. This hypothesis can be falsified by training a CNN N and a CNN N and comparing the trained number of epochs. 
As more filters can lead to different results depending on the @@ -2036,27 +2038,27 @@ given in Table 5.7 Name Layer Filter count Total Baseline New parameters m - 9 9 64 638 5 978 566 +9 9 64 638 5 978 566 m 9 9 64 974 8 925 622 m - 11 11 512 3786 5 982 698 +11 11 512 3786 5 982 698 m 11 11 512 1024 1 731 980 m - 13 13 512 8704 5 982 092 +13 13 512 8704 5 982 092 Table 5.7.: Models which are identical to the baseline, except that the number of filters of one layer was increased. The detailed results are given in Table 5.8. As expected, the number of training epochs of the models with increased numbers of parameters is lower. The wall-clock time, however, is higher due to the increase in computation per forward- and backward-pass. For m -9 , m - 11 and m -13 , the filter weight range of the layer with increased capacity decreases +9, m +11 and m +13, the filter weight range of the layer with increased capacity decreases compared to Figure 5.6, the filter weights of the layer with increased capacity are more concentrated around zero compared to Figure 5.2. For model m -13 , the distribution of +13, the distribution of weight of the output layer changed to a more bell-shaped distribution. Except for this, the distribution of filter weights in other layers did not change for all three models compared to the baseline. @@ -2065,19 +2067,19 @@ Single Model Ensemble Mean Epochs Mean Time Mean std baseline 944 012 63.38 % 0.55 64.70 % 154.7 3856 s m - 9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s +9 5 978 566 65.53 % 0.37 66.72 % 105.7 4472 s m 9 8 925 622 65.10 % 1.09 66.54 % 95.6 5261 s m -11 5 982 698 65. 73 % 0.77 67. 38 % 149.2 5450 s +11 5 982 698 65.73 % 0.77 67.38 % 149.2 5450 s m 11 1 731 980 62.12 % 0.48 62.89 % 143.6 3665 s m 13 5 982 092 62.39 % 0.66 63.77 % 147.8 4485 s Table 5.8.: Training time in epochs and wall-clock time for the baseline and models m -9 , m -11 , m - 13 +9, m +11, m +13 as well as their accuracies. 5.6. 
Weight updates @@ -2109,22 +2111,22 @@ with more filters is called wider [ZK16], a convolutional layer with fewer filte narrower and the number of filters in a convolutional layer is the layers width. If the number of parameters which may be used for the feature map scale is fixed and high enough, there are still many combinations. If n - i with i = 0, . . . , k is the number of output +i with i = 0, . . . , k is the number of output feature maps of layer i where i = 0 is the input layer and all filters are 3 × 3 filters without a bias, then the number of parameters is Parameters = k -i =1 - (n - i − 1 · 3 2 +i=1 +(n +i−1 · 32 + 1) · n - i +i Hence the width of one layer does not only influence the parameters in this layer, but also in the next layer. The number of possible subsequent layers of one feature map size is enormous, even if constraints are placed on the number of parameters. For example, the first convolutional layer of the baseline model has 896 parameters. If one assumes that less than 3 filters per -layer are not desirable, one keeps all layers having a bias and all layers only use3 × 3 filters, +layer are not desirable, one keeps all layers having a bias and all layers only use 3 × 3 filters, then the maximum depth is 10. If one furthermore assumes that at least 800 parameters should be used, there are still 120 possible layer combinations. As experimentally evaluating one layer combination takes about 10 hours on a GTX 970 for CIFAR-100 it is not possible @@ -2138,12 +2140,12 @@ be learned. The deeper the filter is in the network, the higher is the abstracti concept. In most cases, both is necessary: Many different concepts (width) and high-level concepts (depth). Reducing the two first convolutional layers of the baseline model (see Page 39) to one -convolutional layer of 48 filters ( 944 396 parameters in total, whereas the baseline model -has 944 012 parameters) resulted in a mean accuracy of 61.64 % (- 1. 
74 %) and a standard -deviation of σ = 1 .12 (+0.57). The ensemble achieved 63.18 % (- 1 .52 %). As expected, +convolutional layer of 48 filters (944 396 parameters in total, whereas the baseline model +has 944 012 parameters) resulted in a mean accuracy of 61.64 % (-1.74 %) and a standard +deviation of σ = 1.12 (+0.57). The ensemble achieved 63.18 % (-1.52 %). As expected, the training time per epoch was reduced. For the GTX 980, it was reduced from 22.0 s of the baseline model to 15 s of the model with one less convolutional layer, one less Batch -Normalization and one less activation layer. The inference time was also reduced from6 ms +Normalization and one less activation layer. The inference time was also reduced from 6 ms 5.8. Batch Normalization to 4 ms for 1 image and from 32 ms to 23 ms for 128 images. Due to the loss in accuracy of @@ -2156,28 +2158,28 @@ for the first block at the 32 px × 32 px feature map scale. The two convolution convolutional layer with 26 filters in the convolution - BN - ELU pattern. The model has 944 132 parameters. Compared to the baseline model, the time for inference was the same. This is unexpected, because the inference time changed when a layer was removed at -this scale. The mean test accuracy was 63. 66 % (+0.28) and the standard deviation was +this scale. The mean test accuracy was 63.66 % (+0.28) and the standard deviation was σ = 1.03 (+0.48). The ensemble achieved 64.91 % test accuracy (+0.21). Having two nonlinearities at each feature map scale could be important to learn nonlinear transformations at that scale. As the baseline model does only have one nonlinearity at the 8 × 8 feature maps scale, another convolutional layer with 64 filters, Batch Normalization and ELU was added. To keep the number of parameters constant, layer 11 of the baseline model was reduced from 512 filters to 488 filters. The new model achieves a mean accuracy -of 63. 09 % (-0.29) with a standard deviation of σ = 0 . 70 (+0.15). 
The ensemble achieves -an accuracy of 64 .39 % (+0.31). This could indicate that having two convolutional layers +of 63.09 % (-0.29) with a standard deviation of σ = 0.70 (+0.15). The ensemble achieves +an accuracy of 64.39 % (+0.31). This could indicate that having two convolutional layers is more important for layers close to the input than intermediate layer. Alternatively, the parameters could be more important in layer 11 than having a new convolutional layer after layer 9. In order to control the hypothesis that having two convolutional layers are less important in -the middle of a network, the second convolutional layer at the16 × 16 feature map scale is +the middle of a network, the second convolutional layer at the 16 × 16 feature map scale is removed. The first convolutional layer was increased from 32 filters to 59 filters, the second convolutional layer was increased from 32 filter s to 58 filters in order to keep the amount of -parameters of the model constant. The adjusted model achieved 62. 72 % (-0.66) mean test -accuracy with a standard deviation of σ = 0 .84 (+0.29). The ensemble achieved 63.88 % +parameters of the model constant. The adjusted model achieved 62.72 % (-0.66) mean test +accuracy with a standard deviation of σ = 0.84 (+0.29). The ensemble achieved 63.88 % test accuracy (-0.66). -Even more extreme, if both convolutional layers are removed from the16 × 16 feature map -scale, the mean test accuracy drops to61 .21 % (-2.17) with a standard deviation ofσ = 0.51 -(-0.04). The ensemble achieves a test accuracy of63.07 % (-1.63). Thus it is very important +Even more extreme, if both convolutional layers are removed from the 16 × 16 feature map +scale, the mean test accuracy drops to 61.21 % (-2.17) with a standard deviation of σ = 0.51 +(-0.04). The ensemble achieves a test accuracy of 63.07 % (-1.63). Thus it is very important to have at least one convolutional layer at this feature map scale. 5.8. 
Batch Normalization In [CUH15], the authors write that Batch Normalization does not improve ELU networks. @@ -2185,7 +2187,7 @@ Hence the effect of removing Batch Normalization from the baseline is investigat 5. Experimental Evaluation experiment. -As before, 10 models are trained on CIFAR-100. The training setup and the modelm +As before, 10 models are trained on CIFAR-100. The training setup and the model m no-bn are identical to the baseline model m, except that in m no-bn the Batch Normalization layers @@ -2194,14 +2196,14 @@ One notable difference is the training time: While m needs 21 ms per epoch in av a GTX 980, m no-bn only needs 21 ms per epoch. The number of epochs used for training, however, also increased noticeably from 149 epochs to 178 epochs in average. The standard -deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs form -no-bn . +deviation of trained epochs is 17.3 epochs for the baseline model and 23.4 epochs for m +no-bn. The mean accuracy of m -no-bn is 62. 86 % and hence 0.52 percentage points worse. The +no-bn is 62.86 % and hence 0.52 percentage points worse. The standard deviation between models increased from 0.55 to 0.61. This is likely a result of the early stopping policy and the differences in training epochs. This can potentially be fixed by retraining the models which stopped earlier than the model which was trained for the -biggest amount of epochs. The ensemble test accuracy is63.88 % and hence 0.82 percentage +biggest amount of epochs. The ensemble test accuracy is 63.88 % and hence 0.82 percentage points worse than the baseline. The filter weight range and distribution is approximately the same as Figure 5.6 and Figure 5.2, but the distribution of bias weights changed noticeably: While the bias weights of @@ -2224,56 +2226,56 @@ Batch Normalization. 5.9. Batch size 5.9. 
Batch size The mini-batch size m ∈ N - ≥1 influences -• Epochs until convergence : The smaller m, the more often the model is updated +≥1 influences +• Epochs until convergence: The smaller m, the more often the model is updated in one epoch. Those updates, however, are based on fewer samples of the dataset. Hence the gradients of different mini-batches can noticeably differ. In the literature, -this is referred to as gradient noise [KMN + - 16]. +this is referred to as gradient noise [KMN+ +16]. • Training time per epoch - : The smaller the batch size, the higher the training time +: The smaller the batch size, the higher the training time per epoch as the hardware is not optimally utilized. -• Resulting model quality : The choice of the hyperparameter m influences the +• Resulting model quality: The choice of the hyperparameter m influences the accuracy of the classifier when training is finished. [KMN+ - 16] supports the view that +16] supports the view that smaller m result in less sharp minima. Hence smaller m lead to better generalization. Empiric evaluation results can be found in Table 5.9. Those results confirm the claim -of [KMN + - 16] that lower batch sizes generalize better. +of [KMN+ +16] that lower batch sizes generalize better. m Training Epochs Mean total Single model Ensemble time training time Accuracy std Accuracy 8 118 s -epoch 81 – 153 14 131 s 61 .93 % σ = 1.03 65.68 % +epoch 81 – 153 14 131 s 61.93 % σ = 1.03 65.68 % 16 62 s -epoch 103 – 173 8349 s 64 . 16 % σ = 0.81 66. 98 % +epoch 103 – 173 8349 s 64.16 % σ = 0.81 66.98 % 32 35 s -epoch 119 – 179 5171 s 64 .11 % σ = 0.75 65.89 % +epoch 119 – 179 5171 s 64.11 % σ = 0.75 65.89 % 64 25 s -epoch 133 – 195 2892 s 63. 38 % σ = 0. 
55 64.70 % +epoch 133 – 195 2892 s 63.38 % σ = 0.55 64.70 % 128 18 s -epoch 145 – 239 3126 s 62 .23 % σ = 0.73 63.55 % +epoch 145 – 239 3126 s 62.23 % σ = 0.73 63.55 % Table 5.9.: Training time per epoch and single model test set accuracy (mean and standard deviation) of baseline models trained with different mini-batch sizes m on GTX 970 GPUs on CIFAR-100. 5.10. Bias Figure 5.3 suggests that the bias is not important for the layers 11, 13 and 15. Hence a model m -no-bias is created which is identical to the baseline modelm, except that the bias of +no-bias is created which is identical to the baseline model m, except that the bias of layers 11, 13 and 15 is removed. The mean test accuracy of 10 trained m no-bias is 63.74 % which is an improvement of -0.36 percentage points over the baseline. The ensemble achieves a test accuracy of65.13 % +0.36 percentage points over the baseline. The ensemble achieves a test accuracy of 65.13 % which is 0.43 percentage points better than the baseline. Hence the bias can safely be removed. Removing the biases did not have a noticeable effect on the filter weight range, the filter -weight distribution or the distribution of the remaining biases. Also, theγ and β parameters +weight distribution or the distribution of the remaining biases. Also, the γ and β parameters of the Batch Normalization layers did not noticeably change. 5. Experimental Evaluation 5.11. Learned Color Space Transformation In [MSM16] it is described that placing one convolutional layer with 10 filters of size 1 × 1 -directly after the input and then another convolutional layer with 3 filters of size1 × 1 acts +directly after the input and then another convolutional layer with 3 filters of size 1 × 1 acts as a learned transformation in another color space and boosts the accuracy. 
This approach was evaluated on CIFAR-100 by adding a convolutional layer with ELU activation and 10 filters followed by another convolutional layer with ELU activation and @@ -2286,21 +2288,21 @@ The inference time for 1 image and for 128 images did not change compared to the The training time per epoch increased from 26 s to 30 s on the GTX 970. Hence it is not advisable to use the learned color space transformation. 5.12. Pooling -An alternative to max pooling with stride 2 with a2 × 2 kernel is using a 3 × 3 kernel with +An alternative to max pooling with stride 2 with a 2 × 2 kernel is using a 3 × 3 kernel with stride 2. This approach was evaluated on CIFAR-100 by replacing all max pooling layers with the -3 × 3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was63. 32 % -(−0.06) and the standard deviation was 0.57 (+0. 02). The ensemble achieved 65.15 % test -accuracy ( +0 .45). -The training time per epoch decreased from20.5 s-21 .1 s to 18. 6 s (mean of 10 training runs) +3 × 3 kernel max pooling (and SAME padding). The mean accuracy of 10 models was 63.32 % +(−0.06) and the standard deviation was 0.57 (+0.02). The ensemble achieved 65.15 % test +accuracy (+0.45). +The training time per epoch decreased from 20.5 s-21.1 s to 18.6 s (mean of 10 training runs) on the Nvidia GTX 970. The time for inference increased from 25 ms to 26 ms for a batch of 128 images. 5.13. Activation Functions Nonlinear, differentiable activation functions are important for neural networks to allow them to learn nonlinear decision boundaries. One of the simplest and most widely used activation -functions for CNNs is ReLU [ KSH12], but others such as ELU [ CUH15], parametrized -rectified linear unit (PReLU) [ HZRS15b], softplus [ ZYL+ - 15] and softsign [ BDLB09 ] have +functions for CNNs is ReLU [KSH12], but others such as ELU [CUH15], parametrized +rectified linear unit (PReLU) [HZRS15b], softplus [ZYL+ +15] and softsign [BDLB09] have been proposed. 
The baseline uses ELU. 5.13. Activation Functions @@ -2308,8 +2310,8 @@ Activation functions differ in the range of values and the derivative. The defin other comparisons of eleven activation functions are given in Table B.3. Theoretical explanations why one activation function is preferable to another in some scenarios are the following: -• Vanishing Gradient : Activation functions like tanh and the logistic function saturate - outside of the interval [ −5 , 5] . This means weight updates are very small for +• Vanishing Gradient: Activation functions like tanh and the logistic function saturate + outside of the interval [−5, 5]. This means weight updates are very small for preceding neurons, which is especially a problem for very deep or recurrent networks as described in [BSF94]. Even if the neurons learn eventually, learning is slower [KSH12]. • Dying ReLU: The dying ReLU problem is similar to the vanishing gradient problem. @@ -2317,7 +2319,7 @@ The gradient of the ReLU function is 0 for all non-positive values. This means i elements of the training set lead to a negative input for one neuron at any point in the training process, this neuron does not get any update and hence does not participate in the training process. This problem is addressed in [MHN13]. -• Mean unit activation: Some publications like [ CUH15, IS15] claim that mean +• Mean unit activation: Some publications like [CUH15, IS15] claim that mean unit activations close to 0 are desirable. They claim that this speeds up learning by reducing the bias shift effect. The speedup of learning is supported by many experiments. Hence the possibility of negative activations is desirable. @@ -2332,8 +2334,8 @@ logistic function performs so bad is that it cannot produce negative outputs. 
He logistic− function was developed: logistic− - ( x) = 1 -1 + e− x − 0 .5 +(x) = 1 +1 + e−x − 0.5 The logistic− function has the same derivative as the logistic function and hence still suffers from the vanishing gradient problem. The network with the logistic− @@ -2342,7 +2344,7 @@ accuracy which is 11.30 % better than the network with the logistic function, bu 5.54 % worse than the ELU. Similarly, ReLU was adjusted to have a negative output: ReLU− - (x ) = max(−1 , x) = ReLU (x + 1) − 1 +(x) = max(−1, x) = ReLU(x + 1) − 1 The results of ReLU− are much worse on the training set, but perform similar on the test @@ -2350,11 +2352,11 @@ The results of ReLU− set. The result indicates that the possibility of hard zero and thus a sparse representation is either not important or similar important as the possibility to produce negative outputs. This contradicts [GBB11, SMGS14]. -A key difference between the logistic − +A key difference between the logistic− function and ELU is that ELU does neither suffers from the vanishing gradient problem nor is its range of values bound. For this reason, the S2ReLU activation function, defined as -S2ReLU( x ) = ReLU ( x +S2ReLU(x) = ReLU ( x 2 + 1) − ReLU (− x 2 + 1) =   @@ -2369,8 +2371,8 @@ S2ReLU( x ) = ReLU ( x x if − 2 ≤ x ≤ 2 x 2 + 1 if x > −2 -This function is similar to SReLUs as introduced in [JXF + - 16]. The difference is that S2ReLU +This function is similar to SReLUs as introduced in [JXF+ +16]. The difference is that S2ReLU does not introduce learnable parameters. The S2ReLU was designed to be symmetric, be the identity close to zero and have a smaller absolute value than the identity farther away. It is easy to compute and easy to implement. @@ -2380,43 +2382,42 @@ dataset. Results for MNIST can be found in Table 5.13 and for HASYv2 in Table A. both datasets, the logistic function has a much shorter training time and a noticeably lower test accuracy. 
Function Vanishing Gradient Negative Activation possible Bound activation -Identity No Yes No +Identity No Yes No Logistic Yes No Yes Logistic− Yes Yes Yes -Softmax Yes Yes Yes -tanh Yes Yes Yes -Softsign - Yes Yes Yes -ReLU Yes 1 +Softmax Yes Yes Yes +tanh Yes Yes Yes +Softsign Yes Yes Yes +ReLU Yes1 No Half-sided -Softplus No No Half-sided -S2ReLU No Yes No +Softplus No No Half-sided +S2ReLU No Yes No LReLU/PReLU No Yes No -ELU No Yes No +ELU No Yes No Table 5.10.: Properties of activation functions. 1 - The dying ReLU problem is similar to the vanishing gradient problem. +The dying ReLU problem is similar to the vanishing gradient problem. 5.13. Activation Functions Function Single model Ensemble of 10 Training set Test set Training set Test set -Identity 66.25 % σ = 0 . 77 56 .74 % σ = 0. 51 68.77 % 58 .78 % -Logistic 51.87 % σ = 3.64 46 .54 % σ = 3. 22 61.19 % 54 .58 % +Identity 66.25 % σ = 0.77 56.74 % σ = 0.51 68.77 % 58.78 % +Logistic 51.87 % σ = 3.64 46.54 % σ = 3.22 61.19 % 54.58 % Logistic− - 66.49 % σ = 1.99 57 .84 % σ = 1. 15 69.04 % 60 .10 % -Softmax 75.22 % σ = 2.41 59 .49 % σ = 1. 25 78.87 % 63 .06 % -Tanh 67. 27 % σ = 2.38 55 .70 % σ = 1. 44 70.21 % 58 .10 % -Softsign 66. 43 % σ = 1.74 55 .75 % σ = 0. 93 69.78 % 58 .40 % -ReLU 78. 62 % σ = 2.15 62 .18 % σ = 0. 99 81.81 % 64 .57 % + 66.49 % σ = 1.99 57.84 % σ = 1.15 69.04 % 60.10 % +Softmax 75.22 % σ = 2.41 59.49 % σ = 1.25 78.87 % 63.06 % +Tanh 67.27 % σ = 2.38 55.70 % σ = 1.44 70.21 % 58.10 % +Softsign 66.43 % σ = 1.74 55.75 % σ = 0.93 69.78 % 58.40 % +ReLU 78.62 % σ = 2.15 62.18 % σ = 0.99 81.81 % 64.57 % ReLU− - 76. 01 % σ = 2.31 62 .87 % σ = 1. 08 78.18 % 64 .81 % -Softplus 66. 75 % σ = 2.45 56 .68 % σ = 1. 32 71.27 % 60 .26 % -S2ReLU 63. 32 % σ = 1.69 56 .99 % σ = 1. 14 65.80 % 59 .20 % -LReLU 74. 92 % σ = 2.49 61 .86 % σ = 1. 23 77.67 % 64 .01 % -PReLU 80 .01 % σ = 2.03 62 .16 % σ = 0. 73 83. 50 % 64. 79 % -ELU 76. 64 % σ = 1.48 63. 38 % σ = 0. 
55 78.30 % 64 .70 % + 76.01 % σ = 2.31 62.87 % σ = 1.08 78.18 % 64.81 % +Softplus 66.75 % σ = 2.45 56.68 % σ = 1.32 71.27 % 60.26 % +S2ReLU 63.32 % σ = 1.69 56.99 % σ = 1.14 65.80 % 59.20 % +LReLU 74.92 % σ = 2.49 61.86 % σ = 1.23 77.67 % 64.01 % +PReLU 80.01 % σ = 2.03 62.16 % σ = 0.73 83.50 % 64.79 % +ELU 76.64 % σ = 1.48 63.38 % σ = 0.55 78.30 % 64.70 % Table 5.11.: Training and test accuracy of adjusted baseline models trained with different activation functions on CIFAR-100. For LReLU, α = 0.3 was chosen. Function Inference per Training @@ -2455,14 +2456,14 @@ of Keras 2.0.4 or Tensorflow 1.1.0. 5. Experimental Evaluation Function Single model Ensemble Epochs Accuracy std Accuracy Range Mean -Identity 99.45 % σ = 0. 09 99.63 % 55 – 77 62.2 -Logistic 97.27 % σ = 2. 10 99.48 % 37 – 76 54.5 -Softmax 99.60 % σ = 0. 03 99.63 % 44 – 73 55.6 +Identity 99.45 % σ = 0.09 99.63 % 55 – 77 62.2 +Logistic 97.27 % σ = 2.10 99.48 % 37 – 76 54.5 +Softmax 99.60 % σ = 0.03 99.63 % 44 – 73 55.6 Tanh 99.40 % σ = 0.09 99.57 % 56 – 80 67.6 Softsign 99.40 % σ = 0.08 99.57 % 72 – 101 84.0 -ReLU 99. 62 % σ = 0.04 99. 73 % 51 – 94 71.7 +ReLU 99.62 % σ = 0.04 99.73 % 51 – 94 71.7 Softplus 99.52 % σ = 0.05 99.62 % 62 – 70 68.9 -PReLU 99.57 % σ = 0.07 99. 73 % 44 – 89 71.2 +PReLU 99.57 % σ = 0.07 99.73 % 44 – 89 71.2 ELU 99.53 % σ = 0.06 99.58 % 45 – 111 72.5 Table 5.13.: Test accuracy of adjusted baseline models trained with different activation functions on MNIST. @@ -2470,45 +2471,45 @@ on MNIST. Ensembles consisting of n models trained by the same procedure on the same data but initialized with different weights and trained with a different order of the training data perform consistently better than single models. One drawback of ensembles in applications -such as self-driving cars is that they increase the computation by a factor of n . One idea +such as self-driving cars is that they increase the computation by a factor of n. 
One idea why they improve the test accuracy is by reducing the variance. The idea of label smoothing is to use the ensemble prediction of the training data as labels for another classifier. For every element x of the training set, the one-hot encoded target -t (x ) is smoothed by the ensemble prediction y - E (x ) +t(x) is smoothed by the ensemble prediction y +E (x) t - (x ) = α · t (x ) + (1 − α )y - E (x ) +(x) = α · t(x) + (1 − α)y +E (x) where α ∈ [0, 1] is the smoothing factor. There are three reasons why label smoothing could be beneficial: -• Training speed : The ensemble prediction contains more information about the +• Training speed: The ensemble prediction contains more information about the image than binary class decisions. Classifiers in computer vision predict how similar the input looks to other input of the classes they are trained on. By smoothing the labels, the information that one image could also belong to another class is passed to the optimizer. In early stages of the optimization this could lead to a lower loss on the non-smoothed validation set. -• Higher accuracy : Using smoothed labels for the optimization could lead to a higher +• Higher accuracy: Using smoothed labels for the optimization could lead to a higher accuracy of the base-classifier due to a smoothed error surface. It might be less likely 5.14. Label smoothing that the classifier gets into bad local minima. -• Label noise : Depending on the way how the labels are obtained, it might not always +• Label noise: Depending on the way how the labels are obtained, it might not always be clear which label is the correct one. Also, labeling errors can be present in training datasets. Those errors severely harm the training. By smoothing the labels errors could be relaxed. 10 models m - smooth are trained with the α = 0 . 5 smoothed labels from the prediction +smooth are trained with the α = 0.5 smoothed labels from the prediction of an ensemble of 10 baseline models. 
The mean accuracy of the models trained on the -smoothed training set labels was63.61 % (+0. 23 %) and the standard deviation wasσ = 0.72 -(+0 .17 %). The ensemble of 10 m - smooth models achieved 64 .79 % accuracy (+0 .09 %). Hence +smoothed training set labels was 63.61 % (+0.23 %) and the standard deviation was σ = 0.72 +(+0.17 %). The ensemble of 10 m +smooth models achieved 64.79 % accuracy (+0.09 %). Hence the effect of this kind of label smoothing on the final accuracy is questionable. The training speed didn’t noticeably change either: The number of trained epochs ranged from 144 to 205, the mean number of epochs was 177. The baseline training ranged from 146 to 232 epochs with a mean of 174 epochs. After 10, 30 and 80 epochs both training methods accuracy differed by less than one percentage point. Hence it is unlikely that label smoothing has a positive effect on the training speed. -Hinton et al. called this method distil lation in [ HVD15]. Hinton et al. used smooth and +Hinton et al. called this method distil lation in [HVD15]. Hinton et al. used smooth and hard labels for training, this work only used smoothed labels. 5. Experimental Evaluation @@ -2545,50 +2546,51 @@ Dropout 0.5 0 0 512 @ 1 × 1 Dropout 0.5 0 0 512 @ 1 × 1 15 Convolution k @ 1 × 1 × 512 / 1 512 · k 512 · k k @ 1 × 1 Global avg Pooling 1 × 1 0 k k @ 1 × 1 -16 BN + Softmax 2 k 7k k @ 1 × 1 +16 BN + Softmax 2k 7k k @ 1 × 1 - 514 k -+947 654 520 k + 514k ++947 654 520k +87 870 996 179 200+2k Table 5.14.: Optimized architecture with 3 input channels of size 32 × 32. All convolutional layers use SAME padding, except for layer 11 which used VALID padding in order to decrease -the feature map size to 1 × 1. If the input feature map is bigger than32 × 32, for each +the feature map size to 1 × 1. If the input feature map is bigger than 32 × 32, for each power of two there are two Convolution + BN + ELU blocks and one Max pooling block added. This is the framed part in the table. -5.15. 
Optimized Classifier32 × 32Input -C 69@3 × 3 / 1 +5.15. Optimized Classifier +32 × 32Input +C 69@3 × 3/1 BN + ELU -C 69@3 × 3 / 1 -BN + ELU 16 × 16max pooling 3 × 3 /2 -C 64@3 × 3 /1 +C 69@3 × 3/1 +BN + ELU 16 × 16max pooling 3 × 3/2 +C 64@3 × 3/1 BN + ELU -C 64@3 × 3 /1 -BN + ELU 8 × 8max pooling 3 × 3 /2 -C 64@3 × 3 /1 -BN + ELU 4 × 4max pooling 3 × 3 /2 -C* 512@4 × 4 /1 (V) +C 64@3 × 3/1 +BN + ELU 8 × 8max pooling 3 × 3/2 +C 64@3 × 3/1 +BN + ELU 4 × 4max pooling 3 × 3/2 +C* 512@4 × 4/1 (V) BN + ELU -Dropout, p = 0. 5 1 × 1C* 512@1 × 1 /1 +Dropout, p = 0.5 1 × 1C* 512@1 × 1/1 BN + ELU -Dropout, p = 0. 5 -C* k @1 × 1 / 1 +Dropout, p = 0.5 +C* k@1 × 1/1 Global AVG pooling BN + Softmax -Figure 5.16.: Architecture of the optimized model. C 32@3 × 3 / 1 is a convolutional layer with +Figure 5.16.: Architecture of the optimized model. C 32@3 × 3/1 is a convolutional layer with 32 filters of kernel size 3 × 3 with stride 1. The * indicates that no bias is used. Dataset Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set -Asirra 95. 83 % σ = 4.70 90.75 % σ = 4. 73 98 . 78 % 93.09 % -CIFAR-10 94. 58 % σ = 0.70 87.92 % σ = 0. 46 96 . 47 % 89.86 % -CIFAR-100 77. 96 % σ = 2.18 64.42 % σ = 0. 73 81 . 44 % 67.03 % -GTSRB 100. 00 % σ = 0.00 99.28 % σ = 0. 10 100 . 00 % 99.51 % -HASYv2 88. 79 % σ = 0.45 85.36 % σ = 0. 15 89 . 36 % 85.92 % -MNIST 99. 88 % σ = 0.10 99.48 % σ = 0. 13 99 . 99 % 99.67 % -STL-10 95. 43 % σ = 3.57 75.09 % σ = 2. 39 98 . 54 % 78.66 % -SVHN 99. 08 % σ = 0.07 96.37 % σ = 0. 12 99 . 
50 % 97.47 % +Asirra 95.83 % σ = 4.70 90.75 % σ = 4.73 98.78 % 93.09 % +CIFAR-10 94.58 % σ = 0.70 87.92 % σ = 0.46 96.47 % 89.86 % +CIFAR-100 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 % +GTSRB 100.00 % σ = 0.00 99.28 % σ = 0.10 100.00 % 99.51 % +HASYv2 88.79 % σ = 0.45 85.36 % σ = 0.15 89.36 % 85.92 % +MNIST 99.88 % σ = 0.10 99.48 % σ = 0.13 99.99 % 99.67 % +STL-10 95.43 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % +SVHN 99.08 % σ = 0.07 96.37 % σ = 0.12 99.50 % 97.47 % Table 5.15.: Optimized model accuracy on eight datasets. The single model actuary is the 10 models -used in the ensemble. The empirical standard deviationσ of the accuracy is also given. +used in the ensemble. The empirical standard deviation σ of the accuracy is also given. CIFAR-10, CIFAR-100 and STL-10 models use test-time transformations. None of the models uses unlabeled data or data from other datasets. For MNIST, GTSRB, SVHN and HASY, no test time transformations are used. @@ -2604,16 +2606,16 @@ Optimized Default GTX 1070 2 ms 24 ms 21 s Optimized Default Titan Black 4 ms 46 ms 43 s Table 5.16.: Speed comparison of the optimized model on CIFAR-10. The baseline model is evaluated on six Nvidia GPUs and one CPU. The weights for DenseNet-40-12 are taken -from [ Ma j17]. Weights the baseline model can be found at [ Tho17b]. The optimized +from [Ma j17]. Weights the baseline model can be found at [Tho17b]. The optimized Tensorflow build makes use of SSE4.X, AVX, AVX2 and FMA instructions. 5. Experimental Evaluation 5.16. Early Stopping vs More Data -A separate validation set is necessary for two reasons:(1)Early stopping and(2)preventing +A separate validation set is necessary for two reasons: (1) Early stopping and (2) preventing overfitting due to many experiments. To prevent overfitting, a different dataset can be used. 
For example, all decisions about hyperparameters in this thesis are based on CIFAR-100, but the network is finally trained and evaluated with the same hyperparameters on all -datasets. 2 +datasets.2 The validation set can hence be removed if early stopping is removed. Instead, the validation data is used in a first run to determine the number of epochs necessary for training. In a second training run the validation data is added to the training set. The @@ -2640,27 +2642,27 @@ when the training loss was used as the early stopping criterion. 5.17. Regularization Stronger regularization might even improve the results when using the training loss as an early stopping criterion. - 2 regularization with a weighting factor of λ = 0 .0001 is used in +2 regularization with a weighting factor of λ = 0.0001 is used in all other experiments. While the accuracy as shown in Table 5.19 does not show a clear pattern, the number of epochs increases with lower model regularization (see Table 5.20). 2 - Except data augmentation and test time transformations. +Except data augmentation and test time transformations. 3 - Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model. +Only 1 model is trained due to the long training time of 581 epochs and 12 hours for this model. 4 - Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. +Only 3 models are in this ensemble due to the long training time of more than 8 hours per model. 5.17. Regularization Dataset Early Stopping Fixed epochs val. acc train loss -Asirra 93. 09 % 96. 01 %3 +Asirra 93.09 % 96.01 %3 96.01 % -CIFAR-10 89. 86 % 91. 75 % 88 .88 % -CIFAR-100 67. 03 % 71. 01 % 69 .08 % -HASYv2 85. 92 % 82. 89 %4 +CIFAR-10 89.86 % 91.75 % 88.88 % +CIFAR-100 67.03 % 71.01 % 69.08 % +HASYv2 85.92 % 82.89 %4 85.05 % -MNIST 99. 67 % 99. 64 % 99 .57 % -STL-10 78. 66 % 83. 
25 % 78 .64 % +MNIST 99.67 % 99.64 % 99.57 % +STL-10 78.66 % 83.25 % 78.64 % Table 5.18.: Comparisons of trained optimized models with early stopping on the validation accuracy compared training setups without a validation set and thus more training data. The second column uses the training loss as a stopping criterion, the third column uses a @@ -2668,15 +2670,15 @@ fixed number of epochs which is equal to the mean number of training epochs of t models with early stopping on the validation set accuracy. λ Single Model Accuracy Ensemble of 10 Training Set Test Set Training Set Test Set -λ = 0.01 73. 83 % σ = 1.78 58.94 % σ = 1.33 87 .78 % 69. 98 % -λ = 0.001 82.86 % σ = 0. 89 63.03 % σ = 0.67 91 .86 % 71. 02 % -λ = 0.0001 77.96 % σ = 2. 18 64.42 % σ = 0.73 81 .44 % 67. 03 % +λ = 0.01 73.83 % σ = 1.78 58.94 % σ = 1.33 87.78 % 69.98 % +λ = 0.001 82.86 % σ = 0.89 63.03 % σ = 0.67 91.86 % 71.02 % +λ = 0.0001 77.96 % σ = 2.18 64.42 % σ = 0.73 81.44 % 67.03 % Table 5.19.: Different choices of 2 model regularization applied to the optimized model. λ min max mean std -λ = 0. 01 457 503 404.6 37.2 -λ = 0. 001 516 649 588.4 41.6 -λ = 0. 0001 579 833 696.1 79.1 +λ = 0.01 457 503 404.6 37.2 +λ = 0.001 516 649 588.4 41.6 +λ = 0.0001 579 833 696.1 79.1 Table 5.20.: Training time in epochs of models with early stopping on training loss by different choices of 2 model regularization applied to the optimized model. @@ -2699,8 +2701,7 @@ for several hundred classes. – More classes are always easier to distinguish if each new class comes with more data. One reason why this might be the case is that distinguishing the ob ject from background has similar properties even for different classes. -• - Label smoothing had only a minor effect on the accuracy and no effect on the training +• Label smoothing had only a minor effect on the accuracy and no effect on the training time when a single base classifier was used to train with the smoothed labels by an ensemble of base classifiers. 
A baseline model was defined and evaluated on eight publicly available datasets. The @@ -2726,20 +2727,20 @@ confirmed. The batch size, however, can also be too low. the output layer can be removed. This was experimentally confirmed. • It could not be confirmed that learned color space transformation, as described in [MSM16], improves the network. Neither with ELU nor with leaky rectified linear -unit (LReLU) and α = 0. 3. +unit (LReLU) and α = 0.3. • It could be confirmed that ELU networks gives better results than any other activation function on CIFAR-100. For the character datasets MNIST and HASYv2, however, ReLU, LReLU, PReLU, Softplus and ELU all performed similar. • Changing the activation functions to the identity had very little impact on the HASYv2 and MNIST classifiers. Note that those networks are still able to learn nonlinear decision boundaries due to max-pooling and SAME padding. For CIFAR-100, however, -the accuracy drops by 6 .64 % when ELU is replaced by the identity. +the accuracy drops by 6.64 % when ELU is replaced by the identity. Based on the results of those experiments, an optimized classifier was developed and evaluated on all eight datasets. -The state of the art of STL-10 was improved from 74.80 % [ ZMGL15] to 78.66 % without +The state of the art of STL-10 was improved from 74.80 % [ZMGL15] to 78.66 % without using the unlabeled part of the dataset. The state of the art of HASYv2 was improved -from 81.00 % [ Tho17a] to 85 .92 %, for GTSRB the state of the art was improved from -99. 46 % [ SL11] to 99.51 %, for Asirra it was improved from 82 .7 % [ Gol08] to 93 .09 %. 1 +from 81.00 % [Tho17a] to 85.92 %, for GTSRB the state of the art was improved from +99.46 % [SL11] to 99.51 %, for Asirra it was improved from 82.7 % [Gol08] to 93.09 %.1 This was mainly achieved by the combination of ELU, Dropout, ensembles, training data augmentation and test-time transformations. 
The removal of the bias of layers close to the output and re-usage of those parameters in layers close to the input as well as using 3 × 3 @@ -2750,7 +2751,7 @@ is not a problem. But at which subsampling-level does having more layers have th biggest effect? Can this question be answered before a deeper network is trained? • Is label smoothing helpful for noisy labels? 1 - The baseline is better than the optimized model on Asirra and on HASYv2. +The baseline is better than the optimized model on Asirra and on HASYv2. • How does the choice of activation functions influence residual architectures? Could the results be the same for different activation functions in architectures with hundreds @@ -2787,33 +2788,33 @@ trained on CIFAR-100. Figure A.2.: The distribution of bias weights of a model without batch normalization trained on CIFAR-100. Algorithm 1 Simulated Annealing for minimizing Equation (4.1). -Require: C ∈ N n×n - , steps ∈ N, T ∈ R+ - , c ∈ (0, 1) -procedure SimulatedAnnealing (C , steps, T , c) -bestScore ← accuracy (C ) +Require: C ∈ Nn×n +, steps ∈ N, T ∈ R+ +, c ∈ (0, 1) +procedure SimulatedAnnealing(C , steps, T , c) +bestScore ← accuracy(C ) bestC ← C for i = 0; i < steps; i ← i + 1 do p ← randomFloat(0, 1) -if p < 0 .5 then Swap rows -i ← randomInteger (1, . . . , n ) -j ← randomInteger (1, . . . , n ) \ { i } -p ← randomUniform (0, 1) +if p < 0.5 then Swap rows +i ← randomInteger(1, . . . , n) +j ← randomInteger(1, . . . , n) \ { i } +p ← randomUniform(0, 1) C - ← swap (C, i, j ) -s ← accuracy (C - ) -if p < exp( s −bestScore + ← swap(C, i, j ) +s ← accuracy(C +) +if p < exp( s−bestScore T ) then C ← C if s > bestScore then bestScore ← s bestC ← C T ← T · c -else Move Block -s ← randomInteger (1, . . . , n ) Block start -e ← randomInteger ( s, . . . , n ) Block end -i ← randomInteger (1, . . . , n − (e − s)) Block insert position +else Move Block +s ← randomInteger(1, . . . , n) Block start +e ← randomInteger(s, . . . 
, n) Block end +i ← randomInteger(1, . . . , n − (e − s)) Block insert position Move Block (s, . . . , e) to position i return bestM @@ -2821,16 +2822,16 @@ Figure A.3.: Maximum weight updates between epochs by layer. The model is the ba but with layer 5 reduced to 3 filters. Function Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean -Identity 87.92 % σ = 0.40 84 . 69 % σ = 0.08 88 .59 % 85 . 43 % 92 – 140 114.5 -Logistic 81.46 % σ = 5.08 79 . 67 % σ = 4.85 86 .38 % 84 . 60 % 58 – 91 77.3 -Softmax 88.19 % σ = 0.31 84 . 70 % σ = 0.15 88 .69 % 85 . 43 % 124 – 171 145.8 -Tanh 88.41 % σ = 0.36 84 . 46 % σ = 0.27 89 .24 % 85 . 45 % 89 – 123 108.7 -Softsign 88.00 % σ = 0.47 84 . 46 % σ = 0.23 88 .77 % 85 . 33 % 77 – 119 104.1 -ReLU 88.93 % σ = 0.46 85 . 35 % σ = 0.21 89 .35 % 85 . 95 % 96 – 132 102.8 -Softplus 88.42 % σ = 0. 29 85. 16 % σ = 0.15 88 .90 % 85 . 73 % 108 – 143 121.0 -LReLU 88.61 % σ = 0.41 85 . 21 % σ = 0 . 05 89.07 % 85 . 83 % 87 – 117 104.5 -PReLU 89. 62 % σ = 0.41 85 .35 % σ = 0.17 90. 10 % 86. 01 % 85 – 111 100.5 -ELU 89.49 % σ = 0.42 85 .35 % σ = 0.10 89 .94 % 86 . 03 % 73 – 113 92.4 +Identity 87.92 % σ = 0.40 84.69 % σ = 0.08 88.59 % 85.43 % 92 – 140 114.5 +Logistic 81.46 % σ = 5.08 79.67 % σ = 4.85 86.38 % 84.60 % 58 – 91 77.3 +Softmax 88.19 % σ = 0.31 84.70 % σ = 0.15 88.69 % 85.43 % 124 – 171 145.8 +Tanh 88.41 % σ = 0.36 84.46 % σ = 0.27 89.24 % 85.45 % 89 – 123 108.7 +Softsign 88.00 % σ = 0.47 84.46 % σ = 0.23 88.77 % 85.33 % 77 – 119 104.1 +ReLU 88.93 % σ = 0.46 85.35 % σ = 0.21 89.35 % 85.95 % 96 – 132 102.8 +Softplus 88.42 % σ = 0.29 85.16 % σ = 0.15 88.90 % 85.73 % 108 – 143 121.0 +LReLU 88.61 % σ = 0.41 85.21 % σ = 0.05 89.07 % 85.83 % 87 – 117 104.5 +PReLU 89.62 % σ = 0.41 85.35 % σ = 0.17 90.10 % 86.01 % 85 – 111 100.5 +ELU 89.49 % σ = 0.42 85.35 % σ = 0.10 89.94 % 86.03 % 73 – 113 92.4 Table A.2.: Test accuracy of adjusted baseline models trained with different activation functions on HASYv2. 
For LReLU, α = 0.3 was chosen. @@ -2838,22 +2839,22 @@ Figure A.4.: Sum of weight updates between epochs by layer. The model is the bas with layer 5 reduced to 3 filters. Function Single model Ensemble of 10 Epochs Training set Test set Train Test Range Mean -Identity 87 .49 % σ = 2. 50 69 .86 % σ = 1.41 89 .78 % 71 .90 % 51 – 65 53.4 -Logistic 45.32 % σ = 14.88 40 .85 % σ = 12.56 51 . 06 % 45 .49 % 38 – 93 74.6 -Softmax 87.90 % σ = 3. 58 67 .91 % σ = 2.32 91 . 51 % 70 .96 % 108 – 150 127.5 -Tanh 85.38 % σ = 4. 04 67 .65 % σ = 2.01 90 . 47 % 71 .29 % 48 – 92 65.2 -Softsign 88.57 % σ = 4. 00 69 .32 % σ = 1.68 93 . 04 % 72 .40 % 55 – 117 83.2 -ReLU 94.35 % σ = 3. 38 71 .01 % σ = 1.63 98 . 20 % 74 .85 % 52 – 98 75.5 -Softplus 83.03 % σ = 2.07 68 .28 % σ = 1.74 93 . 04 % 75 .99 % 56 – 89 68.9 -LReLU 93.83 % σ = 3.89 74 . 66 % σ = 2.11 97 . 56 % 78 .08 % 52 – 120 80.1 -PReLU 95.53 % σ = 1.92 71 . 69 % σ = 1.37 98 .17 % 74 .69 % 59 – 101 78.8 -ELU 95.42 % σ = 3.57 75 . 09 % σ = 2.39 98 .54 % 78 .66 % 66 – 72 67.2 +Identity 87.49 % σ = 2.50 69.86 % σ = 1.41 89.78 % 71.90 % 51 – 65 53.4 +Logistic 45.32 % σ = 14.88 40.85 % σ = 12.56 51.06 % 45.49 % 38 – 93 74.6 +Softmax 87.90 % σ = 3.58 67.91 % σ = 2.32 91.51 % 70.96 % 108 – 150 127.5 +Tanh 85.38 % σ = 4.04 67.65 % σ = 2.01 90.47 % 71.29 % 48 – 92 65.2 +Softsign 88.57 % σ = 4.00 69.32 % σ = 1.68 93.04 % 72.40 % 55 – 117 83.2 +ReLU 94.35 % σ = 3.38 71.01 % σ = 1.63 98.20 % 74.85 % 52 – 98 75.5 +Softplus 83.03 % σ = 2.07 68.28 % σ = 1.74 93.04 % 75.99 % 56 – 89 68.9 +LReLU 93.83 % σ = 3.89 74.66 % σ = 2.11 97.56 % 78.08 % 52 – 120 80.1 +PReLU 95.53 % σ = 1.92 71.69 % σ = 1.37 98.17 % 74.69 % 59 – 101 78.8 +ELU 95.42 % σ = 3.57 75.09 % σ = 2.39 98.54 % 78.66 % 66 – 72 67.2 Table A.3.: Test accuracy of adjusted baseline models trained with different activation functions on STL-10. For LReLU, α = 0.3 was chosen. B. 
Hyperparameters Hyperparameters are parameters of models which are not optimized automatically (e.g., by -gradient descent), but by methods like random search [ BB12], grid search [ LBOM98] or +gradient descent), but by methods like random search [BB12], grid search [LBOM98] or manual search. B.1. Preprocessing Preprocessing used to be of ma jor importance in machine learning. However, with the @@ -2871,8 +2872,7 @@ Other preprocessing methods are: • Mean subtraction • Standardization of pixel-values to [0, 1] by dividing through 255 (used by [HLW16]) • Dimensionality reduction -– - Principal component analysis (PCA): An unsupervised linear transformation +– Principal component analysis (PCA): An unsupervised linear transformation which can be learned in the first hidden layer. It is hence doubtful if PCA improves the network. – Linear discriminant analysis (LDA) @@ -2882,21 +2882,21 @@ B.2. Data augmentation Data augmentation techniques aim at making artificially more data from real data items by applying invariances. For computer vision, they include: Name Augmentation Factor Used by -Horizontal flip 2 [KSH12, WYS + - 15] -Vertical flip 2 [DWD15] 1 +Horizontal flip 2 [KSH12, WYS+ +15] +Vertical flip 2 [DWD15]1 Rotation ∼ 40 (δ = 20) [DSRB14] -Scaling ∼ 14 (δ ∈ [0.7 , 1 .4]) [DSRB14] +Scaling ∼ 14 (δ ∈ [0.7, 1.4]) [DSRB14] Crops 322 - = 1024 [KSH12, WYS + - 15] + = 1024 [KSH12, WYS+ +15] Shearing [Gra15] -GANs [BCW + - 17] -Brightness ∼ 20 (δ ∈ [0.5 , 1 .5]) [How13] -Hue 51 (δ = 0.1 ) [MRM15, DSRB14] +GANs [BCW+ +17] +Brightness ∼ 20 (δ ∈ [0.5, 1.5]) [How13] +Hue 51 (δ = 0.1) [MRM15, DSRB14] Saturation ∼ 20 (δ = 0.5) [DSRB14] -Contrast ∼ 20 (δ ∈ [0.5 , 1 .5]) [How13] +Contrast ∼ 20 (δ ∈ [0.5, 1.5]) [How13] Channel shift [KSH12] Table B.1.: Overview of data augmentation techniques. The augmentation factor is calculated for typical situations. 
For example, the augmentation factor for random crops is calculated @@ -2908,19 +2908,19 @@ Less common, but also reasonable are: • Adding noise • Elastic deformations • Color casting (used by [WYS+ - 15]) -• Vignetting (used by [WYS + - 15]) +15]) +• Vignetting (used by [WYS+ +15]) • Lens distortion (used by [WYS+ - 15]) +15]) 1 - Vertical flipping combined with 180◦ +Vertical flipping combined with 180◦ rotation is equivalent to horizontal flipping B.3. Initialization Weight initializations are usually chosen to be small and centered around zero. One way to characterize many initialization schemes is by -w ∼ α · U [−1 , 1] + β · N (0, 1) + γ with α, β , γ ≥ 0 +w ∼ α · U [−1, 1] + β · N (0, 1) + γ with α, β , γ ≥ 0 Table B.2 shows six commonly used weight initialization schemes. Several schemes use the same idea, that unit-variance is desired for each layer as the training converges faster [IS15]. Name α β γ Reference @@ -2928,23 +2928,23 @@ Constant α = 0 β = 0 γ ≥ 0 used by [ZF14] Xavier/Glorot uniform α = 6 n -in + n +in+n out β = 0 γ = 0 [GB10] Xavier/Glorot normal α = 0 β = 2 (n -in +n -out ) - 2 +in+n +out) +2 γ = 0 [GB10] He α = 0 β = 2 n in γ = 0 [HZRS15b] Orthogonal — — γ = 0 [SMG13] LSUV — — γ = 0 [MM15] -Table B.2.: Weight initialization schemes of the form w ∼ α · U [ −1 , 1] + β · N (0 , 1) + γ . +Table B.2.: Weight initialization schemes of the form w ∼ α · U [−1, 1] + β · N (0, 1) + γ . n - in , n +in, n out are the number of units in the previous layer and the next layer. Typically, biases are initialized with constant 0 and weights by one of the other schemes to prevent unit-coadaptation. However, dropout makes it possible to use constant initialization for @@ -2953,25 +2953,25 @@ LSUV and Orthogonal initialization cannot be described with this simple pattern. B.4. 
Objective function For classification tasks, the cross-entropy E -C E ( W ) = − -x ∈X K +C E (W ) = − +x∈X K -k =1 [ tx -k log(o x -k ) + (1 − t x +k=1 [tx +k log(ox +k ) + (1 − tx k ) log(1 − ox k )] is by far the most commonly used ob jective function (e.g., used by [ZF14]). In this equation, X is the set of training examples, K is the number of classes, tx k ∈ { 0, 1 } indicates if the -training example x is of class k , o x -k is the output of the classifier for the training examplex -and class k . -However, regularization terms weighted with a constant λ ∈ (0, +∞ ) are sometimes added: +training example x is of class k, ox +k is the output of the classifier for the training example x +and class k. +However, regularization terms weighted with a constant λ ∈ (0, +∞) are sometimes added: • LASSO: 1 (e.g., used in [HPTD15]) • Weight decay: - 2 (e.g., λ = 0.0005 as in [MSM16]) +2 (e.g., λ = 0.0005 as in [MSM16]) • Orthogonality regularization (|(W T · W − I )|, see [VTKP17]) @@ -2979,57 +2979,57 @@ B.5. Optimization Techniques Most relevant optimization techniques for CNNs are based on SGD, which updates the weights according to the rule w - ji ← w - ji + ∆ w - ji with ∆ w - ji = −η ∂ E +ji ← w +ji + ∆w +ji with ∆w +ji = −η ∂ E x ∂ w ji -where η ∈ (0, 1), typically 0. 01 (e.g., [MSM16]), is called the learning rate . +where η ∈ (0, 1), typically 0.01 (e.g., [MSM16]), is called the learning rate. A slight variation of SGD is mini-batch gradient descent with the mini-batch B (typically -mini-batch sizes are | B | ∈ { 32, 64, 128, 256 , 512 }, e.g. [ ZF14]). Larger mini-batch sizes -lead to sharp minima and thus poor generalization [ KMN + - 16]. Smaller mini-batch sizes +mini-batch sizes are |B | ∈ { 32, 64, 128, 256, 512 }, e.g. [ZF14]). Larger mini-batch sizes +lead to sharp minima and thus poor generalization [KMN+ +16]. Smaller mini-batch sizes lead to longer training times due to computational overhead and to more training steps due to gradient noise. 
w - ji ← w - ji + ∆ w - ji with ∆ w - ji = −η ∂ E +ji ← w +ji + ∆w +ji with ∆w +ji = −η ∂ E B ∂ w ji Nine variations which adjust the learning rate during training are: • Momentum: - w (t+1) -ji ← w (t) -ji + ∆ w (t+1) -ji with ∆ w (t+1) + w(t+1) +ji ← w(t) +ji + ∆w(t+1) +ji with ∆w(t+1) ji = −η ∂ E B ∂ w -ji + α ∆w (t) +ji + α∆w(t) ji -with α ∈ [0, 1], typically 0 .9 (e.g., [ZF14, MSM16]) +with α ∈ [0, 1], typically 0.9 (e.g., [ZF14, MSM16]) • Adagrad [DHS11] • RProp and the mini-batch version RMSProp [TH12] • Adadelta [Zei12] -• Power Scheduling [ Xu11]: η ( t ) = η (0)(1 + a · t) − c - , where t ∈ N - 0 is the training step, +• Power Scheduling [Xu11]: η(t) = η(0)(1 + a · t)−c +, where t ∈ N +0 is the training step, a, c are constants. • Performance Scheduling [SHY+ - 13]: Measure the error on the cross validation set and +13]: Measure the error on the cross validation set and decrease the learning rate when the algorithms improvement is below a threshold. -• Exponential Decay Learning Rate [ SHY+ - 13]: η ( t) = η (0) · 10 − t +• Exponential Decay Learning Rate [SHY+ +13]: η(t) = η(0) · 10− t k where t ∈ N - 0 is the -training step, η (0) is the initial learning rate, k ∈ N - ≥1 is the number of training steps +0 is the +training step, η(0) is the initial learning rate, k ∈ N +≥1 is the number of training steps until the learning rate is decreased by 1 10 th. • Newbob Scheduling [new00]: Start with Performance Scheduling, then use Exponential @@ -3041,7 +3041,7 @@ Some of those are explained in [Rud16]. Other first-order gradient optimization methods are: • Quickprop [Fah88] • Nesterov Accellerated Momentum (NAG) [Nes83] -• Conjugate Gradient method [ Cha92]: Combines a line search for the step size with +• Conjugate Gradient method [Cha92]: Combines a line search for the step size with the gradients direction. 
Higher-order gradient methods like Newtons method or quasi-Newton methods like BFGS and L-BFGS need the inverse of the Hessian matrix which is intractable for today’s CNNs. @@ -3051,138 +3051,138 @@ However, there are alternatives which do not use gradient information: • Twiddle: A local hill-climbing algorithm explained by Sebastian Thrun and described on [Tho14b] There are also approaches which learn the optimization algorithm [ADG+ - 16, LM16]. +16, LM16]. B.6. Network Design CNNs have the following hyperparameters: • Depth: The number of layers -• Width : The number of filters per layer +• Width: The number of filters per layer • Layer and block connectivity graph • Layer and block hyperparameters: – Activation Functions as shown in Table B.3 – For more, see Sections 2.2 and 2.3. -Name Function ϕ(x ) Range of Values ϕ - ( x) Used by -Sign function †  +Name Function ϕ(x) Range of Values ϕ +(x) Used by +Sign function†   +1 if x ≥ 0 -−1 if x < 0 { − 1, 1 } 0 [KS02] +−1 if x < 0 { −1, 1 } 0 [KS02] Heaviside -step function †  +step function†   +1 if x > 0 0 if x < 0 { 0, 1 } 0 [MP43] Logistic function 1 -1+e−x [0, 1] e x -(e x - +1)2 [DJ99] -Tanh ex - − e−x +1+e−x [0, 1] ex +(ex ++1)2 [DJ99] +Tanh ex +−e−x ex - + e−x = tanh(x ) [ −1 , 1] sech2 - (x ) [LBBH98, Tho14a] -ReLU † - max(0, x) [0 , +∞)  ++e−x = tanh(x) [−1, 1] sech2 +(x) [LBBH98, Tho14a] +ReLU† + max(0, x) [0, +∞)   1 if x > 0 0 if x < 0 [KSH12] -LReLU † 2 -(PReLU) ϕ (x ) = max(αx, x ) (−∞, +∞)  +LReLU†2 +(PReLU) ϕ(x) = max(αx, x) (−∞, +∞)   1 if x > 0 α if x < 0 [MHN13, HZRS15b] -Softplus log(ex - + 1) (0 , +∞) e x -e x - +1 [DBB + - 01, GBB11] -ELU  +Softplus log(ex + + 1) (0, +∞) ex +ex ++1 [DBB+ +01, GBB11] +ELU   x if x > 0 -α ( ex +α(ex − 1) if x ≤ 0 (−∞, +∞)   1 if x > 0 αex otherwise [CUH15] Softmax‡ - o ( x ) + o(x) j = ex - j - - K -k =1 ex - k [0, 1]K - o (x ) - j · - K -k =1 e x - k - −e x - j - - K -k =1 e x +j + +K +k=1 ex +k [0, 1]K + o(x) +j · +K +k=1 ex +k + −ex +j + 
+K +k=1 ex k [KSH12, Tho14a] Maxout‡ - o ( x ) = max -x ∈x x (−∞, +∞)  + o(x) = max +x∈x x (−∞, +∞)   1 if x - i = max x -0 otherwise [GWFM + - 13] +i = max x +0 otherwise [GWFM+ +13] Table B.3.: Overview of activation functions. Functions marked with † are not differentiable at 0 and functions marked with ‡ operate on all elements of a layer simultaneously. The -hyperparameters α ∈ (0 , 1) of Leaky ReLU and ELU are typically α = 0 .01 . Other -activation function like randomized leaky ReLUs exist [ XWCL15 ], but are far less +hyperparameters α ∈ (0, 1) of Leaky ReLU and ELU are typically α = 0.01. Other +activation function like randomized leaky ReLUs exist [XWCL15], but are far less commonly used. Some functions are smoothed versions of others, like the logistic function for the Heaviside step function, tanh for the sign function, softplus for ReLU. Softmax is the standard activation function for the last layer of a classification network as it produces a probability distribution. See Figure B.1 for a plot of some of them. 2 - α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. +α is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function. -−2. 0 −1 .5 −1 .0 −0. 5 0.5 1 .0 1. 5 2 .0 -−1. 0−0. 50. 51. 01 .52 .0 +−2.0 −1.5 −1.0 −0.5 0.5 1.0 1.5 2.0 +−1.0−0.50.51.01.52.0 xy ϕ - 1 ( x) = 1 -1+e −x +1(x) = 1 +1+e−x ϕ - 2 ( x) = tanh( x ) +2(x) = tanh(x) ϕ - 3 ( x) = max(0, x) +3(x) = max(0, x) ϕ - 4 ( x) = log( ex +4(x) = log(ex + 1) ϕ - 5 ( x) = max(x, ex +5(x) = max(x, ex − 1) -Figure B.1.: Activation functions plotted in [ − 2 , +2] . tanh and ELU are able to produce negative +Figure B.1.: Activation functions plotted in [−2, +2]. tanh and ELU are able to produce negative numbers. The image of ELU, ReLU and Softplus is not bound on the positive side, whereas tanh and the logistic function are always below 1. B.7. 
Regularization Regularization techniques aim to make the fitted function smoother and reduce overfitting. Regularization techniques are: • - 1 , -2 , and Orthogonality regularization: See Appendix B.4 +1, +2, and Orthogonality regularization: See Appendix B.4 • Max-norm regularization (e.g. used ins [SHK+ - 14]) +14]) • Dropout (introduced in [SHK+ - 14]), DropConnect (see [WZZ+ - 13]), Stochastic Depth -(see [HSL + - 16]) +14]), DropConnect (see [WZZ+ +13]), Stochastic Depth +(see [HSL+ +16]) • Feature scale clipping (see [ZF14]) • Data augmentation (according to [ZBH+ - 16]) +16]) • Global average pooling (according to [ZKL+ - 15]) +15]) • Dense-Sparse-Dense training (see [HPN+ - 16]) +16]) • Soft targets (see [HVD15]) @@ -3192,69 +3192,69 @@ C.1. Parameter Numbers due to the bias. • A convolutional layer i with k i filters of size n × m being applied to k -i −1 feature maps +i−1 feature maps has k i · k - i −1 (n · m + 1) parameters. The +1 is due to the bias. +i−1(n · m + 1) parameters. The +1 is due to the bias. • A fully connected layer with n nodes after k feature maps of size m 1 × m - 2 has -n · ( k · m +2 has +n · (k · m 1 · m - 2 + 1) parameters. +2 + 1) parameters. • A dense block with a depth of L, a growth rate of n and 3 × 3 filters has L + n · 32 + 32 - · n 2 + · n2 L -i =0 (L − i) = L + 9 n + 9 n 2 L2 - −L +i=0(L − i) = L + 9n + 9n2 L2 +−L 2 parameters. -According to [ HPTD15], AlexNet has 60 million parameters which is roughly the number +According to [HPTD15], AlexNet has 60 million parameters which is roughly the number calculated in Table D.2. C.2. FLOPs The FLOPs of a layer depend on the implementation, the compiler and the hardware. Hence the following number are only giving rough estimates. In the following, n - ϕ denotes the number of FLOPs to compute the non-linearity ϕ. For +ϕ denotes the number of FLOPs to compute the non-linearity ϕ. For simplicity, n - ϕ = 5 was chosen. +ϕ = 5 was chosen. 
• A fully connected layer with n nodes and k inputs has to calculate ϕ(W · x + b) with -W ∈ R n× k - , x ∈ Rk × 1 - , b ∈ R n×1 - . It hence needs about n · ( k + ( k − 1) + 1) = 2 nk -additions / multiplications before the non-linearityϕ is calculated. The total number +W ∈ Rn×k + , x ∈ Rk×1 +, b ∈ Rn×1 +. It hence needs about n · (k + (k − 1) + 1) = 2nk +additions / multiplications before the non-linearity ϕ is calculated. The total number of FLOPs is 2 · n · k + n · n - ϕ . -• In the following, biases are ignored. A convolutional layer withk +ϕ. +• In the following, biases are ignored. A convolutional layer with k i filters of size n × m being applied to k -i − 1 filter maps of size w × h results in k +i−1 filter maps of size w × h results in k i filter maps of size w × h if -padding is applied. For each element of each filter map,n · m · k -i − 1 multiplications and +padding is applied. For each element of each filter map, n · m · k +i−1 multiplications and (n · m · k -i − 1 − 1) additions have to be made. This results in(2nmk -i − 1 − 1) · (k +i−1 − 1) additions have to be made. This results in (2nmk +i−1 − 1) · (k i · w · h) -operations. The total number of FLOPs is(2 · n · m · k - i −1 − 1) · (k - i · w · h) + k +operations. The total number of FLOPs is (2 · n · m · k +i−1 − 1) · (k +i · w · h) + k i · w · h · n - ϕ . +ϕ. This is, of course, a naive way of calculating a convolution. There are other ways of calculating convolutions [LG16]. -• A fully connected layer with n nodes after k feature maps of size w × h needs 2n (k · w · h ) -FLOPs. The total number of FLOPs is 2 n · (k · w · h) + n · n - ϕ . +• A fully connected layer with n nodes after k feature maps of size w × h needs 2n(k · w · h) +FLOPs. The total number of FLOPs is 2n · (k · w · h) + n · n +ϕ. • As Dropout is only calculated during training, the number of FLOPs was set to 0. • The number of FLOPs for max pooling is dominated by the number of positions to which the pooling kernel is applied. 
For a feature map of size w × h a max pooling -filter with stride s gets applied w · h -s 2 . The number of FLOPs per application depends +filter with stride s gets applied w·h +s2 . The number of FLOPs per application depends on the kernel size. A 2 × 2 kernel is assumed to need 5 FLOPs. • The number of FLOPs for Batch Normalization is the same as the number of its @@ -3262,8 +3262,8 @@ parameters. Here are some references which give information for the FLOPs: • AlexNet – 1.5B in total [HPTD15]. -– 725M in total [KPY + - 15]. +– 725M in total [KPY+ +15]. – 3300M in total in Table D.2 • VGG-16: – 15484M in total [HPTD15]. @@ -3279,8 +3279,7 @@ step has to fit in the memory of the GPU. This includes the following: in the backward pass. This is the number of floats in the feature maps of all weight layers combined. • Weights -• Optimization algorithm -: The optimization algorithm introduces some overhead. +• Optimization algorithm: The optimization algorithm introduces some overhead. For example, Adam stores two parameters per weights. At inference time, every two consecutive layers have to fit into memory. When the forward pass of layer A to layer B is calculated, the memory can be freed if no skip connections are @@ -3297,8 +3296,8 @@ The summation row gives the sum of all floats for the output size column. This a conclusions about the maximum mini-batch size which can be in memory for training. D.1. LeNet-5 -One of the first CNNs used was LeNet-5 [ LBBH98 ]. LeNet-5 uses two times the common -pattern of a single convolutional layer withtanh as a non-linear activation function followed +One of the first CNNs used was LeNet-5 [LBBH98]. LeNet-5 uses two times the common +pattern of a single convolutional layer with tanh as a non-linear activation function followed by a pooling layer and three fully connected layers. 
One fully connected layer is used to get the right output dimension, another one is necessary to allow the network to learn a non-linear combination of the features of the feature maps. @@ -3324,22 +3323,22 @@ than fully connected layers. D.2. AlexNet The first CNN which achieved ma jor improvements on the ImageNet dataset was AlexNet [KSH12]. -Its architecture is shown in Figure D.2 and described in Table D.2. It has about60· 106 +Its architecture is shown in Figure D.2 and described in Table D.2. It has about 60·106 parameters. A trained AlexNet can be downloaded at www.cs.toronto.edu/˜guerzhoy/tf_alexnet. Note that the uncompressed size is at least 60 965 224 floats · 32 bit float ≈ 244 MB. -Figure D.2.: Architecture of AlexNet as shown in [ KSH12]: Convolutional Layers are followed +Figure D.2.: Architecture of AlexNet as shown in [KSH12]: Convolutional Layers are followed by pooling layers multiple times. At the end, a fully connected network is applied. Conceptually, it is identical to the architecture of LeNet-5 (see Figure D.1). # Type Filters @ Patch size / stride Parameters FLOPs Output size Input 3 @ 224 × 224 1 Convolution 96 @ 11 × 11 × 3 / 4 34 944 211 M 96 @ 55 × 55 -LCN 12 M 96 @ 55 × 55 +LCN 12 M 96 @ 55 × 55 2 Max pooling 3 × 3 / 2 0 301 k 96 @ 27 × 27 3 Convolution 256 @ 5 × 5 × 48 / 1 307 456 448 M 256 @ 13 × 13 -LCN 3 M 256 @ 13 × 13 +LCN 3 M 256 @ 13 × 13 4 Max pooling 3 × 3 / 2 0 50 k 256 @ 13 × 13 5 Convolution 384 @ 3 × 3 × 256 / 1 885 120 299 M 384 @ 13 × 13 7 Convolution 384 @ 3 × 3 × 192 / 1 663 936 224 M 384 @ 13 × 13 @@ -3354,35 +3353,36 @@ Table D.2.: AlexNet architecture: One special case of AlexNet is grouping of con computational restrictions at the time of its development. This also reduces the number of parameters and allows parallel computation on separate GPUs. However, to make the architecture easier to compare, this grouping was ignored for the parameter count. 
-The FLOPs are taken from [ HPTD15 ] and combined with rough estimates for Local +The FLOPs are taken from [HPTD15] and combined with rough estimates for Local Contrast Normalization and max pooling. The calculated number of parameters was checked against the downloaded version. It also has 60 965 224 parameters. D.3. VGG-16 D -Another widespread architecture is the VGG-16 (D) [ SZ14]. VGG comes from the V isual +Another widespread architecture is the VGG-16 (D) [SZ14]. VGG comes from the Visual Geometry Group in Oxford which developed this architecture. It has 16 layers which can -learn parameters. A ma jor difference compared to AlexNet is that VGG-16 uses only3 × 3 +learn parameters. A ma jor difference compared to AlexNet is that VGG-16 uses only 3 × 3 filters and is much deeper. A visualization of the architecture is shown in Figure D.3 and a detailed textual description is given in Table D.3. -A trained VGG-16 D for Tensorflow can be downloaded athttps://github.com/machrisaa/ -tensorflow- vgg . Note that the uncompressed size is at least 138 357 544 floats · 32 bit +A trained VGG-16 D for Tensorflow can be downloaded at https://github.com/machrisaa/ +tensorflow- vgg. Note that the uncompressed size is at least 138 357 544 floats · 32 bit float ≈ 520 MB. The downloaded Numpy binary file npz needs 553 MB without compression and -514 MB with compression.224 × 224Input -C 64@3 × 3 /1 -C 64@3 × 3 /1 112 × 112max pooling 2 × 2 /1 -C 128@3 × 3 /1 -C 128@3 × 3 /1 56 × 56max pooling 2 × 2/ 1 -C 256@3 × 3 /1 -C 256@3 × 3 /1 -C 256@3 × 3 /1 28 × 28max pooling 2 × 2/ 1 -C 512@3 × 3 /1 -C 512@3 × 3 /1 -C 512@3 × 3/ 1 14 × 14max pooling 2 × 2 /1 -C 512@3 × 3/ 1 -C 512@3 × 3/ 1 -C 512@3 × 3/ 1 7 × 7max pooling 2 × 2 /1 +514 MB with compression. 
+224 × 224Input +C 64@3 × 3/1 +C 64@3 × 3/1 112 × 112max pooling 2 × 2/1 +C 128@3 × 3/1 +C 128@3 × 3/1 56 × 56max pooling 2 × 2/1 +C 256@3 × 3/1 +C 256@3 × 3/1 +C 256@3 × 3/1 28 × 28max pooling 2 × 2/1 +C 512@3 × 3/1 +C 512@3 × 3/1 +C 512@3 × 3/1 14 × 14max pooling 2 × 2/1 +C 512@3 × 3/1 +C 512@3 × 3/1 +C 512@3 × 3/1 7 × 7max pooling 2 × 2/1 Fully Connected 4096 Dropout, p = 0.5 Fully Connected 4096 @@ -3403,27 +3403,27 @@ Max pooling 2 × 2 / 2 0 1 M 128 @ 56 × 56 5 Convolution 256 @ 3 × 3 × 128 / 1 295 168 1853 M 256 @ 56 × 56 6 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 7 Convolution 256 @ 3 × 3 × 256 / 1 590 080 3703 M 256 @ 56 × 56 -Max pooling 2 × 2 / 2 0 < 1 M 256 @ 28 × 28 +Max pooling 2 × 2 / 2 0 <1 M 256 @ 28 × 28 8 Convolution 512 @ 3 × 3 × 256 / 1 1 180 160 1851 M 512 @ 28 × 28 9 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 10 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 3701 M 512 @ 28 × 28 -Max pooling 2 × 2 / 2 0 < 1 M 512 @ 14 × 14 +Max pooling 2 × 2 / 2 0 <1 M 512 @ 14 × 14 11 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 12 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 13 Convolution 512 @ 3 × 3 × 512 / 1 2 359 808 925 M 512 @ 14 × 14 -Max pooling 2 × 2 / 2 0 < 1 M 512 @ 7 × 7 +Max pooling 2 × 2 / 2 0 <1 M 512 @ 7 × 7 14 FC 4096 neurons 102 764 544 206 M 4096 -Dropout 0 0 4096 +Dropout 0 0 4096 15 FC 4096 neurons 16 781 312 34 M 4096 -Dropout 0 0 4096 +Dropout 0 0 4096 16 FC 1000 neurons 4 097 000 8 M 1000 138 357 544 31 000 M 15 245 800 Table D.3.: VGG-16 D architecture: The authors chose to give only layers a number which have learnable parameters. All convolutions are zero padded to prevent size changes and use ReLU activation functions. The channels mean is subtracted from each pixel as -a preprocessing step ( −103 . 939 , − 116 .779 , − 123 .68 ). As Dropout is only calculated -during training time, the number of FLOPs is 0. The dropout probability is 0. 5. 
+a preprocessing step (−103.939, −116.779, −123.68). As Dropout is only calculated +during training time, the number of FLOPs is 0. The dropout probability is 0.5. The calculated number of parameters was checked against the downloaded version. It also has 138 357 544 parameters. @@ -3431,27 +3431,27 @@ D.4. GoogleNet, Inception v2 and v3 The large number of parameters and operations is a problem when such models should get applied in practice to thousands of images. In order to reduce the computational cost while maintaining the classification quality, GoogleNet [SLJ+ - 15] and the Inception module were +15] and the Inception module were developed. The Inception module essentially only computes 1 × 1 filters, 3 × 3 filters and 5 × 5 filters in parallel, but applied bottleneck 1 × 1 filters before to reduce the number of parameters. It is shown in Figure D.4. Figure D.4.: Inception module Image source: [SLJ+ - 15] -Compared to GoogleNet, Inception v2 [ SVI+ - 15] removed the 5 × 5 filters and replaced +15] +Compared to GoogleNet, Inception v2 [SVI+ +15] removed the 5 × 5 filters and replaced them by two successive layers of 3 × 3 filters. A visualization of an Inception v2 module is given in Figure D.5. Additionally, Inception v2 applies successive asymmetric filters to -approximate symmetric filters with fewer parameters. The authors call this approachfilter +approximate symmetric filters with fewer parameters. The authors call this approach filter factorization. Inception v3 introduced Batch Normalization to the network [SVI+ - 15]. +15]. Figure D.5.: Inception v2 module Image source: [SVI+ - 15] +15] D.5. Inception-v4 -Inception-v4 as described in [ SIV16] consists of four main building blocks: The stem, +Inception-v4 as described in [SIV16] consists of four main building blocks: The stem, Inception A, Inception B and Inception C. To quote the authors: Inception-v4 is a deeper, wider and more uniform simplified architecture than Inception-v3. 
The stem, Reduction A and Reduction B use max-pooling, whereas Inception A, Inception B and Inception C use @@ -3486,10 +3486,10 @@ of Classes Channels Data source MNIST 28 px × 28 px 70 000 10 1 [YL98, LBBH98] HASYv2 32 px × 32 px 168 233 369 1 [Tho17a] -SVHN 32 px × 32 px 630 420 10 3 [NWC + - 11b], -[NWC + - 11a] +SVHN 32 px × 32 px 630 420 10 3 [NWC+ +11b], +[NWC+ +11a] CIFAR-10 32 px × 32 px 60 000 10 3 [Kri, KH09] CIFAR-100 32 px × 32 px 60 000 100 3 [Kri, KH09] STL-10 96 px × 96 px 13 000 10 3 [CLN11, CLN10] @@ -3500,11 +3500,11 @@ Caltech-256 (75 px − 7913 px) ILSVRC 20121 (8 px − 9331 px) ×(10 px − 6530 px) 1.2 · 106 1000 3 [Ima12, RDS+ - 14] -Places3652 (290px − 3158 px ) -×(225px − 2630 px ) 1. 8 · 106 +14] +Places3652 (290px − 3158px) +×(225px − 2630px) 1.8 · 106 365 3 [Zho16, ZKL+ - 16] +16] GTSRB (25 px − 266 px) ×(25 px − 232 px) 51 839 43 3 [SSSI, SSSI12] Asirra3 (4 px − 500 px) @@ -3515,51 +3515,51 @@ Table E.1.: An overview over publicly available image databases for classificati of images row gives the sum of the training and the test images. Some datasets, like SVHN, have additional unlabeled data which is not given in this table. 1 - ImageNet Large Scale Visual Recognition Competition +ImageNet Large Scale Visual Recognition Competition 2 - The dimensions are only calculated for the validation set. +The dimensions are only calculated for the validation set. 
3 - Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle +Asirra is a CAPTCHA created by Microsoft and was used in the “Cats vs Dogs” competition on Kaggle Dataset Model type / name Result Score Achieved / Claimed by -MNIST — 0 .21 % error [WZZ+ - 13] +MNIST — 0.21 % error [WZZ+ +13] HASYv2 TF-CNN 81.00 % accuracy [Tho17a] -SVHN DenseNet (k = 24) 1 .59 % error [HLW16] -CIFAR-10 DenseNet-BC ( k = 40) 3 .46 % error [HLW16] +SVHN DenseNet (k = 24) 1.59 % error [HLW16] +CIFAR-10 DenseNet-BC (k = 40) 3.46 % error [HLW16] CIFAR-100 WRN-28-10 16.21 % error [LH16] STL-10 SWWAE-4layer 74.80 % accuracy [ZMGL15] -Caltech-101 SPP-net (pretrained) 93.42 %±0 .5 % accuracy [HZRS14] -Caltech-256 ZF-Net (pretrained) 74. 2 %±0 .3 % accuracy [ZF14] -ImageNet 2012 ResNet ensemble 3 .57 % Top-5 error [HZRS15a] +Caltech-101 SPP-net (pretrained) 93.42 %±0.5 % accuracy [HZRS14] +Caltech-256 ZF-Net (pretrained) 74.2 %±0.3 % accuracy [ZF14] +ImageNet 2012 ResNet ensemble 3.57 % Top-5 error [HZRS15a] GTSRB MCDNN 99.46 % accuracy [SL11] -Asirra SVM 82. 7 % accuracy [Gol08] +Asirra SVM 82.7 % accuracy [Gol08] Graz-02 Optimal NBNN 78.98 % accuracy [BMDP10] Table E.2.: An overview over state of the art results achieved in computer vision datasets. 
Algorithm 2 Create a classification dataset from a semantic segmentation dataset Require: Semantic segmentation dataset (D - S ) -procedure CreateDataset (Annotated dataset D - S ) +S ) +procedure CreateDataset(Annotated dataset D +S ) D - C ← List +C ← List w ← desired image width h ← desired image height -for Image and associated label ( x, y ) in D - S do -i ← randint(0, L.width − w ) -j ← randint (0, L.height − h ) +for Image and associated label (x, y) in D +S do +i ← randint(0, L.width − w) +j ← randint(0, L.height − h) c -L ← crop (y, ( i, j ) , ( i + w, j + h)) +L ← crop(y, (i, j ), (i + w, j + h)) if at least 50% of s are of one class then c -I ← crop (x, (i, j ), (i + w, j + h)) +I ← crop(x, (i, j ), (i + w, j + h)) D.append((c I , c -L )) +L)) return (D - C ) +C ) F. List of Tables 2.1 Pooling types . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 @@ -3641,626 +3641,620 @@ D.5 Inception v2 module . . . . . . . . . . . . . . . . . . . . . . . . . . . . H. Bibliography [AAB+ - 16] M. Abadi, A. Agarwal et al. , “Tensorflow: Large-scale machine learning on -heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467 , Mar. -2016. [Online]. Available: https://arxiv . org/abs/1603. 04467 +16] M. Abadi, A. Agarwal et al., “Tensorflow: Large-scale machine learning on +heterogeneous distributed systems,” arXiv preprint arXiv:1603.04467, Mar. +2016. [Online]. Available: https://arxiv.org/abs/1603.04467 [ABKS99] M. Ankerst, M. M. Breunig et al., “ OPTICS: Ordering points to identify the clustering structure,” in ACM Sigmod record, vol. 28, no. 2. ACM, 1999, pp. 49–60. [ADG+ - 16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by +16] M. Andrychowicz, M. Denil et al., “Learning to learn by gradient descent by gradient descent,” in Advances in Neural Information Processing Systems 29 -(NIPS) , D. D. Lee, M. Sugiyama et al. , Eds. Curran Associates, Inc., Mar. -2016, pp. 3981–3989. [Online]. 
Available: http://papers .nips .cc/paper/6461learning-to- +(NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Mar. +2016, pp. 3981–3989. [Online]. Available: http://papers.nips.cc/paper/6461learning-to- learn-by-gradient- descent- by- gradient-descent.pdf [AM15] M. T. Alexander Mordvintsev, Christopher Olah, “Inceptionism: Going deeper into neural networks,” Jun. 2015. [Online]. Available: - https://research . googleblog . com/2015/06/inceptionism-going-deeperinto- + https://research.googleblog.com/2015/06/inceptionism-going-deeperinto- neural.html [Asi17] “Kaggle cats and dogs dataset,” Oct. 2017. [Online]. Available: https: -//www .microsoft.com/en-us/download/details .aspx?id=54765 +//www.microsoft.com/en-us/download/details.aspx?id=54765 [BB12] J. Bergstra and Y. Bengio, “Random search for hyper-parameter optimization,” -Journal of Machine Learning Research , vol. 13, no. Feb, pp. 281–305, -Feb. 2012. [Online]. Available: http://jmlr .csail .mit .edu/papers/volume13/ -bergstra12a/bergstra12a .pdf +Journal of Machine Learning Research, vol. 13, no. Feb, pp. 281–305, +Feb. 2012. [Online]. Available: http://jmlr.csail.mit.edu/papers/volume13/ +bergstra12a/bergstra12a.pdf [BCW+ - 17] - J. Bao, D. Chen et al., “ CVAE-GAN: Fine-grained image generation through +17] J. Bao, D. Chen et al., “ CVAE-GAN: Fine-grained image generation through asymmetric training,” arXiv preprint arXiv:1703.10155, Mar. 2017. [Online]. -Available: https://arxiv .org/abs/1703.10155 -[BDLB09] - J. Bergstra, G. Desjardins et al. , “Quadratic polynomials learn better image +Available: https://arxiv.org/abs/1703.10155 +[BDLB09] J. Bergstra, G. Desjardins et al., “Quadratic polynomials learn better image features,” Département d’Informatique et de Recherche Opérationnelle, Université de Montréal, Tech. Rep. 1337, 2009. -[BGNR16] B. Baker, O. Gupta et al. , “Designing neural network architectures using +[BGNR16] B. Baker, O. 
Gupta et al., “Designing neural network architectures using reinforcement learning,” arXiv preprint arXiv:1611.02167, Nov. 2016. [Online]. -Available: https://arxiv .org/abs/1611.02167 +Available: https://arxiv.org/abs/1611.02167 [BM93] U. Bodenhausen and S. Manke, Automatical ly Structured Neural -Networks For Handwritten Character And Word Recognition . London: +Networks For Handwritten Character And Word Recognition. London: Springer London, Sep. 1993, pp. 956–961. [Online]. Available: http: //dx.doi.org/10.1007/978-1- 4471-2063- 6_283 [BMDP10] - R. Behmo, P. Marcombes et al. , “Towards optimal naive Bayes nearest + R. Behmo, P. Marcombes et al., “Towards optimal naive Bayes nearest neighbor,” in European Conference on Computer Vision (ECCV). Springer, 2010, pp. 171–184. [BPL10] Y.-L. Boureau, J. Ponce, and Y. LeCun, “A theoretical analysis of feature pooling in visual recognition,” in International Conference on -Machine Learning (ICML) , no. 27, 2010, pp. 111–118. [Online]. Available: -http://yann .lecun.com/exdb/publis/pdf/boureau- icml-10 .pdf +Machine Learning (ICML), no. 27, 2010, pp. 111–118. [Online]. Available: +http://yann.lecun.com/exdb/publis/pdf/boureau- icml-10.pdf [BSF94] Y. Bengio, P. Simard, and P. Frasconi, “Learning long-term dependencies -with gradient descent is difficult,” IEEE transactions on neural networks , +with gradient descent is difficult,” IEEE transactions on neural networks, vol. 5, no. 2, pp. 157–166, 1994. [Cha92] C. Charalambous, “Conjugate gradient algorithm for efficient training of artificial neural networks,” IEEE Proceedings G-Circuits, Devices -and Systems , vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: -http://ieeexplore. ieee.org/document/143326/ -[Cho15]F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015. +and Systems, vol. 139, no. 3, pp. 301–310, 1992. [Online]. Available: +http://ieeexplore.ieee.org/document/143326/ +[Cho15] F. Chollet, “Keras,” https://github.com/fchollet/keras, 2015. 
[CLN10] A. Coates, H. Lee, and A. Y. Ng, “An analysis of single-layer networks -in unsupervised feature learning,” Ann Arbor , vol. 1001, no. 48109, -p. 2, 2010. [Online]. Available: http://cs . stanford .edu/~acoates/papers/ +in unsupervised feature learning,” Ann Arbor, vol. 1001, no. 48109, +p. 2, 2010. [Online]. Available: http://cs.stanford.edu/~acoates/papers/ coatesleeng_aistats_2011.pdf [CLN11] A. Coates, H. Lee, and A. Y. Ng, “ STL-10 dataset,” 2011. [Online]. Available: -http://cs .stanford.edu/~acoates/stl10 +http://cs.stanford.edu/~acoates/stl10 [CMS12] D. Ciregan, U. Meier, and J. Schmidhuber, “Multi-column deep neural networks for image classification,” in Conference on Computer Vision and -Pattern Recognition (CVPR) . IEEE, Feb. 2012, pp. 3642–3649. [Online]. -Available: https://arxiv .org/abs/1202. 2745v1 +Pattern Recognition (CVPR). IEEE, Feb. 2012, pp. 3642–3649. [Online]. +Available: https://arxiv.org/abs/1202.2745v1 [CUH15] D.-A. Clevert, T. Unterthiner, and S. Hochreiter, “Fast and accurate deep network learning by exponential linear units (ELUs),” arXiv -preprint arXiv:1511.07289 , Nov. 2015. [Online]. Available: https: -//arxiv .org/abs/1511. 07289 +preprint arXiv:1511.07289, Nov. 2015. [Online]. Available: https: +//arxiv.org/abs/1511.07289 [CWV+ - 14] S. Chetlur, C. Woolley et al. , “ cuDNN: Efficient primitives for deep -learning,” arXiv preprint arXiv:1410.0759 , Oct. 2014. [Online]. Available: -https://arxiv .org/abs/1410.0759 +14] S. Chetlur, C. Woolley et al., “ cuDNN: Efficient primitives for deep +learning,” arXiv preprint arXiv:1410.0759, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.0759 -[DBB + - 01] C. Dugas, Y. Bengio et al. , “Incorporating second-order functional +[DBB+ +01] C. Dugas, Y. Bengio et al., “Incorporating second-order functional knowledge for better option pricing,” in Advances in Neural Information - Processing Systems 13 (NIPS) , T. K. Leen, T. G. Dietterich, + Processing Systems 13 (NIPS), T. K. 
Leen, T. G. Dietterich, and V. Tresp, Eds. MIT Press, 2001, pp. 472–478. [Online]. -Available: http://papers .nips .cc/paper/1920-incorporating-second-orderfunctional-knowledge- - for-better-option- pricing .pdf +Available: http://papers.nips.cc/paper/1920-incorporating- second- orderfunctional-knowledge- + for-better-option- pricing.pdf [DDFK16] S. Dieleman, J. De Fauw, and K. Kavukcuoglu, “Exploiting cyclic symmetry -in convolutional neural networks,” arXiv preprint arXiv:1602.02660 , Feb. -2016. [Online]. Available: https://arxiv . org/abs/1602. 02660 +in convolutional neural networks,” arXiv preprint arXiv:1602.02660, Feb. +2016. [Online]. Available: https://arxiv.org/abs/1602.02660 [DHS11] J. Duchi, E. Hazan, and Y. Singer, “Adaptive subgradient methods for online learning and stochastic optimization,” Journal of Machine Learning -Research , vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: -http://www . jmlr. org/papers/volume12/duchi11a/duchi11a . pdf +Research, vol. 12, no. Jul, pp. 2121–2159, 2011. [Online]. Available: +http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf [DHS16] J. Dai, K. He, and J. Sun, “Instance-aware semantic segmentation via multi-task network cascades,” in Conference on Computer Vision and Pattern -Recognition (CVPR) . IEEE, 2016, pp. 3150–3158. [Online]. Available: -https://arxiv . org/abs/1512. 04412 +Recognition (CVPR). IEEE, 2016, pp. 3150–3158. [Online]. Available: +https://arxiv.org/abs/1512.04412 [DJ99] W. Duch and N. Jankowski, “Survey of neural transfer functions,” Neural -Computing Surveys , vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: -ftp://ftp. icsi.berkeley . edu/pub/ai/jagota/vol2_6.pdf +Computing Surveys, vol. 2, no. 1, pp. 163–212, 1999. [Online]. Available: +ftp://ftp.icsi.berkeley.edu/pub/ai/jagota/vol2_6.pdf [Doz15] T. Dozat, “Incorporating Nesterov momentum into Adam,” Stanford -University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford. 
edu/ -pro j2015/054_report .pdf -[DSRB14] A. Dosovitskiy, J. T. Springenberg et al. , “Discriminative unsupervised +University, Tech. Rep., 2015. [Online]. Available: http://cs229.stanford.edu/ +pro j2015/054_report.pdf +[DSRB14] A. Dosovitskiy, J. T. Springenberg et al., “Discriminative unsupervised feature learning with convolutional neural networks,” in Advances in Neural -Information Processing Systems 27 (NIPS) , Z. Ghahramani, M. Welling -et al. , Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. -Available: http://papers . nips . cc/paper/5548-discriminative-unsupervisedfeature-learning- +Information Processing Systems 27 (NIPS), Z. Ghahramani, M. Welling +et al., Eds. Curran Associates, Inc., 2014, pp. 766–774. [Online]. +Available: http://papers.nips.cc/paper/5548-discriminative-unsupervisedfeature-learning- with- convolutional- neural-networks.pdf [DWD15] S. Dieleman, K. W. Willett, and J. Dambre, “Rotation-invariant convolutional neural networks for galaxy morphology prediction,” Monthly notices of the -royal astronomical society , vol. 450, no. 2, pp. 1441–1459, 2015. -[EDHS07] J. Elson, J. J. Douceur et al. , “Asirra: A CAPTCHA that +royal astronomical society, vol. 450, no. 2, pp. 1441–1459, 2015. +[EDHS07] J. Elson, J. J. Douceur et al., “Asirra: A CAPTCHA that exploits interest-aligned manual image categorization,” in ACM Conference - on Computer and Communications Security (CCS) , no. 14. + on Computer and Communications Security (CCS), no. 14. Association for Computing Machinery, Inc., Oct. 2007. [Online]. -Available: https://www .microsoft .com/en-us/research/publication/asirra-acaptcha- +Available: https://www.microsoft.com/en-us/research/publication/asirra-acaptcha- that-exploits- interest-aligned- manual-image- categorization/ [EKS+ - 96] M. Ester, H.-P. Kriegel et al. , “A density-based algorithm for discovering -clusters in large spatial databases with noise.” in Kdd , vol. 96, no. 34, 1996, +96] M. Ester, H.-P. 
Kriegel et al., “A density-based algorithm for discovering +clusters in large spatial databases with noise.” in Kdd, vol. 96, no. 34, 1996, pp. 226–231. -[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing . -Springer, 2003, vol. 53. [Online]. Available: https://dx. doi. org/10. 1007/978- 3662- +[ES03] A. E. Eiben and J. E. Smith, Introduction to evolutionary computing. +Springer, 2003, vol. 53. [Online]. Available: https://dx.doi.org/10.1007/978-3662- 44874- 8 [Fah88] S. E. Fahlman, “An empirical study of learning speed in back-propagation -networks,” 1988. [Online]. Available: http://repository .cmu .edu/cgi/ -viewcontent. cgi?article=2799&context=compsci +networks,” 1988. [Online]. Available: http://repository.cmu.edu/cgi/ +viewcontent.cgi?article=2799&context=compsci [FFFP06] L. Fei-Fei, R. Fergus, and P. Perona, “One-shot learning of ob ject -categories,” IEEE transactions on pattern analysis and machine intel ligence , +categories,” IEEE transactions on pattern analysis and machine intel ligence, vol. 28, no. 4, pp. 594–611, Apr. 2006. [Online]. Available: http: -//vision.stanford. edu/documents/Fei-FeiFergusPerona2006.pdf +//vision.stanford.edu/documents/Fei-FeiFergusPerona2006.pdf [FFP03] R. F. Fei-Fei and P. Perona, “Caltech 101,” 2003. [Online]. Available: http: -//www . vision.caltech .edu/Image_Datasets/Caltech101/Caltech101.html -[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al. , “Ob ject detection with discriminatively +//www.vision.caltech.edu/Image_Datasets/Caltech101/Caltech101.html +[FGMR10] P. F. Felzenszwalb, R. B. Girshick et al., “Ob ject detection with discriminatively trained part-based models,” IEEE transactions on pattern analysis and machine intel ligence, vol. 32, no. 9, pp. 1627–1645, 2010. [FL89] S. E. Fahlman and C. Lebiere, “The cascade-correlation learning architecture,” -1989. [Online]. Available: http://repository . cmu .edu/compsci/1938/ +1989. [Online]. 
Available: http://repository.cmu.edu/compsci/1938/ [GB10] X. Glorot and Y. Bengio, “Understanding the difficulty of training deep -feedforward neural networks.” in Aistats , vol. 9, 2010, pp. 249–256. [Online]. +feedforward neural networks.” in Aistats, vol. 9, 2010, pp. 249–256. [Online]. Available: http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf [GBB11] X. Glorot, A. Bordes, and Y. Bengio, “Deep sparse rectifier neural -networks.” in Aistats , vol. 15, no. 106, 2011, p. 275. [Online]. Available: -http://www .jmlr.org/proceedings/papers/v15/glorot11a/glorot11a. pdf -[GDDM14] R. Girshick, J. Donahue et al. , “Rich feature hierarchies for accurate ob ject +networks.” in Aistats, vol. 15, no. 106, 2011, p. 275. [Online]. Available: +http://www.jmlr.org/proceedings/papers/v15/glorot11a/glorot11a.pdf +[GDDM14] R. Girshick, J. Donahue et al., “Rich feature hierarchies for accurate ob ject detection and semantic segmentation,” in Conference on Computer Vision -and Pattern Recognition (CVPR) . IEEE, 2014, pp. 580–587. [Online]. -Available: https://arxiv .org/abs/1311. 2524 +and Pattern Recognition (CVPR). IEEE, 2014, pp. 580–587. [Online]. +Available: https://arxiv.org/abs/1311.2524 [GG07] P. P. Greg Griffin, Alex Holub, “Caltech-256 ob ject category dataset,” Apr. -2007. [Online]. Available: http://authors .library .caltech . edu/7694/ +2007. [Online]. Available: http://authors.library.caltech.edu/7694/ [GG16] Y. Gal and Z. Ghahramani, “Bayesian convolutional neural networks with -Bernoulli approximate variational inference,”arXiv preprint arXiv:1506.02158, -Jan. 2016. [Online]. Available: https://arxiv . org/abs/1506. 02158v6 +Bernoulli approximate variational inference,” arXiv preprint arXiv:1506.02158, +Jan. 2016. [Online]. Available: https://arxiv.org/abs/1506.02158v6 [GJ02] M. R. Garey and D. S. Johnson, Computers and intractability. wh freeman New York, 2002, vol. 29. [GJS76] M. R. Garey, D. S. Johnson, and L. 
Stockmeyer, “Some simplified NP-complete -graph problems,” Theoretical computer science , vol. 1, no. 3, pp. 237–267, +graph problems,” Theoretical computer science, vol. 1, no. 3, pp. 237–267, 1976. -[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” inACM +[Gol08] P. Golle, “Machine learning attacks against the Asirra CAPTCHA,” in ACM conference on Computer and communications security (CCS), no. 15. ACM, 2008, pp. 535–542. [Gra15] B. Graham, “Fractional max-pooling,” arXiv preprint arXiv:1412.6071, May -2015. [Online]. Available: https://arxiv . org/abs/1412. 6071 +2015. [Online]. Available: https://arxiv.org/abs/1412.6071 [Gri06] A. P. Griffin, G. Holub, “Caltech 256,” 2006. [Online]. Available: -http://www . vision.caltech . edu/Image_Datasets/Caltech256/ +http://www.vision.caltech.edu/Image_Datasets/Caltech256/ [GWFM+ - 13] I. J. Goodfellow, D. Warde-Farley et al. , “Maxout networks.” ICML , +13] I. J. Goodfellow, D. Warde-Farley et al., “Maxout networks.” ICML, vol. 28, no. 3, pp. 1319–1327, 2013. [Online]. Available: http: -//www .jmlr.org/proceedings/papers/v28/goodfellow13 . pdf +//www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf [HAE16] M. Huh, P. Agrawal, and A. A. Efros, “What makes ImageNet good for -transfer learning?” arXiv preprint arXiv:1608.08614 , Aug. 2016. [Online]. -Available: https://arxiv .org/abs/1608.08614 -[Han89] S. J. Hanson, “Meiosis networks.” in NIPS , 1989, pp. 533–541. [Online]. +transfer learning?” arXiv preprint arXiv:1608.08614, Aug. 2016. [Online]. +Available: https://arxiv.org/abs/1608.08614 +[Han89] S. J. Hanson, “Meiosis networks.” in NIPS, 1989, pp. 533–541. [Online]. Available: http://papers.nips.cc/paper/227- meiosis-networks.pdf [Har15] M. Harris, “New features in CUDA 7.5,” Jul. 2015. [Online]. Available: -https://devblogs.nvidia .com/parallelforall/new- features- cuda- 7-5/ +https://devblogs.nvidia.com/parallelforall/new- features- cuda- 7-5/ [HLW16] G. Huang, Z. Liu, and K. Q. 
Weinberger, “Densely connected convolutional -networks,” arXiv preprint arXiv:1608.06993 , Aug. 2016. [Online]. Available: -https://arxiv . org/abs/1608. 06993v1 -[HM16] - M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv -preprint arXiv:1611.04231 , Nov. 2016. [Online]. Available: https: -//arxiv .org/abs/1611.04231 +networks,” arXiv preprint arXiv:1608.06993, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.06993v1 +[HM16] M. Hardt and T. Ma, “Identity matters in deep learning,” arXiv +preprint arXiv:1611.04231, Nov. 2016. [Online]. Available: https: +//arxiv.org/abs/1611.04231 [How13] A. G. Howard, “Some improvements on deep convolutional neural network -based image classification,” arXiv preprint arXiv:1312.5402 , Dec. 2013. -[Online]. Available: https://arxiv . org/abs/1312.5402 +based image classification,” arXiv preprint arXiv:1312.5402, Dec. 2013. +[Online]. Available: https://arxiv.org/abs/1312.5402 -[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques . +[HPK11] J. Han, J. Pei, and M. Kamber, Data mining: concepts and techniques. Elsevier, 2011. [HPN+ - 16] - S. Han, J. Pool et al. , “ DSD: Regularizing deep neural networks with +16] + S. Han, J. Pool et al., “ DSD: Regularizing deep neural networks with dense-sparse-dense training flow,” arXiv preprint arXiv:1607.04381, Jul. 2016. -[Online]. Available: https://arxiv .org/abs/1607.04381 -[HPTD15] S. Han, J. Pool et al. , “Learning both weights and connections for efficient +[Online]. Available: https://arxiv.org/abs/1607.04381 +[HPTD15] S. Han, J. Pool et al., “Learning both weights and connections for efficient neural network,” in Advances in Neural Information Processing Systems 28 (NIPS), C. Cortes, N. D. Lawrence et al., Eds. Curran Associates, Inc., Jun. -2015, pp. 1135–1143. [Online]. Available: http://papers .nips .cc/paper/5784learning-both-weights- +2015, pp. 1135–1143. [Online]. 
Available: http://papers.nips.cc/paper/5784learning-both-weights- and- connections-for- efficient- neural- network.pdf [HSK+ - 12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing -co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580 , Jul. -2012. [Online]. Available: https://arxiv .org/abs/1207.0580 +12] G. E. Hinton, N. Srivastava et al., “Improving neural networks by preventing +co-adaptation of feature detectors,” arXiv preprint arXiv:1207.0580, Jul. +2012. [Online]. Available: https://arxiv.org/abs/1207.0580 [HSL+ - 16] G. Huang, Y. Sun et al. , “Deep networks with stochastic depth,” -arXiv preprint arXiv:1603.09382 , Mar. 2016. [Online]. Available: https: -//arxiv .org/abs/1603. 09382 +16] G. Huang, Y. Sun et al., “Deep networks with stochastic depth,” +arXiv preprint arXiv:1603.09382, Mar. 2016. [Online]. Available: https: +//arxiv.org/abs/1603.09382 [HSW93] B. Hassibi, D. G. Stork, and G. J. Wolff, “Optimal brain surgeon and general network pruning,” in International Conference on Neural -Networks . IEEE, 1993, pp. 293–299. [Online]. Available: http: -//ee. caltech .edu/Babak/pubs/conferences/00298572.pdf +Networks. IEEE, 1993, pp. 293–299. [Online]. Available: http: +//ee.caltech.edu/Babak/pubs/conferences/00298572.pdf [HVD15] G. Hinton, O. Vinyals, and J. Dean, “Distilling the knowledge in a neural -network,” arXiv preprint arXiv:1503.02531 , Mar. 2015. [Online]. Available: -https://arxiv .org/abs/1503.02531 -[HZRS14] K. He, X. Zhang et al. , “Spatial pyramid pooling in deep convolutional +network,” arXiv preprint arXiv:1503.02531, Mar. 2015. [Online]. Available: +https://arxiv.org/abs/1503.02531 +[HZRS14] K. He, X. Zhang et al., “Spatial pyramid pooling in deep convolutional networks for visual recognition,” in European Conference on Computer -Vision (ECCV) . Springer, 2014, pp. 346–361. [Online]. Available: -https://arxiv .org/abs/1406.4729 -[HZRS15a] K. He, X. Zhang et al. 
, “Deep residual learning for image recognition,” -arXiv preprint arXiv:1512.03385 , Dec. 2015. [Online]. Available: https: -//arxiv .org/abs/1512. 03385v1 +Vision (ECCV). Springer, 2014, pp. 346–361. [Online]. Available: +https://arxiv.org/abs/1406.4729 +[HZRS15a] K. He, X. Zhang et al., “Deep residual learning for image recognition,” +arXiv preprint arXiv:1512.03385, Dec. 2015. [Online]. Available: https: +//arxiv.org/abs/1512.03385v1 [HZRS15b] K. He, X. Zhang et al., “Delving deep into rectifiers: Surpassing human-level performance on imagenet classification,” in International Conference on -Computer Vision (ICCV) , Feb. 2015, pp. 1026–1034. [Online]. Available: -https://arxiv .org/abs/1502.01852 +Computer Vision (ICCV), Feb. 2015, pp. 1026–1034. [Online]. Available: +https://arxiv.org/abs/1502.01852 [Ima12] “Imagenet large scale visual recognition challenge 2012 (ILSVRC2012),” -2012. [Online]. Available: http://www .image-net .org/challenges/LSVRC/ +2012. [Online]. Available: http://www.image- net.org/challenges/LSVRC/ 2012/nonpub-downloads [IS15] S. Ioffe and C. Szegedy, “Batch normalization: Accelerating deep network -training by reducing internal covariate shift,”arXiv preprint arXiv:1502.03167, -Feb. 2015. [Online]. Available: https://arxiv . org/abs/1502.03167 +training by reducing internal covariate shift,” arXiv preprint arXiv:1502.03167, +Feb. 2015. [Online]. Available: https://arxiv.org/abs/1502.03167 [JXF+ - 16] X. Jin, C. Xu et al. , “Deep learning with s-shaped rectified linear activation -units,” in Thirtieth AAAI Conference on Artificial Intel ligence , Dec. 2016. -[Online]. Available: https://arxiv . org/abs/1512.07030 +16] X. Jin, C. Xu et al., “Deep learning with s-shaped rectified linear activation +units,” in Thirtieth AAAI Conference on Artificial Intel ligence, Dec. 2016. +[Online]. Available: https://arxiv.org/abs/1512.07030 [Kar11] A. Karpathy, “Lessons learned from manually classifying CIFAR-10,” Apr. -2011. [Online]. 
Available: http://karpathy .github .io/2011/04/27/manuallyclassifying-cifar10/ +2011. [Online]. Available: http://karpathy.github.io/2011/04/27/manuallyclassifying-cifar10/ [KB14] D. Kingma and J. Ba, “Adam: A method for stochastic optimization,” -arXiv preprint arXiv:1412.6980 , Dec. 2014. [Online]. Available: https: -//arxiv .org/abs/1412.6980 +arXiv preprint arXiv:1412.6980, Dec. 2014. [Online]. Available: https: +//arxiv.org/abs/1412.6980 [KH09] A. Krizhevsky and G. Hinton, “Learning multiple layers of features from tiny -images,” Apr. 2009. [Online]. Available: https://www . cs .toronto . edu/~kriz/ -learning-features- 2009- TR .pdf +images,” Apr. 2009. [Online]. Available: https://www.cs.toronto.edu/~kriz/ +learning-features- 2009- TR.pdf [KMN+ - 16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning: -Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836 , -Sep. 2016. [Online]. Available: https://arxiv . org/abs/1609. 04836 +16] N. S. Keskar, D. Mudigere et al., “On large-batch training for deep learning: +Generalization gap and sharp minima,” arXiv preprint arXiv:1609.04836, +Sep. 2016. [Online]. Available: https://arxiv.org/abs/1609.04836 [Koc15] T. Kocmánek, “ HyperNEAT and novelty search for image recognition,” Ph.D. dissertation, Master’s thesis, Czech Technical University in Prague, 2015. -[Online]. Available: http://kocmi .tk/photos/DiplomaThesis .pdf +[Online]. Available: http://kocmi.tk/photos/DiplomaThesis.pdf [KPY+ - 15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks +15] Y.-D. Kim, E. Park et al., “Compression of deep convolutional neural networks for fast and low power mobile applications,” arXiv preprint arXiv:1511.06530, -Nov. 2015. [Online]. Available: https://arxiv .org/abs/1511.06530 +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.06530 [KR09] L. Kaufman and P. J. Rousseeuw, Finding groups in data: an introduction to cluster analysis. 
John Wiley & Sons, 2009, vol. 344. [Kri] A. Krizhevsky, “The CIFAR-10 dataset.” [Online]. Available: https: -//www .cs.toronto . edu/~kriz/cifar .html +//www.cs.toronto.edu/~kriz/cifar.html [KS02] V. Kurkova and M. Sanguineti, “Comparison of worst case errors in linear and neural network approximation,” IEEE Transactions on Information -Theory , vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: -http://ieeexplore.ieee. org/abstract/document/971754/ +Theory, vol. 48, no. 1, pp. 264–275, Jan. 2002. [Online]. Available: +http://ieeexplore.ieee.org/abstract/document/971754/ [KSH12] A. Krizhevsky, I. Sutskever, and G. E. Hinton, “Imagenet classification with deep convolutional neural networks,” in Advances in Neural -Information Processing Systems 25 (NIPS) , F. Pereira, C. J. C. Burges -et al. , Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. -Available: http://papers .nips .cc/paper/4824-imagenet-classification-withdeep-convolutional-neural- - networks .pdf +Information Processing Systems 25 (NIPS), F. Pereira, C. J. C. Burges +et al., Eds. Curran Associates, Inc., 2012, pp. 1097–1105. [Online]. +Available: http://papers.nips.cc/paper/4824-imagenet- classification- withdeep-convolutional-neural- + networks.pdf [KSlB+ - 10] K. Kavukcuoglu, P. Sermanet et al. , “Learning convolutional feature +10] K. Kavukcuoglu, P. Sermanet et al., “Learning convolutional feature hierarchies for visual recognition,” in Advances in Neural Information -Processing Systems 23 (NIPS) , J. D. Lafferty, C. K. I. Williams -et al. , Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. -Available: http://papers .nips . cc/paper/4133-learning- convolutional-featurehierarchies- - for-visual- recognition .pdf -[LAE + - 16] W. Liu, D. Anguelov et al. , “ SSD: Single shot multibox detector,” in -European Conference on Computer Vision (ECCV) . Springer, 2016, pp. -21–37. [Online]. Available: https://arxiv .org/abs/1512.02325 -[Las17] “Noise layers,” Jan. 2017. [Online]. 
Available: http://lasagne. readthedocs.io/ -en/latest/modules/layers/noise .html#lasagne .layers.DropoutLayer +Processing Systems 23 (NIPS), J. D. Lafferty, C. K. I. Williams +et al., Eds. Curran Associates, Inc., 2010, pp. 1090–1098. [Online]. +Available: http://papers.nips.cc/paper/4133-learning- convolutional-featurehierarchies- + for-visual- recognition.pdf +[LAE+ +16] W. Liu, D. Anguelov et al., “ SSD: Single shot multibox detector,” in +European Conference on Computer Vision (ECCV). Springer, 2016, pp. +21–37. [Online]. Available: https://arxiv.org/abs/1512.02325 +[Las17] “Noise layers,” Jan. 2017. [Online]. Available: http://lasagne.readthedocs.io/ +en/latest/modules/layers/noise.html#lasagne.layers.DropoutLayer [LBBH98] - Y. LeCun, L. Bottou et al. , “Gradient-based learning applied to document -recognition,” Proceedings of the IEEE , vol. 86, no. 11, pp. 2278–2324, Nov. -1998. [Online]. Available: http://yann .lecun .com/exdb/publis/pdf/lecun01a.pdf + Y. LeCun, L. Bottou et al., “Gradient-based learning applied to document +recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, Nov. +1998. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/lecun01a.pdf -[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature , +[LBH15] Y. LeCun, Y. Bengio, and G. Hinton, “Deep learning,” Nature, vol. 521, no. 7553, pp. 436–444, May 2015. [Online]. Available: -http://www .nature.com/nature/journal/v521/n7553/abs/nature14539 .html -[LBOM98] Y. A. LeCun, L. Bottou et al. , Efficient BackProp , ser. Lecture Notes in +http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html +[LBOM98] Y. A. LeCun, L. Bottou et al., Efficient BackProp, ser. Lecture Notes in Computer Science. Berlin, Heidelberg: Springer Berlin Heidelberg, 1998, vol. -1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3- 540- 49430- 8 +1524, pp. 9–50. [Online]. Available: http://dx.doi.org/10.1007/3-540-49430-8 [LDS+ - 89] Y. LeCun, J. S. 
Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989, -pp. 598–605. [Online]. Available: http://yann .lecun . com/exdb/publis/pdf/ -lecun-90b . pdf -[Le13] - Q. V. Le, “Building high-level features using large scale unsupervised +89] Y. LeCun, J. S. Denker et al., “Optimal brain damage.” in NIPs, vol. 2, 1989, +pp. 598–605. [Online]. Available: http://yann.lecun.com/exdb/publis/pdf/ +lecun-90b.pdf +[Le13] Q. V. Le, “Building high-level features using large scale unsupervised learning,” in International conference on acoustics, speech and signal -processing . IEEE, 2013, pp. 8595–8598. [Online]. Available: http: -//ieeexplore.ieee. org/stamp/stamp.jsp?arnumber=6639343 +processing. IEEE, 2013, pp. 8595–8598. [Online]. Available: http: +//ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6639343 [LG16] A. Lavin and S. Gray, “Fast algorithms for convolutional neural networks,” in Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. -2016, pp. 4013–4021. [Online]. Available: https://arxiv .org/abs/1509.09308 +2016, pp. 4013–4021. [Online]. Available: https://arxiv.org/abs/1509.09308 [LGT16] C.-Y. Lee, P. W. Gallagher, and Z. Tu, “Generalizing pooling functions in convolutional neural networks: Mixed, gated, and tree,” in International -Conference on Artificial Intel ligence and Statistics , 2016. [Online]. Available: -https://arxiv . org/abs/1509. 08985v2 +Conference on Artificial Intel ligence and Statistics, 2016. [Online]. Available: +https://arxiv.org/abs/1509.08985v2 [LH16] I. Loshchilov and F. Hutter, “ SGDR: stochastic gradient descent -with warm restarts,” Learning , Aug. 2016. [Online]. Available: https: -//arxiv .org/abs/1608.03983 -[LJD + - 16] L. Li, K. Jamieson et al. , “Hyperband: A novel bandit-based approach to -hyperparameter optimization,” arXiv preprint arXiv:1603.06560 , Mar. 2016. -[Online]. Available: https://arxiv . org/abs/1603.06560 +with warm restarts,” Learning, Aug. 2016. [Online]. 
Available: https: +//arxiv.org/abs/1608.03983 +[LJD+ +16] L. Li, K. Jamieson et al., “Hyperband: A novel bandit-based approach to +hyperparameter optimization,” arXiv preprint arXiv:1603.06560, Mar. 2016. +[Online]. Available: https://arxiv.org/abs/1603.06560 [LM16] K. Li and J. Malik, “Learning to optimize,” arXiv preprint arXiv:1606.01885, -Jun. 2016. [Online]. Available: https://arxiv .org/abs/1606.01885 +Jun. 2016. [Online]. Available: https://arxiv.org/abs/1606.01885 [LSD15] J. Long, E. Shelhamer, and T. Darrell, “Fully convolutional networks for semantic segmentation,” in Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 3431–3440. [Online]. Available: -https://arxiv . org/abs/1411. 4038v2 -[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513 , Mar. -2017. [Online]. Available: https://arxiv . org/abs/1703. 01513 +https://arxiv.org/abs/1411.4038v2 +[LX17] A. Y. Lingxi Xie, “Genetic CNN,” arXiv preprint arXiv:1703.01513, Mar. +2017. [Online]. Available: https://arxiv.org/abs/1703.01513 [Ma j17] S. Ma jumdar, “Densenet,” GitHub, Feb. 2017. [Online]. Available: -https://github .com/titu1994/DenseNet +https://github.com/titu1994/DenseNet [Mar08] M. Marszałek, “ INRIA annotations for Graz-02 (IG02),” Oct. 2008. [Online]. -Available: http://lear .inrialpes .fr/people/marszalek/data/ig02/ +Available: http://lear.inrialpes.fr/people/marszalek/data/ig02/ [MDA15] D. Maclaurin, D. Duvenaud, and R. Adams, “Gradient-based hyperparameter optimization through reversible learning,” in International Conference on -Machine Learning (ICML) , 2015, pp. 2113–2122. +Machine Learning (ICML), 2015, pp. 2113–2122. [MH08] L. v. d. Maaten and G. Hinton, “Visualizing data using t-SNE,” Journal of -Machine Learning Research , vol. 9, no. Nov, pp. 2579–2605, 2008. +Machine Learning Research, vol. 9, no. Nov, pp. 2579–2605, 2008. [MHN13] A. L. Maas, A. Y. Hannun, and A. Y. 
Ng, “Rectifier nonlinearities -improve neural network acoustic models,” in Proc. ICML , vol. 30, -no. 1, 2013. [Online]. Available: https://web .stanford . edu/~awni/papers/ -relu_hybrid_icml2013_final. pdf +improve neural network acoustic models,” in Proc. ICML, vol. 30, +no. 1, 2013. [Online]. Available: https://web.stanford.edu/~awni/papers/ +relu_hybrid_icml2013_final.pdf [MM15] D. Mishkin and J. Matas, “All you need is a good init,” arXiv -preprint arXiv:1511.06422 , Nov. 2015. [Online]. Available: https: -//arxiv .org/abs/1511. 06422 +preprint arXiv:1511.06422, Nov. 2015. [Online]. Available: https: +//arxiv.org/abs/1511.06422 [MP43] W. S. McCulloch and W. Pitts, “A logical calculus of the ideas immanent in -nervous activity,” The bul letin of mathematical biophysics , vol. 5, no. 4, pp. +nervous activity,” The bul letin of mathematical biophysics, vol. 5, no. 4, pp. 115–133, 1943. [MRM15] N. McLaughlin, J. M. D. Rincon, and P. Miller, “Data-augmentation for -reducing dataset bias in person re-identification,” inInternational Conference +reducing dataset bias in person re-identification,” in International Conference on Advanced Video and Signal Based Surveil lance (AVSS), no. 12, Aug. 2015, -pp. 1–6. [Online]. Available: http://ieeexplore .ieee . org/abstract/document/ +pp. 1–6. [Online]. Available: http://ieeexplore.ieee.org/abstract/document/ 7301739/ [MS07] M. Marszalek and C. Schmid, “Accurate ob ject localization with shape masks,” in Conference on Computer Vision and Pattern -Recognition (CVPR) . IEEE, 2007, pp. 1–8. [Online]. Available: http: -//ieeexplore.ieee. org/document/4270110/ +Recognition (CVPR). IEEE, 2007, pp. 1–8. [Online]. Available: http: +//ieeexplore.ieee.org/document/4270110/ [MSM16] D. Mishkin, N. Sergievskiy, and J. Matas, “Systematic evaluation of CNN -advances on the ImageNet,” arXiv preprint arXiv:1606.02228 , Jun. 2016. -[Online]. 
Available: https://arxiv .org/abs/1606.02228 +advances on the ImageNet,” arXiv preprint arXiv:1606.02228, Jun. 2016. +[Online]. Available: https://arxiv.org/abs/1606.02228 [MV16] A. Mahendran and A. Vedaldi, “Visualizing deep convolutional neural networks using natural pre-images,” International Journal of Computer Vision, -pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv .org/abs/1512.02017 -[NDRT13] N. Natara jan, I. S. Dhillon et al. , “Learning with noisy labels,” in Advances -in Neural Information Processing Systems 26 (NIPS) , C. J. C. Burges, +pp. 1–23, Apr. 2016. [Online]. Available: https://arxiv.org/abs/1512.02017 +[NDRT13] N. Natara jan, I. S. Dhillon et al., “Learning with noisy labels,” in Advances +in Neural Information Processing Systems 26 (NIPS), C. J. C. Burges, L. Bottou et al., Eds. Curran Associates, Inc., 2013, pp. 1196–1204. [Online]. -Available: http://papers. nips.cc/paper/5073- learning- with- noisy- labels .pdf +Available: http://papers.nips.cc/paper/5073- learning- with- noisy- labels.pdf [Nes83] Y. Nesterov, “A method of solving a convex programming problem with -convergence rate o (1/k2),” in Soviet Mathematics Doklady , vol. 27, no. 2, +convergence rate o (1/k2),” in Soviet Mathematics Doklady, vol. 27, no. 2, 1983, pp. 372–376. [new00] “The training performed by qnstrn,” Aug. 2000. [Online]. Available: -http://www1 .icsi.berkeley .edu/Speech/faq/nn- train.html +http://www1.icsi.berkeley.edu/Speech/faq/nn- train.html [Ng16] A. Ng, “Nuts and bolts of building ai applications using deep learning,” NIPS Talk, Dec. 2016. [NH92] S. J. Nowlan and G. E. Hinton, “Simplifying neural networks by soft -weight-sharing,” Neural computation , vol. 4, no. 4, pp. 473–493, 1992. -[Online]. Available: https://www. cs.toronto.edu/~hinton/absps/sunspots.pdf +weight-sharing,” Neural computation, vol. 4, no. 4, pp. 473–493, 1992. +[Online]. Available: https://www.cs.toronto.edu/~hinton/absps/sunspots.pdf [NH02] R. T. Ng and J. 
Han, “ CLARANS: A method for clustering ob jects for spatial data mining,” IEEE transactions on know ledge and data engineering, vol. 14, no. 5, pp. 1003–1016, 2002. -[NWC + - 11a] - Y. Netzer, T. Wang et al. , “Reading digits in natural images with +[NWC+ +11a] + Y. Netzer, T. Wang et al., “Reading digits in natural images with unsupervised feature learning,” in NIPS workshop on deep learning and unsupervised feature learning, vol. 2011, no. 2, 2011, p. 5. [Online]. Available: -http://ufldl. stanford.edu/housenumbers/nips2011_housenumbers.pdf -[NWC + - 11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,” -2011. [Online]. Available: http://ufldl. stanford.edu/housenumbers/ +http://ufldl.stanford.edu/housenumbers/nips2011_housenumbers.pdf +[NWC+ +11b] Y. Netzer, T. Wang et al., “The street view house numbers (SVHN) dataset,” +2011. [Online]. Available: http://ufldl.stanford.edu/housenumbers/ [NYC16] A. Nguyen, J. Yosinski, and J. Clune, “Multifaceted feature visualization: Uncovering the different types of features learned by each neuron in deep -neural networks,” arXiv preprint arXiv:1602.03616 , May 2016. [Online]. -Available: https://arxiv .org/abs/1602.03616 +neural networks,” arXiv preprint arXiv:1602.03616, May 2016. [Online]. +Available: https://arxiv.org/abs/1602.03616 [OHIL16] J. Ortigosa-Hernández, I. Inza, and J. A. Lozano, “Towards competitive classifiers for unbalanced classification problems: A study on the performance -scores,” arXiv preprint arXiv:1608.08984 , Aug. 2016. [Online]. Available: -https://arxiv . org/abs/1608. 08984 +scores,” arXiv preprint arXiv:1608.08984, Aug. 2016. [Online]. Available: +https://arxiv.org/abs/1608.08984 [PMW+ - 15] N. Papernot, P. McDaniel et al. , “Distillation as a defense to adversarial -perturbations against deep neural networks,”arXiv preprint arXiv:1511.04508, -Nov. 2015. [Online]. Available: https://arxiv .org/abs/1511.04508 +15] N. Papernot, P. 
McDaniel et al., “Distillation as a defense to adversarial +perturbations against deep neural networks,” arXiv preprint arXiv:1511.04508, +Nov. 2015. [Online]. Available: https://arxiv.org/abs/1511.04508 [Pre98] L. Prechelt, Early Stopping - But When? Berlin, Heidelberg: Springer -Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx . doi . org/ -10. 1007/3-540- 49430-8_3 +Berlin Heidelberg, 1998, pp. 55–69. [Online]. Available: http://dx.doi.org/ +10.1007/3-540- 49430-8_3 [RDS+ - 14] O. Russakovsky, J. Deng et al. , “Imagenet large scale visual recognition +14] O. Russakovsky, J. Deng et al., “Imagenet large scale visual recognition challenge,” arXiv preprint arXiv:1409.0575, vol. 115, no. 3, pp. 211–252, Sep. -2014. [Online]. Available: https://arxiv . org/abs/1409. 0575 +2014. [Online]. Available: https://arxiv.org/abs/1409.0575 [RFB15] O. Ronneberger, P. Fischer, and T. Brox, “U-net: Convolutional networks for biomedical image segmentation,” in International Conference on Medical -Image Computing and Computer-Assisted Intervention . Springer, 2015, pp. -234–241. [Online]. Available: https://arxiv .org/abs/1505. 04597 +Image Computing and Computer-Assisted Intervention. Springer, 2015, pp. +234–241. [Online]. Available: https://arxiv.org/abs/1505.04597 [RLS10] S. Risi, J. Lehman, and K. O. Stanley, “Evolving the placement and density of neurons in the hyperneat substrate,” in Conference on Genetic and -evolutionary computation , no. 12. ACM, 2010, pp. 563–570. +evolutionary computation, no. 12. ACM, 2010, pp. 563–570. [RSG16] M. T. Ribeiro, S. Singh, and C. Guestrin, “"why should i trust you?": Explaining the predictions of any classifier,” arXiv preprint arXiv:1602.04938, -Feb. 2016. [Online]. Available: https://arxiv . org/abs/1602.04938 +Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.04938 [Rud16] S. Ruder, “An overview of gradient descent optimization algorithms,” -arXiv preprint arXiv:1609.04747 , Sep. 2016. [Online]. 
Available: https: -//arxiv .org/abs/1609. 04747 +arXiv preprint arXiv:1609.04747, Sep. 2016. [Online]. Available: https: +//arxiv.org/abs/1609.04747 [SCL12] P. Sermanet, S. Chintala, and Y. LeCun, “Convolutional neural networks applied to house numbers digit classification,” in International Conference -on Pattern Recognition (ICPR) , no. 21. IEEE, Apr. 2012, pp. 3288–3291. -[Online]. Available: https://arxiv .org/abs/1204.3968 +on Pattern Recognition (ICPR), no. 21. IEEE, Apr. 2012, pp. 3288–3291. +[Online]. Available: https://arxiv.org/abs/1204.3968 [SDG09] K. O. Stanley, D. B. D’Ambrosio, and J. Gauci, “A hypercube-based encoding for evolving large-scale neural networks,” Artificial life, vol. 15, no. 2, pp. 185– -212, 2009. [Online]. Available: http://ieeexplore. ieee.org/document/6792316/ +212, 2009. [Online]. Available: http://ieeexplore.ieee.org/document/6792316/ [SEZ+ - 13] P. Sermanet, D. Eigen et al. , “Overfeat: Integrated recognition, localization +13] P. Sermanet, D. Eigen et al., “Overfeat: Integrated recognition, localization and detection using convolutional networks,” arXiv preprint arXiv:1312.6229, -Feb. 2013. [Online]. Available: https://arxiv .org/abs/1312.6229v4 +Feb. 2013. [Online]. Available: https://arxiv.org/abs/1312.6229v4 [SHK+ - 14] N. Srivastava, G. E. Hinton et al. , “Dropout: a simple way to +14] N. Srivastava, G. E. Hinton et al., “Dropout: a simple way to prevent neural networks from overfitting.” Journal of Machine Learning -Research , vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: -https://www .cs. toronto .edu/~hinton/absps/JMLRdropout .pdf +Research, vol. 15, no. 1, pp. 1929–1958, 2014. [Online]. Available: +https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf [SHY+ - 13] A. Senior, G. Heigold et al. , “An empirical study of learning rates in deep +13] A. Senior, G. 
Heigold et al., “An empirical study of learning rates in deep neural networks for speech recognition,” in International Conference on Acoustics, Speech and Signal Processing. IEEE, 2013, pp. 6724–6728. [Online]. -Available: http://ieeexplore. ieee.org/document/6638963/?arnumber=6638963 +Available: http://ieeexplore.ieee.org/document/6638963/?arnumber=6638963 [SIV16] C. Szegedy, S. Ioffe, and V. Vanhoucke, “Inception-v4, inception-resnet and the impact of residual connections on learning,” arXiv preprint arXiv:1602.07261, -Feb. 2016. [Online]. Available: https://arxiv .org/abs/1602.07261 +Feb. 2016. [Online]. Available: https://arxiv.org/abs/1602.07261 [SKP15] F. Schroff, D. Kalenichenko, and J. Philbin, “Facenet: A unified embedding for face recognition and clustering,” in Conference on Computer Vision -and Pattern Recognition (CVPR) . IEEE, Mar. 2015, pp. 815–823. [Online]. -Available: https://arxiv .org/abs/1503. 03832 +and Pattern Recognition (CVPR). IEEE, Mar. 2015, pp. 815–823. [Online]. +Available: https://arxiv.org/abs/1503.03832 [SL11] P. Sermanet and Y. LeCun, “Traffic sign recognition with multi-scale convolutional networks,” in International Joint Conference on Neural -Networks (IJCNN) , Jul. 2011, pp. 2809–2813. [Online]. Available: -http://ieeexplore. ieee.org/document/6033589/ +Networks (IJCNN), Jul. 2011, pp. 2809–2813. [Online]. Available: +http://ieeexplore.ieee.org/document/6033589/ [SLJ+ - 15] C. Szegedy, W. Liu et al. , “Going deeper with convolutions,” in Conference +15] C. Szegedy, W. Liu et al., “Going deeper with convolutions,” in Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Sep. 2015, pp. -1–9. [Online]. Available: https://arxiv .org/abs/1409. 4842 -[SM02] - K. O. Stanley and R. Miikkulainen, “Evolving neural networks through +1–9. [Online]. Available: https://arxiv.org/abs/1409.4842 +[SM02] K. O. Stanley and R. Miikkulainen, “Evolving neural networks through augmenting topologies,” Evolutionary computation, vol. 
10, no. 2, pp. 99–127, 2002. [Online]. Available: http://www.mitpressjournals.org/doi/abs/10.1162/ 106365602320169811 [SMG13] A. M. Saxe, J. L. McClelland, and S. Ganguli, “Exact solutions to the nonlinear dynamics of learning in deep linear neural networks,” -arXiv preprint arXiv:1312.6120 , Dec. 2013. [Online]. Available: https: -//arxiv .org/abs/1312.6120 -[SMGS14] R. K. Srivastava, J. Masci et al. , “Understanding locally competitive -networks,” arXiv preprint arXiv:1410.1165 , Oct. 2014. [Online]. Available: -https://arxiv . org/abs/1410. 1165 -[SSSI] J. Stallkamp, M. Schlipsing et al. , “The german traffic sign recognition -benchmark.” [Online]. Available: http://benchmark .ini . rub .de/?section= +arXiv preprint arXiv:1312.6120, Dec. 2013. [Online]. Available: https: +//arxiv.org/abs/1312.6120 +[SMGS14] R. K. Srivastava, J. Masci et al., “Understanding locally competitive +networks,” arXiv preprint arXiv:1410.1165, Oct. 2014. [Online]. Available: +https://arxiv.org/abs/1410.1165 +[SSSI] J. Stallkamp, M. Schlipsing et al., “The german traffic sign recognition +benchmark.” [Online]. Available: http://benchmark.ini.rub.de/?section= gtsrb&subsection=news -[SSSI12] J. Stallkamp, M. Schlipsing et al. , “Man vs. computer: Benchmarking -machine learning algorithms for traffic sign recognition,” Neural Networks , +[SSSI12] J. Stallkamp, M. Schlipsing et al., “Man vs. computer: Benchmarking +machine learning algorithms for traffic sign recognition,” Neural Networks, no. 0, pp. –, 2012. [Online]. Available: http://www.sciencedirect.com/science/ article/pii/S0893608012000457 [SV16] S. Saxena and J. Verbeek, “Convolutional neural fabrics,” arXiv preprint -arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606. 02492 +arXiv:1606.02492, 2016. [Online]. Available: https://arxiv.org/abs/1606.02492 [SVI+ - 15] C. Szegedy, V. Vanhoucke et al. , “Rethinking the inception architecture -for computer vision,” arXiv preprint arXiv:1512.00567 , Dec. 2015. [Online]. 
-Available: https://arxiv .org/abs/1512.00567v3 +15] C. Szegedy, V. Vanhoucke et al., “Rethinking the inception architecture +for computer vision,” arXiv preprint arXiv:1512.00567, Dec. 2015. [Online]. +Available: https://arxiv.org/abs/1512.00567v3 [SVZ13] K. Simonyan, A. Vedaldi, and A. Zisserman, “Deep inside convolutional networks: Visualising image classification models and saliency maps,” -arXiv preprint arXiv:1312.6034 , Dec. 2013. [Online]. Available: https: -//arxiv .org/abs/1312.6034 +arXiv preprint arXiv:1312.6034, Dec. 2013. [Online]. Available: https: +//arxiv.org/abs/1312.6034 [SZ14] K. Simonyan and A. Zisserman, “Very deep convolutional networks for -large-scale image recognition,” arXiv preprint arXiv:1409.1556 , Sep. 2014. -[Online]. Available: https://arxiv . org/abs/1409.1556 +large-scale image recognition,” arXiv preprint arXiv:1409.1556, Sep. 2014. +[Online]. Available: https://arxiv.org/abs/1409.1556 [SZS+ - 13] C. Szegedy, W. Zaremba et al. , “Intriguing properties of neural -networks,” arXiv preprint arXiv:1312.6199 , Dec. 2013. [Online]. Available: -https://arxiv . org/abs/1312. 6199v4 +13] C. Szegedy, W. Zaremba et al., “Intriguing properties of neural +networks,” arXiv preprint arXiv:1312.6199, Dec. 2013. [Online]. Available: +https://arxiv.org/abs/1312.6199v4 [TF-16a] “ MNIST for ML beginners,” Dec. 2016. [Online]. Available: https: -//www .tensorflow .org/tutorials/mnist/beginners/ +//www.tensorflow.org/tutorials/mnist/beginners/ -[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow .org/ +[tf-16b] “tf.nn.dropout,” Dec. 2016. [Online]. Available: https://www.tensorflow.org/ api_docs/python/nn/activation_functions_#dropout [TH12] T. Tieleman and G. Hinton, “Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude,” COURSERA: Neural -Networks for Machine Learning , vol. 4, no. 2, 2012. [Online]. Available: -http://www .cs.toronto . 
edu/~tijmen/csc321/slides/lecture_slides_lec6 .pdf +Networks for Machine Learning, vol. 4, no. 2, 2012. [Online]. Available: +http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf [Tho14a] M. Thoma, “On-line recognition of handwritten mathematical symbols,” Karlsruhe, Germany, Nov. 2014. [Online]. Available: http://martinthoma.com/write-math [Tho14b] M. Thoma, “The Twiddle algorithm,” Sep. 2014. [Online]. Available: -https://martin- thoma .com/twiddle/ +https://martin- thoma.com/twiddle/ [Tho16] M. Thoma, “A survey of semantic segmentation,” arXiv preprint -arXiv:1602.06541 , Feb. 2016. [Online]. Available: https://arxiv . org/abs/ -1602. 06541 -[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380 , Jan. -2017. [Online]. Available: https://arxiv .org/abs/1701.08380 +arXiv:1602.06541, Feb. 2016. [Online]. Available: https://arxiv.org/abs/ +1602.06541 +[Tho17a] M. Thoma, “The HASYv2 dataset,” arXiv preprint arXiv:1701.08380, Jan. +2017. [Online]. Available: https://arxiv.org/abs/1701.08380 [Tho17b] M. Thoma, “Master thesis (blog post),” Apr. 2017. [Online]. Available: -https://martin- thoma .com/msthesis +https://martin- thoma.com/msthesis [VH13] P. Verbancsics and J. Harguess, “Generative neuroevolution for deep -learning,” arXiv preprint arXiv:1312.5355 , Dec. 2013. [Online]. Available: -https://arxiv .org/abs/1312.5355 -[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing . +learning,” arXiv preprint arXiv:1312.5355, Dec. 2013. [Online]. Available: +https://arxiv.org/abs/1312.5355 +[vLA87] P. J. M. van Laarhoven and E. H. L. Aarts, Simulated annealing. Dordrecht: Springer Netherlands, 1987, pp. 7–15. [Online]. Available: -http://dx . doi.org/10. 1007/978-94- 015-7744- 1_2 -[VTKP17] E. Vorontsov, C. Trabelsi et al. , “On orthogonality and learning recurrent -networks with long term dependencies,” arXiv preprint arXiv:1702.00071 , -Jan. 2017. [Online]. 
Available: https://arxiv .org/abs/1702.00071 -[WHH + - 89] A. Waibel, T. Hanazawa et al. , “Phoneme recognition using time-delay +http://dx.doi.org/10.1007/978-94- 015-7744- 1_2 +[VTKP17] E. Vorontsov, C. Trabelsi et al., “On orthogonality and learning recurrent +networks with long term dependencies,” arXiv preprint arXiv:1702.00071, +Jan. 2017. [Online]. Available: https://arxiv.org/abs/1702.00071 +[WHH+ +89] A. Waibel, T. Hanazawa et al., “Phoneme recognition using time-delay neural networks,” IEEE transactions on acoustics, speech, and signal -processing , vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: -http://ieeexplore. ieee.org/document/21701/ +processing, vol. 37, no. 3, pp. 328–339, Aug. 1989. [Online]. Available: +http://ieeexplore.ieee.org/document/21701/ [Wil92] R. J. Williams, “Simple statistical gradient-following algorithms for connectionist reinforcement learning,” Machine learning, vol. 8, no. 3-4, pp. 229–256, 1992. [WWQ13] X. Wang, L. Wang, and Y. Qiao, A Comparative Study of Encoding, Pooling -and Normalization Methods for Action Recognition . Berlin, Heidelberg: +and Normalization Methods for Action Recognition. Berlin, Heidelberg: Springer Berlin Heidelberg, Nov. 2013, no. 11, pp. 572–585. [Online]. -Available: http://dx .doi.org/10.1007/978-3- 642-37431- 9_44 +Available: http://dx.doi.org/10.1007/978-3- 642-37431- 9_44 [WYS+ - 15] - R. Wu, S. Yan et al. , “Deep image: Scaling up image recognition,” arXiv -preprint arXiv:1501.02876 , vol. 7, no. 8, Jul. 2015. [Online]. Available: -https://arxiv . org/abs/1501. 02876v4 +15] + R. Wu, S. Yan et al., “Deep image: Scaling up image recognition,” arXiv +preprint arXiv:1501.02876, vol. 7, no. 8, Jul. 2015. [Online]. Available: +https://arxiv.org/abs/1501.02876v4 [WZZ+ - 13] L. Wan, M. Zeiler et al., “Regularization of neural networks using dropconnect,” -in International Conference on Machine Learning (ICML) , no. 30, 2013, -pp. 1058–1066. [Online]. 
Available: http://www .matthewzeiler .com/pubs/ -icml2013/icml2013 .pdf -[XGD + - 16] S. Xie, R. Girshick et al. , “Aggregated residual transformations for deep -neural networks,” arXiv preprint arXiv:1611.05431 , Nov. 2016. [Online]. -Available: https://arxiv .org/abs/1611.05431v1 +13] L. Wan, M. Zeiler et al., “Regularization of neural networks using dropconnect,” +in International Conference on Machine Learning (ICML), no. 30, 2013, +pp. 1058–1066. [Online]. Available: http://www.matthewzeiler.com/pubs/ +icml2013/icml2013.pdf +[XGD+ +16] S. Xie, R. Girshick et al., “Aggregated residual transformations for deep +neural networks,” arXiv preprint arXiv:1611.05431, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.05431v1 [Xu11] W. Xu, “Towards optimal one pass large scale learning with averaged -stochastic gradient descent,” arXiv preprint arXiv:1107.2490 , Jul. 2011. -[Online]. Available: https://arxiv . org/abs/1107.2490 -[XWCL15] B. Xu, N. Wang et al. , “Empirical evaluation of rectified activations in +stochastic gradient descent,” arXiv preprint arXiv:1107.2490, Jul. 2011. +[Online]. Available: https://arxiv.org/abs/1107.2490 +[XWCL15] B. Xu, N. Wang et al., “Empirical evaluation of rectified activations in convolutional network,” arXiv preprint arXiv:1505.00853, May 2015. [Online]. -Available: https://arxiv .org/abs/1505.00853 +Available: https://arxiv.org/abs/1505.00853 [XXE12] H. Xiao, H. Xiao, and C. Eckert, “Adversarial label flips attack on -support vector machines.” in ECAI , 2012, pp. 870–875. [Online]. Available: -https://www .sec.in.tum.de/assets/Uploads/ecai2 . pdf +support vector machines.” in ECAI, 2012, pp. 870–875. [Online]. Available: +https://www.sec.in.tum.de/assets/Uploads/ecai2.pdf [XZY+ - 14] T. Xiao, J. Zhang et al., “Error-driven incremental learning in deep convolutional +14] T. Xiao, J. 
Zhang et al., “Error-driven incremental learning in deep convolutional neural network for large-scale image classification,” in International Conference on Multimedia, no. 22. ACM, 2014, pp. 177–186. [YL98] C. J. B. Yann LeCun, Corinna Cortes, “The MNIST database of handwritten -digits,” 1998. [Online]. Available: http://yann.lecun. com/exdb/mnist/ +digits,” 1998. [Online]. Available: http://yann.lecun.com/exdb/mnist/ [ZBH+ - 16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking -generalization,” arXiv preprint arXiv:1611.03530 , Nov. 2016. [Online]. -Available: https://arxiv .org/abs/1611.03530 -[ZCZL16] S. Zhai, Y. Cheng et al. , “Doubly convolutional neural networks,” in -Advances in Neural Information Processing Systems 29 (NIPS) , D. D. Lee, +16] C. Zhang, S. Bengio et al., “Understanding deep learning requires rethinking +generalization,” arXiv preprint arXiv:1611.03530, Nov. 2016. [Online]. +Available: https://arxiv.org/abs/1611.03530 +[ZCZL16] S. Zhai, Y. Cheng et al., “Doubly convolutional neural networks,” in +Advances in Neural Information Processing Systems 29 (NIPS), D. D. Lee, M. Sugiyama et al., Eds. Curran Associates, Inc., Oct. 2016, pp. 1082–1090. [Online]. Available: http://papers.nips.cc/paper/6340- doubly-convolutionalneural-networks.pdf -[ZDGD14] N. Zhang, J. Donahue et al. , “Part-based R-CNNs for fine-grained category +[ZDGD14] N. Zhang, J. Donahue et al., “Part-based R-CNNs for fine-grained category detection,” in European Conference on Computer Vision (ECCV). Springer, -Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407. 3867 +Jul. 2014, pp. 834–849. [Online]. Available: https://arxiv.org/abs/1407.3867 [Zei12] M. D. Zeiler, “Adadelta: an adaptive learning rate method,” arXiv preprint -arXiv:1212.5701 , Dec. 2012. [Online]. Available: https://arxiv . org/abs/ -1212. 5701v1 +arXiv:1212.5701, Dec. 2012. [Online]. Available: https://arxiv.org/abs/ +1212.5701v1 [ZF13] M. D. Zeiler and R. 
Fergus, “Stochastic pooling for regularization of deep -convolutional neural networks,” arXiv preprint arXiv:1301.3557 , Jan. 2013. -[Online]. Available: https://arxiv .org/abs/1301.3557v1 +convolutional neural networks,” arXiv preprint arXiv:1301.3557, Jan. 2013. +[Online]. Available: https://arxiv.org/abs/1301.3557v1 [ZF14] M. D. Zeiler and R. Fergus, “Visualizing and understanding convolutional networks,” in European Conference on Computer Vision (ECCV). Springer, -Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311. 2901 +Nov. 2014, pp. 818–833. [Online]. Available: https://arxiv.org/abs/1311.2901 [Zho16] B. Zhou, “Places2 download,” 2016. [Online]. Available: http:// -places2.csail. mit. edu/download.html +places2.csail.mit.edu/download.html [ZK16] S. Zagoruyko and N. Komodakis, “Wide residual networks,” arXiv -preprint arXiv:1605.07146 , May 2016. [Online]. Available: https: -//arxiv .org/abs/1605. 07146 +preprint arXiv:1605.07146, May 2016. [Online]. Available: https: +//arxiv.org/abs/1605.07146 [ZKL+ - 15] B. Zhou, A. Khosla et al. , “Learning deep features for discriminative +15] B. Zhou, A. Khosla et al., “Learning deep features for discriminative localization,” arXiv preprint arXiv:1512.04150, Dec. 2015. [Online]. Available: -https://arxiv .org/abs/1512.04150 +https://arxiv.org/abs/1512.04150 [ZKL+ - 16] B. Zhou, A. Khosla et al. , “Places: An image database for deep scene -understanding,” arXiv preprint arXiv:1610.02055 , Oct. 2016. [Online]. -Available: https://arxiv .org/abs/1610. 02055 +16] B. Zhou, A. Khosla et al., “Places: An image database for deep scene +understanding,” arXiv preprint arXiv:1610.02055, Oct. 2016. [Online]. +Available: https://arxiv.org/abs/1610.02055 [ZL16] B. Zoph and Q. V. Le, “Neural architecture search with reinforcement -learning,” arXiv preprint arXiv:1611.01578 , Nov. 2016. [Online]. Available: -https://arxiv .org/abs/1611.01578 -[ZMGL15] J. Zhao, M. Mathieu et al. 
, “Stacked what-where auto-encoders,” -arXiv preprint arXiv:1506.02351 , Jun. 2015. [Online]. Available: https: -//arxiv .org/abs/1506. 02351v1 -[ZYL + - 15] - H. Zheng, Z. Yang et al. , “Improving deep neural networks using softplus -units,” in International Joint Conference on Neural Networks (IJCNN) , Jul. +learning,” arXiv preprint arXiv:1611.01578, Nov. 2016. [Online]. Available: +https://arxiv.org/abs/1611.01578 +[ZMGL15] J. Zhao, M. Mathieu et al., “Stacked what-where auto-encoders,” +arXiv preprint arXiv:1506.02351, Jun. 2015. [Online]. Available: https: +//arxiv.org/abs/1506.02351v1 +[ZYL+ +15] H. Zheng, Z. Yang et al., “Improving deep neural networks using softplus +units,” in International Joint Conference on Neural Networks (IJCNN), Jul. 2015, pp. 1–4. I. Glossary diff --git a/read/results/playa/2201.00021.txt b/read/results/playa/2201.00021.txt index cfa743b..ef562e7 100644 --- a/read/results/playa/2201.00021.txt +++ b/read/results/playa/2201.00021.txt @@ -1,19 +1,19 @@ -Astronomy & Astrophysics manuscript no. mainArxiv © ESO 2022 +Astronomy & Astrophysics manuscript no. mainArxiv ©ESO 2022 April 12, 2022 Discovery of ammonia (9,6) masers in two high-mass star-forming regions -Y. T. Yan (闫耀庭) 1, - , C. Henkel1, 2, 3 - , K. M. Menten 1 - , Y. Gong (龚龑) 1 - , J. Ott4 - , T. L. Wilson1 - , A. Wootten4 - , A. +Y. T. Yan (闫耀庭)1, +, C. Henkel1, 2, 3 +, K. M. Menten1 +, Y. Gong (龚龑)1 +, J. Ott4 +, T. L. Wilson1 +, A. Wootten4 +, A. Brunthaler1 - , J. S. Zhang (张江水 )5 - , J. L. Chen ( 陈家梁) 5 - , and K. Yang ( 杨楷) 6, 7 +, J. S. Zhang (张江水)5 +, J. L. Chen (陈家梁)5 +, and K. 
Yang (杨楷)6, 7 1 Max-Planck-Institut für Radioastronomie, Auf dem Hügel 69, 53121 Bonn, Germany e-mail: yyan@mpifr-bonn.mpg.de @@ -31,23 +31,23 @@ e-mail: yyan@mpifr-bonn.mpg.de Key Laboratory of Modern Astronomy and Astrophysics (Nanjing University), Ministry of Education, Nanjing 210023, People’s Republic of China Received 13 December 2021 / Accepted 30 December 2021 - ABSTRACT +ABSTRACT Context. Molecular maser lines are signposts of high-mass star formation, probing the excitation and kinematics of very compact regions in the close environment of young stellar objects and providing useful targets for trigonometric parallax measurements. Aims. Only a few NH - 3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH - 3 (9,6) +3 (9,6) masers are known so far, and their origin is still poorly understood. Here we aim to find new NH +3 (9,6) masers to provide a better observational basis for studying their role in high-mass star-forming regions. Methods. We carried out NH -3 (9,6) observations toward Cepheus A and G34.26+ 0.15 with the Eff elsberg 100-meter telescope (beam +3 (9,6) observations toward Cepheus A and G34.26+0.15 with the Effelsberg 100-meter telescope (beam size 49 - ) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1 +) and the Karl G. Jansky Very Large Array (JVLA; beam size about 1 . 2). Results. We discovered new NH 3 (9,6) masers in Cep A and G34.26+0.15, which increases the number of known high-mass starforming regions hosting NH - 3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Eff elsberg shows that the intensity -of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. Compared to the E ff elsberg data and assuming +3 (9,6) masers from five to seven. Long-term monitoring (20 months) at Effelsberg shows that the intensity +of the (9,6) maser in G34.26+0.15 is decreasing, while the Cep A maser remains stable. 
Compared to the Effelsberg data and assuming linear variations between the epochs of observation, the JVLA data indicate no missing flux. This suggests that the NH 3 (9,6) emission arises from single compact emission regions that are not resolved by the interferometric measurements. As JVLA imaging shows, the @@ -65,32 +65,32 @@ Key words. Masers – ISM: clouds – ISM: individual objects: Cep A, G34.26+0.1 1. Introduction Since its discovery more than five decades ago (Cheung et al. 1968), ammonia (NH -3 ) has been a most valuable molecule for +3) has been a most valuable molecule for investigating the physical properties of molecular clouds (e.g., Ho & Townes 1983). While thermally excited transitions in the centimeter-wavelength inversion transitions of ammonia are regarded as a reliable thermometer of molecular clouds (e.g., Walmsley & Ungerechts 1983; Danby et al. 1988), ammonia masers have attracted attention since the first detection of maser -action in the ( J, K ) = (3,3) metastable ( J = K ) line toward the +action in the ( J, K ) = (3,3) metastable (J = K ) line toward the massive star-forming region W33 (Wilson et al. 1982). Subsequent observations have led to the detection of new metastable ammonia masers, including 15 - NH - 3 (3,3) (Mauersberger et al. +NH +3 (3,3) (Mauersberger et al. 1986), NH 3 (1,1) (Gaume et al. 1996), NH - 3 (2,2) (Mills et al. +3 (2,2) (Mills et al. 2018), NH 3 (5,5) (Cesaroni et al. 1992), NH 3 (6,6) (Beuther Member of the International Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the universities of Bonn and -Cologne. et al. 2007), NH +Cologne. et al. 2007), NH 3 (7,7), NH 3 (9,9), and NH - 3 (12,12) (Henkel +3 (12,12) (Henkel et al. 2013). These have led to the discovery of metastable maser lines in 22 different regions (Mauersberger et al. 1986, 1987; Wilson & Henkel 1988; Wilson et al. 1990; Pratap et al. 
1991; @@ -111,7 +111,7 @@ been identified as masers, including the (5,3), (5,4), (6,1), (6,2), (6,4), (6,5), (7,3), (7,4), (7,5) (7,6), (8,3), (8,4), (8,5), (8,6), (9,3), (9,4), (9,5), (9,7), (9,8), (10,7), (10,8), (10,9), and (11,9) transiArticle number, page 1 of 10arXiv:2201.00021v3 [astro-ph.GA] 9 Apr 2022 -A & A proofs: manuscript no. mainArxiv +A&A proofs: manuscript no. mainArxiv tions (e.g., Mauersberger et al. 1987, 1988; Walsh et al. 2007; Henkel et al. 2013; Mei et al. 2020). Except for the NH 3 (3,3) @@ -119,18 +119,18 @@ masers proposed to be associated with four supernova remnants (McEwen et al. 2016), almost all the other ammonia masers are detected in high-mass star-forming regions (HMSFRs). However, while many HMSFRs host water (H - 2 O), hydroxyl (OH), +2O), hydroxyl (OH), or methanol (CH - 3 OH) masers, ammonia masers are quite rare +3OH) masers, ammonia masers are quite rare in these sources, and the role that the environment of a young high-mass star plays in their excitation remains unclear. Therefore, dedicated searches for ammonia masers in HMSFRs are indispensable in regard to their overall incidence and association - with di ff erent environments, which can provide additional + with different environments, which can provide additional constraints on the pumping mechanism of ammonia masers. So far, a total of 32 NH - 3 inversion transitions ( ∆ K = 0 -and ∆ J = 0) have been identified as masers. Among these, and +3 inversion transitions (∆K = 0 +and ∆J = 0) have been identified as masers. Among these, and despite arising from energy levels as high as 1090 K above the ground state, the NH 3 (9,6) maser stands out as being the @@ -138,7 +138,7 @@ strongest and most variable one in W51-IRS2 (e.g., Henkel et al. 2013). Maser emission in this line has only been detected in five HMSFRs, W51, NGC7538, W49, DR21 (OH) (Madden et al. 1986), and Sgr B2(N) (Mei et al. 2020). 
The NH - 3 (3,3) masers +3 (3,3) masers are thought to be collisionally excited (e.g., Flower et al. 1990; Mangum & Wootten 1994); in contrast, the pumping mechanism of NH @@ -149,50 +149,50 @@ they did not extend their model to the (9,6) transition due to the fact that collision rates are only known for inversion levels up to J = 6 (e.g., Danby et al. 1988). NH - 3 (9,6) masers are found to be strongly variable, similar to +3 (9,6) masers are found to be strongly variable, similar to H - 2 O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. +2O masers (Madden et al. 1986; Pratap et al. 1991; Henkel et al. 2013). In W51-IRS2, Henkel et al. (2013) found that the (9,6) line showed significant variation in line shape within a time interval of only two days. Mapping of the (9,6) maser toward W51 with very long baseline interferometry (VLBI) suggests that the masers are closer to the H -2 O masers than to the OH masers or +2O masers than to the OH masers or to ultracompact (UC) H ii regions (Pratap et al. 1991). While Henkel et al. (2013) and Goddi et al. (2015) showed that the SiO and NH - 3 masers in W51-IRS2 are very close to each other, their -positions, diff ering by 0 -. 065 ( ∼0.015 pc), do not fully coincide. +3 masers in W51-IRS2 are very close to each other, their +positions, differing by 0 +. 065 (∼0.015 pc), do not fully coincide. In this paper we report the discovery of NH 3 (9,6) masers -in two HMSFRs, Cepheus A and G34.26+ 0.15. This increases +in two HMSFRs, Cepheus A and G34.26+0.15. This increases the number of (9,6) maser detections in our Galaxy from five -to seven. In Sect. 2 observations with the Eff elsberg 100-meter +to seven. In Sect. 2 observations with the Effelsberg 100-meter telescope and the Karl G. Jansky Very Large Array (JVLA) are described. Results are presented in Sect. 3. 
The morphology of -Cep A and G34.26+ 0.15 as well as a comparison of the emission -distributions of di ff erent tracers with the NH - 3 (9,6) masers are +Cep A and G34.26+0.15 as well as a comparison of the emission +distributions of different tracers with the NH +3 (9,6) masers are presented in Sect. 4. Our main results are summarized in Sect. 5. 2. Observations and data reduction 2.1. Effelsberg observations and data reduction The NH 3 (9,6) line was observed toward Cep A and -G34.26 +0.15 with the 100-meter E ff elsberg telescope 1 +G34.26+0.15 with the 100-meter Effelsberg telescope1 in 2020 January and 2021 February, July, and August. The S14mm double beam secondary focus receiver was employed. The full width at half maximum (FWHM) beam size is 49 at 18.5 GHz, the frequency of the target line. The observations were performed in -position switching mode, and the o ff position was 10 +position switching mode, and the off position was 10 in azimuth 1 Based on observations with the 100-meter telescope of the MPIfR -(Max-Planck-Institut für Radioastronomie) at E ff elsberg. away from the source. For observations made before 2021 August, +(Max-Planck-Institut für Radioastronomie) at Effelsberg. away from the source. For observations made before 2021 August, we used a spectrometer that covered 2 GHz wide backends -with a channel width of 38.1 kHz, corresponding to ∼0.62 km s −1 +with a channel width of 38.1 kHz, corresponding to ∼0.62 km s−1 at the line’s rest frequency, 18.49939 GHz (Poynter & Kakar 1975). A high spectral resolution backend with 65536 channels and a bandwidth of 300 MHz was employed in 2021 August, @@ -203,21 +203,21 @@ Focus calibrations were done at the beginning of the observations and during sunset and sunrise toward the abovementioned pointing sources. The system temperatures were 100–130 K on a main-beam brightness temperature, T - MB , scale. This flux density +MB, scale. 
This flux density was calibrated assuming a T - MB / S ratio of 1.95 K /Jy, derived +MB/S ratio of 1.95 K/Jy, derived from continuum cross scans of NGC 7027 (the flux density was adopted from Ott et al. 1994). Calibration uncertainties are estimated to be ∼ 10%. -We used the GILDAS / CLASS 2 +We used the GILDAS/CLASS2 package (Pety 2005) to reduce the spectral line data. A first-order polynomial was subtracted from each spectrum for baseline removal. 2.2. JVLA observations and data reduction Observations of the NH - 3 (9,6) line toward Cep A and -G34.26 +0.15 were obtained on 2021 July 13 with the JVLA -of the National Radio Astronomy Observatory 3 +3 (9,6) line toward Cep A and +G34.26+0.15 were obtained on 2021 July 13 with the JVLA +of the National Radio Astronomy Observatory3 (NRAO) in the C configuration (project ID: 21A-157, PI: Yaoting Yan). We employed 27 antennas for the observations. The primary beam @@ -230,8 +230,8 @@ used one subband with the eight-bit sampler covering a bandwidth of 16 MHz with full polarization, eight recirculations, and four baseline board pairs (BIBPs) to provide a velocity range of 260 km s−1 - with a channel spacing of 0.13 km s −1 - . Two + with a channel spacing of 0.13 km s−1 +. Two additional subbands of bandwidth 16 MHz were used to cover the NH 3 (8,5) and (10,7) lines. The three-bit sampler with 32 @@ -240,18 +240,18 @@ subbands, each with a bandwidth of 128 MHz to cover a total the continuum emission. 3C 286 with a flux density of 2.89 Jy at 18.5 GHz (Perley & Butler 2013) was used as a calibrator for pointing, flux density, bandpass, and polarization. -J2230 +6946 and J1851+0035 served as gain calibrators for Cep +J2230+6946 and J1851+0035 served as gain calibrators for Cep A and G34.26+0.15, respectively. The on-source times were 4m - 30 s - and 4 m - 50 s - toward Cep A and G34.26 +0.15, respectively. +30s + and 4m +50s + toward Cep A and G34.26+0.15, respectively. 
Data from two antennas were lost due to technical issues. The data from the remaining 25 antennas were reduced through the Common Astronomy Software Applications package - (CASA 4 - ; McMullin et al. 2007). We calibrated the data with + (CASA4 +; McMullin et al. 2007). We calibrated the data with the JVLA CASA calibration pipeline using CASA 6.1.2. The results were obtained after flagging data that contain artifacts. We inspected the phase, amplitude, and bandpass variations of @@ -267,67 +267,67 @@ used to produce the images of spectral line and continuum emission. . 47 × 0 . 99 at 2 - https: //www.iram.fr /IRAMFR /GILDAS/ + https://www.iram.fr/IRAMFR/GILDAS/ 3 The National Radio Astronomy Observatory is a facility of the National Science Foundation operated under cooperative agreement by Associated Universities, Inc. 4 - https: //casa.nrao.edu/ + https://casa.nrao.edu/ Article number, page 2 of 10 -Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions P.A. = 58◦ -. 79 and 1 +.79 and 1 . 33 × 1 -. 06 at P.A. = 5 ◦ -. 36 toward Cep A -and G34.26 + 0.15, respectively. For the 1.36 cm (20–24 GHz) +. 06 at P.A. = 5◦ +.36 toward Cep A +and G34.26+0.15, respectively. For the 1.36 cm (20–24 GHz) continuum emission, the synthesized beams are 1 . 08 × 0 . 67 at -P.A. = 60 ◦ -. 64 and 0 +P.A. = 60◦ +.64 and 0 . 95 × 0 . 71 at P.A. = 5◦ -. 91 toward Cep A and -G34.26 + 0.15. The typical absolute astrometric accuracy of the -JVLA is ∼ 10% of the synthesized beam5 - . The flux density scale +.91 toward Cep A and +G34.26+0.15. The typical absolute astrometric accuracy of the +JVLA is ∼10% of the synthesized beam5 +. The flux density scale calibration accuracy is estimated to be within 15%. Fig. 1. Spectra from NH - 3 (9,6) transition lines. Left: Top to bottom: +3 (9,6) transition lines. 
Left: Top to bottom: Time sequence of NH 3 (9,6) profiles observed toward Cep A with the -E ff elsberg 100-meter telescope (after subtracting a first-order polynomial +Effelsberg 100-meter telescope (after subtracting a first-order polynomial baseline). A JVLA spectrum is interspersed. The systemic velocity from CO and HCO+ lines is indicated by a dashed blue line. The two dashed red lines at LSR velocities, V - LSR , of − 0.90 km s −1 +LSR, of −0.90 km s−1 and -− 0.28 km s− 1 +−0.28 km s−1 indicate the central velocities of the two major components. - Right : NH -3 (9,6) spectra from G34.26 +0.15. The systemic velocity - from C 17 - O is indicated by a dashed blue line. The three dashed + Right: NH +3 (9,6) spectra from G34.26+0.15. The systemic velocity + from C17 +O is indicated by a dashed blue line. The three dashed red lines at V -LSR = 54.1 km s − 1 - , 55.8 km s −1 - , and 62.5 km s − 1 +LSR = 54.1 km s−1 +, 55.8 km s−1 +, and 62.5 km s−1 show the central velocities of the main ammonia emission components. 3. Results -The spectra from diff erent epochs are shown in Figs. 1 and 2. +The spectra from different epochs are shown in Figs. 1 and 2. Toward Cep A, the NH 3 (9,6) line profile from the JVLA is extracted from an Effelsberg-beam-sized region (FWHM, 49 - ). In +). In the case of G34.26+0.15, the NH 3 spectrum is below the noise level if a similarly large beam size is used. Therefore, we derived the JVLA NH - 3 (9,6) spectrum from a smaller region, with +3 (9,6) spectrum from a smaller region, with radius 3 . 5, that contains all the detected NH 3 (9,6) emission. In @@ -337,30 +337,30 @@ Gaussian fits are listed. NH 3 (8,5) and (10,7) emission is not detected by our JVLA observations. The 3σ upper limits for the NH - 3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 +3 (8,5) and (10,7) lines toward Cep A are 23.2 mJy beam−1 5 - https://science.nrao.edu/ facilities / vla/ docs /manuals/ oss/ performance/ - positional-accuracy Fig. 2. 
NH + https://science.nrao.edu/facilities/vla/docs/manuals/oss/performance/positional-accuracy + Fig. 2. NH 3 (9,6) line profiles emphasizing, in contrast to the spectra in Fig. 1, weaker features. Cep A spectra are presented on the left, -G34.26+ 0.15 spectra on the right. The two dashed red lines in the left +G34.26+0.15 spectra on the right. The two dashed red lines in the left panels indicate V - LSR = 1.48 km s− 1 +LSR = 1.48 km s−1 and 2.89 km s−1 - . In the right panels, -the two dashed red lines refer to 54.1 km s− 1 - and 55.8 km s− 1 - . +. In the right panels, +the two dashed red lines refer to 54.1 km s−1 + and 55.8 km s−1 +. and 27.2 mJy beam−1 - , respectively. In G34.26 + 0.15, the corresponding +, respectively. In G34.26+0.15, the corresponding 3σ upper limits for the NH 3 (8,5) and (10,7) lines are -22.1 mJy beam −1 +22.1 mJy beam−1 and 30.4 mJy beam−1 - . For both sources, sensitivity +. For both sources, sensitivity levels refer to emission from a single channel of width 0.13 km s−1 - . Taking the larger measured line widths of the (9,6) +. Taking the larger measured line widths of the (9,6) maser features (see Table A.1), these limits could be further lowered by factors of two to four. 3.1. Centimeter-continuum emission @@ -368,7 +368,7 @@ The 1.36 cm continuum, derived from our JVLA observations, toward Cep A is presented in Fig. 3. Six published compact sources, HW2, HW3a, HW3b, HW3c, HW3d, and HW9, are detected in our observations. Figure 4 shows the 1.36 cm continuum - in G34.26 + 0.15. Three main continuum objects, A, B, and + in G34.26+0.15. Three main continuum objects, A, B, and C, are detected. By using the imfit task in CASA, we measured the continuum flux at 1.36 cm toward individual compact source components in Cep A and G34.26+0.15. Details are given in Table @@ -381,11 +381,11 @@ In 2020 January, NH telescope in Cep A. Emission with similar strength was also detected in 2021 February and August with the same telescope. 
Higher velocity resolution data, which were obtained in 2021 -August, again with the E ff elsberg 100-meter telescope, show +August, again with the Effelsberg 100-meter telescope, show that the (9,6) emission contains two main velocity components. Overall, the flux densities of the NH - 3 (9,6) emission line measured - with the Eff elsberg 100-meter telescope are, within the calibration +3 (9,6) emission line measured + with the Effelsberg 100-meter telescope are, within the calibration uncertainties, unchanged. This is valid for the time interval between 2020 January and August 2021, when we smoothed the obtained spectra to the same velocity resolution. We also @@ -393,7 +393,7 @@ see another two weaker components. Figure 2 emphasizes these weak components with an expanded flux density scale. Higher angular resolution data from the JVLA pinpoint the position of the NH -3 (9,6) emission with an o ffset of (− 0 +3 (9,6) emission with an offset of (−0 . 28, 0 . 02) relative to the 1.36 cm continuum peak of Cep A HW2 @@ -403,79 +403,79 @@ position of the NH 0 . 15) × (0 . 19 ± 0 -. 14) at P.A. = 174 ◦ - , derived with the imfit task +. 14) at P.A. = 174◦ +, derived with the imfit task in CASA, and can thus be considered, accounting for the uncertainties, as unresolved. Article number, page 3 of 10 -A & A proofs: manuscript no. mainArxiv +A&A proofs: manuscript no. mainArxiv Fig. 3. Cepheus A. White contours mark the 1.36 cm JVLA continuum map of Cep A; levels are −5, 5, 10, 20, 30, 40, 50, 70, 90, -and 110 × 0.125 mJy beam − 1 - . The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane +and 110 × 0.125 mJy beam−1 +. The background image is the Spitzer 4.5 µm emission, taken from the Galactic Legacy Infrared Mid-Plane Survey Extraordinaire (GLIMPSE; Benjamin et al. 2003; Churchwell et al. 2009). The reference position is α J2000 = 22h - 56 m - 17 s -. 
972, and +56m +17s +.972, and δ - J2000 = 62◦ - 01 - 49 +J2000 = 62◦ +01 +49 . 587, the peak position of the continuum map, is marked with a black cross. Slightly to the west of the cross is the black ellipse denoting the position of the NH - 3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H -2 O (Sobolev et al. 2018), +3 (9,6) emission with a purple star at its center. OH (Bartkiewicz et al. 2005), H +2O (Sobolev et al. 2018), and CH -3 OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates +3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, respectively. The color bar on the right-hand side indicates the LSR velocity range of the maser spots. -Fig. 4. 1.36 cm JVLA continuum map of G34.26+ 0.15 presented as white contours with levels of − 5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, -150, 180, and 200 × 5.0 mJy beam −1 - . The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is +Fig. 4. 1.36 cm JVLA continuum map of G34.26+0.15 presented as white contours with levels of −5, 5, 10, 20, 30, 40, 50, 70, 90, 110, 130, +150, 180, and 200 × 5.0 mJy beam−1 +. The background image is the Spitzer 4.5 µm emission, taken from GLIMPSE. The reference position is α -J2000 = 18 h - 53m - 18s -. 560, and δ - J2000 = 01◦ - 14 - 58 +J2000 = 18h +53m +18s +.560, and δ +J2000 = 01◦ +14 +58 . 201, the peak position, is marked by a black cross. The black ellipses show the positions of NH - 3 +3 (9,6) emissions with stars at their center (i.e., M1, M2, and M3). OH (Zheng et al. 2000), H -2 O (Imai et al. 2011), and CH -3 OH (Bartkiewicz et al. +2O (Imai et al. 2011), and CH +3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (V - LSR ) of maser spots. +LSR) of maser spots. 
In view of the constancy of the flux densities obtained at Effelsberg and the similar JVLA flux density, measured in 2021 July, there is no missing interferometric flux density in the JVLA data. 3.3. NH -3 (9,6) emission in G34.26 +0.15 +3 (9,6) emission in G34.26+0.15 The NH - 3 (9,6) emission was first detected toward G34.26+0.15 +3 (9,6) emission was first detected toward G34.26+0.15 in 2020 January with the Effelsberg 100-meter telescope. Higher velocity resolution data from 2021 August show the NH 3 (9,6) -emission to be composed of two di fferent components. The spectra +emission to be composed of two different components. The spectra of weak components on a smaller flux density scale are presented in Fig. 2. -Three di fferent locations showing NH - 3 (9,6) emission are -found toward G34.26 +0.15 (Fig. 4). The deconvolved NH - 3 (9,6) +Three different locations showing NH +3 (9,6) emission are +found toward G34.26+0.15 (Fig. 4). The deconvolved NH +3 (9,6) component sizes are (1 . 42 ± 0 . 43) × (0 . 54 ± 0 -. 62) at P.A. = 97 ◦ +. 62) at P.A. = 97◦ (M1), (0 . 42 ± 0 . 27) × (0 . 15 ± 0 -. 27) at P.A. = 150 ◦ +. 27) at P.A. = 150◦ (M2), and Article number, page 4 of 10 -Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions (1 . 17 ± 0 . 34) × (0 @@ -484,35 +484,35 @@ Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-ma (M3) and are thus comparable to or smaller than the beam size. Overall, the NH - 3 (9,6) line from G34.26 +0.15 weakened +3 (9,6) line from G34.26+0.15 weakened during the time interval from 2020 January to 2021 August by about 70%. 
A comparison between the JVLA spectrum and the -E ffelsberg data, assuming a linear decrease in the integrated intensity - as a function of time between diff erent epochs of the +Effelsberg data, assuming a linear decrease in the integrated intensity + as a function of time between different epochs of the 100-meter observations, suggests there is no missing flux in the JVLA data. This is similar to the situation in Cep A. 4. Discussion -4.1. Morphology of Cep A and G34.26+ 0.15 -Cep A, at a trigonometric parallax distance of 0.70 ± 0.04 kpc +4.1. Morphology of Cep A and G34.26+0.15 +Cep A, at a trigonometric parallax distance of 0.70±0.04 kpc (Moscadelli et al. 2009; Dzib et al. 2011), is the second closest HMSFR (after Orion) and by far the closest NH - 3 (9,6) maser -known. About 16 compact ( ∼1 - ) radio sources (e.g., Hughes & +3 (9,6) maser +known. About 16 compact (∼1 +) radio sources (e.g., Hughes & Wouterloot 1984; Hughes 1991; Garay et al. 1996) have been identified in Cep A. Hughes & Wouterloot (1984) discovered these targets at radio wavelengths, which are UC and hypercompact - (HC) H ii regions and /or stellar wind sources, subsequently + (HC) H ii regions and/or stellar wind sources, subsequently named as HW sources. The HW2 object is one of the best known examples of a protostellar jet or disk system driving a powerful outflow (e.g., Rodriguez et al. 1980; Güsten et al. 1984; Torrelles et al. 1986; Curiel et al. 2006; Carrasco-González et al. 2021). The observed NH - 3 (9,6) emission is slightly offset ( −0 +3 (9,6) emission is slightly offset (−0 . 28, 0 . 02) from the center of HW2 (see Fig. 3). -G34.26 +0.15 is an HMSFR located at a distance of 3.3 kpc +G34.26+0.15 is an HMSFR located at a distance of 3.3 kpc (Kuchar & Bania 1994). It hosts four radio continuum components named A, B, C, and D. 
Component C is a prototypical cometary UC H ii region containing a compact head and a diffuse @@ -522,31 +522,31 @@ A and B are HC H ii regions, located to the east of component C. An extended ring-like H ii region, called component D, is located southeast of components A-C. One of the three observed NH - 3 (9,6) emission line sources, M1, is close to the head of component +3 (9,6) emission line sources, M1, is close to the head of component C, whereas M2 and M3 originate from another compact region in the west of the HC H ii component A (see Fig. 4). 4.2. NH 3 (9,6) emission possibly caused by maser action As shown in Fig. 1, the NH 3 (9,6) profiles in Cep A and -G34.26 +0.15 are narrow (∆V - 1 /2 ≤2.0 km s−1 - ), much narrower -than the expected line widths ( 4 km s − 1 - ) of thermal lines observed +G34.26+0.15 are narrow (∆V +1/2 ≤2.0 km s−1 +), much narrower +than the expected line widths (4 km s−1 +) of thermal lines observed at a similar angular resolution (e.g., Torrelles et al. 1985, 1986, 1993, 1999; Henkel et al. 1987; Comito et al. 2007; Mookerjea et al. 2007; Wyrowski et al. 2012; Beuther et al. 2018). Velocity shifts with respect to the systemic velocities of the two -sources are both observed, that is, V ∼10 km s −1 +sources are both observed, that is, V ∼10 km s−1 in Cep A and -V ∼4 km s− 1 - in G34.26 +0.15 (see details in Sect. 4.3). Furthermore, +V ∼4 km s−1 + in G34.26+0.15 (see details in Sect. 4.3). Furthermore, time variability is observed in the case of G34.26+0.15, which is also a characteristic feature of maser emission. Additional evidence of their maser nature is the high brightness temperatures of the (9,6) emission spots toward Cep A and -G34.26 +0.15. The spectral parameters are listed in Table A.3. +G34.26+0.15. The spectral parameters are listed in Table A.3. 
Because at least a significant part of the NH 3 (9,6) emission is not resolved by our JVLA observations, the derived brightness @@ -556,21 +556,21 @@ Table A.3), which is much higher than the expected thermal gas temperature of ∼250 K (e.g., Patel et al. 2005; Comito et al. 2007; Beuther et al. 2018). This strongly suggests that the NH 3 (9,6) emission in Cep A is due to maser action. Because - G34.26+ 0.15 is located at about five times the distance to -Cep A, beam dilution e ffects reduce the lower main beam brightness - temperature limit to 400 K in G34.26 +0.15 (M2) (see Table + G34.26+0.15 is located at about five times the distance to +Cep A, beam dilution effects reduce the lower main beam brightness + temperature limit to 400 K in G34.26+0.15 (M2) (see Table A.3). We also note that the luminosity of the NH 3 (9,6) emission - in G34.26 +0.15 is higher than or comparable to that in Cep + in G34.26+0.15 is higher than or comparable to that in Cep A, depending on the epoch of our observations. Finally, the non-detections of the (8,5) and (10,7) lines also indicate that the (9,6) line is special. This allows us to derive -lower 3σ limits of the (9,6)/(8,5) and (9,6)/ (10,7) line intensity +lower 3σ limits of the (9,6)/(8,5) and (9,6)/(10,7) line intensity ratios. The (9,6) line arises from ortho-NH - 3 ( K = 3n), whereas +3 (K = 3n), whereas the NH 3 (8,5) and (10,7) lines are para-NH - 3 ( K 3n) lines. +3 (K 3n) lines. The minimum ortho-to-para ratios are in the range 12–42 and 1– 8 toward Cep A and G34.26+0.15, respectively. The statistical weights for the ortho states are twice as large as those for the @@ -579,9 +579,9 @@ et al. 2013). In Cep A, the line intensity ratios are far higher than this factor of two. Thus, at least in Cep A the higher main beam brightness peak temperature of the (9,6) emission is caused by maser action, perhaps involving exponential amplification, and -the case of G34.26 +0.15 is likely similar. 
+the case of G34.26+0.15 is likely similar. 4.3. Comparison of NH - 3 (9,6) masers with previously +3 (9,6) masers with previously published (quasi-)thermal NH 3 emission The metastable (1,1), (2,2), (3,3), and (4,4) ammonia lines @@ -592,145 +592,145 @@ LSR ≤ −4 km s−1 (Brown et al. 1981; Güsten et al. 1984; Torrelles et al. 1985, 1986, 1993, 1999). An average NH - 3 column density of ∼5 ×10 15 +3 column density of ∼5×1015 cm−2 was estimated for a region of 3 around HW2 (Torrelles et al. 1999). This high NH 3 abundance could provide a suitable environment for maser species. -Large line widths (∆ V -1 / 2 7.0 km s−1 - ) with V +Large line widths (∆V +1/2 7.0 km s−1 +) with V LSR ∼ −10 km s−1 in both (1,1) and (2,2) lines were found toward HW2 (Torrelles et al. 1993). The velocity is similar to the cloud’s systemic local - standard of rest (LSR) velocity of −11 .2 km s−1 - , which -is based on CO (Narayanan & Walker 1996) and HCO + + standard of rest (LSR) velocity of −11.2 km s−1 +, which +is based on CO (Narayanan & Walker 1996) and HCO+ observations (Gómez et al. 1999). Our (9,6) maser is redshifted -(− 0.9 km s− 1 +(−0.9 km s−1 ≤ V LSR ≤2.9 km s−1 - ) and shares positions with -the outflowing gas seen in CO and HCO + +) and shares positions with +the outflowing gas seen in CO and HCO+ with similarly redshifted velocities. Therefore, we argue that the (9,6) masers are related to outflowing gas. -In G34.26 +0.15, a large NH - 3 column density, -1018 . 5 ±0 .2 - cm −2 - , and a kinetic temperature of 225±75 K +In G34.26+0.15, a large NH +3 column density, +1018.5±0.2 + cm−2 +, and a kinetic temperature of 225±75 K were derived by Henkel et al. (1987) based on measurements of 15 NH - 3 inversion transitions in the frequency range of +3 inversion transitions in the frequency range of 22.0–26.0 GHz. These did not include the (9,6) transition. 
While these lines were measured with a beam size of about 40 - , a comparison of the peak intensities of the optically thick +, a comparison of the peak intensities of the optically thick lines with the kinetic temperature reveals the size of the hot, ammonia-emitting core to be only ∼2.5 - . All those measured +. All those measured NH - 3 lines were quasi-thermal and had LSR velocities of -∼ 58.5 km s −1 - , close to the systemic velocity of ∼ 58.1 km s − 1 -obtained from C 17 - O observations (Wyrowski et al. 2012). -Their line widths (∆ V -1 / 2 ≥3.6 km s−1 - ) are larger than what -we find (0.35 km s −1 - ≤ ∆ V -1 / 2 ≤ 0.94 km s−1 - ) for each (9,6) +3 lines were quasi-thermal and had LSR velocities of +∼ 58.5 km s−1 +, close to the systemic velocity of ∼ 58.1 km s−1 +obtained from C17 +O observations (Wyrowski et al. 2012). +Their line widths (∆V +1/2 ≥3.6 km s−1 +) are larger than what +we find (0.35 km s−1 + ≤ ∆V +1/2 ≤ 0.94 km s−1 +) for each (9,6) maser component (see details in Table A.3). In all, we may -have observed four di ff erent (9,6) velocity features. Three +have observed four different (9,6) velocity features. Three are blueshifted at V -LSR ∼ 53.8 km s −1 - , 55.8 km s− 1 - , and +LSR ∼ 53.8 km s−1 +, 55.8 km s−1 +, and 56.8 km s−1 - , and a fourth, tentatively detected, at 62.5 km s −1 - . +, and a fourth, tentatively detected, at 62.5 km s−1 +. This tentative redshifted feature was only potentially detected -with E ffelsberg in 2020 January. The velocity is similar to that +with Effelsberg in 2020 January. The velocity is similar to that of the JVLA measurements on the NH 3 (1,1) absorption line -against continuum source C ( ∼ 7 +against continuum source C (∼ 7 resolution; Keto et al. 1987) Article number, page 5 of 10 -A & A proofs: manuscript no. mainArxiv +A&A proofs: manuscript no. mainArxiv and the NH - 3 (3,3) emission surrounding continuum source B as +3 (3,3) emission surrounding continuum source B as well as the head of C (1 . 4×1 . 
2 resolution; Heaton et al. 1989). However, we did not find this redshifted component in our JVLA observations. Therefore, its position within G34.26+0.15 cannot be determined. The blueshifted (9,6) masers with a -velocity range of 53.8–56.8 km s− 1 +velocity range of 53.8–56.8 km s−1 (M1, M2, and M3) show velocities compatible with those of the NH - 3 (3,3) emission at +3 (3,3) emission at the proper positions (Heaton et al. 1989), which might be a suitable environment for maser species. 4.4. Comparison of NH 3 (9,6) masers with other maser lines To characterize the environment of NH - 3 (9,6) masers, we can +3 (9,6) masers, we can compare their positions with respect to those of other maser species (i.e., OH, H - 2 O, and CH - 3 OH). Toward Cep A HW2, +2O, and CH +3OH). Toward Cep A HW2, many CH -3 OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna +3OH (e.g., Menten 1991; Sugiyama et al. 2008; Sanna et al. 2017) and H -2 O maser spots (e.g., Torrelles et al. 1998, +2O maser spots (e.g., Torrelles et al. 1998, 2011; Sobolev et al. 2018) are detected and are associated with its disk. Sobolev et al. (2018) also found that most of the H -2 O +2O maser flux is associated with the compact H ii region HW3d. OH maser features close to the H ii regions are also seen in HW2 (e.g., Cohen & Brebner 1985; Bartkiewicz et al. 2005). These three kinds of masers in Cep A have a large velocity range of −25 km s−1 ≤ V -LSR ≤ − 2 km s−1 +LSR ≤ −2 km s−1 and are widespread around HW2 and HW3, while NH 3 (9,6) emission is only detected at -−0.9 km s −1 +−0.9 km s−1 ≤ V -LSR ≤ 2.9 km s −1 +LSR ≤2.9 km s−1 toward a sub-arcsecondsized region to the west of the peak continuum position of HW2 (see Fig. 3). This suggests that the NH - 3 (9,6) maser in Cep A +3 (9,6) maser in Cep A is unique and not related to maser spots seen in other molecular species. -In G34.26 +0.15, OH (Zheng et al. 2000), H - 2 O (Imai et al. +In G34.26+0.15, OH (Zheng et al. 2000), H +2O (Imai et al. 
2011), and CH -3 OH (Bartkiewicz et al. 2016) masers have been +3OH (Bartkiewicz et al. 2016) masers have been detected east of source C (Fig. 4), and none of them coincides with the head of C. The NH - 3 (9,6) maser M1 is also found -slightly o ff the head of source C. This could suggest that M1 +3 (9,6) maser M1 is also found +slightly off the head of source C. This could suggest that M1 is powered by continuum source C or by an outflow. Near component B, there are some OH and CH -3 OH masers but no H -2 O +3OH masers but no H +2O or NH - 3 masers. A group of H - 2 O masers, well-known tracers +3 masers. A group of H +2O masers, well-known tracers of outflows, with a large velocity distribution of 43 km s−1 ≤ V -LSR ≤54 km s −1 - , was found to the west of the centimetercontinuum +LSR ≤54 km s−1 +, was found to the west of the centimetercontinuum source A and close to the peak of the millimetercontinuum emission (see details in our Fig. A.2 and also in Fig. 5 of Imai et al. 2011). The closeness of NH @@ -741,8 +741,8 @@ again suggest an association of NH activity. 4.5. Constraints on pumping scenarios Our observations have resulted in the detection of NH - 3 (9,6) -masers in Cep A and G34.26 +0.15. The new detections could +3 (9,6) +masers in Cep A and G34.26+0.15. The new detections could provide additional constraints on the maser line’s pumping mechanism. As mentioned in Sect. 1, the pumping mechanism of the (9,6) maser is unclear (Madden et al. 1986; Brown & @@ -752,28 +752,28 @@ main pumping scenarios to explain the observed NH lines (Madden et al. 1986; Henkel et al. 2013): (1) infrared radiation from the dust continuum emission, (2) line overlap, and (3) collisional pumping. -For the first mechanism, infrared photons near 10 µ m are +For the first mechanism, infrared photons near 10 µm are needed for vibrational excitation. 
The high dust temperature -( ∼300 K) of W51-IRS2 can provide substantial infrared photons - near 10 µ m, which is used for radiative pumping (Henkel et al. 2013). Both Cep A and G34.26 + 0.15 have similar kinetic +(∼300 K) of W51-IRS2 can provide substantial infrared photons + near 10 µm, which is used for radiative pumping (Henkel et al. 2013). Both Cep A and G34.26+0.15 have similar kinetic temperatures of 200 K (Henkel et al. 1987; Patel et al. 2005; Comito et al. 2007; Beuther et al. 2018). This suggests that high kinetic temperatures are needed to excite NH 3 (9,6) masers. However, it should be noted that the silicate dust absorption feature - might dominate at 10 µ m (see the spectral energy distribution + might dominate at 10 µm (see the spectral energy distribution of Cep A in De Buizer et al. 2017). Additionally, there is no bright infrared emission around the two (9,6) masers, M2 and M3, in G34.26+0.15 (see Fig. 4; see also Fig. 11 in De Buizer -et al. 2003 for a 10.5 µ m map). This indicates that the pumping -mechanism via infrared photons near 10 µ m may not be viable +et al. 2003 for a 10.5 µm map). This indicates that the pumping +mechanism via infrared photons near 10 µm may not be viable to explain the (9,6) masers in Cep A and G34.26+0.15. Furthermore, Wilson & Schilke (1993) argued that radiative pumping by dust emission tends to excite multiple adjacent ammonia maser transitions, which appears to contradict our failure to detect the adjacent (8,5) and (10,7) lines (with respect to quantum numbers and frequency) and to only measure the (9,6) transitions in Cep -A and G34.26 + 0.15. Therefore, we suggest that infrared radiation +A and G34.26+0.15. Therefore, we suggest that infrared radiation from dust is not the main pumping source. Madden et al. (1986) suggested that there might be some line overlaps between the rotational NH @@ -783,22 +783,22 @@ line overlaps between the rotational NH be needed to clarify this scenario. 
Based on our observations, the (9,6) maser spots are close to, but not coincident with, the peaks of the radio continuum -emission in Cep A and G34.26+ 0.15. Furthermore, the (9,6) -masers show velocity off sets with respect to their systemic velocities. +emission in Cep A and G34.26+0.15. Furthermore, the (9,6) +masers show velocity offsets with respect to their systemic velocities. This indicates that the (9,6) masers are located at the base of outflows, similar to the H -2 O masers. This is supported +2O masers. This is supported by VLBI observations that show that (9,6) masers tend to be closely associated with H -2 O masers (Pratap et al. 1991). The observed - time variability in G34.26 + 0.15 and W51-IRS2 can also +2O masers (Pratap et al. 1991). The observed + time variability in G34.26+0.15 and W51-IRS2 can also be attributed to episodic molecular outflows. This indicates that collisional pumping could be the driver of the (9,6) maser. On the other hand, collisional pumping has been successfully used to explain the NH 3 (3,3) maser (Walmsley & Ungerechts 1983; Flower et al. 1990; Mangum & Wootten 1994). Collisions tend to -pump from the K =0 level to the K = 3 level with parity changes, +pump from the K =0 level to the K =3 level with parity changes, that is, the upper level of the (3,3) metastable transition will be overpopulated. NH 3 (9,6) arises from the ortho species, so a similar @@ -807,33 +807,33 @@ overpopulated. NH allow us to test this scenario. 5. Summary We report the discovery of NH - 3 (9,6) masers in two HMSFRs, -Cep A and G34.26 +0.15. The narrow line width of the emission - features (∆ V - 1/ 2 ≤ 2.0 km s −1 - ) and their high brightness temperatures - ( > 400 K) indicate the maser nature of the lines. -The intensity of the (9,6) maser in G34.26 +0.15 is decreasing +3 (9,6) masers in two HMSFRs, +Cep A and G34.26+0.15. 
The narrow line width of the emission + features (∆V +1/2 ≤2.0 km s−1 +) and their high brightness temperatures + (> 400 K) indicate the maser nature of the lines. +The intensity of the (9,6) maser in G34.26+0.15 is decreasing with time, while toward Cep A the maser is stable based on 20 -months of monitoring at E ffelsberg. Linearly interpolating the -integrated intensities obtained at E ff elsberg as a function of time, +months of monitoring at Effelsberg. Linearly interpolating the +integrated intensities obtained at Effelsberg as a function of time, the JVLA measurements show that there is no missing flux density - on scales on the order of 1.2 arcsec (4 ×10 −3 - and 2 ×10− 2 + on scales on the order of 1.2 arcsec (4 ×10−3 + and 2 ×10−2 pc) to the total single-dish flux. The JVLA-detected emission indicates that the NH - 3 (9,6) maser in Cep A originates from a +3 (9,6) maser in Cep A originates from a sub-arcsecond-sized region slightly (0 . 28 ± 0 . 10) to the west of the peak position of the 1.36 cm continuum object, HW2. In -G34.26 +0.15, three NH +G34.26+0.15, three NH 3 (9,6) maser spots are observed: one is close to the head of the cometary UC H ii region C, and the other two are emitted from a compact region to the west of the HC H ii Article number, page 6 of 10 -Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions region A. We suggest that the (9,6) masers may be connected to outflowing gas. Higher angular resolution JVLA and VLBI observations are planned to provide more accurate positions and @@ -843,10 +843,10 @@ Acknowledgements. We would like to thank the anonymous referee for the useful Max Planck Research School (IMPRS) for Astronomy and Astrophysics at the Universities of Bonn and Cologne. Y.T.Y. would like to thank the China Scholarship Council (CSC) for its support. 
We would like to thank the staff at -the E ffelsberg for their help provided during the observations. We thank the sta ff +the Effelsberg for their help provided during the observations. We thank the staff of the JVLA, especially Tony Perreault and Edward Starr, for their assistance with the observations and data reduction. This research has made use of the -NASA / IPAC Infrared Science Archive, which is funded by the National Aeronautics +NASA/IPAC Infrared Science Archive, which is funded by the National Aeronautics and Space Administration and operated by the California Institute of Technology. References @@ -877,7 +877,7 @@ De Buizer, J. M., Radomski, J. T., Telesco, C. M., & Piña, R. K. 2003, ApJ, 598 1127 Dzib, S., Loinard, L., Rodríguez, L. F., Mioduszewski, A. J., & Torres, R. M. 2011, ApJ, 733, 71 -Flower, D. R., O ffer, A., & Schilke, P. 1990, MNRAS, 244, 4P +Flower, D. R., Offer, A., & Schilke, P. 1990, MNRAS, 244, 4P Galván-Madrid, R., Keto, E., Zhang, Q., et al. 2009, ApJ, 706, 1036 Garay, G., Ramirez, S., Rodriguez, L. F., Curiel, S., & Torrelles, J. M. 1996, ApJ, 459, 193 @@ -893,7 +893,7 @@ Heaton, B. D., Little, L. T., & Bishop, I. S. 1989, A&A, 213, 148 Henkel, C., Wilson, T. L., Asiri, H., & Mauersberger, R. 2013, A&A, 549, A90 Henkel, C., Wilson, T. L., & Mauersberger, R. 1987, A&A, 182, 137 Ho, P. T. P. & Townes, C. H. 1983, ARA&A, 21, 239 -Ho ffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 +Hoffman, I. M. & Joyce, S. A. 2014, ApJ, 782, 83 Hogge, T. G., Jackson, J. M., Allingham, D., et al. 2019, ApJ, 887, 79 Hughes, V. A. 1991, ApJ, 383, 280 Hughes, V. A. & Wouterloot, J. G. A. 1984, ApJ, 276, 204 @@ -907,7 +907,7 @@ Madden, S. C., Irvine, W. M., Matthews, H. E., Brown, R. D., & Godfrey, P. D. 1986, ApJ, 300, L79 Mangum, J. G. & Wootten, A. 1994, ApJ, 428, L33 Mauersberger, R., Henkel, C., & Wilson, T. L. 1987, A&A, 173, 352 -Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 Mauersberger, R., Wilson, T. L., & Henkel, C. 
1988, A&A, 201, 123 +Mauersberger, R., Wilson, T. L., & Henkel, C. 1986, A&A, 160, L13 Mauersberger, R., Wilson, T. L., & Henkel, C. 1988, A&A, 201, 123 McEwen, B. C., Pihlström, Y. M., & Sjouwerman, L. O. 2016, ApJ, 826, 189 McMullin, J. P., Waters, B., Schiebel, D., Young, W., & Golap, K. 2007, in Astronomical Society of the Pacific Conference Series, Vol. 376, Astronomical @@ -963,7 +963,7 @@ Zhang, Q. & Ho, P. T. P. 1995, ApJ, 450, L63 Zhang, Q., Hunter, T. R., Sridharan, T. K., & Cesaroni, R. 1999, ApJ, 527, L117 Zheng, X. W., Moran, J. M., & Reid, M. J. 2000, MNRAS, 317, 192 Article number, page 7 of 10 -A & A proofs: manuscript no. mainArxiv +A&A proofs: manuscript no. mainArxiv Appendix A: Table A.1. Summary of NH 3 (9, 6) maser observations. @@ -971,37 +971,37 @@ Source Telescope Beam Epoch Channel S ν rms S ν dv V -LSR ∆ V -1 /2 +LSR ∆V +1/2 size spacing -(km s − 1 - ) (Jy) (mJy) (Jy km s −1 - ) (km s − 1 - ) -Cep A E ff elsberg 49 +(km s−1 +) (Jy) (mJy) (Jy km s−1 +) (km s−1 +) +Cep A Effelsberg 49 2020, Jan. 04 0.62 0.67 3.41 1.19 ± 0.02 -1.11 ± 0.02 1.67 ± 0.04 -Eff elsberg 49 +Effelsberg 49 2021, Feb. 11 0.62 0.59 5.97 1.08 ± 0.02 -0.74 ± 0.02 1.70 ± 0.04 -Eff elsberg 49 +Effelsberg 49 2021, Feb. 15 0.62 0.65 10.98 1.11 ± 0.03 -0.75 ± 0.02 1.60 ± 0.05 JVLAa 1 . 47 × 0 . 99 2021, Jul. 13 0.13 1.13 144 0.89 ± 0.09 -0.86 ± 0.03 0.74 ± 0.12 -Eff elsberg 49 +Effelsberg 49 2021, Aug. 11 0.07 0.98 13.36 0.49 ± 0.02 -0.90 ± 0.01 0.47 ± 0.01 0.35 0.26 ± 0.02 -0.28 ± 0.02 0.69 ± 0.05 -Eff elsberg 49 +Effelsberg 49 2021, Aug. 12 0.07 0.98 13.35 0.50 ± 0.01 -0.89 ± 0.07 0.48 ± 0.07 0.35 0.20 ± 0.01 -0.29 ± 0.07 0.54 ± 0.07 0.06 0.07 ± 0.01 0.51 ± 0.07 1.09 ± 0.07 0.02 0.02 ± 0.01 2.15 ± 0.07 0.80 ± 0.07 0.07 0.06 ± 0.01 2.89 ± 0.07 0.92 ± 0.07 -G34.26 +0.15 E ff elsberg 49 +G34.26+0.15 Effelsberg 49 2020, Jan. 03 0.62 0.30 1.26 0.65 ± 0.03 62.50 ± 0.05 2.05 ± 0.13 -Eff elsberg 49 +Effelsberg 49 2021, Feb. 
11 0.62 0.24 2.42 0.40 ± 0.02 55.76 ± 0.04 1.60 ± 0.12 -Eff elsberg 49 +Effelsberg 49 2021, Feb. 15 0.62 0.20 4.86 0.38 ± 0.02 55.71 ± 0.05 1.80 ± 0.14 JVLAb 1 @@ -1009,94 +1009,94 @@ JVLAb . 06 2021, Jul. 13 0.13 0.23 37.1 0.09 ± 0.02 54.41 ± 0.03 0.38 ± 0.09 0.22 0.22 ± 0.02 55.82 ± 0.05 0.95 ± 0.12 0.15 0.06 ± 0.01 57.21 ± 0.04 0.35 ± 0.08 -Eff elsberg 49 +Effelsberg 49 2021, Aug. 11 0.07 0.08 13.92 0.06 ± 0.007 54.10 ± 0.05 0.68 ± 0.12 0.07 0.02 ± 0.006 54.82 ± 0.03 0.31 ± 0.09 0.12 0.10 ± 0.006 55.85 ± 0.02 0.75 ± 0.06 -Eff elsberg 49 +Effelsberg 49 2021, Aug. 12 0.07 0.16 27.40 0.09 ± 0.008 55.83 ± 0.02 0.56 ± 0.05 -Notes. The spectral parameters are obtained from Gaussian fitting. (a ) - The JVLA spectrum toward Cep A is extracted from the E ff elsberg-beamsized +Notes. The spectral parameters are obtained from Gaussian fitting. (a) + The JVLA spectrum toward Cep A is extracted from the Effelsberg-beamsized region (FWHM 49 - ). (b ) - For G34.26+ 0.15, the JVLA beam samples the NH - 3 (9,6) spectrum over a region of radius 3 +). (b) + For G34.26+0.15, the JVLA beam samples the NH +3 (9,6) spectrum over a region of radius 3 . 5, which contains all detected NH 3 (9,6) emissions. Table A.2. 1.36 cm JVLA flux densities of individual continuum sources. Source R.A. Dec. Size P.A. 
S ν -( h m s) ( ◦ - ) (arcsec) (deg) (mJy) +(h m s) (◦ +) (arcsec) (deg) (mJy) Cep A HW2 22 56 17.972 ± 0.003 +62 01 49.587 ± 0.015 (0.45 ± 0.19) × (0.22 ± 0.10) 50.0 20.2 ± 1.4 HW3a 22 56 17.420 ± 0.022 +62 01 44.576 ± 0.076 (2.35 ± 0.45) × (0.55 ± 0.14) 66.6 4.75 ± 0.74 HW3b 22 56 17.578 ± 0.009 +62 01 45.041 ± 0.043 (1.43 ± 0.24) × (0.45 ± 0.10) 59.9 3.19 ± 0.36 HW3c 22 56 17.956 ± 0.016 +62 01 46.224 ± 0.038 (1.44 ± 0.37) × (0.36 ± 0.19) 86.0 9.90 ± 1.7 HW3d 22 56 18.195 ± 0.005 +62 01 46.325 ± 0.014 (1.26 ± 0.12) × (0.30 ± 0.19) 102.5 13.75 ± 0.92 HW9 22 56 18.626 ± 0.014 +62 01 47.851 ± 0.137 (1.53 ± 0.51) × (0.29 ± 0.30) 28.0 3.26 ± 0.78 -G34.26 +0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33 +G34.26+0.15 A 18 53 18.774 ± 0.005 +01 14 56.208 ± 0.125 (0.66 ± 0.49) × (0.50 ± 0.33) 10.0 94 ± 33 B 18 53 18.649 ± 0.005 +01 15 00.071 ± 0.180 (2.31 ± 0.49) × (0.85 ± 0.21) 17.4 597 ± 110 C 18 53 18.560 ± 0.004 +01 14 58.201 ± 0.112 (2.03 ± 0.30) × (1.34 ± 0.20) 178.0 5070 ± 660 Article number, page 8 of 10 -Y. T. Yan ( 闫耀庭 ) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions +Y. T. Yan (闫耀庭) et al.: Discovery of ammonia (9,6) masers in two high-mass star-forming regions Table A.3. NH 3 (9,6) maser positions derived from the JVLA observations. -Source R.A. Dec. S +Source R.A. Dec. 
S ν T - MB V -LSR ∆ V -1 /2 -(h m s ) ( ◦ - ) (mJy beam−1 - ) (K) (km s− 1 - ) +MB V +LSR ∆V +1/2 +(h m s) (◦ +) (mJy beam−1 +) (K) (km s−1 +) Cep A M 22 56 17.933 ± 0.002 +62 01 49.608 ± 0.011 985.2 2464.8 -0.88 ± 0.01 0.51 ± 0.02 343.2 829.5 -0.24 ± 0.03 0.63 ± 0.05 -G34.26 +0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14 +G34.26+0.15 M1 18 53 18.569 ± 0.007 +01 14 57.997 ± 0.056 37.1 94.5 56.82 ± 0.06 0.68 ± 0.14 M2 18 53 18.696 ± 0.002 +01 14 55.807 ± 0.034 48.4 122.4 53.77 ± 0.05 0.35 ± 0.08 57.8 146.2 54.35 ± 0.07 0.83 ± 0.14 180.8 457.6 55.83 ± 0.01 0.59 ± 0.03 M3 18 53 18.667 ± 0.005 +01 14 55.348 ± 0.066 78.1 197.2 54.22 ± 0.04 0.94 ± 0.08 73.7 186.3 55.78 ± 0.04 0.79 ± 0.08 Fig. A.1. Cepheus A. The grey shaded areas mark the 1.36 cm JVLA continuum map of Cep A. The reference position is α - J2000 = 22h - 56 m - 17 s -. 972, +J2000 = 22h +56m +17s +.972, and δ J2000 = 62◦ - 01 - 49 +01 +49 . 587, the peak position of the continuum map, is marked by a red cross. Slightly to the west of the cross is the white ellipse denoting the position of the NH 3 (9,6) emission with a purple star at its center. The red contours show the NOrthern Extended Millimeter Array -(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam − 1 - . +(NOEMA) 1.37 mm continuum, taken from Beuther et al. (2018). Contour levels are -5, 5, 10, 20, 40, 80, 100, 150, and 200 × 2.43 mJy beam−1 +. OH (Bartkiewicz et al. 2005), H - 2 O (Sobolev et al. 2018), and CH - 3 OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, -respectively. The color bar on the right-hand side indicates the velocity range ( V -LSR ) of maser spots. +2O (Sobolev et al. 2018), and CH +3OH (Sanna et al. 2017) masers are presented as diamonds, circles, and squares, +respectively. The color bar on the right-hand side indicates the velocity range (V +LSR) of maser spots. 
Article number, page 9 of 10 -A & A proofs: manuscript no. mainArxiv -Fig. A.2. 1.36 cm JVLA continuum map of G34.26 +0.15 presented as gray shaded areas. The reference position is α +A&A proofs: manuscript no. mainArxiv +Fig. A.2. 1.36 cm JVLA continuum map of G34.26+0.15 presented as gray shaded areas. The reference position is α J2000 = 18h - 53 m - 18 s -. 560, and +53m +18s +.560, and δ - J2000 = 01◦ - 14 - 58 +J2000 = 01◦ +14 +58 . 201, the peak position, is marked by a red cross. The red ellipses show the positions of NH - 3 (9,6) emission with stars at their +3 (9,6) emission with stars at their center (i.e., M1, M2, and M3). The blue contours show the Berkeley-Illinois-Maryland Association (BIMA) array 2.8 mm continuum, taken from Mookerjea et al. (2007). Contour levels are -3, 3, 10, 20, 30, 40, 50, 70, 90, 100, 120, and 140 × 20 mJy beam−1 - . OH (Zheng et al. 2000), H -2 O (Imai +. OH (Zheng et al. 2000), H +2O (Imai et al. 2011), and CH - 3 OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates +3OH (Bartkiewicz et al. 2016) masers are presented as diamonds, circles, and squares, respectively. The color bar indicates the velocity range (V - LSR ) of maser spots. +LSR) of maser spots. Article number, page 10 of \ No newline at end of file diff --git a/read/results/playa/2201.00022.txt b/read/results/playa/2201.00022.txt index e869ca3..66ac88e 100644 --- a/read/results/playa/2201.00022.txt +++ b/read/results/playa/2201.00022.txt @@ -3,27 +3,27 @@ Typeset using LA T EX twocolumn style in AASTeX631 The Formation of Intermediate Mass Black Holes in Galactic Nuclei -Sanaea C. Rose, 1, 2 - Smadar Naoz, 1, 2 +Sanaea C. Rose,1, 2 + Smadar Naoz,1, 2 Re’em Sari,3 - and Itai Linial 3 + and Itai Linial3 1 - Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA +Department of Physics and Astronomy, University of California, Los Angeles, CA 90095, USA 2 - Mani L. 
Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA +Mani L. Bhaumik Institute for Theoretical Physics, University of California, Los Angeles, CA 90095, USA 3 - Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel +Racah Institute for Physics, The Hebrew University, Jerusalem 91904, Israel ABSTRACT Most stellar evolution models predict that black holes (BHs) should not exist above approximately 50 − 70 M - , the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections +, the lower limit of the pair-instability mass gap. However, recent LIGO/Virgo detections indicate the existence of BHs with masses at and above this threshold. We suggest that massive BHs, including intermediate mass black holes (IMBHs), can form in galactic nuclei through collisions between stellar-mass black holes and the surrounding main-sequence stars. Considering dynamical processes such as collisions, mass segregation, and relaxation, we find that this channel can be quite efficient, forming IMBHs as massive as 104 M - . This upper limit assumes that (1) the BHs accrete a +. This upper limit assumes that (1) the BHs accrete a substantial fraction of the stellar mass captured during each collision and (2) that the rate at which new stars are introduced into the region near the SMBH is high enough to offset depletion by stellar disruptions and star-star collisions. We discuss deviations from these key assumptions in the text. Our @@ -39,18 +39,18 @@ The recently detected gravitational wave source GW190521 (The LIGO Scientific Collaboration et al. 2020a,b) produced an intermediate mass black hole of approximately 142 M - . This event may have also had a +. This event may have also had a 85 M progenitor, which falls within the pair-instability mass gap that limits stellar black holes (BHs) to no more than ∼< 50 M - (e.g.,Heger et al.2003;Woosley -2017) 1 - . 
Similarly, the merger products of GW150914, + (e.g., Heger et al. 2003; Woosley +2017)1 +. Similarly, the merger products of GW150914, GW170104, and GW170814 fall within the mass gap -(e.g.,Abbott et al.2016,2017a,b). BH mergers that +(e.g., Abbott et al. 2016, 2017a,b). BH mergers that form second generation BHs and, in some cases, intermediate mass BHs (IMBHs), these gravitational wave (GW) events can occur in globular clusters, young stelCorresponding @@ -58,104 +58,104 @@ form second generation BHs and, in some cases, intermediate srose@astro.ucla.edu 1 Note that the exact lower and upper limits may be sensitive to -metallicity of the progenitor (e.g.,Woosley2017;Spera & Mapelli -2017a;Limongi & Chieffi2018a;Sakstein et al.2020;Belczynski -et al.2020a;Renzo et al.2020;Vink et al.2021). lar clusters, or the field (e.g.,Rodriguez et al.2018;Rodriguez - et al.2019;Fishbach et al.2020;Mapelli et al. -2021b,a;Di Carlo et al.2019,2021;Dall’Amico et al. -2021;Arca Sedda et al.2021). However, IMBHs are +metallicity of the progenitor (e.g., Woosley 2017; Spera & Mapelli +2017a; Limongi & Chieffi 2018a; Sakstein et al. 2020; Belczynski +et al. 2020a; Renzo et al. 2020; Vink et al. 2021). lar clusters, or the field (e.g., Rodriguez et al. 2018; Rodriguez + et al. 2019; Fishbach et al. 2020; Mapelli et al. +2021b,a; Di Carlo et al. 2019, 2021; Dall’Amico et al. +2021; Arca Sedda et al. 2021). However, IMBHs are not limited to these locations and may reside in galactic nuclei as well. Several studies propose that our own galactic center may host an IMBH in the inner pc -(e.g.,Hansen & Milosavljevi´c2003;Maillard et al.2004; -G¨urkan & Rasio2005;Gualandris & Merritt2009;Chen -& Liu2013;Generozov & Madigan2020;Fragione et al. -2020a;Zheng et al.2020;Naoz et al.2020;GRAVITY -Collaboration et al.2020). +(e.g., Hansen & Milosavljevi´c 2003; Maillard et al. 2004; +G¨urkan & Rasio 2005; Gualandris & Merritt 2009; Chen +& Liu 2013; Generozov & Madigan 2020; Fragione et al. 
+2020a; Zheng et al. 2020; Naoz et al. 2020; GRAVITY +Collaboration et al. 2020). Several IMBH formation channels have been suggested in the literature. For example, IMBHs may have a cosmological origin, forming in the early universe either -as a result of the very first stars (e.g.,Madau & Rees -2001;Schneider et al.2002;Johnson & Bromm2007; -Valiante et al.2016) or from direct collapse of accumulated - gas (e.g.,Begelman et al.2006;Yue et al.2014; -Ferrara et al.2014;Choi et al.2015;Shlosman et al. +as a result of the very first stars (e.g., Madau & Rees +2001; Schneider et al. 2002; Johnson & Bromm 2007; +Valiante et al. 2016) or from direct collapse of accumulated + gas (e.g., Begelman et al. 2006; Yue et al. 2014; +Ferrara et al. 2014; Choi et al. 2015; Shlosman et al. 2016). These high redshift IMBHs would need to survive galaxy evolution and mergers to present day (e.g.,arXiv:2201.00022v2 [astro-ph.GA] 6 Jul 2022 - Rose et al. -Rashkov & Madau2014), with significant effects on their -stellar and even dark matter surroundings (e.g.,Bertone -et al.2009;Chen & Liu2013;Bringmann et al.2012;Eda -et al.2013;Naoz & Silk2014;Naoz et al.2019). Another + Rose et al. +Rashkov & Madau 2014), with significant effects on their +stellar and even dark matter surroundings (e.g., Bertone +et al. 2009; Chen & Liu 2013; Bringmann et al. 2012; Eda +et al. 2013; Naoz & Silk 2014; Naoz et al. 2019). Another popular formation channel relies on the coalescence of many stellar-mass black holes, which may seed ob jects -as massive as SMBHs (e.g.,Kroupa et al.2020). IMBHs +as massive as SMBHs (e.g., Kroupa et al. 2020). IMBHs may form in the centers of globular clusters, where fewbody interactions lead to the merger of stellar-mass BHs -(e.g.,O’Leary et al.2006;G¨urkan et al.2006;Blecha -et al.2006;Freitag et al.2006;Umbreit et al.2012;Rodriguez - et al.2018;Rodriguez et al.2019;Fragione et al. +(e.g., O’Leary et al. 2006; G¨urkan et al. 2006; Blecha +et al. 2006; Freitag et al. 
2006; Umbreit et al. 2012; Rodriguez + et al. 2018; Rodriguez et al. 2019; Fragione et al. 2020b). Other formation mechanisms invoke successive -collisions and mergers of massive stars (e.g.,Ebisuzaki -et al.2001;Portegies Zwart & McMillan2002;Portegies -Zwart et al.2004;Freitag et al.2006;Sakurai et al.2017; -Kremer et al.2020;Gonz´alez et al.2021;Di Carlo et al. -2021;Das et al.2021a,b;Escala2021). +collisions and mergers of massive stars (e.g., Ebisuzaki +et al. 2001; Portegies Zwart & McMillan 2002; Portegies +Zwart et al. 2004; Freitag et al. 2006; Sakurai et al. 2017; +Kremer et al. 2020; Gonz´alez et al. 2021; Di Carlo et al. +2021; Das et al. 2021a,b; Escala 2021). The main obstacle to sequential BH mergers in clusters is that the merger recoil velocity kick often exceeds -the escape velocity from the cluster (e.g.,Schnittman -& Buonanno2007;Centrella et al.2010;O’Leary et al. -2006;Baibhav et al.2020, Rom & Sari, in prep.). However, +the escape velocity from the cluster (e.g., Schnittman +& Buonanno 2007; Centrella et al. 2010; O’Leary et al. +2006; Baibhav et al. 2020, Rom & Sari, in prep.). However, nuclear star clusters at the centers of galaxies do -not encounter this problem. For example,Fragione et al. +not encounter this problem. For example, Fragione et al. (2021) explore repeated BH-BH mergers in nuclear star clusters without a SMBH. They considered BH binarysingle interactions, binary BH GW merger, and GW merger recoil kicks. The post-kick merger product sinks back towards the cluster center over a dynamical friction timescale. Using this approach, they showed that -10 3 - − 10 4 +103 + − 104 M - IMBHs can form efficiently over the lifetime + IMBHs can form efficiently over the lifetime of a cluster. -However, as discussed in Section2.2, direct BH-star +However, as discussed in Section 2.2, direct BH-star collisions are much more frequent than BH-BH collision in galactic nuclei, making the former a promising channel for BH growth. 
In an N-body study of young star -clusters,Rizzuto et al.(2022) find that BH-star collisions +clusters, Rizzuto et al. (2022) find that BH-star collisions are a main contributor to the formation of BHs -in the mass gap and IMBHs. In a similar vein,Stone -et al.(2017) demonstrate that massive BHs can form +in the mass gap and IMBHs. In a similar vein, Stone +et al. (2017) demonstrate that massive BHs can form from repeated tidal encounters between stars and BHs. More generally, several studies have explored the role of collisions in a GN, with implications for the stellar and -red giant populations (e.g.,Dale & Davies2006;Dale -et al.2009;Balberg et al.2013;Mastrobuono-Battisti -et al.2021). We propose that IMBHs can form naturally +red giant populations (e.g., Dale & Davies 2006; Dale +et al. 2009; Balberg et al. 2013; Mastrobuono-Battisti +et al. 2021). We propose that IMBHs can form naturally within the central pc of a galactic center through repeated collisions between BHs and main sequence stars. During a collision, the BH can accrete some portion of the star’s mass. Over many collisions, it can grow appreciably - in size. We demonstrate that this channel can create IMBHs with masses as large as 10 4 + in size. We demonstrate that this channel can create IMBHs with masses as large as 104 M - , an upper +, an upper limit that depends on the density profile of the surrounding stars and the efficiency of the accretion. The paper is structured as follows: we describe relevant - physical processes and our approach in Section2. + physical processes and our approach in Section 2. In particular, we provide an overview of collisions in -Section2.2and present our statistical approach in Section2.3. - Section2.4discusses our treatment of the +Section 2.2 and present our statistical approach in Section + 2.3. 
Section 2.4 discusses our treatment of the mass growth with each collision and presents analytic solutions to our equations in two different regimes, efficient collisions and inefficient collisions We compare -these solutions to our statistical results. Sections2.6 -and2.8discuss implications for GW merger events between +these solutions to our statistical results. Sections 2.6 +and 2.8 discuss implications for GW merger events between IMBHs and the SMBH. We then incorporate relaxation processes and discuss the subsequent results in -Section2.9. Finally, we discuss and summarize our findings - in Section3. +Section 2.9. Finally, we discuss and summarize our findings + in Section 3. 2. METHODOLOGY We consider a population of stellar mass BHs embedded in a cluster of 1 M @@ -169,29 +169,29 @@ We consider a population of BHs within the inner few parsecs of the SMBH in a galactic nucleus (GN). We assume that the BH mass distribution follows that of the stars from which they originate, a Kroupa initial mass -function dN/dm ∝ m− 2.35 - . While this choice represents +function dN/dm ∝ m−2.35 +. While this choice represents a gross oversimplification, it has very little bearing on our final results. Future work may address the particulars of the BH mass distribution, but we do not expect that it will significantly alter the outcome. The upper and lower limits of the BH mass distribution are 5 and 50 M - , respectively. We select the upper limit to encompass +, respectively. We select the upper limit to encompass the range of upper bounds predicted by stellar evolution models, which vary between 40 and 125 M -depending on the metallicity (Heger et al.2003;Woosley -2017;Spera & Mapelli2017b;Limongi & Chieffi2018b; -Belczynski et al.2020b;Renzo et al.2020). We assume +depending on the metallicity (Heger et al. 2003; Woosley +2017; Spera & Mapelli 2017b; Limongi & Chieffi 2018b; +Belczynski et al. 2020b; Renzo et al. 2020). 
We assume that the orbits of the BHs follow a thermal eccentricity distribution. We draw their semima jor axes, a -• , from a -uniform distribution in log distance, dN/d(log r ) being +•, from a +uniform distribution in log distance, dN/d(log r) being constant. While this distribution is not necessarily representative of actual conditions in the GN, we use it to build a comprehensive physical picture of BH growth at -all distances from the SMBH, including within 0 . 01 pc. +all distances from the SMBH, including within 0.01 pc. Otherwise, the innermost region of the GN would be poorly represented in our sample. We consider other IMBH Formation in Galactic Nuclei 3 @@ -206,46 +206,46 @@ blue line represents the time for a 105 M BH to merge with the SMBH through GW emission. -observationally motivated distributions in Section2.9, +observationally motivated distributions in Section 2.9, but reserve a more detailed examination of the distribution’s impact for future work. 2.2. Direct Col lisions BHs in the GN can undergo direct collisions with other ob jects. The timescale for this process, t - coll , can be estimated - using a simple rate calculation: t− 1 +coll, can be estimated + using a simple rate calculation: t−1 coll = nσA, where n is the number density of ob jects, σ is the velocity dispersion, and A is the cross-section. We use the -collision timescale fromRose et al.(2020): -t− 1 -coll = πn (a -• ) σ (a -• ) +collision timescale from Rose et al. (2020): +t−1 +coll = πn(a +•)σ(a +•) × - f -1 (e - • )r 2 +f +1(e +•)r2 c + f -2 (e - • )r -c 2G (m +2(e +•)r +c 2G(m BH + m - ) -σ ( a -• )2 +) +σ(a +•)2 . (1) where G is the gravitational constant and r c is the sum of the radii of the interacting ob jects, a black hole with mass m - BH and a star with mass m - . Detailed inRose -et al.(2020), f - 1 ( e - • ) and f - 2 (e - • ) account for the effect of +BH and a star with mass m +. Detailed in Rose +et al. 
(2020), f +1(e +•) and f +2(e +•) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and σ are simply evaluated at the semima jor axis of the orbit (see below). Note @@ -255,103 +255,103 @@ interaction. Assuming a circular orbit for simplicity, we plot the timescale for a BH orbiting in the GN to collide with a 1 M - star as a function of distance from the SMBH in Figure1. 2 + star as a function of distance from the SMBH in Figure 1.2 As this timescale depends on the density of surrounding stars, we adopt a density profile of the form: - ρ ( r -• ) = ρ - 0 + ρ(r +•) = ρ +0 r • r 0 - −α +−α , (2) where r • denotes the distance from the SMBH. We adopt -a SMBH mass of 4 × 10 6 +a SMBH mass of 4 × 106 M such that our fiducial GN -matches our own galactic center (e.g.,Ghez et al.2005; -Genzel et al.2003). In this case, the normalization in +matches our own galactic center (e.g., Ghez et al. 2005; +Genzel et al. 2003). In this case, the normalization in Eq. (2) is ρ - 0 = 1. 35 × 10 6 +0 = 1.35 × 106 M - /pc3 +/pc3 at r 0 = 0.25 pc (Genzel - et al.2010). Additionally, in Eq. (2), α gives the + et al. 2010). Additionally, in Eq. (2), α gives the slope of the power law. We assume that a uniform population of solar mass stars account for most of the mass in the GN, making the stellar number density: -n ( r -• ) = ρ ( r -• ) +n(r +•) = ρ(r +•) 1 M - . (3) + . (3) The collision timescale also depends on the velocity dispersion, which we express as: -σ (r -• ) = +σ(r +•) = GM • r -• (1 + α ) , (4) +•(1 + α) , (4) where α is the slope of the density profile and M - • denotes - the mass of the SMBH (Alexander1999;Alexander - & Pfuhl2014). As mentioned above, Eq. (1) depends +• denotes + the mass of the SMBH (Alexander 1999; Alexander + & Pfuhl 2014). As mentioned above, Eq. (1) depends on the sum of the radii of the colliding ob jects, r -c . We +c. 
We take r c = 1 R because these interactions involve a BH and a star, and the former has a much smaller physical cross-section. For example, the Schwarzschild radius of a 10 M - BH is only 30 km, or 4 . 31 × 10 −5 + BH is only 30 km, or 4.31 × 10−5 R - . For +. For this reason, direct collisions between compact ob jects are very rare and not included in our model. We note that direct collisions between BHs, via GW emission, were shown to be efficient in nuclear star clusters - without SMBHs (e.g.,Portegies Zwart & McMillan2000;O’Leary - et al.2006;Rodriguez et al.2016). + without SMBHs (e.g., Portegies Zwart & McMillan + 2000; O’Leary et al. 2006; Rodriguez et al. 2016). However, in the GN, star-BH collisions are much more frequent than direct BH-BH collisions. As depicted in -Figure1, the star-BH collision timescale for a range +Figure 1, the star-BH collision timescale for a range of density profiles is many orders of magnitude shorter than the BH-BH GW collision timescale (for the relevant - equations, seeO’Leary et al.2009;Gond´an et al. + equations, see O’Leary et al. 2009; Gond´an et al. 2018, for example). Thus, we expect that star-BH collisions will be the main driver of IMBH growth in the GN. 2 We note that the eccentricity has a very minor effect on the -collision timescale (Rose et al.2020). - Rose et al. +collision timescale (Rose et al. 2020). + Rose et al. 2.3. Statistical Approach to Col lisions We simulate the mass growth of a population of BHs -with initial conditions detailed in Section2.1. Over an -increment ∆t of 10 6 +with initial conditions detailed in Section 2.1. Over an +increment ∆t of 106 yr, we calculate the probability of -a collision occurring, given by ∆ t/t - coll . This choice of -∆ t is motivated by our galactic center’s star formation -timescale (e.g.,Lu et al.2009), allowing for regular replenishment +a collision occurring, given by ∆t/t +coll. 
This choice of +∆t is motivated by our galactic center’s star formation +timescale (e.g., Lu et al. 2009), allowing for regular replenishment of the stellar population in the GN. We have checked that the results are not sensitive to this choice -of ∆ t , omitted here to avoid clutter. We draw a number +of ∆t, omitted here to avoid clutter. We draw a number between 0 and 1 using a random number generator. If that number is less than or equal to the probability, we -increase the BH’s mass by ∆ m , the mass that the BH is -expected to accrete in a single collision (see Section2.4 +increase the BH’s mass by ∆m, the mass that the BH is +expected to accrete in a single collision (see Section 2.4 for details). We recalculate the collision timescale using the updated BH mass and repeat this process until the time elapsed equals the simulation time of 10 Gyr3 - . +. 2.4. Mass Growth When a BH collides with a star, it may accrete material and grow in mass. The details of the accretion @@ -362,10 +362,10 @@ passing through the star’s center. We begin by considering the escape velocity from the BH at the star’s outermost point, its surface, which corresponds to the maximum impact parameter 1 R - . Qualitatively, one +. Qualitatively, one might expect that the BH could capture the entire star -(i.e., ∆ m ∼ 1 M - ) if the relative velocity is smaller than +(i.e., ∆m ∼ 1 M +) if the relative velocity is smaller than the escape velocity from the BH at this point. However, in the vicinity of the SMBH, the dispersion velocity of the stars may be much larger than the escape velocity @@ -373,81 +373,81 @@ from the BH at the star’s surface. In this case, the BH captures a “tunnel” of material through the star. This tunnel has radius equal to the Bondi radius and length approximately 1 R - . For the purposes of this study, we +. For the purposes of this study, we assume that the BH accretes all of the material that it captures. 
The details of the accretion are uncertain, however, and it may be much less efficient than our results - imply. We discuss accretion in Section2.5. + imply. We discuss accretion in Section 2.5. To estimate ∆m, we begin with the Bondi-Hoyle accretion rate, ˙m, given by: -˙m = 4 πG2 - m 2 -BH ρ - star +˙m = 4πG2 +m2 +BHρ +star (c2 -s + σ 2 - )3/ 2 , (5) +s + σ2 +)3/2 , (5) 3 Closer to the SMBH, ∆t may exceed the collision timescale by a factor of a few for steep density profiles. We include a safeguard in our code which takes the ratio t -coll /∆ t and rounds it +coll/∆t and rounds it to the nearest integer. We take this integer to be the number of -collisions and increase the BH mass accordingly. Figure 2. We consider an example that highlights the mass +collisions and increase the BH mass accordingly. Figure 2. We consider an example that highlights the mass growth as a function of distance from the SMBH. Grey dots represent the initial masses and distances from the SMBH of the BHs involved in the simulation. For simplicity, we set the inital mass equal to 10 M - for all of the BHs. Assuming + for all of the BHs. Assuming the density profile of stars has α = 1, we consider two cases: BHs accrete all of the star’s mass during a collision (red) and only a portion of the star’s mass is accreted during a collision -given by Eq.6(blue). The latter case results in less growth +given by Eq. 6 (blue). The latter case results in less growth closer to the SMBH where the velocity dispersion becomes high. The shaded regions and dashed lines represent the -analytical predictions detailed in Section2.4. +analytical predictions detailed in Section 2.4. where c s is the speed of sound in the star and ρ - star is its -density (e.g.,Bondi1952;Bondi & Hoyle1944;Shima -et al.1985;Edgar2004, see latter for a review). We +star is its +density (e.g., Bondi 1952; Bondi & Hoyle 1944; Shima +et al. 1985; Edgar 2004, see latter for a review). 
We approximate the density as 1 M - / (4πR 3 - /3) and take +/(4πR3 +/3) and take the conservative value of c - s = 500 km s−1 - , which is +s = 500 km s−1 +, which is consistent with the sound speed inside a 1 M - star -(Christensen-Dalsgaard et al.1996) and allows us to set -a lower limit on ∆ m. To find ∆m, at each collision, we + star +(Christensen-Dalsgaard et al. 1996) and allows us to set +a lower limit on ∆m. To find ∆m, at each collision, we have: - ∆ m = min( ˙m × t - , cross , 1 M - ) , (6) + ∆m = min( ˙m × t +,cross, 1 M +) , (6) where t - , cross ∼ R - /σ is the crossing time of the BH in +,cross ∼ R +/σ is the crossing time of the BH in the star. We take the minimum between ˙m × t -, cross and +,cross and 1 M - because the BH cannot accrete more mass than + because the BH cannot accrete more mass than one star at each collision. -Figure2juxtaposes the expected growth using BondiHoyle-Lyttleton +Figure 2 juxtaposes the expected growth using BondiHoyle-Lyttleton accretion (blue small points) with a much simpler model in which the BH accretes the star’s entire mass, 1 M - (red large points). Both examples + (red large points). Both examples start with identical populations of 10 M - BHs (grey) + BHs (grey) and simulate growth through collisions using a statistical approach. As the BHs grow, the collision timescale, which depends on m - BH , decreases. Simultaneously, -∆ m , which also depends on m +BH , decreases. Simultaneously, +∆m, which also depends on m BH , increases. The result is exponential growth (see discussion and details -surrounding Eq. (8)). In Figure2, however, the simulations +surrounding Eq. (8)). In Figure 2, however, the simulations assume α = 1 for the stellar density profile, ensuring the collision timescale is long compared to the sim- IMBH Formation in Galactic Nuclei 5 @@ -455,23 +455,23 @@ ulation time, 10 Gyr. 
Therefore, the BHs grow slowly, and their final masses can be approximated using the following equation: m -final (t - coll → const .) = m - initial + ∆ m T +final(t +coll → const.) = m +initial + ∆m T t - coll , (7) -in which T represents the simulation time and ∆ m and +coll , (7) +in which T represents the simulation time and ∆m and t - coll remain constant, approximated as their initial values. +coll remain constant, approximated as their initial values. -This equation is plotted in Figure2for both cases, -∆ m = 1 M - (red) and ∆m from Bondi-Hoyle-Lyttleton +This equation is plotted in Figure 2 for both cases, +∆m = 1 M + (red) and ∆m from Bondi-Hoyle-Lyttleton accretion (blue), and the curves coincide with the corresponding simulated results. The shaded regions represent one standard deviation from Eq. (7), calculated using the square root of the number of collisions, T /t - coll . +coll. As indicated by the results in red, in the absence of Bondi-Hoyle-Lyttleton accretion, the BHs closest to the SMBH experience the most growth because they have @@ -479,141 +479,141 @@ shorter collision timescales. However, Bondi-HoyleLyttleton accretion becomes important closer to the SMBH, where the velocity dispersion is large compared with the stars’ escape velocity, and curtails the mass -growth for BHs in this region. Outside of 10− 2 +growth for BHs in this region. Outside of 10−2 pc, a BH consumes the star’s entire mass: the accretion-limited -∆ m governed by Eq. (7) is greater than or equal to the +∆m governed by Eq. (7) is greater than or equal to the star’s mass. -Eq.7does not apply for other values of α . When the +Eq. 7 does not apply for other values of α. When the collision timescale is shorter, corresponding to a larger -index α in the density profile (see Figure1), the growth +index α in the density profile (see Figure 1), the growth is very efficient and ∆m quickly approaches 1 M - . Consequently, - while we can now assume ∆ m = 1 M - , we +. 
Consequently, + while we can now assume ∆m = 1 M +, we can no longer assume the collision timescale is constant. The final mass grows exponentially as a result. For -∆ m = 1M - , the general solution is reached by solving +∆m = 1M +, the general solution is reached by solving the differential equation dm/dt = 1 M - /t - coll (m ), which +/t +coll(m), which gives: m -final (∆ m → 1 M - ) = − A + ( m -initial + A ) e CT +final(∆m → 1 M +) = −A + (m +initial + A) eCT (8) -where A = σ 2 - R - star /G and C = 2πGn -star R - star /σ . As an +where A = σ2 +R +star/G and C = 2πGn +starR +star/σ. As an example, we plot this curve in purple for the α = 2 case, -in Figure3, which agrees with the simulated masses. +in Figure 3, which agrees with the simulated masses. 2.5. Uncertainties in Accretion -We note that the ∆ M calculated in this proof-ofconcept +We note that the ∆M calculated in this proof-ofconcept study assumes that the BH accretes all of the material that it captures. Estimating the true fraction of the material accreted by the BH is very challenging; this complex problem requires numerically solving the generalized GR fluid equations with cooling, heating, and radiative transfer, etc. and remains an active -field of research (e.g.,Blandford & Begelman1999;Park -& Ostriker2001;Narayan et al.2003;Igumenshchev et al.2003;Ohsuga et al.2005;Yuan et al.2012;Jiang -et al.2014;McKinney et al.2014;Narayan et al.2022). +field of research (e.g., Blandford & Begelman 1999; Park +& Ostriker 2001; Narayan et al. 2003; Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan et al. 2012; Jiang +et al. 2014; McKinney et al. 2014; Narayan et al. 2022). Heuristically, if a collision between a BH and a star results in an accretion disk, the disk’s viscous timescale may be as low as days. The resultant luminosity can unbind most of the captured material, though details such as the amount accreted and peak luminosity remain - uncertain (e.g.,Yuan et al.(2012);Jiang et al. 
-(2014), see also the discussion inStone et al.(2017), -Rizzuto et al.(2022), andKremer et al.(2022)). The + uncertain (e.g., Yuan et al. (2012); Jiang et al. +(2014), see also the discussion in Stone et al. (2017), +Rizzuto et al. (2022), and Kremer et al. (2022)). The question becomes whether or not a BH can still accumulate significant amounts of mass over many collisions even if it accretes very little in a single one. We explore the viability of our channel using a physically motivated inefficient accretion model. Several studies have invoked momentum-driven winds in BH accretion (e.g., -Murray et al.2005;Ostriker et al.2010;Brennan et al. +Murray et al. 2005; Ostriker et al. 2010; Brennan et al. 2018). We thus estimate the fraction of captured mass accreted to be approximately v - esc /(cη ), where v - esc is +esc/(cη), where v +esc is the escape velocity from the BH at 1 R and η is the -accretion efficiency at the ISCO. We take η to be 0 .1 -(e.g.,Yu & Tremaine2002). This expression for the -fraction accreted is consistent withKremer et al.(2022) +accretion efficiency at the ISCO. We take η to be 0.1 +(e.g., Yu & Tremaine 2002). This expression for the +fraction accreted is consistent with Kremer et al. (2022) equation 19 for s = 0.5, which is a reasonable value for -s, a free parameter between 0 . 2 and 0 . 8. We discuss +s, a free parameter between 0.2 and 0.8. We discuss the results of the momentum-driven winds estimate in -Section3. We note that the accretion process may be +Section 3. We note that the accretion process may be more efficient than this estimate implies if, for example, jets or other instabilities result in the beaming of radiation - away from the captured material (e.g.,Blandford -& Zna jek1977;Begelman1979;De Villiers et al.2005; -McKinney & Gammie2004;McKinney2006;Igumenshchev2008;Begelman2012a,b;McKinney - et al.2014). + away from the captured material (e.g., Blandford +& Zna jek 1977; Begelman 1979; De Villiers et al. 
2005; +McKinney & Gammie 2004; McKinney 2006; Igumenshchev + 2008; Begelman 2012a,b; McKinney et al. 2014). 2.6. GW Inspiral When a BH is close to the SMBH, GW emission can circularize and shrink its orbit. We implement the effects of GW emission on the BH’s semima jor axis and -eccentricity followingPeters & Mathews(1963a). The +eccentricity following Peters & Mathews (1963a). The characteristic timescale to merge a BH with an SMBH is given by: t -GW ≈ 2. 9 × 10 12 +GW ≈ 2.9 × 1012 yr M - • -10 6 +• +106 M - −1 +−1 m BH -10 6 +106 M - −1 +−1 × M - • + m +• + m BH -2 × 10 6 +2 × 106 M -− 1 +−1 a • -10− 2 +10−2 pc - 4 +4 × f (e - • )(1 − e 2 -• )7/ 2 +•)(1 − e2 +•)7/2 , (9) where f (e - • ) is a function of e - • . For all values of e - • , +•) is a function of e +•. For all values of e +•, f (e - • ) is between 0 . 979 and 1 .81 (Blaes et al.2002). We +•) is between 0.979 and 1.81 (Blaes et al. 2002). We plot this timescale for a 1 × 105 M - BH in Figure1in + BH in Figure 1 in blue. - Rose et al. -Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow ( α = 1) to -cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq.8, taking m + Rose et al. +Figure 3. On the right, we plot final masses of 500 BHs using different values of α in the density profile, shallow (α = 1) to +cuspy (α = 2). For the latter case, the purple line shows the analytical result from Eq. 8, taking m initial to be the average mass of the population. Faded stars indicate BHs that merged with the SMBH through GWs. On the left, we plot the masses and merger times of these BHs. In our simulations, we assume a BH has merged with the SMBH when the condition t - GW < t +GW < t elapsed is met. When this condition is satisfied, we terminate mass growth through collisions for that BH.4 @@ -623,35 +623,35 @@ the BH masses as a function of time. 
Here, we examine the sensitivity of the BH growth to the density power law. From Eq. (1), it is clear that the growth rate depends on the stellar density profile, governed by the index - α . We expect that higher values of α , or steeper + α. We expect that higher values of α, or steeper profiles, will result in more efficient mass growth. In -Figure1, larger values of α lead to collision timescales -in the GN’s inner region, inwards of 0 .25 pc, that are -much smaller that the 10 Gyr simulation time. Figure3 +Figure 1, larger values of α lead to collision timescales +in the GN’s inner region, inwards of 0.25 pc, that are +much smaller that the 10 Gyr simulation time. Figure 3 confirms this expectation. It depicts the mass growth of a uniform distribution of BHs with initial conditions detailed - in Section2.1for five α values, spanning 1 (green) + in Section 2.1 for five α values, spanning 1 (green) to 2 (purple). The most massive IMBHs form inwards -of 0 .25 pc for the α = 2 case. +of 0.25 pc for the α = 2 case. 2.8. Gravitational Wave Mergers and Intermediate and Extreme Mass Ratio Inspiral Candidates Towards the SMBH, efficient collisions can create BHs massive enough to merge with the SMBH through GWs. -Following the method detailed in Section2.6, when a +Following the method detailed in Section 2.6, when a given BH meets the criterion t GW < t -elapsed , we mark +elapsed, we mark 4 For comparison, we also incrementally changed the semimajor axis and eccentricity from GW emission following the equations -inPeters & Mathews(1963b). This method leads to a slight +in Peters & Mathews (1963b). This method leads to a slight increase in the final IMBH masses because it accounts for the collisions that take place while the orbit is gradually shrinking. it as merged with the SMBH. 
We assume that at this point the dynamics of the BH will be determined by GW emission, shrinking and circularizing the BHs orbit until it undergoes an extreme or intermediate mass ratio inspiral (EMRI and IMRI, respectively). The righthand -plot in Figure3shows the BH masses versus time of +plot in Figure 3 shows the BH masses versus time of merger. It is interesting to note that even in the absence of relaxation processes, which are often invoked to explain the formation of EMRIs, EMRIs and notably @@ -663,69 +663,69 @@ relaxation time, these interactions alter its orbit about the SMBH. The two-body relaxation timescale for a single-mass system is: t - relax = 0.34 σ 3 +relax = 0.34 σ3 G2 - ρ M - ∗ ln Λ +ρM +∗ ln Λ rlx , (10) where ln Λ rlx is the Coulomb logarithm and M - ∗ is the +∗ is the average mass of the surrounding ob jects, here assumed to be 1 M - (Spitzer1987;Binney & Tremaine2008, + (Spitzer 1987; Binney & Tremaine 2008, Eq. (7.106)). This equation represents the approximate timescale for a BH on a semi-circular orbit to change its orbital energy and angular momentum by order of themselves. The BH experiences diffusion in its angular momentum and energy as a function of time (depending on the eccentricity of the orbit, this process can be more -efficientFragione & Sari2018;Sari & Fragione2019). +efficient Fragione & Sari 2018; Sari & Fragione 2019). Relaxation can cause the orbit of an ob ject in a GN to reach high eccentricities. If the ob ject is a BH, it can spiral into the SMBH and form an EMRI, while a star IMBH Formation in Galactic Nuclei 7 -can be tidally disrupted by the SMBH (e.g.Magorrian -& Tremaine1999;Wang & Merritt2004;Hopman & -Alexander2005;Aharon & Perets2016;Stone & Metzger2016;Amaro-Seoane2018;Sari - & Fragione2019; -Naoz et al.2022). The relaxation process is therefore -crucial to our study. In Figure1, we plot the relaxation -timescale in gold for a range of α . 
We note that theBahcall - & Wolf(1976) profile, α = 7/4, corresponds to zero +can be tidally disrupted by the SMBH (e.g. Magorrian +& Tremaine 1999; Wang & Merritt 2004; Hopman & +Alexander 2005; Aharon & Perets 2016; Stone & Metzger + 2016; Amaro-Seoane 2018; Sari & Fragione 2019; +Naoz et al. 2022). The relaxation process is therefore +crucial to our study. In Figure 1, we plot the relaxation +timescale in gold for a range of α. We note that the Bahcall + & Wolf (1976) profile, α = 7/4, corresponds to zero net flux and therefore does not preferentially migrate ob jects inward. Additionally, because BHs are more massive on average than the surrounding ob jects, they are expected -to segregate inwards in the GN (e.g.,Shapiro & -Marchant1978;Cohn & Kulsrud1978;Morris1993; -Miralda-Escud´e & Gould2000;Baumgardt et al.2004). +to segregate inwards in the GN (e.g., Shapiro & +Marchant 1978; Cohn & Kulsrud 1978; Morris 1993; +Miralda-Escud´e & Gould 2000; Baumgardt et al. 2004). They sink toward the SMBH on the mass segregation timescale, t - seg ≈ M - ∗ /m - BH × t - relax (e.g.,Spitzer1987; -Fregeau et al.2002;Merritt2006), which is typically an +seg ≈ M +∗/m +BH × t +relax (e.g., Spitzer 1987; +Fregeau et al. 2002; Merritt 2006), which is typically an order of magnitude smaller than the relaxation timescale -plotted in Figure1. +plotted in Figure 1. We incorporate relaxation processes by introducing a small change in the BH’s energy and angular momentum each time it orbits the SMBH. We apply a small -instantaneous velocity kick to the BH, denoted as ∆ v . +instantaneous velocity kick to the BH, denoted as ∆v. We draw ∆v from a Guassian distribution with average -of zero and a standard deviation of ∆ v - rlx / √ - 3, where -∆ v - rlx = v - • - P - • /t - rlx (seeBradnick et al.2017, for an +of zero and a standard deviation of ∆v +rlx/√ +3, where +∆v +rlx = v +• +P +•/t +rlx (see Bradnick et al. 2017, for an approach to changes in the angular momentum). 
The -new orbital parameters can be calculated followingLu -& Naoz(2019), and seeNaoz et al.(2022) for the full +new orbital parameters can be calculated following Lu +& Naoz (2019), and see Naoz et al. (2022) for the full set of equations. We account for the effects of relaxation processes, including mass-segregation, using a multi-faceted approach. @@ -742,8 +742,8 @@ scattering for both black holes and stars. Within this radius, BHs will then settle onto a Bahcall-Wolf profile, while the stars may follow a shallower profile, with approximately n - ∝ r − 1.5 - , inwards of the transition radius + ∝ r−1.5 +, inwards of the transition radius (Linial & Sari in prep.). Therefore, after the initial mass segregation, we allow the BHs to begin diffusing over a relaxation timescale, @@ -752,11 +752,11 @@ their orbital parameters changing slowly through a random may migrate closer to the SMBH. We terminate mass growth when the BH enters the inner 200 au of the GN, within which the density of stars is uncertain. This cutoff is based on the 120 au pericenter of S0-2, the closest -known star to the SMBH (e.g.,Ghez et al.2005). +known star to the SMBH (e.g., Ghez et al. 2005). Another physical process that causes inward migration is dynamical friction. A cursory derivation based -on the dynamical friction equations described inBinney -& Tremaine(2008) reveals the process to have a similar +on the dynamical friction equations described in Binney +& Tremaine (2008) reveals the process to have a similar timescale to mass segregation. If a BH diffuses to a distance greater than 2 pc from the SMBH, exiting the sphere of influence, we have it sink inwards, back @@ -770,66 +770,66 @@ towards the SMBH, their concentration in the inner region scattering. We reserve the inclusion of these interactions for future study. 2.10. 
Effect of Relaxation Processes -As depicted in Figure4, two-body relaxation processes +As depicted in Figure 4, two-body relaxation processes result in more EMRIs and IMRIs events. These processes allow BHs that begin further from the SMBH to migrate inwards and grow more efficiently in mass. However, it also impedes the growth of BHs that are initially closer to the SMBH by allowing them to diffuse out of the inner region where collisions are efficient. -As can be seen in Figure4, the net result is that more +As can be seen in Figure 4, the net result is that more BHs grow, but the maximum mass is lower compared to the scenario that ignores two-body relaxation. The -histogram in Figure4presents the final BH mass distributions - for different power law indices α . As expected, +histogram in Figure 4 presents the final BH mass distributions + for different power law indices α. As expected, the two-body relaxation suppresses the α dependence -highlighted in Figure3. In fact, using a KS test, we +highlighted in Figure 3. In fact, using a KS test, we find that we cannot reject the hypothesis that the two distributions were drawn from the same sample for the α = 1.75 and α = 2 results. Interestingly, a BH mass IMF with an average of 10 M leads to a final distribution with an average of ∼ 200 M - and a median of + and a median of ∼ 45 M - , which lies within the mass gap. +, which lies within the mass gap. 3. DISCUSSION AND PREDICTIONS We explore the feasibility of forming IMBHs in a GN through successive collisions between a stellar-mass BH and main-sequence stars. Taking both a statistical and analytic approach, we show that this channel can produce IMBHs efficiently with masses as high as -10 3− 4 +103−4 M and may result in many IMBH-SMBH mergers (intermediate-mass ratio inspirals, or IMRIs) and EMRIs. - Rose et al. -Figure 4. Similar to Figure3, we plot the initial masses versus initial distance (grey) and final mass versus final distance + Rose et al. +Figure 4. 
Similar to Figure 3, we plot the initial masses versus initial distance (grey) and final mass versus final distance (red) for 500 BHs. This simulation includes relaxation processes, including mass segregation, diffusion, and dynamical friction. -We assume α = 1 . 75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward -migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure3. Additionally, +We assume α = 1.75 for the GN density profile. Faded stars represent BHs that merge with the SMBH. As a result of inward +migration, BHs merge more quickly with the SMBH, before they can become as massive as those in Figure 3. Additionally, more BHs become EMRIs and IMRIs. Additionally, in the third panel, we show a histogram of the simulated IMBH masses -for two different values of α , 1 . 5 (orange, solid), α , 1 . 75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation -processes. We also show the results for a simulation with α = 1. 75 that accounts for momentum-driven winds (black, dotted). +for two different values of α, 1.5 (orange, solid), α, 1.75 (red, dashed), and 2 (purple, dash-dotted), accounting for relaxation +processes. We also show the results for a simulation with α = 1.75 that accounts for momentum-driven winds (black, dotted). Despite the substantially reduced accretion, BHs in the mass gap still form. As the stellar mass BH collides with a star, the BH will grow in mass. The increase may equal star’s entire mass if the relative velocity is smaller than the escape velocity from the BH at 1 R - . However, near the +. However, near the SMBH, the velocity dispersion may be larger than the escape velocity from the BH at the star’s radius. In this limit, the BH captures a “tunnel” of material through the star, estimated using Bondi-Hoyle-Lyttleton accretion. 
In our statistical analysis, we account for BondiHoyle-Lyttleton accretion and find that BHs outside of -10 −2 +10−2 pc from the SMBH can capture the entire star (see -Figure2). +Figure 2). The efficiency of collisions, and therefore IMBH, EMRI, and IMRI formation as well, are sensitive to -the underlying stellar density. As shown in Figure3, a +the underlying stellar density. As shown in Figure 3, a steeper density profile results in larger IMBHs. This behavior can be understood from the collision timescale’s dependence on the stellar density profile. A steeper profile @@ -838,29 +838,29 @@ However, the inclusion of relaxation processes in the simulations dampens the influence of the stellar density profile by allowing BHs to diffuse into regions of more or less efficient growth. As a result, more BHs grow in -mass, but their maximum mass is smaller ( ∼ 104 +mass, but their maximum mass is smaller (∼ 104 M - ). +). Additionally, the final masses have no apparent dependence - on distance from the SMBH (see Figure4). + on distance from the SMBH (see Figure 4). Most simulations in our study assume that the BHs accrete all of the mass that they capture. The final BH masses can be taken as an upper limit. We note that the accretion is a highly uncertain process and represents - an active field of study (e.g.,Blandford & Begelman1999;Park - & Ostriker2001;Narayan et al.2003; -Igumenshchev et al.2003;Ohsuga et al.2005;Yuan -et al.2012;Jiang et al.2014;McKinney et al.2014; -Narayan et al.2022). To assess the limits of our model, we also consider a physically motivated accretion model, -momentum-driven winds (Section2.5). We present the + an active field of study (e.g., Blandford & Begelman + 1999; Park & Ostriker 2001; Narayan et al. 2003; +Igumenshchev et al. 2003; Ohsuga et al. 2005; Yuan +et al. 2012; Jiang et al. 2014; McKinney et al. 2014; +Narayan et al. 2022). 
To assess the limits of our model, we also consider a physically motivated accretion model, +momentum-driven winds (Section 2.5). We present the final mass distribution for momentum-driven winds in -Figure4. Importantly, we find that BHs within the +Figure 4. Importantly, we find that BHs within the mass gap still form naturally despite the substantially reduced accretion. About 5% of the BHs grow by 10 to 100 M - . Furthermore, if we increase this ∆M estimate - by a factor of 2 (i.e., use η = 0. 05), the simulation - produces a 3. 5 × 10 3 +. Furthermore, if we increase this ∆M estimate + by a factor of 2 (i.e., use η = 0.05), the simulation + produces a 3.5 × 103 M IMBH for the same initial conditions. Our proof-of-concept demonstrates that collisions @@ -870,27 +870,27 @@ as a GN. Mass growth through BH-main-sequence star collisions may act in concert with other IMBH formation channels, such as compact ob ject binary mergers (e.g., -Hoang et al.2018;Stephan et al.2019;Fragione et al. -2021;Wang et al.2021). While in some cases collisions - can unbind a binary (e.g.,Sigurdsson & Phinney -1993;Fregeau et al.2004), BH binaries can be tightly +Hoang et al. 2018; Stephan et al. 2019; Fragione et al. +2021; Wang et al. 2021). While in some cases collisions + can unbind a binary (e.g., Sigurdsson & Phinney +1993; Fregeau et al. 2004), BH binaries can be tightly bound enough to withstand the collisions. Wide binaries may also become unbound due to interactions with -the neighboring stars and compact ob jects (e.g.,Binney -& Tremaine1987;Rose et al.2020, see latter study for +the neighboring stars and compact ob jects (e.g., Binney +& Tremaine 1987; Rose et al. 2020, see latter study for the timescale for an arbitrary eccentricity). 
However, as highlighted in previous studies, a substantial fraction of these binaries may merge due to the Eccentric Kozai Lidov mechanism, leaving behind a single star or -a single compact ob ject (e.g.,Stephan et al.2016,2019; -Hoang et al.2018). Additionally, to be susceptible to +a single compact ob ject (e.g., Stephan et al. 2016, 2019; +Hoang et al. 2018). Additionally, to be susceptible to evaporation, BH binaries must have a wider configuration. Otherwise, they will be more tightly bound than the average kinetic energy of the surrounding ob jects and will only harden through weak gravitational inter- IMBH Formation in Galactic Nuclei 9 actions with neighboring stars (see for example Figure -6 inRose et al.2020). +6 in Rose et al. 2020). We note that we assume a steady-state and treat the stars as a reservoir in this model. Future work will take a more nuanced approach to the background stars, whose @@ -898,41 +898,41 @@ density as a function of time can be influenced by several factors. Firstly, the relaxation of the stellar population occurs on Gyr timescales. Some studies have suggested that in situ star formation can occur in the Galactic -Center as close as 0.04 pc from the SMBH (e.g.,Levin -& Beloborodov2003;Paumard et al.2006), and star +Center as close as 0.04 pc from the SMBH (e.g., Levin +& Beloborodov 2003; Paumard et al. 2006), and star formation episodes can occur as often as every ∼ 5 Myr -(e.g.Lu et al.2009). Therefore, we expect that after -the first Gyr, stars within 0 .01 pc will be replenished +(e.g. Lu et al. 2009). Therefore, we expect that after +the first Gyr, stars within 0.01 pc will be replenished at intervals consistent with the star formation episodes; the infalling populations of stars are separated by ∼ 5 − 10 Myr, which is shorter than the collision timescale. However, star-star collisions may complicate this picture - within ∼ 0. 01 pc. As discussed above, regular star + within ∼ 0.01 pc. 
As discussed above, regular star formation ensures the BHs always have a stellar population - to interact with outside of ∼ 0. 01 pc.5 - At 0 . 01 pc, + to interact with outside of ∼ 0.01 pc.5 + At 0.01 pc, however, the kinetic energy during a collision between two 1 M - stars is larger than their binding energies. + stars is larger than their binding energies. Collisions can therefore thin out the stellar populations during the time it takes them to diffuse to these small -radii, 0 .01 pc, and may reduce the BH growth in the +radii, 0.01 pc, and may reduce the BH growth in the innermost region. We reserve the inclusion of star-star collisions for future work. We also note that the disruption of binary stars by the SMBH may help replenish the stellar population even as collisions work to deplete -it (e.g.,Balberg et al.2013); when a binary is disrupted, +it (e.g., Balberg et al. 2013); when a binary is disrupted, one of the stars is captured on a tightly bound orbit about the SMBH. An IMBH may also affect the stellar density profile. As it spirals into the SMBH, it can perturb stellar orbits, and these interactions can lead to hypervelocity stars -(e.g.,Baumgardt et al.2006a;L¨ockmann & Baumgardt -2008).L¨ockmann & Baumgardt(2008) show that an +(e.g., Baumgardt et al. 2006a; L¨ockmann & Baumgardt +2008). L¨ockmann & Baumgardt (2008) show that an IMBH can modify an initially steep stellar density profile to become consistent with the flatter cusp observed in the Galactic Center. The stars may then be replenished - on 100 Myr timescales (Baumgardt et al.2006a). + on 100 Myr timescales (Baumgardt et al. 2006a). Therefore, after the formation of the first few IMBHs, subsequent BH growth may occur in bursts, coinciding with replenishment of the stars. 
@@ -940,45 +940,45 @@ While there are many competing dynamical processes that shape the stellar density profile, we stress that α 5 In fact, the star-star collision timescale is greater than 10 Myr -for the entire parameter space, save at 0. 001 pc for larger values -of α ; the BH-star collision timescale plotted in Fig. 1 is the same +for the entire parameter space, save at 0.001 pc for larger values +of α; the BH-star collision timescale plotted in Fig. 1 is the same order of magnitude as the star-star collision timescale. can simply be chosen to encapsulate all of the relevant physics. A value for α that is constrained by observations must already reflect ongoing processes like starstar - collisions and replenishment.Sch¨odel et al.(2018) + collisions and replenishment. Sch¨odel et al. (2018) find the observed stellar mass enclosed within 0.01 pc of the Milky Way’s Galactic Center to be approximately 180 M - . This estimate is consistent to order of magnitude +. This estimate is consistent to order of magnitude with our α = 1.25 case. In a simulation like those -depicted in Figure 4, which include relaxation, α = 1. 25 +depicted in Figure 4, which include relaxation, α = 1.25 leads to a maximum IMBH mass of 140 M - . Furthermore, +. Furthermore, while the stellar mass within 0.01 pc may be a few hundred M - ,Do et al.(2019) andGRAVITY Collaboration - et al.(2020) set an upper limit on the mass +, Do et al. (2019) and GRAVITY Collaboration + et al. (2020) set an upper limit on the mass enclosed within the orbit of S0-2 to be about a few thousand M - , or 0. 1% of the central mass. This upper limit +, or 0.1% of the central mass. This upper limit can include mass that was previously in stars but is now in BHs. In that case, the 180 M - is what remains of the + is what remains of the stars, while BHs and IMBHs make up the ∼ 1000 M in the innermost region. 
Also not included in this study, collisions between the BH and other compact ob jects will increase the BH -growth rate. BH-BH mergers (e.g.,O’Leary et al.2009; -Fragione et al.2021) and even neutron star BH mergers -(e.g.,Hoang et al.2020) become more likely as the BHs +growth rate. BH-BH mergers (e.g., O’Leary et al. 2009; +Fragione et al. 2021) and even neutron star BH mergers +(e.g., Hoang et al. 2020) become more likely as the BHs increase in mass through stellar collisions. As a result, -the BH-BH collision timescale, discussed in Section2.2, +the BH-BH collision timescale, discussed in Section 2.2, will become relevant to our simulations, allowing the BHs to grow through this channel in addition to stellar collisions. Additionally, this compact ob ject mergers result in GW recoil, which may have a large impact on -the dynamics (e.g.,Baibhav et al.2020;Fragione et al. +the dynamics (e.g., Baibhav et al. 2020; Fragione et al. 2021). The BH’s mass growth increases GW emission, which dissipates energy from the orbit. Along with relaxation, @@ -994,23 +994,23 @@ Our results also suggest that BHs within the mass gap as well as IMBHs likely exist in many galactic nuclei, as well as within our own galactic center. This implication seems to be consistent with recent observational and -theoretical studies (e.g.,Hansen & Milosavljevi´c2003; -Maillard et al.2004;G¨urkan & Rasio2005;Gualandris -& Merritt2009;Chen & Liu2013;Generozov & Madigan2020;Fragione - et al.2020a;Zheng et al.2020;Naoz -et al.2020;GRAVITY Collaboration et al.2020). - Rose et al. +theoretical studies (e.g., Hansen & Milosavljevi´c 2003; +Maillard et al. 2004; G¨urkan & Rasio 2005; Gualandris +& Merritt 2009; Chen & Liu 2013; Generozov & Madigan + 2020; Fragione et al. 2020a; Zheng et al. 2020; Naoz +et al. 2020; GRAVITY Collaboration et al. 2020). + Rose et al. 
Lastly, the collisions between stellar mass BHs and stars may contribute to the x-ray emission from our -galactic centre (e.g.,Muno et al.2005,2009;Hailey -et al.2018;Zhu et al.2018;Cheng et al.2018, seeKremer - et al.(2022) for a discussion of electromagnetic signatures - from BH-star collisions) 6 - . These interactions, +galactic centre (e.g., Muno et al. 2005, 2009; Hailey +et al. 2018; Zhu et al. 2018; Cheng et al. 2018, see Kremer + et al. (2022) for a discussion of electromagnetic signatures + from BH-star collisions)6 +. These interactions, in particular grazing collisions, may also result in tidal -disruption events (e.g.,Baumgardt et al.2006b;Perets -et al.2016;Stone et al.2017;Samsing et al.2019;Kremer - et al.2021). Thus, the process outlined here may +disruption events (e.g., Baumgardt et al. 2006b; Perets +et al. 2016; Stone et al. 2017; Samsing et al. 2019; Kremer + et al. 2021). Thus, the process outlined here may produce electromagnetic signatures in addition to GW mergers. We thank the anonymous referee for useful comments. @@ -1030,354 +1030,354 @@ Science Foundation grant PHY-1607611. REFERENCES Abbott, B. P., Abbott, R., Abbott, T. D., et al. 2016, PhRvL, 116, 241102, -doi:10.1103/PhysRevLett.116.241102 +doi: 10.1103/PhysRevLett.116.241102 —. 2017a, PhRvL, 118, 221101, -doi:10.1103/PhysRevLett.118.221101 +doi: 10.1103/PhysRevLett.118.221101 —. 2017b, PhRvL, 119, 141101, -doi:10.1103/PhysRevLett.119.141101 +doi: 10.1103/PhysRevLett.119.141101 Aharon, D., & Perets, H. B. 2016, ApJL, 830, L1, -doi:10.3847/2041- 8205/830/1/L1 -Alexander, T. 1999, ApJ, 527, 835, doi:10.1086/308129 +doi: 10.3847/2041- 8205/830/1/L1 +Alexander, T. 1999, ApJ, 527, 835, doi: 10.1086/308129 Alexander, T., & Pfuhl, O. 2014, ApJ, 780, 148, -doi:10.1088/0004- 637X/780/2/148 +doi: 10.1088/0004- 637X/780/2/148 Amaro-Seoane, P. 
2018, Living Reviews in Relativity, 21, 4, -doi:10.1007/s41114- 018-0013- 8 +doi: 10.1007/s41114- 018-0013- 8 6 The connection between the observed X-ray sources at the Galactic - Center and tidal capture has been suggested byGenerozov -et al.(2018), but seeZhu et al.(2018);Stephan et al.(2019) for -alternative channels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. + Center and tidal capture has been suggested by Generozov +et al. (2018), but see Zhu et al. (2018); Stephan et al. (2019) for +alternative channels. Arca Sedda, M., Mapelli, M., Benacquista, M., & Spera, M. 2021, arXiv e-prints, arXiv:2109.12119. https://arxiv.org/abs/2109.12119 Bahcall, J. N., & Wolf, R. A. 1976, ApJ, 209, 214, -doi:10.1086/154711 +doi: 10.1086/154711 Baibhav, V., Gerosa, D., Berti, E., et al. 2020, PhRvD, 102, -043002, doi:10.1103/PhysRevD.102.043002 +043002, doi: 10.1103/PhysRevD.102.043002 Balberg, S., Sari, R., & Loeb, A. 2013, MNRAS, 434, L26, -doi:10.1093/mnrasl/slt071 +doi: 10.1093/mnrasl/slt071 Baumgardt, H., Gualandris, A., & Portegies Zwart, S. 2006a, MNRAS, 372, 174, -doi:10.1111/j.1365- 2966.2006.10818.x +doi: 10.1111/j.1365- 2966.2006.10818.x Baumgardt, H., Hopman, C., Portegies Zwart, S., & Makino, J. 2006b, MNRAS, 372, 467, -doi:10.1111/j.1365- 2966.2006.10885.x +doi: 10.1111/j.1365- 2966.2006.10885.x Baumgardt, H., Makino, J., & Ebisuzaki, T. 2004, ApJ, -613, 1143, doi:10.1086/423299 +613, 1143, doi: 10.1086/423299 Begelman, M. C. 1979, MNRAS, 187, 237, -doi:10.1093/mnras/187.2.237 -—. 2012a, ApJL, 749, L3, doi:10.1088/2041- 8205/749/1/L3 +doi: 10.1093/mnras/187.2.237 +—. 2012a, ApJL, 749, L3, doi: 10.1088/2041- 8205/749/1/L3 IMBH Formation in Galactic Nuclei 11 —. 2012b, MNRAS, 420, 2912, -doi:10.1111/j.1365- 2966.2011.20071.x +doi: 10.1111/j.1365- 2966.2011.20071.x Begelman, M. C., Volonteri, M., & Rees, M. J. 
2006, -MNRAS, 370, 289, doi:10.1111/j.1365-2966.2006.10467.x +MNRAS, 370, 289, doi: 10.1111/j.1365-2966.2006.10467.x Belczynski, K., Hirschi, R., Kaiser, E. A., et al. 2020a, ApJ, -890, 113, doi:10.3847/1538- 4357/ab6d77 -—. 2020b, ApJ, 890, 113, doi:10.3847/1538- 4357/ab6d77 +890, 113, doi: 10.3847/1538- 4357/ab6d77 +—. 2020b, ApJ, 890, 113, doi: 10.3847/1538- 4357/ab6d77 Bertone, G., Fornasa, M., Taoso, M., & Zentner, A. R. 2009, New Journal of Physics, 11, 105016, -doi:10.1088/1367- 2630/11/10/105016 +doi: 10.1088/1367- 2630/11/10/105016 Binney, J., & Tremaine, S. 1987, Galactic dynamics —. 2008, Galactic Dynamics: Second Edition Blaes, O., Lee, M. H., & Socrates, A. 2002, ApJ, 578, 775, -doi:10.1086/342655 +doi: 10.1086/342655 Blandford, R. D., & Begelman, M. C. 1999, MNRAS, 303, -L1, doi:10.1046/j.1365-8711.1999.02358.x +L1, doi: 10.1046/j.1365-8711.1999.02358.x Blandford, R. D., & Zna jek, R. L. 1977, MNRAS, 179, 433, -doi:10.1093/mnras/179.3.433 +doi: 10.1093/mnras/179.3.433 Blecha, L., Ivanova, N., Kalogera, V., et al. 2006, ApJ, 642, -427, doi:10.1086/500727 +427, doi: 10.1086/500727 Bondi, H. 1952, MNRAS, 112, 195, -doi:10.1093/mnras/112.2.195 +doi: 10.1093/mnras/112.2.195 Bondi, H., & Hoyle, F. 1944, MNRAS, 104, 273, -doi:10.1093/mnras/104.5.273 +doi: 10.1093/mnras/104.5.273 Bradnick, B., Mandel, I., & Levin, Y. 2017, MNRAS, 469, -2042, doi:10.1093/mnras/stx1007 +2042, doi: 10.1093/mnras/stx1007 Brennan, R., Choi, E., Somerville, R. S., et al. 2018, ApJ, -860, 14, doi:10.3847/1538- 4357/aac2c4 +860, 14, doi: 10.3847/1538- 4357/aac2c4 Bringmann, T., Huang, X., Ibarra, A., Vogl, S., & Weniger, C. 2012, JCAP, 2012, 054, -doi:10.1088/1475- 7516/2012/07/054 +doi: 10.1088/1475- 7516/2012/07/054 Centrella, J., Baker, J. G., Kelly, B. J., & van Meter, J. R. 2010, Reviews of Modern Physics, 82, 3069, -doi:10.1103/RevModPhys.82.3069 +doi: 10.1103/RevModPhys.82.3069 Chen, X., & Liu, F. K. 
2013, ApJ, 762, 95, -doi:10.1088/0004- 637X/762/2/95 +doi: 10.1088/0004- 637X/762/2/95 Cheng, Z., Li, Z., Xu, X., & Li, X. 2018, ApJ, 858, 33, -doi:10.3847/1538- 4357/aaba16 +doi: 10.3847/1538- 4357/aaba16 Choi, J.-H., Shlosman, I., & Begelman, M. C. 2015, -MNRAS, 450, 4411, doi:10.1093/mnras/stv694 +MNRAS, 450, 4411, doi: 10.1093/mnras/stv694 Christensen-Dalsgaard, J., Dappen, W., Ajukov, S. V., et al. 1996, Science, 272, 1286, -doi:10.1126/science.272.5266.1286 +doi: 10.1126/science.272.5266.1286 Cohn, H., & Kulsrud, R. M. 1978, ApJ, 226, 1087, -doi:10.1086/156685 +doi: 10.1086/156685 Dale, J. E., & Davies, M. B. 2006, MNRAS, 366, 1424, -doi:10.1111/j.1365- 2966.2005.09937.x +doi: 10.1111/j.1365- 2966.2005.09937.x Dale, J. E., Davies, M. B., Church, R. P., & Freitag, M. 2009, MNRAS, 393, 1016, -doi:10.1111/j.1365- 2966.2008.14254.x Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, -MNRAS, 508, 3045, doi:10.1093/mnras/stab2783 +doi: 10.1111/j.1365- 2966.2008.14254.x Dall’Amico, M., Mapelli, M., Di Carlo, U. N., et al. 2021, +MNRAS, 508, 3045, doi: 10.1093/mnras/stab2783 Das, A., Schleicher, D. R. G., Basu, S., & Boekholt, T. C. N. 2021a, MNRAS, 505, 2186, -doi:10.1093/mnras/stab1428 +doi: 10.1093/mnras/stab1428 Das, A., Schleicher, D. R. G., Leigh, N. W. C., & Boekholt, T. C. N. 2021b, MNRAS, 503, 1051, -doi:10.1093/mnras/stab402 +doi: 10.1093/mnras/stab402 De Villiers, J.-P., Hawley, J. F., Krolik, J. H., & Hirose, S. -2005, ApJ, 620, 878, doi:10.1086/427142 +2005, ApJ, 620, 878, doi: 10.1086/427142 Di Carlo, U. N., Giacobbo, N., Mapelli, M., et al. 2019, -MNRAS, 487, 2947, doi:10.1093/mnras/stz1453 +MNRAS, 487, 2947, doi: 10.1093/mnras/stz1453 Di Carlo, U. N., Mapelli, M., Pasquato, M., et al. 2021, -MNRAS, 507, 5132, doi:10.1093/mnras/stab2390 +MNRAS, 507, 5132, doi: 10.1093/mnras/stab2390 Do, T., Hees, A., Ghez, A., et al. 2019, Science, 365, 664, -doi:10.1126/science.aav8137 +doi: 10.1126/science.aav8137 Ebisuzaki, T., Makino, J., Tsuru, T. 
G., et al. 2001, ApJL, -562, L19, doi:10.1086/338118 +562, L19, doi: 10.1086/338118 Eda, K., Itoh, Y., Kuroyanagi, S., & Silk, J. 2013, PhRvL, -110, 221101, doi:10.1103/PhysRevLett.110.221101 +110, 221101, doi: 10.1103/PhysRevLett.110.221101 Edgar, R. 2004, NewAR, 48, 843, -doi:10.1016/j.newar.2004.06.001 +doi: 10.1016/j.newar.2004.06.001 Escala, A. 2021, ApJ, 908, 57, -doi:10.3847/1538- 4357/abd93c +doi: 10.3847/1538- 4357/abd93c Ferrara, A., Salvadori, S., Yue, B., & Schleicher, D. 2014, Monthly Notices of the Royal Astronomical Society, 443, -2410, doi:10.1093/mnras/stu1280 +2410, doi: 10.1093/mnras/stu1280 Fishbach, M., Farr, W. M., & Holz, D. E. 2020, ApJL, 891, -L31, doi:10.3847/2041-8213/ab77c9 +L31, doi: 10.3847/2041-8213/ab77c9 Fragione, G., Kocsis, B., Rasio, F. A., & Silk, J. 2021, arXiv e-prints, arXiv:2107.04639. https://arxiv.org/abs/2107.04639 Fragione, G., Loeb, A., Kremer, K., & Rasio, F. A. 2020a, -ApJ, 897, 46, doi:10.3847/1538-4357/ab94b2 +ApJ, 897, 46, doi: 10.3847/1538-4357/ab94b2 Fragione, G., Loeb, A., & Rasio, F. A. 2020b, ApJL, 902, -L26, doi:10.3847/2041-8213/abbc0a +L26, doi: 10.3847/2041-8213/abbc0a Fragione, G., & Sari, R. 2018, ApJ, 852, 51, -doi:10.3847/1538- 4357/aaa0d7 +doi: 10.3847/1538- 4357/aaa0d7 Fregeau, J. M., Cheung, P., Portegies Zwart, S. F., & Rasio, F. A. 2004, MNRAS, 352, 1, -doi:10.1111/j.1365- 2966.2004.07914.x +doi: 10.1111/j.1365- 2966.2004.07914.x Fregeau, J. M., Joshi, K. J., Portegies Zwart, S. F., & -Rasio, F. A. 2002, ApJ, 570, 171, doi:10.1086/339576 +Rasio, F. A. 2002, ApJ, 570, 171, doi: 10.1086/339576 Freitag, M., Amaro-Seoane, P., & Kalogera, V. 2006, ApJ, -649, 91, doi:10.1086/506193 +649, 91, doi: 10.1086/506193 Generozov, A., & Madigan, A.-M. 2020, ApJ, 896, 137, -doi:10.3847/1538- 4357/ab94bc +doi: 10.3847/1538- 4357/ab94bc Generozov, A., Stone, N. C., Metzger, B. D., & Ostriker, J. P. 2018, MNRAS, 478, 4030, -doi:10.1093/mnras/sty1262 - Rose et al. +doi: 10.1093/mnras/sty1262 + Rose et al. 
Genzel, R., Eisenhauer, F., & Gillessen, S. 2010, Reviews of Modern Physics, 82, 3121, -doi:10.1103/RevModPhys.82.3121 +doi: 10.1103/RevModPhys.82.3121 Genzel, R., Sch¨odel, R., Ott, T., et al. 2003, ApJ, 594, 812, -doi:10.1086/377127 +doi: 10.1086/377127 Ghez, A. M., Salim, S., Hornstein, S. D., et al. 2005, ApJ, -620, 744, doi:10.1086/427175 +620, 744, doi: 10.1086/427175 Gond´an, L., Kocsis, B., Raffai, P., & Frei, Z. 2018, ApJ, -860, 5, doi:10.3847/1538- 4357/aabfee +860, 5, doi: 10.3847/1538- 4357/aabfee Gonz´alez, E., Kremer, K., Chatterjee, S., et al. 2021, ApJL, -908, L29, doi:10.3847/2041- 8213/abdf5b +908, L29, doi: 10.3847/2041- 8213/abdf5b GRAVITY Collaboration, Abuter, R., Amorim, A., et al. -2020, A&A, 636, L5, doi:10.1051/0004- 6361/202037813 +2020, A&A, 636, L5, doi: 10.1051/0004- 6361/202037813 Gualandris, A., & Merritt, D. 2009, ApJ, 705, 361, -doi:10.1088/0004- 637X/705/1/361 +doi: 10.1088/0004- 637X/705/1/361 G¨urkan, M. A., Fregeau, J. M., & Rasio, F. A. 2006, ApJL, -640, L39, doi:10.1086/503295 +640, L39, doi: 10.1086/503295 G¨urkan, M. A., & Rasio, F. A. 2005, ApJ, 628, 236, -doi:10.1086/430694 +doi: 10.1086/430694 Hailey, C. J., Mori, K., Bauer, F. E., et al. 2018, Nature, -556, 70, doi:10.1038/nature25029 +556, 70, doi: 10.1038/nature25029 Hansen, B. M. S., & Milosavljevi´c, M. 2003, ApJL, 593, -L77, doi:10.1086/378182 +L77, doi: 10.1086/378182 Heger, A., Fryer, C. L., Woosley, S. E., Langer, N., & Hartmann, D. H. 2003, ApJ, 591, 288, -doi:10.1086/375341 +doi: 10.1086/375341 Hoang, B.-M., Naoz, S., Kocsis, B., Rasio, F. A., & Dosopoulou, F. 2018, ApJ, 856, 140, -doi:10.3847/1538- 4357/aaafce +doi: 10.3847/1538- 4357/aaafce Hoang, B.-M., Naoz, S., & Kremer, K. 2020, ApJ, 903, 8, -doi:10.3847/1538- 4357/abb66a +doi: 10.3847/1538- 4357/abb66a Hopman, C., & Alexander, T. 2005, ApJ, 629, 362, -doi:10.1086/431475 +doi: 10.1086/431475 Igumenshchev, I. V. 2008, ApJ, 677, 317, -doi:10.1086/529025 +doi: 10.1086/529025 Igumenshchev, I. 
V., Narayan, R., & Abramowicz, M. A. -2003, ApJ, 592, 1042, doi:10.1086/375769 +2003, ApJ, 592, 1042, doi: 10.1086/375769 Jiang, Y.-F., Stone, J. M., & Davis, S. W. 2014, ApJ, 796, -106, doi:10.1088/0004-637X/796/2/106 +106, doi: 10.1088/0004-637X/796/2/106 Johnson, J. L., & Bromm, V. 2007, Monthly Notices of the Royal Astronomical Society, 374, 1557, -doi:10.1111/j.1365- 2966.2006.11275.x +doi: 10.1111/j.1365- 2966.2006.11275.x Kremer, K., Lombardi, James C., J., Lu, W., Piro, A. L., & Rasio, F. A. 2022, arXiv e-prints, arXiv:2201.12368. https://arxiv.org/abs/2201.12368 Kremer, K., Lu, W., Piro, A. L., et al. 2021, ApJ, 911, 104, -doi:10.3847/1538- 4357/abeb14 +doi: 10.3847/1538- 4357/abeb14 Kremer, K., Spera, M., Becker, D., et al. 2020, ApJ, 903, -45, doi:10.3847/1538-4357/abb945 Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, -MNRAS, 498, 5652, doi:10.1093/mnras/staa2276 +45, doi: 10.3847/1538-4357/abb945 Kroupa, P., Subr, L., Jerabkova, T., & Wang, L. 2020, +MNRAS, 498, 5652, doi: 10.1093/mnras/staa2276 Levin, Y., & Beloborodov, A. M. 2003, ApJL, 590, L33, -doi:10.1086/376675 +doi: 10.1086/376675 Limongi, M., & Chieffi, A. 2018a, ApJS, 237, 13, -doi:10.3847/1538- 4365/aacb24 -—. 2018b, ApJS, 237, 13, doi:10.3847/1538- 4365/aacb24 +doi: 10.3847/1538- 4365/aacb24 +—. 2018b, ApJS, 237, 13, doi: 10.3847/1538- 4365/aacb24 L¨ockmann, U., & Baumgardt, H. 2008, MNRAS, 384, 323, -doi:10.1111/j.1365- 2966.2007.12699.x +doi: 10.1111/j.1365- 2966.2007.12699.x Lu, C. X., & Naoz, S. 2019, MNRAS, 484, 1506, -doi:10.1093/mnras/stz036 +doi: 10.1093/mnras/stz036 Lu, J. R., Ghez, A. M., Hornstein, S. D., et al. 2009, ApJ, -690, 1463, doi:10.1088/0004- 637X/690/2/1463 +690, 1463, doi: 10.1088/0004- 637X/690/2/1463 Madau, P., & Rees, M. J. 2001, ApJL, 551, L27, -doi:10.1086/319848 +doi: 10.1086/319848 Magorrian, J., & Tremaine, S. 1999, MNRAS, 309, 447, -doi:10.1046/j.1365- 8711.1999.02853.x +doi: 10.1046/j.1365- 8711.1999.02853.x Maillard, J. P., Paumard, T., Stolovy, S. 
R., & Rigaut, F. -2004, A&A, 423, 155, doi:10.1051/0004- 6361:20034147 +2004, A&A, 423, 155, doi: 10.1051/0004- 6361:20034147 Mapelli, M., Bouffanais, Y., Santoliquido, F., Arca Sedda, M., & Artale, M. C. 2021a, arXiv e-prints, -arXiv:2109.06222.https://arxiv.org/abs/2109.06222 +arXiv:2109.06222. https://arxiv.org/abs/2109.06222 Mapelli, M., Dall’Amico, M., Bouffanais, Y., et al. 2021b, -MNRAS, 505, 339, doi:10.1093/mnras/stab1334 +MNRAS, 505, 339, doi: 10.1093/mnras/stab1334 Mastrobuono-Battisti, A., Church, R. P., & Davies, M. B. -2021, MNRAS, 505, 3314, doi:10.1093/mnras/stab1409 +2021, MNRAS, 505, 3314, doi: 10.1093/mnras/stab1409 McKinney, J. C. 2006, MNRAS, 368, 1561, -doi:10.1111/j.1365- 2966.2006.10256.x +doi: 10.1111/j.1365- 2966.2006.10256.x McKinney, J. C., & Gammie, C. F. 2004, ApJ, 611, 977, -doi:10.1086/422244 +doi: 10.1086/422244 McKinney, J. C., Tchekhovskoy, A., Sadowski, A., & Narayan, R. 2014, MNRAS, 441, 3177, -doi:10.1093/mnras/stu762 +doi: 10.1093/mnras/stu762 Merritt, D. 2006, Reports on Progress in Physics, 69, 2513, -doi:10.1088/0034- 4885/69/9/R01 +doi: 10.1088/0034- 4885/69/9/R01 Miralda-Escud´e, J., & Gould, A. 2000, ApJ, 545, 847, -doi:10.1086/317837 -Morris, M. 1993, ApJ, 408, 496, doi:10.1086/172607 +doi: 10.1086/317837 +Morris, M. 1993, ApJ, 408, 496, doi: 10.1086/172607 Muno, M. P., Pfahl, E., Baganoff, F. K., et al. 2005, ApJL, -622, L113, doi:10.1086/429721 +622, L113, doi: 10.1086/429721 Muno, M. P., Bauer, F. E., Baganoff, F. K., et al. 2009, -ApJS, 181, 110, doi:10.1088/0067-0049/181/1/110 +ApJS, 181, 110, doi: 10.1088/0067-0049/181/1/110 Murray, N., Quataert, E., & Thompson, T. A. 2005, ApJ, -618, 569, doi:10.1086/426067 +618, 569, doi: 10.1086/426067 Naoz, S., Rose, S. C., Michaely, E., et al. 2022, ApJL, 927, -L18, doi:10.3847/2041-8213/ac574b +L18, doi: 10.3847/2041-8213/ac574b Naoz, S., & Silk, J. 2014, ApJ, 795, 102, -doi:10.1088/0004- 637X/795/2/102 +doi: 10.1088/0004- 637X/795/2/102 Naoz, S., Silk, J., & Schnittman, J. 
D. 2019, ApJL, 885, -L35, doi:10.3847/2041-8213/ab4fed +L35, doi: 10.3847/2041-8213/ab4fed IMBH Formation in Galactic Nuclei 13 Naoz, S., Will, C. M., Ramirez-Ruiz, E., et al. 2020, ApJL, -888, L8, doi:10.3847/2041- 8213/ab5e3b +888, L8, doi: 10.3847/2041- 8213/ab5e3b Narayan, R., Chael, A., Chatterjee, K., Ricarte, A., & Curd, B. 2022, MNRAS, 511, 3795, -doi:10.1093/mnras/stac285 +doi: 10.1093/mnras/stac285 Narayan, R., Igumenshchev, I. V., & Abramowicz, M. A. -2003, PASJ, 55, L69, doi:10.1093/pasj/55.6.L69 +2003, PASJ, 55, L69, doi: 10.1093/pasj/55.6.L69 Ohsuga, K., Mori, M., Nakamoto, T., & Mineshige, S. 2005, -ApJ, 628, 368, doi:10.1086/430728 +ApJ, 628, 368, doi: 10.1086/430728 O’Leary, R. M., Kocsis, B., & Loeb, A. 2009, MNRAS, 395, -2127, doi:10.1111/j.1365-2966.2009.14653.x +2127, doi: 10.1111/j.1365-2966.2009.14653.x O’Leary, R. M., Rasio, F. A., Fregeau, J. M., Ivanova, N., & O’Shaughnessy, R. 2006, ApJ, 637, 937, -doi:10.1086/498446 +doi: 10.1086/498446 Ostriker, J. P., Choi, E., Ciotti, L., Novak, G. S., & Proga, D. 2010, ApJ, 722, 642, -doi:10.1088/0004- 637X/722/1/642 +doi: 10.1088/0004- 637X/722/1/642 Park, M.-G., & Ostriker, J. P. 2001, ApJ, 549, 100, -doi:10.1086/319042 +doi: 10.1086/319042 Paumard, T., Genzel, R., Martins, F., et al. 2006, ApJ, 643, -1011, doi:10.1086/503273 +1011, doi: 10.1086/503273 Perets, H. B., Li, Z., Lombardi, James C., J., & Milcarek, Stephen R., J. 2016, ApJ, 823, 113, -doi:10.3847/0004- 637X/823/2/113 +doi: 10.3847/0004- 637X/823/2/113 Peters, P. C., & Mathews, J. 1963a, Physical Review, 131, -435, doi:10.1103/PhysRev.131.435 +435, doi: 10.1103/PhysRev.131.435 —. 1963b, Physical Review, 131, 435, -doi:10.1103/PhysRev.131.435 +doi: 10.1103/PhysRev.131.435 Portegies Zwart, S. F., Baumgardt, H., Hut, P., Makino, J., & McMillan, S. L. W. 2004, Nature, 428, 724, -doi:10.1038/nature02448 +doi: 10.1038/nature02448 Portegies Zwart, S. F., & McMillan, S. L. W. 2000, ApJL, -528, L17, doi:10.1086/312422 -—. 
2002, ApJ, 576, 899, doi:10.1086/341798 +528, L17, doi: 10.1086/312422 +—. 2002, ApJ, 576, 899, doi: 10.1086/341798 Rashkov, V., & Madau, P. 2014, ApJ, 780, 187, -doi:10.1088/0004- 637X/780/2/187 +doi: 10.1088/0004- 637X/780/2/187 Renzo, M., Farmer, R., Justham, S., et al. 2020, A&A, 640, -A56, doi:10.1051/0004-6361/202037710 +A56, doi: 10.1051/0004-6361/202037710 Rizzuto, F. P., Naab, T., Spurzem, R., et al. 2022, -MNRAS, doi:10.1093/mnras/stac231 +MNRAS, doi: 10.1093/mnras/stac231 Rodriguez, C. L., Amaro-Seoane, P., Chatterjee, S., & Rasio, F. A. 2018, PhRvL, 120, 151101, -doi:10.1103/PhysRevLett.120.151101 +doi: 10.1103/PhysRevLett.120.151101 Rodriguez, C. L., Chatterjee, S., & Rasio, F. A. 2016, -PhRvD, 93, 084029, doi:10.1103/PhysRevD.93.084029 +PhRvD, 93, 084029, doi: 10.1103/PhysRevD.93.084029 Rodriguez, C. L., Zevin, M., Amaro-Seoane, P., et al. 2019, Phys. Rev. D, 100, 043027, -doi:10.1103/PhysRevD.100.043027 +doi: 10.1103/PhysRevD.100.043027 Rose, S. C., Naoz, S., Gautam, A. K., et al. 2020, ApJ, 904, -113, doi:10.3847/1538-4357/abc557 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., +113, doi: 10.3847/1538-4357/abc557 Sakstein, J., Croon, D., McDermott, S. D., Straight, M. C., & Baxter, E. J. 2020, arXiv e-prints, arXiv:2009.01213. https://arxiv.org/abs/2009.01213 Sakurai, Y., Yoshida, N., Fujii, M. S., & Hirano, S. 2017, -MNRAS, 472, 1677, doi:10.1093/mnras/stx2044 +MNRAS, 472, 1677, doi: 10.1093/mnras/stx2044 Samsing, J., Venumadhav, T., Dai, L., et al. 2019, PhRvD, -100, 043009, doi:10.1103/PhysRevD.100.043009 +100, 043009, doi: 10.1103/PhysRevD.100.043009 Sari, R., & Fragione, G. 2019, ApJ, 885, 24, -doi:10.3847/1538- 4357/ab43df +doi: 10.3847/1538- 4357/ab43df Schneider, R., Ferrara, A., Natara jan, P., & Omukai, K. 2002, The Astrophysical Journal, 571, 30, -doi:10.1086/339917 +doi: 10.1086/339917 Schnittman, J. D., & Buonanno, A. 
2007, ApJL, 662, L63, -doi:10.1086/519309 +doi: 10.1086/519309 Sch¨odel, R., Gallego-Cano, E., Dong, H., et al. 2018, A&A, -609, A27, doi:10.1051/0004- 6361/201730452 +609, A27, doi: 10.1051/0004- 6361/201730452 Shapiro, S. L., & Marchant, A. B. 1978, ApJ, 225, 603, -doi:10.1086/156521 +doi: 10.1086/156521 Shima, E., Matsuda, T., Takeda, H., & Sawada, K. 1985, -MNRAS, 217, 367, doi:10.1093/mnras/217.2.367 +MNRAS, 217, 367, doi: 10.1093/mnras/217.2.367 Shlosman, I., Choi, J.-H., Begelman, M. C., & Nagamine, -K. 2016, MNRAS, 456, 500, doi:10.1093/mnras/stv2700 +K. 2016, MNRAS, 456, 500, doi: 10.1093/mnras/stv2700 Sigurdsson, S., & Phinney, E. S. 1993, ApJ, 415, 631, -doi:10.1086/173190 +doi: 10.1086/173190 Spera, M., & Mapelli, M. 2017a, MNRAS, 470, 4739, -doi:10.1093/mnras/stx1576 -—. 2017b, MNRAS, 470, 4739, doi:10.1093/mnras/stx1576 +doi: 10.1093/mnras/stx1576 +—. 2017b, MNRAS, 470, 4739, doi: 10.1093/mnras/stx1576 Spitzer, L. 1987, Dynamical evolution of globular clusters Stephan, A. P., Naoz, S., Ghez, A. M., et al. 2016, ArXiv -e-prints.https://arxiv.org/abs/1603.02709 -—. 2019, ApJ, 878, 58, doi:10.3847/1538- 4357/ab1e4d +e-prints. https://arxiv.org/abs/1603.02709 +—. 2019, ApJ, 878, 58, doi: 10.3847/1538- 4357/ab1e4d Stone, N. C., K¨upper, A. H. W., & Ostriker, J. P. 2017, -MNRAS, 467, 4180, doi:10.1093/mnras/stx097 +MNRAS, 467, 4180, doi: 10.1093/mnras/stx097 Stone, N. C., & Metzger, B. D. 2016, MNRAS, 455, 859, -doi:10.1093/mnras/stv2281 +doi: 10.1093/mnras/stv2281 The LIGO Scientific Collaboration, the Virgo Collaboration, Abbott, R., et al. 2020a, arXiv e-prints, -arXiv:2009.01075.https://arxiv.org/abs/2009.01075 +arXiv:2009.01075. https://arxiv.org/abs/2009.01075 —. 2020b, arXiv e-prints, arXiv:2009.01190. https://arxiv.org/abs/2009.01190 Umbreit, S., Fregeau, J. M., Chatterjee, S., & Rasio, F. A. 
-2012, ApJ, 750, 31, doi:10.1088/0004- 637X/750/1/31 +2012, ApJ, 750, 31, doi: 10.1088/0004- 637X/750/1/31 Valiante, R., Schneider, R., Volonteri, M., & Omukai, K. 2016, Monthly Notices of the Royal Astronomical -Society, 457, 3356, doi:10.1093/mnras/stw225 +Society, 457, 3356, doi: 10.1093/mnras/stw225 Vink, J. S., Higgins, E. R., Sander, A. A. C., & Sabhahit, G. N. 2021, MNRAS, 504, 146, -doi:10.1093/mnras/stab842 - Rose et al. +doi: 10.1093/mnras/stab842 + Rose et al. Wang, H., Stephan, A. P., Naoz, S., Hoang, B.-M., & Breivik, K. 2021, ApJ, 917, 76, -doi:10.3847/1538- 4357/ac088d +doi: 10.3847/1538- 4357/ac088d Wang, J., & Merritt, D. 2004, ApJ, 600, 149, -doi:10.1086/379767 +doi: 10.1086/379767 Woosley, S. E. 2017, ApJ, 836, 244, -doi:10.3847/1538- 4357/836/2/244 +doi: 10.3847/1538- 4357/836/2/244 Yu, Q., & Tremaine, S. 2002, MNRAS, 335, 965, -doi:10.1046/j.1365- 8711.2002.05532.x +doi: 10.1046/j.1365- 8711.2002.05532.x Yuan, F., Wu, M., & Bu, D. 2012, ApJ, 761, 129, -doi:10.1088/0004- 637X/761/2/129 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. +doi: 10.1088/0004- 637X/761/2/129 Yue, B., Ferrara, A., Salvaterra, R., Xu, Y., & Chen, X. 2014, Monthly Notices of the Royal Astronomical -Society, 440, 1263, doi:10.1093/mnras/stu351 +Society, 440, 1263, doi: 10.1093/mnras/stu351 Zheng, X., Lin, D. N. C., & Mao, S. 2020, arXiv e-prints, -arXiv:2011.04653.https://arxiv.org/abs/2011.04653 +arXiv:2011.04653. https://arxiv.org/abs/2011.04653 Zhu, Z., Li, Z., & Morris, M. R. 
2018, ApJS, 235, 26, -doi:10.3847/1538- 4365/aab14f \ No newline at end of file +doi: 10.3847/1538- 4365/aab14f \ No newline at end of file diff --git a/read/results/playa/2201.00029.txt b/read/results/playa/2201.00029.txt index db15086..31a0f47 100644 --- a/read/results/playa/2201.00029.txt +++ b/read/results/playa/2201.00029.txt @@ -6,101 +6,101 @@ Exploring new techniques for analyzing variability in white dwarf KIC 8626021 -Thomas Huckans , Peter Stine -Department of Physics and Engineering , Bloomsburg University of Pennsylvania , 400 E 2 nd - St ., +Thomas Huckans, Peter Stine +Department of Physics and Engineering, Bloomsburg University of Pennsylvania, 400 E 2nd + St., Bloomsburg, PA 17815 -Abst r act +Abstract - As is common with the collection of astronomical data, signals are frequently dominated -by noise. However, when performing FTs of light curves, re - binning data can improve the signal to - - noise ratio ( SNR ) at lower frequencies. Using data collected from the K epler space telescope, -we sequentially re - binned data three times to investigate the SNR i mprovement of lower frequency -(< 1 7 µ Hz) variability in white dwarf KIC 8626021 . We fou nd that the SNR at approximately 5.8 -µ Hz greatly improved through this process, and we postulate that this frequen c y is linked to the + As is common with the collection of astronomical data, signals are frequently dominated +by noise. However, when performing FTs of light curves, re-binning data can improve the signalto-noise + ratio (SNR) at lower frequencies. Using data collected from the Kepler space telescope, +we sequentially re-binned data three times to investigate the SNR improvement of lower frequency +(< 17 µHz) variability in white dwarf KIC 8626021. We found that the SNR at approximately 5.8 +µHz greatly improved through this process, and we postulate that this frequency is linked to the rotation of KIC 8626021. 
Introduction -First detected in 1862, white dwarfs long posed a mystery for early observ ers. When the +First detected in 1862, white dwarfs long posed a mystery for early observers. When the companion to Sirius was detected, apparent contradictions concerning the mass, luminosities, and -densities baffled astronomers. Lacking full understanding of atom ic structures and the energy -states of electrons, these early researchers believ ed white dwarfs to o dense to exist . However, new -discoveries at the turn of the 20 th - century explained the existence of these stars , and between the -world wars white dwarfs wer e increasingly studied and modeled (Holberg, 2009 ) . -As stars age, those that lack the mass to become neutron stars and black holes become -white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008 ) . They are -composed of a core o f carbon and oxygen ions that slowly cools over billions of years, and the -light emanating from these star s is a result of thermal energy. White dwarf stars are no longer -supported against the force of gravity by fusion, so the stars collapse into an elect ron - degenerate -state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two -electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from +densities baffled astronomers. Lacking full understanding of atomic structures and the energy +states of electrons, these early researchers believed white dwarfs too dense to exist. However, new +discoveries at the turn of the 20th + century explained the existence of these stars, and between the +world wars white dwarfs were increasingly studied and modeled (Holberg, 2009). +As stars age, those that lack the mass to become neutron stars and black holes become +white dwarf stars, representing 98% of the stars in our galaxy (Winget & Kepler, 2008). 
They are +composed of a core of carbon and oxygen ions that slowly cools over billions of years, and the +light emanating from these stars is a result of thermal energy. White dwarf stars are no longer +supported against the force of gravity by fusion, so the stars collapse into an electron-degenerate +state where the electrons in the carbon and oxygen atoms occupy the lowest energy levels. As two +electrons cannot occupy the same quantum state, Pauli repulsion keeps white dwarfs from collapsing entirely. -For many years, accurate detection of light variability in white dwarfs was difficult due to -a lack of adequate instruments. However , the launch of the Kepler space telescope in 2009 made -capturing the light of distant stars much more efficient and effective (Basri et al., 2010 ) . Kepler -was initially de veloped with the intention of surveying our region of the Milky Way galaxy in +For many years, accurate detection of light variability in white dwarfs was difficult due to +a lack of adequate instruments. However, the launch of the Kepler space telescope in 2009 made +capturing the light of distant stars much more efficient and effective (Basri et al., 2010). Kepler +was initially developed with the intention of surveying our region of the Milky Way galaxy in order to find potentially habitable planets. The purpose of the mission was to identify key traits for -such planets by determining the number of planets in habitable zones, the s izes and shapes of orbits, -and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler -observed approximately 1.5 x 10 5 - stars ( Johnson, 2018) , affording scientists excellent -opportunities to research stel lar variability . 
Due to the loss of a second reaction wheel in 2013, -NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and +such planets by determining the number of planets in habitable zones, the sizes and shapes of orbits, +and the characteristics of the stars being orbited. Over the lifespan of its first mission, Kepler +observed approximately 1.5 x 105 + stars (Johnson, 2018), affording scientists excellent +opportunities to research stellar variability. Due to the loss of a second reaction wheel in 2013, +NASA developed the K2 mission, a way to prolong Kepler’s assistance to astronomy and astrophysics. -Utilizing Kepler’s ability to maintain three - dimensional control, NASA proceeded to use +Utilizing Kepler’s ability to maintain three-dimensional control, NASA proceeded to use the telescope to collect photometry data of certain sections of our galaxy, although the number of -targets was significantly reduced. In addition, the K2 mission was designed to be community oriented, - with the scientific community having a n influence on th e fields observed and serving as -the analysts of the vast amounts of data being received ( Howell et al., 2014 ). Although Kepler was -deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of -white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations +targets was significantly reduced. In addition, the K2 mission was designed to be communityoriented, + with the scientific community having an influence on the fields observed and serving as +the analysts of the vast amounts of data being received (Howell et al., 2014). Although Kepler was +deactivated in 2018, the data used in this paper came from observations during 2010 and 2012 of +white dwarf KIC 8626021 and was obtained from the Kepler Asteroseismic Science Operations Center (KASOC). The DBV white dwarf KIC 8626021 has an atmosphere rich in helium. 
Building upon -previous studies, this research investigated novel techniques of analyzing variability in white -dw arfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on +previous studies, this research investigated novel techniques of analyzing variability in white +dwarfs. The dwarf KIC 8626021 was chosen due to the large amount of preexisting research on the star, allowing for the validation of results using our methods. KIC 8626021 has an effective -temperature of 2 9,700 K, log g = 7.890, and mass of 0.56 M - ☉ (Córsico, 2020 ) . Other research -has found that this white dwarf is the DBV with the highest known temperature, and its helium -layer is the thinn est (Bischoff - K im et al., 2015). Despite the long - cadence light curve being too -noisy to draw many conclusions , other FTs of short - cadence data have been performed to find -variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with -frequencies of 4309.89 µHz , 5073.26 µHz , 36 81.87 µHz , 3294.22 µHz and 2658.85 µ Hz -(Østensen et al., 2011 ). These fin dings confirm the classification of the white dwarf as a V777 -Herculis, although our research focuses on low frequencies using long - cadence data. +temperature of 29,700 K, log g = 7.890, and mass of 0.56 M +☉ (Córsico, 2020). Other research +has found that this white dwarf is the DBV with the highest known temperature, and its helium +layer is the thinnest (Bischoff-Kim et al., 2015). Despite the long-cadence light curve being too +noisy to draw many conclusions, other FTs of short-cadence data have been performed to find +variability in the dwarf. Analyses at high frequencies of KIC 8626021 yielded pulsations with +frequencies of 4309.89 µHz, 5073.26 µHz, 3681.87 µHz, 3294.22 µHz and 2658.85 µHz +(Østensen et al., 2011). These findings confirm the classification of the white dwarf as a V777 +Herculis, although our research focuses on low frequencies using long-cadence data. 
- Method s + Methods -All data were downloaded from the KASOC database, and the long - cadence (data -sampled ap proximately every thirty minutes) measurements of Corrected F lux (ppm) were -analyzed. All computations were made in Wolfram Mathematica and Microsoft Exce l , and FTs -were performed in Mathematica . The re - binning process consist ed of summin g adjacent light -c urve data points in each quarter , therefore doubling the sampling interval from 0 .5 hour to one -hour, and then repeating this process on the data sample fo r a total of three times. In addition, a -significant detection was defin ed as being 3 𝝈 above the mean of the relative flux, and 0 on the -graphs below represents this 3 𝝈 cutoff. ( Koch, D. G., 2010), ( Wolfram Research, Inc., 2021). To -find the SNR , we converted to decibels . Using these SNRs , we were able to easily identify -im provement in signal strength. +All data were downloaded from the KASOC database, and the long-cadence (data +sampled approximately every thirty minutes) measurements of Corrected Flux (ppm) were +analyzed. All computations were made in Wolfram Mathematica and Microsoft Excel, and FTs +were performed in Mathematica. The re-binning process consisted of summing adjacent light +curve data points in each quarter, therefore doubling the sampling interval from 0.5 hour to one +hour, and then repeating this process on the data sample for a total of three times. In addition, a +significant detection was defined as being 3𝝈 above the mean of the relative flux, and 0 on the +graphs below represents this 3𝝈 cutoff. (Koch, D. G., 2010), (Wolfram Research, Inc., 2021). To +find the SNR, we converted to decibels. Using these SNRs, we were able to easily identify +improvement in signal strength. Results - Figure 1 presents the lightcurves constructed for quarters seven (Q 7) and thirteen (Q13) , -with corrected flux magnitude (ppm) plotted versus time (Julian days). 
Figure 2 presents the FTs -of the first iteration and three successive re - bins for Q7 , while Figure 3 presents the FTs of the -same for Q13 . - Tables 1 and 2 both show the hypothesized f requency corresponding to the rotation of -KIC 8626021 that is found in the FTs of the f irst iteration and subsequent re - bins for Q7 and -Q1 3 . Tables 3 and 4 show all data values < 17 µ Hz found in the first iterations and re - bins of Q7 -and Q13 . + Figure 1 presents the lightcurves constructed for quarters seven (Q7) and thirteen (Q13), +with corrected flux magnitude (ppm) plotted versus time (Julian days). Figure 2 presents the FTs +of the first iteration and three successive re-bins for Q7, while Figure 3 presents the FTs of the +same for Q13. + Tables 1 and 2 both show the hypothesized frequency corresponding to the rotation of +KIC 8626021 that is found in the FTs of the first iteration and subsequent re-bins for Q7 and +Q13. Tables 3 and 4 show all data values < 17 µHz found in the first iterations and re-bins of Q7 +and Q13. @@ -111,11 +111,11 @@ and Q13 . -FIG. 1 : Pictured top is the light curve constructed for Q7 , below is the light curve for Q13 . Q 7 -lasted from September 24 – December 13 , 2010, and Q13 was from M arch 29 – June 23, 2012. -Both graphs were constructed by plotting corrected flux magnitude (flux corrected for -instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating -between points. Q 7 had forty - three interpolated points, and Q13 had sixty - six . +FIG. 1: Pictured top is the light curve constructed for Q7, below is the light curve for Q13. Q7 +lasted from September 24 – December 13, 2010, and Q13 was from March 29 – June 23, 2012. +Both graphs were constructed by plotting corrected flux magnitude (flux corrected for +instrumental artifacts) versus time in Excel, and gaps in the data were filled in by interpolating +between points. Q7 had forty-three interpolated points, and Q13 had sixty-six. 
@@ -129,11 +129,11 @@ between points. Q 7 had forty - three interpolated points, and Q13 had s -FIG. 2 : The graphs show the initial FTs of Q7, and then the FTs of the three successive re - bins of -the light curve data. The significant fr equenc ies of 5.886 µHz and 5.889 µHz are circled. The -d isappearance of the freque ncy in the last FT is most likely a b yproduct of the method, and the -spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the re binning - proc ess. +FIG. 2: The graphs show the initial FTs of Q7, and then the FTs of the three successive re-bins of +the light curve data. The significant frequencies of 5.886 µHz and 5.889 µHz are circled. The +disappearance of the frequency in the last FT is most likely a byproduct of the method, and the +spurious frequency of 5.464 µHz in the last FT most probably represents an artifact of the rebinning + process. @@ -147,11 +147,11 @@ spurious frequency of 5.464 µHz in the last FT most probably represents a -FIG. 3 : The graphs show the initial FT of Q13 , and t he n the FT s of the three successive re - bins -of the light curve data. The significant frequencies of 5.784 µHz and 5. 787 µHz are circled. In -addition, in the third re - bin , the frequencies 11.641 µHz and 16.823 µHz rise above 3 𝝈 and are -near ly perfect integer multiples of 5 .787 µHz . These harmonics are potentially indications of a -starspot (Santos et al., 2017). +FIG. 3: The graphs show the initial FT of Q13, and then the FTs of the three successive re-bins +of the light curve data. The significant frequencies of 5.784 µHz and 5.787 µHz are circled. In +addition, in the third re-bin, the frequencies 11.641 µHz and 16.823 µHz rise above 3𝝈 and are +nearly perfect integer multiples of 5.787 µHz. These harmonics are potentially indications of a +starspot (Santos et al., 2017). @@ -167,196 +167,196 @@ starspot (Santos et al., 2017). 
Q7 Significant -Data Points Light +Data Points Light Variability -Frequenc y -(µHz) Corrected Flux +Frequency +(µHz) Corrected Flux Magnitude -(ppm) Period (days ) Signal - to - Noise +(ppm) Period (days) Signal-to-Noise (dB) Q7 First -Iteration 5.886 - 1.198 1.966 9.9 -Q7 Re - bin 1 5.886 - 1.477 1.966 12.8 -Q7 Re - bin 2 5.889 0.59 7 1.965 19.2 -TABLE I : The table displays the various frequencies collected from Q7 and the information -found throu gh calculations to find period and SNR. The frequency of 5.464 µHz is not included, -and therefore was not used in any calculations deter mining the average period of rotation. The -values under corrected flux magnitude are relative to our significant frequency cutoff of 3 𝝈 , thus +Iteration 5.886 -1.198 1.966 9.9 +Q7 Re-bin 1 5.886 -1.477 1.966 12.8 +Q7 Re-bin 2 5.889 0.597 1.965 19.2 +TABLE I: The table displays the various frequencies collected from Q7 and the information +found through calculations to find period and SNR. The frequency of 5.464 µHz is not included, +and therefore was not used in any calculations determining the average period of rotation. The +values under corrected flux magnitude are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff. - Q 13 Significant -Data Points Light + Q13 Significant +Data Points Light Variability -Frequenc y -(µHz) Corrected Flux +Frequency +(µHz) Corrected Flux Magnitude -(ppm) Period (days) Signal - to - Noise +(ppm) Period (days) Signal-to-Noise (dB) -Q13 First -Iteration 5.784 1.555 2.001 15.6 -Q13 Re - bin 1 5.784 2.873 2.001 1 7.7 -Q13 Re - bin 2 5.787 4.938 2.000 22.6 -Q13 Re - bin 3 5.787 6.909 2.0 00 26.3 -Q13 Re - bin 3 11.641 7.073 0.994 26.4 -Q13 Re - bin 3 16.823 2.299 0.688 24.1 -TABLE II : The table displays the various frequencies collected from Q13 and the information -found through calculations to find period and SNR. 
The last two signific ant frequencies (11.641 -µHz and 16.823 µHz ) for Q13 Re - bin 3 represent potential harmonic s, which are discussed in -further detail in the Con clusions section of this paper . The values under corrected flux magnitude -are relative to our significant frequency cutoff of 3 𝝈 , thus negative numbers are under the cutoff. - - - - - - - - - - -First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) -0.933 0.933 0.21 5 0.216 -1.148 1.148 0.575 0.575 -1.364 1.364 0.934 0.935 -1.507 1.507 1.005 1.006 -12.5 61 12.561 1.149 1.150 -16.581 16.581 1.221 1.222 - 1.364 1.366 - 1.508 1 .509 - 1 .580 1. 582 - 1.7 24 1. 725 - 1.795 1.797 - 5.889 2.0 85 - 6.822 5.392 - 9.192 5. 464 - 9.479 7. 476 - 11.203 9. 489 - 12.568 11.215 - 14 . 291 12.581 - 16.230 13.084 - 1 6.589 13.443 - 13.659 - 14.018 - 14. 809 - 15.097 - 16.031 - 16.463 - 16.894 -TABLE III : The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) -above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by product +Q13 First +Iteration 5.784 1.555 2.001 15.6 +Q13 Re-bin 1 5.784 2.873 2.001 17.7 +Q13 Re-bin 2 5.787 4.938 2.000 22.6 +Q13 Re-bin 3 5.787 6.909 2.000 26.3 +Q13 Re-bin 3 11.641 7.073 0.994 26.4 +Q13 Re-bin 3 16.823 2.299 0.688 24.1 +TABLE II: The table displays the various frequencies collected from Q13 and the information +found through calculations to find period and SNR. The last two significant frequencies (11.641 +µHz and 16.823 µHz) for Q13 Re-bin 3 represent potential harmonics, which are discussed in +further detail in the Conclusions section of this paper. The values under corrected flux magnitude +are relative to our significant frequency cutoff of 3𝝈, thus negative numbers are under the cutoff. 
+ + + + + + + + + + +First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) +0.933 0.933 0.215 0.216 +1.148 1.148 0.575 0.575 +1.364 1.364 0.934 0.935 +1.507 1.507 1.005 1.006 +12.561 12.561 1.149 1.150 +16.581 16.581 1.221 1.222 + 1.364 1.366 + 1.508 1.509 + 1.580 1.582 + 1.724 1.725 + 1.795 1.797 + 5.889 2.085 + 6.822 5.392 + 9.192 5.464 + 9.479 7.476 + 11.203 9.489 + 12.568 11.215 + 14.291 12.581 + 16.230 13.084 + 16.589 13.443 + 13.659 + 14.018 + 14.809 + 15.097 + 16.031 + 16.463 + 16.894 +TABLE III: The table displays all frequencies of Q7 that had a corrected flux magnitude (ppm) +above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a byproduct of the method, and we calculated for such errors when finding our average. -First Iteration ( µ Hz) First Re - bin ( µ Hz) Second Re - bin ( µ Hz) Third Re - bin ( µ Hz) -3.094 2 .018 2.019 1.951 -5.784 3.094 3.095 2.019 -9.080 5.784 5.787 2.442 -13.519 7.667 7.671 2. 759 -15.671 9.080 9.084 3.095 -16.209 11.165 11.641 3.634 -16.411 13.519 13.526 4.374 - 15.469 15.477 4.778 - 15.671 15.679 4.912 - 16.209 15.881 5.0 47 - 16.41 1 16.419 5.787 - 8. 479 - 9. 084 - 10.565 - 11.641 - 13.526 - 15.544 - 15.881 - 16.82 3 -TABLE IV : The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) -above the cutoff of 3 𝝈 . The minor shifting of significant frequencies between re - bins is a by product - of the method, and we calculate d for such errors when finding our average. 
+First Iteration (µHz) First Re-bin (µHz) Second Re-bin (µHz) Third Re-bin (µHz) +3.094 2.018 2.019 1.951 +5.784 3.094 3.095 2.019 +9.080 5.784 5.787 2.442 +13.519 7.667 7.671 2.759 +15.671 9.080 9.084 3.095 +16.209 11.165 11.641 3.634 +16.411 13.519 13.526 4.374 + 15.469 15.477 4.778 + 15.671 15.679 4.912 + 16.209 15.881 5.047 + 16.411 16.419 5.787 + 8.479 + 9.084 + 10.565 + 11.641 + 13.526 + 15.544 + 15.881 + 16.823 +TABLE IV: The table displays all frequencies of Q13 that had a corrected flux magnitude (ppm) +above the cutoff of 3𝝈. The minor shifting of significant frequencies between re-bins is a byproduct + of the method, and we calculated for such errors when finding our average. Conclusions -As our research used the long - cadence data from Kepler, much of the high - frequency -va riability due to gravitational wave pulsations is lost. However, this presents an opportunity to -verify our results with the work of research groups that analyzed short - cadence data. With the -data analyzed, the lower fre quencies between 5 - 6 µHz emerged . Aft er finding the average of the -periods and accounting for a 1 𝝈 margin of error, our research hypothesizes that the rotation -period of KIC 8626021 is 1.99 ± 0.02 days. Other short - cadence re search has found the rotation -period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff - K im et -al ., 2015 ) . Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011 ) , and -these periods indicate that the more precise significant period identified through our re - binning +As our research used the long-cadence data from Kepler, much of the high-frequency +variability due to gravitational wave pulsations is lost. However, this presents an opportunity to +verify our results with the work of research groups that analyzed short-cadence data.With the +data analyzed, the lower frequencies between 5-6 µHz emerged. 
After finding the average of the +periods and accounting for a 1𝝈 margin of error, our research hypothesizes that the rotation +period of KIC 8626021 is 1.99 ± 0.02 days. Other short-cadence research has found the rotation +period to be 1.8 ± 0.4 days, by analyzing the structures of independent modes (Bischoff-Kim et +al., 2015). Other calculated periods of rotation have been ≈ 1.7 days (Østensen et al., 2011), and +these periods indicate that the more precise significant period identified through our re-binning relates to the rotation of the white dwarf. -Through the re - b inning process , the SNR clearly improves for both quarters, and for Q7 it -improves by approximately 1.3 dB, except f o r the last data re - bin. In the last re - bin, the previous - -significant frequency disappears , which beco mes increasingly likely after succe ssive re - binning -processes . The frequency 5.464 µHz rises as another significant frequency; however, we believe -that this new frequ ency is simply an artifact of the re - binning process. In Q13 , we saw SNR -improvement ranging from 1.1 dB to 1.3 dB . -Through the re - binning process , more lines , or significant frequencies, appeared above -the 3 𝝈 cutoff , particularly at lower frequencies. These findings suggest that as an alternative to -short - cadence analysis, the re - binning process of long - cadence dat a can be used to identify -significant lower frequencies in white dwarfs. The methods we used are also si mple and -replicable, which allows even those with less experience to quickly analyze the large amounts of -data being collected by orbiting telescopes, s uch as the currently active TESS (Transiting -Exoplanet Survey Satellite) telescope. -The presence of poss ible harmonics in the third re - bin of Q13 also indicates the possible -presence of a previously unseen starspot in KIC 8626021 caused by mag netic activity. 
These -spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, +Through the re-binning process, the SNR clearly improves for both quarters, and for Q7 it +improves by approximately 1.3 dB, except for the last data re-bin. In the last re-bin, the previous + +significant frequency disappears, which becomes increasingly likely after successive re-binning +processes. The frequency 5.464 µHz rises as another significant frequency; however, we believe +that this new frequency is simply an artifact of the re-binning process. In Q13, we saw SNR +improvement ranging from 1.1 dB to 1.3 dB. +Through the re-binning process, more lines, or significant frequencies, appeared above +the 3𝝈 cutoff, particularly at lower frequencies. These findings suggest that as an alternative to +short-cadence analysis, the re-binning process of long-cadence data can be used to identify +significant lower frequencies in white dwarfs. The methods we used are also simple and +replicable, which allows even those with less experience to quickly analyze the large amounts of +data being collected by orbiting telescopes, such as the currently active TESS (Transiting +Exoplanet Survey Satellite) telescope. +The presence of possible harmonics in the third re-bin of Q13 also indicates the possible +presence of a previously unseen starspot in KIC 8626021 caused by magnetic activity. These +spots are darker, cooler, and modulate stellar light curves, and with confirmation of its existence, the harmonic frequencies can be used to calculate the spot’s rotation rate, size, latitude, and -contrast (Santos et al., 2017) . Using the process of re - binning, a starspot signal, previously -dominated by noise, may have been discov ered. +contrast (Santos et al., 2017). Using the process of re-binning, a starspot signal, previously +dominated by noise, may have been discovered. 
Acknowledgments -W e wish to thank Bloomsburg University of Pennsylvania for its continued support of our +We wish to thank Bloomsburg University of Pennsylvania for its continued support of our research. -This paper includes data collected b y the Kepler mission and obtained from the MAST -data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is +This paper includes data collected by the Kepler mission and obtained from the MAST +data archive at the Space Telescope Science Institute (STScI). Funding for the Kepler mission is provided by the NASA Science Mission Directorate. STScI is operated by the Association of -Universities for Rese arch in Astronomy, Inc., under NASA contract NAS 5 – 26555. +Universities for Research in Astronomy, Inc., under NASA contract NAS 5–26555. References - Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., -Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010) . -PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG -stars — a FIRST LOOK. The Astr ophysical Journal, 713(2), L155 - L159. -https://doi.org/10.1088/2041 - 8205/713/2/L155 -Bischoff - K im , A., Øs tensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). Seven - Period -asteroseismic fit of KI C 8626021. EPJ Web of Conferences, 101, 06009. -https://doi.org/10.1051/epjconf/ 201510106009 -Córsico, A. H. (2020). White - Dwarf asteros eismology with the kepler space telescope. Frontiers + Basri, G., Walkowicz, L. M., Batalha, N., Gilliland, R. L., Jenkins, J., Borucki, W. J., Koch, D., +Caldwell, D., Dupree, A. K., Latham, D. W., Meibom, S., Howell, S., & Brown, T. (2010). +PHOTOMETRIC VARIABILITY IN KEPLER TARGET stars: THE SUN AMONG +stars—a FIRST LOOK. The Astrophysical Journal, 713(2), L155-L159. +https://doi.org/10.1088/2041-8205/713/2/L155 +Bischoff-Kim, A., Østensen, R. H., Hermes, J.j., & Provencal, J. L. (2015). 
Seven-Period +asteroseismic fit of KIC 8626021. EPJ Web of Conferences, 101, 06009. +https://doi.org/10.1051/epjconf/201510106009 +Córsico, A. H. (2020). White-Dwarf asteroseismology with the kepler space telescope. Frontiers in Astronomy and Space Sciences, 7. https://doi.org/10.3389/fspas.2020.00047 -Holberg, J . B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal -for the History of Astrono my, 40(2), 137 - 154. +Holberg, J. B. (2009). The discovery of the existence of white dwarf stars: 1862 to 1930. Journal +for the History of Astronomy, 40(2), 137-154. https://doi.org/10.1177%2F002182860904000201 -Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Tr oeltzsch, J., Aigrain, S., -Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., -M iglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: -Characterization and early results. Publications of the Astronomical Society of the Pacific, -126(938), 398 - 408. https://doi.org/10.1086/676406 -Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space +Howell, S. B., Sobeck, C., Haas, M., Still, M., Barclay, T., Mullally, F., Troeltzsch, J., Aigrain, S., +Bryson, S. T., Caldwell, D., Chaplin, W. J., Cochran, W. D., Huber, D., Marcy, G. W., +Miglio, A., Najita, J. R., Smith, M., Twicken, J. D., & Fortney, J. J. (2014). The k2 mission: +Characterization and early results. Publications of the Astronomical Society of the Pacific, +126(938), 398-408. https://doi.org/10.1086/676406 +Johnson, M. (Ed.). (2018, October 30). Mission overview. National Aeronautics and Space Administration. Retrieved September 2, 2021, from -https://www.nasa.gov/mission_pages/keple r/overview/index.html -Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensen dalsgaard, - J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. 
C., -Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y ., Latham, D. W., Lissauer, J. J., Marcy, -G., . . . Morrison , D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC -performance, AND EARLY SCIENCE. The Astrophy sical Journal , 713 (2), L79 - L86. -https://dx.doi.org/10.1088/2041 - 8205/713/2/L79 +https://www.nasa.gov/mission_pages/kepler/overview/index.html +Koch, D. G., Borucki, W. J., Basri, G., Batalha, N. M., Brown, T. M., Caldwell, D., Christensendalsgaard, + J., Cochran, W. D., Devore, E., Dunham, E. W., Gautier, T. N., Geary, J. C., +Gilliland, R. L., Gould, A., Jenkins, J., Kondo, Y., Latham, D. W., Lissauer, J. J., Marcy, +G., . . . Morrison, D. (2010). KEPLER MISSION design, REALIZED PHOTOMETRIC +performance, AND EARLY SCIENCE. The Astrophysical Journal, 713(2), L79-L86. +https://dx.doi.org/10.1088/2041-8205/713/2/L79 Østensen, R. H., Bloemen, S., Vučković, M., Aerts, C., Oreiro, R., Kinemuchi, K., Still, M., & -Koester, D. (2011) . AT last — a v777 HER PULSATOR IN THE KEPLER FIELD. The -Astrophysical Journal , 736 (2), L39. https://doi.org/10.1088/2041 - 8205/736/2/L39 -Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot -signature on the light curv e. Astronomy & Astrophysics , 599 , A1. -https://doi.org/10.1051/0004 - 6361/201629923 +Koester, D. (2011). AT last—a v777 HER PULSATOR IN THE KEPLER FIELD. The +Astrophysical Journal, 736(2), L39. https://doi.org/10.1088/2041-8205/736/2/L39 +Santos, A. R. G., Cunha, M. S., Avelino, P. P., García, R. A., & Mathur, S. (2017). Starspot +signature on the light curve. Astronomy & Astrophysics, 599, A1. +https://doi.org/10.1051/0004-6361/201629923 Winget, D.e., & Kepler, S.o. (2008). Pulsating white dwarf stars and precision asteroseismology. -Annual Review of Astronomy and Astrophyics, 46(1), 157 - 199. -https://doi.org/10.1146/annurev.astro. 46.060407.145250 -Wolfram Research , Inc., Mathematica, Version 12.3.1, Champaig n, IL (2021). 
\ No newline at end of file +Annual Review of Astronomy and Astrophyics, 46(1), 157-199. +https://doi.org/10.1146/annurev.astro.46.060407.145250 +Wolfram Research, Inc., Mathematica, Version 12.3.1, Champaign, IL (2021). \ No newline at end of file diff --git a/read/results/playa/2201.00037.txt b/read/results/playa/2201.00037.txt index 854599e..224811d 100644 --- a/read/results/playa/2201.00037.txt +++ b/read/results/playa/2201.00037.txt @@ -3,7 +3,7 @@ The influence of a fluid core and a solid inner core on the Cassini sate of Mercury Mathieu Dumberry 1 1 - Department of Physics, University of Alberta, Edmonton, Alberta, Canada. +Department of Physics, University of Alberta, Edmonton, Alberta, Canada. Key Points: • The Cassini state obliquity of Mercury’s mantle spin axis deviates from that of a rigid @@ -48,66 +48,67 @@ offset smaller than the present-day error in measurements. We also show that the solid inner core is, the more the planet behaves as if it were precessing as an entirely rigid body. 1 Introduction Mercury is expected to be in a Cassini state (Figure 1) whereby its orbit normal and spinsymmetry - axis are both coplanar with, and precess about, the normal to the Laplace plane [ Colombo , + axis are both coplanar with, and precess about, the normal to the Laplace plane [Colombo , 1966; Peale , 1969, 2006]. The orientation of the Laplace plane varies on long timescales, but -its present-day orientation can be reconstructed from ephemerides data [ Yseboodt and Margot , +its present-day orientation can be reconstructed from ephemerides data [Yseboodt and Margot , 2006; Baland et al., 2017]. Likewise, the rate of precession is also not observed directly, but is reconstructed by ephemerides data. 
The latest estimate is a retrograde precession period of 325,513 -yr with an inclination angle of I = 8.5330 ◦ - between the orbit and Laplace plane normals [ Baland +yr with an inclination angle of I = 8.5330◦ + between the orbit and Laplace plane normals [Baland et al., 2017]. Measurements of the obliquity ε -m , defined as the angle of misalignment between +m, defined as the angle of misalignment between the spin-symmetry axis and the orbit normal, have been obtained by different techniques, -including ground based radar observations [Margot et al. , 2007, 2012], and stereo digital terrain - images [Stark et al., 2015a] and radio tracking data [Mazarico et al. , 2014; Verma and Margot +including ground based radar observations [Margot et al., 2007, 2012], and stereo digital terrain + images [Stark et al., 2015a] and radio tracking data [Mazarico et al., 2014; Verma and Margot , 2016; Genova et al., 2019; Konopliv et al., 2020] from the MErcury Surface Space ENvironment GEochemistry and Ranging (MESSENGER) spacecraft. Within measurement errors, all techniques yield an obliquity which is coplanar with the orbit and Laplace plane normals -and consistent with a Cassini state. Furthermore, the observed obliquity angle (2. 042 ± 0 .08 +and consistent with a Cassini state. Furthermore, the observed obliquity angle (2.042 ± 0.08 –2– Confidential manuscript submitted to JGR-Planets I descending -node of orbit Ω - p +node of orbitΩ +p ê - 3 I +3I I ê - 3 Lε - mI +3Lε +mI ê - 3 p +3p ascending node of orbit descending -node of equator equatorial +node of equatorequatorial plane orbital direction Sê - 3 I +3I ê - 3 L - M +3L +M ε - morbital planeFigure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded -rectangle) and the Cassini state of Mercury. The normal to the orbital plane ( ˆe I -3 ) is offset from the normal - to the Laplace plane ( ˆe L -3 ) by an angle I = 8. 5330◦ - . 
The symmetry axis of the mantle ˆe p +morbital plane +Figure 1. The orbit of Mercury (M) around Sun (S) with respect to the Laplace plane (grey shaded +rectangle) and the Cassini state of Mercury. The normal to the orbital plane (ˆeI +3) is offset from the normal + to the Laplace plane (ˆeL +3 ) by an angle I = 8.5330◦ +. The symmetry axis of the mantle ˆep 3 is offset -from ˆe I +from ˆeI 3 by ε - m ≈ 2 arcmin. ˆe I -3 and ˆe p -3 are coplanar with, and precess about, ˆe L +m ≈ 2 arcmin. ˆeI +3 and ˆep +3 are coplanar with, and precess about, ˆeL 3 in a retrograde direction at frequency Ω - p = 2 π/325, 513 yr− 1 - . The blue (orange) shaded region indicates the portion of the orbit +p = 2π/325, 513 yr−1 +. The blue (orange) shaded region indicates the portion of the orbit when Mercury is above (below) the Laplace plane. Angles are not drawn to scale. -arcmin [ Margot et al. , 2012], 2.029 ± 0. 085 arcmin [Stark et al., 2015a] and 1.968 ± 0 .027 [ Genova +arcmin [Margot et al., 2012], 2.029±0.085 arcmin [Stark et al., 2015a] and 1.968±0.027 [Genova et al., 2019] to list a few) matches that expected if Mercury occupies Cassini state 1. The prediction of Mercury’s obliquity is based on the assumption that the whole planet precesses as a single body. However, we know that Mercury has a fluid core from two main lines @@ -115,42 +116,42 @@ of evidence. First, Mercury’s large scale magnetic field is intrinsic, and mus dynamo action [Anderson et al., 2011, 2012; Johnson et al., 2012]. This requires fluid motion in its metallic core, and hence that Mercury’s core is at least partially liquid. Second, the observed amplitude of the 88-day longitudinal libration is approximately twice as large as that -expected if Mercury were librating as a rigid body [ Margot et al. , 2007, 2012; Stark et al., 2015a]. +expected if Mercury were librating as a rigid body [Margot et al., 2007, 2012; Stark et al., 2015a]. 
This indicates that it is only the mantle that librates, and that the outer part of the core is fluid. These evidences do not necessarily imply that the whole of Mercury’s core is fluid, but only that its outermost part must be. A solid inner core may have nucleated at the centre although its size is not well constrained. Inner core growth leads to planetary contraction, and the inferred -radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al. , 2014] places an -approximate limit of 800 km on the inner core radius [ Grott et al. , 2011]. However, the inner +radial contraction of ∼ 7 km since the late heavy bombardment [Byrne et al., 2014] places an +approximate limit of 800 km on the inner core radius [Grott et al., 2011]. However, the inner core could be larger if a significant fraction of its growth occurred earlier in Mercury’s history. –3– Confidential manuscript submitted to JGR-Planets With a fluid core, and possibly a solid inner core, the observed obliquity ε - m reflects the +m reflects the orientation of the spin-symmetry axis of the precessing mantle and crust alone. Neglecting dissipation, and at equilibrium in the Cassini state, the spin axis of the fluid core and the spinsymmetry axis of the inner core should both also precess about the normal to the Laplace plane in a retrograde direction with a period of 325,513 yr. Both of these axes should also lie in the plane that defines the equilibrium Cassini state [e.g. Dumberry and Wieczorek , 2016], although their obliquity angles may be different than ε - m . Whether the spin axis of the fluid core is brought +m. Whether the spin axis of the fluid core is brought into an alignment with the mantle obliquity depends primarily on the pressure torque (also referred to as the inertial torque) exerted by the centrifugal force of the rotating fluid core on the -misaligned elliptical shape of the core-mantle boundary (CMB) [ Poincar´e , 1910]. 
The more flattened +misaligned elliptical shape of the core-mantle boundary (CMB) [Poincar´e , 1910]. The more flattened the CMB is, the stronger the pressure torque is, and the more the fluid core is entrained into a co-precession at a similar obliquity to that of the mantle. The flattening of Mercury’s CMB is not known. But if one assumes that the topography of the CMB coincides with an equipotential surface at hydrostatic equilibrium with the imposed frozen-in mass anomalies in the upper mantle and crust, then the pressure torque at the CMB is sufficient to bring the fluid core -into a close alignment with the mantle [ Peale et al. , 2014]. The spin axis of the fluid core is not +into a close alignment with the mantle [Peale et al., 2014]. The spin axis of the fluid core is not expected to be exactly aligned with the spin-symmetry axis of the mantle, but sufficiently close that the resulting mantle obliquity does not differ much from that of a single body planet. Furthermore, viscous and electromagnetic (EM) coupling at the CMB can further restrict the misalignment - between the mantle and core [Peale et al. , 2014]. + between the mantle and core [Peale et al., 2014]. If an inner core is present, its obliquity angle is determined by the sum of the torques acting on it. This includes the gravitational torque from the Sun acting on its tilted figure, analogous to the torque applied on the tilted mantle that sets the obliquity ε - m . In addition, the +m. In addition, the tilt of the inner core also depends on the gravitational torque imposed by the mantle and the pressure torque at the inner core boundary (ICB) imposed by the fluid core. If the mantle gravitational torque dominates, the inner core tilt is expected to remain closely aligned with the @@ -159,7 +160,7 @@ mantle. Conversely, if the pressure torque at the ICB is the largest, the inner at the ICB should also enforce a closer alignment between the rotation vectors of the inner core and fluid core. 
It is on the basis of the observed mantle obliquity that the polar moment of inertia of Mercury - is inferred [e.g. Peale , 1976; Margot et al. , 2018]. Inherent in this calculation is the builtin + is inferred [e.g. Peale , 1976; Margot et al., 2018]. Inherent in this calculation is the builtin assumption that the mantle obliquity does not deviate from that of a rigid planet by a substantial amount. However, the recent study by Peale et al. [2016] suggests that the inner core can be misaligned from the mantle by a few arcmin and that a large inner core can perturb the @@ -168,8 +169,8 @@ orientation of the spin vector of the mantle by as much as 0.1 arcmin. This chal Furthermore, if a large inner core is misaligned with the mantle, then the mantle spin axis does not coincide with the orientation of the polar moment of inertia of the whole planet. This can introduce a systematic offset between different types of obliquity measurements. Those based -on tracking topographic features [ Margot et al. , 2007, 2012; Stark et al., 2015a] capture the obliquity - of the mantle spin axis. While those based on the orientation of the gravity field [ Mazarico +on tracking topographic features [Margot et al., 2007, 2012; Stark et al., 2015a] capture the obliquity + of the mantle spin axis. While those based on the orientation of the gravity field [Mazarico et al., 2014; Verma and Margot , 2016; Genova et al., 2019; Konopliv et al., 2020] are instead tied to the orientation of the principal moment of inertia of the whole planet. An offset of the obliquity of the mantle spin axis with respect to the gravity field could be used to constrain the @@ -192,41 +193,41 @@ differ from that of an entirely rigid Mercury, and third, by how much the obliqu Our model of Mercury consists of four layers of uniform density: a solid inner core, a fluid outer core, a solid mantle, and a thin crust. 
The outer radii of each of these layers, are denoted by r -s , r +s, r f , r -m , and R , and their densities by ρ - s , ρ - f , ρ - m , and ρ - c , respectively. The inner core radius +m, and R, and their densities by ρ +s, ρ +f , ρ +m, and ρ +c, respectively. The inner core radius r s corresponds to the ICB radius, the fluid core radius r f to the CMB radius, and R = -2439 .36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure +2439.36 km to the planetary radius of Mercury. Compressibility effects from increasing pressure with depth are not negligible in the core of Mercury. However adopting uniform densities simplifies the analytical expressions of the model while still capturing the first order rotational dynamics. Uniform densities were also adopted by Peale et al. [2016] and following the same strategy facilitates comparisons between our results. We build our interior model as detailed in Peale et al. [2016]. We first specify r -s , ρ +s, ρ s (or a density contrast at the ICB), the crustal density ρ - c and crustal thickness h = R − r -m . The +c and crustal thickness h = R−r +m. The three unknowns r f , ρ f and ρ - m are then solved such that the interior model is consistent with +m are then solved such that the interior model is consistent with the known mass M and chosen values of the moments of inertia of the whole planet C and that of the mantle and crust C - m . +m. Each layer is triaxial in shape. We denote the polar flattening (or geometrical ellipticity) by - i , defined as the difference between the mean equatorial and polar radii, divided by the mean +i, defined as the difference between the mean equatorial and polar radii, divided by the mean spherical radius. Likewise, we denote the equatorial flattening by the variable ξ - i , defined as the +i, defined as the difference between the maximum and minimum equatorial radii, divided by the mean spherical - radius. 
As above, we use the subscript i = s, f , m and r , to denote the polar or equatorial + radius. As above, we use the subscript i = s, f , m and r, to denote the polar or equatorial flattenings at the ICB, CMB, crust-mantle boundary (CrMB), and surface. The measured polar and equatorial flattenings are taken from Perry et al. [2015] and their numerical values are given in Table 1. We then assume that the ICB and CMB are both at hydrostatic @@ -234,55 +235,55 @@ numerical values are given in Table 1. We then assume that the ICB and CMB are b CrMB and surface. The flattenings at all interior boundaries are specified such that they are consistent with the observed degree 2 spherical harmonic coefficients of gravity J 2 and C - 22 ; their +22; their numerical values are given in Table 1. Specifically, J 2 and C - 22 are connected to the principal +22 are connected to the principal moments of inertia of Mercury (C > B > A) and to the polar and equatorial flattenings by J - 2 = C − ¯ +2 = C − ¯ A -M R 2 = 8π +M R2 = 8π 15 1 -M R 2 +M R2 (ρ - s − ρ - f )r 5 +s − ρ +f )r5 s - s + ( ρ - f − ρ - m )r 5 +s + (ρ +f − ρ +m)r5 f - f + ( ρ - m − ρ - c ) r 5 +f + (ρ +m − ρ +c)r5 m - m + ρ - c R 5 +m + ρ +cR5 - r +r , (1a) C - 22 = B − A -4M R 2 = 8π +22 = B − A +4M R2 = 8π 15 1 -4M R 2 -( ρ - s − ρ - f )r 5 +4M R2 +(ρ +s − ρ +f )r5 s ξ - s + ( ρ - f − ρ - m )r 5 +s + (ρ +f − ρ +m)r5 f ξ -f + ( ρ - m − ρ - c ) r 5 -m ξ - m + ρ - c R 5 - ξ - r +f + (ρ +m − ρ +c)r5 +mξ +m + ρ +cR5 +ξ +r . (1b) where ¯ A is the mean equatorial moment of inertia defined below. The same procedure was used @@ -290,85 +291,85 @@ in Peale et al. [2016] and the mathematical details are given in Equations (18-2 –5– Confidential manuscript submitted to JGR-Planets Mercury Parameter Numerical value Reference -mean motion, n 2π/87 .96935 day− 1 +mean motion, n 2π/87.96935 day−1 Stark et al. [2015b] rotation rate, Ω -o = 1.5n 2π/58 .64623 day− 1 +o = 1.5n 2π/58.64623 day−1 Stark et al. 
[2015b] orbit precession rate, Ω - p 2π/325 , 513 yr −1 +p 2π/325, 513 yr−1 Baland et al. [2017] Poincar´e number, δω = Ω -p /Ω - o 4. 9327 × 10 − 7 +p/Ω +o 4.9327 × 10−7 orbital eccentricity, e - c 0. 20563 Baland et al. [2017] -orbital inclination, I 8. 5330◦ +c 0.20563 Baland et al. [2017] +orbital inclination, I 8.5330◦ Baland et al. [2017] -mean planetary radius, R 2439. 360 km Perry et al. [2015] -mass, M 3. 3012 × 10 23 +mean planetary radius, R 2439.360 km Perry et al. [2015] +mass, M 3.3012 × 1023 kg Genova et al. [2019] -mean density, ¯ρ 5429. 5 kg m− 3 +mean density, ¯ρ 5429.5 kg m−3 J - 2 5. 0291 × 10 − 5 +2 5.0291 × 10−5 Genova et al. [2019] C - 22 8. 0415 × 10 − 6 +22 8.0415 × 10−6 Genova et al. [2019] polar surface flattening, - r 6. 7436 × 10 − 4 +r 6.7436 × 10−4 Perry et al. [2015] equatorial surface flattening, ξ - r 5. 1243 × 10 − 4 +r 5.1243 × 10−4 Perry et al. [2015] -Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031. 8636 × 109 -m 3 - /s 2 - taken from Genova et al. [2019]. The mean density is calculated from 4 π -3 ¯ρR 3 +Table 1. Reference parameters for Mercury. The mass M is computed from GM = 22031.8636 × 109 +m3 +/s2 + taken from Genova et al. [2019]. The mean density is calculated from 4π +3 ¯ρR3 = M . The numerical values of r and ξ r are calculated from -r = (¯a − c )/R and ξ - r = (a − b ) /R , where ¯a = 1 -2 ( a + b ) and where -a = 2440 . 53 km, b = 2439 . 28 km and c = 2438 .26 km are the semima jor, intermediate and semiminor +r = (¯a − c)/R and ξ +r = (a − b)/R, where ¯a = 1 +2 (a + b) and where +a = 2440.53 km, b = 2439.28 km and c = 2438.26 km are the semima jor, intermediate and semiminor axes of the trixial ellipsoidal shape of Mercury taken from Table 2 of Perry et al. [2015]. J 2 and C - 22 are +22 are computed from Equation (4) in the Supporting Information of Genova et al. [2019]. and Wieczorek [2016] who adopted the same strategy in their interior modelling of the Moon. 
Note that we neglect the misalignment between the triaxial shape of Mercury’s surface topography and the axes of the principal moments of inertia, which amount to a polar offset of ∼ 2◦ -and an equatorial offset of ∼ 15 ◦ - [Perry et al. , 2015]. +and an equatorial offset of ∼ 15◦ + [Perry et al., 2015]. Once the densities and flattenings of all interior regions are known, we can specify the moments - of inertia of the fluid core ( C - f > B - f > A - f ) and solid inner core (C + of inertia of the fluid core (C +f > B +f > A +f ) and solid inner core (C s > B - s > A - s ) +s > A +s) along with the mean equatorial moments of inertia ¯ A = 1 -2 ( A + B ) , ¯ +2 (A + B ) , ¯ A - f = 1 +f = 1 2 (A - f + B - f ) , ¯ +f + B +f ) , ¯ A - s = 1 +s = 1 2 (A - s + B - s ) . (2) -From these, we define the polar (e , e - f , e - s ) and equatorial (γ , γ - s ) dynamical ellipticities of the +s + B +s) . (2) +From these, we define the polar (e, e +f , e +s) and equatorial (γ , γ +s) dynamical ellipticities of the whole planet (no subscript), fluid core (subscript f ) and solid inner core (subscript s), which enter our rotational model, e = C − ¯ @@ -376,106 +377,106 @@ A ¯ A e f = C - f − ¯ +f − ¯ A - f +f ¯ A - f e +f e s = C s − ¯ A - s +s ¯ A - s , (3a) +s , (3a) γ = B − A ¯ A γ - s = B - s − A - s +s = B +s − A +s ¯ A - s . (3b) +s . (3b) We further note that e and γ are connected to J - 2 and C +2 and C 22 by -e = M R 2 +e = M R2 ¯ A J -2 , γ = 4 M R 2 +2 , γ = 4M R2 ¯ A C - 22 . (4) +22 . (4) –6– Confidential manuscript submitted to JGR-Planets θ - m +m θ - n +n θ - s +s θ - fΩ +fΩ Ω - s +s Ω - fê - 3 p +fê +3p ê - 3 sê - 3 I +3sê +3I I ε - m +m θ - pê - 3 L +pê +3L ê - 1 p +1p ê - 2 p Cassini plane - ω Ω - o tê - 3 I +2p Cassini plane + ωΩ +otê +3I I ε - m ê - 3 p +m ê +3p ê - 1ê - 2 pê - 3 La) b) +1ê +2pê +3La) b) Figure 2. Geometry of the Cassini state model of Mercury viewed (a) in the Cassini plane and (b) -in a frame attached to the rotating mantle. 
The orbit normal ( ˆe I -3 ) is tilted by an angle I = 8 . 533◦ +in a frame attached to the rotating mantle. The orbit normal (ˆeI +3) is tilted by an angle I = 8.533◦ from -the Laplace normal ( ˆe L -3 ) and the symmetry axis of Mercury’s mantle ( ˆe p +the Laplace normal (ˆeL +3 ) and the symmetry axis of Mercury’s mantle (ˆep 3 ) is tilted by an obliquity ε - m -with respect to ˆe I -3 . Shown in (a) are the orientations of the symmetry axis of the inner core ( ˆe s -3 ), the -rotation rate vectors of the mantle ( Ω ), fluid core (Ω - f ) and inner core ( Ω +m +with respect to ˆeI +3. Shown in (a) are the orientations of the symmetry axis of the inner core (ˆes +3), the +rotation rate vectors of the mantle (Ω), fluid core (Ω +f ) and inner core (Ω f ) and angles θ - p , θ - n , θ - m , θ +p, θ +n, θ +m, θ f and θ - s in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer +s in their Cassini state equilibrium. All vectors and angles are in a common plane which we refer to as the Cassini plane. The light grey, white, and dark grey ellipsoid represent a polar cross-section of the mantle, fluid core and inner core, respectively; blue shaded parts show an equatorial cross section. The black curved arrow in the equatorial plane in (a) indicates the direction of rotation of the equatorial -mantle axes ˆe p -1 and ˆe p +mantle axes ˆep +1 and ˆep 2 with respect to the Cassini plane. Viewed in the frame attached to the rotating -mantle (b), the Cassini plane is rotating at frequency ω Ω - o = −Ω - o − Ω - p cos I in the longitudinal direction. +mantle (b), the Cassini plane is rotating at frequency ωΩ +o = −Ω +o − Ω +p cos I in the longitudinal direction. The oblateness of all three regions and the amplitude of all angles are exaggerated for purpose of illustration. 
–7– @@ -483,15 +484,15 @@ Confidential manuscript submitted to JGR-Planets 2.2 The rotational model Mercury’s rotation is characterized by a 3:2 spin-orbit resonance in which it completes 3 rotations around itself for every 2 orbital revolutions around the Sun. The orbital period is -87.96935 day and the sidereal rotation period is 58.64623 day [ Stark et al., 2015b]. These define - the mean motion n = 2 π/87 . 96935 day −1 +87.96935 day and the sidereal rotation period is 58.64623 day [Stark et al., 2015b]. These define + the mean motion n = 2π/87.96935 day−1 and the sidereal frequency Ω - o = 2 π/58 .64623 -day −1 - , with Ω -o = 1. 5 n . Mercury’s rotational state is also characterized by a Cassini state whereby +o = 2π/58.64623 +day−1 +, with Ω +o = 1.5 n. Mercury’s rotational state is also characterized by a Cassini state whereby the orientations of the orbit normal ( ˆeI -3 ) and of the mantle symmetry axis ( ˆep +3) and of the mantle symmetry axis ( ˆep 3 ) are both coplanar with, and precess about, the normal to the Laplace plane ( ˆeL 3 ). The orientation of the Laplace @@ -500,99 +501,99 @@ purpose. The Cassini state of Mercury is illustrated in Figure 1. The angle betw 3 and ˆeI 3 is the orbital inclination I = 8.5330◦ - [ Baland et al., 2017], the angle between ˆeI + [Baland et al., 2017], the angle between ˆeI 3 and ˆep 3 is the obliquity ε - m and the angle between ˆeL +m and the angle between ˆeL 3 and ˆep 3 is θ p = I + ε - m . The precession of ˆeI +m. The precession of ˆeI 3 and ˆep 3 about the Laplace pole is retrograde with frequency Ω - p = 2π/325 , 513 yr − 1 - [ Baland et al. , 2017]. +p = 2π/325, 513 yr−1 + [Baland et al., 2017]. The mantle and crust are welded together and form a single rotating region which we refer to as the ‘mantle’ in the context of our rotational model. The rotation and symmetry axes of the mantle are expected to remain in close alignment, but they do not coincide exactly. 
We -define the rotation rate vector of the mantle by Ω, and its misalignment from ˆe p +define the rotation rate vector of the mantle by Ω, and its misalignment from ˆep 3 by an angle θ - m . Note that θ +m. Note that θ m ε m and it is often the spin axis of Mercury which is used to define the obliquity ε - m [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆep +m [e.g. Baland et al., 2017]. If Mercury were an entirely rigid planet, ˆep 3 and Ω would characterize the symmetry and rotation axes of the whole of Mercury, not just its mantle, and the angles I , ε - m and θ - m would completely describe the Cassini state. The presence of a fluid +m and θ +m would completely describe the Cassini state. The presence of a fluid outer core and solid inner core require three additional orientation vectors and angles. The symmetry axis of the inner core is defined by unit vector ˆes 3 and its misalignment from ˆep 3 by an angle θ - n . The rotation vectors of the fluid core and inner core are defined as Ω +n. The rotation vectors of the fluid core and inner core are defined as Ω f and Ω -s , respectively, +s, respectively, and their misalignment from the rotation vector of the mantle Ω are defined by angles θ f and θ - s (see Figure 2a). The rotation and symmetry axes of the inner core remain in close +s (see Figure 2a). The rotation and symmetry axes of the inner core remain in close alignment, so θ - n ≈ θ - s . To be formal in our definition of the different angles of misalignment, +n ≈ θ +s. To be formal in our definition of the different angles of misalignment, for I defined positive as depicted on Figure 2a, all angles are defined positive in the clockwise direction. 
-At equilibrium in the Cassini state, the three orientation vectors ( ˆe I -3 , ˆep -3 , ˆe s -3 ) and three +At equilibrium in the Cassini state, the three orientation vectors ( ˆeI +3, ˆep +3 , ˆes +3) and three rotation vectors (Ω, Ω f , Ω - s ) are forced to precess about ˆeL +s) are forced to precess about ˆeL 3 at the same frequency. If we neglect dissipation, all vectors lie on the same plane, which we refer to as the Cassini plane. Viewed in inertial space, the Cassini plane is rotating in a retrograde direction at frequency Ω - p . Viewed +p. Viewed in the frame attached to the mantle rotating at sidereal frequency Ω - o , the Cassini plane is rotating - in a retrograde direction at frequency ω Ω - o (see Figure 2b), where ω , expressed in cycles +o, the Cassini plane is rotating + in a retrograde direction at frequency ωΩ +o (see Figure 2b), where ω, expressed in cycles per Mercury day, is equal to - ω = − 1 − δω cos( θ - p ) . (5) + ω = −1 − δω cos(θ +p) . (5) The factor δω = Ω -p /Ω - o = 4.933 × 10 − 7 +p/Ω +o = 4.933 × 10−7 is the Poincar´e number, expressing the ratio of the forced precession to sidereal rotation frequencies. The invariance of the Laplace plane normal as seen in the mantle frame is expressed as d -dt ˆe L +dt ˆeL 3 + Ω × ˆeL 3 = 0 , (6) or equivalently, by Equation (19e) of Stys and Dumberry [2018], ω sin(θ - p ) + sin( θ - m + θ - p ) = 0 . (7) +p) + sin(θ +m + θ +p) = 0 . (7) –8– Confidential manuscript submitted to JGR-Planets This expresses a formal connection between θ - p and θ - m which is independent of the interior structure +p and θ +m which is independent of the interior structure of Mercury. Using Equation (5) and cos(θ - m ) → 1, this connection can be rewritten as -sin( θ -m ) = δω sin( θ - p ) . (8) +m) → 1, this connection can be rewritten as +sin(θ +m) = δω sin(θ +p) . (8) and thus the relative amplitudes of θ - m and θ - p depend of the Poincar´e number δω . +m and θ +p depend of the Poincar´e number δω. 
To investigate Mercury’s response to the gravitational torque from the Sun, we take advantage of the framework developed in Mathews et al. [1991] to model the forced nutations of Earth [see also Mathews et al., 2002; Dehant and Mathews , 2015]. This model takes into account @@ -609,59 +610,59 @@ Because the forced precession period is much longer than the rotation and orbita torque averaged over one orbit. This mean torque is perpendicular to the Cassini plane, pointing in the same direction as the vector connecting the Sun to the descending node of Mercury’s orbit in Figure 1. Hence, viewed from the mantle frame, the orientation of this mean torque -is periodic, rotating at frequency ω Ω - o . Setting the equatorial directions ˆe p +is periodic, rotating at frequency ωΩ +o. Setting the equatorial directions ˆep 1 and ˆep 2 to correspond to the real and imaginary axes of the complex plane, respectively, we can write the equatorial components of this periodic applied torque in a compact form as Γ - 1 ( t) + iΓ - 2 ( t) = − i ˜ -Γ( ω ) exp[ iω Ω - o t ] , (9) +1(t) + iΓ +2(t) = −i ˜ +Γ(ω) exp[iωΩ +ot] , (9) where ˜ -Γ(ω ) represents the amplitude of the torque at frequency ω Ω - o . In response to this torque, +Γ(ω) represents the amplitude of the torque at frequency ωΩ +o. In response to this torque, the axes defining all angles (θ - p , ε -m , θ - m , θ - f , θ - s , θ - n ) as viewed in the mantle frame are also rotating - at frequency ω Ω - o (see Figure 2). The longitudinal direction of each of these angles at +p, ε +m, θ +m, θ +f , θ +s, θ +n) as viewed in the mantle frame are also rotating + at frequency ωΩ +o (see Figure 2). The longitudinal direction of each of these angles at a specific time t can then also be written in the equatorial complex plane and is proportional -to exp[ iω Ω - o t]. For instance, the two equatorial time-dependent components θ - m 1 and θ - m 2 of the +to exp[iωΩ +ot]. 
For instance, the two equatorial time-dependent components θ +m1 and θ +m2 of the angle θ - m , as seen in the mantle frame, can be written as +m, as seen in the mantle frame, can be written as θ - m 1 ( t) + iθ - m 2 (t ) = ˜m exp[iω Ω - o t ] , (10a) +m1(t) + iθ +m2(t) = ˜m exp[iωΩ +ot] , (10a) where - ˜m ≡ ˜m( ω ) = Re[ ˜m] + iI m [ ˜m ] , (10b) -is the amplitude at frequency ω Ω - o . Equivalent definitions apply for all other angles, with the + ˜m ≡ ˜m(ω) = Re[ ˜m] + iI m[ ˜m] , (10b) +is the amplitude at frequency ωΩ +o. Equivalent definitions apply for all other angles, with the connection as follows: θ - m ⇔ ˜m , θ - f ⇔ ˜m +m ⇔ ˜m , θ +f ⇔ ˜m f , θ - s ⇔ ˜m +s ⇔ ˜m +s , θ +n ⇔ ˜n s , θ - n ⇔ ˜n - s , θ - p ⇔ ˜p , ε - m ⇔ ˜ε - m . (11) +p ⇔ ˜p , ε +m ⇔ ˜ε +m . (11) The notation ˜m, ˜m - f , ˜m -s , ˜n +f , ˜m +s, ˜n s follows that introduced in the original model of Mathews et al. [1991]. Note that all tilded amplitudes are complex: their imaginary part reflects the out-of-phase response to the applied torque as a result of dissipation, for instance from viscous or EM coupling @@ -672,12 +673,12 @@ real. We concentrate our analysis in this work on the real part of the solutions to the mutual alignment of these five rotation angles in the Cassini plane. As such, ˜ε m corresponds to the observed obliquity of the mantle symmetry axis. It is thus equivalent to ε - m , +m, though we keep the tilde notation in the presentation of our results to emphasize that it represents the real part of the solution from our system. Furthermore, since ˜m ˜ε - m , we often +m, we often refer to ˜ε - m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is +m as the orientation of spin axis of the mantle, since the Cassini state of Mercury is more customarily described in terms of the latter in the literature. The model of Mathews et al. [1991] is developed under the assumption of small angles as appropriate for the nutations on Earth. 
The details on how the equations of the model are derived @@ -685,27 +686,27 @@ appropriate for the nutations on Earth. The details on how the equations of the describe, respectively, the time rate of change of the angular momenta of the whole of Mercury, the fluid core, and the inner core in the reference frame of the rotating mantle. These three equations are - (ω − e ) ˜m + (1 + ω ) + (ω − e) ˜m + (1 + ω) ¯ A - f +f ¯ A ˜m - f + ¯ +f + ¯ A - s +s ¯ A ˜m - s + α - 3 e - s ¯ +s + α +3e +s ¯ A - s +s ¯ A ˜n s = 1 -i Ω 2 +iΩ2 o ¯ A ˜ @@ -713,42 +714,42 @@ A sun , (12a) ω ˜m + (1 + ω + e - f ) ˜m - f − ωα -1 e - s ¯ +f ) ˜m +f − ωα +1e +s ¯ A - s +s ¯ A - f ˜n - s = 1 -iΩ 2 +f ˜n +s = 1 +iΩ2 o ¯ A - f +f − ˜ Γ cmb − ˜ Γ - icb +icb , (12b) (ω − α -3 e - s ) ˜m + α -1 e +3e +s) ˜m + α +1e s ˜m -f + (1 + ω ) ˜m +f + (1 + ω) ˜m s + (1 + ω − α -2 ) e - s ˜n - s = 1 -iΩ 2 +2) e +s ˜n +s = 1 +iΩ2 o ¯ A - s +s ˜ -Γ s +Γs sun + ˜ Γ icb @@ -757,22 +758,22 @@ and a fourth equation consists of a kinematic relation that expresses the change of the inner core figure as a result of its own rotation, ˜m s + ω ˜n - s = 0 . (12d) +s = 0 . (12d) In these equations, the parameters α -1 , α +1, α 2 and α 3 involve the density contrast at the ICB and are given by α - 1 = ρ - f +1 = ρ +f ρ - s , α - 3 = 1 − α +s , α +3 = 1 − α 1 , α - 2 = α +2 = α 1 − α -3 α +3α g , (13a) where the parameter α g is a measure of the ratio of the gravitational to inertial torque applied @@ -781,13 +782,13 @@ on the inner core, g = 8πG 5Ω2 o [ρ - c ( - r − - m ) + ρ -m ( - m − +c( +r − +m) + ρ +m( +m − f ) + ρ - f +f f ] , (13b) where G is the gravitational constant. ˜ @@ -795,30 +796,30 @@ where G is the gravitational constant. sun is the amplitude of the gravitational torque by the Sun on the whole of Mercury. 
For a small mantle obliquity ˜ε m and a small inner core tilt ˜n - s , it is given by +s, it is given by ˜ Γ - sun = − iΩ 2 +sun = −iΩ2 o ¯ A φ - m ˜ε - m + ¯ +m ˜ε +m + ¯ A - s +s ¯ A α -3 φ - s ˜n +3φ +s ˜n s , (14) where –10– Confidential manuscript submitted to JGR-Planets φ - m = 3 -2 n 2 -Ω 2 +m = 3 +2 n2 +Ω2 o G 210 e + 1 @@ -826,119 +827,119 @@ G 201 γ , (15a) φ - s = 3 -2 n 2 -Ω 2 +s = 3 +2 n2 +Ω2 o G 210 e - s + 1 +s + 1 2 G - 201 γ - s +201 γ +s , (15b) and where G 210 and G - 201 are functions of the orbital eccentricity e - c , +201 are functions of the orbital eccentricity e +c, G 210 = 1 -(1 − e 2 -c ) 3/ 2 , (16a) +(1 − e2 +c )3/2 , (16a) G 201 = 7 2 e - c − 123 -16 e 3 +c − 123 +16 e3 c + 489 128 e5 c . (16b) The gravitational torque by the Sun acting on the inner core alone, ˜ -Γ s -sun , is +Γs +sun, is ˜ -Γ s -sun = − iΩ 2 +Γs +sun = −iΩ2 o ¯ A - s α -3 φ - s ( ˜ε - m + ˜n - s ) . (17) +sα +3φ +s( ˜ε +m + ˜n +s) . (17) ˜ Γ - cmb and ˜ +cmb and ˜ Γ icb are the torques from tangential stresses by the fluid core on the mantle at the CMB and on the inner core at the ICB, respectively. These torques can be parameterized in terms of dimensionless complex coupling constants K - icb and K +icb and K cmb and the differential angular velocities at each boundary [e.g Buffett , 1992; Buffett et al., 2002], ˜ Γ - icb = iΩ 2 +icb = iΩ2 o ¯ A - s K - icb ( ˜m +sK +icb( ˜m f − ˜m - s ) , (18a) +s) , (18a) ˜ Γ -cmb = iΩ 2 +cmb = iΩ2 o ¯ A - f K +f K cmb ˜m - f . (18b) +f . (18b) Specific expressions for K icb and K cmb are delayed to sections 4 and 5 when we consider the effects of viscous and EM coupling, respectively. A fifth equation is required to connect this interior model to the obliquity of the mantle, and this is provided by Equation (7). For small angles θ - m and θ - p , this gives [e.g. Mathews et al., +m and θ +p, this gives [e.g. Mathews et al., 1991; Dumberry and Wieczorek , 2016; Baland et al., 2019] -˜m + (1 + ω ) ˜p = 0 . 
(19) +˜m + (1 + ω) ˜p = 0 . (19) For Mercury, it is more convenient to connect the internal model with ˜ε - m instead of ˜p. This +m instead of ˜p. This is because θ - p ≈ 8 .567 ◦ +p ≈ 8.567◦ whereas ˜ε m ≈ 2 arcmin and thus the latter obeys more strictly the condition of small angles assumed in our framework. Furthermore, the external torques acting on the whole planet (Equation 14) and inner core (Equation 17) depend linearly on ˜ε - m . Written +m. Written in terms of ˜ε - m , and with the approximation of ˜ε - m 1 and ˜m 1, Equation (7) becomes -˜m + (1 + ω ) ˜ε -m = −(1 + ω ) tan I . (20) +m, and with the approximation of ˜ε +m 1 and ˜m 1, Equation (7) becomes +˜m + (1 + ω) ˜ε +m = −(1 + ω) tan I . (20) Likewise, the frequency ω from Equation (5) can be written simply in terms of I , -ω = − 1 − δω cos I . (21) +ω = −1 − δω cos I . (21) The set of four Equations (12) with the addition of Equation (20) form a linear system -of equations for the five rotational variables ˜m , ˜m +of equations for the five rotational variables ˜m, ˜m f , ˜m - s , ˜n - s and ˜ε - m . It captures the response +s, ˜n +s and ˜ε +m. It captures the response of Mercury, in the frequency domain, when sub ject to a periodic solar torque applied at frequency - ω . The system can be written in a matrix form as + ω. 
The system can be written in a matrix form as –11– Confidential manuscript submitted to JGR-Planets M · x = y , (22a) -where the solution (x ) and forcing (y ) vectors are -x T +where the solution (x) and forcing (y) vectors are +xT = [ ˜m, ˜m f , ˜m -s , ˜n - s , ˜ε - m ] , (22b) -y T - = [0, 0, 0, 0 , − (1 + ω ) tan I ] , (22c) +s, ˜n +s, ˜ε +m] , (22b) +yT + = [0, 0, 0, 0, −(1 + ω) tan I ] , (22c) and the elements of matrix M are M =   @@ -946,13 +947,13 @@ M =     - ω − e (1 + ω ) ¯ + ω − e (1 + ω) ¯ A f ¯ -A (1 + ω ) ¯ +A (1 + ω) ¯ A - s +s ¯ A ¯ A @@ -960,27 +961,27 @@ s ¯ A α 3 - (1 + ω )e - s + φ - s +(1 + ω)e +s + φ +s φ - m +m ω 1 + ω + e - f + K +f + K cmb + ¯ A - s +s ¯ A - f K +f K icb − ¯ A s ¯ A - f K -icb − ωe -s α +f K +icb −ωe +sα 1 ¯ A s @@ -988,20 +989,20 @@ s A f 0 ω − α -3 e +3e s α -1 e - s − K +1e +s − K icb 1 + ω + K icb (1 + ω − α -2 ) e - s + α - 3 φ - s α -3 φ - s +2)e +s + α +3φ +s α +3φ +s 0 0 1 ω 0 -1 0 0 0 (1 + ω ) +1 0 0 0 (1 + ω)    @@ -1047,84 +1048,84 @@ Confidential manuscript submitted to JGR-Planets 2.3.1 The Cassini state of a single-body, rigid Mercury For a rigid planet with no fluid and solid cores, our system of equations reduces to Equations (12a) and (20), - (ω − e ) ˜m + φ - m ˜ε + (ω − e) ˜m + φ +m ˜ε m = 0 , (23a) -˜m + (1 + ω ) ˜ε - m = −(1 + ω ) tan I . (23b) +˜m + (1 + ω) ˜ε +m = −(1 + ω) tan I . (23b) Using Equation (21), δω 1, and the approximation ¯ -A (1 + e + δω cos I ) = C + ¯ +A(1 + e + δω cos I ) = C + ¯ Aδω cos I ≈ C , these can be written as C ˜m = ¯ Aφ - m ˜ε - m , (24a) +m ˜ε +m , (24a) ˜m = δω sin I + cos I ˜ε - m +m . (24b) Equation (24b) gives a direct relationship between ˜m and ˜ε - m . For I = 8 . 5330◦ - , δω = -4 .9327 × 10 −7 +m. 
For I = 8.5330◦ +, δω = +4.9327×10−7 and taking ˜ε -m = 2.04 arcmin, this gives ˜m = 2.52 × 10 − 4 +m = 2.04 arcmin, this gives ˜m = 2.52×10−4 arcmin, much smaller than ˜ε - m : the offset of the rotation axis of the mantle with respect to its symmetry axis is very +m: the offset of the rotation axis of the mantle with respect to its symmetry axis is very small. Substituting Equation (24b) in Equation (24a) gives C Ω - p +p sin I + cos I ˜ε - m +m = ¯ -A Ω - o φ - m ˜ε - m , (25) +AΩ +oφ +m ˜ε +m , (25) and isolating for ˜ε - m , +m, ˜ε - m = C Ω - p sin I -− C Ω - p cos I + ¯ -A Ω - o φ - m . (26) +m = C Ω +p sin I +−C Ω +p cos I + ¯ +AΩ +oφ +m . (26) Upon using Equations (4), (15a), and Ω - o = 3 -2 n , we can write +o = 3 +2 n, we can write ˜ε - m = C Ω - p sin I +m = C Ω +p sin I −C Ω - p cos I + nM R 2 +p cos I + nM R2 (G -210 J -2 + 2 G -201 C -22 ) . (27) +210J +2 + 2G +201C +22) . (27) This is the standard prediction for the obliquity of a rigid Mercury occupying Cassini state 1 [see for instance Equation (1) of Baland et al., 2017, where their definition of ˙ -Ω is equal to − Ω - p ]. +Ω is equal to −Ω +p]. Hence, in the absence of a fluid core and inner core, our system retrieves the Cassini state of Mercury correctly. Equation (27) can be manipulated to solve instead for the normalized moment of inertia ˆ C , ˆ C = C -M R 2 = n +M R2 = n Ω - p G -210 J -2 + 2 G - 201 C - 22 +p G +210J +2 + 2G +201C +22 cos I + sin I / ˜ε - m . (28) +m . (28) which is equivalent to Equation (89) of Van Hoolst [2015]. It is based on the latter equation that a measurement of the obliquity gives a constraint on ˆ C . @@ -1135,30 +1136,30 @@ of the rotation axis about the symmetry axis. The second mode is the free retrog –13– Confidential manuscript submitted to JGR-Planets ω -f p = n M R 2 +f p = n M R2 C - G -210 J - 2 + 2 G -201 C +G +210J +2 + 2G +201C 22 , (29) which is equivalent to the prediction by Peale [2005] when neglecting its small elliptical component. 
Note that in Peale [2005] it was assumed that only the mantle was involved in the solidbody precession and hence C was replaced by C - m . Using C = 0.346 · M R 2 - [ Margot et al. , -2012] and the numerical values for n , J -2 , C +m. Using C = 0.346 · M R2 + [Margot et al., +2012] and the numerical values for n, J +2, C 22 and e - c given in Table 1, we obtain a free precession +c given in Table 1, we obtain a free precession period of T f p = 2π/ω f p = 1298 yr. If we use C m instead of C in Equation (29), and take C - m = 0. 431 · C = 0.431 · 0. 346 · M R 2 - [Margot et al. , 2012], we obtain T +m = 0.431 · C = 0.431 · 0.346 · M R2 + [Margot et al., 2012], we obtain T f p = 2π/ω f p = 560 yr. These estimates are similar to those obtained by Peale [2005]. Because the CMB is elliptical, @@ -1168,130 +1169,130 @@ true free precession period lies somewhere between 560 and 1298 yr. Regardless o the free precession period is much shorter than the forcing period of 325 kyr. Using Equation (29), Equation (27) can be written as [e.g. Baland et al., 2017] ˜ε - m = Ω - p sin I +m = Ω +p sin I −Ω - p cos I + ω +p cos I + ω f p . (30) The obliquity of Mercury is thus determined by how the forcing frequency Ω - p compares with +p compares with the free precession frequency ω -f p . Because ω +f p. Because ω f p > Ω - p , Mercury occupies Cassini state 1 [Peale , +p, Mercury occupies Cassini state 1 [Peale , 1974]. Furthermore, Equation (30) shows that a large obliquity can be generated by resonant amplification if Ω p ≈ ω -f p . Since ω +f p. Since ω f p Ω - p , resonant amplification is minimal and the resulting +p, resonant amplification is minimal and the resulting obliquity, ˜ε - m ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8 .5◦ - . +m ≈ 2 arcmin, is much smaller than the inclination angle I ≈ 8.5◦ +. 
2.3.2 The misalignment of the fluid and solid cores -With ω = − 1 − δω cos I and δω 1, Equation (12d) gives ˜n - s ≈ ˜m -s ; as for the mantle, +With ω = −1 − δω cos I and δω 1, Equation (12d) gives ˜n +s ≈ ˜m +s; as for the mantle, the rotation and symmetry axes of the inner core remain closely aligned in the Cassini state. The relationship between ˜m and ˜ε - m of Equation (24b) is independent of the interior structure, +m of Equation (24b) is independent of the interior structure, so it remains unchanged when a fluid and a solid cores are present. Substituting it in Equation (12a), and setting ˜n - s = ˜m -s , the angular momentum equation of the whole planet becomes +s = ˜m +s, the angular momentum equation of the whole planet becomes C Ω - p +p sin I + cos I ˜ε m + ( ¯ A - f cos I Ω - p ) ˜m +f cos I Ω +p) ˜m f + ¯ A - s (cos I Ω - p − Ω - o α - 3 φ - s )˜n - s = ¯ -A Ω - o φ - m ˜ε - m . (31) +s(cos I Ω +p − Ω +oα +3φ +s)˜n +s = ¯ +AΩ +oφ +m ˜ε +m . (31) This latter equation shows how the misaligned inner core and fluid core can lead to a modification of the mantle obliquity ˜ε - m . Approximate analytical solutions of ˜n - s and ˜m +m. 
Approximate analytical solutions of ˜n +s and ˜m f are given by ˜n - s ≈ Ω - p +s ≈ Ω +p κλ - s - 1 + Ω - o (K - icb − α -1 e - s ) +s +1 + Ω +o(K +icb − α +1e +s) λ - f +f sin I + cos I ˜ε - m +m − Ω - o α -3 φ - s +oα +3φ +s κλ - s ˜ε - m , (32a) +s ˜ε +m , (32a) ˜m - f ≈ Ω - p +f ≈ Ω +p λ - f +f sin I + cos I ˜ε m + Ω - o +o λ - f ¯ +f ¯ A - s +s ¯ A - f - K +f +K icb − α -1 e - s - ˜n - s , (32b) +1e +s +˜n +s , (32b) where κ = 1 − ¯ A - s +s ¯ A - f Ω 2 +f Ω2 o - K +K icb − α -1 e - s - 2 +1e +s +2 λ - s λ +s λ f , (33a) λ - f = ¯σ - f − Ω - p cos I , (33b) +f = ¯σ +f − Ω +p cos I , (33b) λ s = ¯σ - s − Ω - p cos I , (33c) +s − Ω +p cos I , (33c) –14– Confidential manuscript submitted to JGR-Planets and where we have introduced the frequencies @@ -1299,97 +1300,97 @@ and where we have introduced the frequencies f = Ω o e - f + K - cmb + ¯ +f + K +cmb + ¯ A - s +s ¯ A - f K +f K icb , (33d) ¯σ - s = Ω +s = Ω o - e - s α - 3 α +e +sα +3α g − e - s α +sα 1 + α -3 φ - s + K - icb +3φ +s + K +icb . (33e) These solutions are good approximations for all the results that we present in section 3. For an observed mantle obliquity ˜ε - m and for a chosen set of interior model parameters, they provide +m and for a chosen set of interior model parameters, they provide useful predictions of ˜n - s and ˜m +s and ˜m f . In the limit of a very strong coupling between the fluid core, solid core and mantle, ¯σ - s +s Ω - p and ¯σ - f Ω - p , so that ˜n +p and ¯σ +f Ω +p, so that ˜n s → 0, ˜m f → 0 and Equation (31) reverts back to Equation (25) for a rigid planet. In the opposite limit of no coupling between the fluid core, solid core and mantle (i.e. for spherical internal boundaries, e - f = e - s = γ - s = 0 and no viscous or EM coupling, +f = e +s = γ +s = 0 and no viscous or EM coupling, K - cmb = K +cmb = K icb = 0), then φ - s = 0 , κ = 1 , λ - f = λ -s = − Ω - p cos I , ˜m - f = ˜n - s = − (tan I + ˜ε - m ) . 
(34) +s = 0 , κ = 1 , λ +f = λ +s = −Ω +p cos I , ˜m +f = ˜n +s = −(tan I + ˜ε +m) . (34) Inserting these in Equation (31), and with the moment of inertia of the mantle equal to C - m = +m = C − ¯ A - f − ¯ +f − ¯ A - s , we obtain +s, we obtain C m Ω - p +p sin I + cos I ˜ε - m +m = ¯ -A Ω - o φ - m ˜ε - m . (35) +AΩ +oφ +m ˜ε +m . (35) which describes, as expected, a forced precession of the mantle alone. If this was the case for Mercury, taking C -m /C = 0. 431, the obliquity should be ˜ε - m ≈ 0. 88 arcmin, substantially smaller +m/C = 0.431, the obliquity should be ˜ε +m ≈ 0.88 arcmin, substantially smaller than the observed obliquity of ˜ε m ≈ 2 arcmin. If ¯σ f ≈ Ω - p (and thus λ +p (and thus λ f → 0) and/or ¯σ - s ≈ Ω - p (and thus λ +s ≈ Ω +p (and thus λ s → 0) resonant amplification leads to large amplitudes for ˜m f , ˜n - s and the mantle obliquity ˜ε - m . The frequencies ¯σ - f and +s and the mantle obliquity ˜ε +m. The frequencies ¯σ +f and ¯σ s are closely related to the FCN and FICN frequencies ω - f cn and ω - f icn , respectively. Hence, +f cn and ω +f icn, respectively. Hence, just as a large mantle obliquity can result from resonant amplification when the forcing frequency approaches the free precession frequency, a large mantle obliquity can likewise result from resonant amplification when the forcing frequency approaches the FCN or FICN frequencies. These @@ -1397,86 +1398,86 @@ frequencies depend on the interior density structure and are not known. However, that for reasonable interior models of Mercury, the FCN and FICN periods are in the range of a few hundred yr. This is sufficiently far from the forcing period (325 kyr) that we do not expect an important amplification effect. Furthermore, since ω -f cn , ω +f cn, ω f icn Ω - p , then ¯σ - f Ω - p +p, then ¯σ +f Ω +p and ¯σ s Ω - p , and we are in the strong coupling limit. The mantle obliquity should be close +p, and we are in the strong coupling limit. 
The mantle obliquity should be close to that expected for a rigid planet, as observations suggest. Therefore, we expect that ˜m f and ˜n - s should be of the order of ˜ε - m or smaller. This further justifies the assumption of small angles +s should be of the order of ˜ε +m or smaller. This further justifies the assumption of small angles that we have adopted. 3 Results 3.1 Geodetic constraints and interior density structure All our interior models are constrained to match the mass M of Mercury and specific choices of ˆ -C = C/M R 2 +C = C/M R2 and C - m /C . The choice of ˆ +m/C . The choice of ˆ C is determined from Equation (28). For the parameters listed in Table 1, and an observed obliquity of ε - m = 2. 04 arcmin [Margot et al. , 2012], +m = 2.04 arcmin [Margot et al., 2012], this gives ˆ -C = C/M R 2 - = 0. 3455 and all our interior models are consistent with this choice. +C = C/M R2 + = 0.3455 and all our interior models are consistent with this choice. Obviously, this reflects a Cassini state equilibrium in which the fluid core and inner core are –15– Confidential manuscript submitted to JGR-Planets perfectly aligned with the mantle, which is not strictly correct. Hence, we make an error in estimating ˆ C from Equation (28), or conversely in predicting ε - m based on a given choice for ˆ +m based on a given choice for ˆ C . Part of the ob jective of our study is to estimate how large this error is. The ratio C -m /C is obtained +m/C is obtained from the amplitude of the 88-day longitudinal mantle libration φ - o , which is given by +o, which is given by φ - o = 6 · f (e - c )C - 22 M R 2 +o = 6 · f (e +c)C +22 M R2 C C C - m 1 +m 1 1 + ζ , (36) where f (e - c ) = 1 − 11 e 2 +c) = 1 − 11e2 c + 959 -48 e 4 +48 e4 c , (37) and where ζ is a correction that takes into account the entrainment of the inner core in the libration - [ Van Hoolst et al., 2012; Dumberry et al. 
, 2013; Dumberry and Rivoldini , 2015]; this correction + [Van Hoolst et al., 2012; Dumberry et al., 2013; Dumberry and Rivoldini , 2015]; this correction is small and, to simplify, we neglect it here. Taking the observed libration amplitude -to be 38.5 arcsec [ Margot et al. , 2012], ˆ -C = C/M R 2 +to be 38.5 arcsec [Margot et al., 2012], ˆ +C = C/M R2 = 0.3455 and C - 22 and e - c from Table 1, +22 and e +c from Table 1, this corresponds to a ratio C -m /C = 0. 4269, or equivalently ˆ +m/C = 0.4269, or equivalently ˆ C m = C - m /M R 2 - = 0. 1475. +m/M R2 + = 0.1475. For all results presented in our study, the crustal density is set at ρ - c = 2974 kg m −3 +c = 2974 kg m−3 [Sori , -2018]. Our standard choice for the crustal thickness is h = 26 km [ Sori , 2018], although in +2018]. Our standard choice for the crustal thickness is h = 26 km [Sori , 2018], although in section 3.2 we also present some results with other choices of h. We have considered two possible prescriptions connected to the density of the inner core. First, for all the results presented in sections 3.2, 3.3 and 3.4, we have used a fixed inner core density of ρ -s = 8800 kg m − 3 +s = 8800 kg m−3 approximately that obtained in Dumberry and Rivoldini [2015] under the assumption of a pure Fe composition in face-centered cubic phase. This captures an end-member scenario where the core composition is an Fe-S alloy; at Mercury’s core conditions, crystallization of Fe is relatively -free of S on the Fe-rich side of the eutectic [ Li et al., 2001]. If the core composition is instead +free of S on the Fe-rich side of the eutectic [Li et al., 2001]. If the core composition is instead an Fe-Si alloy, approximately equal partitioning of Si between the liquid and solid phase [e.g. Schaefer et al., 2017] implies a weak chemical contrast at the ICB. 
The density jump across the ICB is expected to be small, although since density increases with depth, the contrast between @@ -1484,63 +1485,63 @@ the mean densities of the fluid and solid cores is larger. It is these mean dens our Mercury model with uniform density layers. To capture this other end-member core composition scenario, in section 3.5 we present results where we instead prescribe a fixed density contrast between the fluid and solid core; specifically, we set the numerical value of α -3 . +3. For a given choice of inner core radius r -s , the densities of the mantle ( ρ - m ) and fluid core -( ρ - f ) and the radius of the CMB ( r +s, the densities of the mantle (ρ +m) and fluid core +(ρ +f ) and the radius of the CMB (r f ) are determined such that the interior model matches M , ˆ -C = 0. 3455 and ˆ +C = 0.3455 and ˆ C m = 0.1475. Figure 3a shows how ρ - m , ρ - f and r +m, ρ +f and r f vary as a function of inner core radius r s for each of the two inner core density scenarios: a fixed ρ -s , or a fixed α -3 . When +s, or a fixed α +3. When the inner core is small, its presence has a limited influence on the resulting density structure, and we find ρ - m = 3197 kg m− 3 - , ρ - f = 7263 kg m−3 +m = 3197 kg m−3 +, ρ +f = 7263 kg m−3 and r f = 2000 km in each of the two scenarios. When ρ - s is fixed to 8800 kg m− 3 - , as the inner core reaches 1500 km in size, r +s is fixed to 8800 kg m−3 +, as the inner core reaches 1500 km in size, r f increases to above 2100 km, ρ - m approaches 4000 kg m −3 +m approaches 4000 kg m−3 and ρ - f is reduced to below 5000 kg m− 3 - . +f is reduced to below 5000 kg m−3 +. Figure 3a illustrates that when adopting a fixed ρ - s , there is a limit in the possible inner core +s, there is a limit in the possible inner core size, as otherwise ρ m gets unreasonably large and ρ - f gets inappropriately small (as it would +f gets inappropriately small (as it would require an excessively large concentration of light elements). 
When adopting instead a fixed density contrast, with α -3 = 0. 1, the changes in r +3 = 0.1, the changes in r f , ρ - m and ρ - f with inner core radius are more modest, +m and ρ +f with inner core radius are more modest, allowing larger possible inner core sizes. Different assumptions on ρ - c and h would alter the +c and h would alter the numerical values shown on Figure 3a but not their trends with r -s . +s. Figure 3b shows how the FCN and FICN periods vary with r s for each of the two inner core density scenarios and in the absence of viscous and EM coupling (i.e. K - cmb = K +cmb = K icb = –16– Confidential manuscript submitted to JGR-Planets - 0200400600800100012001400 +0200400600800100012001400 period (yr) 0 200 400 600 800 1000 1200 1400 Inner core radius (km)300040005000600070008000 @@ -1550,28 +1551,29 @@ Inner core radius (km) 200020202040206020802100 Fluid core radius (km)fluid core density CMB radius FICNFCN - int -mantle densitya b - FCNFigure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand +int +mantle densitya b + FCN +Figure 3. a) Fluid core density (red), mantle density (blue), fluid core radius (orange, right-hand side scale) and b) FICN (blue) and FCN (red) periods as a function of inner core radius. The FCN period when the external torque is set to zero (FCN - int ) is shown in orange. Solid lines correspond to -a scenario where the density of the inner core is set to 8800 kg m −3 - ; thin dashed lines correspond to a +int ) is shown in orange. Solid lines correspond to +a scenario where the density of the inner core is set to 8800 kg m−3 +; thin dashed lines correspond to a scenario where the density contrast between the fluid and solid cores is set to α -3 = 0. 1. +3 = 0.1. 0). Both of these free modes are retrograde. The FCN period is close to 400 yr for a small inner core, increasing to approximately 600 yr at the largest r -s . The FICN period is shorter, close +s. 
The FICN period is shorter, close to 100 yr (160 yr) for a small inner core and decreasing to approximately 40 yr (120 yr) at the largest r s under the fixed ρ - s (fixed α -3 ) scenario. This confirms that the FCN and FICN periods +s (fixed α +3) scenario. This confirms that the FCN and FICN periods are both much shorter than the forcing precession period of 325 kyr and sufficiently far away from it that we do not expect large ˜m - f and ˜n - s from resonant amplification. +f and ˜n +s from resonant amplification. The FCN and FICN periods that we have computed include the influence of the external torque. As shown by Baland et al. [2019], the external torque allow solid regions to have a free motion in inertial space thereby affecting the free rotational modes. To a good approximation, @@ -1581,69 +1583,69 @@ icb = 0 are given by ω f cn ≈ −Ω - o +o ¯ A ¯ A - m + ¯ +m + ¯ A - s +s - e - f + φ - m +e +f + φ +m + Ω - o e - f φ - m -( e - f + φ - m ) , (38a) +o e +f φ +m +(e +f + φ +m) , (38a) ω - f icn ≈ Ω - o +f icn ≈ Ω +o ¯ A + ¯ A - s +s ¯ A − ¯ A - s +s - e - s α +e +sα 1 − e -s α -3 α +sα +3α g − α -3 φ - s +3φ +s . (38b) The expression of the FICN frequency involves the inertial torque (term e - s α -1 ) and the gravitational - torque from the rest of Mercury ( e - s α -3 α -g ) and the Sun ( α -3 φ - s ) acting on the inner core. +sα +1) and the gravitational + torque from the rest of Mercury (e +sα +3α +g ) and the Sun (α +3φ +s) acting on the inner core. For both of our inner core density scenarios (and our choices of ρ -s = 8800 kg m −3 +s = 8800 kg m−3 and α 3 = -0 .1), the internal gravitational torque dominates that from the Sun. Furthermore, α -3 α +0.1), the internal gravitational torque dominates that from the Sun. Furthermore, α +3α g α -1 ; +1; the gravitational torque dominates the inertial torque, in large part because of the slow rotation rate of Mercury. As a result the FICN frequency is negative (i.e. the precession motion is retrograde). 
This is also the case for the Moon [e.g. Dumberry and Wieczorek , 2016; Stys and Dumberry , 2018], but it is different for Earth, where α - 1 > α -3 α +1 > α +3α g because of its faster rotation and the FICN mode is prograde [Mathews et al., 1991]. Note also that our approximate expres–17– @@ -1651,85 +1653,85 @@ Confidential manuscript submitted to JGR-Planets sion for the FICN differs by a factor ( ¯ A + ¯ A - s ) /( ¯ +s)/( ¯ A − ¯ A - s ) compared to that given in Dumberry +s) compared to that given in Dumberry and Wieczorek [2016] and Stys and Dumberry [2018] for the Moon. The expression for FCN frequency differs from the usual expression for Earth. First, it involves the external torque from the Sun captured by the parameter φ - m . If we set φ - m = 0, +m. If we set φ +m = 0, we obtain the FCN frequency for a decoupled model in which only interior torques contribute, ω f cn,int ≈ −Ω - o +o ¯ A ¯ A - m + ¯ +m + ¯ A - s +s e - f . (38c) +f . (38c) This frequency is slightly different from the usual expression for Earth, involving the ratio ¯ A/( ¯ A - m + +m+ ¯ A - s ) rather than ¯ +s) rather than ¯ A/ ¯ A - m . This is because of the relatively thin mantle of Mercury; for the largest +m. This is because of the relatively thin mantle of Mercury; for the largest r s considered, the moment of inertia of the inner core can get close to 40% of that of the mantle and is not negligible. The period of the FCN when only interior torques contribute is shown in Figure 3b. It is close to 1100 yr for a small inner core, increasing to approximately 1500 yr at the largest r -s . Hence, the influence of the solar torque reduces the FCN period by a factor +s. Hence, the influence of the solar torque reduces the FCN period by a factor of approximately 3. We note that the FICN period, in contrast, is not altered substantially when the external torque is set to zero. 
3.2 Gravitational and inertial coupling Let us now investigate the obliquities of the mantle, fluid core and inner core in their equilibrium Cassini state. We assume a fixed inner core density scenario in this section, with ρ - s = -8800 kg m− 3 - . Viscous and EM coupling are set to zero in order to isolate the influence of gravitational +s = +8800 kg m−3 +. Viscous and EM coupling are set to zero in order to isolate the influence of gravitational and inertial coupling. Figure 4 shows how ˜ε - m , ˜m +m, ˜m f and ˜n - s vary as functions of inner +s vary as functions of inner core radius. We show calculations for three different choices of crustal thickness, but let us concentrate first on the case for h = 26 km. For small r -s , we retrieve an obliquity of ˜ε - m = 2. 0494 +s, we retrieve an obliquity of ˜ε +m = 2.0494 arcmin (Figure 4a). ˜ε - m decreases with r -s , but not substantially; at the largest r +m decreases with r +s, but not substantially; at the largest r s (1500 km), ˜ε - m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ε - m = 2.04 +m = 2.0460 arcmin, a decrease of 0.0034 arcmin. The maximum difference from ˜ε +m = 2.04 arcmin, the obliquity that we used in setting the constraint for ˆ C – and hence the prediction -we should recover for a rigid planet – is an overestimate of approximately 0 . 01 arcmin which +we should recover for a rigid planet – is an overestimate of approximately 0.01 arcmin which occurs for small inner cores. The deviation of ˜ε - m from that of a rigid planet is due to the misalignments of the fluid +m from that of a rigid planet is due to the misalignments of the fluid core ( ˜m f ) and solid inner core ( ˜n - s ) with respect to the mantle (Figure 4b). The misalignment +s) with respect to the mantle (Figure 4b). 
The misalignment of the fluid core spin axis from the mantle is significant: ˜m - f is approximately 4.02 arcmin for +f is approximately 4.02 arcmin for a small inner core and does not vary substantially with inner core size; it drops to 3.97 arcmin at the largest r -s . Recall that ˜m +s. Recall that ˜m f is measured with respect to the mantle rotation axis (which coincides closely with the symmetry axis), so the obliquity of the spin axis of the fluid core with respect to the orbit normal is ˜ε -m + ˜m +m+ ˜m f ≈ 6 arcmin. The reason why the obliquity of the spin axis of the fluid core is larger than that of the mantle can be understood from Equation (32b), which shows that ˜m @@ -1739,10 +1741,10 @@ case for Mercury, the resonant amplification is very weak but remains present an f is larger than zero. In contrast to ˜m - f , the misalignment of the inner core with respect to the mantle is much +f , the misalignment of the inner core with respect to the mantle is much smaller; ˜n - s is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ε - m . +s is approximately between 0.023-0.025 arcmin, a factor 80 times smaller than ˜ε +m. Physically, this is because the gravitational torque acting on the inner core when it is tilted from the mantle is much stronger than the inertial torque acting at the ICB. As a result, the inner core must remain in close alignment with the mantle. Presented differently, since the FICN period @@ -1756,209 +1758,210 @@ Inner core radius (km) 1.52.02.53.03.54.04.5 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km)crustal thickness - 16 km +16 km 36 km26 km crustal thickness - 16 km +16 km 36 km26 kmε - m +m ε - g +g for a rigid planet ε - m m - f +m m +f n - s (x100)a bFigure 4. a) Obliquity of the mantle ( ˜ε - m , solid lines) and of the principal moment of inertia ( ˜ε - g , +s (x100)a b +Figure 4. 
a) Obliquity of the mantle ( ˜ε +m, solid lines) and of the principal moment of inertia ( ˜ε +g , dashed line) b) ˜m f (solid lines) and ˜n s (dashed lines, x100) as a function of inner core radius and for different choices of crustal thickness. ily follow the forced precession of the mantle and remains gravitationally locked to it. ˜n - s does +s does not change substantially as the inner core increases in size. When K icb = K cmb = 0, a good approximation of ˜ε - m is given by +m is given by ˜ε - m = C - Ω - p sin I -− C - Ω - p cos I + ¯ -A Ω - o φ - m , (39) +m = C +Ω +p sin I +−C +Ω +p cos I + ¯ +AΩ +oφ +m , (39) which is identical to the prediction of Equation (26) for a rigid Mercury, except C is replaced by C - . The latter represents an effective moment of inertia that accounts for the coupling of +. The latter represents an effective moment of inertia that accounts for the coupling of the core to the mantle, C = C + ¯ A - c χ , (40) +cχ , (40) where ¯ A - c = ¯ +c = ¯ A - f + ¯ +f + ¯ A - s and +s and χ = Ω - p cos I +p cos I ¯ A - c +c ¯ A - f +f ( ¯σ - f − Ω - p cos I ) + ¯ +f − Ω +p cos I ) + ¯ A - s +s ( ¯σ - s − Ω - p cos I ) +s − Ω +p cos I ) − ¯ A - s +s ¯ A - c Ω - o α -3 φ - s +c Ω +oα +3φ +s ( ¯σ - s − Ω - p cos I ) . (41) +s − Ω +p cos I ) . (41) The frequencies ¯σ f and ¯σ - s are given in Equations (33d-33e) and closely approximate the FCN +s are given in Equations (33d-33e) and closely approximate the FCN and FICN frequencies of Equations (38c) and (38b), respectively. The factor χ captures then how the core is entrained to precess with the mantle, with the coupling between the two expressed in terms of the resonant amplification of the FCN and FICN frequencies. In the limit of ¯σ - f , ¯σ - s → 0, then χ = − 1, C +f , ¯σ +s → 0, then χ = −1, C = C -m , the core is fully decoupled from the mantle and we +m, the core is fully decoupled from the mantle and we retrieve Equation (35). 
If instead ¯σ - f , ¯σ - s → ∞ , then χ = 0, C +f , ¯σ +s → ∞, then χ = 0, C = C and we retrieve the prediction for a rigid planet. When both the FCN and FICN frequencies are much larger than Ω - p , +p, as is the case here, resonant amplification is weak, χ is small and positive, C > C and this leads to a slightly larger ˜ε - m compared to a rigid planet. Because the inner core core is gravitationally +m compared to a rigid planet. Because the inner core core is gravitationally locked to the mantle, deviations from a rigid planet are dominantly caused by the misalignment of the fluid core. In Equation (41), ¯σ - s ¯σ +s ¯σ f , so to a good approximation –19– Confidential manuscript submitted to JGR-Planets χ ≈ ¯ A - f +f ¯ A - c Ω - o cos I +c Ω +o cos I ( ¯σ f − Ω - p cos I ) . (42) -For a small inner core, χ ≈ 7. 55 ×10 − 3 - . As the inner core grows, ¯ +p cos I ) . (42) +For a small inner core, χ ≈ 7.55×10−3 +. As the inner core grows, ¯ A - f decreases, and the combination +f decreases, and the combination ¯ A - c χ also decreases. This implies that C +cχ also decreases. This implies that C decreases with inner core size and, consequently, ˜ε - m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the +m also decreases with inner core size, as seen in Figure 4a, though it remains larger than the prediction for a rigid planet. The specific predictions of ˜ε - m , ˜m +m, ˜m f and ˜n - s on Figure 4 depend sensitively on the assumed +s on Figure 4 depend sensitively on the assumed interior density model and on the dynamical ellipticities of the inner core (e - s ) and fluid core +s) and fluid core (e - f ). Hence, it depends on the choices we have made for the inner core density ρ - s , the crustal +f ). Hence, it depends on the choices we have made for the inner core density ρ +s, the crustal density ρ - c and its thickness h. Changing ρ - s , ρ +c and its thickness h. 
Changing ρ +s, ρ c and/or h requires a different combination of ρ - f , +f , ρ m and r f in order to match M , ˆ C and ˆ C -m . In turn, this leads to different ellipticities at interior +m. In turn, this leads to different ellipticities at interior boundary in order to match J - 2 and C -22 , and thus different predictions for ˜ε - m , ˜m - f and +2 and C +22, and thus different predictions for ˜ε +m, ˜m +f and ˜n - s . To illustrate this, we show on Figure 4 two additional predictions computed with crustal +s. To illustrate this, we show on Figure 4 two additional predictions computed with crustal thicknesses changed to h = 16 and 36 km. The change in ˜ε - m remains modest, ∼ 0. 025%, but +m remains modest, ∼ 0.025%, but the changes in ˜m - f and ˜n - s are more substantial, ∼ 5% and ∼ 10%, respectively. +f and ˜n +s are more substantial, ∼ 5% and ∼ 10%, respectively. We also show on Figure 4a (only for h = 26 km) the obliquity of the principal moment of inertia of the whole planet, which we denote by ˜ε - g . A difference between ˜ε +g . A difference between ˜ε g and ˜ε - m occurs +m occurs if the inner core is misaligned with the mantle. As seen in the mantle frame, a tilted inner core (with ˜n s assumed small) leads to an off-diagonal component of the moment of inertia tensor -of ( C - s − ¯ +of (C +s− ¯ A - s ) α +s)α 3 ˜n - s = ¯ +s = ¯ A - s e - s α +se +sα 3 ˜n - s . The angle by which the mantle frame must be rotated so that +s. The angle by which the mantle frame must be rotated so that the moment of inertia of the whole planet is purely diagonal is ( ¯ A - s e - s α +se +sα 3 ˜n - s ) /( ¯ -Ae ), and hence a +s)/( ¯ +Ae), and hence a good approximation of ˜ε g is ˜ε - g = ˜ε - m + ¯ +g = ˜ε +m + ¯ A - s e - s +se +s ¯ Ae α 3 ˜n - s . (43) +s . (43) Since the inner core is gravitationally forced into a close alignment with the mantle, the difference between ˜ε g and ˜ε - m remains very small. 
For the largest inner core radius that we have +m remains very small. For the largest inner core radius that we have considered, ˜ε - g differs from ˜ε +g differs from ˜ε m only by approximately 0.001 arcmin. 3.3 Viscous coupling We now investigate how viscous coupling at the CMB and ICB affects the equilibrium Cassini @@ -1968,76 +1971,76 @@ complete analytical solutions for the flow resulting from a differentially prece been derived [e.g. Stewartson and Roberts , 1963; Busse , 1968; Rochester , 1976] and we exploit these solutions here. The parametrization of the viscous coupling constants K cmb and K - icb based +icb based on them are given in Mathews and Guo [2005], K cmb = πρ -f r 4 +f r4 f ¯ A - f +f ν 2Ω - o - 0.195 − 1. 976 i +o +0.195 − 1.976i , (44a) K icb = πρ -f r 4 +f r4 s ¯ A - s +s ν 2Ω - o - 0. 195 − 1 .976 i +o +0.195 − 1.976i , (44b) where ν is the kinematic viscosity. The appropriate numerical value for ν in planetary interior is not well known but based on theoretical and experimental studies it is expected to be -of the order of 10 −6 - m 2 - s −1 - [e.g. Gans , 1972; de Wijs et al. , 1998; Alf`e et al., 2000; Rutter et al., +of the order of 10−6 + m2 + s−1 + [e.g. Gans , 1972; de Wijs et al., 1998; Alf`e et al., 2000; Rutter et al., 2002a,b]. –20– Confidential manuscript submitted to JGR-Planets The above parameterizations are valid only under the assumption that the flow in the boundary layer remains laminar. Whether this is reasonable can be assessed by evaluating the Reynolds number Re = r -f ∆ u -f /ν , associated with the differential velocity ∆ u +f ∆u +f /ν , associated with the differential velocity ∆u f = r f Ω - o ˜m +o ˜m f at the CMB. For r f = 2000 km, and taking ˜m -f = 4 arcmin ≈ 0. 001 rad from the results in the previous -section, we get ∆ u +f = 4 arcmin ≈ 0.001 rad from the results in the previous +section, we get ∆u f ∼ 2 mm/s and Re ∼ 6 × 109 - . Such a large Reynolds number indicates +. 
Such a large Reynolds number indicates that the viscous friction between the fluid core and mantle should induce turbulent flows, as -is the case for the Cassini state of the Moon [ Yoder , 1981; Wil liams et al. , 2001; C´ebron et al. , +is the case for the Cassini state of the Moon [Yoder , 1981; Wil liams et al., 2001; C´ebron et al., 2019]. For a boundary layer that involves turbulent flows, the viscous torque should be independent of the fluid viscosity and proportional to the square of the differential velocity. The coupling constant K cmb should be in the form K - cmb = f +cmb = f cmb ˜m f - 0.195 − 1. 976 i +0.195 − 1.976i , (45) where f - cmb is a numerical factor that depends among other things on surface roughness. Incorporating +cmb is a numerical factor that depends among other things on surface roughness. Incorporating a viscous coupling of this form in our rotational model is more challenging not only because f - cmb is not known but also because the viscous torque is no longer linear in ˜m +cmb is not known but also because the viscous torque is no longer linear in ˜m f . One strategy is to find solutions through an iterative process. The simpler alternative strategy that we adopt is to use the laminar formulas of Equation (44) but with the understanding that ν @@ -2045,87 +2048,87 @@ represents an effective turbulent viscosity. To give an estimate of an appropriate turbulent value for ν , we turn to the Cassini state of the Moon. A measure of the viscous dissipation at the CMB of the Moon has been obtained by fitting a rotation model to the librations of the Moon observed by Lunar Laser Ranging (LLR) -[ Wil liams et al. , 2001, 2014; Wil liams and Boggs , 2015]. Viscous dissipation is reported in terms -of a coupling parameter K and a recent estimate is K /C - L = (1.41 ± 0.34)× 10 −8 - day − 1 - [ Wil liams +[Wil liams et al., 2001, 2014; Wil liams and Boggs , 2015]. 
Viscous dissipation is reported in terms +of a coupling parameter K and a recent estimate is K/C +L = (1.41±0.34)×10−8 + day−1 + [Wil liams and Boggs , 2015], where C - L is the lunar polar moment of inertia. The connection between K +L is the lunar polar moment of inertia. The connection between K and K - cmb is +cmb is - I m [K - cmb ] +I m[K +cmb] = K C L C - L +L C f L 1 Ω - L , (46) +L , (46) where C - f L is the moment of inertia of the lunar core and Ω - L = 2. 66 × 10 − 6 - s −1 +f L is the moment of inertia of the lunar core and Ω +L = 2.66 × 10−6 + s−1 the lunar rotation rate. With C - f L /C - L ∼ 7 × 10− 4 - [e.g. Wil liams et al. , 2014], this gives |I m [K -cmb ]| ∼ -9 × 10− 5 - . In order to match this amplitude in Equation (44a), with lunar parameters and assuming - a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10− 4 - m 2 -s − 1 - , about 500 times larger than the laminar viscosity. Note that the differential velocity at the -CMB of the Moon is closer to 3 cm/s [ Yoder , 1981; Wil liams et al. , 2001], more than 10 times +f L/C +L ∼ 7 × 10−4 + [e.g. Wil liams et al., 2014], this gives |I m[K +cmb]| ∼ +9×10−5 +. In order to match this amplitude in Equation (44a), with lunar parameters and assuming + a lunar core radius of 400 km, the required turbulent viscosity is ν ≈ 5 × 10−4 + m2 +s−1 +, about 500 times larger than the laminar viscosity. Note that the differential velocity at the +CMB of the Moon is closer to 3 cm/s [Yoder , 1981; Wil liams et al., 2001], more than 10 times larger than our estimate for Mercury above. Since the effective turbulent coupling constant K cmb is proportional to the differential velocity, the effective turbulent viscosity appropriate for Mercury - should be smaller. Thus, ν ≈ 5 × 10− 4 - m 2 - s− 1 + should be smaller. Thus, ν ≈ 5×10−4 + m2 + s−1 gives a conservative upper bound for the possible effective turbulent viscosity that can be expected for Mercury. 
Figure 5 shows how ˜ε - m , ˜m +m, ˜m f and ˜n - s vary as functions of inner core radius for different choices -of effective viscosities. For ν = 10 − 5 - m 2 - s − 1 - , viscous coupling is too weak to affect ˜ε +s vary as functions of inner core radius for different choices +of effective viscosities. For ν = 10−5 + m2 + s−1 +, viscous coupling is too weak to affect ˜ε m and ˜m - f and they are essentially unchanged from the solutions shown in Figure 4. With increasing +f and they are essentially unchanged from the solutions shown in Figure 4. With increasing ν , the stronger viscous coupling between the core and the mantle reduces their differential velocity, and ˜m f is reduced. With the reduced differential velocity at the CMB, the prediction of ˜ε m gets closer to 2.04 arcmin, the obliquity expected for a rigid planet. Although our CMB viscous coupling model is different than the one used by Peale et al. [2014], our results for ˜ε - m +m and ˜m - f are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the +f are qualitatively similar: viscous coupling at the CMB acts to reduce the offset of the fluid spin axis from the mantle symmetry axis. 
Considering the upper bound in turbulent viscosity - that we have identified above (i.e ν ≈ 5 × 10 − 4 - m 2 - s − 1 - ), the influence of viscous cou–21– + that we have identified above (i.e ν ≈ 5 × 10−4 + m2 + s−1 +), the influence of viscous cou–21– Confidential manuscript submitted to JGR-Planets ε - mε - g +mε +g m - f +f n - s +s 2.0382.0402.0422.0442.0462.0482.050 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 @@ -2133,63 +2136,64 @@ Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km)kinematic viscosity: - 0.01 m 2 - s -1 - 0.00001 m 2 - s -1 -0.0001 m 2 - s -1 -0.0005 m 2 - s -1 -0.001 m 2 - s -1 -a b + 0.01 m2 + s-1 + 0.00001 m2 + s-1 +0.0001 m2 + s-1 +0.0005 m2 + s-1 +0.001 m2 + s-1 +a b for a rigid planet ε - m Figure 5. a) Obliquity of the mantle ( ˜ε - m , solid lines) and gravity field ( ˜ε - g , dashed lines) b) ˜m +m +Figure 5. a) Obliquity of the mantle ( ˜ε +m, solid lines) and gravity field ( ˜ε +g , dashed lines) b) ˜m f (solid lines) and ˜n s (dashed lines) as a function of inner core radius and for different choices of kinematic viscosity (color in legend). pling on ˜ε - m remains modest, reducing its amplitude by a maximum of approximately 0.0015 +m remains modest, reducing its amplitude by a maximum of approximately 0.0015 arcmin. The inclusion of viscous coupling at the ICB can lead to a substantial change in inner core tilt. A larger viscosity leads to stronger viscous coupling and to a closer alignment of the inner core with the fluid core spin axis. The viscous coupling strength is inversely proportional to r -s , so a larger viscosity results in a larger inner core radius at which viscous coupling is of -a similar magnitude to gravitational coupling. 
Taking again an upper bound of ν = 5× 10− 4 -m 2 - s −1 - , Figure 5 indicates that ˜n - s may be 1 arcmin or larger only if the inner core radius is +s, so a larger viscosity results in a larger inner core radius at which viscous coupling is of +a similar magnitude to gravitational coupling. Taking again an upper bound of ν = 5×10−4 +m2 + s−1 +, Figure 5 indicates that ˜n +s may be 1 arcmin or larger only if the inner core radius is smaller than approximately 100 km. For an inner core of a few hundred km in radius, gravitational coupling is much larger than viscous coupling, and the inner core tilt is limited to a fraction of 1 arcmin. The larger inner core tilt observed with increasing effective viscosity results in a larger offset between the obliquity of the principal moment of inertia ˜ε - g and that of the mantle ˜ε - m , -though it remains limited. For the upper bound of ν = 5 × 10 − 4 - m 2 - s − 1 - , and for r +g and that of the mantle ˜ε +m, +though it remains limited. For the upper bound of ν = 5 × 10−4 + m2 + s−1 +, and for r s = 1500 km, the difference between ˜ε g and ˜ε - m is limited to 0.0013 arcmin. +m is limited to 0.0013 arcmin. The conclusion that emerges from Figure 5 is that the larger the inner core is, the smaller the misalignments of both the fluid core and inner core are with respect to the mantle. This implies that the larger the inner core is, the more we approach a planet precessing as a rigid body, although the misalignment of the spin axis of the fluid core remains important, approximately 3-4 arcmin away from the mantle symmetry axis. The specific way in which ˜ε - m , ˜m - f +m, ˜m +f and ˜n - s change with inner core size would certainly be different for a turbulent model of viscous +s change with inner core size would certainly be different for a turbulent model of viscous coupling. 
But the general conclusion remains that the addition of viscous coupling at the CMB and ICB does not significantly modify the Cassini state equilibrium angle of the mantle. –22– @@ -2204,27 +2208,27 @@ This induces a secondary magnetic field (or equivalently, an electrical current) acts then in a similar way to viscous coupling, and this ’magnetic friction’ depends on the strength of the radial magnetic field B r and the electrical conductivity σ on either side of the boundary - [ Rochester , 1960, 1962, 1968]. + [Rochester , 1960, 1962, 1968]. The parametrization of EM coupling in terms of the coupling constants K cmb and K icb has been developed in a few studies [e.g. Buffett , 1992; Buffett et al., 2002; Dumberry and Koot , 2012]. Assuming a dominating axial dipole field, with a radial component at the CMB given by B - r = √ - 3 - B d +r = √ +3 +B d r - cos θ , where + cos θ, where B d r is the r.m.s. strength of the field, the coupling constant K - cmb can be written is the form +cmb can be written is the form K cmb = 3(1 − i)F - cmb - B d +cmb +Bd r 2 , (47) @@ -2232,108 +2236,108 @@ where F cmb = 1 Ω - o ρ - f r +oρ +f r f 1 σ - m δ - m + 1 +mδ +m + 1 σ - f δ - f - −1 +f δ +f +−1 , (48) and where σ - m , δ - m = - 2/ (σ - m µΩ - o ) and σ +m, δ +m = +2/(σ +mµΩ +o) and σ f , δ - f = - 2/( σ +f = +2/(σ f µΩ - o ) are the electrical conductivities - and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π × 10 − 7 -N A − 2 +o) are the electrical conductivities + and magnetic skin depths in the mantle and fluid core, respectively, with µ = 4π × 10−7 +N A−2 the magnetic permeability of free space. The r.m.s. field strength - B d +Bd r is connected to -the Gauss coefficient g 0 +the Gauss coefficient g0 1 of the surface magnetic field by - B d +Bd r = 2 √ - 3 +3 R r f - 3 +3 - g 0 +g0 1 . (49) We can readily build an estimate of the amplitude of K -cmb . The electrical conductivity +cmb. 
The electrical conductivity of common mantle minerals in Earth’s mantle at the pressure and temperature corresponding to the CMB of Mercury is in the range of σ - m ∼ 0 .01 − 1 S m−1 +m ∼ 0.01 − 1 S m−1 [Constable , 2015]. In contrast, the electrical conductivity of Fe in planetary cores is expected to be close σ - f ∼ 10 6 +f ∼ 106 S -m −1 - [Pozzo et al., 2012; de Koker et al. , 2012]. This implies that (σ - m δ - m )− 1 +m−1 + [Pozzo et al., 2012; de Koker et al., 2012]. This implies that (σ +mδ +m)−1 (σ - f δ -f )− 1 - . Taking +f δ +f )−1 +. Taking σ - m = 1 S m −1 - , +m = 1 S m−1 +, - g 0 +g0 1 - = 190 nT for Mercury’s dipole field [ Anderson et al., 2012], r + = 190 nT for Mercury’s dipole field [Anderson et al., 2012], r f = 2000 km, ρ - f = 7000 kg m − 3 - , this gives K -cmb ≈ (3 .1 × 10 − 11 - ) · (1 − i). To put this amplitude +f = 7000 kg m−3 +, this gives K +cmb ≈ (3.1 × 10−11 +) · (1 − i). To put this amplitude in perspective, taking a molecular viscosity of ν = 10−6 - m 2 - s − 1 + m2 + s−1 in Equation (44a) gives a viscous coupling constant of K -cmb ≈ (6 .0 × 10 − 7 - ) · (0 .195 − 1. 976 i). Hence, EM coupling at the +cmb ≈ (6.0 × 10−7 +) · (0.195 − 1.976i). Hence, EM coupling at the CMB is much weaker than viscous coupling, even if we include other spherical harmonic components of the radial magnetic field. EM coupling can be enhanced if strongly stratified pockets of core fluid are trapped by CMB cavities [Buffett , 2010; Glane and Buffett , 2018], in which case the effective σ - m could be +m could be closer to σ - f . Likewise, σ +f . Likewise, σ m can be increased if a more electrically conducting layer has formed at the bottom of Mercury’s mantle, for instance by the upward sedimentation and compaction of solid FeS crystals precipitating out of the fluid core [e.g. Hauck et al., 2013]. However, even in the extreme case of σ - m = σ - f = 10 6 - S m − 1 - , K -cmb ≈ (1. 
6 × 10 − 8 - ) · (1 − i ), which remains +m = σ +f = 106 + S m−1 +, K +cmb ≈ (1.6 × 10−8 +) · (1 − i), which remains –23– Confidential manuscript submitted to JGR-Planets smaller by a factor ∼ 60 than the smallest possible viscous coupling constant. Viscous forces @@ -2343,83 +2347,83 @@ and fluid core to be similar, and because the radial magnetic field is likely mu coupling can be much larger and dominate viscous coupling. We assume that the magnetic field morphology at the ICB is dominantly comprised of small spatial scales for example as predicted by the dynamo model of Christensen [2006]. EM coupling in this case can be parametrized in -terms of an equivalent uniform radial magnetic field B -r capturing its r.m.s. strength [ Buffett +terms of an equivalent uniform radial magnetic field B +r capturing its r.m.s. strength [Buffett et al., 2002; Dumberry and Koot , 2012]. Assuming an electrical conductivity σ equal in the fluid and solid core, the coupling constant K icb can be written in the form K icb = 5 -4 (1 − i )F - icb B - r 2 +4 (1 − i)F +icb B +r 2 , (50) where F - icb = σδ +icb = σδ Ω - o ρ - s r +oρ +sr s , (51) and where δ = - 2/ (σµΩ - o ) is the magnetic skin depth. As F +2/(σµΩ +o) is the magnetic skin depth. As F icb is inversely proportional to r -s , K +s, K icb is inversely proportional to inner core size. Note that computing the EM coupling based on the r.m.s. strength B - r rather than a true field morphology tends to overestimate the strength +r rather than a true field morphology tends to overestimate the strength of the coupling [Koot and Dumberry , 2013]. However, since the strength of the radial magnetic field at the ICB of Mercury is largely unknown, imperfections of the EM coupling model are absorbed in the range of possible B - r values. +r values. The parametrization of Equation (50) is only valid in a ’weak field’ regime [Buffett et al., 2002], when the feedback from the Lorentz force on the flow in the fluid core can be neglected. 
When B - r is sufficiently large, this is no longer the case. EM coupling then enters a ’strong +r is sufficiently large, this is no longer the case. EM coupling then enters a ’strong field’ regime [Buffett et al., 2002; Dumberry and Koot , 2012; Koot and Dumberry , 2013] in which K - icb increases linearly with B - r instead of quadratically. A good approximation of K - icb calculated +icb increases linearly with B +r instead of quadratically. A good approximation of K +icb calculated for Earth can be extracted from Figure 6a of Dumberry and Koot [2012], K E -icb = (0.175 − i0. 138) B - r , (52) +icb = (0.175 − i0.138) B +r , (52) where B - r is in units of Tesla. The superscript E emphasizes that the numerical factors are +r is in units of Tesla. The superscript E emphasizes that the numerical factors are appropriate for the parameter values adopted for Earth in the computation of Dumberry and Koot [2012]. To adapt these numerical factors to Mercury, we write, K - icb = (0.175 − i0. 138) F - icb +icb = (0.175 − i0.138) F +icb F E icb B - r , (53) +r , (53) where F E icb is defined as in Equation (51) but using the parameters for Earth as defined in Dumberry and Koot [2012]. These are Ω - o = 7. 292 × 10− 5 - s − 1 - , ρ - s = 12846 kg m− 3 - , r -s = 1221. 5 -km, σ = 5 × 10 5 - S m − 1 - , which gives F E -icb = 90.36 T − 2 - . +o = 7.292 × 10−5 + s−1 +, ρ +s = 12846 kg m−3 +, r +s = 1221.5 +km, σ = 5 × 105 + S m−1 +, which gives F E +icb = 90.36 T−2 +. To compute F - icb , we assume an electrical conductivity of σ = 106 - S m − 1 +icb, we assume an electrical conductivity of σ = 106 + S m−1 in the core of -Mercury [e.g. de Koker et al. , 2012; Deng et al., 2013]. The transition between the weak and +Mercury [e.g. de Koker et al., 2012; Deng et al., 2013]. The transition between the weak and strong field regime occurs when B - r ≈ 1. 53 mT for the real part of K - icb . B - r at the ICB +r ≈ 1.53 mT for the real part of K +icb. 
B +r at the ICB of Mercury is unknown. The dynamo model of Christensen [2006] showed that the field geometry inside the core could be dominated by small length scales, yet only the weaker lower harmonics of the field would penetrate through a thermally stratified layer in the upper region of @@ -2428,37 +2432,37 @@ Confidential manuscript submitted to JGR-Planets the fluid core and reach the surface. If so, the field strength inside the core can exceed the surface field strength by a factor 1000. Taking a surface field strength equal to ∼ 300 nT [e.g Anderson et al., 2012], B - r at the ICB could be as large as 0.3 mT, corresponding to approximately +r at the ICB could be as large as 0.3 mT, corresponding to approximately 10% of the field strength within Earth’s core. Given that it is perhaps unlikely that Mercury’s field can be as high as that in Earth’s core, in all likelihood EM coupling at the ICB of Mercury remains in the weak field regime. Figure 6 shows how ˜ε - m , ˜m +m, ˜m f and ˜n - s vary as functions of inner core radius for different choices +s vary as functions of inner core radius for different choices of B - r . The larger B - r is, the stronger is the EM coupling at the ICB, and the smaller is the +r . The larger B +r is, the stronger is the EM coupling at the ICB, and the smaller is the differential rotation between the fluid core and inner core. The inner core and fluid core are virtually - locked into a common precession motion when B -r > 0. 3 mT. Further increasing B - r + locked into a common precession motion when B +r > 0.3 mT. Further increasing B +r above 1 mT does not change the solution as EM coupling already dominates all other torques on the inner core. This is the case even when EM coupling transitions into the strong field regime. 
EM coupling at the CMB is included in these calculations, with σ - m = 1 S m−1 +m = 1 S m−1 and - g 0 +g0 1 = 190 nT, but remains much weaker than the inertial torque at the CMB, so for a small inner core we retrieved the solutions of ˜ε - m and ˜m - f shown in Figure 4. +m and ˜m +f shown in Figure 4. As the inner core radius is increased, both ˜ε - m and ˜m +m and ˜m f get smaller, as it was the case with viscous coupling alone, although the addition of EM coupling lead to more substantial changes. The inner core needs to be larger than approximately 500 km for changes in the Cassini state @@ -2468,79 +2472,79 @@ EM coupling at the CMB, but rather from the combination of EM coupling at the IC pulls the fluid core towards an alignment with the inner core, and gravitational coupling on the inner core, which pulls the latter to align with the mantle. The larger the EM coupling is, the greater is the reduction in ˜ε - m and ˜m - f . +m and ˜m +f . When the EM coupling at the ICB is sufficiently strong that the fluid and solid cores are locked into a common precession motion, a good approximation of ˜ε - m is given by the same prediction +m is given by the same prediction as Equations (39-40) involving the effective moment of inertia C - , except χ is now given +, except χ is now given by χ = ¯ A - c Ω - p cos I − ¯ +cΩ +p cos I − ¯ A - s Ω - o α -3 φ - s +sΩ +oα +3φ +s ¯ A - f Ω - o ( e - f + K -cmb ) + ¯ -A - s Ω - o e - s α -3 α +f Ω +o(e +f + K +cmb) + ¯ +A +sΩ +oe +sα +3α g − ¯ A - c Ω - p cos I . (54) +cΩ +p cos I . (54) For a small inner core, ¯ A - c Ω - p cos I > ¯ +cΩ +p cos I > ¯ A - s Ω - o α -3 φ - s and χ is positive. Because ¯ +sΩ +oα +3φ +s and χ is positive. Because ¯ A - s Ω - o α -3 φ - s increases +sΩ +oα +3φ +s increases with inner core size, χ gets smaller, and so do C and ˜ε -m . The mantle obliquity drops from 2.049 +m. 
The mantle obliquity drops from 2.049 arcmin for a small inner core to 2.034 arcmin for an inner core of 1500 km, a reduction of 0.015 arcmin. For an inner core larger than ≈ 1000 km, ¯ A - c Ω - p cos I < ¯ +cΩ +p cos I < ¯ A - s Ω - o α -3 φ - s , so χ becomes negative, +sΩ +oα +3φ +s, so χ becomes negative, C becomes smaller than the moment of inertia of a rigid Mercury C , and ˜ε - m becomes +m becomes smaller than the prediction based on a rigid planet. The larger the inner core is, the smaller are the misalignments of the fluid and solid cores with respect to the mantle. Hence, the general conclusion we reached for viscous coupling alone is not altered with the addition of EM coupling but further strengthened; the larger the inner core is, the closer we approach a planet precessing as a rigid body. This is best revealed by the obliquity of the gravity field ˜ε - g which, for a large inner core, asymptotically approaches the obliquity +g which, for a large inner core, asymptotically approaches the obliquity expected for a rigid planet. Note that with strong EM coupling at the ICB, the offset between ˜ε - m and ˜ε - g can be as large as 0.008 arcmin for a large inner core. +m and ˜ε +g can be as large as 0.008 arcmin for a large inner core. 3.5 Fixed inner core density versus fixed ICB density contrast Coupling models when viscous and EM stresses are both present have been presented in Mathews and Guo [2005] and Deleplace and Cardin [2006]. However, in the light of our results, @@ -2553,21 +2557,22 @@ Inner core radius (km) 0.00.51.01.52.02.53.03.54.04.5 Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km)B - r at ICB: +r at ICB: 1 mT 0.01 mT0.03 mT0.1 mT0.3 mT ε - m +m ε - g +g m - f +f n - sa b +sa b for a rigid planet ε - m Figure 6. a) Obliquity of the mantle ( ˜ε - m , solid lines) and gravity field ( ˜ε - g , dashed lines) b) ˜m +m +Figure 6. 
a) Obliquity of the mantle ( ˜ε +m, solid lines) and gravity field ( ˜ε +g , dashed lines) b) ˜m f (solid lines) and ˜n s (dashed lines) as a function of inner core radius and for different choices of B @@ -2576,36 +2581,36 @@ r for the Cassini state equilibrium of Mercury, the tangential stress at the CMB is dominated by viscous forces, and that at the ICB should be dominated by EM forces. To simplify, we consider a model where K - cmb is purely from viscous coupling and K +cmb is purely from viscous coupling and K icb purely from EM coupling. -We choose an effective viscosity at the CMB of ν = 10 −4 - m 2 - s −1 - , which we believe to be a +We choose an effective viscosity at the CMB of ν = 10−4 + m2 + s−1 +, which we believe to be a representative value given the comparison with the Moon (see section 3.3). We take a radial field strength at the ICB of B - r = 0.3 mT, approximately the field strength expected under +r = 0.3 mT, approximately the field strength expected under the dynamo scenario of Christensen [2006]. We adopt these values as those of a ‘representative’ - coupling model, although the uncertainty on ν and B + coupling model, although the uncertainty on ν and B r obviously remains high. Figure 7 shows how ˜ε - m , ˜m +m, ˜m f and ˜n - s vary with inner core radius for the ’representative’ +s vary with inner core radius for the ’representative’ coupling model (black lines) under the fixed inner core density scenario that we have used in sections 3.2, 3.3 and 3.4. Figure 7 also shows how the results change when, for the same representative coupling model, we adopt instead a fixed density contrast between the fluid and solid cores and for different choices of α 3 (coloured lines). For a relatively high density contrast (α - 3 = -0 .2), the results are qualitatively similar to the fixed inner core density scenario. For a smaller +3 = +0.2), the results are qualitatively similar to the fixed inner core density scenario. 
For a smaller α -3 , the point at which the orientation of the co-precessing fluid and inner cores begins to be +3, the point at which the orientation of the co-precessing fluid and inner cores begins to be pulled into an alignment with the mantle is pushed to a larger inner core radius. However, the general behaviour of ˜ε -m , ˜m - f and ˜n - s as functions of inner core radius is unchanged. Hence, all +m, ˜m +f and ˜n +s as functions of inner core radius is unchanged. Hence, all our results in the previous three sections would be qualitatively similar under a fixed density contrast scenario. A smaller density contrast at the ICB only implies that a larger inner core is required in order to produce an equivalent change in the Cassini state equilibrium. @@ -2624,40 +2629,41 @@ Obliquity angle (arcmin) 0 200 400 600 800 1000 1200 1400 Inner core radius (km) for a rigid planet ε - ma bα - 3 : +ma bα +3: 0.20 0.010.05 0.100.15ρ - s = 8800 kg m -3 +s = 8800 kg m-3 m - f +f n - sε - m +sε +m ε - gFigure 7. a) Obliquity of the mantle ( ˜ε - m , solid lines) and gravity field ( ˜ε - g , dashed lines) b) ˜m +g +Figure 7. a) Obliquity of the mantle ( ˜ε +m, solid lines) and gravity field ( ˜ε +g , dashed lines) b) ˜m f (solid lines) and ˜n s (dashed lines) as a function of inner core radius, for a fixed inner core density of -8800 kg m −3 +8800 kg m−3 (black lines) and for different choices of α 3 (coloured lines). i -m , i +m, i f and i -s ; these represent the obliquities with respect to the orbital plane and are connected +s; these represent the obliquities with respect to the orbital plane and are connected to our variables by: i m = ˜ε - m , i +m, i f = ˜ε - m + ˜m + ˜m - f ≈ ˜ε - m + ˜m +m + ˜m + ˜m +f ≈ ˜ε +m + ˜m f and i s = ˜ε m + ˜n - s . To summarize +s. 
To summarize their results, i f and i s vary substantially for different inner core sizes, are always of comparable @@ -2670,7 +2676,7 @@ from its expected orientation based of a rigid planet (see their Figure 6). The m they obtain between a case with no inner core and an inner core radius equal to 0.6 times the planetary radius (≈ 1463 km, close to the maximum inner core size of 1500 km we have considered), -is approximately an increase of 5 × 10 −5 +is approximately an increase of 5 × 10−5 rad = 0.17 arcmin. This also corresponds approximately to the deviation of the obliquity with respect to that of a rigid planet. When only viscous stress is included in our model (section 3.3), our results are substantially @@ -2710,17 +2716,17 @@ amplitude of the decrease can be as large as 0.015 arcmin, 3 times larger than f and again, importantly, in the reverse direction. Our results suggest then that the presence and size of an inner core leads to only modest changes of the mantle obliquity ε - m compared to the obliquity predicted on the basis of an -entirely rigid planet ( ε r -m ). Let us denote this difference as ∆ε - m = ε - m −ε r -m . The largest ∆ ε - m -occurs for a small or no inner core, and is ∆ ε +m compared to the obliquity predicted on the basis of an +entirely rigid planet (εr +m). Let us denote this difference as ∆ε +m = ε +m−εr +m. The largest ∆ε +m +occurs for a small or no inner core, and is ∆ε m ≈ 0.01 arcmin. This difference is decreased as the inner core size is increased. For a sufficiently large inner core, in the case of a strong EM -coupling and large density contrast at the ICB, ∆ ε +coupling and large density contrast at the ICB, ∆ε m can be negative, but its absolute value remains smaller than 0.01 arcmin. To put these results in perspective, the uncertainty in the measurement of the mantle obliquity @@ -2732,61 +2738,61 @@ planet. But it also implies that the observed obliquity cannot be used to place the inner core size. 
Nevertheless, our results show that the presence of a fluid core and inner core affect the resulting mantle obliquity by as much as 0.01 arcmin. This is of the same order as the change -in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec ( ≈ 0 .006 -arcmin) [ Baland et al. , 2017]. This is also of the same order as the amplitude of the nutation +in obliquity caused by elastic tidal deformation, which is of the order of 0.35 arcsec (≈ 0.006 +arcmin) [Baland et al., 2017]. This is also of the same order as the amplitude of the nutation motion about the mean equilibrium Cassini state forced by the precession of the pericenter, which -is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al. , 2017]. The precision on the obliquity - from the upcoming BepiColombo satellite mission is expected to be ≤ 0 .5 arcsec (≤ 0 .008 -arcmin) [ Cical`o et al. , 2016]. Thus, in addition to including tidal deformation and the precession +is approximately 0.85 arcsec (≈ 0.014 arcmin) [Baland et al., 2017]. The precision on the obliquity + from the upcoming BepiColombo satellite mission is expected to be ≤ 0.5 arcsec (≤ 0.008 +arcmin) [Cical`o et al., 2016]. Thus, in addition to including tidal deformation and the precession of the pericenter, a Cassini state model that includes a fluid and solid core will then be necessary in order to properly tie Mercury’s obliquity to its interior structure. In turn, this opens the possibility of further constraining the interior structure of Mercury on the basis of its obliquity. Obliquity measurements based on tracking topographic features reflect the orientation of -the spin-symmetry axis of the mantle ( ε - m ). Measurements based on tracking the gravity field +the spin-symmetry axis of the mantle (ε +m). Measurements based on tracking the gravity field of Mercury reflect instead the orientation of the principal moment of the whole planet (ε - g ). These +g ). 
These two orientations do not coincide when an inner core is present and is misaligned from the mantle. Since gravitational coupling prevents a large inner core tilt with respect to the mantle, we –28– Confidential manuscript submitted to JGR-Planets -find that the misalignment ∆ ε +find that the misalignment ∆ε g = ε - g − ε - m is limited. The maximum offset that we obtain +g − ε +m is limited. The maximum offset that we obtain is approximately ∆ε - g ≈ 0 .007 arcmin. This limited magnitude of offset is important in the +g ≈ 0.007 arcmin. This limited magnitude of offset is important in the light of the recent obliquity of the gravity field estimated in Genova et al. [2019], ε - g = 1.968 ± -0 .027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the +g = 1.968± +0.027 arcmin. This is substantially smaller than the two mesurements of the obliquity of the spin-symmetry axis of the mantle: ε -m = 2 . 04 ± 0.08 arcmin [Margot et al. , 2012] and ε - m = -2 .029 ± 0. 085 arcmin [Stark et al., 2015a], although all three measurements remain consistent +m = 2.04 ± 0.08 arcmin [Margot et al., 2012] and ε +m = +2.029±0.085 arcmin [Stark et al., 2015a], although all three measurements remain consistent with one another within their error estimates. In their interpretation, Genova et al. [2019] suggest - that the different central value of the obliquity that they obtain (smaller by ∼ 0 .07 arcmin) - is perhaps explained by an offset ∆ ε - g due to the presence of a (possibly large) solid inner + that the different central value of the obliquity that they obtain (smaller by ∼ 0.07 arcmin) + is perhaps explained by an offset ∆ε +g due to the presence of a (possibly large) solid inner core. However, this is one order of magnitude larger than the maximum magnitude of ∆ε g that we predict. Moreover, we predict that the obliquity of the gravity field should be larger than that of the mantle spin axis, not smaller. 
Hence, at the present-day level of the precision of the measurements, ε - g and ε - m should coincide, and their difference cannot be interpreted as +g and ε +m should coincide, and their difference cannot be interpreted as reflecting the misalignment between the polar moment of inertia of the whole planet and the mantle spin axis. Lastly, we have concentrated our efforts on the mutual orientations of the different spin and symmetry axes in the Cassini plane. Dissipation at the CMB and ICB introduced by viscous and EM coupling also lead to a displacement of these axes in the direction perpendicular - to the Cassini plane [e.g Peale et al. , 2014]. Indeed, the two measurements based on tracking + to the Cassini plane [e.g Peale et al., 2014]. Indeed, the two measurements based on tracking surface topographic features from Margot et al. [2012] and Stark et al. [2015a] suggest that -the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0. 03 arcmin). +the mantle spin axis lags behind the Cassini plane by approximately 2 arcsec (∼ 0.03 arcmin). Although this offset is smaller than the measurement errors, so that the observed obliquity is still consistent with no deviation away from the Cassini plane, some amount of dissipation invariably takes place. These measurements give then a measure of the possible amplitude of the -dissipation. One source of dissipation is from anelastic tidal deformation [ Baland et al., 2017], +dissipation. One source of dissipation is from anelastic tidal deformation [Baland et al., 2017], but viscous and EM coupling at the boundaries of the fluid core is another. Hence, the out-ofplane component of the observed obliquity may further help to quantify and constrain the interior coupling mechanisms. This will be the sub ject of a future study. @@ -2812,7 +2818,7 @@ Mercury’s interior structure. 
–29– Confidential manuscript submitted to JGR-Planets Acknowledgments -Figures were created using the GMT software [ Wessel et al. , 2013]. The source codes, GMT +Figures were created using the GMT software [Wessel et al., 2013]. The source codes, GMT scripts and data files to reproduce all figures are freely accessible in Dumberry [2020]. This work was supported by an NSERC/CRSNG Discovery Grant. References @@ -2820,184 +2826,184 @@ Alf`e, D., G. Kresse, and M. Gillan (2000), Structure and dynamics of liquid iro conditions, Phys. Rev., B61, 132–142. Anderson, B. J., C. L. Johnson, H. Korth, M. E. Purucker, R. M. Winslow, J. A. Slavin, S. C. Solomon, R. L. McNutt, M. Raines, Jim, and T. H. Zurbuchen (2011), The global -magnetic field of Mercury from MESSENGER orbital observations, Science, 333 , 1859– +magnetic field of Mercury from MESSENGER orbital observations, Science, 333, 1859– 1862. Anderson, B. J., C. L. Johnson, H. Korth, R. M. Winslow, J. E. Borovsky, M. E. Purucker, J. A. Slavin, S. C. Solomon, M. T. Zuber, and R. L. McNutt (2012), Lowdegree - structure in mercury’s planetary magnetic field, J. Geophys. Res. , 117 , E00L12, + structure in mercury’s planetary magnetic field, J. Geophys. Res., 117, E00L12, doi:10.1029/2012JE004159. Baland, R.-M., A. Yseboodt, M. Rivoldini, and T. Van Hoolst (2017), Obliquity of Mercury: - Influence of the precession of the pericenter and of tides, Icarus, 291 , 136–159. + Influence of the precession of the pericenter and of tides, Icarus, 291, 136–159. Baland, R.-M., A. Coyette, and T. Van Hoolst (2019), Coupling between the spin precession and polar motion of a synchronously rotating satellite: application to Titan, Celestial Mechanics and Dynamical Astronomy, 131 (11), 1–50. Buffett, B. A. (1992), Constraints on magnetic energy and mantle conductivity from the -forced nutations of the Earth, J. Geophys. Res. , 97 , 19,581–19,597. +forced nutations of the Earth, J. Geophys. Res., 97, 19,581–19,597. Buffett, B. A. 
(2010), Chemical stratification at the top of earth’s core: Constraints from -observations of nutations, Earth Planet. Sci. Lett. , 296 , 367–372. +observations of nutations, Earth Planet. Sci. Lett., 296, 367–372. Buffett, B. A., P. M. Mathews, and T. A. Herring (2002), Modeling of nutation-precession: -effects of electromagnetic coupling, J. Geophys. Res. , 107 , doi:10.1029/2001JB000056. -Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech. , 33 , +effects of electromagnetic coupling, J. Geophys. Res., 107, doi:10.1029/2001JB000056. +Busse, F. H. (1968), Steady fluid flow in a precessing spheroidal shell, J. Fluid Mech., 33, 739–751. Byrne, P. K., C. Klimczak, A. M. C. Seng¨or, S. C. Solomon, T. R. Watters, and S. A. Hauck (2014), Mercury’s global contraction much greater than earlier estimates, Nature -Geosci., 7 , 301–307. +Geosci., 7, 301–307. C´ebron, D., R. Laguerre, J. Noir, and N. Schaeffer (2019), Precessing spherical shells: -flows, dissipation, dynamo and the lunar core, Geophys. J. Int. , 219 (Supplement +flows, dissipation, dynamo and the lunar core, Geophys. J. Int., 219 (Supplement 1), S34–S57, doi:10.1093/gji/ggz037. Christensen, U. R. (2006), A deep dynamo generating Mercury’s magnetic field, Nature, -444 , 1056–1058. +444, 1056–1058. Cical`o, S., G. Schettino, S. Di Ruzza, E. M. Alessi, G. Tommei, and A. Milani (2016), The BepiColombo MORE gravimetry and rotation experiments with the ORBIT14 software, -Month. N. Roy. Astr. Soc., 457 , 1507–1521. -Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71 , 891–896. +Month. N. Roy. Astr. Soc., 457, 1507–1521. +Colombo, G. (1966), Cassini’s second and third laws, Astron. J., 71, 891–896. Constable, S. (2015), Geomagnetic induction studies, in Treatise on Geophysics, Second Edition, vol. 5, edited by G. Schubert and M. Kono, chap. 7, pp. 219–254, Elsevier, Oxford. de Koker, N., G. Seinle-Neumann, and V. 
Vlˇcek (2012), Electrical resistivity and thermal conductivity of liquid Fe alloys at high P and T, and heat flux in Earth’s core, Proc. -Nat. Acad. Sci. , 109 , 4070–4073. +Nat. Acad. Sci., 109, 4070–4073. –30– Confidential manuscript submitted to JGR-Planets de Wijs, G. A., G. Kresse, L. Voˇcadlo, D. Dobson, D. Alf´e, M. J. Gillan, and G. D. Price (1998), The viscosity of liquid iron at the physical conditions of the Earth’s core, Nature, -392 , 805–807. -Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics , +392, 805–807. +Dehant, V., and P. Mathews (2015), Earth rotation variations, in Treatise on Geophysics, vol. 3, edited by G. Schubert, chap. 10, pp. 263–305, Elsevier, Oxford. Deleplace, B., and P. Cardin (2006), Viscomagnetic torque at the core mantle boundary, -Geophys. J. Int. , 167 , 557–566. +Geophys. J. Int., 167, 557–566. Deng, L., C. Seagle, Y. Fei, and A. Shahar (2013), High pressure and temperature electrical -resistivity of iron and implications for planetary cores, Geophys. Res. Lett. , 40 , 33–37, +resistivity of iron and implications for planetary cores, Geophys. Res. Lett., 40, 33–37, doi:10.1029/2012GL054347. Dumberry, M. (2020), Replication Data for: The influence of a fluid core and a solid inner core on the Cassini sate of Mercury, https://doi.org/10.7939/DVN/903HUV, UAL Dataverse, V2. Dumberry, M., and L. Koot (2012), A global model of electromagnetic coupling for nutations, - Geophys. J. Int. , 191 , 530–544. + Geophys. J. Int., 191, 530–544. Dumberry, M., and A. Rivoldini (2015), Mercury’s inner core size and core-crystallization -regime, Icarus, 248 , 254–268. +regime, Icarus, 248, 254–268. Dumberry, M., and M. A. Wieczorek (2016), The forced precession of the Moon’s inner -core, J. Geophys. Res. Planets , 121 , 1264–1292. +core, J. Geophys. Res. Planets, 121, 1264–1292. Dumberry, M., A. Rivoldini, T. Van Hoolst, and M. 
Yseboodt (2013), The role of Mercury’s - core density structure on its longitudinal librations, Icarus, 225 , 62–74. -Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res. , 77 , 360–366. + core density structure on its longitudinal librations, Icarus, 225, 62–74. +Gans, R. F. (1972), Viscosity of the Earth’s core, J. Geophys. Res., 77, 360–366. Genova, A., S. Goossens, E. Mazarico, F. G. Lemoine, G. A. Neumann, W. Kuang, T. J. Sabaka, S. A. Hauck II, D. E. Smith, S. C. Solomon, and M. T. Zuber (2019), -Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett. , 46 , +Geodetic evidence that Mercury has a solid inner core, Geophys. Res. Lett., 46, doi:10.1029/2018GL081135. Glane, S., and B. A. Buffett (2018), Enhanced core-mantle coupling due to stratification at -the top of the core, Frontiers in Earth Science , 6 , 171, doi:10.3389/feart.2018.00171. +the top of the core, Frontiers in Earth Science, 6, 171, doi:10.3389/feart.2018.00171. Grott, M., D. Breuer, and M. Laneuville (2011), Thermo-chemical evolution and global -contraction of Mercury, Earth Planet. Sci. Lett. , 307 , 135–146. +contraction of Mercury, Earth Planet. Sci. Lett., 307, 135–146. Hauck, S. A., J.-L. Margot, S. C. Solomon, R. J. Phillips, C. L. Johnson, F. G. Lemoine, E. Mazarico, T. J. McCoy, S. Padovan, S. J. Peale, M. E. Perry, D. E. Smith, and M. T. -Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res. , 118 , +Zuber (2013), The curious case of Mercury’s internal structure, J. Geophys. Res., 118, doi:10.1002/jgre.20091. Johnson, C. L., M. E. Purucker, H. Korth, B. J. Anderson, R. M. Winslow, M. M. H. Al Asad, J. A. Slavin, I. I. Alexeev, R. J. Phillips, M. T. Zuber, and S. C. Solomon (2012), MESSENGER observations of mercury’s magnetic field structure, J. Geophys. -Res., 117 , E00L14, doi:10.1029/2012JE004217. +Res., 117, E00L14, doi:10.1029/2012JE004217. Konopliv, A. S., R. S. Park, and A. I. 
Ermakov (2020), The Mercury gravity field, orientation, love number, and ephemeris from the MESSENGER radiometric tracking data, -Icarus, 335 , 113,386. +Icarus, 335, 113,386. Koot, L., and M. Dumberry (2013), The role of the magnetic field morphology on the -electromagnetic coupling for nutations, Geophys. J. Int. , 195 , 200–210. +electromagnetic coupling for nutations, Geophys. J. Int., 195, 200–210. Li, J., Y. Fei, H. Mao, K. Hirose, and S. Shieh (2001), Sulfur in Earth’s inner core, Earth -Planet. Sci. Lett. , 193 , 509–514. +Planet. Sci. Lett., 193, 509–514. Margot, J. L., S. J. Peale, R. F. Jurgens, M. A. Slade, and I. V. Holin (2007), Large longitude - libration of Mercury reveals a molten core, Science, 316 , 710–714. + libration of Mercury reveals a molten core, Science, 316, 710–714. Margot, J. L., S. J. Peale, S. C. Solomon, S. A. Hauck, F. D. Ghigo, R. F. Jurgens, M. Yseboodt, J. D. Giorgini, S. Padovan, and D. B. Campbell (2012), Mercury’s –31– Confidential manuscript submitted to JGR-Planets -moment of inertia from spin and gravity data, J. Geophys. Res. , 117 , E00L09, +moment of inertia from spin and gravity data, J. Geophys. Res., 117, E00L09, doi:10.1029/2012JE004161. Margot, J. L., S. A. Hauck II, E. Mazarico, S. Padovan, and S. J. Peale (2018), Mercury’s -internal structure, in Mercury: The View after MESSENGER , edited by S. Solomon, +internal structure, in Mercury: The View after MESSENGER, edited by S. Solomon, L. Nittler, and B. Anderson, pp. 85–113, Cambridge University Press, Cambridge, doi: 10.1017/9781316650684.005. Mathews, P. M., and J. Guo (2005), Viscoelectromagnetic coupling in precession-nutation -theory, J. Geophys. Res. , 110 (B02402), doi:10.1029/2003JB002915. +theory, J. Geophys. Res., 110 (B02402), doi:10.1029/2003JB002915. Mathews, P. M., B. A. Buffett, T. A. Herring, and I. I. Shapiro (1991), Forced nutations of -the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res. , 96 , 8219–8242. 
+the Earth: Influence of inner core dynamics. 1. theory, J. Geophys. Res., 96, 8219–8242. Mathews, P. M., T. A. Herring, and B. A. Buffett (2002), Modeling of nutations and precession: New nutation series for nonrigid Earth and insights into the Earth’s interior, J. -Geophys. Res., 107 , doi:10.1029/2004JB000390. +Geophys. Res., 107, doi:10.1029/2004JB000390. Mazarico, E., A. Genova, S. Goossens, F. G. Lemoine, G. A. Neumann, M. T. Zuber, D. E. Smith, and S. C. Solomon (2014), The gravity field, orientation, and ephemeris of Mercury from MESSENGER observations after three years in orbit, J. Geophys. Res. -Planets, 119 , 2417–2436. +Planets, 119, 2417–2436. Organowski, O., and M. Dumberry (2020), Viscoelastic relaxation within the Moon -and the phase lead of its Cassini state, Journal of Geophysical Research Planets , 125 , +and the phase lead of its Cassini state, Journal of Geophysical Research Planets, 125, e2020JE006386. -Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74 , 483–489. -Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79 , 722–744. -Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262 , 765–766. -Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178 , 4–18. +Peale, S. J. (1969), Generalized Cassini’s laws, Astron. J., 74, 483–489. +Peale, S. J. (1974), Possible histories of the obliquity of Mercury, Astron. J., 79, 722–744. +Peale, S. J. (1976), Does Mercury have a molten core?, Nature, 262, 765–766. +Peale, S. J. (2005), The free precession and libration of Mercury, Icarus, 178, 4–18. Peale, S. J. (2006), The proximity of Mercury’s spin to Cassini state 1 from adiabatic invariance, - Icarus, 181 , 338–347. + Icarus, 181, 338–347. Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2014), Effect of core-mantle -and tidal torques on Mercury’s spin axis orientation, Icarus, 231 , 206–220. 
+and tidal torques on Mercury’s spin axis orientation, Icarus, 231, 206–220. Peale, S. J., J. L. Margot, S. A. Hauck II, and S. C. Solomon (2016), Consequences of a -solid inner core on Mercury’s spin configuration, Icarus, 264 , 443–455. +solid inner core on Mercury’s spin configuration, Icarus, 264, 443–455. Perry, M. E., G. A. Neumann, R. J. Phillips, and et al. (2015), The low-degree shape of -Mercury, Geophys. Res. Lett. , 42 , 6951–6958. -Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bul l. Astron. Ser. 1 , 27 , +Mercury, Geophys. Res. Lett., 42, 6951–6958. +Poincar´e, H. (1910), Sur la pr´ecession des corps d´eformables, Bul l. Astron. Ser. 1, 27, 321–356. Pozzo, M., C. Davies, D. Gubbins, and D. Alf´e (2012), Thermal and electrical conductivity -of iron at Earth’s core conditions, Nature, 485 , 355–358. +of iron at Earth’s core conditions, Nature, 485, 355–358. Rochester, M. G. (1960), Geomagnetic westward drift and irregularities in the Earth’s -rotation, Phil. Trans. R. Soc. Lond., A, 252 , 531–555. -Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res. , 67 , 4833– +rotation, Phil. Trans. R. Soc. Lond., A, 252, 531–555. +Rochester, M. G. (1962), Geomagnetic core-mantle coupling, J. Geophys. Res., 67, 4833– 4836. Rochester, M. G. (1968), Perturbations in the Earth’s rotation and geomagnetic coremantle - coupling, J. Geomag. Geoelectr., 20 , 387–402. + coupling, J. Geomag. Geoelectr., 20, 387–402. Rochester, M. G. (1976), The secular decrease of obliquity due to dissipative core-mantle -coupling, Geophys. J. R. Astron. Soc., 46 , 109–126. +coupling, Geophys. J. R. Astron. Soc., 46, 109–126. Rutter, M., R. Secco, T. Uchida, H. Liu, Y. Wang, M. Rivers, and S. Sutton (2002a), Towards evaluating the viscosity of the Earth’s outer core: an experimental high pressure -study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. Lett. , 29 , 080,000–1. +study of liquid Fe-S (8.5 wt. per cent S), Geophys. Res. 
Lett., 29, 080,000–1. Rutter, M. D., R. A. Secco, H. Liu, T. Uchida, M. Rivers, S. Sutton, and Y. Wang -(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B , 66 , 060,102, +(2002b), Viscosity of liquid Fe at high pressure, Phys. Rev. B, 66, 060,102, –32– Confidential manuscript submitted to JGR-Planets doi:10.1029/2001GL014392. Schaefer, L., S. B. Jacobsen, J. L. Remo, M. I. Petaev, and D. D. Sasselov (2017), Metalsilicate partitioning and its role in core formation and composition on Super-Earths, -Astrophys. J., 835 , 234. -Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett. , 489 , 92–99. +Astrophys. J., 835, 234. +Sori, M. M. (2018), A thin, dense crust for Mercury, Earth Planet. Sci. Lett., 489, 92–99. Stark, A., J. Oberst, F. Preusker, S. J. Peale, J.-L. Margot, R. J. Phillips, G. A. Neumann, S. D. E., M. T. Zuber, and S. C. Solomon (2015a), First MESSENGER orbital observations - of Mercury’s librations, Geophys. Res. Lett. , 42 , 7881–7889. + of Mercury’s librations, Geophys. Res. Lett., 42, 7881–7889. Stark, A., J. Oberst, and H. Hussmann (2015b), Mercury’s resonant rotation from secular -orbital elements, Celest. Mech. Dyn. Astr., 123 , 263–277. +orbital elements, Celest. Mech. Dyn. Astr., 123, 263–277. Stewartson, K., and P. H. Roberts (1963), On the motion of a liquid in a spheroidal cavity -of a precessing rigid body, J. Fluid Mech. , 17 , 1–20. +of a precessing rigid body, J. Fluid Mech., 17, 1–20. Stys, C., and M. Dumberry (2018), The cassini state of the Moon’s inner core, J. Geophys. -Res. Planets, 123 , 1–25, doi:10.1029/2018JE005607. -Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics , +Res. Planets, 123, 1–25, doi:10.1029/2018JE005607. +Van Hoolst, T. (2015), Rotation of the terrestrial planets, in Treatise on Geophysics, vol. 10, edited by G. Schubert, chap. 4, pp. 121 – 151, Elsevier, Oxford. Van Hoolst, T., A. Rivoldini, R.-M. Baland, and M. 
Yseboodt (2012), The effects of tides -and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett. , 333–334 , +and an inner core on the forced libration of mercury, Earth Planet. Sci. Lett., 333–334, 83–90. Verma, A. K., and J. L. Margot (2016), Mercury’s gravity, tides, and spin from MESSENGER - radio science data, J. Geophys. Res. Planets , 121 , 1627–1640. + radio science data, J. Geophys. Res. Planets, 121, 1627–1640. Wessel, P., W. H. F. Smith, R. Scharroo, J. Luis, and F. Wobbe (2013), Generic Mapping -Tools: Improved version released, EOS Trans. AGU , 94 , 409–410. +Tools: Improved version released, EOS Trans. AGU, 94, 409–410. Williams, J. G., and D. H. Boggs (2015), Tides on the Moon: theory and determination of -dissipation, J. Geophys. Res. Planets , 120 (4), 689–724, doi:10.1002/2014JE004755. +dissipation, J. Geophys. Res. Planets, 120 (4), 689–724, doi:10.1002/2014JE004755. Williams, J. G., D. H. Boggs, C. F. Yoder, J. T. Ratcliff, and J. O. Dickey (2001), Lunar -rotational dissipation in solid body and molten core, J. Geophys. Res. , 106 , 27,933– +rotational dissipation in solid body and molten core, J. Geophys. Res., 106, 27,933– 27,968. Williams, J. G., A. S. Konopliv, D. H. Boggs, R. S. Park, D.-N. Yuan, F. G. Lemoine, S. Goossens, E. Mazarico, F. Nimmo, R. C. Weber, S. W. Asmar, H. J. Melosh, G. A. Neumann, R. J. Phillips, D. E. Smith, S. C. Solomon, M. M. Watkins, M. A. Wieczorek, J. C. Andrews-Hanna, J. W. Head, W. S. Kiefer, I. Matsuyama, P. J. McGovern, G. J. Taylor, and M. T. Zuber (2014), Lunar interior properties from the GRAIL mission, J. -Geophys. Res. Planets , 119 (7), 1546–1578, doi:10.1002/2013JE004559. +Geophys. Res. Planets, 119 (7), 1546–1578, doi:10.1002/2013JE004559. Yoder, C. F. (1981), The free librations of a dissipative Moon, Phil. Trans. R. Soc. Lond. -A , 303 , 327–338. -Yseboodt, M., and J. L. Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181 , +A, 303, 327–338. +Yseboodt, M., and J. L. 
Margot (2006), Evolution of Mercury’s obliquity, Icarus, 181, 327–337. –33– \ No newline at end of file diff --git a/read/results/playa/2201.00069.txt b/read/results/playa/2201.00069.txt index e899cc7764d6247030dad72853c97a22530f4100..c79949c0f359c9c08b372af576d9dece2d954170 100644 GIT binary patch delta 5681 zcmaJ_dvsLQxo46P!bu!JNeB?gCNYq}$uQ@fc@i<1KthBBLM9|YrE)Sk$sFc!;+&bp z2^B<9f_j1Y8B`vkw^nPHdL;+A;SnB_Dn%Y*t)jixb-U=gx4lbOE4S8mwPNqL=S`{o zBRgk*d+*==-ru+9h?e=+!R^S)UK2yYq|rc}J#x@1 z1z0mf8Vtmtx;%9&n5ScBS|fXHk-c_?I1CidX&^1K%Pj^v zRTflT$z((}Gc5*EE2(|5%frI!y169+LvLDXAWH+Xq^gpiMGAs17yTZkPxZ(we5G3q zd`2yVGFE2eXi}r<*Wi2$(jw< zvQAItQAPx5KoG>l#n;9!glppk$jvrP;dzF%8^{W8D018{izsGe_9EDkZGdCh#j`oo zC4x4IE{_yuIoyk!Hi_P_>Xv$0_&QtIDxh|kQDPuC_GTqsFR3Cc*cgJUL*hd$Vo(js zR3NXH<*6P}WV)<$N&%`f0p&uqHYx4^YR1pf^*TBac~KZkjE9hynhuS*0u3N~MAa?( zL>7i8w6+Q;A=+=1+eZ~hp;me2UMV0}u9T&qlECYf$SS``+GQ_t;*+C$QQNE3U?65Y zfoBk=8CeLkurX&AY{@agzvYy{M>zz(&MAlDi5#q+=vczrm<9uN8kaj13`*2YYofD0 z^aF_%qAag8AehrF_9#6v%b`Y44bC3PrS!@n^s!GTa#N_Ftp?JH;<%|mEX-n7R3kbn zy-oIeSQColG~ldV?DkXf(I5!ZE=z%ams|0w9+Blx`gI}F9`ciR#ox`s8w@GrP1N0} zxz>w;ZZSaJjpNZ5|ChNBgxq_w375~A38=aE#y_>AxuqFdks-kj4qv``9LJ13jsr{9 zRtCf{@yb3qNR%#8NGy>d3&|jyaU6$+L%$_1-#GLQbsU)VR`BYl&x{ON<2#U^zJp`n zNBX&0UmiTL13u6{ANx=ynX)58ykIeFZ%n$GaSv%!01?ZdO^!{hE6P1q*>`vdJ&)*Z4>a|Cu}QlL?LizY%$Ivd{`jp2xLzy!*Rj1 zmKhYmT0zQHNg~Z&F&IqU_UyF5JkltKn8ewIg#&qHsVFD;Iam1k6dFC<>Tuv}EQDkVGa}~ueDyf@`0CuZA3BZx^dJKca9aR1b1JewH+X|s;gC5>4 zX~2VjE~&+FQR&Kz$dLAQX@!n@89D~j98j7*F~Ml&JyivMWrqay@`EhK^5+(TCi&ZF%i+a=8;XX>II5 z-9#gXd{800Rr|JbIt8D$@Kqc~7k+``>&ExBK5k$Y%T*?OvZ77o-uMphBH&wWWZRvK{pQe)Oz5X z+ACYrkDE;K;}#2=B6*(IGiIxiG25f(&6cP&S`3&~7c(564j@?L06b?&1E37pjf@~f z0ZeGlATb$`B_d{{VeJMJNK3y(k45$@W=w<8AY!1e?(BGHpWGFMd+Pzk{h*#l=Yt8$ z9?=Gt{WwG8mhaZh(leX|g|ksFa4Mmo8}($-GXlN9DTw3FCMO#G zKbvYOsc4k~Pp>+Mgr??)b)^BHiVEWx%(ai=Ft~>}>*|*>*9Z3fe&jJq*w4U?)wA+& zeE_*c!pjU4wRFJN76ZK1QUkwlDS0ASS*7>;`HSGr^K3~7>A5dLGYw9Q^kPj2q%nN z*m2@cQr8ufJlH}elPp~CkkM`osuyb*18ZF+lSqRk`-A<-VA$c}V^#2s%Sk_WL1E{- 
z*hgJwC8(YL*z#{WACFN3($*wjf0SOCk}h1(EfE3U?2-$Xhncw1ViDLXc6hM{R(4O+ zf!S?E!+71*@P^wy$;i-rw6tysdJ6^y{MpUJ0{H;$I4_^SgE+jRFUjl+n`G1wXSQgJ zr$Gl!?=sl$l|bjKqOM%yD;OW`8{h%o_woFw?;0(|Mp_5!dPRS#1it29kxz}r*=%Ow zQbPWq8W~clhv^l^)CS}P!<*wt2E$?PV6ee}ZsnI)n7*U&z*eW+hP_Mlz-?`b_Gpit zy5h+PU%;_7V1k{20X=C^yq;8*nWg>?@`3}}P%uZAmYs?r61tg#T?`ui%B$Gi6wc<6 zHDjv1Ni;tkqXc4G{(1jX)AuLj!ursdK4m%5r34^vQZZD7jj%ra#zYJvv>HyCVE@lz|Z+Q4QT)6V^vbawd zZDBWld2s(J4E;T$)GWrxUW}FfXBZsg6b2K&g^!PNp?qAT6DJzJ*@Jf5r5lNBMK z7)TT+IP`?7Pmgr;ox68*l|@s54XqQa4@P1(@Fa35T2qo~0gmk`iS49qYAk)&F&Efx z-JCq)dXrfj0_95-V&pP+}w#vDpk(Jwt5PI3KX$N$xu! z?Lu0Kjh*Lg1Vbw-O+qdcdWiuGDX|$1)+o2|>685Gf6OIkNrQ}O(9Fg2CO$zD^$|-Y zXEk80!-s{{V2+|{o~M>)Bz)(kv109$f5}QXLNX`)c2{%U(W-aPgYMnoyD+ZG+c6KP zJr>{(yW3_U9V>)DmNoQ~a(wN(Qix@*NpFq*_pSS96 zD--KpxQ>OrYTvy{ePA`!t{y0*iR}*u`3yayRlGPxXI~r|%_UZr*w|=E)egU;>1dnl z{?{TMt@c6nFJ3$Ipj`u@R4b9vEg5XDjH991=|%IQgF=b5fH#j?SHkKMMpy z0lII|(HaEpKRQ3P;U`C_BvBqeJ8Ht!DaYJ|5}YjI1#A%U)g6OQ&q!3qzde?N*(i4O ztA8&^$TVZ5G=;uoB&mT-BlHQldnBp2$Vge@_~#>=ajolkIgY!I&rgs~9d{t%=J6*| zfICk-kyyR!yxRCQuc->J5FDGLc4`hwMJWg5ew z4iSHI36piGUu^JAsI42P%xSu-9Hk;g73CbAleVyVbTQsBGHOmN-xy8UTFXDpP|qtq zvna*c183%@CQqGNP>8?D;V&&c7?-jOdy!^$Se0EWGH~e(eUeQ*yXlT7k($4Unt$h6 z>WSyhno~I6oaK;Id2T+A?dRs3(qyz?lgFO0Z{&(-7c zKb_lxV<|io-`6i>4`3L3eFA3GW_|uY# zG}cTUOUwe(ssu&9jKBCqRWGol+2N!?^WQ&h!-LT%tA2+n`3LWCrSU~8R!4kKuXl^s zwARK>qA_Czy7W%PcWX8G-T7d7cMa5hTn2~VRiW(s7I@{n3qE^qI?VamJ@EaX)kDn{ zY|!z)T*do{s^WhUN~@JCcx#j@Wo{(!gc`n(KI-5MMCU{^bFw`8-2Z>joHpeE delta 5906 zcmZ`-eRLDom1m4?>@gn>AGRq5KMXd;k%eX?SuzHKjAab?3ya!7V8d7*TZ1Kykz|9R zm?kB`v<;ZN(4~+sx=orUO@WM4z%d`Op$Q}*(2|g}d-jmD&DlNKO`1JDXZLJ2+x@*6 zjcn8ApQ|_b-FLs=z4z&>GZP22iT!sPh%o#kB9c#zJ<5|>D_JE4*d zU#Aia$`K23@}$~IJkdah6tR>t62ZljDp2?XBSX=sPDJxK3r}3wbTbONo}0xH+Q#M2 zBsy0o-)JQja&)KS@30Vhj4Lc4cAm+pvy#e)B1fZg*uqd?yjBW#s5_$_3MlV#%fV=F zQU%~cNp-v~UuQ|RBu6$8+)N?93EZXjKu8MrNZ>=iN-AGLM6j!5Vk7Ocg@`sDmR5}X zDi-qxm|+4>Rso?Z(Wn}>h%kID`wWKEa11OuCw4&Q?8tl__)}HE%u<5E>{{EBQO*9@v8; zFt=6;_Cx~^P3V0`iTwwl31nZ-*NIX02A`ey# 
zPzL7Hd1-UunesFnU`JX3V1F6`Je^hq_+gp=n4azdT$`S^h}d;~>#X453rL-6CqxMF z3OwQKl>KVA(!~Pyc)Bn{w{(M*Y=AKVW)AT0rLU`mqj&_MNeLr94yfm|;om^#E>`BJEOy<*^{@m+1(< z21@^qUzRM)5}Y(QV}-uZwq-0x+Cqkp!lxvO2sSc7fKAAD;tx$A$cE?ymgFOk68+ag zHkL%B9uibSN{py&B!}QS9|bTD69EB^pEeHDTT@(+T-QCDFhZA32wvD7LkF=uq06Qg zrjkED{nPDq=jpf=N(jrSC5Kv+FOiHF@r87pNz0^tQ$E~xp_0#?}DZfUIC?PpQ z9c6p^c&Mfn<$j))ru67b+QzewT&~Xxcfw#P*%QH z3dvFP#H%yEP65_lPJcD4F$=5G@&>h2?dEZ?0U?ZZsNHmamWLg+8(Zel?kqcfDXW0q z%xcWWu4^$Vipp|BP>RJ8?3ZO9;cJvG-q4wTl-X;fOnk|LQWYv*P|BhgmACLC4T1F@3ojkO!-uDM>s;lf-M z)%7THNOajrDzUK-o6Y9ti#EvN)~+Bbbes=UySze*MiY$v^Bw?;pUm@4)~_IoAJT(< zIjwjU@;-lOr=2p^->-C$BR{5hge3 zJM+EsbtYx5vN22qQdEWXH$^SU@1`&0SI~dUuS;gsTG@idTrrF&E9V;`svnoG!?Z-l zqUP=BLPawf^e;T3hcHTEVP-Hr@0KclP58ih%TZHmdxhpZR-nk&ys(q0o!|;TX-m0JLbH= zX-%%ABzSXU*$05urTyBarPV-ala{aH+IZG7iz=0n+SRHc+dL7hBGAQI?r9x48In83 zB1=tVJWoL9DmM?CH$rfmREeYX;)Wk-iViiOS+M3@=ma|G>pUM-PK;Q_F4j*M6pzpY$F7 z;@tOi%n6=!szKxeX%>1~V29RVql=mlF_lqtIFk}z=VNqDm*e@eBigk!tS^2PZ@ zvw6DwK7DzJ&_n)3=JLG11it>%A2!K0wmxd^ecZawaCW==)0yL)5(~(fziDopl8p^s z@5O8+U?l_7OiNJKPPC>dp~MMdS! 
z3{OxBnYyQLSJDtJ6pK=##kN;-pd0E~4LC6<0|tWS3)sE`i7VK~jwo!%Fh&!MDD-mh zA820a4!WX^bzE<^6i#&8jPN>k;o&~P?RFBs+7(7C1|oi*c7#1Sq#D|?iiH7%B=G{e z8jnc`oWsBKFtpL1CX5+#^dU*VB1P=sf=Nn0h#NP46oQu8EwzlJ89RPVpWoqtSs(4_ zo7|`dI}%+`jeHLD)@UCt(4V4d+-M2Tqcgg`=9w&H&5ak>K$TN~(GH5*bf-WAY(mx1 zsZEWY^yVyW{eAh2h1(v%a zt?wz|L~Uo!ecW_i2Ys4Av4H~y34L<+M*1JS37xuUlhKNmJ(D@kPIrChA8F>^)rtGE zWv^u)uGonoR=N5wU^+)J#9IugOJh(xO5VK%eoO^G<0%<;^UaRqvXzg!B5X!kj0qeG z-KjN|64}h=m`8^n{!rh| z;zm!T@7I7E+MmHqUe^_pBIc0y{-e?9tg4KuL~QbHdaTU?N3yH!MmS9iY{Y3rke@Xg z?&u*5zK`Xb2UuLqVpoQ$g%g#atu#3@>B_Z*=qeP3<)=F;wTu)yjS9fAc{Tm!UCa|&R$ z(L_6l+ii8R2H#_E66Zc9n~o(MQHSf<8Lbgxw{FXVW!@tF!TZo1nh{oR&t#?@#obpB)xi zu}6)^J1>YA0mfOq50nu4`C%JhBkeRr7x&{?p{e~Rlcv_nvEA*7HyFhqANQcNj3@t) zlz7y{IcR_Vk&C;hmwAtUq80skg<-Q6Jkib3KRksg3$E9)pB~}p@1MDl=CZggtjB2o z^sL4ib!OwqTyC0~Wbsy|efK91r;a+W@aHwDe51F@+vu%Z<@M2vr{>UOr>4`FPyLc^ zKD{K_Q|PhK!>9A;snc8Olrv(Q&#%Z~zigpv&MYYMbgs3y9d-+;aq|^&P;F5oYOC7K z-srVf_V*BcA{LYr?ca|Q}0)k!go6c)OAU?uGvC^okS2Vejb z|HXiVE_^MJiw~bgv}2ma-wb4Y?b3en+Jl@yKILq_p{?`m0+ZLavkQ#PgJ%oqsk3p< z{PApoft_}45Bkf6P2SaQ-m^zjAO5{r+H)@o{dDm;`qR+vw`o z(c52tz}S1`^`=p0PZ`Rk#Y4`7p@AW2$Smt0nx8-p4lSquJ>)c^;>MgW2>63ee~@iJ zG>oSVi;8H|qoQrPz;m?q;!;zG7cLf>%i9<4f8tUW&AwDkw_Vzv(Gci~vY>P#D0eQo zY3608Nm+YYp!+Tt)0Zw6kLvp`m$UCPf`Y#}@#j)l4fR-P+8b50{*7LGci2ng-FFzi>Rlh8DNB17PYBw3JDX0wp_)x~L#mS( z@hqlyZ(PLgGw*c+&b%oBcHMk}PP>*(=iXvmI&bX&{P-5zZ~o0Tz_Y(O3|RGB_L_R_ zw*kP9Z_T8_?OLpQZ$AT=`##$< r +a(log r − log r +0)c + + log(Υ +0) r > r 0 (5) Article number, page 5 of 12 A&A proofs: manuscript no. Populations4 @@ -495,83 +495,83 @@ A&A proofs: manuscript no. Populations4 χ2 Fig. 7. Absolute values of χ2 obtained from the fits of three data sets: all stars (top left panel), population I (bottom left), and population II (bottom -right) for the observations along the major axis of the simul ated galaxy. The results for the modeling of two populations (top right) were obtained -as an algebraic sum of values for populations I and II. 
To avoi d large numbers in the figure, Υ +right) for the observations along the major axis of the simulated galaxy. The results for the modeling of two populations (top right) were obtained +as an algebraic sum of values for populations I and II. To avoid large numbers in the figure, Υ 0 was divided by the mean mass of a stellar particle. where r is the distance from the center of the galaxy, r 0 is a constant, while Υ - 0 , a , and c are the parameters of a model. We +0, a, and c are the parameters of a model. We have assumed log r -0 = 0 . 33 which corresponds to three softening +0 = 0.33 which corresponds to three softening scales for stellar particles in the Illustris simulation. -We probed the parameter a ∈ [0 : 1 . 3] with a step ∆a = 0 . 04 -and c ∈ [1 . 1 : 2 . 9] with a step ∆c = 0 . 2, imposing the requirement +We probed the parameter a ∈ [0 : 1.3] with a step ∆a = 0.04 +and c ∈ [1.1 : 2.9] with a step ∆c = 0.2, imposing the requirement on the total density profile to be monotonically decreasing with radius. For each set of parameters and for each line of sight we generated 1200 orbits using 100 values of energy (expressed with the radius of a circular orbit) spaced logarithmically and 12 values of the relative angular momentum spaced linearly. The -outer radius of the orbit library, that is the apocenter of th e most +outer radius of the orbit library, that is the apocenter of the most extended orbit, was set to r out = 165 kpc in order to cover over 0.999 of the total stellar mass based on the fitted King profile parameters. We fit the kinematics weighted with the fraction of mass with -the constrained least squares algorithm where diff erent values +the constrained least squares algorithm where different values of Υ - 0 were obtained with a simple transformation of velocities +0 were obtained with a simple transformation of velocities given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). 
In order to smooth out the numerical artifacts, the three-dimensional χ2 - spaces were then interpolated with 12-order polynomials ( ∼ a 4 - c 4 - Υ 4 -0 ) that were further used to determine the global minimums + spaces were then interpolated with 12-order polynomials (∼ a4 +c4 +Υ4 +0) that were further used to determine the global minimums (identified as the best-fitting models) and 1, 2, 3 σ confidence levels which for three parameters correspond to ∆χ2 = -3 . 53 , 8 . 02 , 14 . 2 (Press et al. 1992). +3.53, 8.02, 14.2 (Press et al. 1992). 3.2. Application to mock data -In the following we present the direct and inferred results o f -the Schwarzschild modeling of the data sets described in Sec tion +In the following we present the direct and inferred results of +the Schwarzschild modeling of the data sets described in Section 2.3. -First, Fig. 7 shows the distribution of the absolute values o f +First, Fig. 7 shows the distribution of the absolute values of the χ2 as a function of three parameters of the mass-to-light ratio. - In order to avoid unnecessary repetitions, we include o nly + In order to avoid unnecessary repetitions, we include only the plot for the mock data obtained by observing the Illustris galaxy along its major axis as the others are qualitatively similar. -The four panels refer to fits for all stars (top left), the meta l-rich -population I (bottom left), the metal-poor population II (b ottom +The four panels refer to fits for all stars (top left), the metal-rich +population I (bottom left), the metal-poor population II (bottom right), and the one named "populations" (top right) which is the algebraic sum of values for both populations. As our parametrization of the mass-to-light ratio is not intuitive - we present its profiles explicitly in the first rows of th e leftArticle + we present its profiles explicitly in the first rows of the leftArticle number, page 6 of 12 -K. Kowalczyk & E. L. 
Łokas: Multiple stellar populations in S chwarzschild modeling -10 610 710 810 910 10 +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling +1061071081091010 10 100 ALL -Υ(r) [M⊙/L⊙] +Υ(r) [M ⊙/L ⊙] r [kpc]major 10 100ALL r [kpc]intermediate 10 100ALL r [kpc]minor -3 σ -2 σ -1 σ +3σ +2σ +1σ best model data -10 410 610 8 +104106108 10 100 ALL -νtot(r) [M⊙ kpc-3] +ν tot(r) [M ⊙ kpc-3 ] r [kpc] 10 100ALL r [kpc] 10 100ALL r [kpc] -10 1010 1110 12 +101010111012 10 100 ALL -Mtot(r) [M⊙] +M tot(r) [M ⊙] r [kpc] 10 100ALL r [kpc] 10 100ALL r [kpc] @@ -580,28 +580,28 @@ r [kpc] 10 100ALL β(r) r [kpc] 0 10 20 30 40ALL r [kpc] 0 10 20 30 40 50ALL - r [kpc] 10 610 710 810 910 10 + r [kpc] 1061071081091010 10 100 POPULATIONS -Υ(r) [M⊙/L⊙] +Υ(r) [M ⊙/L ⊙] r [kpc]major 10 100POPULATIONS r [kpc]intermediate 10 100POPULATIONS r [kpc]minor -3 σ -2 σ -1 σ +3σ +2σ +1σ best model data -10 410 610 8 +104106108 10 100 POPULATIONS -νtot(r) [M⊙ kpc-3] +ν tot(r) [M ⊙ kpc-3 ] r [kpc] 10 100POPULATIONS r [kpc] 10 100POPULATIONS r [kpc] -10 1010 1110 12 +101010111012 10 100 POPULATIONS -Mtot(r) [M⊙] +M tot(r) [M ⊙] r [kpc] 10 100POPULATIONS r [kpc] 10 100POPULATIONS r [kpc] @@ -611,7 +611,7 @@ r [kpc] 10 100POPULATIONS r [kpc] 0 10 20 30 40POPULATIONS r [kpc] 0 10 20 30 40 50POPULATIONS r [kpc] -Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy al ong the principal +Fig. 8. Left-hand side: results of Schwarzschild modeling of three mock data sets obtained by observing the simulated galaxy along the principal axes. In rows: derived mass-to-light ratio, total density, total mass, and anisotropy parameter. In columns: observations along the major, intermediate, and minor axis, respectively. Green lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ confidence levels. The true values are presented as black lines. 
Thin vertical lines mark the values of r @@ -620,69 +620,69 @@ data sets, from left to right. Right-hand side: same as left but for the fit of and right-hand side panels of Fig. 8 for the results obtained for all stars and the populations, respectively. We further calculate the total density (second rows) and the total mass content (third -rows). We include the obtained orbit anisotropy within the m odeled - range in the bottom rows. The consecutive columns prese nt +rows). We include the obtained orbit anisotropy within the modeled + range in the bottom rows. The consecutive columns present the results for the observations along the major, intermediate, -and minor axis. Green lines indicate values for the best-fit m odels - whereas the colored areas of decreasing intensity corre spond +and minor axis. Green lines indicate values for the best-fit models + whereas the colored areas of decreasing intensity correspond to 1, 2, and 3 σ confidence regions obtained as extreme values allowed by the models with χ2 within a given region. In each panel the true values from the simulation are presented with black lines while thin vertical lines mark the values of r 0 and the outer range -of the data sets beyond which the reliability of results drop s significantly. - The true mass-to-light ratio profile was obtaine d by +of the data sets beyond which the reliability of results drops significantly. + The true mass-to-light ratio profile was obtained by dividing the total mass by the fitted King profiles, therefore the -drop at 100 kpc is the numerical artifact occurring at the ver y +drop at 100 kpc is the numerical artifact occurring at the very outskirts of the galaxy. Whereas in the right-hand side panels of Fig. 8 the resulting anisotropy is obtained from the fit of all stars and uses only the -location of global minimum and confidence levels from two pop ulations +location of global minimum and confidence levels from two populations (as in the top right panel of Fig. 
7), in Fig. 9 we present another method of calculating the anisotropy. In the second and third row we show the derived profiles for population I and II -separately and combine them as stellar mass weighted averag e +separately and combine them as stellar mass weighted average in the top row. As in previous figures, three columns refer to the -diff erent lines of sight whereas the narrow fourth one shows the -behavior of the true profiles outside the modeled range which , as -we noticed in our previous studies, in a limited way influence s -the results. Such an impact is understandable since the star s at +different lines of sight whereas the narrow fourth one shows the +behavior of the true profiles outside the modeled range which, as +we noticed in our previous studies, in a limited way influences +the results. Such an impact is understandable since the stars at larger distances from the center are still included in the line-ofsight - measurements. 3.3. Comparison of fitting results + measurements. 3.3. Comparison of fitting results The main strength of the two populations method comes from -tracing the underlying gravitational potential at diff erent scales. +tracing the underlying gravitational potential at different scales. As can be seen in the bottom panels of Fig. 7, population I, which is more concentrated, is also more sensitive to Υ - 0 , but gives -weaker constraints on a or c . On the other hand, population II -attempts to reproduce the total mass content at larger dista nces -as well, therefore showing stronger coupling between the pa rameters. +0, but gives +weaker constraints on a or c. On the other hand, population II +attempts to reproduce the total mass content at larger distances +as well, therefore showing stronger coupling between the parameters. 
The global minimums of the χ2 distributions for both approaches, that is modeling one and two populations, which we identify as the best-fitting models, closely coincide showing that there is no internal bias in the improved method. However, significant - diff erences can be observed when comparing the confidence + differences can be observed when comparing the confidence levels, mainly at 1 and 3 σ. Namely, we find that using -two populations, the constraints we obtain on the density an d +two populations, the constraints we obtain on the density and anisotropy profile are much stronger. Additionally, the more accurate method allows us to study -other e ff ects and biases, for example the consequences of the +other effects and biases, for example the consequences of the nonsphericity of the modeled object. Whereas for the fit of all -stars the true values of the density, mass, and anisotropy pr ofiles +stars the true values of the density, mass, and anisotropy profiles are contained within 1 σ confidence regions, the results for the -populations are more or less biased depending on the axis. Th ey +populations are more or less biased depending on the axis. They are well reproduced for the observation along the intermediate -axis, for which the e ff ects of nonsphericity seem to cancel out, +axis, for which the effects of nonsphericity seem to cancel out, and more biased for the remaining lines of sight. We notice a -trend from under- to overestimation of the anisotropy when g oing +trend from under- to overestimation of the anisotropy when going from the major to the minor axis. Article number, page 7 of 12 A&A proofs: manuscript no. 
Populations4 -101 - 0 10 20 30 40POP I + POP IIβ(r) + 0 10 20 30 40POP I + POP II β(r) r [kpc]major 0 10 20 30 40 r [kpc]intermediate @@ -690,12 +690,12 @@ r [kpc]intermediate r [kpc]minor 50 60 70 80 -101 - 0 10 20 30 40POP Iβ(r) + 0 10 20 30 40POP I β(r) r [kpc] 0 10 20 30 40 r [kpc] 0 10 20 30 40 r [kpc] 50 60 70 80 -101 - 0 10 20 30 40POP IIβ(r) + 0 10 20 30 40POP II β(r) r [kpc] 0 10 20 30 40 r [kpc] 0 10 20 30 40 r [kpc] 50 60 70 80 @@ -704,36 +704,36 @@ best model 1σ 2σ 3σ -Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulat ed galaxy. In rows: -results for all stars (calculated as the superposition of two populations), population I, and population II. Colors fol low the convention used in +Fig. 9. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations of the simulated galaxy. In rows: +results for all stars (calculated as the superposition of two populations), population I, and population II. Colors follow the convention used in previous figures. In columns: observations along the major, intermediate, and minor axis. The last narrower column shows the data (black lines) outside the modeled radial range. Color lines indicate values for the best-fit models whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ confidence regions. 4. Modeling Fornax dSph -In this section we present the application of our Schwarzsch ild -modeling scheme to the observational data for the Fornax dSp h +In this section we present the application of our Schwarzschild +modeling scheme to the observational data for the Fornax dSph galaxy obtained by del Pino et al. (2015) and del Pino et al. -(2017). This study is a follow-up of the work of Kowalczyk et a l. +(2017). This study is a follow-up of the work of Kowalczyk et al. (2019) and can be directly compared to the results presented there. 
Moreover, we refer the reader to these previous publications - for details on the origin of data and our procedures use d + for details on the origin of data and our procedures used for cleaning the spectroscopic sample. Similarly to the approach introduced in Section 2.2, we divided - all available stars into two equal-size populations b ased on + all available stars into two equal-size populations based on their metallicity and then cross-correlated the samples with the -data used in Kowalczyk et al. (2019). The metallicity histog ram -of the final spectroscopic sample is shown in Fig. 10. Additio nally, +data used in Kowalczyk et al. (2019). The metallicity histogram +of the final spectroscopic sample is shown in Fig. 10. Additionally, we color-coded each bin with the population it has been assigned to, namely orange or blue for population I or II. Interestingly, - the case of Fornax is similar to our simulated gala xy -as the split at [Fe / H] = − 1 also captures an important feature -of the object’s star formation history, separating stars in to subsamples + the case of Fornax is similar to our simulated galaxy +as the split at [Fe/H]= −1 also captures an important feature +of the object’s star formation history, separating stars into subsamples older and younger than 6 Gyr, as shown in Fig. 12 of del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The numbers of stars contained in the samples of all stars, population - I, and population II are given in Table 2, where the indic es -"phot" and "spec" refer to the photometric and kinematic sam ples. - The sum of stars in the populations is lower than in the sample of all stars since only stars with reliable measureme nts + I, and population II are given in Table 2, where the indices +"phot" and "spec" refer to the photometric and kinematic samples. + The sum of stars in the populations is lower than in the sample of all stars since only stars with reliable measurements of metallicity could be included. 
N [Fe/H]pop I @@ -741,28 +741,28 @@ pop II 0 20 40 60 80 100 -2.5 -2 -1.5 -1 -0.5 0 Fig. 10. Metallicity histogram of the final spectroscopic sample used in -the modeling of two stellar populations in the Fornax dSph. E ach bin is +the modeling of two stellar populations in the Fornax dSph. Each bin is color-coded according to the population it has been assigned to, orange or blue for population I and II, respectively. As we have shown in our earlier work, the light profile of the Fornax dSph can be well reproduced with the three-parameter Article number, page 8 of 12 -K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling Table 2. Properties of the data samples for the Fornax dSph. Property ALL POP I POP II -Number of stars ( N -phot ) 65 797 14 882 49 205 -Number of stars ( N -spec ) 3286 1136 1151 +Number of stars (N +phot) 65 797 14 882 49 205 +Number of stars (N +spec) 3286 1136 1151 Stars within 1.8 kpc 3268 1134 1130 -Fitted normalization ( N -0 ) [ × 10 4 - ] 6.95 1.81 5.45 -Sérsic radius ( R - S ) [kpc] 0.454 0.429 0.420 -Sérsic parameter ( m ) 0.808 0.807 0.898 +Fitted normalization (N +0 ) [×104 +] 6.95 1.81 5.45 +Sérsic radius (R +S) [kpc] 0.454 0.429 0.420 +Sérsic parameter (m) 0.808 0.807 0.898 102103104105 - 0.2 0.5 2 0.1 1n⋆(R) [kpc-2] + 0.2 0.5 2 0.1 1n ⋆(R) [kpc-2 ] R [kpc]all stars popI popII @@ -773,54 +773,54 @@ vertical lines indicate r 0 (see text) and the outer boundary of the spectroscopic data. Sérsic formula (Sérsic 1968). The profiles of number density for -all stars and both populations together with the best-fittin g Sérsic -profiles are presented in Fig. 11. The colors follow the conve ntion +all stars and both populations together with the best-fitting Sérsic +profiles are presented in Fig. 11. The colors follow the convention introduced in previous sections. 
Thin vertical lines indicate -the innermost data point for the light profile for all stars an d -the outer boundary of the kinematic sample. The former, set a t -log r = − 0 . 16, is also used as the minimum of the mass-to-light -ratio profile ( r +the innermost data point for the light profile for all stars and +the outer boundary of the kinematic sample. The former, set at +log r = −0.16, is also used as the minimum of the mass-to-light +ratio profile (r 0 in Eq. 5). The fitted parameters of the profiles, that is the normalization N 0 , the Sérsic radius R - S , and the Sérsic -parameter m , are included in the second part of Table 2. +S , and the Sérsic +parameter m, are included in the second part of Table 2. Figure 12 presents the profiles of the observables used in the -Schwarzschild modeling: the fraction of stars and the 2nd, 3 rd, -and 4th velocity moments (top to bottom) for the three data sa mples: - all stars, population I, and population II (in red, ora nge, and +Schwarzschild modeling: the fraction of stars and the 2nd, 3rd, +and 4th velocity moments (top to bottom) for the three data samples: + all stars, population I, and population II (in red, orange, and blue, respectively). The error bars indicate 1 σ sampling errors. -The parameter space for Υ ( r ) has been probed as follows: -a ∈ [0 : 1 . 85] with a step ∆a = 0 . 05 and c ∈ [1 . 2 : 6] with a -step ∆c = 0 . 2. We point out that in Kowalczyk et al. (2019) the +The parameter space for Υ(r) has been probed as follows: +a ∈ [0 : 1.85] with a step ∆a = 0.05 and c ∈ [1.2 : 6] with a +step ∆c = 0.2. We point out that in Kowalczyk et al. (2019) the parameter c was fixed at c = 3 and now we fit it as a free parameter. - As for the mock data in Section 3.2, diff erent values of + As for the mock data in Section 3.2, different values of Υ - 0 were obtained with the transformation of velocity moments +0 were obtained with the transformation of velocity moments within the χ2 fitting routine. 
The values of ∆χ2 for all stars and -the populations are shown in the two panels of Fig. 13 (left an d -right-hand side, respectively). Due to the dense coverage o f the +the populations are shown in the two panels of Fig. 13 (left and +right-hand side, respectively). Due to the dense coverage of the grid, we decided to include only the values within 3 σ from the fitted minimums (see Section 3.1). -The profiles of the mass-to-light ratio, total density, tota l +The profiles of the mass-to-light ratio, total density, total mass, and velocity anisotropy resulting from the χ2 distributions -are presented in the consecutive rows of Fig. 14. The anisotr opy -profile for the populations is based on the fit of all stars but u sing 0 0.05 0.1 0.15 0.2 0.25 +are presented in the consecutive rows of Fig. 14. The anisotropy +profile for the populations is based on the fit of all stars but using 0 0.05 0.1 0.15 0.2 0.25 0 0.4 0.8 1.2 1.6M(R) R [kpc]all stars pop I pop II 04080120160200 - 0 0.4 0.8 1.2 1.6m2(R)[(km s-1)2 ] + 0 0.4 0.8 1.2 1.6m 2(R)[(km s-1 )2 ] R [kpc] -16-80816 - 0 0.4 0.8 1.2 1.6m3(R)[102 (km s-1)3 ] + 0 0.4 0.8 1.2 1.6m 3(R)[102 (km s-1 )3 ] R [kpc] 0481216 - 0 0.4 0.8 1.2 1.6m4(R)[104 (km s-1)4 ] + 0 0.4 0.8 1.2 1.6m 4(R)[104 (km s-1 )4 ] R [kpc] Fig. 12. Observables of the Fornax dSph used in our Schwarzschild modeling scheme. In rows: the fraction of the total number of stars, the @@ -828,12 +828,12 @@ modeling scheme. In rows: the fraction of the total number of stars, the for all stars whereas in orange and blue those for populations I and II, respectively. the confidence levels on Υ from the fit of two populations. Green -lines indicate the values for the best-fitting models wherea s the +lines indicate the values for the best-fitting models whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ confidence - regions. Additionally, with black dashed lines we in clude + regions. 
Additionally, with black dashed lines we include the results from Kowalczyk et al. (2019) for comparison. As a result of freeing the steepness of the mass-to-light -ratio profile (parameter c ) with respect to the previous study +ratio profile (parameter c) with respect to the previous study Article number, page 9 of 12 A&A proofs: manuscript no. Populations4 0 0.5 1 1.5 @@ -860,46 +860,46 @@ panel) for the Fornax dSph. (Kowalczyk et al. 2019), we obtained higher estimates of the enclosed total mass at larger radii. In particular, for the mass enclosed within 1.8 kpc we get M -all ( < 1 . 8 kpc) = 3 . 87 + 1. 48 -− 1. 56 × 10 8 +all(< 1.8 kpc) = 3.87+1.48 +−1.56 × 108 M - ⊙ from the fit for all stars and M -pops ( < 1 . 8 kpc) = 4 . 71 + 0. 87 -− 1. 13 × -10 8 +⊙ from the fit for all stars and M +pops (< 1.8 kpc) = 4.71+0.87 +−1.13 × +108 M - ⊙ from the fit of populations, while previously we had +⊙ from the fit of populations, while previously we had M -old ( < 1 . 8 kpc) = 3 . 7 + 1. 4 -− 1. 3 × 10 8 +old (< 1.8 kpc) = 3.7+1.4 +−1.3 × 108 M - ⊙ . -Interestingly, despite the significant shift of the positio n of +⊙. +Interestingly, despite the significant shift of the position of χ2 -min (to c = 4 . 2 for all stars and 3.6 for populations), the obtained +min (to c = 4.2 for all stars and 3.6 for populations), the obtained profile of the anisotropy parameter remains decreasing or -flat for all stars but changes to increasing from 0 to 0.5 for th e -populations. Nevertheless, even in the latter case the prev ious +flat for all stars but changes to increasing from 0 to 0.5 for the +populations. Nevertheless, even in the latter case the previous result agrees with the new finding within 1 σ. The detailed analysis of the anisotropy is shown in Fig. 15 where the middle and bottom panels present the profiles obtained - for each population separately. We notice that the pr ofile + for each population separately. 
We notice that the profile for population I is decreasing or has a local minimum whereas -for population II is increasing (from − 0 . 25 to 0.5 for the bestfitting +for population II is increasing (from −0.25 to 0.5 for the bestfitting model). Since population I is more concentrated, the last bins contain very few stars, which limits their credibility. The -top panel of Fig. 15 presents the anisotropy of all stars calc ulated +top panel of Fig. 15 presents the anisotropy of all stars calculated as a weighted superposition of two populations. With such -approach we still obtain the increasing profile (from 0 to 0.5 ) but +approach we still obtain the increasing profile (from 0 to 0.5) but the previous result agrees with it only within 2 σ. Since Fornax dSph is significantly elongated with the projected - ellipticity of ǫ = 0 . 30 ± 0 . 01 (Irwin & Hatzidimitriou -1995), we anticipate some bias in the obtained results cause d -by the spherically symmetric modeling. Kowalczyk et al. (20 18) + ellipticity of ǫ = 0.30 ± 0.01 (Irwin & Hatzidimitriou +1995), we anticipate some bias in the obtained results caused +by the spherically symmetric modeling. Kowalczyk et al. (2018) studied such bias in an axisymmetric simulated object qualitatively - similar to Fornax and identified diff erences in the systematic - errors depending on whether the galaxy was observed alo ng -its major or minor axis. Assuming that Fornax is observed alo ng + similar to Fornax and identified differences in the systematic + errors depending on whether the galaxy was observed along +its major or minor axis. 
Assuming that Fornax is observed along the line of sight in between these extremes, we expect the total mass profile to be slightly overestimated and the anisotropy to be underestimated, further strengthening the likelihood of the real @@ -909,16 +909,16 @@ Both constant (like for our population I) and growing (population II) anisotropy profiles can arise from biased modeling of the real growing profile by observing an object along the minor and major axis, respectively. However, for the bias to occur in two populations presented here, their inner orientations - would need to be opposite. Since such morphological fe atures + would need to be opposite. Since such morphological features are not supported by the photometric studies of Fornax (del Pino et al. 2015; Wang et al. 2019) which rather find a good -spatial alignment between the stellar populations, we conc lude +spatial alignment between the stellar populations, we conclude that the anisotropy profiles of the two populations modeled in this work are indeed significantly distinct. Finally, it is worth noticing that the so-called mass-followslight - model, that is the one following from the assumption th at -the total density traces the stellar distribution, is no lon ger supported - by the fit of the populations. With our parametrizatio n, + model, that is the one following from the assumption that +the total density traces the stellar distribution, is no longer supported + by the fit of the populations. With our parametrization, the mass-follows-light model corresponds to a = 0 and whereas it is enclosed within 3 σ for the fit of all stars, as was the case in Kowalczyk et al. (2019), the allowed values for the improved @@ -929,13 +929,13 @@ Building on the previously created implementation of the Schwarzschild orbit superposition method focused on modeling dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018, 2019), we improved our tool by introducing multiple stellar populations. 
- Such an improvement is desirable and justified sin ce + Such an improvement is desirable and justified since many of the dwarfs show signs of multiple star formation bursts -or extended star formation episodes. As the diff erent populations -trace the common underlying gravitational potential, one m ay +or extended star formation episodes. As the different populations +trace the common underlying gravitational potential, one may expect a significant improvement in the estimates of not only the -total mass content but also the orbit anisotropy since this r obust -modeling technique reproduces the anisotropy as a by-produ ct +total mass content but also the orbit anisotropy since this robust +modeling technique reproduces the anisotropy as a by-product of the modeling rather than taking it as an assumption. We have tested our hypothesis by modeling mock data generated from a galaxy formed in the Illustris simulation. Due to the @@ -944,9 +944,9 @@ limitations of the resolution, we chose a galaxy of mass a few orders dwarfs. Still, the galaxy possessed appropriate qualitative characteristics, such as the lack of gas and an almost spherical shape, Article number, page 10 of 12 -K. Kowalczyk & E. L. Łokas: Multiple stellar populations in S chwarzschild modeling +K. Kowalczyk & E. L. Łokas: Multiple stellar populations in Schwarzschild modeling 101103105 - 0.1 1Υ(r) [M⊙/L⊙] + 0.1 1Υ(r) [M ⊙/L ⊙] r [kpc]ALL 0.1 1 r [kpc]POPULATIONS @@ -956,11 +956,11 @@ r [kpc]POPULATIONS best model K19 104106108 - 0.1 1νtot(r) [M⊙ kpc-3] + 0.1 1ν tot(r) [M ⊙ kpc-3 ] r [kpc] 0.1 1 r [kpc] 105107109 - 0.1 1Mtot(r) [M⊙] + 0.1 1M tot(r) [M ⊙] r [kpc] 0.1 1 r [kpc] -3-2-101 @@ -976,120 +976,120 @@ confidence regions. The best-fitting values obtained by Kowalczyk et al. (2019) are shown with black dashed lines. that made it a good test bed for modeling techniques applicable to dSph galaxies. 
We applied our approach to all data and -to two stellar populations separately, comparing the accur acy of -the obtained results. Although the addition of the second tr acer -seemingly increases the number of constraints twice, the in crement - is somewhat compromised by the sampling errors since th e -number of stars in each sample is then reduced. Still, we foun d +to two stellar populations separately, comparing the accuracy of +the obtained results. Although the addition of the second tracer +seemingly increases the number of constraints twice, the increment + is somewhat compromised by the sampling errors since the +number of stars in each sample is then reduced. Still, we found strong improvements in the accuracy of the method when using - two populations. The results of the modeling show that th e -density and velocity anisotropy profiles are more strongly c onstrained, + two populations. The results of the modeling show that the +density and velocity anisotropy profiles are more strongly constrained, most importantly at the 3 σ level, that is the range of allowed values is much narrower. Similarly to the conclusions of Kowalczyk et al. (2018) who -explored the e ff ects of nonsphericity using large and small -data samples, the comparison of results presented in the lef tand +explored the effects of nonsphericity using large and small +data samples, the comparison of results presented in the leftand right-hand side panels of Fig. 8 suggests that the improved method using two stellar populations gives more precise but less accurate outcome. However, in both studies the apparent deterioration of the reliability is a consequence of modeling of a -nonspherical object. In both cases, a simpler approach (muc h +nonspherical object. 
In both cases, a simpler approach (much smaller data samples or using one stellar population) resulted -2-101 - 0 0.4 0.8 1.2 1.6POP I + POP IIβ(r) + 0 0.4 0.8 1.2 1.6POP I + POP II β(r) r [kpc] -2-101 - 0 0.4 0.8 1.2 1.6POP Iβ(r) + 0 0.4 0.8 1.2 1.6POP I β(r) r [kpc] -2-101 - 0 0.4 0.8 1.2 1.6POP IIβ(r) + 0 0.4 0.8 1.2 1.6POP II β(r) r [kpc] best model -1 σ -2 σ 3 σ +1σ +2σ 3σ K19 Fig. 15. Profiles of the anisotropy parameter obtained with the Schwarzschild modeling of two stellar populations for the Fornax dSph. In rows: results for all stars (calculated as the superposition of two populations), population I, and population II. Color lines indicate values -for the best-fit models whereas the colored areas of decreasi ng intensity +for the best-fit models whereas the colored areas of decreasing intensity show the 1, 2, and 3 σ confidence regions. The dashed black line shows the result from Kowalczyk et al. (2019) for comparison. in larger final uncertainties, usually containing the true values within 1 σ confidence region. On the other hand, the improved -methods exhibit substantially reduced uncertainties, hig hlighting +methods exhibit substantially reduced uncertainties, highlighting the underlying bias. Our method parametrizes the total mass content with the -mass-to-light ratio varying with radius as a power-law in th e loglog +mass-to-light ratio varying with radius as a power-law in the loglog scale. We made two main changes with respect to our previous work: we added a third parameter c controlling the steepness -of the mass-to-light ratio profile (previously fixed at the va lue of -3) and allowed for diff erent stellar density profiles (previously +of the mass-to-light ratio profile (previously fixed at the value of +3) and allowed for different stellar density profiles (previously only Sérsic, now also King). These changes are of course coupled - since diff erent density profiles require diff erent exponents to -reproduce the same mass profile. 
It is visible also in our resu lts + since different density profiles require different exponents to +reproduce the same mass profile. It is visible also in our results since the King profile applied in the simulated galaxy gave us -values of c lower than 3. Nevertheless, we decided to use diff erent +values of c lower than 3. Nevertheless, we decided to use different density profiles to make our method more general and applicable to objects, such as our Illustris galaxy, for which the Sérsic formula does not provide a good approximation of the density distribution. Finally, we applied the improved method to the data for the -Fornax dSph galaxy. Due to the addition of another free param eter +Fornax dSph galaxy. Due to the addition of another free parameter in our functional form for the mass-to-light ratio, our results - for modeling all stars are slightly diff erent from the ones + for modeling all stars are slightly different from the ones Article number, page 11 of 12 A&A proofs: manuscript no. Populations4 obtained in Kowalczyk et al. (2019). However, in terms of the total density and mass distribution the estimates obtained here -agree very well with those earlier results in the range cover ed -by the data. Therefore, the detailed comparison with other e stimates - from the literature presented in Kowalczyk et al. (201 9) is +agree very well with those earlier results in the range covered +by the data. Therefore, the detailed comparison with other estimates + from the literature presented in Kowalczyk et al. (2019) is still valid and we do not repeat it here. -A more significant diff erence with respect to these previous +A more significant difference with respect to these previous estimates is seen in the results of modeling two populations in -Fornax. In this case we find the anisotropy to be slightly incr easing +Fornax. 
In this case we find the anisotropy to be slightly increasing rather than decreasing with radius and, most importantly, the confidence regions for this parameter, as well as for the density, - are much narrower. We were thus able to obtain tighter c onstraints - on the properties of Fornax, which means that the im proved - method is successful. For the first time, we were also a ble -to deduce the velocity anisotropy profiles for each of the pop ulations - separately. We found that the more concentrated, meta l-rich -population I has a decreasing anisotropy profile while the mo re -extended, metal-poor population II has the anisotropy incr easing -with radius. This finding may partially explain the large spr ead -of the anisotropy values obtained in the literature and summ arized + are much narrower. We were thus able to obtain tighter constraints + on the properties of Fornax, which means that the improved + method is successful. For the first time, we were also able +to deduce the velocity anisotropy profiles for each of the populations + separately. We found that the more concentrated, metal-rich +population I has a decreasing anisotropy profile while the more +extended, metal-poor population II has the anisotropy increasing +with radius. This finding may partially explain the large spread +of the anisotropy values obtained in the literature and summarized in Table 2 and 3 of Kowalczyk et al. (2019), which were often based on modeling subsamples of our spectroscopic data set. For both studied objects we split the stars into two populations by dividing them in half based on their metallicity, Z (in -solar units), for the Illustris galaxy and [Fe / H] for Fornax. Such -a method is approximate but justified. Both galaxies have com plex - star formation history with multiple star formation bu rsts, as +solar units), for the Illustris galaxy and [Fe/H] for Fornax. Such +a method is approximate but justified. 
Both galaxies have complex + star formation history with multiple star formation bursts, as demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. -(2013), producing multiple stellar populations which cann ot be -easily tracked as the metallicity is a good but not perfect pr oxy -for the stellar age. Moreover, the metallicity histograms f or both -objects are approximately unimodal not allowing for a conve nient +(2013), producing multiple stellar populations which cannot be +easily tracked as the metallicity is a good but not perfect proxy +for the stellar age. Moreover, the metallicity histograms for both +objects are approximately unimodal not allowing for a convenient separation. More refined methods of division have been suggested in the literature, for example in the form of the likelihood - function based on the position, velocity, and metallic ity index - (Walker & Peñarrubia 2011). However, the likelihood fun ction + function based on the position, velocity, and metallicity index + (Walker & Peñarrubia 2011). However, the likelihood function requires many assumptions which introduce additional uncertainties - into the treatment of the data. On the other hand , our -approach ensures the maximization of each sample (and there fore + into the treatment of the data. On the other hand, our +approach ensures the maximization of each sample (and therefore minimization of sampling errors) while capturing the important features of the star formation history. Further improvements to the Schwarzschild modeling method are certainly possible. One way to proceed would be to -include the modeling of the proper motions of the stars. For n ow, -measurements of transverse velocities are available only f or the +include the modeling of the proper motions of the stars. 
For now, +measurements of transverse velocities are available only for the brightest stars in dSph galaxies, but even small samples of this -type could provide further constraints on the models, as dem onstrated +type could provide further constraints on the models, as demonstrated by Strigari et al. (2007) and Massari et al. (2020). Acknowledgements. We are grateful to Andrés del Pino for providing the data for -the Fornax dSph and to the Illustris team for making their sim ulations publicly +the Fornax dSph and to the Illustris team for making their simulations publicly available. Useful comments from the anonymous referee are kindly appreciated. This research was supported by the Polish National Science Center under grant 2018/28/C/ST9/00529. @@ -1101,14 +1101,14 @@ Binney, J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton Universi Press, Princeton) Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de Ven, G., & Battaglia, -G. 2013, MNRAS, 433, 3173 del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505 +G. 2013, MNRAS, 433, 3173 del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS, 433, 1505 del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996 del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017, MNRAS, 465, 3708 Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126 Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92 -Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L 40 -Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRA S, 445, 175 +Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L40 +Genel, S., Vogelsberger, M., Springel, V., et al. 2014, MNRAS, 445, 175 Genina, A., Benitez-Llambay, A., Frenk, C. S., et al. 2018, MNRAS, 474, 1398 Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 481, 250 Irwin, M., & Hatzidimitriou, D. 
1995, MNRAS, 277, 1354 @@ -1130,7 +1130,7 @@ Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, 12 Pace, A. B., Kaplinghat, M., Kirby, E., et al. 2020, MNRAS, 495, 3022 Press, W. H., Teukolsky, S. A., Vetterling, W. T., & Flannery, B. P. 1992, Numerical - Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge) + Recipes in C, 2nd edn. (Cambridge University Press, Cambridge) Schwarzschild, M. 1979, ApJ, 232, 236 Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba, Argentina) diff --git a/read/results/playa/2201.00178.txt b/read/results/playa/2201.00178.txt index 5c079d2..b3c719c 100644 --- a/read/results/playa/2201.00178.txt +++ b/read/results/playa/2201.00178.txt @@ -3,659 +3,659 @@ Typeset using LA T EX default style in AASTeX631 Imaging the Sun’s near-surface flows using mode-coupling analysis -Prasad Mani , 1 - Chris S. Hanson , 2 - andShravan Hanasoge 1, 2 +Prasad Mani ,1 + Chris S. Hanson ,2 + and Shravan Hanasoge 1, 2 1 - Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India +Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Mumbai, India 2 - Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE +Center for Space Science, NYUAD Institute, New York University Abu Dhabi, Abu Dhabi, UAE ABSTRACT The technique of normal-mode coupling is a powerful tool with which to seismically image nonaxisymmetric phenomena in the Sun. Here we apply mode coupling in the Cartesian approximation to probe steady, near-surface flows in the Sun. 
Using Doppler cubes obtained from the Helioseismic and Magnetic Imager onboard the Solar Dynamics Observatory, we perform inversions on mode-coupling measurements to show that the resulting divergence and radial vorticity maps at supergranular length -scales (∼ 30 Mm) near the surface compare extremely well with those obtained using the Local Correlation +scales (∼30 Mm) near the surface compare extremely well with those obtained using the Local Correlation Tracking method. We find that the Pearson correlation coefficient is ≥ 0.9 for divergence flows, while ≥ 0.8 is obtained for the radial vorticity. Keywords: Helioseismology (709); Solar physics (1476); Supergranulation (1662) 1. INTRODUCTION Helioseismology is the study of the Sun’s internal structure and its properties, by means of interpreting its effect -on solar oscillations (seeChristensen-Dalsgaard2002, for a review). These are resonant normal modes of the Sun, +on solar oscillations (see Christensen-Dalsgaard 2002, for a review). These are resonant normal modes of the Sun, behaving as standing waves in a cavity bounded by the solar surface and a depth that depends on the wavenumber of the oscillation. As these waves penetrate the interior, they register information of the properties and dynamics of the solar interior and return to the surface, where they are observed. The internal structure of the Sun can then be retrieved through meticulous inversions of these seismic measurements. Several important flow systems on the Sun have been inferred using various global and local helioseismic methods. Of those, the most notable global helioseismic results include inferences on the solar differential rotation, through -global mode frequency splitting (Thompson et al.1996;Schou et al.1998), and the resolving the neutrino problem -(Bahcall & Pinsonneault1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. 
-1997;Gizon et al.2020) through time-distance helioseismology (Duvall et al.1993), and farside imaging of active -regions (Braun & Lindsey2001) and their near side emergence (Birch et al.2016), through helioseismic holography -(Lindsey & Braun2000). The recent discovery of various inertial waves (Gizon et al.2021), including the equatorial -Rossby wave (L¨optien et al.2018), has been achieved through local helioseismic ring-diagram analysis (Hill1988) and -the non-helioseismic local correlation tracking (LCT,November & Simon1988) of granulation. -In recent years, the use of global mode-coupling helioseismology (Woodard1989;Lavely & Ritzwoller1992) has +global mode frequency splitting (Thompson et al. 1996; Schou et al. 1998), and the resolving the neutrino problem +(Bahcall & Pinsonneault 1992). Notable local helioseismic results include imaging of the meridional flow (Giles et al. +1997; Gizon et al. 2020) through time-distance helioseismology (Duvall et al. 1993), and farside imaging of active +regions (Braun & Lindsey 2001) and their near side emergence (Birch et al. 2016), through helioseismic holography +(Lindsey & Braun 2000). The recent discovery of various inertial waves (Gizon et al. 2021), including the equatorial +Rossby wave (L¨optien et al. 2018), has been achieved through local helioseismic ring-diagram analysis (Hill 1988) and +the non-helioseismic local correlation tracking (LCT, November & Simon 1988) of granulation. +In recent years, the use of global mode-coupling helioseismology (Woodard 1989; Lavely & Ritzwoller 1992) has received attention, with many studies seeking to validate and demonstrate the importance of such a technique for investigating numerous solar phenomena. While the derivation of the mode-coupling technique is mathematically challenging, the data analysis is simple and utilizes all the information registered by the mode. 
Thus far, global -mode-coupling has been validated through observations of the meridional flow (Vorontsov2011;Woodard et al.2013), -differential rotation (Schad & Roth2020;Kashyap et al.2021), global-scale convection (Woodard2014,2016;Hanasoge -et al.2020;Mani & Hanasoge2021) and Rossby modes (Hanasoge & Mandal2019;Mandal & Hanasoge2020;Mandal -et al.2021). Local mode-coupling analysis in the Cartesian approximation, formulated byWoodard(2006), was -validated byHanson et al.(2021) (hereafter H21) by examining the power-spectrum of supergranular waves and -comparing with previous time-distance studies (Langfellner et al.2018). +mode-coupling has been validated through observations of the meridional flow (Vorontsov 2011; Woodard et al. 2013), +differential rotation (Schad & Roth 2020; Kashyap et al. 2021), global-scale convection (Woodard 2014, 2016; Hanasoge +et al. 2020; Mani & Hanasoge 2021) and Rossby modes (Hanasoge & Mandal 2019; Mandal & Hanasoge 2020; Mandal +et al. 2021). Local mode-coupling analysis in the Cartesian approximation, formulated by Woodard (2006), was +validated by Hanson et al. (2021) (hereafter H21) by examining the power-spectrum of supergranular waves and +comparing with previous time-distance studies (Langfellner et al. 2018). prasad.subramanian@tifr.res.inarXiv:2201.00178v1 [astro-ph.SR] 1 Jan 2022 - Mani et al. + Mani et al. Normal-mode coupling refers to the concept of expressing solar-oscillation eigenfunctions as a linear weighted combination - of model-eigenfunctions (e.g., Model SChristensen-Dalsgaard2021). The model eigenfunctions form a complete + of model-eigenfunctions (e.g., Model S Christensen-Dalsgaard 2021). The model eigenfunctions form a complete and orthogonal basis. By design, the model Sun is spherically symmetric, adiabatic, free from rotation, magnetism and flows. In this state, the oscillations are considered to be uncoupled. 
The weights needed to express the solar-oscillation eigenfunctions would then encode all the perturbations that are absent in the model. The forward problem then reduces to relating observed seismic measurements to the perturbations that we want to infer. The surface wavefield cross-correlation is the primary measurement in the mode-coupling analysis and can be directly related to the weights -(Woodard2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial +(Woodard 2016). As mode coupling is a Fourier domain technique, wavefields are cross-correlated at different spatial and temporal frequencies, leaving us with measurements sensitive to different quantities of interest. In this study, we extend the spectral analysis of H21 and develop the method to produce near-surface flow maps at supergranulation length scales. A part of the formalism that was used to derive the forward model in H21 is reworked, primarily to image steady flows. Measurements are then constructed, and inversions to infer divergence flow and radial vorticity are described. We also demonstrate signal associated with supergranular flow in a radial-order coupling (p -2 -p -2 ), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. +2-p +2), which was not shown in H21. This helps in localizing the measurement sensitivity to the surface. We compare our results with flows obtained using the Local Correlation Tracking method on solar granules. 1.1. Forward problem -In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to AppendixAfor a -complete derivation of the forward problem. Working in the plane-parallel atmosphere (see alsoWoodard2006), we +In favor of algebraic brevity, we only show crucial steps here and refer the interested reader to Appendix A for a +complete derivation of the forward problem. 
Working in the plane-parallel atmosphere (see also Woodard 2006), we denote the horizontal unit vectors e x and e y in our local Cartesian domain as pointing towards west and north on the solar surface, respectively, and e - z points outwards. This approximation is valid when observing patches of the surface +z points outwards. This approximation is valid when observing patches of the surface that are small when compared to the solar radius. When imaging steady, near-surface flows in the neighbourhood -of the supergranular scale ( ∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the +of the supergranular scale (∼ 30 Mm), we expect the measured spectral cross-correlation signal to peak around the horizontal wavenumber qR - ≈ 120 (Rincon & Rieutord2018), where q = | q | = |( q -x , q + ≈ 120 (Rincon & Rieutord 2018), where q = |q| = |(q +x, q y )| is the vector horizontal wavenumber of the flow. Accordingly, the goal is to relate measurements (linearly, to facilitate inversion) to the flow -perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, seeRincon -& Rieutord2018), permitting us to model the flow vector uu +perturbation described in a horizontal Fourier domain. Supergranular velocities are subsonic (300-400 m/s, see Rincon +& Rieutord 2018), permitting us to model the flow vector uu u = (u -x , u +x, u y , u z ) in the Cartesian domain like so (Unno et al. -1989;Woodard2006) - u σ - = ∇× [ ∇× ( P e -z )] + ∇× (T e +1989; Woodard 2006) + uσ + = ∇×[∇×(P e +z )] + ∇×(T e z ), (1) where P = P σ - (x ) and T = T σ - (x ) are poloidal and toroidal scalar functions, varying with position x and temporal -frequency σ . ∇ is the 3D gradient operator. 
While mode-coupling can easily be extended to study time-varying -perturbations (seeWoodard2016;Mani & Hanasoge2020;Hanasoge et al.2020;Mandal & Hanasoge2020, for + (x) and T = T σ + (x) are poloidal and toroidal scalar functions, varying with position x and temporal +frequency σ. ∇ is the 3D gradient operator. While mode-coupling can easily be extended to study time-varying +perturbations (see Woodard 2016; Mani & Hanasoge 2020; Hanasoge et al. 2020; Mandal & Hanasoge 2020, for example), here we only consider the frequency bin σ = 0, denoting the temporally averaged flow over the period of analysis. We therefore suppress σ from all terms this point forward, remembering that temporal dynamics of -perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq1using +perturbations may also be studied using the same model outlined in the following paragraphs. Simplifying eq 1 using vector calculus results in - u = − ∇ 2 - P e -z + ∇ (∂ + u = −∇2 +P e +z + ∇(∂ z P ) + ∇ - h T × e +hT ×e z , (2) where ∇ h refers to derivatives only in the horizontal direction. Mode-coupling helioseismology is performed in the Fourier domain, and since we wish to image horizontal flows on a small patch of the surface, we describe the flow as a -function of horizontal wavenumber q and depth z e +function of horizontal wavenumber q and depth ze z . Hence the poloidal and toroidal flows are described by P - q (z ) and +q (z) and T -q (z ), respectively. Furthermore, we parametrize the flow along e -z using basis functions f (z ) (Chebyshev, B -spline, +q (z), respectively. Furthermore, we parametrize the flow along e +z using basis functions f (z) (Chebyshev, B -spline, etc). This is expressed as P ≡ P - q (z ) = +q (z) = j f -j (z ) P - q j , T ≡ T -q ( z ) = +j (z) P +qj , T ≡ T +q (z) = j f - j (z ) T -q j . (3) +j (z) T +qj . 
(3) The flow coefficients P - q j and T -q j , represented by the discrete indices q and j , become ideal candidates for inversions, +qj and T +qj , represented by the discrete indices q and j , become ideal candidates for inversions, where the flow for each wavenumber q can be inverted for independently; parallelization in computation can thus be exploited to expedite inversions. Note that P - q j = P ∗ -− q j and T -q j = T ∗ -− q j for the flow field to be real in the spatiotemporal +qj = P ∗ +−qj and T +qj = T ∗ +−qj for the flow field to be real in the spatiotemporal domain. -To infer flows from wavefields φ scattered by a perturbation of length scale q , cross-correlate them in the manner +To infer flows from wavefields φ scattered by a perturbation of length scale q, cross-correlate them in the manner Imaging near-surface flows using mode-coupling analysis 3 -φ ω ∗ -k φ ω -k +q , where k is the oscillation mode wavenumber (k - x , k -y ) and ω is the temporal frequency. Relate φ ω ∗ -k φ ω -k +q thus +φω∗ +k φω +k+q , where k is the oscillation mode wavenumber (k +x, k +y ) and ω is the temporal frequency. Relate φω∗ +k φω +k+q thus to the flow coefficients P - q j and T -q j (see eqA7) -φ ω ∗ -k φ ω -k +q = H ω +qj and T +qj (see eq A7) +φω∗ +k φω +k+q = Hω kk - nn +nn j C - q j, k P - q j + D -q j, k T -q j . (4) -The weight factor H ω - (see eqA8) is a function of frequency, capturing information about the extent of coupling between -the two modes [ n, k ] and [n - , k - ], where n and n - are the radial orders of the modes, and k = |k | and k +qj,k P +qj + D +qj,k T +qj . (4) +The weight factor Hω + (see eq A8) is a function of frequency, capturing information about the extent of coupling between +the two modes [n, k] and [n +, k +], where n and n + are the radial orders of the modes, and k = |k| and k = |k - | = |k + q |. -The spectral profile of the mode (see eqA9) is approximated using a Lorentzian (Anderson et al.1990). The more the +| = |k + q|. 
+The spectral profile of the mode (see eq A9) is approximated using a Lorentzian (Anderson et al. 1990). The more the Lorentzians of the two modes overlap, the stronger the coupling. Finally, the real terms C - q j, k and D -q j, k are poloidal +qj,k and D +qj,k are poloidal and toroidal flow sensitivity kernels respectively, that allow us to relate the flows in question to the measurements -and are derived from the solar model see AppendixA. They possess the symmetry relation: C -q j, k = C -− q j, −k and +and are derived from the solar model see Appendix A. They possess the symmetry relation: C +qj,k = C +−qj,−k and D -q j, k = D -− q j, − k (see eqA6). The kernels, as flows, are expressed on the basis f -j ( z ). +qj,k = D +−qj,−k (see eq A6). The kernels, as flows, are expressed on the basis f +j (z). 1.2. Least-squares of cross-correlation -Even though φ ω ∗ -k φ ω -k +q isolates the effect of flow perturbations at individual wavenumbers q , a more compact measurement, - known in mode-coupling literature as ’ B -coefficients’, is much better designed for inversion as it reduces the -dimension of the problem. A least-squares fit to the cross-correlation φ ω ∗ -k φ ω -k +q (seeWoodard2006,2014,2016) results +Even though φω∗ +k φω +k+q isolates the effect of flow perturbations at individual wavenumbers q, a more compact measurement, + known in mode-coupling literature as ’B -coefficients’, is much better designed for inversion as it reduces the +dimension of the problem. A least-squares fit to the cross-correlation φω∗ +k φω +k+q (see Woodard 2006, 2014, 2016) results in the B -coefficients B -k ,q , according to +k,q , according to B - k ,q = -ω H ω ∗ +k,q = +ω Hω∗ kk - nn φ ω ∗ -k φ ω -k +q +nn φω∗ +k φω +k+q -ω |H ω +ω |Hω kk - nn | 2 . (5) -Multiplying eq4on both sides by H ω ∗ +nn |2 . 
(5) +Multiplying eq 4 on both sides by Hω∗ kk - nn and substituting by eq5on the left-hand-side results in a concisely defined -forward problem (compare with eq4) +nn and substituting by eq 5 on the left-hand-side results in a concisely defined +forward problem (compare with eq 4) B - k ,q = +k,q = j C -q j, k P - q j + D - q j, k T -q j . (6) -In eq5,Woodard(2007) and H21 thus far only considered positive-frequency components in the summation over ω . +qj,k P +qj + D +qj,k T +qj . (6) +In eq 5, Woodard (2007) and H21 thus far only considered positive-frequency components in the summation over ω. Here, we sum over both ±ω within a few mode linewidths Γ. Denoting the resonant frequency of a mode using ω nk , -|ω | ∈ - ω -nk − Γ -nk /2 , ω -nk + Γ - nk / 2 +|ω| ∈ +ω +nk − Γ +nk /2, ω +nk + Γ +nk /2 or -|ω | ∈ - ω +|ω| ∈ +ω n - k +k − Γ - n - k - / 2, ω n - k - + Γ +k + /2, ω +n +k + + Γ n - k - / 2 - . (7) -Summing over ± ω guarantees that the parity B - k ,q = B ∗ -−k ,− q (see AppendixAfor derivation) is obeyed, thereby -ensuring that the flow field on the right-hand-side of eq6is a real physical quantity in the spatio-temporal domain. -Taking the complex conjugate on both sides of eq6and considering the negative wavenumber components − q and -− k , - B ∗ -− k , −q = +k + /2 +. (7) +Summing over ±ω guarantees that the parity B +k,q = B ∗ +−k,−q (see Appendix A for derivation) is obeyed, thereby +ensuring that the flow field on the right-hand-side of eq 6 is a real physical quantity in the spatio-temporal domain. +Taking the complex conjugate on both sides of eq 6 and considering the negative wavenumber components −q and +−k, + B∗ +−k,−q = j C - − q j, − k P ∗ -−q j + D -− q j, −k T ∗ -− q j . (8) -Substituting parity and symmetry relations for all terms in the above results in eq6. 
As B - k , q is constructed by a -least-squares fitting, it is noteworthy that summing over − ω will also lead to improvement in its signal-to-noise as a +−qj,−k P ∗ +−qj + D +−qj,−k T ∗ +−qj . (8) +Substituting parity and symmetry relations for all terms in the above results in eq 6. As B +k,q is constructed by a +least-squares fitting, it is noteworthy that summing over −ω will also lead to improvement in its signal-to-noise as a by-product. 1.3. Noise model In the addition to the sensitivity kernels, a systematic background noise model is required to infer the flows from the observed B -coefficients. For estimating the contribution from realization noise to the measurements, we make the -following assumptions (Gizon & Birch2004): that the excitation of the wavefield is modelled as a multivariate Gaussian +following assumptions (Gizon & Birch 2004): that the excitation of the wavefield is modelled as a multivariate Gaussian random process and the wavefields are uncorrelated across wavenumber and frequency in the absence of perturbations. Every independent realization of a mode can be understood as the output of a damped harmonic oscillator driven by a -random forcing function (seeDuvall & Harvey1986). Modes are thus generated with random phases and amplitudes +random forcing function (see Duvall & Harvey 1986). Modes are thus generated with random phases and amplitudes and with finite lifetimes. This stochasticity leads to realization noise in repeated measurements of mode parameters - Mani et al. + Mani et al. Figure 1. Dispersion relation for the radial orders used in this analysis; f (blue), p 1 (orange) and p - 2 (green). The shaded +2 (green). The shaded regions of the same colours indicate 1-linewidth Γ about the mode frequency. The yellow shaded region indicates the range of kR - and ω/2 π to which we have restricted ourselves in this analysis. 
Beyond kR - of 2000, it is seen that the theoretical fitting + and ω/2π to which we have restricted ourselves in this analysis. Beyond kR + of 2000, it is seen that the theoretical fitting of mode frequencies start deviating from the observed dispersion relation for the f -mode. such as its amplitude, frequency and linewidth, and consequently in B - k ,q in our case. We use the same noise model +k,q in our case. We use the same noise model as in H21, which was motivated by the above discussion, G -k ,q ≡ |B - k ,q | 2 - , (9) -where, unlike H21, we again sum over ± ω . G -k ,q is real, with the symmetry relation G -k ,q = G -− k ,− q (see AppendixA +k,q ≡ |B +k,q |2 +, (9) +where, unlike H21, we again sum over ±ω. G +k,q is real, with the symmetry relation G +k,q = G +−k,−q (see Appendix A for explanation). 2. DATA ANALYSIS In order to examine near surface flows we build a time-series cube of Doppler images that are obtained from the -Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO,Scherrer et al.2012). Each image -is Postel pro jected, with a spatial resolution of approximately 0. 48Mm, sperated in time by 45 seconds, and is tracked -at the (Snodgrass1984) rotation rate. Here, we select a patch that is 194 .4 × 194 .4 Mm2 +Helioseismic Magnetic Imager aboard the Solar Dynamics Observatory (HMI/SDO, Scherrer et al. 2012). Each image +is Postel pro jected, with a spatial resolution of approximately 0.48Mm, sperated in time by 45 seconds, and is tracked +at the (Snodgrass 1984) rotation rate. Here, we select a patch that is 194.4 × 194.4 Mm2 in size, tracked for 24 hours and crosses the disk-center in the middle of observation time on the 14 Novemeber 2017 (Carrington rotation number -2197, Carrington longitude 90 ◦ - ). This Dopplercube is considered as the physical wavefield φ ( x, y ; t ). 
The Fourier-space -wavefield φ ω -k (and subsequently, the cross-correlation φ ω ∗ -k φ ω -k +q ) is obtained by computing the 3D spatial and temporal +2197, Carrington longitude 90◦ +). This Dopplercube is considered as the physical wavefield φ(x, y; t). The Fourier-space +wavefield φω +k (and subsequently, the cross-correlation φω∗ +k φω +k+q ) is obtained by computing the 3D spatial and temporal Fourier transform of the Dopplercube. The duration of the observed region is long enough to provide sufficient frequency bins with which to sum over in -Eq6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days;Rincon -& Rieutord2018) over this period. Our observation region is close to the disk center to also avoid any contamination -from center-to-limb systematics (Zhao et al.2012;Langfellner et al.2015). -Maximum signal can be extracted from the weighted summation of the cross correlations (eq5) when the spectral -profiles of the two modes [ n, k ] and [n - , k - ] closely align in ω space. This implies that their mode frequencies should be -sufficiently close ( |ω +Eq 6, while short enough that supergranules do not substantially evolve (lifetime is purported to be 1.6 days; Rincon +& Rieutord 2018) over this period. Our observation region is close to the disk center to also avoid any contamination +from center-to-limb systematics (Zhao et al. 2012; Langfellner et al. 2015). +Maximum signal can be extracted from the weighted summation of the cross correlations (eq 5) when the spectral +profiles of the two modes [n, k] and [n +, k +] closely align in ω space. This implies that their mode frequencies should be +sufficiently close (|ω nk − ω n - k - | ≤ δ , the separation parameter). Since Lorentzians decay rapidly, the summation over -± ω is significant only over a few linewidths ( , the summation parameter; see eq7). 
We have empirically found and -tabulated δ in Table1for the radial order couplings n - n +k + | ≤ δ, the separation parameter). Since Lorentzians decay rapidly, the summation over +±ω is significant only over a few linewidths (, the summation parameter; see eq 7). We have empirically found and +tabulated δ in Table 1 for the radial order couplings n-n ∈ f-f, p -1 -p - 1 , and p - 2 -p - 2 (the signal strength depends only -weakly on ; we set it to 3 line widths). -Figure1shows that for any two adjacent ridges (adjacent n and n - ), mode frequencies ω +1-p +1, and p +2-p +2 (the signal strength depends only +weakly on ; we set it to 3 line widths). +Figure 1 shows that for any two adjacent ridges (adjacent n and n +), mode frequencies ω nk and ω - n - k become spaced +n +k become spaced farther apart with increasing wavenumber kR - . It is also known that mode linewidth Γ grows with radial orders for +. It is also known that mode linewidth Γ grows with radial orders for a given kR - . Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of +. Moreover, holding the spatial and temporal sampling rates constant, the spatial size and duration of observation set the total number of modes within a range of kR - (and ω/2π ) that can be clearly observed, thereby + (and ω/2π) that can be clearly observed, thereby affecting the quality of the seismic measurements. Owing to these factors, to maximize signal-to-noise (by visually inspecting the power-spectrum), the parameters describing the extent of coupling over different ranges of kR - at fixed + at fixed radial order are different. In wavenumber, we restrict our analysis to within 200 ≤ kR ≤ 2000 and qR ≤ 300. Our frequency range is confined to span the range over which acoustic modes are observed (2 ≤ ω/2π ≤ 5 in mHz). 
Imaging near-surface flows using mode-coupling analysis 5 -Coupling k R +Coupling kR range # of δ modes f-f [400,1000] 5240 4 [1000,1500] 7784 1.1 [1500,2000] 10940 0.4 p -1 -p - 1 [400,1000] 5240 4.5 +1-p +1 [400,1000] 5240 4.5 [1000,1750] 12852 2 p -2 -p - 2 [200,1000] 5886 3 +2-p +2 [200,1000] 5886 3 [1000,1300] 4280 3 Table 1. Total number of modes, and separation parameter (in number of linewidths) for different couplings, for different ranges of kR - . +. 3. INVERSION The final step to producing near-surface flow maps in Cartesian mode-coupling is to invert the measurements B - k , q from the linear relation in eq6. We describe inversion using regularized-least-squares (RLS) method here and -leave Subtractive Optimally Localized Averages (SOLA,Pijpers & Thompson1994) for AppendixB. The methods -complement each other (seeSekii1997), where RLS tries to minimize the misfit between data and model, whereas +k,q from the linear relation in eq 6. We describe inversion using regularized-least-squares (RLS) method here and +leave Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) for Appendix B. The methods +complement each other (see Sekii 1997), where RLS tries to minimize the misfit between data and model, whereas SOLA gives better localization. For total number of modes M , RLS scales as M xJ where J is the number of basis functions f - j (z ) (J M ; see eq3and section3.1), whereas SOLA scales as M 2 - (see AppendixB). For M > 5000, +j (z) (J M ; see eq 3 and section 3.1), whereas SOLA scales as M 2 + (see Appendix B). For M > 5000, computation starts to quickly become expensive for SOLA. Mode eigenfunctions peak near the surface, with higher radial orders possessing smaller peaks in the interior. While f-f coupling alone has enough sensitivity to probe perturbations at supergranular scales close to surface, signal is present even in p -1 -p - 1 , and p - 2 -p -2 (see Figure3), and possibly other higher order self- and cross-couplings. 
Since we are +1-p +1, and p +2-p +2 (see Figure 3), and possibly other higher order self- and cross-couplings. Since we are interested in only surface flows, we leave higher order coupling to future work. It bears mentioning that the slopes of the ridges in the kR - - ν spectrum (Figure1) increase with radial order. This +-ν spectrum (Figure 1) increase with radial order. This limits us to low-to-intermediate kR - (< 1000) for these higher radial orders if we are to remain under the acoustic cutoff + (< 1000) for these higher radial orders if we are to remain under the acoustic cutoff frequency of 5.3mHz. It also becomes imperative to use a spatially larger observation patch to gain access to signals from low kR - too large an observation region could possibly render invalid the Cartesian geometry approximation. Regardless, in addition to performing inversions using all the couplings stacked together, we also demonstrate inversions -separately for the three couplings (see Table2) in order to account for the full gamut of mode-coupling as a signal-rich +separately for the three couplings (see Table 2) in order to account for the full gamut of mode-coupling as a signal-rich helioseismic technique. 3.1. RLS -For given q , the forward problem may be stated as +For given q, the forward problem may be stated as KU = B, (10) with the aim to minimize the misfit k ||KU − B|| -2 , with || || - 2 denoting the L +2, with || || +2 denoting the L 2 norm. Here, K is the matrix formed by the sensitivity kernels: {C - q j, k , D -q j, k } . U is a vector composed of the flow coefficients: {P - q j , T -q j } and B is a vector -composed of computed B -coefficients: { B -k ,q }. The least-squares problem is solved simultaneously for poloidal and +qj,k , D +qj,k }. U is a vector composed of the flow coefficients: {P +qj , T +qj } and B is a vector +composed of computed B-coefficients: {B +k,q }. The least-squares problem is solved simultaneously for poloidal and toroidal flow. 
We use B -spline basis functions as our f - j (z ), comprising 11 knots spaced uniformly in acoustic radius, +j (z), comprising 11 knots spaced uniformly in acoustic radius, for both poloidal and toroidal coefficients. Hence, for M modes (total number of k for a given q is M ) and 11 basis functions for each poloidal and toroidal, the dimensions of K, U and B are thus M × 22, 22 × 1, and M × 1 respectively. -Normalizing both sides of eq10by the noise covariance Λ (a diagonal matrix with the entries G -k ,q ; see eq9; dimension +Normalizing both sides of eq 10 by the noise covariance Λ (a diagonal matrix with the entries G +k,q ; see eq 9; dimension M × M ) and pre-multiplying by K - , +, (K - Λ −1 - K)U =(K - Λ− 1 - ) B, (11) +Λ−1 +K)U =(K +Λ−1 +)B, (11) U =(K - Λ− 1 - K )− 1 - K - Λ −1 - B. (12) - Mani et al. -Figure 2. Left : Averaging kernel for poloidal flow (see sectionB.2, eqB17, and left panel of Figure8) for q R - = [ −112, − 45], +Λ−1 +K)−1 +K +Λ−1 +B. (12) + Mani et al. +Figure 2. Left : Averaging kernel for poloidal flow (see section B.2, eq B17, and left panel of Figure 8) for qR + = [−112, −45], at the depth z - o = −0 . 41 Mm. Right : L-curve for the mode q R - = [ −112, − 45]; the knee (λ = 2 . 48) is marked by a blue +o = −0.41 Mm. Right : L-curve for the mode qR + = [−112, −45]; the knee (λ = 2.48) is marked by a blue diamond. Since the least-squares problem is typically ill-posed, we restate the minimization as k ||KU − B|| -2 + λ ||U || +2 + λ||U|| 2 with the regularization parameter λ which this results in a trade-off between misfit reduction (first term) and solution norm minimization (second term). Under-regularizing can lead to a solution U that is dominated by errors in the data and on the other hand, over-regularizing may smooth or damp the solution more than necessary. 
Including this regularization makes the problem better conditioned and is now defined as U = (K - Λ− 1 - K + λ I )− 1 - K - Λ −1 - B, (13) +Λ−1 +K + λI)−1 +K +Λ−1 +B, (13) where I is the identity matrix for L -1 regularization. The knee-point of the L-curve (Hansen1992), a curve formed -by plotting ||U || +1 regularization. The knee-point of the L-curve (Hansen 1992), a curve formed +by plotting ||U|| 2 vs ||KU − B|| -2 for different values of λ (see right panel of Figure2), is usually chosen as the -regularization parameter. After successfully inverting for U , we reconstruct the flow using eq3. Results for poloidal +2 for different values of λ (see right panel of Figure 2), is usually chosen as the +regularization parameter. After successfully inverting for U, we reconstruct the flow using eq 3. Results for poloidal flow P - q are shown in Figure3. +q are shown in Figure 3. 4. LCT To improve confidence in the imaged near-surface flows through mode-coupling, we compare them with flows obtained -from Local Correlation Tracking method (LCT;November & Simon1988). LCT provides surface-flow maps by +from Local Correlation Tracking method (LCT; November & Simon 1988). LCT provides surface-flow maps by examining the advection of convective granules (1.2 Mm, qR - ≈ 3500;Hathaway et al.2015) by underlying largerscale - flow systems. Since granules are used as tracers, which are much smaller in size than supergranules ( ≈ 35 Mm), -LCT is an effective method (seeRieutord et al.2001) to produce surface horizontal flow maps of supergranulation. -Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section2 + ≈ 3500; Hathaway et al. 2015) by underlying largerscale + flow systems. Since granules are used as tracers, which are much smaller in size than supergranules (≈ 35 Mm), +LCT is an effective method (see Rieutord et al. 2001) to produce surface horizontal flow maps of supergranulation. 
+Time series of intensity images from HMI, with the same properties of the Dopplercubes described in section 2 (tracking rate, date, location, size and duration of observed patch, spatial and temporal sampling rate), are obtained and Postel pro jected. The horizontal flows are deduced by tracking the proper motions of granules between consecutive intensity images, which we denote as I - 1 , I - 2 . The LCT method selects a patch in two images each +1, I +2. The LCT method selects a patch in two images each (I - 1 = I - 1 e ( x− x +1 = I +1e(x−x ij )2 - / 2 sigma2 +/2 sigma2 , I 2 = I - 2 e (x − x +2e(x−x ij )2 - / 2 sigma2 +/2 sigma2 ) that observe the same granule at the grid point x - ij = ( x - i , y +ij = (x +i, y j ). A Gaussian of width sigma allows to isolate a small region surrounding the grid point of interest as the distance moved by granules are usually in sub-pixel regime. The convention for the direction of x is the same as described in -section1.1. The two patches I - 1 , I -2 are then cross correlated for different values of position shifts ∆x , +section 1.1. The two patches I +1, I +2 are then cross correlated for different values of position shifts ∆x, C - ij (∆x, ∆ y ) = +ij (∆x, ∆y) = dx I ∗ -1 (− x )I -2 (∆x − x) . (14) -The shift ∆x = (∆x, ∆ y ) that maximizes the cross-correlation C - ij is taken to be the proper motion of the granule. -Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules ( < 10 +1 (−x)I +2(∆x − x). (14) +The shift ∆x = (∆x, ∆y) that maximizes the cross-correlation C +ij is taken to be the proper motion of the granule. +Provided that the time difference ∆t, here 45 seconds, between the images is less than the lifetime of granules (< 10 min), the velocities are given by v - x = ∆x/ ∆ t and v - y = ∆y/∆ t . This exercise is repeated for all grid points in the +x = ∆x/∆t and v +y = ∆y/∆t. 
This exercise is repeated for all grid points in the images I -1 , I +1, I 2 and for each consecutive pair of images in the cube. -In practice, we use the Fourier LCT algorithm (FLCT,Fisher & Welsch2008) for computing v - x and v - y . FLCT +In practice, we use the Fourier LCT algorithm (FLCT, Fisher & Welsch 2008) for computing v +x and v +y . FLCT requires the input sigma, which we set to 4 pix, that captures the extent of localization desired, and depends on the Imaging near-surface flows using mode-coupling analysis 7 Figure 3. Top : Inverted poloidal flow power-spectrum for the three couplings f-f, p - 1 -p - 1 , and p -2 -p +1-p +1, and p +2-p 2 as a function of q - x R +xR and q - y R - . Bottom : Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ± 1 σ error around the +y R +. Bottom : Corresponding power-spectrum averaged over the azimuthal angle. Shaded region shows ±1σ error around the mean. Total power appears to increase through the radial orders. Power is in units of m2 - /s 4 - . +/s4 +. dominant length scale of the velocity field in the images. The Postel-pro jected intensity images are fed as input to the FLCT code. v - x and v - y are then computed for consecutive pairs of images and are averaged over the entire day. +x and v +y are then computed for consecutive pairs of images and are averaged over the entire day. 5. 
MAPS OF HORIZONTAL DIVERGENCE AND RADIAL VORTICITY -For mode-coupling, horizontal divergence (hereafter div ) and radial vorticity (hereafter curl ) are computed by -substituting P and T from eq3into eq2as below uu +For mode-coupling, horizontal divergence (hereafter div) and radial vorticity (hereafter curl) are computed by +substituting P and T from eq 3 into eq 2 as below uu -u( q , z ) = − ∇2 - P e - z + ∇(∂ - z P ) + ∇ -h T × e - z , -= −(0 , 0 , ∂ 2 +u(q, z) = −∇2 +P e +z + ∇(∂ +z P ) + ∇ +hT ×e +z , += −(0, 0, ∂ 2 x P + ∂ 2 y P + ∂ 2 -z P ) + ( ∂ -x ∂ +z P ) + (∂ +x∂ z P, ∂ - y ∂ +y ∂ z P, ∂ 2 -z P ) + ( ∂ -y T , − ∂ -x T , 0) . (15) +z P ) + (∂ +y T , −∂ +xT , 0). (15) Setting ∂ 2 x + ∂ 2 -y = q 2 - , div is given by, +y = q2 +, div is given by, ∇ h · uu -u(q , z ) = q 2 - ∂ +u(q, z) = q2 +∂ z P, (16) and curl is given by, - ∇ × uu -u( q , z ) - z = q 2 - T . (17) -We follow similar steps to those taken inLangfellner et al.(2015) for comparison of flow maps with LCT. The +∇ × uu +u(q, z) +z = q2 +T . (17) +We follow similar steps to those taken in Langfellner et al. (2015) for comparison of flow maps with LCT. The essential step for comparison at different length scales is to bandpass filter the Fourier-space flow around the qR of -interest (see Figure4), and subsequently convert it to real space. -We seek to show comparisons (see Figures5,6, and7) for qR +interest (see Figure 4), and subsequently convert it to real space. +We seek to show comparisons (see Figures 5, 6, and 7) for qR = 100, 150, 200 and 250. To sufficiently delineate -flows at these length scales, we apply a Gaussian filter (see Figure4) to flows obtained from eqns16and17. The +flows at these length scales, we apply a Gaussian filter (see Figure 4) to flows obtained from eqns 16 and 17. The Gaussian is centered at the desired wavenumber with a half-width of 25. We then perform a 2D Fourier transform to obtain a real-space steady-flow map. - Mani et al. -Figure 4. 
Left: Divergence-flow power spectrum | div | 2 - , from eqn16, obtained from inversion using all the couplings. The + Mani et al. +Figure 4. Left: Divergence-flow power spectrum |div|2 +, from eqn 16, obtained from inversion using all the couplings. The power-spectrum is then filtered with a bandpass centered around qR = 150 (middle panel). The resulting spectra is shown in -the right panel. The units of | div | 2 - are in s − 2 - . For illustration, we show the action of the filter on the power-spectrum | div | 2 +the right panel. The units of |div|2 + are in s−2 +. For illustration, we show the action of the filter on the power-spectrum |div|2 since it is a real quantity, but recall that it is the Fourier-space flow div (a complex quantity) on which we apply the filter. For LCT, we first apply a Gaussian smoothing to v - x and v - y to average over small-scale features; the extent of +x and v +y to average over small-scale features; the extent of smoothing depends on the length scale qR to be compared with mode-coupling. div and curl are then simply computed by div = ∂ - x v - x + ∂ +xv +x + ∂ y v - y , (18) +y , (18) curl = ∂ - x v - y − ∂ +xv +y − ∂ y v - x . (19) -We then perform a 2D Fourier transform on eqns18and19, apply the same Gaussian filters as for mode-coupling, +x. (19) +We then perform a 2D Fourier transform on eqns 18 and 19, apply the same Gaussian filters as for mode-coupling, and transform back to real space. 
Condensing all of the above, the following sequence of operations to compare flows at desired length scales are performed for mode-coupling (M-C) and for LCT M-C - : φ (x, y ; t) 3D FFT -=====⇒ φ ω + : φ(x, y; t) 3D FFT +=====⇒ φω k , B -k ,q inversion +k,q inversion ======⇒ P, T ∇ -h · +h· ===⇒ -∇× eqns16 , 17 Filter, +∇× eqns 16, 17 Filter, =====⇒ 2D FFT div, curl LCT : I - 1 , I - 2 FLCT +1, I +2 FLCT ====⇒ v - x , v +x, v y smooth, ======⇒ ∇ -h · ∇× eqns18 , 19 2D FFT, +h· ∇× eqns 18, 19 2D FFT, ======⇒ Filter Filtered, Fourier-space flows 2D FFT =====⇒ div, curl 6. RESULTS -Table2summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure5, +Table 2 summarizes the results of the comparison between flows obtained from mode-coupling and LCT. Figure 5, where we have used all the couplings to perform inversions, shows a 97% correlation between divergence flows from -the two methods near supergranular scale ( qR +the two methods near supergranular scale (qR ≈ 100). Near-surface flows are imaged most faithfully when all the couplings are used. Since vortical flows are imaged at a region near the equator, it is possible that the source of vorticity is something other than Coriolis force. Nevertheless, there is also a very good agreement (87%) between the vortical flows as inferred from the two methods, despite being an order of magnitude weaker than the divergence -flows (this is consistent with the results ofHathaway et al.2015;Langfellner et al.2015;Rincon et al.2017). Due to +flows (this is consistent with the results of Hathaway et al. 2015; Langfellner et al. 2015; Rincon et al. 2017). Due to insufficient modes for the p - 2 -p - 2 case (see Table1), we are unable to infer vortical flows with conviction other than near -the supergranular scale, as can be seen from Table2. 
Figure6also aligns with what we believe can be accomplished +2-p +2 case (see Table 1), we are unable to infer vortical flows with conviction other than near +the supergranular scale, as can be seen from Table 2. Figure 6 also aligns with what we believe can be accomplished through mode-coupling helioseismology - using f-f or p - 1 -p - 1 alone to seismically infer near-surface divergence and vortical +1-p +1 alone to seismically infer near-surface divergence and vortical flows at different scales (qR = 100, 150) can yield extremely good agreement with LCT. As the length scale of the -inferred flow moves further away from that of supergranules (Figure7), the demand on signal-to-noise also increases. +inferred flow moves further away from that of supergranules (Figure 7), the demand on signal-to-noise also increases. An adequate number of modes (and coupling strength between higher radial-orders) thus becomes a necessity to comment substantively on the flows at these scales. 6.1. Amplitudes of mode-coupling flows Imaging near-surface flows using mode-coupling analysis 9 (a) qR - = 100 , f-f + p - 1 -p - 1 + p - 2 -p + = 100, f-f + p +1-p +1 + p +2-p 2 Figure 5. Real-space divergence flows (left column, in units of 10−5 - s − 1 - ) and radial vorticity (right column, in units of 10−6 - s −1 - ) +s−1 +) and radial vorticity (right column, in units of 10−6 +s−1 +) for LCT (top row), and mode-coupling inversions through RLS using all the couplings (middle row), bandpass filtered around qR - = 100 (see Figure4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges + = 100 (see Figure 4). Corresponding scatter plots and correlation coefficients are shown in the bottom row. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. The slopes of the best-fit line through the scatter plots are 0.51 for divergence and 0.01 for vorticity. 
The vorticity flow maps are saturated to show only 40% of the maximum values. For both LCT and mode-coupling divergence and vorticity maps, numerous factors, arising from the associated numerous data processing steps, can influence the final inference of flow amplitudes, making it difficult to put forward a precise statement on them. H21 reported a 60% greater amplitude for p -1 -p - 1 over f-f coupling (Figure3reflects a +1-p +1 over f-f coupling (Figure 3 reflects a similar conclusion), another element to consider when combining different radial orders. The choice of regularization -(see right panel of Figure2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow +(see right panel of Figure 2) has the potential to affect the amplitudes of the inverted flows to some degree. Flow amplitudes also vary with depth, implying that different radial orders and LCT will measure different flow averages. This variability emerges as a natural consequence of any helioseismic inversion procedure necessitating the use of a radial grid along which kernels and flows tend to be described. @@ -664,44 +664,44 @@ Thus, the amplitudes of the mode-coupling flows (and the correlation coefficient • Regularization parameter in the inversion, • Smoothing applied to LCT flows (indirectly; see below paragraph), • The depth at which flows are inferred. -Here, we report in Table2only the maximum correlation found from among the points in the radial grid close -to the surface (within ± 0.5 Mm from z =0). For a desired comparison length scale qR - , we first fix the coupling(s) +Here, we report in Table 2 only the maximum correlation found from among the points in the radial grid close +to the surface (within ±0.5 Mm from z=0). For a desired comparison length scale qR +, we first fix the coupling(s) and the regularization parameter to be used in the inversion. We then separately compute filtered divergence and - Mani et al. + Mani et al. 
(a) qR - = 100 , f-f (b) qR + = 100, f-f (b) qR = 150, p - 1 -p +1-p 1 Figure 6. Real-space divergence flows (left column, in units of 10−5 - s − 1 - ) and radial vorticity (right column, in units of 10− 6 - s −1 - ) +s−1 +) and radial vorticity (right column, in units of 10−6 +s−1 +) for LCT (top row), and mode-coupling inversion through RLS using (a) f-f coupling (bottom row), bandpass filtered around qR = 100, and using (b) p -1 -p - 1 coupling (bottom row), bandpass filtered around qR +1-p +1 coupling (bottom row), bandpass filtered around qR = 150. We cut edges out from the flow maps and compare a circular region of diameter ≈175 Mm. (a) qR - = 200 , f-f + p - 1 -p - 1 + p - 2 -p + = 200, f-f + p +1-p +1 + p +2-p 2 (b) qR - = 250 , f-f + p - 1 -p + = 250, f-f + p +1-p 1 + p - 2 -p +2-p 2 Figure 7. Real-space divergence flows (left column, in units of 10−5 - s − 1 - ) and radial vorticity (right column, in units of 10− 6 - s −1 - ) +s−1 +) and radial vorticity (right column, in units of 10−6 +s−1 +) for LCT (top row), and mode-coupling inversion through RLS using all the couplings (bottom row), bandpass filtered around (a) qR = 200, and (b) qR @@ -710,22 +710,22 @@ vorticity maps for LCT for different values of smoothing. These flow maps are th from inversions at all depths in the radial grid that are within 0.5 Mm from the surface. The highest correlation (corresponding to the above depths and smoothing) is noted and comparison flow maps are plotted for the desired qR - . -It has been shown (seeDe Rosa & Toomre2004;Langfellner et al.2015) that line-of-sight velocity from Dopplergrams +. +It has been shown (see De Rosa & Toomre 2004; Langfellner et al. 2015) that line-of-sight velocity from Dopplergrams and LCT agree closely in amplitudes. But, to recapitulate, a host of factors described above can skew the amplitudes for divergence flows owing to the multi-step process involved in obtaining them. 
For example, there has been a history -(see, e.g.,De Rosa et al.2000;Sekii et al.2007;Zhao et al.2007;Langfellner et al.2018;B¨oning et al.2020;Korda +(see, e.g., De Rosa et al. 2000; Sekii et al. 2007; Zhao et al. 2007; Langfellner et al. 2018; B¨oning et al. 2020; Korda & ˇ -Svanda2021) of using travel-time difference as only a proxy for horizontal divergence. However,Langfellner et al. +Svanda 2021) of using travel-time difference as only a proxy for horizontal divergence. However, Langfellner et al. Imaging near-surface flows using mode-coupling analysis 11 Coupling qR - div curl + div curl f-f 100 0.97 0.87 + p -1 -p +1-p 1 150 0.95 0.76 + p -2 -p +2-p 2 200 0.92 0.76 250 0.85 0.65 f-f 100 0.96 0.85 @@ -733,540 +733,540 @@ f-f 100 0.96 0.85 200 0.89 0.69 250 0.77 0.58 p - 1 -p +1-p 1 100 0.95 0.83 150 0.95 0.75 200 0.92 0.75 250 0.85 0.61 p - 2 -p +2-p 2 100 0.94 0.7 150 0.91 0.39 200 0.79 0.3 250 0.55 0.3 Table 2. Correlation between mode-coupling flow maps and LCT maps derived from HMI Dopplergrams and intensity images, respectively. -(2015),Birch et al.(2016) andBirch et al.(2019) use empirically determined conversion factors to align flow amplitudes +(2015), Birch et al. (2016) and Birch et al. (2019) use empirically determined conversion factors to align flow amplitudes from travel-time measurements with those of LCT, while acknowledging that LCT underestimates magnitudes (see -Verma et al.2013;L¨optien et al.2016). Even for the case of supergranulation divergence maps obtained through -ring-diagram helioseismology,Greer et al.(2016) only report normalized amplitudes. +Verma et al. 2013; L¨optien et al. 2016). Even for the case of supergranulation divergence maps obtained through +ring-diagram helioseismology, Greer et al. (2016) only report normalized amplitudes. In this work, we have developed inversions to show that the Cartesian approximation of mode-coupling can be used with great confidence to investigate flows near the surface. 
Careful inversions of mode-coupling measurements, built using a sufficiently large modeset that penetrates into the deeper layers of the convection zone, can also enable probing of the depth structure and time-evolution of supergranules, part of future work. With enough modes to improve signal-to-noise through larger observation sizes, we suggest that Cartesian mode-coupling can find local helioseismic -applications to investigate other depth- and time-varying features such as giant cell flows (seeHathaway et al.2013; -Hanson et al.2020), emerging active regions, meridional flows and Rossby waves. +applications to investigate other depth- and time-varying features such as giant cell flows (see Hathaway et al. 2013; +Hanson et al. 2020), emerging active regions, meridional flows and Rossby waves. APPENDIX A. DERIVATION OF THE FORWARD MODEL -As described in section1.1, we seek to describe the flow u as a function of q along e +As described in section 1.1, we seek to describe the flow u as a function of q along e z . To that end, substituting -eq3into eq2, - u σ -q (z ) = +eq 3 into eq 2, + uσ +q (z) = j - q 2 +q2 f - j e +j e z + iq f j P σ -j q + iq × e +jq + iq×e z f - j T σ -j q . (A1) -For flows in the anelastic limit (u speed of sound), we can denote the flow perturbation operator as δ L σ +j T σ +jq . (A1) +For flows in the anelastic limit (u speed of sound), we can denote the flow perturbation operator as δLσ = -− 2iωρu σ - · ∇ (seeHanasoge et al.2017). Substituting Eq.A1into the operator, we get, -δ Lσ -q = − 2i ω ρ ( i u σ +−2iωρuσ + · ∇ (see Hanasoge et al. 2017). Substituting Eq. A1 into the operator, we get, +δLσ +q = −2i ω ρ (i uσ q · k + uσ q · e - z ∂ +z ∂ z ), (A2) -= − 2 i ωρ += −2i ωρ j -− k · q f +−k · q f j P σ -j q − k · ( q × e +jq − k · (q×e z ) f - j T σ -j q + q 2 +j T σ +jq + q2 f - j P σ -j q ∂ +j P σ +jq ∂ z . (A3) - Mani et al. 
-Express the mode eigenfunction describing oscillations in the Cartesian domain by (seeWoodard2006) + Mani et al. +Express the mode eigenfunction describing oscillations in the Cartesian domain by (see Woodard 2006) ξ - k ≡ ξ - nk (z ) = i ˆ -k H -nk (z )e +k ≡ ξ +nk (z) = i ˆ +kH +nk (z)e z + ˆzV -nk (z ), (A4) +nk (z), (A4) where H and V are real-valued functions; n and n are dropped for compactness of notation. Then the coupling of two modes ξ - k and ξ - k (k - = k + q ), by the flow perturbation operator δ Lσ +k and ξ +k (k + = k + q), by the flow perturbation operator δLσ q , denoted by coupling integral Λk -k (σ ), is +k (σ), is given by Λk -k (σ ) ≡ - dx (δ L σ +k (σ) ≡ + dx (δLσ q ξ - k ) · ξ ∗ -k = +k ) · ξ∗ +k = dx − 2i ωρ j -q 2 +q2 f - j P σ -j q ( ˆ +j P σ +jq ( ˆ k · ˆ k H k H ∗ -k + V +k + V k V ∗ -k ) +k ) − k · q f j P σ -j q + k · ( q × e +jq + k · (q×e z ) f - j T σ -j q +j T σ +jq ( ˆ k · ˆ k H k H ∗ -k + V - k V ∗ -k ) +k + V +k V ∗ +k ) (A5) We desire to linearly relate the coupling integral in the above equation to the flows P and T , through poloidal and toroidal sensitivity kernels, C -q j, k and D -q j, k respectively. Hence, they are given by +qj,k and D +qj,k respectively. Hence, they are given by C - q j, k = +qj,k = dz ρ - q 2 +q2 f j ( ˆ k · ˆ k H k H ∗ -k + V +k + V k V ∗ -k ) -− k · q f +k ) +−k · q f j ( ˆ k · ˆ k H k H ∗ -k + V +k + V k V ∗ -k ) +k ) , D - q j, k = k · ( q × e +qj,k = k · (q×e z ) dz ρ f - j ( ˆ +j ( ˆ k · ˆ k H - k H ∗ -k + V - k V ∗ -k ) . (A6) +k H ∗ +k + V +k V ∗ +k ). (A6) Note the symmetry C -q j, k = C - −q j, −k and D - q j, k = D -− q j, − k . This coupling integral contributes to the cross-spectral -measurement between modes k and k + q From eq 8 ofWoodard(2014), we write the first-order effect of flow on +qj,k = C +−qj,−k and D +qj,k = D +−qj,−k . 
This coupling integral contributes to the cross-spectral +measurement between modes k and k + q From eq 8 of Woodard (2014), we write the first-order effect of flow on wavefield cross-correlation as - φ ω ∗ -k φ ω +σ -k +q = H ω + φω∗ +k φω+σ +k+q = H ω kk - σ Λk -k ( σ ), (A7) +σ Λk +k (σ), (A7) where the function H is given by - H ω + Hω kk - σ = −2 i ω ( N -k |R ω -k | 2 - R ω +σ -k + N +σ = −2i ω(N +k |Rω +k |2 + Rω+σ +k + N k - |R ω +σ -k |2 - R ω ∗ + |Rω+σ +k |2 + Rω∗ k ). (A8) -We absorb the factor − 2i ω into the definition of H . Substitute eqA6in right-hand-side of eqA7to obtain eq4. +We absorb the factor −2i ω into the definition of H. Substitute eq A6 in right-hand-side of eq A7 to obtain eq 4. The mode spectral profile R is a Lorentzian, given by -R ω +Rω k = 1 -ω 2 -nk − ω 2 +ω2 +nk − ω2 − iωγ nk /2 , (A9) where ω - nk is the resonant frequency of the mode, and γ - nk is the mode linewidth. EqA9can be derived by introducing -mode damping − i ωγ ρ as an operator in the differential equation that governs undamped, driven oscillations (see eq -5 ofHanasoge et al.2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. -Also, the parity H ω +nk is the resonant frequency of the mode, and γ +nk is the mode linewidth. Eq A9 can be derived by introducing +mode damping −i ωγ ρ as an operator in the differential equation that governs undamped, driven oscillations (see eq +5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. +Also, the parity Hω kk - σ = H − ω ∗ +σ = H−ω∗ kk - − σ and R ω -k = R − ω ∗ +−σ and Rω +k = R−ω∗ k are established. Mode normalization N is given by N - k = 1 +k = 1 Q Q k -ω |φ ω -k | 2 +ω |φω +k |2 -ω R ω +ω Rω k , (A10) where the 1 Q Q k on the right-hand-side implies average over all [k - x , k -y ] (Q terms in all) such that k = |k | is constant. -This forces N to be isotropic, i.e., to only depend on k , and not k . 
The sum over ω is within five linewidths of ω +x, k +y ] (Q terms in all) such that k = |k| is constant. +This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over ω is within five linewidths of ω nk . -Note that Eq.A8throughA10are modified from H21 to ensure parity and that flow maps are real. -The three equationsA8throughA10, along with the symmetry relation for kernels, and summation over ± ω , serve -to establish the parity B σ -k ,q = B ∗− σ -− k ,−q . This allows for obtaining P σ -q = P ∗− σ +Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. +The three equations A8 through A10, along with the symmetry relation for kernels, and summation over ±ω, serve +to establish the parity Bσ +k,q = B∗−σ +−k,−q . This allows for obtaining P σ +q = P ∗−σ −q , and subsequently, purely real flow in -the real domain. Setting σ = 0 gives us the linear, invertible equation eq6. Substituting eqnsA8throughA10into -the noise model obtained in H21 and summing over ± ω establishes the symmetry Gσ -k , q = G− σ -− k ,− q . +the real domain. Setting σ = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into +the noise model obtained in H21 and summing over ±ω establishes the symmetry Gσ +k,q = G−σ +−k,−q . Imaging near-surface flows using mode-coupling analysis 13 B. SOLA INVERSIONS -Subtractive Optimally Localized Averages (SOLA,Pijpers & Thompson1994) aims to obtain a set of weight factors +Subtractive Optimally Localized Averages (SOLA, Pijpers & Thompson 1994) aims to obtain a set of weight factors for the mode q and depth z -o , which we will call α -k ,zo . A linear weighted sum of the measurements B -k ,q in the fashion +o, which we will call α +k,zo. A linear weighted sum of the measurements B +k,q in the fashion k α -k ,zo B - k ,q allows for an average value of the flow P - q (z ) to be estimated at the depth z -o . 
To obtain the coefficients +k,zoB +k,q allows for an average value of the flow P +q (z) to be estimated at the depth z +o. To obtain the coefficients α -k ,zo , it is assumed that a set of sensitivity kernels K -k ,q (z ) for the mode q can be summed up coherently to give an +k,zo, it is assumed that a set of sensitivity kernels K +k,q (z) for the mode q can be summed up coherently to give an ’averaging kernel’ that is localized at the depth z -o . Conventionally, a Gaussian centered at z +o. Conventionally, a Gaussian centered at z o and a width ∆ is chosen which the averaging kernel should resemble after performing inversion. B.1. Kernels in the integral form -Since the kernels in eqA6are manifest as coefficients on a basis f - j (z ), we first derive kernels that can be expressed -as a function of depth z (see Figure8). It is convenient to derive in matrix form. Thus, with the following definitions: +Since the kernels in eq A6 are manifest as coefficients on a basis f +j (z), we first derive kernels that can be expressed +as a function of depth z (see Figure 8). It is convenient to derive in matrix form. Thus, with the following definitions: P ≡ P - q ( z ), p ≡ P - q j , F ≡ f - j (z ), B ≡ B - k ,q C ≡ C - q j, k and K ≡ K -k , q ( z ), we write (assume only poloidal flow for +q (z), p ≡ P +qj , F ≡ f +j (z), B ≡ B +k,q C ≡ C +qj,k and K ≡ K +k,q (z), we write (assume only poloidal flow for simplicity, the same derivations hold true for toroidal flow as well) P = F p (B11) -The size of P is thus the same as the length of the radial grid z . +The size of P is thus the same as the length of the radial grid z. 
Now, pre-multiply by F T and integrate over z on both sides (drop the integral notation for compactness), F T P = (F T - F ) p + F )p p = (F T - F ) −1 + F )−1 F T P (B12) -Now, substituting eqB12into the forward problem eq6, +Now, substituting eq B12 into the forward problem eq 6, B = C p = (F T - F )− 1 - F T + F )−1 +F T C P = K P (B13) where K = (F T - F )− 1 - F T + F )−1 +F T C, i.e., K - k ,q ( z ) = +k,q (z) = j,j dz f -j (z ) f +j (z)f j - (z ) -− 1 - f - j - (z ) C -q j - ,k (B14) + (z) +−1 +f +j + (z)C +qj +,k (B14) B.2. Obtaining the coefficients α Now, demand that the averaging kernel should resemble a unimodulus target Gaussian centered at z - o +o T (z, z -o ) = 1 +o) = 1 √ - 2π ∆ 2 exp +2π∆2 exp z − z o -2∆ 2 - . (B15) +2∆2 +. (B15) This can be achieved by solving the optimization problem minimize X = dz - T ( z, z -o ) − Θ - q (z, z -o ) +T (z, z +o) − Θ +q (z, z +o) 2 - , (B16) +, (B16) where we introduce the averaging kernel for mode q thus Θ - q (z, z -o ) = +q (z, z +o) = k α -k ,zo K - k ,q ( z ). (B17) -As an aside, we note that averaging kernels can similarly be constructed for RLS (see section3.1) using eqns13 -andB14. - Mani et al. +k,zoK +k,q (z). (B17) +As an aside, we note that averaging kernels can similarly be constructed for RLS (see section 3.1) using eqns 13 +and B14. + Mani et al. Figure 8. Left : Kernel K -k ,q ( z ) (eqB14) shown vs depth z for the three radial order couplings f-f, p -1 -p -1 , and p -2 -p - 2 . q R +k,q (z) (eq B14) shown vs depth z for the three radial order couplings f-f, p +1-p +1, and p +2-p +2. qR = -[ −112 , − 45] and k R - = [− 853, − 157] is chosen for all the radial order couplings for comparison. Right : Averaging kernel -(eqB17) using SOLA, for q R - = [− 112, −45] at depth z - 0 = − 0. 48 Mm, and the corresponding target Gaussian (eqB15). +[−112, −45] and kR + = [−853, −157] is chosen for all the radial order couplings for comparison. 
Right : Averaging kernel +(eq B17) using SOLA, for qR + = [−112, −45] at depth z +0 = −0.48 Mm, and the corresponding target Gaussian (eq B15). Integral of the averaging kernel over z is 0.89. -Setting ∂ X +Setting ∂X ∂α → 0 gives us the matrix problem to be solved -A { α } = v, -{ α } = +A{α} = v, +{α} = A + µI -− 1 - v, (B18) +−1 +v, (B18) where the square matrix A = dz K -k ,q (z ) K +k,q (z)K k - ,q (z ) and v = +,q (z) and v = dz K -k ,q (z ) T (z, z -o ). Here, k +k,q (z)T (z, z +o). Here, k is just a dummy index for -denoting elements in the matrix A , ( k - = k + q ). In the last line of eqB18, we introduce regularization using an Identity -matrix I , with the regularization parameter µ - purpose being the same as that described in section3.1. Obtaining -α thus becomes a highly expensive computationally for very large number of modes (see section3). Substitute α -obtained from eqB18into last line of eqB13, and +denoting elements in the matrix A, (k + = k + q). In the last line of eq B18, we introduce regularization using an Identity +matrix I , with the regularization parameter µ - purpose being the same as that described in section 3.1. Obtaining +α thus becomes a highly expensive computationally for very large number of modes (see section 3). Substitute α +obtained from eq B18 into last line of eq B13, and k on both sides k α -k ,z +k,z o B σ -k ,q = +k,q = k α -k ,z +k,z o dz K -k ,q ( z )P σ -q (z ), +k,q (z)P σ +q (z), = dz Θ -q ( z, z -o ) P σ -q (z ) , +q (z, z +o)P σ +q (z), ≈ P σ -q ( z -o ) (B19) +q (z +o) (B19) Inversions can similarly be performed for multiple depths by choosing suitable widths for the target Gaussians. Divergence - flow can then be obtained from eq16. Results are shown in Figures9and10. + flow can then be obtained from eq 16. Results are shown in Figures 9 and 10. REFERENCES Anderson, E. R., Duvall, Thomas L., J., & Jefferies, S. M. -1990, ApJ, 364, 699, doi:10.1086/169452 +1990, ApJ, 364, 699, doi: 10.1086/169452 Bahcall, J. 
N., & Pinsonneault, M. H. 1992, Reviews of Modern Physics, 64, 885, -doi:10.1103/RevModPhys.64.885 +doi: 10.1103/RevModPhys.64.885 Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, Science Advances, 2, e1600557, -doi:10.1126/sciadv.1600557 +doi: 10.1126/sciadv.1600557 Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, -A&A, 628, A37, doi:10.1051/0004-6361/201935591 B¨oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & +A&A, 628, A37, doi: 10.1051/0004-6361/201935591 B¨oning, V. G. A., Birch, A. C., Gizon, L., Duvall, T. L., & Schou, J. 2020, A&A, 635, A181, -doi:10.1051/0004- 6361/201937331 +doi: 10.1051/0004- 6361/201937331 Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, -doi:10.1086/324323 +doi: 10.1086/324323 Christensen-Dalsgaard, J. 2002, Reviews of Modern -Physics, 74, 1073, doi:10.1103/RevModPhys.74.1073 +Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 —. 2021, Living Reviews in Solar Physics, 18, 2, -doi:10.1007/s41116- 020-00028- 3 +doi: 10.1007/s41116- 020-00028- 3 Imaging near-surface flows using mode-coupling analysis 15 Figure 9. Left : Poloidal flow power-spectrum for f-f as a function of q - x R +xR and q - y R - . Right : Corresponding power-spectrum -averaged over the azimuthal angle. Shaded region shows ± 1 − σ error around the mean. Power is in units of m 2 - /s4 - . -Figure 10. Real-space divergence flows (in units of 10 − 5 - s −1 - ) for mode-coupling inversion through SOLA using f-f coupling, +y R +. Right : Corresponding power-spectrum +averaged over the azimuthal angle. Shaded region shows ±1 − σ error around the mean. Power is in units of m2 +/s4 +. +Figure 10. Real-space divergence flows (in units of 10−5 +s−1 +) for mode-coupling inversion through SOLA using f-f coupling, and LCT, bandpass filtered around qR = 100. We cut edges out from the flow maps and compare a circular region of diameter -≈ 175 Mm. The scatter plot shows the agreement between the maps. 
The slopes of the best-fit line through the scatter plot is +≈175 Mm. The scatter plot shows the agreement between the maps. The slopes of the best-fit line through the scatter plot is 1.05. For demonstration, we show inversions only for poloidal flow using SOLA. De Rosa, M., Duvall, T. L., J., & Toomre, J. 2000, SoPh, -192, 351, doi:10.1023/A:1005269001739 +192, 351, doi: 10.1023/A:1005269001739 De Rosa, M. L., & Toomre, J. 2004, ApJ, 616, 1242, -doi:10.1086/424920 +doi: 10.1086/424920 Duvall, T. L., J., & Harvey, J. W. 1986, in NATO Advanced Study Institute (ASI) Series C, Vol. 169, Seismology of the Sun and the Distant Stars, ed. D. O. Gough, 105–116 Duvall, T. L., J., Jefferies, S. M., Harvey, J. W., & Pomerantz, M. A. 1993, Nature, 362, 430, -doi:10.1038/362430a0 +doi: 10.1038/362430a0 Fisher, G. H., & Welsch, B. T. 2008, in Astronomical Society of the Pacific Conference Series, Vol. 383, Subsurface and Atmospheric Influences on Solar Activity, ed. R. Howe, R. W. Komm, K. S. Balasubramaniam, & -G. J. D. Petrie, 373.https://arxiv.org/abs/0712.4289 +G. J. D. Petrie, 373. https://arxiv.org/abs/0712.4289 Giles, P. M., Duvall, T. L., Scherrer, P. H., & Bogart, R. S. -1997, Nature, 390, 52, doi:10.1038/36294 +1997, Nature, 390, 52, doi: 10.1038/36294 Gizon, L., & Birch, A. C. 2004, ApJ, 614, 472, -doi:10.1086/423367 +doi: 10.1086/423367 Gizon, L., Cameron, R. H., Pourabdian, M., et al. 2020, -Science, 368, 1469, doi:10.1126/science.aaz7119 Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, -652, L6, doi:10.1051/0004- 6361/202141462 +Science, 368, 1469, doi: 10.1126/science.aaz7119 Gizon, L., Cameron, R. H., Bekki, Y., et al. 2021, A&A, +652, L6, doi: 10.1051/0004- 6361/202141462 Greer, B. J., Hindman, B. W., & Toomre, J. 2016, ApJ, -824, 128, doi:10.3847/0004- 637X/824/2/128 +824, 128, doi: 10.3847/0004- 637X/824/2/128 Hanasoge, S., & Mandal, K. 2019, ApJL, 871, L32, -doi:10.3847/2041- 8213/aaff60 +doi: 10.3847/2041- 8213/aaff60 Hanasoge, S. 
M., Hotta, H., & Sreenivasan, K. R. 2020, Science Advances, 6, eaba9639, -doi:10.1126/sciadv.aba9639 +doi: 10.1126/sciadv.aba9639 Hanasoge, S. M., Woodard, M., Antia, H. M., Gizon, L., & Sreenivasan, K. R. 2017, MNRAS, 470, 1404, -doi:10.1093/mnras/stx1298 +doi: 10.1093/mnras/stx1298 Hansen, P. C. 1992, SIAM review, 34, 561 Hanson, C. S., Duvall, T. L., Birch, A. C., Gizon, L., & Sreenivasan, K. R. 2020, A&A, 644, A103, -doi:10.1051/0004- 6361/202039108 +doi: 10.1051/0004- 6361/202039108 Hanson, C. S., Hanasoge, S., & Sreenivasan, K. R. 2021, -ApJ, 910, 156, doi:10.3847/1538-4357/abe770 +ApJ, 910, 156, doi: 10.3847/1538-4357/abe770 Hathaway, D. H., Teil, T., Norton, A. A., & Kitiashvili, I. -2015, ApJ, 811, 105, doi:10.1088/0004- 637X/811/2/105 - Mani et al. +2015, ApJ, 811, 105, doi: 10.1088/0004- 637X/811/2/105 + Mani et al. Hathaway, D. H., Upton, L., & Colegrove, O. 2013, Science, -342, 1217, doi:10.1126/science.1244682 -Hill, F. 1988, ApJ, 333, 996, doi:10.1086/166807 +342, 1217, doi: 10.1126/science.1244682 +Hill, F. 1988, ApJ, 333, 996, doi: 10.1086/166807 Kashyap, S. G., Das, S. B., Hanasoge, S. M., Woodard, M. F., & Tromp, J. 2021, ApJS, 253, 47, -doi:10.3847/1538- 4365/abdf5e +doi: 10.3847/1538- 4365/abdf5e Korda, D., & ˇ Svanda, M. 2021, A&A, 646, A184, -doi:10.1051/0004- 6361/202039928 +doi: 10.1051/0004- 6361/202039928 Langfellner, J., Birch, A. C., & Gizon, L. 2018, A&A, 617, -A97, doi:10.1051/0004-6361/201732471 +A97, doi: 10.1051/0004-6361/201732471 Langfellner, J., Gizon, L., & Birch, A. C. 2015, A&A, 581, -A67, doi:10.1051/0004-6361/201526024 +A67, doi: 10.1051/0004-6361/201526024 Lavely, E. M., & Ritzwoller, M. H. 1992, Philosophical Transactions of the Royal Society of London Series A, -339, 431, doi:10.1098/rsta.1992.0048 +339, 431, doi: 10.1098/rsta.1992.0048 Lindsey, C., & Braun, D. C. 2000, SoPh, 192, 261, -doi:10.1023/A:1005227200911 +doi: 10.1023/A:1005227200911 L¨optien, B., Birch, A. C., Duvall, T. L., Gizon, L., & Schou, J. 
2016, A&A, 587, A9, -doi:10.1051/0004- 6361/201526805 +doi: 10.1051/0004- 6361/201526805 L¨optien, B., Gizon, L., Birch, A. C., et al. 2018, Nature -Astronomy, 2, 568, doi:10.1038/s41550-018- 0460-x +Astronomy, 2, 568, doi: 10.1038/s41550-018- 0460-x Mandal, K., & Hanasoge, S. 2020, ApJ, 891, 125, -doi:10.3847/1538- 4357/ab7227 +doi: 10.3847/1538- 4357/ab7227 Mandal, K., Hanasoge, S. M., & Gizon, L. 2021, A&A, 652, -A96, doi:10.1051/0004-6361/202141044 +A96, doi: 10.1051/0004-6361/202141044 Mani, P., & Hanasoge, S. 2020, ApJ, 901, 139, -doi:10.3847/1538- 4357/abb133 -—. 2021, ApJ, 920, 36, doi:10.3847/1538- 4357/ac1ad6 +doi: 10.3847/1538- 4357/abb133 +—. 2021, ApJ, 920, 36, doi: 10.3847/1538- 4357/ac1ad6 November, L. J., & Simon, G. W. 1988, ApJ, 333, 427, -doi:10.1086/166758 +doi: 10.1086/166758 Pijpers, F. P., & Thompson, M. J. 1994, A&A, 281, 231 Rieutord, M., Roudier, T., Ludwig, H. G., Nordlund, ˚ A., & Stein, R. 2001, A&A, 377, L14, -doi:10.1051/0004- 6361:20011160 Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar -Physics, 15, 6, doi:10.1007/s41116- 018-0013- 5 +doi: 10.1051/0004- 6361:20011160 Rincon, F., & Rieutord, M. 2018, Living Reviews in Solar +Physics, 15, 6, doi: 10.1007/s41116- 018-0013- 5 Rincon, F., Roudier, T., Schekochihin, A. A., & Rieutord, M. 2017, A&A, 599, A69, -doi:10.1051/0004- 6361/201629747 +doi: 10.1051/0004- 6361/201629747 Schad, A., & Roth, M. 2020, ApJ, 890, 32, -doi:10.3847/1538- 4357/ab65ec +doi: 10.3847/1538- 4357/ab65ec Scherrer, P. H., Schou, J., Bush, R. I., et al. 2012, SoPh, -275, 207, doi:10.1007/s11207- 011-9834- 2 +275, 207, doi: 10.1007/s11207- 011-9834- 2 Schou, J., Antia, H. M., Basu, S., et al. 1998, ApJ, 505, -390, doi:10.1086/306146 +390, doi: 10.1086/306146 Sekii, T. 1997, in Sounding Solar and Stellar Interiors, ed. J. Provost & F.-X. Schmider, Vol. 181, ISBN0792348389 Sekii, T., Kosovichev, A. G., Zhao, J., et al. 
2007, PASJ, -59, S637, doi:10.1093/pasj/59.sp3.S637 +59, S637, doi: 10.1093/pasj/59.sp3.S637 Snodgrass, H. B. 1984, SoPh, 94, 13, -doi:10.1007/BF00154804 +doi: 10.1007/BF00154804 Thompson, M. J., Toomre, J., Anderson, E. R., et al. 1996, -Science, 272, 1300, doi:10.1126/science.272.5266.1300 +Science, 272, 1300, doi: 10.1126/science.272.5266.1300 Unno, W., Osaki, Y., Ando, H., Saio, H., & Shibahashi, H. 1989, Nonradial oscillations of stars Verma, M., Steffen, M., & Denker, C. 2013, A&A, 555, -A136, doi:10.1051/0004-6361/201321628 +A136, doi: 10.1051/0004-6361/201321628 Vorontsov, S. V. 2011, MNRAS, 418, 1146, -doi:10.1111/j.1365- 2966.2011.19564.x +doi: 10.1111/j.1365- 2966.2011.19564.x Woodard, M. 2014, SoPh, 289, 1085, -doi:10.1007/s11207- 013-0386- 5 +doi: 10.1007/s11207- 013-0386- 5 Woodard, M., Schou, J., Birch, A. C., & Larson, T. P. -2013, SoPh, 287, 129, doi:10.1007/s11207- 012-0075- 9 -Woodard, M. F. 1989, ApJ, 347, 1176, doi:10.1086/168206 -—. 2006, ApJ, 649, 1140, doi:10.1086/506927 -—. 2007, ApJ, 668, 1189, doi:10.1086/521391 -—. 2016, MNRAS, 460, 3292, doi:10.1093/mnras/stw1223 +2013, SoPh, 287, 129, doi: 10.1007/s11207- 012-0075- 9 +Woodard, M. F. 1989, ApJ, 347, 1176, doi: 10.1086/168206 +—. 2006, ApJ, 649, 1140, doi: 10.1086/506927 +—. 2007, ApJ, 668, 1189, doi: 10.1086/521391 +—. 2016, MNRAS, 460, 3292, doi: 10.1093/mnras/stw1223 Zhao, J., Georgobiani, D., Kosovichev, A. G., et al. 2007, -ApJ, 659, 848, doi:10.1086/512009 +ApJ, 659, 848, doi: 10.1086/512009 Zhao, J., Nagashima, K., Bogart, R. S., Kosovichev, A. G., & Duvall, T. L., J. 2012, ApJL, 749, L5, -doi:10.1088/2041- 8205/749/1/L5 \ No newline at end of file +doi: 10.1088/2041- 8205/749/1/L5 \ No newline at end of file diff --git a/read/results/playa/2201.00200.txt b/read/results/playa/2201.00200.txt index dba297d..5c78e5f 100644 --- a/read/results/playa/2201.00200.txt +++ b/read/results/playa/2201.00200.txt @@ -1,21 +1,21 @@ -Astronomy & Astrophysics manuscript no. 
solar˙model˙v10˙corrected © ESO 2022 +Astronomy & Astrophysics manuscript no. solar˙model˙v10˙corrected © ESO 2022 January 4, 2022 Local heating due to convective overshooting and the solar modelling problem -I. Baraff e1, 2 - , T. Constantino 1 - , J. Clarke1 - , A. Le Saux1, 2 - , T. Goffrey 4 - , T. Guillet1 - , J. Pratt3 - , D. G. Vlaykov1 +I. Baraffe1,2 +, T. Constantino1 +, J. Clarke1 +, A. Le Saux1,2 +, T. Goffrey4 +, T. Guillet1 +, J. Pratt3 +, D. G. Vlaykov1 1 University of Exeter, Physics and Astronomy, EX4 4QL Exeter, UK (e-mail: i.baraffe@ex.ac.uk) 2 ´ -Ecole Normale Sup ´ -erieure, Lyon, CRAL (UMR CNRS 5574), Universit ´ +Ecole Normale Sup´ +erieure, Lyon, CRAL (UMR CNRS 5574), Universit´ e de Lyon, France 3 Department of Physics and Astronomy, Georgia State University, Atlanta GA 30303, USA @@ -29,18 +29,18 @@ solar model. This simple prescription qualitatively reproduces the behaviour fou local heating and smoothing of the temperature gradient below the convective boundary. We show that introducing local heating in the overshooting layer can reduce the sound-speed discrepancy usually reported between solar models and the structure of the Sun inferred from helioseismology. It also affects key quantities in the convective envelope, such as the density, the entropy, and the -speed of sound. These eff ects could help reduce the discrepancies between solar models and observed constraints based on seismic +speed of sound. These effects could help reduce the discrepancies between solar models and observed constraints based on seismic inversions of the Ledoux discriminant. Since mixing due to overshooting and local heating are the result of the same convective penetration process, the goal of this work is to invite solar modellers to consider both processes for a more consistent approach. Key words. Convection – Hydrodynamics – Stars: evolution – Sun: evolution - helioseismology - interior 1. 
Introduction Modelling the internal structure of the Sun is still a challenge. A recent review by Christensen-Dalsgaard (2021) describes in -detail the long-standing eff orts to improve solar models. The solar +detail the long-standing efforts to improve solar models. The solar modelling problem refers to the discrepancy between helioseismology and solar interior models that adopt low metallicities predicted by the three-dimensional (3D) atmosphere models -of, for example, Asplund et al. (2009) and Ca ffau et al. (2011), +of, for example, Asplund et al. (2009) and Caffau et al. (2011), in contrast to the high metallicities based on previous literature compilations by, for example, Anders & Grevesse (1989) and Grevesse & Noels (1993). Asplund et al. (2021) have recently @@ -57,13 +57,13 @@ penetration, also called overshooting, at the bottom of the convective agreement between solar models and helioseismic constraints (see for example Christensen-Dalsgaard et al. 2011; Zhang et al. 2012; Buldgen et al. 2019b). Overshooting in solar models has -most often been treated using diff usive or instantaneous chemical +most often been treated using diffusive or instantaneous chemical mixing. A temperature gradient that sharply transitions from a nearly adiabatic form to a radiative form is usually assumed, as suggested by the theoretical work of Zahn (1991). Models with a smoother transition have also been investigated. Based -on the analysis of models with di ff erent stratifications near the -Send o ffprint requests to : I. Bara ffe base of the convective zone, Christensen-Dalsgaard et al. (2011) +on the analysis of models with different stratifications near the +Send offprint requests to: I. Baraffe base of the convective zone, Christensen-Dalsgaard et al. 
(2011) found that models that better fit the helioseismic data have a weakly sub-adiabatic temperature gradient in the lower part of the convective zone and a smooth transition to the radiative gradient @@ -86,17 +86,17 @@ solar problem because such a flux worsens the sound-speed profile in the deep radiative interior of their solar model. Given the uncertainties regarding the temperature stratification of the overshooting region, solar modellers have considered these effects as -secondary and have focused their e fforts on exploring the impact +secondary and have focused their efforts on exploring the impact of solar abundances, microphysics (opacities, equations of state, nuclear reaction rates), and chemical mixing and diffusion (see details and references in the review of Buldgen et al. 2019a). -Additional, more exotic e ff ects such as early disk accretion or +Additional, more exotic effects such as early disk accretion or solar-wind mass loss (Zhang et al. 2019; Kunitomo & Guillot 2021) are also attracting increasing attention. To reinvigorate the debate, Buldgen et al. (2019b) recently highlighted once again how the transition of the temperature gra1arXiv:2201.00200v1 [astro-ph.SR] 1 Jan 2022 -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem dient just below the convective envelope can significantly impact the disagreement between solar models and helioseismic constraints. Their results, based on a method that combines multiple @@ -108,7 +108,7 @@ two extremes. Christensen-Dalsgaard et al. (2018) also note that an increase in the temperature at the transition would remove a remaining small sharp dip in the speed of sound immediately beneath the convective zone of the model. 
A major difficulty is -to disentangle the eff ects of overshoot from the e ff ects of opacities, +to disentangle the effects of overshoot from the effects of opacities, which can also alter the temperature gradient in these layers. Given the large number of parameters to deal with in order to improve solar models and the current lack of strong arguments in @@ -119,7 +119,7 @@ traditional picture of a sharp transition as formalised by Zahn The present work is motivated by arguments inspired by hydrodynamical simulations of convection and convective penetration in solar-like models. Recent hydrodynamical simulations by -Bara ffe et al. (2021, hereafter B21) highlight the process of local +Baraffe et al. (2021, hereafter B21) highlight the process of local heating in the overshooting region due to penetrating convective motions across the convective boundary. In the following, we analyse the potential impact of this feature on one-dimensional @@ -133,7 +133,7 @@ two-dimensional hydrodynamical simulations B21 performed two-dimensional (2D) fully compressible timeimplicit simulations of convection and convective penetration in a solar-like model with the MUlti-dimensional Stellar Implicit -Code MUSIC (Viallet et al. 2011, 2016; Go ff rey et al. 2017). +Code MUSIC (Viallet et al. 2011, 2016; Goffrey et al. 2017). The main motivation was to explore the impact of an artificial increase in the stellar luminosity on the properties of convection and convective penetration. This procedure is a common tactic @@ -169,26 +169,26 @@ These two features are also commonly observed in other hydrodynamical simulations, as mentioned above. An exploration of the impact of this heating on stellar evolution models may reveal that heating is a necessary aspect of models for overshooting. -Fig. 1. Radial profile of the temperature departure ∆ T / T - 0 from +Fig. 1. 
Radial profile of the temperature departure ∆T /T +0 from the initial profile T - 0 and of the sub-adiabaticity (∇ − ∇ - ad ) close to +0 and of the sub-adiabaticity (∇ − ∇ +ad) close to the convective boundary predicted by 2D hydrodynamical simulations (B21) of solar-like models. The lower panel corresponds to the model with a realistic stellar luminosity and the upper panel to a model with luminosity enhanced by a factor of ten. -The dash-dotted red lines show ∆ T /T - 0 (in %), the relative difference +The dash-dotted red lines show ∆T /T +0 (in %), the relative difference between the time and space averages of the temperature, T , and the initial temperature, T - 0 . The solid blue lines show the +0. The solid blue lines show the time and space averages of the sub-adiabaticity (∇ − ∇ -ad ). The +ad). The dashed black lines show the initial profile of the sub-adiabaticity, -( ∇ − ∇ - ad ) - init . The convective boundary is indicated by the vertical +(∇ − ∇ +ad) +init . The convective boundary is indicated by the vertical solid line (see details in B21) The behaviour of the thermal profile below the convective boundary found in the simulations of B21 is illustrated in Fig. @@ -197,27 +197,27 @@ boundary found in the simulations of B21 is illustrated in Fig. enhancement in the luminosity by a factor of ten because the features are intensified in these ‘boosted’ models (upper panel). The figure shows the local heating in the overshooting layer and -its impact on the sub-adiabaticity ( ∇ − ∇ -ad ), with ∇ = d log T +its impact on the sub-adiabaticity (∇ − ∇ +ad), with ∇ = d log T d log P the -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem temperature gradient and ∇ ad = d log T d log P | S the adiabatic gradient. 
The initial stratification below the convective boundary (located -at r = 0 .6734 × R +at r = 0.6734 × R star for this specific stellar model) is set by the stable radiative gradient, ∇ rad (see the dashed black line below the convective boundary in Fig. 1). B21 show that, as a result of the local heating below the convective boundary characterised - by the bump in temperature di fference ∆ T /T - 0 displayed + by the bump in temperature difference ∆T /T +0 displayed in Fig. 1, the temperature gradient becomes less sub-adiabatic immediately below the convective boundary1 - . The net result is +. The net result is a smoother transition just below the convective boundary with a temperature gradient that has an intermediate value between the radiative temperature gradient and the adiabatic one. In the @@ -238,10 +238,10 @@ A = 1 Γ 1 d ln P d ln r − d ln ρ -d ln r , (1) +d ln r , (1) with Γ 1 = (∂ ln P/∂ ln ρ) - ad . Starting from a reference evolutionary +ad. Starting from a reference evolutionary model, Buldgen et al. (2020) used an inversion procedure to iteratively reconstruct a solar model. Successive inversions of the Ledoux discriminant allowed them to obtain a @@ -253,9 +253,9 @@ s , and the density, ρ. To illustrate the convergence of their reconstruction procedure, they show (right panels of their Figs. 3-6) the successive iterations that converge to an excellent level of agreement for the four structural -inversions ( A , S , c2 +inversions (A, S , c2 s , ρ) starting from the initial reference model -adopted in their work. The diff erences found between the reconstructed +adopted in their work. The differences found between the reconstructed model and the reference model are useful as they indicate the modifications of the reference model that are required to converge towards a solar model in agreement with helioseismic @@ -265,25 +265,25 @@ analysis in Sect. 3.2. The first concerns the Ledoux discriminant. 
The major discrepancy between the Sun and the reference model occurs just below the convective boundary, with a large positive bump for -the quantity ( A +the quantity (A Sun - A ref ). The second concerns the speed of sound. The same positive bump at the same location as for the Ledoux discriminant, A, is observed for the quantity (c2 -s ,Sun − c2 -s ,ref ) /c 2 -s ,ref . The corrections +s,Sun − c2 +s,ref )/c2 +s,ref . The corrections applied to A during the reconstruction procedure also reduce the discrepancy in the speed of sound in the radiative region. The third concerns the entropy. Large discrepancies are observed in both the radiative region and the convective zone. The 1 Less sub-adiabatic means that |∇ − ∇ -ad | decreases compared to the -initial profile. entropy discrepancy (S +ad| decreases compared to the +initial profile. entropy discrepancy (S Sun − S - ref )/ S + ref )/S ref has two positive peaks in the radiative zone, one just below the overshooting region and a larger peak deeper at ∼ 40% of the stellar radius. This discrepancy @@ -293,7 +293,7 @@ The fourth concerns the density. The quantity (ρ Sun − ρ ref )/ρ - ref has a negative peak in the radiative region, at ∼ 35% +ref has a negative peak in the radiative region, at ∼ 35% of the stellar radius, and is positive in the convective zone. Importantly, Buldgen et al. (2020) mention that their reconstruction procedure gives similar Ledoux discriminant profiles @@ -306,7 +306,7 @@ Our main motivation is to show the potential impact of the local heating described in Sect. 2 on stellar models. We are not aiming in this short work at constructing the best solar model to fit helioseismic constraints. Using stellar evolution codes, we have -adopted two di ff erent methods that can be found in the literature +adopted two different methods that can be found in the literature to construct solar models (e.g. Zhang et al. 2012; Vinyoles et al. 2017). 
Our first method relies on the thermal relaxation of a reference model with solar radius and luminosity that is @@ -315,7 +315,7 @@ modified to reproduce the temperature gradient in the overshooting the chemical abundances are not modified by nuclear reactions, mixing, or microscopic diffusion during the relaxation process. For these tests, we used the 1D Lyon stellar evolution code -(Bara ff e et al. 1998). We repeated this experiment based on thermal +(Baraffe et al. 1998). We repeated this experiment based on thermal relaxation with the stellar evolution code MONSTAR (e.g. Constantino et al. 2014) and obtained the same qualitative results. @@ -328,11 +328,11 @@ nuclear reactions, microscopic diffusion, and overshooting mixing are also consistent with any modification of the structure induced by the forced local heating in the overshooting layer. These tests were performed with MONSTAR as it includes the -treatment of microscopic di ff usion. +treatment of microscopic diffusion. The first method allows the impact of local heating in the overshooting layer after thermal relaxation to be isolated. The second method provides evolutionary models that are selfconsistent - since the eff ect of the modification of the temperature + since the effect of the modification of the temperature gradient is accounted for during their evolution on the main sequence. In the following, we adopt a modification of the local temperature @@ -340,52 +340,52 @@ In the following, we adopt a modification of the local temperature the behaviour displayed in Fig. 1. We define an overshooting length d ov = α - ov H - P, CB , with H - P, CB the pressure scale height +ov H +P,CB, with H +P,CB the pressure scale height at the convective boundary and α ov a free parameter. We also define two radial locations, r ov = r - CB − d +CB − d ov and r - mid = r +mid = r CB − d -ov / 2, +ov/2, with r CB the radial location of the convective boundary. The temperature gradient is modified as follows. 
For r mid ≤ r < r - CB , we +CB, we use -∇ = g( r ) ∇ -ad + (1 − g (r ))∇ -rad , (2) +∇ = g(r)∇ +ad + (1 − g(r))∇ +rad, (2) with -g( r ) = sin{ [(r − r -mid )/ (r - CB − r -mid )]a - × π/ 2} . (3) +g(r) = sin{[(r − r +mid)/(r +CB − r +mid)]a + × π/2}. (3) -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem For r - ov ≤ r < r -mid , we use +ov ≤ r < r +mid, we use ∇ = ∇ -rad − h( r ) ∇ -ad , (4) +rad − h(r)∇ +ad, (4) with -h( r ) = b × sin {[( r -mid − r ) /( r +h(r) = b × sin{[(r +mid − r)/(r mid − r - ov )] × π } . (5) +ov)] × π}. (5) Sine functions are used in Eqs. (3) and (5) to reproduce the smooth variations in the temperature gradient below the convective boundary produced by the hydrodynamical simulations. We have verified that the results are insensitive to the smoothness of these variations and to the exact shape of the temperature gradient - radial profile.We adopted a =0.3 in Eq. (3) as it provides a + radial profile.We adopted a=0.3 in Eq. (3) as it provides a behaviour for the temperature gradient very close to the one displayed in Fig. 1. Results are rather insensitive to variations in the values of a between 0.2 and 0.4. We adopted b=0.03 in Eq. (5), @@ -395,12 +395,12 @@ b. 3.2.1. Thermal equilibrium models The details of the procedure for the first method are the following. We calculate the evolution of a 1 M - model with an initial -helium mass fraction of 0.28, metallicity Z = 0. 02 , and a mixing + model with an initial +helium mass fraction of 0.28, metallicity Z = 0.02, and a mixing length l - mix = 1 .9 H - P . We use a reference model that is in -thermal equilibrium 2 +mix = 1.9H +P. We use a reference model that is in +thermal equilibrium2 and has the luminosity and radius of the current Sun. 
Starting from this reference model, the temperature gradient is modified over a prescribed depth to mimic the @@ -409,8 +409,8 @@ impact of overshooting according to the hydrodynamical simulations by Eqs. (2)-(5) over a distance d ov below the convective boundary. We show the results in Fig. 2 for α - ov = 0.15 and α - ov = 0.20. +ov = 0.15 and α +ov= 0.20. These overshooting widths are in good agreement with the maximal depth reached by downflows below the convective boundary predicted by the hydrodynamical simulations for the solar-like @@ -419,12 +419,12 @@ in B21 is slightly under-luminous compared to the Sun (see B21 for details). B21 also mention that one should be cautious when directly applying the overshooting depths predicted by their simulations to real stars since the final relaxed state for these simulations - may have di fferent properties from non-thermally relaxed + may have different properties from non-thermally relaxed states. We varied α - ov between 0.15 and 0.35 and find that the +ov between 0.15 and 0.35 and find that the results do not change qualitatively. However, the amplitude of the variations in the model properties depends on d - ov (see below). +ov (see below). As shown below, this simple prescription implemented in a stellar evolution code yields a local increase in the temperature below the convective boundary, similar to that observed in @@ -445,7 +445,7 @@ temperature gradient can be consistently compared to the reference Thermal equilibrium means that the total nuclear energy produced in the central regions balances the radiative losses at the surface, i.e. the total nuclear luminosity, L -nuc , equals the total stellar luminosity, L . by Eqs. (2)-(5) yields similar qualitative changes in the temperature +nuc, equals the total stellar luminosity, L. by Eqs. 
(2)-(5) yields similar qualitative changes in the temperature and the sub-adiabaticity close to the convective boundary that was found in the hydrodynamical simulations of B21. Fig. 2. Radial profile of the temperature difference and of the @@ -454,29 +454,29 @@ sub-adiabaticity of a 1D solar-like structure with a modified temperature (2)-(5). The temperature gradient is modified over a distance d ov = α - ov H - P,CB , with α - ov =0.15 in the lower panel and α - ov =0.20 +ov H +P,CB, with α +ov=0.15 in the lower panel and α +ov=0.20 in the upper panel. The dash-dotted red lines show the percentage - relative temperature di ff erence, ∆ T / T - ref , with ∆ T = T − T - ref . -The solid blue lines correspond to the sub-adiabaticity ( ∇ − ∇ -ad ). + relative temperature difference, ∆T /T +ref , with ∆T = T − T +ref . +The solid blue lines correspond to the sub-adiabaticity (∇ − ∇ +ad). The dashed black lines show the sub-adiabaticity of the reference model. The convective boundary is indicated by the vertical solid line. The vertical dashed line in each panel is located at a distance d - ov below the convective boundary. +ov below the convective boundary. The impact on the whole stellar structure was quantified by -comparing the four structural quantities ( A , S , c 2 +comparing the four structural quantities (A, S , c2 s , ρ) between the modified and the reference model. The results are displayed in -Fig. 3, with ∆ X defined as ( X − X - ref ) for any structural quantity X . +Fig. 3, with ∆X defined as (X − X +ref ) for any structural quantity X. The forced local heating in the overshooting layer produces similar - positive peaks for ∆ A, ∆ S , and ∆ c 2 + positive peaks for ∆A, ∆S , and ∆c2 s , as found for the temperature. The modification thus provides the correction required to improve the discrepancy for the Ledoux discriminant described @@ -484,13 +484,13 @@ in the first of the trends outlined in Sect. 3.1. 
Unsurprisingly, such a modification of the temperature gradient is expected to improve the agreement with helioseismic constraints and help -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem remove the sound speed anomaly below the convective boundary (second trend in Sect. 3.1), as suggested by the results of Christensen-Dalsgaard et al. (2011). But it is also interesting to note that such a modification yields a slight cooling of the convective - zone (see Fig. 2) and thus a negative di ff erence for the -entropy (see Fig. 3). A negative di fference in the convective envelope + zone (see Fig. 2) and thus a negative difference for the +entropy (see Fig. 3). A negative difference in the convective envelope is in agreement with the correction required for the reference model of Buldgen et al. (2020) to better match the Sun (see third trend in Sect. 3.1). Regarding the density, the modification @@ -500,103 +500,103 @@ to the reference model over a broad region below the convective boundary. The impact on the density in the convective region for this specific model is partly in agreement with the correction required for this quantity in the Buldgen et al. (2020) study, with a -positive diff erence found only in the upper part of the convective +positive difference found only in the upper part of the convective envelope (see the fourth trend in Sect. 3.1). These trends are insensitive to the depth over which the temperature gradient is modified. Increasing the depth increases the -magnitude of the di ff erences but has no impact on their sign. We +magnitude of the differences but has no impact on their sign. We find that the maximum variation in the model properties, such as -the speed of sound, ∆ c2 -s / c2 -s , ref , roughly scales with d 2 -ov . This scaling +the speed of sound, ∆c2 +s /c2 +s,ref , roughly scales with d2 +ov. 
This scaling is linked to the integrated area between the modified temperature gradient curve and the one for the reference (non-modified) -temperature gradient, which roughly decreases linearly with r . +temperature gradient, which roughly decreases linearly with r. This area is proportional to the square of the overshooting depth, and consequently, the maximum variation in the model properties - is also proportional to d 2 -ov . The qualitative trends also remain + is also proportional to d2 +ov. The qualitative trends also remain the same whether overshooting mixing in the reference model is ignored or included using a step function (with instantaneous -mixing) or an exponential decay for the diff usion coefficient (e.g. +mixing) or an exponential decay for the diffusion coefficient (e.g. Freytag et al. 1996). 3.2.2. Self-consistent evolutionary models -For the tests based on the second method, we ran di ff erent sets -of models with diff erent combinations of assumptions, including -or not microscopic diff usion and with or without overshooting +For the tests based on the second method, we ran different sets +of models with different combinations of assumptions, including +or not microscopic diffusion and with or without overshooting mixing. When overshooting mixing was included in the overshooting layer, it was based either on a step function or on an -exponential decay for the di ffusion coe ffi cient. Microscopic diffusion +exponential decay for the diffusion coefficient. Microscopic diffusion for H and He was implemented according to Thoul et al. (1994). For these tests, the temperature gradient was modified according to Eqs. (2)-(5). All models start from the ZAMS and are evolved until they reach the solar radius and luminosity at the same age. This was achieved by making small adjustments to the mixing length, l -mix . The models with temperature gradient modifications +mix. 
The models with temperature gradient modifications were compared to the relevant reference model, which has no modification of the temperature gradient but everything -else is the same (i.e. the same treatment of microscopic diff usion +else is the same (i.e. the same treatment of microscopic diffusion and of overshooting mixing). The evolutionary models with temperature gradient modifications are thus self-consistent. The -main di fference between this approach and the one in the previous - section is that these models accumulate small di ff erences in, +main difference between this approach and the one in the previous + section is that these models accumulate small differences in, for example, central H abundance when compared to their reference model. These tests produce the same trends in the overshooting layer as found for the tests based on the first method (Sect. 3.2.1), independently of the treatment of overshooting -mixing and whether microscopic di ffusion is included or not. -In the convective zone, all models give a positive di ff erence for +mixing and whether microscopic diffusion is included or not. +In the convective zone, all models give a positive difference for the density between the model with a modified temperature gradient and the relevant reference model. For the other quantities -( S , c2 -s ), the diff erences in the convective zone are very sensitive Fig. 3. Di fference of various structural quantities between a +(S , c2 +s ), the differences in the convective zone are very sensitive Fig. 3. Difference of various structural quantities between a model with a modified temperature gradient in the overshooting layer and a reference model calculated with the Lyon stellar evolution code. The temperature gradient in the modified model is changed over a distance d ov = α ov H - P, CB below the convective +P,CB below the convective boundary (indicated by the vertical solid line). The lower panel shows the results for α - ov = 0. 
15 and the upper panel for +ov = 0.15 and the upper panel for α - ov = 0. 20. +ov = 0.20. to the assumptions regarding whether overshooting mixing is included or not. But at least we find solutions that are compatible with the four trends found by Buldgen et al. (2020) for the four structural quantities. This is illustrated in Fig. 4 with a model that accounts for step function overshooting mixing over a distance d -ov = 0 .15 H - P, CB (lower panel) and d -ov = 0. 20 H - P, CB (upper +ov = 0.15H +P,CB (lower panel) and d +ov = 0.20H +P,CB (upper panel). 4. Conclusion -The tests performed in Sect. 3 are based on di ff erent methods +The tests performed in Sect. 3 are based on different methods (relaxed models versus consistent evolution) that can be used to construct solar models. Independently of the method used, the tests show that a local increase in the temperature in the overshooting region due to convective penetration provides the qualitative - e ffects required to improve the speed of sound discrepancy + effects required to improve the speed of sound discrepancy below the convective boundary. This discrepancy is persistent in -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem -Fig. 4. Di fference of various structural quantities between a +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem +Fig. 4. Difference of various structural quantities between a modified model and a reference model calculated with the MONSTAR stellar evolution code. The reference model is -evolved from the ZAMS with microscopic di ff usion and step +evolved from the ZAMS with microscopic diffusion and step function overshooting mixing over a distance d - ov = α - ov H - P,CB below +ov = α +ov H +P,CB below the convective boundary. The lower panel shows the results for α - ov = 0. 15 and the upper panel for α - ov = 0. 20. The models +ov = 0.15 and the upper panel for α +ov = 0.20. 
The models with a modified temperature gradient in the overshooting layer (same microscopic diffusion and overshooting mixing treatment as the reference model) are evolved similarly from the ZAMS. @@ -608,31 +608,31 @@ this problem, as mentioned in Sect. 1. However, the details of the physical process responsible for this local heating have been lacking, whereas we can now suggest an explanation based on the B21 results. The trends that we find for the four structural -quantities ( A, S , c2 +quantities (A, S , c2 s , ρ) are robust below the convective boundary and in a large fraction of the radiative core, independently of -the treatment of mixing and di ffusion and of the method for constructing +the treatment of mixing and diffusion and of the method for constructing the models in Sects. 3.2.1 and 3.2.2. Our experiments additionally show that such a local change in the temperature, despite being made over a very limited region below the convective - boundary, can also aff ect the density, the entropy, and the speed of sound in the convective envelope after thermal relaxation + boundary, can also affect the density, the entropy, and the speed of sound in the convective envelope after thermal relaxation or evolution on the main sequence. How these quantities are affected in the convective envelope compared to a reference model with no local heating depends on the strategy for building solar models and on the treatment of overshooting mixing. This mixing is obviously linked to the local heating given that both result from the same dynamical process. A combined testing of -both eff ects in stellar models could provide more constraints on +both effects in stellar models could provide more constraints on the general process of overshooting. 
-Increasingly, eff orts are now devoted to characterising the +Increasingly, efforts are now devoted to characterising the process of convective boundary mixing in stellar models based on multi-dimensional hydrodynamical simulations. More work is required to obtain reliable determinations of an overshooting depth and to describe quantitatively the mixing and impact on -the temperature gradient. Understanding the e ff ects of rotation +the temperature gradient. Understanding the effects of rotation and magnetic fields on overshooting is a significantly more difficult theoretical and numerical problem to address; however, -eff orts to study these combined non-linear e ff ects are ongoing +efforts to study these combined non-linear effects are ongoing (Hotta 2017; Korre et al. 2021). Despite the limitations of existing hydrodynamical simulations, they are already providing constraints on physical processes usually treated with several @@ -645,19 +645,19 @@ that produce a local change in the temperature gradient are also responsible for the mixing in this region. Because much observational evidence points towards the need for extra mixing at convective boundaries, for example lithium depletion in solar-like -stars (Baraff e et al. 2017), the size of convective cores (Claret +stars (Baraffe et al. 2017), the size of convective cores (Claret & Torres 2016), and colour-magnitude diagrams (Castro et al. 2014), solar modellers often include this extra mixing in their models. But a consistent approach should also require accounting for a local change in the temperature gradient. The impact of this local heating goes in the right direction to improve not only the discrepancies of solar models below the convective boundary, - but also in the convective envelope. This e ffect o ff ers an interesting + but also in the convective envelope. This effect offers an interesting step forward for solving the solar modelling problem. 
In this exploratory work, we adopt a simple prescription for the local heating in the overshooting layer since the main goal is to highlight its qualitative impact on stellar models. However, -this eff ect should not be considered as another free parameter in +this effect should not be considered as another free parameter in the solar modelling problem. Future multi-dimensional hydrodynamical simulations will enable this process, and its treatment in 1D stellar evolution codes, to be better constrained. @@ -665,7 +665,7 @@ in 1D stellar evolution codes, to be better constrained. We thank our anonymous referee for valuable comments which helped improving the manuscript. This work is supported by the ERC grant No. 787361-COBOM and the consolidated STFC -grant ST /R000395 / 1. IB thanks the Max Planck Institut f ¨ +grant ST/R000395/1. IB thanks the Max Planck Institut f ¨ ur Astrophysics (Garching) for warm hospitality during completion of part of this work. The authors would like to acknowledge the @@ -674,25 +674,25 @@ use of the University of Exeter High-Performance Computing at Leicester, operated by the University of Leicester IT Services, which forms part of the STFC DiRAC HPC Facility. The equipment was funded by BEIS capital funding via STFC capital -grants ST/ K000373 /1 and ST / R002363 /1 and STFC DiRAC -Operations grant ST/ R001014 / 1. DiRAC is part of the National +grants ST/K000373/1 and ST/R002363/1 and STFC DiRAC +Operations grant ST/R001014/1. DiRAC is part of the National e-Infrastructure. -Bara ff e et al.: Local heating due to convective overshooting and the solar modelling problem +Baraffe et al.: Local heating due to convective overshooting and the solar modelling problem References Anders, E. & Grevesse, N. 1989, Geochim. Cosmochim. Acta, 53, 197 Asplund, M., Amarsi, A. M., & Grevesse, N. 2021, A&A, 653, A141 Asplund, M., Grevesse, N., Sauval, A. J., & Scott, P. 2009, ARA&A, 47, 481 -Bara ffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 
1998, A&A, 337, 403 -Bara ffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 -Bara ffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 +Baraffe, I., Chabrier, G., Allard, F., & Hauschildt, P. H. 1998, A&A, 337, 403 +Baraffe, I., Pratt, J., Goffrey, T., et al. 2017, ApJ, 845, L6 +Baraffe, I., Pratt, J., Vlaykov, D. G., et al. 2021, A&A, 654, A126 Brummell, N. H., Clune, T. L., & Toomre, J. 2002, ApJ, 570, 825 Brun, A. S., Miesch, M. S., & Toomre, J. 2011, ApJ, 742, 79 Buldgen, G., Eggenberger, P., Baturin, V. A., et al. 2020, A&A, 642, A36 Buldgen, G., Salmon, S., & Noels, A. 2019a, Frontiers in Astronomy and Space Sciences, 6, 42 Buldgen, G., Salmon, S. J. A. J., Noels, A., et al. 2019b, A&A, 621, A33 -Ca ffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, +Caffau, E., Ludwig, H. G., Steffen, M., Freytag, B., & Bonifacio, P. 2011, Sol. Phys., 268, 255 Cai, T. 2020, ApJ, 888, 46 Castro, N., Fossati, L., Langer, N., et al. 2014, A&A, 570, L13 @@ -704,16 +704,16 @@ M. J. 2011, MNRAS, 414, 1158 Claret, A. & Torres, G. 2016, A&A, 592, A15 Constantino, T., Campbell, S., Gil-Pons, P., & Lattanzio, J. 2014, ApJ, 784, 56 Edelmann, P. V. F., Ratnasingam, R. P., Pedersen, M. G., et al. 2019, ApJ, 876, 4 -Freytag, B., Ludwig, H. G., & Ste ffen, M. 1996, A&A, 313, 497 -Go ffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 +Freytag, B., Ludwig, H. G., & Steffen, M. 1996, A&A, 313, 497 +Goffrey, T., Pratt, J., Viallet, M., et al. 2017, A&A, 600, A7 Grevesse, N. & Noels, A. 1993, in Origin and Evolution of the Elements, ed. N. Prantzos, E. Vangioni-Flam, & M. Casse, 15–25 Higl, J., M ¨ uller, E., & Weiss, A. 2021, A&A, 646, A133 Hotta, H. 2017, ApJ, 843, 52 Hurlburt, N. E., Toomre, J., & Massaguer, J. M. 1986, ApJ, 311, 563 -K ¨ -apyl ¨ +K¨ +apyl¨ a, P. J. 2019, A&A, 631, A122 Korre, L., Brummell, N., Garaud, P., & Guervilly, C. 2021, MNRAS, 503, 362 Korre, L., Garaud, P., & Brummell, N. H. 
2019, MNRAS, 484, 1220 @@ -723,9 +723,9 @@ Muthsam, H. J., Goeb, W., Kupka, F., Liebich, W., & Zoechling, J. 1995, A&A, 293, 127 Rogers, T. M., Glatzmaier, G. A., & Jones, C. A. 2006, ApJ, 653, 765 Thoul, A. A., Bahcall, J. N., & Loeb, A. 1994, ApJ, 421, 828 -Viallet, M., Bara ffe, I., & Walder, R. 2011, A&A, 531, A86 -Viallet, M., Go ffrey, T., Bara ffe, I., et al. 2016, A&A, 586, A153 -Viallet, M., Meakin, C., Arnett, D., & Moc ´ +Viallet, M., Baraffe, I., & Walder, R. 2011, A&A, 531, A86 +Viallet, M., Goffrey, T., Baraffe, I., et al. 2016, A&A, 586, A153 +Viallet, M., Meakin, C., Arnett, D., & Moc´ ak, M. 2013, ApJ, 769, 1 Vinyoles, N., Serenelli, A. M., Villante, F. L., et al. 2017, ApJ, 835, 202 Zahn, J. P. 1991, A&A, 252, 179 diff --git a/read/results/playa/2201.00201.txt b/read/results/playa/2201.00201.txt index faed955..a3b0dc4 100644 --- a/read/results/playa/2201.00201.txt +++ b/read/results/playa/2201.00201.txt @@ -1,9 +1,9 @@ -Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs © ESO 2022 +Astronomy & Astrophysics manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs ©ESO 2022 January 19, 2022 - Letter to the E ditor + Letter to the Editor The period-age relation of long-period variables M. Trabucchi1, - , N. Mowlavi1 +, N. Mowlavi1 Department of Astronomy, University of Geneva, Ch. Pegasi 51, 1290 Versoix, Switzerland December 2021 ABSTRACT @@ -16,7 +16,7 @@ clusters. Results. In agreement with observations, models predict that the fundamental mode period decreases with increasing age because of the dominant role of mass in shaping stellar structure and evolution. At a given age, the period distribution shows a non-negligible width and is skewed toward short periods, except for young C-rich stars. As a result, the period-age relations of O-rich and Crich - models are predicted to have diff erent slopes. 
We derived best-fit relations describing age and initial mass as a function of the + models are predicted to have different slopes. We derived best-fit relations describing age and initial mass as a function of the fundamental mode period for both O- and C-rich models. Conclusions. The study confirms the power of the period-age relations to study populations of LPVs of specific types, either O-rich or C-rich, on statistical grounds. In doing so, it is recommended not to limit a study to Miras, which would make it prone to selection @@ -41,122 +41,122 @@ The notion that younger LPVs tend to display longer periods compared to older ones, often referred to as the period-age (PA) relation, is rooted in the empirical evidence from stellar kinematics in the solar neighborhood. The first such piece of evidence -is probably due toMerrill(1923), who pointed out that M-type +is probably due to Merrill (1923), who pointed out that M-type LPVs increasingly lag behind the local standard of rest (i.e., possess a higher asymmetric drift) as their period decreases. Later -studies (as summarized byWyatt & Cahn1983) confirmed this -behavior (also using proper motion data, e.g.,Wilson & Merrill1942), - and showed that the shorter periods are also accompanied +studies (as summarized by Wyatt & Cahn 1983) confirmed this +behavior (also using proper motion data, e.g., Wilson & Merrill + 1942), and showed that the shorter periods are also accompanied by a higher velocity dispersion. Furthermore, groups of LPVs with relatively short periods are characterized by a greater scale height above the Galactic plane. This was shown, using for Corresponding author: M. Trabucchi -( michele.trabucchi@unige.ch) the first time the radial velocity of LPVs in the southern hemisphere, - byFeast(1963). In this seminal paper, Feast realized +(michele.trabucchi@unige.ch) the first time the radial velocity of LPVs in the southern hemisphere, + by Feast (1963). 
In this seminal paper, Feast realized that LPVs with shorter periods must be members of older stellar populations and emphasized their highly promising applications for both Galactic and extra-galactic studies over a wide range of stellar ages. It should be noted that the PA relation is connected with the existence of a period-metallicity relation (Lloyd -Evans & Menzies1973;Lloyd Evans1983b;Feast1981;Feast -& Whitelock2000a, and references therein). +Evans & Menzies 1973; Lloyd Evans 1983b; Feast 1981; Feast +& Whitelock 2000a, and references therein). A number of subsequent works have corroborated the PA relation on empirical grounds, or have exploited it to interpret observational results. Relevant examples are studies of LPVs in -globular clusters (e.g.,Feast1966;Lloyd Evans1983b;Whitelock1986), - toward the galactic center and bulge (Lloyd Evans -1976;Feast et al.1980;Whitelock et al.1991) or at high galactic -latitude (Jura & Kleinmann1992;Whitelock et al.1994). Of particular - interest is the recent eff ort to extend the analysis of LPVs -to dwarf galaxies in the Local Group (Menzies et al.2002,2008; -Whitelock et al.2009;Menzies et al.2010,2011;Sakamoto et al. -2012;Battinelli & Demers2012,2013;Whitelock et al.2013; -Menzies et al.2015). +globular clusters (e.g., Feast 1966; Lloyd Evans 1983b; Whitelock + 1986), toward the galactic center and bulge (Lloyd Evans +1976; Feast et al. 1980; Whitelock et al. 1991) or at high galactic +latitude (Jura & Kleinmann 1992; Whitelock et al. 1994). Of particular + interest is the recent effort to extend the analysis of LPVs +to dwarf galaxies in the Local Group (Menzies et al. 2002, 2008; +Whitelock et al. 2009; Menzies et al. 2010, 2011; Sakamoto et al. +2012; Battinelli & Demers 2012, 2013; Whitelock et al. 2013; +Menzies et al. 2015). The Hipparcos mission provided the means to refine the results on the period-kinematics connection. 
This was done by -Feast & Whitelock(2000b), who found evidence supporting the +Feast & Whitelock (2000b), who found evidence supporting the existence of a bar-like structure in the Bulge from the orbits of local LPVs. A similar study dedicated to C-rich LPVs was performed - byFeast et al.(2006), who provided quantitative age + by Feast et al. (2006), who provided quantitative age estimates for these stars. A summary of the main results and prospects emerging from these Hipparcos-era studies is given by Article number, page 1 of 9arXiv:2201.00201v2 [astro-ph.SR] 17 Jan 2022 -A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Feast(2007). More recently, the study of the Galaxy with LPVs +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Feast (2007). More recently, the study of the Galaxy with LPVs has been stimulated by the wealth of data acquired by large-scale -surveys (e.g.,Catchpole et al.2016;Urago et al.2020), especially - the Gaia mission (Grady et al.2019,2020). -It seems relevant that just a few years after the study ofFeast -(1963),Kippenhahn & Smith(1969) predicted the PA relation +surveys (e.g., Catchpole et al. 2016; Urago et al. 2020), especially + the Gaia mission (Grady et al. 2019, 2020). +It seems relevant that just a few years after the study of Feast +(1963), Kippenhahn & Smith (1969) predicted the PA relation of classical Cepheids from stellar evolution and pulsation models. The theoretical modeling of Cepheids and of their periodluminosity (PL) and PA relations is now an active field of research - (e.g.,Bono et al.2005;Anderson et al.2016;De Somma -et al.2020). In contrast, when it comes to theoretical assessments + (e.g., Bono et al. 2005; Anderson et al. 2016; De Somma +et al. 2020). 
In contrast, when it comes to theoretical assessments of the LPV PA relation, the literature is surprisingly scarce (especially - in comparison with the significant e ff ort put into empirical + in comparison with the significant effort put into empirical studies). In fact, we were able to identify only two relevant studies - addressing this subject (Wyatt & Cahn1983;Eggen1998). + addressing this subject (Wyatt & Cahn 1983; Eggen 1998). The discrepancy in period predictions between linear and nonlinear - pulsation models (e.g.,Ya’Ari & Tuchman1996;Lebzelter -& Wood2005;Trabucchi et al.2021b), and more generally the -di fficulty in modeling the structure of evolved red giants, likely + pulsation models (e.g., Ya’Ari & Tuchman 1996; Lebzelter +& Wood 2005; Trabucchi et al. 2021b), and more generally the +difficulty in modeling the structure of evolved red giants, likely played a role in hampering the theoretical investigation of the PA relation of LPVs. Motivated by the release of updated AGB evolutionary models - (Pastorelli et al.2019,2020) and the availability of new, accurate + (Pastorelli et al. 2019, 2020) and the availability of new, accurate model predictions for the FM period of AGB stars (Trabucchi - et al.2019,2021b), we decided to investigate the nature + et al. 2019, 2021b), we decided to investigate the nature of the PA relation of LPVs on theoretical grounds. The adopted -models and observed data are described in Sect.2, while in -Sect.3we present the results, which are discussed in Sect.4. -We summarize our conclusions in Sect.5. +models and observed data are described in Sect. 2, while in +Sect. 3 we present the results, which are discussed in Sect. 4. +We summarize our conclusions in Sect. 5. 2. Methods 2.1. Models We employed PARSEC-COLIBRI isochrones (Marigo et al. -2017) with stellar evolutionary models fromPastorelli et al. 
-(2019,2020) for the thermally pulsing asymptotic giant branch -(TP-AGB) phase, and from PARSEC (Bressan et al.2012, version +2017) with stellar evolutionary models from Pastorelli et al. +(2019, 2020) for the thermally pulsing asymptotic giant branch +(TP-AGB) phase, and from PARSEC (Bressan et al. 2012, version 1.2S) for the preceding evolution. The adopted set of isochrones covers the range 0.001 to 0.016 in initial metallicity (Z -i ), with a 0.001 step, while it spans the age interval -8 .00 ≤ log( τ/ yr) ≤ 10 . 45 with a step of 0.05. Since the AGB +i), with a 0.001 step, while it spans the age interval +8.00 ≤ log(τ/yr) ≤ 10.45 with a step of 0.05. Since the AGB phase is short-lived, it only spans a small range of initial masses -for each given isochrone, of order of 10 − 2 +for each given isochrone, of order of 10−2 M - at most. + at most. The adopted isochrones include linear pulsation periods from -Trabucchi et al.(2019) for overtone modes and nonlinear periods -computed with the period-mass-radius relation fromTrabucchi -et al.(2021b) for the FM 1 - . Pulsation properties were computed +Trabucchi et al. (2019) for overtone modes and nonlinear periods +computed with the period-mass-radius relation from Trabucchi +et al. (2021b) for the FM1 +. Pulsation properties were computed along both the early-AGB and the TP-AGB. We did not extend our analysis to red supergiant stars as the pulsation prescription we employed are strictly valid only below 7 M - . +. We recall that, with the adopted nonlinear relation, the period increases with radius (R) as a broken power law, whose exponent decreases as soon as the “bending radius” R - b is exceeded, it and +b is exceeded, it and becomes zero when the “saturation radius” R s > R b is reached (i.e., the period becomes independent of radius). The exact values of R - b and R -s , as well as of the exponents, depend on the -current mass ( M ). 
We assume that the FM is dominant if the +b and R +s, as well as of the exponents, depend on the +current mass (M). We assume that the FM is dominant if the stellar radius is larger than the critical value R -dom, 0 , which we -computed from the current stellar mass using Eq. 4 ofTrabucchi -et al.(2021b). +dom,0, which we +computed from the current stellar mass using Eq. 4 of Trabucchi +et al. (2021b). 1 Hereinafter, whenever we discuss periods, it should be understood -that we refer to FM periods on which this work is focused. 2.2. Data +that we refer to FM periods on which this work is focused. 2.2. Data As a first set of data, we considered the cluster-LPV pairs used -byGrady et al.(2019, see their tables 1 and 2). These consist of +by Grady et al. (2019, see their tables 1 and 2). These consist of 19 clusters in the Large Magellanic Cloud, hosting a total of 20 potential LPV members, and eight Galactic clusters each hosting a potential LPV member. @@ -164,63 +164,63 @@ We expanded this list with data for LPVs in a few populous clusters, namely the Galactic clusters NGC 362, NGC 2808, 47 Tuc (NGC 104), and ω Cen (NGC 5139); the LMC clusters NGC 1978 and NGC 1846; and the cluster NGC 419 in the Small Magellanic - Cloud (SMC). The source lists were taken fromLebzelter - & Wood(2005,2007,2011,2016) andKamath et al.(2010), + Cloud (SMC). The source lists were taken from Lebzelter + & Wood (2005, 2007, 2011, 2016) and Kamath et al. (2010), whose notation for the sources names is adopted here. After excluding the star LW3 in NGC 1846 and the star V129 in ω Cen, -which are unlikely cluster members (cf.Lebzelter & Wood2007, +which are unlikely cluster members (cf. Lebzelter & Wood 2007, 2016), we reached a total of 203 sources. The aforementioned studies also provide a lot of information, possibly including J H K photometry, one or more periods, and a spectral type. 
In order to expand on the available data, we crossmatched the selected sample with the Two Micron AllSky - Survey (2MASS,Skrutskie et al.2006), the all-sky data + Survey (2MASS, Skrutskie et al. 2006), the all-sky data release of the Wide-field Infrared Survey Explorer (AllWISE, -Cutri et al.2013), the catalog of variable stars from the AllSky - Automated Survey for SuperNovae (ASAS-SNJayasinghe -et al.2020), the catalogs of LPVs in the Magellanic Clouds from +Cutri et al. 2013), the catalog of variable stars from the AllSky + Automated Survey for SuperNovae (ASAS-SN Jayasinghe +et al. 2020), the catalogs of LPVs in the Magellanic Clouds from the third phase of the Optical Gravitational Lensing Experiment -(OGLE-III,Soszy ´ -nski et al.2009,2011), the early third data release - from the Gaia mission ( Gaia EDR3,Gaia Collaboration -et al.2021), and the catalog of LPV candidates from Gaia DR2 -(Mowlavi et al.2018). -FollowingGrady et al.(2019), we took ages from -Kharchenko et al.(2016) andBaumgardt et al.(2013) for clusters +(OGLE-III, Soszy ´ +nski et al. 2009, 2011), the early third data release + from the Gaia mission (Gaia EDR3, Gaia Collaboration +et al. 2021), and the catalog of LPV candidates from Gaia DR2 +(Mowlavi et al. 2018). +Following Grady et al. (2019), we took ages from +Kharchenko et al. (2016) and Baumgardt et al. (2013) for clusters in the Galaxy and LMC, respectively, thereby ensuring that ages would be homogeneously derived for clusters in both galaxies. -Age uncertainties fromBaumgardt et al.(2013), provided for +Age uncertainties from Baumgardt et al. (2013), provided for each cluster, are generally around σ - log( τ ) 0. 05.Kharchenko -et al.(2016) do not provide age uncertainties, but a reasonable +log(τ) 0.05. Kharchenko +et al. (2016) do not provide age uncertainties, but a reasonable upper limit for their method should be σ - log(τ ) = 0. 
2 based on -the analysis ofKharchenko et al.(2005) (the same value was -adopted byGrady et al.2019, in their Fig. 7). -As discussed byKamath et al.(2010), the age of the SMC +log(τ) = 0.2 based on +the analysis of Kharchenko et al. (2005) (the same value was +adopted by Grady et al. 2019, in their Fig. 7). +As discussed by Kamath et al. (2010), the age of the SMC cluster NGC 419 is believed to be around 1.4-1.6 Gyr. This is -consistent with the value τ = 1. 45 ± 0 .05 Gyr fromGoudfrooij -et al.(2014), while it is as young as τ 0. 89 ± 0. 015 Gyr according - toPerren et al.(2017). Since an accurate estimate is not +consistent with the value τ = 1.45 ± 0.05 Gyr from Goudfrooij +et al. (2014), while it is as young as τ 0.89 ± 0.015 Gyr according + to Perren et al. (2017). Since an accurate estimate is not necessary for our exploratory analysis, we took a rough average -and assumed log( τ/ yr) = 9 .1 ± 0. 1. NGC 419 and NGC 1846 -likely exhibit TP-AGB boosting (Girardi et al.2013). We note +and assumed log(τ/yr) = 9.1 ± 0.1. NGC 419 and NGC 1846 +likely exhibit TP-AGB boosting (Girardi et al. 2013). We note that some clusters show multiple stellar populations, whose age -spread has been estimated in some cases (e.g.,Mackey & Broby -Nielsen2007;Joo & Lee2013;Villanova et al.2014) and is consistent +spread has been estimated in some cases (e.g., Mackey & Broby +Nielsen 2007; Joo & Lee 2013; Villanova et al. 2014) and is consistent with the age uncertainties we adopted. Distances of Galactic clusters were also taken from -Kharchenko et al.(2016), while for the Magellanic Clouds and +Kharchenko et al. (2016), while for the Magellanic Clouds and their clusters we adopted the distance moduli µ - LMC = 18 .49 ± -0. 09 mag and µ - SMC = 18. 96 ± 0. 02 mag fromde Grijs et al. +LMC = 18.49 ± +0.09 mag and µ +SMC = 18.96 ± 0.02 mag from de Grijs et al. (2017). We searched for data on interstellar extinction from several - literature works (e.g.,Nayak et al.2016;Kharchenko et al. 
-2016;Perren et al.2017), all of which suggest that extinction + literature works (e.g., Nayak et al. 2016; Kharchenko et al. +2016; Perren et al. 2017), all of which suggest that extinction in the K -s filter is smaller than ∼ 0 .1 mag for most of the clusters - we considered, and at most as large as ∼ 0 .3 mag, which is +s filter is smaller than ∼ 0.1 mag for most of the clusters + we considered, and at most as large as ∼ 0.3 mag, which is negligible for our purposes. Article number, page 2 of 9 Trabucchi et al.: The period-age relation of LPVs @@ -229,13 +229,13 @@ this work, and we relied on the checks performed by authors whose source lists we adopted. It should be kept in mind that some sources may not be real cluster members. For sources without a spectral type, we used the Gaia2MASS - diagram (Lebzelter et al.2018,2019) to determine + diagram (Lebzelter et al. 2018, 2019) to determine whether they are O- or C-rich. We used the near-infrared periodluminosity diagram to identify the most likely pulsation mode associated with each period of each observed source. We selected only FM periods and rejected long secondary periods and periods attributed to overtone mode pulsation. The details of -these classification steps are provided in AppendixA. Out of +these classification steps are provided in Appendix A. Out of 203 sources from the initial list, we identified 95 LPVs pulsating in the FM, consisting of 40 C-rich and 55 O-rich sources. They consist of 29 Miras, 33 semi-regular variables, and 33 other @@ -247,24 +247,24 @@ sources of variability data considered here do not report the uncertainty variability observations, relative period uncertainties are most likely negligible compared with those associated with age. 3. Results -Panel (a) of Fig.1shows a comparison between model predictions +Panel (a) of Fig. 1 shows a comparison between model predictions and observations in the P -FM –log(τ/ yr) plane. The former +FM–log(τ/yr) plane. 
The former are displayed by a density map showing the expected number N - FM of LPVs pulsating in the FM in each period-age bin, normalized +FM of LPVs pulsating in the FM in each period-age bin, normalized to maximum. Model predictions are in good agreement with data derived from observations (i.e., individual LPVs in clusters, represented by symbols), and they show that the period of LPVs pulsating in the FM decreases with increasing age. Crosses mark the average properties of the three groups of Crich - LPVs fromFeast et al.(2006, their table 4), which fit the + LPVs from Feast et al. (2006, their table 4), which fit the general pattern with the exception of their group 3, estimated to be older than what our models predict at P 650. We also show a linear best-fit to the models distribution (weighted by N -FM ), which shows a fairly good agreement with -the best-fit to observations byGrady et al.(2019, also shown). +FM), which shows a fairly good agreement with +the best-fit to observations by Grady et al. (2019, also shown). However, the best-fit line does not fully capture the properties of the predictions, nor of the observed trend. Indeed, models are indicative of a substantial dispersion around the relation. For instance, @@ -284,25 +284,25 @@ with the age uncertainties we adopted. This means that longerperiod opposite is true at shorter periods. This tends to strengthen the agreement between models and observations. Our data set samples the intermediate-age range (NGC 419 -and NGC 1846) relatively well as well as old ages ( ω Cen, 47 +and NGC 1846) relatively well as well as old ages (ω Cen, 47 Tuc, NGC 362, and NGC 2808). This provides us with the opportunity to study the period distribution at these ages, and for a more detailed comparison between models and observations. 
On the basis of the average age of these two groups of clusters and the associated uncertainty, and taking the discrete age sampling of the isochrones into account, we considered the age -ranges log( τ/yr) = 9. 15 ± 0. 10 and log(τ/yr) = 10. 10 ± 0. 20. Period +ranges log(τ/yr) = 9.15 ± 0.10 and log(τ/yr) = 10.10 ± 0.20. Period distributions at those ages are displayed in panels (b) and (c) -of Fig.1, respectively, showing good agreement between model +of Fig. 1, respectively, showing good agreement between model predictions and observations. We note that in both cases, the distribution is skewed toward short periods, which seems to be true -at all ages for O-rich stars. This can be seen in panel (a) of Fig.2, -which is a version of the PA plane limited to an O-rich composition - 2 - . Indeed, although at τ 5 Gyr the observed sample is +at all ages for O-rich stars. This can be seen in panel (a) of Fig. 2, +which is a version of the PA plane limited to an O-rich composition2 + +. Indeed, although at τ 5 Gyr the observed sample is very scarce, it appears to be consistent with models predicting a more densely populated region in the shorter-period half of the PA distribution. -The case of C-stars, shown in panel (b) of Fig.2, is diff erent. +The case of C-stars, shown in panel (b) of Fig. 2, is different. They only form over a restricted range of initial masses and ages, so their occurrence in a given stellar population is an age indicator on its own. Toward the low-mass (old age) side @@ -315,16 +315,16 @@ higher masses, so that younger C-rich models are more concentrated at longer periods, leading to a steeper PA relation compared with the O-rich case. These predictions agree with observations on the old side of the period distribution, while the -scarcity of C stars at τ 0 .6 Gyr prevents us from performing a +scarcity of C stars at τ 0.6 Gyr prevents us from performing a comparison at younger ages. 
-In appendixB, we provide analytic PA relations by fitting the +In appendix B, we provide analytic PA relations by fitting the high-density parts of the O- and C-rich models’ distribution. We emphasize that, because of the large scatter of the relation, ages estimated in this way for individual LPVs are bound to be highly uncertain. As a way to assess the error in age determination, we also provide analytic best-fit relations to the boundaries of the PA distribution of the models in the appendix. These relations -are displayed in Fig.2. +are displayed in Fig. 2. 4. Discussion In general agreement with observations, models confirm that LPVs pulsating predominantly in the FM follow a PA relation, @@ -336,8 +336,8 @@ both patterns emerging because of the prominent role of mass in shaping stellar structure and evolution. Indeed, stellar mass determines the lifetimes of the main evolutionary stages, and thus the age of stars in the AGB phase. Pulsation models (Trabucchi - et al.2021b) show that the radius R -dom ,0 (and corresponding + et al. 2021b) show that the radius R +dom,0 (and corresponding luminosity) at the onset of dominant FM pulsation (DFMP) increases with mass, so that the most massive FM-dominated LPVs are brighter. They also have longer periods, as this increases @@ -350,52 +350,52 @@ this phase would result in a wide range of periods at a given age. It is the very fact that DFMP occurs only during the final portion 2 A further version of the PA plane highlighting both chemical types -can be found in Fig.A.2of appendixA.1. +can be found in Fig. A.2 of appendix A.1. Article number, page 3 of 9 -A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs Fig. 1. Period-age diagram. Panel (a) shows the predicted period-age distribution (darker tones indicate a higher expected number of LPVs on a linear scale, normalized to maximum). 
Symbols represent observed LPVs (green: SRVs; purple: Miras; white: unclassified) with the shape indicating their host cluster or literature source as indicated in the legend. The age uncertainties are marked by the error bars. The groups of -galactic C-stars ofFeast et al.(2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit -to models and the best-fit byGrady et al.(2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked -in panel (a) by the blue and red shaded areas (at log( τ/yr) ∼ 9. 15 and ∼ 10. 10, respectively). For clarity, the eff ect of the TP-AGB boosting is +galactic C-stars of Feast et al. (2006) are marked by crosses annotated with the group number. The solid and dotted line represent a linear best-fit +to models and the best-fit by Grady et al. (2019), respectively. Period distributions at selected ages are compared in panels (b) and (c) and marked +in panel (a) by the blue and red shaded areas (at log(τ/yr) ∼ 9.15 and ∼ 10.10, respectively). For clarity, the effect of the TP-AGB boosting is suppressed in panel (a). -Fig. 2. Similar to Fig.1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while +Fig. 2. Similar to Fig. 1, but limited to O-rich (left panel) and C-rich (right panel) LPVs. The solid line marks the best fit to the models, while dashed lines are best fits to the edges of the model distribution (see the text for more details). of the AGB that limits the range of periods a FM-pulsating LPV can have at a given age. Yet, the DFMP part of the AGB is long enough for significant variations in radius to occur, which result -in the dispersion of the PA relation seen in Fig.1. +in the dispersion of the PA relation seen in Fig. 1. 
At a given initial metallicity Z -i , the shape of the period distribution +i, the shape of the period distribution primarily results from the fact that, throughout the TPAGB (the stage during which the FM is normally excited), the envelope expansion accelerates, while the period becomes progressively - less sensitive to changes in radius (see AppendixC). + less sensitive to changes in radius (see Appendix C). In particular, the slope of the period-radius relation decreases sharply at P b = P(R -b ). The FM period distribution is roughly +b). The FM period distribution is roughly symmetric around that value, but at its short-period side, the FM is not dominant. Therefore, when only FM-dominated LPVs are considered, as is done here, the observed period distribution appears - skewed toward short periods. This feature is strengthened when a set of isochrones is considered + skewed toward short periods. This feature is strengthened when a set of isochrones is considered which spans a range of initial metallicities because the adopted criterion for the onset of DFMP does not depend on metallicity, but the FM period does as metal-poor LPVs are warmer and have smaller radii compared with metal-rich ones. As a consequence, the bulk of the period distribution of metalpoor LPVs is at periods shorter than P -b , so they only contribute +b, so they only contribute to the global distribution (i.e., at all Z i at a given age) over a small period range at P P -b . In contrast, metal-rich LPVs have +b. In contrast, metal-rich LPVs have periods well beyond P - b , so they contribute both at that value and +b, so they contribute both at that value and at longer periods. The result is an excess of FM-dominated LPVs near P -b , that is to say on the short side of the overall period distribution. +b, that is to say on the short side of the overall period distribution. 
We note that, in contrast with the prescription we adopted, the onset of DFMP in reality is probably sensitive to metallicArticle @@ -410,52 +410,52 @@ any realistic stellar environment. In this sense, the PA relation is environment-dependent, and it is not necessarily universal. A further point of uncertainty stems from the fact that the prescription we adopted assumes that the FM period only depends - upon the mass and radius, and that it is a ffected by a -change in composition only through the eff ect that such a variation + upon the mass and radius, and that it is affected by a +change in composition only through the effect that such a variation has on the radius. While this is true to a good approximation, linear models show a small dependence of periods on metallicity at a fixed mass and radius, but the quantitative impact in the nonlinear case is unknown. We can only estimate, based on the -results ofTrabucchi et al.(2019), an uncertainty of ±10% at most +results of Trabucchi et al. (2019), an uncertainty of ±10% at most with respect to the prescriptions adopted here. Qualitatively, a realistic age-metallicity relation and the metallicity dependence of the period and of the onset of DFMP are all expected to result in a steeper PA relation than the one -we predict, but it is di fficult to assess the relative importance of -these e ff ects. In this sense, the composition probably a ffects the +we predict, but it is difficult to assess the relative importance of +these effects. In this sense, the composition probably affects the shape of the PA relation more than its dispersion. The latter is -likely aff ected by the composition indirectly through mass loss, +likely affected by the composition indirectly through mass loss, the analysis of which is beyond the scope of this study. However, we point out that mass loss represents a source of scatter in combination with the occurrence of thermal pulses, because it reduces the minimum radius for the onset of DFMP. 
Thus, during the luminosity dips associated with thermal pulses, a LPV can have a period shorter than the one it had when it first entered the -DFMP regime (see AppendixC). An additional source of uncertainty, +DFMP regime (see Appendix C). An additional source of uncertainty, which we disregarded, is rotation (or other processes that induce extra mixing in the core) which causes a spread in ages -at a given initial mass (cf.Anderson et al.2016, for the case of +at a given initial mass (cf. Anderson et al. 2016, for the case of classical Cepheids). The fairly good agreement between models and observations encourages the use of LPVs as age indicators, but the scatter of the PA relation hampers this application. We attempted to reduce the scatter through corrections involving photometric properties, as is customarily done for classical Cepheids with a color term -(e.g.,Bono et al.2005), but with unsatisfactory results. A correction +(e.g., Bono et al. 2005), but with unsatisfactory results. A correction dependent on the photometric amplitude of variability represents a promising alternative, but it cannot be pursued at the moment. Indeed, for computational efficiency, current pulsation models include only a crude treatment of the atmospheric layers -as they do not aff ect pulsation periods. On the other hand, the +as they do not affect pulsation periods. On the other hand, the atmosphere is crucial in determining the spectral energy distribution and its variation throughout the pulsation cycle, and hence the amplitude of variability. At the same time, the observational sample adopted here is too heterogeneous for a self-consistent investigation of amplitude, but this kind of study could be made possible by the upcoming data release 3 of the Gaia mission -(Gaia Collaboration et al.2021) and the future Legacy Survey -of Space and Time (LSST,Ivezi ´ -c et al.2019) of the Vera Rubin +(Gaia Collaboration et al. 
2021) and the future Legacy Survey +of Space and Time (LSST, Ivezi ´ +c et al. 2019) of the Vera Rubin Observatory. It is worth noting that our analysis applies to Miras as well as SRVs, provided that they predominantly pulsate in the FM. @@ -466,14 +466,14 @@ detect than SRVs, and their light curves are easier to process as they tend to be more regular. Moreover, Miras represent the end-point of AGB evolution, so in principle they correspond to a smaller range of stellar parameters compared to the full extent of the DFMP regime, and they display a smaller range of periods -at a given age (cf.Feast & Whitelock2000b). In other words, +at a given age (cf. Feast & Whitelock 2000b). In other words, they should exhibit a relatively narrow PA relation (even though, based on the observational data set we adopted, there is no conclusive evidence that considering only Miras reduces the scatter of the PA relation). Nonetheless, we caution against this approach as it is prone to introducing uncontrolled biases, as the traditional distinction -between SRVs and Miras is arbitrary (seeTrabucchi et al.2021a, +between SRVs and Miras is arbitrary (see Trabucchi et al. 2021a, and references therein). As such, it disregards the physical processes at the origin of the range of amplitudes characterizing LPVs. In particular, photometric amplitudes are largely determined @@ -527,7 +527,7 @@ scatter. We suggest that corrective terms, involving the amplitude this possibility. A study of the impact of metallicity on nonlinear pulsation is highly desirable to pursue this line of investigation, Article number, page 5 of 9 -A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs as would be a theoretical investigation of the dependence of photometric amplitudes upon global stellar parameters. Acknowledgements. M.T. and N.M. 
acknowledge the support provided by the @@ -537,25 +537,25 @@ this paper, and to Léo Girardi for helping with the computation and interpretat of isochrones. This research has made use of: data from the OGLE-III Catalog of Variable Stars; data products from the Two Micron All Sky Survey, which is a joint project of the University of Massachusetts and the Infrared - Processing and Analysis Center/ California Institute of Technology, funded + Processing and Analysis Center/California Institute of Technology, funded by the National Aeronautics and Space Administration and the National Science Foundation; data from the European Space Agency (ESA) mission Gaia -(https://www.cosmos.esa.int/gaia ), processed by the Gaia Data Processing +(https://www.cosmos.esa.int/gaia), processed by the Gaia Data Processing and Analysis Consortium (DPAC, https://www.cosmos.esa.int/web/ -gaia/dpac/consortium ). Funding for the DPAC has been provided by national +gaia/dpac/consortium). Funding for the DPAC has been provided by national institutions, in particular the institutions participating in the Gaia Multilateral - Agreement. This research has made use of the following free / open source -software and/ or libraries: the Starlink Tables Infrastructure Library (STILTS and -Topcat,Taylor2006); IPython (Pérez & Granger2007) and Jupyter (Kluyver -et al.2016) notebooks; the P ython libraries N umPy (Harris et al.2020), SciP y -(Virtanen et al.2020), matplotlib (a Python library for publication quality graphics,Hunter2007), - and A stropy (a community-developed core Python package -for Astronomy,Astropy Collaboration et al.2018). This research has made use of + Agreement. This research has made use of the following free/open source +software and/or libraries: the Starlink Tables Infrastructure Library (STILTS and +Topcat, Taylor 2006); IPython (Pérez & Granger 2007) and Jupyter (Kluyver +et al. 2016) notebooks; the Python libraries NumPy (Harris et al. 2020), SciPy +(Virtanen et al. 
2020), matplotlib (a Python library for publication quality graphics, + Hunter 2007), and Astropy (a community-developed core Python package +for Astronomy, Astropy Collaboration et al. 2018). This research has made use of NASA’s Astrophysics Data System Bibliographic Services, and of the following services provided by CDS, Strasbourg: the SIMBAD data base, VizieR catalogue -access tool (DOI: 10.26093/ cds/ vizier,Ochsenbein et al.2000), the “Aladin sky -atlas” (Bonnarel et al.2000), and the cross-match service (Boch et al.2012; -Pineau et al.2020). +access tool (DOI: 10.26093/cds/vizier, Ochsenbein et al. 2000), the “Aladin sky +atlas” (Bonnarel et al. 2000), and the cross-match service (Boch et al. 2012; +Pineau et al. 2020). References Anderson, R. I., Saio, H., Ekström, S., Georgy, C., & Meynet, G. 2016, A&A, 591, A8 @@ -601,7 +601,7 @@ Grady, J., Belokurov, V., & Evans, N. W. 2019, MNRAS, 483, 3022 Grady, J., Belokurov, V., & Evans, N. W. 2020, MNRAS, 492, 3128 Harris, C. R., Millman, K. J., van der Walt, S. J., et al. 2020, Nature, 585, 357 Hunter, J. D. 2007, Computing in Science & Engineering, 9, 90 -Ivezi ´ +Ivezi´ c, Ž., Kahn, S. M., Tyson, J. A., et al. 2019, ApJ, 873, 111 Jayasinghe, T., Stanek, K. Z., Kochanek, C. S., et al. 2020, MNRAS, 491, 13 Joo, S.-J. & Lee, Y.-W. 2013, ApJ, 762, 36 Jura, M. & Kleinmann, S. G. 1992, ApJS, 79, 105 @@ -682,33 +682,33 @@ Wyatt, S. P. & Cahn, J. H. 1983, ApJ, 275, 225 Ya’Ari, A. & Tuchman, Y. 1996, ApJ, 456, 350 Article number, page 6 of 9 Trabucchi et al.: The period-age relation of LPVs -Fig. A.1. Absolute- K - s Gaia -2MASS diagram for the stars with or without +Fig. A.1. Absolute-K +s Gaia-2MASS diagram for the stars with or without a spectral type (left and right panels, respectively) in the selected sample. Symbol colors and shapes indicate the spectral type and host cluster described in the legend, respectively, which also reports the number of sources displayed (i.e., having both optical and NIR photometry). 
The dashed line marks the separation between O- and C-rich sources -according toLebzelter et al.(2018). An arrow marks the source MSX +according to Lebzelter et al. (2018). An arrow marks the source MSX LMC 124 in NGC 1830 that, having W -BP, RP − W -J, K - s = 9. 73 mag, lies outside +BP,RP − W +J,K +s = 9.73 mag, lies outside the plot area. Background dots are LPVs in the LMC from OGLEIII - (light gray) andMowlavi et al.(2018) (darker gray). + (light gray) and Mowlavi et al. (2018) (darker gray). Appendix A: Classification of observed LPVs Appendix A.1: Spectral type -We adopted the spectral types provided byLebzelter & Wood -(2007) andKamath et al.(2010) for 52 of the LPVs they studied +We adopted the spectral types provided by Lebzelter & Wood +(2007) and Kamath et al. (2010) for 52 of the LPVs they studied in NGC 1846, NGC 1978, and NGC 419. The only exception is the star 5-3 in NGC 419, for which we adopted the S-type as -reported byLloyd Evans(1983a). +reported by Lloyd Evans (1983a). We also searched the SIMBAD astronomical database -(Wenger et al.2000) for spectral type information, which we +(Wenger et al. 2000) for spectral type information, which we found for 26 more stars. We used the Gaia-2MASS diagram of -Lebzelter et al.(2018) to confirm the chemical type classification +Lebzelter et al. (2018) to confirm the chemical type classification taken from literature and to characterize the surface chemistry of -sources of an unknown spectral type (see Fig.A.1). Among the +sources of an unknown spectral type (see Fig. A.1). Among the latter, we identified 13 C-rich stars and 106 O-rich sources. Three of the sources without a spectral type lack Gaia photometry, so they cannot be classified with the Gaia-2MASS. Two @@ -718,17 +718,17 @@ their position in the J − K s versus K s color-magnitude diagram. The third source is one of the two stars in NGC 1903 from the -list ofGrady et al.(2019), which we identified with the 2MASS +list of Grady et al. 
(2019), which we identified with the 2MASS source J05171633-6920298. It is likely C-rich according to the NIR color-magnitude diagram. Finally, the sources V138 in ω Cen, LW15 in NGC 2808, and LW4 in NGC 362 lack NIR data. They cannot be placed in the NIR PL diagram, upon which we relied to assign pulsation modes to periods, so we excluded them from the sample. The distribution of O- and C-rich sources in the period-age diagram -is shown in Fig.A.2. +is shown in Fig. A.2. Appendix A.2: Variability For variability information, we complemented the data from -Lebzelter & Wood andKamath et al.(2010) with the catalogs +Lebzelter & Wood and Kamath et al. (2010) with the catalogs from OGLE-III, ASAS-SN, and Gaia DR2. Combining these data sets, we found at least one period for each of the 176 sources in our sample. @@ -736,25 +736,25 @@ In order to identify the pulsation mode most likely responsible for periods in a given source, we assumed that the second overtone mode is associated with sequence A, the first overtone mode with sequences B and C - , and the fundamental mode with -sequence C (e.g.,Trabucchi et al.2017). We excluded long secondary +, and the fundamental mode with +sequence C (e.g., Trabucchi et al. 2017). We excluded long secondary periods on sequence D as they are not due to stellar pulsation (Soszy ´ -nski et al.2021, and references therein), and we +nski et al. 2021, and references therein), and we used the pattern of PL sequences in the LMC as a reference to -guide the mode identification (cf.Trabucchi et al.2021a). +guide the mode identification (cf. Trabucchi et al. 2021a). We performed this classification separately for periods coming from each distinct data set. If two or more periods from different data sets were assigned to the same pulsation mode, we retained only one of those periods, with priority to the values -from Lebzelter & Wood andKamath et al.(2010). If the latter +from Lebzelter & Wood and Kamath et al. (2010). 
If the latter authors do not provide this information, we adopted the period from OGLE-III if available, and otherwise from ASAS-SN or from Gaia DR2. -For some sources, the periods reported in di fferent catalogs +For some sources, the periods reported in different catalogs were assigned to the same mode through this procedure. In most cases, these periods are reasonably similar to each other. Only -in a few cases were they significantly di ff erent, but this did not +in a few cases were they significantly different, but this did not alter our conclusions. When available, the variability type was taken from OGLEIII or ASAS-SN. We note that we are only interested in whether @@ -765,7 +765,7 @@ instance, as an LPV or AGB in SIMBAD, in which case we considered Appendix B: Fitting relations We obtained analytic expressions for the PA relations separately for O- and C-rich stars, proceeding as follows. For each bin of -log( τ/ yr), we modeled the period distribution with a Gaussian +log(τ/yr), we modeled the period distribution with a Gaussian kernel density estimator (KDE) and identified the peak of the distribution. To describe the boundaries of the PA relation, we adopted, at each age, the values of the period at which the distribution @@ -773,19 +773,19 @@ adopted, at each age, the values of the period at which the distribution value upon visual inspection of the PA plane. We modeled the central trend of the PA relation, as well as its short- and longperiod edges, with linear or quadratic functions in the form -log( τ/ yr) = a +log(τ/yr) = a 0 + a -1 ( P/ ˜ -P ) + a -2 ( P/ ˜ -P) 2 +1 (P/ ˜ +P) + a +2 (P/ ˜ +P)2 , (B.1) (where ˜ P = 350 days) and employed a Lenvenberg-Marquardt -nonlinear regression algorithm 3 - to derive the best-fit coeffi cients, -which are listed in TableB.1. We remark that these best-fit expressions - are only valid in the intervals 8 . 0 ≤ log( τ/ yr) ≤ 10 . 
3 +nonlinear regression algorithm3 + to derive the best-fit coefficients, +which are listed in Table B.1. We remark that these best-fit expressions + are only valid in the intervals 8.0 ≤ log(τ/yr) ≤ 10.3 and 20 < P/days < 700 for O-rich composition, and within 3 We made use of the Python library SciPy to perform Gaussian KDE @@ -793,57 +793,57 @@ modeling and best-fit, respectively, by means of the gaussian_kde tool from the stats module and the curve_fit function from the optimize module. Article number, page 7 of 9 -A & A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs -Fig. A.2. Similar to Fig.1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). -Table B.1. Best-fit coe ffi cients for the PA relation and its boundaries in -the form given in Eq.B.1. +A&A proofs: manuscript no. trabucchi_etal_2022_period_age_relation_of_lpvs +Fig. A.2. Similar to Fig. 1, except each source is color-coded according to whether it has been classified as O-rich (blue) or C-rich (red). +Table B.1. Best-fit coefficients for the PA relation and its boundaries in +the form given in Eq. B.1. Sp. type relation a 0 a - 1 a - 2 +1 a +2 O-rich center 10.78 -2.660 0.5953 lower edge 10.46 -2.818 0.6578 upper edge 10.54 -0.8187 -0.2335 C-rich center 9.755 -0.7532 lower edge 9.982 -1.698 upper edge 8.498 -1.827 -0.9959 -8 .6 ≤ log(τ/ yr) ≤ 9 .3 and 140 < P/days < 620 in the C-rich +8.6 ≤ log(τ/yr) ≤ 9.3 and 140 < P/days < 620 in the C-rich case. Because of the connection between age and initial mass, the PA relation can be translated into a period-initial mass relation, which we derived using the same approach described above, and assuming the form -log( M - i / M - ) = b - 0 + b -1 ( P/ ˜ +log(M +i/M +) = b +0 + b +1 (P/ ˜ P) + b - 2 ( P/ ˜ +2 (P/ ˜ P)2 . (B.2) -The resulting best-fit lines are displayed in Fig.B.1, and the coe - fficients are given in TableB.2. 
+The resulting best-fit lines are displayed in Fig. B.1, and the coefficients + are given in Table B.2. We remark that both the PA and the period-initial mass relations depend on model assumptions, in particular mass loss and mixing, as well as on the properties of the population of LPVs, namely the star-formation history and age-metallicity relation. Appendix C: The shape of the period distribution -As an example case, we consider an isochrone of age log(τ/ yr) = -8 .3 and initial metallicity Z - i = 0 .006. Stars on the TP-AGB have +As an example case, we consider an isochrone of age log(τ/yr) = +8.3 and initial metallicity Z +i = 0.006. Stars on the TP-AGB have initial masses M - i 3. 85 M - over a small range of ∼ 10 −3 +i 3.85 M + over a small range of ∼ 10−3 M - . +. The relation between period and initial mass is displayed in -panel (a) of Fig.C.1, where isochrone portions undergoing Table B.2. Best-fit coefficients for the period-initial mass relation and -its boundaries in the form given in Eq.B.2. +panel (a) of Fig. C.1, where isochrone portions undergoing Table B.2. Best-fit coefficients for the period-initial mass relation and +its boundaries in the form given in Eq. B.2. Sp. type relation b - 0 b - 1 b - 2 +0 b +1 b +2 O-rich center -0.2790 0.8958 -0.1828 lower edge -0.1772 0.9975 -0.2203 upper edge -0.1740 0.2783 0.8247 @@ -851,37 +851,37 @@ C-rich center -0.0304 0.2885 lower edge -0.0131 0.5752 upper edge -0.2245 -0.2720 0.2343 DFMP are indicated by solid lines. Panel (b) shows the period -distributions for a few di ff erent cases. +distributions for a few different cases. It is instructive, to begin with, to ignore the effect of thermal pulses and consider only the quiescent evolution (green lines in -Fig.C.1). The smallest initial mass corresponds to a star that just +Fig. C.1). The smallest initial mass corresponds to a star that just entered the TP-AGB, when the FM has a period of ∼ 240 days but is not dominant. 
It only becomes dominant above a threshold radius R -dom, 0 , that is for periods longer than a (mass-dependent) +dom,0, that is for periods longer than a (mass-dependent) critical period P -dom ,0 (the solid gray line in Fig.C.1). The least +dom,0 (the solid gray line in Fig. C.1). The least evolved (quiescent) model with dominant FM has P FM 360 days (green circle and horizontal line), corresponding to a sharp -cut in the period distribution shown in panel (b) of Fig.C.1. +cut in the period distribution shown in panel (b) of Fig. C.1. As a star evolves along the AGB it expands, and its period becomes longer in response to the increase in radius. Models with a higher initial mass are more evolved, hence they have a larger radius and a longer period. The rate at which a period increases with radius is not fixed, but rather decreases with evolution. According - to the prescription ofTrabucchi et al.(2021b), a period -grows with radius as a broken power-law with exponent α 1. 8 + to the prescription of Trabucchi et al. (2021b), a period +grows with radius as a broken power-law with exponent α 1.8 if R < R -b , and with α 1. 25 at larger radii. +b, and with α 1.25 at larger radii. This is equivalent to saying that the period grows more slowly after it exceeds a critical value P -b = P( R -b ), marked by -the gray dotted line in Fig.C.1. The isochrone reaches it at +b = P(R +b), marked by +the gray dotted line in Fig. C.1. The isochrone reaches it at Article number, page 8 of 9 Trabucchi et al.: The period-age relation of LPVs -Fig. B.1. Similar to Fig.2, but showing initial mass M +Fig. B.1. Similar to Fig. 2, but showing initial mass M i in place of age. The best-fit lines to the most populated band and edges of the theoretical P FM – M @@ -889,7 +889,7 @@ i relation are shown. Fig. C.1. Period distribution at fixed age and metallicity. 
Panel (a) shows period as a function of initial mass (current mass on the top axis) on the TP-AGB for a ∼ 200 Myr old isochrone with Z -i = 0. 006. Red lines +i = 0.006. Red lines show full thermal pulses, while blue lines ignore luminosity spikes and green lines show only the quiescent evolution. The same color code is used for the period distributions (normalized to their maximum) on @@ -901,7 +901,7 @@ dominant (solid line), less sensitive to radius (dotted line, which occurs at the vertical line for this specific isochrone), and independent of radius (dashed line). M -i 3. 8524 M +i 3.8524 M (vertical gray line), when P FM 420 days. In models with a smaller initial mass, the period is still increasing @@ -909,15 +909,15 @@ at a relatively large rate as the envelope expands, while in more massive models the period has already become less sensitive to changes in radius. This is reflected by a slight inflection of the green curve, which corresponds to the maximum in the period -distribution shown in panel (b) of Fig.C.1. The period distribution +distribution shown in panel (b) of Fig. C.1. The period distribution of the full TP-AGB range is roughly symmetric around this maximum, while limiting the selection to DFMP, produces -a distribution skewed toward short periods, as found in Sect.3. +a distribution skewed toward short periods, as found in Sect. 3. If the luminosity dips following thermal pulses are taken into account (blue lines), the corresponding envelope contrac- tion causes the period to decrease, and the cut at ∼ 360 days becomes less sharp. Because of mass loss, the threshold period P -dom ,0 is lowered, so that the shortest period associated with +dom,0 is lowered, so that the shortest period associated with DFMP does not correspond to the least evolved model (green circle), but rather to the luminosity dip of a thermal pulse (blue circle). @@ -928,5 +928,5 @@ spikes alters the period distribution at long periods very little. 
Luminosity spikes are relevant only for relatively massive and young TP-AGB stars, and they give rise to the poorly populated portion of the PA relation at the longest periods, as seen in panel -(a) of Fig.2. +(a) of Fig. 2. Article number, page 9 of \ No newline at end of file diff --git a/read/results/playa/2201.00214.txt b/read/results/playa/2201.00214.txt index 91eaf9e..06db44a 100644 --- a/read/results/playa/2201.00214.txt +++ b/read/results/playa/2201.00214.txt @@ -1,507 +1,506 @@ -arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022Temperature Analysis of Flaring +arXiv:2201.00214v1 [astro-ph.SR] 1 Jan 2022 Temperature Analysis of Flaring (AR11283) and non-Flaring (AR12194) Coronal Loops N. Fathalian1 - , S. S. H osseini R ad 2 - , N. A lipour2 - , H. Safari2 + , S. S. Hosseini Rad2 +, N. Alipour2 +, H. Safari2 1 - Department of Physics, Payame Noor University (PNU), 19395 -3697, Tehran, Iran. +Department of Physics, Payame Noor University (PNU), 19395-3697, Tehran, Iran. 2 - Department of Physics, Faculty of Science, University of Za njan, 45195-313, Zanjan, Iran. +Department of Physics, Faculty of Science, University of Zanjan, 45195-313, Zanjan, Iran. e-mail: narges_fathalian@alum.sharif.edu January 4, 2022 Abstract -Here, we study the temperature structure of flaring and non-fl aring coronal loops, using extracted +Here, we study the temperature structure of flaring and non-flaring coronal loops, using extracted loops from images taken in six extreme ultraviolet (EUV) channels recorded by Atmospheric Imaging -Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use dat a for loops of X2.1-class-flaring active -region (AR11283) during 22:10UT till 23:00UT, on 2011, Sept ember 6; and non-flaring active region -(AR12194) during 08:00:00UT till 09:00:00UT on 2014, Octob er 26. By using spatially-synthesized -Gaussian DEM forward-fitting method, we calculate the peak t emperatures for each strip of the loops. 
-We apply the Lomb-Scargle method to compute the oscillation s periods for the temperature series of each -strip. The periods of the temperature oscillations for the fl aring loops are ranged from 7 min to 28.4 -min. These temperature oscillations show very close behavi or to the slow-mode oscillation. We observe -that the temperature oscillations in the flaring loops are st arted at least around 10 minutes before the -transverse oscillations and continue for a long time durati on even after the transverse oscillations are -ended. The temperature amplitudes are increased at the flari ng time (during 20 min) in the flaring loops. +Assembly (AIA)/ Solar Dynamic Observatory (SDO). We use data for loops of X2.1-class-flaring active +region (AR11283) during 22:10UT till 23:00UT, on 2011, September 6; and non-flaring active region +(AR12194) during 08:00:00UT till 09:00:00UT on 2014, October 26. By using spatially-synthesized +Gaussian DEM forward-fitting method, we calculate the peak temperatures for each strip of the loops. +We apply the Lomb-Scargle method to compute the oscillations periods for the temperature series of each +strip. The periods of the temperature oscillations for the flaring loops are ranged from 7 min to 28.4 +min. These temperature oscillations show very close behavior to the slow-mode oscillation. We observe +that the temperature oscillations in the flaring loops are started at least around 10 minutes before the +transverse oscillations and continue for a long time duration even after the transverse oscillations are +ended. The temperature amplitudes are increased at the flaring time (during 20 min) in the flaring loops. The periods of the temperatures obtained for the non-flaring loops are ranged from 8.5 min to 30 min,but -their significances are less (below 0.5) in comparison with t he flaring ones (near to one). Hence the +their significances are less (below 0.5) in comparison with the flaring ones (near to one). 
Hence the detected temperature periods for the non-flaring loops’ strips are less probable in comparison with the -flaring ones, and maybe they are just fluctuations. Based on ou r confined observations, it seems that the +flaring ones, and maybe they are just fluctuations. Based on our confined observations, it seems that the flaring loops’ periods show more diversity and their temperatures have wider ranges of variation than the non-flaring ones. More accurate commentary in this respect requires more extensive statistical research and broader observations. Coronal Loops,Temperature Analysis, Temperature Oscillations,Flaring and non-Flaring Active Regions -I. I ntroduction +I. Introduction Analyzing the thermal structure of coronal loops is of considerable interest, especially as these magnetic loops have an essential role in heating the solar chromosphere and corona. Such analysis can help to describe how the process of solar flaring is correlated with the loop’s thermal structure. Detections of coronal waves have a historical preview and have been reported for several times (e.g., - Aschwanden et al. ( 1999 ); Nakariakov et al. ( 1999 ); Wang et al. ( 2003 ); Wang & Solanki ( 2004 ); -Berghmans & Clette ( 1999 ); De Moortel et al. ( 2000 ), Verwichte et al. ( 2004 ), De Moortel & Brady -( 2007 ), Ballai et al. ( 2011 )). Coronal seismology and MHD waves have been reviewed wide ly by + Aschwanden et al. (1999); Nakariakov et al. (1999);Wang et al. (2003); Wang & Solanki (2004); +Berghmans & Clette (1999); De Moortel et al. (2000), Verwichte et al. (2004), De Moortel & Brady +(2007), Ballai et al. (2011)). Coronal seismology and MHD waves have been reviewed widely by -De Moortel ( 2005 ), Nakariakov & Verwichte ( 2005 ), Aschwanden ( 2006 ), Banerjee et al. ( 2007 ) and -De Moortel & Nakariakov ( 2012 ). Along with the development of the observations, transver se -and longitudinal oscillations have also been studied theor etically (e.g., Gruszecki et al. 
( 2006 ), -Pascoe et al. ( 2007 ), Fathalian et al. ( 2010 ); Luna et al. ( 2010 ); Fathalian & Safari ( 2010 ). Coronal +De Moortel (2005), Nakariakov & Verwichte (2005), Aschwanden (2006), Banerjee et al. (2007) and +De Moortel & Nakariakov (2012). Along with the development of the observations, transverse +and longitudinal oscillations have also been studied theoretically (e.g., Gruszecki et al. (2006), +Pascoe et al. (2007), Fathalian et al. (2010); Luna et al. (2010); Fathalian & Safari (2010). Coronal seismology techniques help to elicit the information from observations of oscillatory phenomena and the results to be interpreted by using theoretical models (see for e.g., - Roberts et al. ( 1984 ); -Goossens et al. ( 1992 )). Oscillatory patterns and processes which happen during solar flares, were -interesting and subject of investigations from different a pproaches (e.g., Nakariakov et al. ( 2010 ), -Nisticò et al. ( 2013 ), Anfinogentov et al. ( 2013 ), Hindman & Jain ( 2014 ), Russell et al. ( 2015 )). As -we know the transverse loops oscillations usually occur in r esponse to a close filament or flare -( Wills-Davey & Thompson ( 1999 )). -Rapidly decaying long-period oscillations are mostly inte rpreted as global (or fundamental - mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman ( 2014 ), and Wang -( 2011 ), also see Ofman & Wang ( 2002 ), and for slow-mode observed in fan-loops see Pant et al. -( 2017 )). They often occur in hot coronal loops of active regions, a ssociated with tiny (or micro-) + Roberts et al. (1984); +Goossens et al. (1992)). Oscillatory patterns and processes which happen during solar flares, were +interesting and subject of investigations from different approaches (e.g., Nakariakov et al. (2010), +Nisticò et al. (2013), Anfinogentov et al. (2013), Hindman & Jain (2014), Russell et al. (2015)). 
As +we know the transverse loops oscillations usually occur in response to a close filament or flare +(Wills-Davey & Thompson (1999)). +Rapidly decaying long-period oscillations are mostly interpreted as global (or fundamental + mode) standing slow magnetoacoustic waves (reviewed by Liu & Ofman (2014), and Wang +(2011), also see Ofman & Wang (2002), and for slow-mode observed in fan-loops see Pant et al. +(2017)). They often occur in hot coronal loops of active regions, associated with tiny (or micro-) flares.Increasing evidence has suggested that the harmonic type of decaying pulsations detected -in intensity plots of solar and stellar flares are possibly ca used by standing slow-mode waves (see -reviews by Van Doorsselaere et al. ( 2016 ), and McLaughlin et al. ( 2018 )).Excitation, propagation, +in intensity plots of solar and stellar flares are possibly caused by standing slow-mode waves (see +reviews by Van Doorsselaere et al. (2016), and McLaughlin et al. (2018)).Excitation, propagation, and damping mechanisms of slow-mode waves have been studied theoretically (e.g., Wang et al. -( 2007 ); Wang et al. ( 2015 ); Jess et al. ( 2016 ); Nakariakov et al. ( 2017 ); Nisticò et al. ( 2017 ); Kolotkov -et al. ( 2019 ); Krishna Prasad et al. ( 2019 ); Reale et al. ( 2019 ); Wang & Ofman ( 2019 )). To have -a complete overview of slow-mode magnetoacoustic waves in c oronal loops see the review by -Wang et al. ( 2021 ). -Investigating and comparing the thermal structures and osc illations of coronal loops in loops +(2007); Wang et al. (2015); Jess et al. (2016); Nakariakov et al. (2017); Nisticò et al. (2017); Kolotkov +et al. (2019); Krishna Prasad et al. (2019); Reale et al. (2019); Wang & Ofman (2019)). To have +a complete overview of slow-mode magnetoacoustic waves in coronal loops see the review by +Wang et al. (2021). 
+Investigating and comparing the thermal structures and oscillations of coronal loops in loops of flaring and non-flaring active regions could help us in better understanding the loops’ material oscillations and the flare impact on them. Several different methods have been developed to investigate the thermal structure of the coronal loops and loop strands. The thermal stability of the -coronal loops was the subject of research, done by Habbal & Rosner ( 1979 ) (and references cited -therein). McClymont & Craig ( 1985 ) stated that a pressure fluctuation must assist asymmetric +coronal loops was the subject of research, done by Habbal & Rosner (1979) (and references cited +therein). McClymont & Craig (1985) stated that a pressure fluctuation must assist asymmetric coronal temperature perturbation. They concluded that coronal loops are impartially stable in the case of uniform heating. - Van Doorsselaere et al. ( 2011 ) used spectroscopic line ratios to obtain + Van Doorsselaere et al. (2011) used spectroscopic line ratios to obtain the required temperature (via CHIANTI code) and estimated the adiabatic index of the corona. The dependence of coronal loop temperature on loop length and magnetic field strength is also -a favorite topic. For instance, Dahlburg et al. ( 2018 ) probed the temperature properties of solar +a favorite topic. For instance, Dahlburg et al. (2018) probed the temperature properties of solar coronal loops over a wide range of lengths and magnetic field strengths via numerical simulations and observed a very high correlation between magnetic field strength and a maximum of the temperature. The effect of temperature inhomogeneity on the periods and the damping times of the standing slow-modes in stratified solar coronal loops was studied either (e.g., Abedini et al. -( 2012 )). Fathalian ( 2019 ) estimated the loop temperature using the intensity ratios and the AIA response +(2012)). 
Fathalian (2019) estimated the loop temperature using the intensity ratios and the AIA response functions in different wavelengths. Different emission measure (DEM) computations and methods have been developed to estimate the temperature in the corona, which led to various discussions. - Schmelz et al. ( 2010 ) analyzed a coronal loop, which was observed on 2010 August -3, by AIA. They took some differential emission measure (DEM ) curves, claiming a multithermal + Schmelz et al. (2010) analyzed a coronal loop, which was observed on 2010 August +3, by AIA. They took some differential emission measure (DEM) curves, claiming a multithermal rather than an isothermal DEM distribution (for the cross-sectional temperature of the loop). After - that, Aschwanden & Boerner ( 2011 ) criticized the method of background subtraction which + that, Aschwanden & Boerner (2011) criticized the method of background subtraction which Schmelz et al. had applied. They claimed that the background subtraction method caused their -inferred result of a multithermal loop. Aschwanden & Boerner ( 2011 ) analyzed a set of hundred +inferred result of a multithermal loop. Aschwanden & Boerner (2011) analyzed a set of hundred loops and understood that 66% of the loops could be fitted with a narrowband single-Gaussian DEM model. In this regard, some attention was paid to the instrumental limitations and ability - of AIA and Guennou et al. ( 2012a , b ) discussed on the accuracy of the differential emission + of AIA and Guennou et al. (2012a,b) discussed on the accuracy of the differential emission measure diagnostics of solar plasmas in respect of the AIA instrument of SDO. The abovementioned controversy of whether the cross-field temperatures of coronal loops are multithermal or isothermal, continued by - Schmelz et al. ( 2013 ) (similar to Schmelz et al. ( 2011 )). They analyzed + Schmelz et al. (2013) (similar to Schmelz et al. (2011)). 
They analyzed twelve loops to understand the cross-field temperature distributions of them and reveal the loops’ -substructure. Based on their achievements, the warmer loop s entail broader DEMs. Thereafter, -Schmelz et al. ( 2014 ) found indications of a relationship between the DEM weighted-temperature +substructure. Based on their achievements, the warmer loops entail broader DEMs. Thereafter, +Schmelz et al. (2014) found indications of a relationship between the DEM weighted-temperature and the cross-field DEM width for coronal loops. They argued that cooler loops tend to have narrower DEM widths. This could imply that fewer strands are seen emitting in the later cooling - phase, which they claim could potentially resolve the ab ovementioned controversy. In this -subject, Aschwanden et al. ( 2015 ) (as well as 2013 ( Aschwanden, 2013 )) developed a method to + phase, which they claim could potentially resolve the abovementioned controversy. In this +subject, Aschwanden et al. (2015) (as well as 2013 (Aschwanden, 2013)) developed a method to extract the loop temperature which is based on Gaussian fit for Differential Emission Measure, named spatially-synthesized Gaussian DEM forward-fitting method (DEM hereafter). This paper aims to analyze and compare thermal oscillations of coronal loops in flaring and non-flaring active regions, 11283 and 12194, respectively. The contents of this paper are as follows: In section - II , data, we introduce the considered flaring and non-flaring ac tive regions and describe -the data employed and the time and properties of the flare, occ urred in the active region. In -section III , we explain the method we use to analyze the time-series of te mperatures in different + II, data, we introduce the considered flaring and non-flaring active regions and describe +the data employed and the time and properties of the flare, occurred in the active region. 
In +section III, we explain the method we use to analyze the time-series of temperatures in different strips of the loops. Section IV is specified to our results, obtained related to flaring and nonflaring regions. In section V we briefly state a summary of this work. II. Data We investigate the thermal structure and treatment of loops in a flaring region to see if it follows the transverse oscillations of the loops, and we examine the thermal fluctuations at the flare time. For this purpose, we select a high energy flare x2.1 which the transverse oscillations of two loops -of it have been analyzed by Jain et al. ( 2015 ). They analyzed intensity variations in the wavelength +of it have been analyzed by Jain et al. (2015). They analyzed intensity variations in the wavelength 171 in two coronal loops of this region and detected obvious transverse oscillation with periods of roughly 2 minutes and decay times of 5 minutes for these loops at the flare time. To see the specific thermal properties of the flaring loops, as a blind test, we select a non-flaring active -region, extract its loops and analyze their thermal treatme nt. Then we compare the temperature +region, extract its loops and analyze their thermal treatment. Then we compare the temperature treatment of the loops at the flaring region with the loops of the non-flaring region to see the differences. -The temperature analysis done here uses EUV images from the A IA onboard the SDO. AIA +The temperature analysis done here uses EUV images from the AIA onboard the SDO. AIA has ten different wavelength channels, three in white light and UV, and the other seven in EUV channels. Between these seven, the 304 filter, which is mostly sensitive to chromospheric temperatures - (in order of T = 10 4.7 - K), not the corona, is not taken into account (Aschwanden et a l. 2015). + (in order of T = 104.7 +K), not the corona, is not taken into account (Aschwanden et al. 2015). 
Therefore, we consider the images of the events in the six wavelengths (94, 131, 171, 193, 211, 335 -). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK . +). These are covering the coronal temperature range from T ≈ 0.6 to T ≥ 16 MK. The two below data sets are finally selected to study thermal variations and coronal loops -oscillations in flaring or non-flaring active regions. A few d istinct loops are visible in the regions. +oscillations in flaring or non-flaring active regions. A few distinct loops are visible in the regions. Finally, these loops are chosen: – Three loops of the x-flaring active region 11283: Observationally, the X-class flares are rarely happening around the loops with the specification we are looking for. So this selected LOS X-flare, which occurs near the loops is of rare cases. We consider EUV images of NOAA -AR 11283, in the time period of 22:10UT till 23:00UT of 2011 Se ptember 6 with the cadence +AR 11283, in the time period of 22:10UT till 23:00UT of 2011 September 6 with the cadence of 12 sec. This period of time is selected since no other flare is happening during it. A few distinct loops are visible and follow-able here during this period. Loop shapes in our active region change permanently; therefore, it is difficult or impossible to follow a loop over a very long time. Hence, it is not useful to extend the time interval of this region to the time before the flare. The transverse oscillations of two loops in this region were analyzed before by - Jain et al. ( 2015 ). We mark these loops by A and B in Figure 1 b. They + Jain et al. (2015). We mark these loops by A and B in Figure 1 b. They detected fundamental mode oscillation with periods of roughly 2 minutes and decay time of 5 minutes for these loops. We are curious to see the loops’ thermal oscillations (if any) -or thermal fluctuations in this condition. 
Figure 1 a (left) displays AR 11283 and the area, -indicated by the white box is featured in a zoom-in view in Figure 1 .b (right) and the five +or thermal fluctuations in this condition. Figure 1a (left) displays AR 11283 and the area, +indicated by the white box is featured in a zoom-in view in Figure 1.b (right) and the five selected parts of the center of the three chosen loops are shown by red lines (the movie of -the region is available in this link). As it is clear in the mov ie, these three loops oscillate -together and their oscillations decay simultaneously. The center of figure 1 .a is coordinated -at (230, 165) arcsec and its width and height are 450 ′′ - × 456 ′′ +the region is available in this link). As it is clear in the movie, these three loops oscillate +together and their oscillations decay simultaneously. The center of figure 1.a is coordinated +at (230, 165) arcsec and its width and height are 450′′ + × 456′′ /750 × 775 pixels. The flare occurring in this active region is an X2.1 class flare located close to the disk center at latitude -14 ◦ - north and longitude 18 ◦ - west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22 :12UT, +14◦ + north and longitude 18◦ + west (269.9 arcsec, 129.9 arcsec). This flare initiates at 22:12UT, ends about 22:24UT with the peak at 22:20UT, and associates with a coronal mass ejection -(CME) which occurs from 2011 September 6, 21:36:05T to 2011 S eptember 7, 02:24:05T, with +(CME) which occurs from 2011 September 6, 21:36:05T to 2011 September 7, 02:24:05T, with the radial velocity of 469 km/s,angular width of 252 deg, and position angle of 275 deg (for more details look at LASCO CME catalogue.) 
1 -– Three loops of non-flaring active region 12194: As a blind te st, we select three loops of the +– Three loops of non-flaring active region 12194: As a blind test, we select three loops of the non-flaring (nonf hereafter) active region 12194 in the smooth time period of 08:00:00UT till -09:00:00UT of 2014 October 26. The center of figure 2 .a is coordinated at (0, -264) arcsec -and its width and height are 615 ′′ - × 615 ′′ +09:00:00UT of 2014 October 26. The center of figure 2.a is coordinated at (0, -264) arcsec +and its width and height are 615′′ + × 615′′ /1025 × 1025 pixels. We consider the images of the selected area with the cadence of 12 sec in the same six wavelengths mentioned above. -These loops are relatively motionless and do not show any tra nsversal oscillation (see the +These loops are relatively motionless and do not show any transversal oscillation (see the region’s movie in the link). We select the loops in such a way that they do not have any crossing over the neighbor loops (in our perspective) during this time. In figure 2 the selected loops are distinguished in red in the mentioned active region. The size of the final cut of non-flaring region (represented in the right) is 351 × 401 pixels. -The data set are primarily downloaded at level 1 with a pixel r esolution of 0.6 arcsec. We use -the standard aia _ pre p . pro subroutine available in SDO package SolarSoftWare library to adjust +The data set are primarily downloaded at level 1 with a pixel resolution of 0.6 arcsec. We use +the standard aia_ pre p. pro subroutine available in SDO package SolarSoftWare library to adjust the screen scale between the four arms of the AIA. This pre-processing step increases the data level from 1 to 1.5, so that finally no jump or sudden movement is observed in the image series. -We also used drot _ ma p . pro subroutine to correct the differential rotation effect. 
Ac cording to the -movie made by pre-processed images, the most obvious loops ( marked in the abovementioned +We also used drot_ma p. pro subroutine to correct the differential rotation effect. According to the +movie made by pre-processed images, the most obvious loops (marked in the abovementioned figures) are selected in each region (with obvious transversal oscillations in the case of the flaring active region). - III. Temperature A nalysis Method -We extract the selected loop segment pixels, for each loop, a nd calculate the normal vectors + III. Temperature Analysis Method +We extract the selected loop segment pixels, for each loop, and calculate the normal vectors to each point of the loop’s direction. Then by using these data, we straighten each loop in a -considered box with the thickness of 15 to 40 pixels (macro-p ixels, depending on the available +considered box with the thickness of 15 to 40 pixels (macro-pixels, depending on the available empty area around each loop and the distance to the neighbor loop). The area around the loop is needed for calculations of background subtraction. The selected loop segment is cut in 1 - Based on data on these WebSites: https://solarflare.njit.e du/webapp.html, and https://www.swpc.noaa.gov/ -all wavelengths and at the same considered box from the image s set. These loop images are +Based on data on these WebSites: https://solarflare.njit.edu/webapp.html, and https://www.swpc.noaa.gov/ +all wavelengths and at the same considered box from the images set. These loop images are necessary entrances for our thermal analysis process. Then the loop is divided into different strips and its best division in terms of pixel intervals is considered. To do thermal analysis, we use the spatially-synthesized Gaussian DEM forward-fitting method founded by Aschwanden et al. - ( 2015 ). + (2015). The images in the above six wavelength filters are considered to calculate the temperature in each strip of the loop. 
The DEM function is considered a single-Gaussian function relative to the temperature determined by the forward fitting method. To obtain the temperature for each loop, we divided the loop into narrow strips, and then the intensity flux was averaged over each strip. The number of each strip is displayed with the index i. One of the usual methods to subtract the background from observed data is fitting a single-Gaussian cospatial function with a linear -function on the flux profile. The DEM for each strip is consider ed to be single-Gaussian DEM -in terms of the logarithm of the temperature, which has three free parameters ( Aschwanden & -Boerner , 2011 ): +function on the flux profile. The DEM for each strip is considered to be single-Gaussian DEM +in terms of the logarithm of the temperature, which has three free parameters (Aschwanden & +Boerner, 2011): D E M - i = dE M - i +i = dE M +i dT = E M - p , i exp ( − [ log ( T ) − log ( T -p , i ) -2 σ 2 -T , i ) . (1) +p,i exp (− [log (T ) − log (T +p,i ) +2σ2 +T,i ). (1) In which, T -p , i is the DEM peak temperature, E M - p , i is the peak EM function, and σ -T , i is the -logarithmic width of the temperature for that strip. To calc ulate the background-subtracted fluxes -(for each strip) we use Eq.6 of Aschwanden & Boerner ( 2011 ) (in below): +p,i is the DEM peak temperature, E M +p,i is the peak EM function, and σ +T,i is the +logarithmic width of the temperature for that strip. To calculate the background-subtracted fluxes +(for each strip) we use Eq.6 of Aschwanden & Boerner (2011) (in below): F -0 λ = Z - dE M ( T ) +0λ = + dE M(T ) dT R - λ ( T ) dT = +λ (T )dT = ∑ -k E M ( T +k E M(T k ) R - λ ( T -k ) . (2) +λ (T +k ). (2) Here, R - λ ( T ) is the instrumental temperature response function of each wavelength filter λ , which -is obtained by the code aia _ get _res ponse . pro in the SSW package. 
As time has passed, the AIA +λ (T ) is the instrumental temperature response function of each wavelength filter λ, which +is obtained by the code aia_get_res ponse. pro in the SSW package. As time has passed, the AIA response functions calibration has partly changed. Here, we use the updated calibration of the -temperature response functions, for each of the AIA tempera ture filters, according to the CHIANTI - Version 2019 code available in the Solar SoftWare (SSW) . After forward-fitting the Gaussian +temperature response functions, for each of the AIA temperature filters, according to the CHIANTI + Version 2019 code available in the Solar SoftWare (SSW). After forward-fitting the Gaussian DEM to the background-subtracted observed fluxes in multiple wavelengths, the three-fitting parameters, - temperature width ( σ -T , i ), peak of temperature ( T -p , i ), and peak emission measure ( E M - p , i ) -are found by minimizing χ 2 + temperature width (σ +T,i), peak of temperature (T +p,i), and peak emission measure (E M +p,i ) +are found by minimizing χ2 i . Our data sample is uneven because of omitting some damaged images in between. Therefore to analyze the temperature oscillations, we use the Lomb-Scargle method. This method is -developed to use the technique periodogram, in the case wher e the observation times are unevenly +developed to use the technique periodogram, in the case where the observation times are unevenly spaced ( - Scargle , 1982 ). The Lomb-Scargle periodogram method is useful in cases where -the periodicity of data treatment is not immediately appare nt. This method allows efficient computation +Scargle, 1982). The Lomb-Scargle periodogram method is useful in cases where +the periodicity of data treatment is not immediately apparent. This method allows efficient computation of a Fourier-like power spectrum estimator from unevenly-sampled data, resulting in -an intuitive means of determining the period of oscillation ( VanderPlas, 2018 ). 
Therefore we use -Lomb-Scargle Periodogram to evaluate and estimate the effic ient periods of temperature oscillations +an intuitive means of determining the period of oscillation (VanderPlas, 2018). Therefore we use +Lomb-Scargle Periodogram to evaluate and estimate the efficient periods of temperature oscillations in our loops. We select the first period related to the highest power frequency, which is obtained by this method.We considered the achieved periods with the highest significances and -amplitudes. The most significant (highest) periods observe d in temperature (minute) for flaring -and non-flaring loops are listed in Tables 1 and 2, respective ly. To estimate the significance of -the periods, we computed the probability values (p-values) . In the Lomb-Scargle method, the +amplitudes. The most significant (highest) periods observed in temperature (minute) for flaring +and non-flaring loops are listed in Tables 1 and 2, respectively. To estimate the significance of +the periods, we computed the probability values (p-values). In the Lomb-Scargle method, the significance returned here is the false alarm probability of the null hypothesis, i.e., as the data is composed of independent Gaussian random variables. Accordingly, low probability values (p-value less than 0.05) indicate a high degree of significance in the associated periodic signal. -IV. R esults +IV. Results i. Temperature Analysis of Flaring Active Region Loops Thenceforth the temperature time-series of different strips of the selected loops are calculated using the method described in section 3. In the following figures, the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time duration. To be comparable by eyes, all the forthcoming figures (which show the loops temperature oscillations) have been coscaled in the (log) temperature range of 5.7 to 6.9. The color maps are shown for each temperature -map. 
Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 1 2, and 6 strips, respectively. Each +map. Loops A, B1, B2, C1, and C2 are subdivided into 25, 11, 8, 12, and 6 strips, respectively. Each strip’s length is equal to 4 pixels (macro-pixel), for all loops in this paper. For brevity, a few strips’ temperature oscillations are presented here. Figure 3 displays the time-series of temperature oscillations for the first 3 strips of Loop A, and first 2 strips of loops B1. We calculated the -errors for each point (temperature) but removed in the prese ntation to avoid overcrowding of the -figures. As we observe in Figures 3 and 4 ), the temperature oscillations are started and increase +errors for each point (temperature) but removed in the presentation to avoid overcrowding of the +figures. As we observe in Figures 3 and 4), the temperature oscillations are started and increase around 22:12 before the flare peak time (22:20) and are mostly continuing after the flare ended (22:24). These temperature oscillations follow the transverse loop oscillations observed by Jain -et al. ( 2015 ). As Jain et al. reported, LoopA and B have a transverse oscillation with periods +et al. (2015). As Jain et al. reported, LoopA and B have a transverse oscillation with periods of roughly 2 minutes and decay times of 5 minutes, starting at 22:18 around the flare peak time -(23:20) and decaying after the flare ended (22:24). So as we ob serve, the temperature oscillations in -these flaring loops happen before the start of their transver se oscillations and are continuing even +(23:20) and decaying after the flare ended (22:24). So as we observe, the temperature oscillations in +these flaring loops happen before the start of their transverse oscillations and are continuing even in the time interval after the transverse oscillations decay. 
Although the temperature oscillations -do not decay as rapid as the transverse oscillations do, and c onversely, the loop temperature -increases at the end of the oscillating mode (see Fig. 4 , the temperature map of the loop A, for +do not decay as rapid as the transverse oscillations do, and conversely, the loop temperature +increases at the end of the oscillating mode (see Fig.4, the temperature map of the loop A, for instance) -We calculate the temperature oscillations periods, using L omb-Scargle method. We consider -the thermal oscillations periods with the highest significa nces. As this method shows, the most +We calculate the temperature oscillations periods, using Lomb-Scargle method. We consider +the thermal oscillations periods with the highest significances. As this method shows, the most powerful period in the range of data time-series (listed in Table - 1 ) are from 7 to 28.4 minutes +1) are from 7 to 28.4 minutes observed in the strips of the marked loops of this flaring region. These loops of flaring region also show some short periods in temperature oscillations which some are less than 10 minutes -(listed in Table 1 ). These short periods are more frequently observed in the loops of the flaring +(listed in Table1). These short periods are more frequently observed in the loops of the flaring active region. Such short periods are very scarce for the loops of the non-flaring active region -(compare Tables1 and 2 ). -The first column in Table 1 is the number of every strip along the loop. The second column is +(compare Tables1 and 2). +The first column in Table1 is the number of every strip along the loop. The second column is the period of the most powerful frequency observed for the loop strips, calculated by the LombScargle - method. The third column shows the maximum of log ( T ) minus its minimum in each + method. The third column shows the maximum of log(T ) minus its minimum in each strip. 
The columns of Table - 2 are exactly the same as Table 1 ; the only difference is that Table 2 is +2 are exactly the same as Table1; the only difference is that Table2 is for the non-flaring loops. The loop A, has the length of 42.3 (Mm) which is the length of the selected part of the loop -marked in Figure 1 .b. The mean of the parameter (Max(log T )-Min(log T )) for the strips of loop A +marked in Figure 1.b. The mean of the parameter (Max(log T)-Min(log T)) for the strips of loop A is 1.21. Mean of the temperature (log) of this loop over time is 6.15 ± 0.25. The loop B1, divided -into 11 strips, has the length of 20.24 (Mm). The mean of (Max( log T )-Min(log T )) and the mean +into 11 strips, has the length of 20.24 (Mm). The mean of (Max(log T)-Min(log T)) and the mean of the temperature for this loop are, 1.10, and 6.28 ± 0.22 respectively. The loop B2, which has 8 strips, with the length of 15.61 (Mm), has the mean temperature (log) of 6.21 ± 0.21. The mean -of (Max(log T )-Min(log T )) is 0.81 through this loop segment. The loops C1 and C2, divided into +of (Max(log T)-Min(log T)) is 0.81 through this loop segment. The loops C1 and C2, divided into 12, and 6 strips, have the lengths of 22.08 and 11.06 (Mm), the mean temperatures of 6.25 ± 0.22, -and 6.14 ± 0.25 (log), and the mean (Max(log T )-Min(log T )) of 1.48, 0.88, respectively. +and 6.14 ± 0.25 (log), and the mean (Max(log T)-Min(log T)) of 1.48, 0.88, respectively. We observe that despite the temperature oscillations, the flaring loops show a temperature -rise at the end of the considered time interval (figure 3 ). As their temperature maps also show, +rise at the end of the considered time interval (figure3). As their temperature maps also show, the oscillations follow with a relatively sensible rise in the final temperature of the loop segments -(Figures 4 ). Although in the case of the transverse oscillations, the loops oscillate as the flare +(Figures 4). 
Although in the case of the transverse oscillations, the loops oscillate as the flare occurs and then the oscillations decay and stop, in the case of temperature oscillations, the temperatures of the various strips of the loops oscillate and at the end of the flare occurrence, they get to a relatively higher value of temperature in average. Figure - 4 shows the temperature maps of the flaring loops A, B1, B2, C1, a nd C2, respectively + 4 shows the temperature maps of the flaring loops A, B1, B2, C1, and C2, respectively as a time series. In each plot, the vertical axis is the distance along the loop segment in Mm, and the horizontal axis shows time. The color bar (in the left) shows the temperature range. Each separated grid part on the map is standing for one strip. Figure 4 shows that the temperature for most of the strips increased, bypassing a few oscillations. Before the end of the time duration, some strips become hotter (yellow ones) and some cooler (blue ones). The loop B1 is colder at -the early times of the duration and becomes hotter at the midd le and end times with a swing -to lower temperatures again (see Fig. 4 ). There are some temperature fluctuations at the middle +the early times of the duration and becomes hotter at the middle and end times with a swing +to lower temperatures again (see Fig. 4). There are some temperature fluctuations at the middle times (the red and green stripes) while at the end the strips temperatures are smoother with less fluctuations. The temperature map of the loop segment B2 (Fig. - 4 ) shows that at the beginning of +4) shows that at the beginning of the time duration, the first strips of the loop are hotter, and the last ones are colder, but at the end -times this pattern is reversed in this loop segment. In loop segment C1 (Fig. 4 ), the temperature +times this pattern is reversed in this loop segment. 
In loop segment C1 (Fig.4), the temperature fluctuations are mainly observed to start after the end of the flare (22:24), and at the end time -(23:00) the temperature is much higher than the beginning. T he temperature is increasing after -the flare time (22:24) for the loop C2 either (see Fig. 4 ). This happens with some oscillations in +(23:00) the temperature is much higher than the beginning. The temperature is increasing after +the flare time (22:24) for the loop C2 either (see Fig.4). This happens with some oscillations in the strips’ temperatures. So as figure 4 shows, the temperature increases with some fluctuation in most of the flaring loops’ strips after the flare time. According to these temperature maps, the temperature fluctuations in the flaring loops are increasing at the flaring time and around 20 minutes after that. -We expect the flaring loops to cool down as a result of heat cond uction and radiative cooling. +We expect the flaring loops to cool down as a result of heat conduction and radiative cooling. Hence this relative temperature increase should be scrutinized. As we probed, this temperature rise is also followed in intensity time-series. As the intensity time-series show, the related intensity in the Loop A of the flaring AR increases at the end of the time duration. To be assured, the authors also checked the wavelength of Fe XV I I I which has a peak formation temperature of -7 × 10 6 ◦ - K ( Ugarte-Urra & Warren ( 2014 )). By using the method developed by Warren et al. ( 2012 ) +7 × 106 ◦ + K (Ugarte-Urra & Warren (2014)). By using the method developed by Warren et al. (2012) the contribution of the Fe XV I I I emission line can be isolated from the AIA 94 , to analyze the -evolution of hot plasma in the loops. We do it to omit the conta mination from the cooler plasma -(mostly around 1MK) which also contributes to this AIA channel Boerner et al. ( 2012 ). 
This is -done by subtracting the contaminating warm (i.e., around 1M K) component to the bandpass. +evolution of hot plasma in the loops. We do it to omit the contamination from the cooler plasma +(mostly around 1MK) which also contributes to this AIA channel Boerner et al. (2012). This is +done by subtracting the contaminating warm (i.e., around 1MK) component to the bandpass. This warm contribution is calculated from a weighted combination of the emission from the AIA 171 and 193 channels dominated by Fe X and Fe X I I emission, respectively. This intensity -analysis is done directly and it has not gone through any othe r process like the thermal analysis. -For this purpose, we applied the formulation (1) used by Li et al. ( 2015 ). Plots in Figure 5 show +analysis is done directly and it has not gone through any other process like the thermal analysis. +For this purpose, we applied the formulation (1) used by Li et al. (2015). Plots in Figure 5 show the intensity map, and the mean intensity variation of the wavelength Fe XV I I I , for Loop A of the flaring region, respectively. As these plots show, this intensity is also higher at the end of the time duration in respect of the flare time. It seems to us that the expected cooling has not -occurred in these flaring loops yet, even after the flare occur rence in the probed duration due to +occurred in these flaring loops yet, even after the flare occurrence in the probed duration due to some plausible reasons. We consider that the mentioned simultaneous CME (see section -II ) which +II) which this flare is associated with could cause this increase in temperature. We can be sure that the -source of this CME is AR 11283 ( Romano et al. ( 2015 )). This CME is in our flare region, hence +source of this CME is AR 11283 (Romano et al. (2015)). This CME is in our flare region, hence the loops receive energy even after the flare occurrence and it is probably the reason why the expected cooling does not occur. 
The thermal oscillations periods obtained the Lomb-Scargle method, do not have the same significance in all strips of the loops, but for most strips of the flaring loops, the significances are -very near to one. To be assured about these oscillations, we p robed the intensity time-series for +very near to one. To be assured about these oscillations, we probed the intensity time-series for each strip of the loops and we observed that this loop’s intensities shows intensity oscillations -too (i.e., alongside the loop). The most probable dominant p eriods observed in intensity, for -wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A , 16.7, and 18.22 min for strips of +too (i.e., alongside the loop). The most probable dominant periods observed in intensity, for +wavelength of 171 is 18.22, and 16.7 min for strips of F-Loop A, 16.7, and 18.22 min for strips of F-Loop B1, 16.70, and 12.52 for F-Loop B2, and 16.7 for F-Loop C1 and F-Loop C2. These periods -are in the same order of the observed thermal oscillation per iods. The intensity in this time series +are in the same order of the observed thermal oscillation periods. The intensity in this time series has not passed any thermal process but still shows oscillation periods close to thermal ones. So -we think these results confirm the observation of thermal osc illations. -ii. Temperature Analysis of non-Flaring Active Region Loop s +we think these results confirm the observation of thermal oscillations. +ii. Temperature Analysis of non-Flaring Active Region Loops The temperature time-series for different strips of the selected loops of the non-flaring active region 12194 are calculated using the Lomb-Scargle method. In the following figures (Fig. - 6 ), + 6), the vertical axis shows the logarithm of the temperature and the horizontal axis shows the time duration. Figure 6 displays the time-series of temperature variations for the first two strips of the non-flaring Loops A, and B. 
These figures are all co-scaled in the range of 5.7 to 6.9 for the logarithm of temperature (like the flaring loops range). The most powerful periods, observed in -most of these non-flaring loops’ strips (listed in Table 2 ) are from 8.5 min. to 30 min. Comparing -the periods of the loops in the flaring region (Table 1 ) with the non-flaring one (Table 2 ), we see +most of these non-flaring loops’ strips (listed in Table2) are from 8.5 min. to 30 min. Comparing +the periods of the loops in the flaring region (Table1) with the non-flaring one (Table2), we see that the temperature periods of the flaring loops have lower values on average and have more diversity than the non-flaring ones. As Tables 1 and 2 show, the mean temperatures of nonfloops are lower in comparison with the f-loops, a fact we also expected from common sense. -The parameter (Max(log T )-Min(log T )) in nonf-loops’ strips is less than that for the flaring loop s’ +The parameter (Max(log T)-Min(log T)) in nonf-loops’ strips is less than that for the flaring loops’ strips. -Nonf-loop A, divided into 11 strips, has the length of 19.91 ( Mm) which is the length of the -selected part of the loop marked in Figure 2 b. The mean of (Max(log T )-Min(log T )) for the strips +Nonf-loop A, divided into 11 strips, has the length of 19.91 (Mm) which is the length of the +selected part of the loop marked in Figure 2b. The mean of (Max(log T)-Min(log T)) for the strips of nonf-loop A is 0.81. Mean of the temperature (log) of this loop segment over time is 5.93 ± 0.10. -Nonf-Loop B, divided into 6 strips, has the length of 11.11 (M m), and the mean temperature (log), -and the mean of (Max(log T )-Min(log T )) for this loop are, 5.99 ± 0.13 and 0.62 respectively. Nonfloop +Nonf-Loop B, divided into 6 strips, has the length of 11.11 (Mm), and the mean temperature (log), +and the mean of (Max(log T)-Min(log T)) for this loop are, 5.99 ± 0.13 and 0.62 respectively. 
Nonfloop C, which has 5 strips, with the length of 10.13 (Mm), has the mean temperature (log) of -5.82 ± 0.12, and the mean (Max(log T )-Min(log T )) of 0.56. +5.82 ± 0.12, and the mean (Max(log T)-Min(log T)) of 0.56. The first highest period observed for the temperature oscillations of these non-flaring loops’ -strips is reported in Table 2 . As we observe the temperature periods in these non-flaring loops +strips is reported in Table2. As we observe the temperature periods in these non-flaring loops are mostly longer than those of the flaring loops (compare the values listed in Table - 1 and Table 2 ). +1 and Table2). Therefore the temperature oscillations of these loops are a little slower than the flaring ones. -Figure - 7 shows the temperature maps of the non-flaring loops A, B, and C , respectively as a -time series. In each plot, the vertical axis is the distance a long the loop in Mm, and the horizontal +Figure 7 shows the temperature maps of the non-flaring loops A, B, and C, respectively as a +time series. In each plot, the vertical axis is the distance along the loop in Mm, and the horizontal axis is the time. The color bar in the left shows the colors considered for the temperature range. Each separated colored part in the map is one strip. These color maps are plotted totally at the same color range of the loops of the flaring region either. As figure 7 shows, the strips’ temperature of these non-flaring loops have fewer temperature -fluctuations and are smoother in comparison with the flaring ones (Fig. 4 ). Furthermore, that +fluctuations and are smoother in comparison with the flaring ones (Fig. 4). Furthermore, that much increase in the temperatures of the strips, which was obvious in the loops of the flaring -region toward the end times, is not observed here. The temper atures are also totally lower in the +region toward the end times, is not observed here. 
The temperatures are also totally lower in the nonf-loops in comparison with the flaring loops. Conversely, it seems that different strips of the non-flaring loops have relatively more similar temperature fluctuations. As figure 8 shows, the peaks of the observed temperature periods for the loops’ strips of the flaring active region (blue ones), and non-flaring active region (red ones), are around 18 minutes, and 30 minutes, respectively. The temperature periods’ diversity is higher in the loops’ strips of -the flaring active region, and shorter temperature periods ( less than 10 minutes, nearer to the +the flaring active region, and shorter temperature periods (less than 10 minutes, nearer to the transverse oscillations periods) are observed in the case of the flaring loops’ strips in comparison with the non-flaring ones. And figure 9 shows that the increasing and decreasing of temperature -range, or the difference between maximum and minimum of the temperature value (max(log( T ) )min(log - ( T ) )), is much higher on average for the loops’ strips of the flaring AR in comparison with +range, or the difference between maximum and minimum of the temperature value (max(log(T ))min(log(T + ))), is much higher on average for the loops’ strips of the flaring AR in comparison with the loops’ strips of the non-flaring one. V. Summery We reported the temperature oscillations of coronal loops of a flaring active region. We selected the flaring active region 11283 to investigate the thermal structure and treatment of its loops. This region includes a high energy flare x2.1 and the transverse oscillations of two loops of it have been -analyzed before by Jain et al. ( 2015 ). They analyzed intensity variations in the wavelength 171 +analyzed before by Jain et al. (2015). 
They analyzed intensity variations in the wavelength 171 in two coronal loops of this region and detected obvious transverse oscillation with periods of -roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure. 1 b) -at the flare time. We were curious to know if the temperature va riations follow the transverse +roughly 2 minutes and decay times of 5 minutes for these loops (loops A and B in Figure.1b) +at the flare time. We were curious to know if the temperature variations follow the transverse oscillations of the loops, or there is any relation or correlation between them. We also wanted to investigate the thermal fluctuations at the flare time. As a blind test to see the specific thermal properties of the flaring loops, we selected a LOS non-flaring active region (12194), extracted three segments of its loops and analyzed their thermal treatment. Then we compared the temperature treatment of the loops at the flaring region with the loops of the non-flaring region to see the -differences. We were eager to observe the probable discrepa ncies between flaring and non-flaring +differences. We were eager to observe the probable discrepancies between flaring and non-flaring loops in this respect. -Here we used data of three loops of the flaring active region (A R11283) around the time of the +Here we used data of three loops of the flaring active region (AR11283) around the time of the Flare X2.1, from 22:10UT till 23:00UT on 2011 September 6, plus three loops of the non-flaring active region (AR12194), from 08:00:00UT till 09:00:00UT of 2014 October 26 (marked in figures -1 and 2 ). To calculate the time series of the loop temperature value s, we first extracted the loop -pixels in each image and then displayed the loop straightly f or all the images in the time series +1 and 2). 
To calculate the time series of the loop temperature values, we first extracted the loop +pixels in each image and then displayed the loop straightly for all the images in the time series of different wavelengths. To do thermal analysis, we used the spatially-synthesized Gaussian -DEM forward-fitting method founded by Aschwanden et al. ( 2015 ). We calculated the peak +DEM forward-fitting method founded by Aschwanden et al. (2015). We calculated the peak temperatures for each strip of the loops. Then we applied the Lomb-Scargle method to analyze temperature oscillations of the time-series for each strip of the loops. We observed temperature oscillations which are following the transverse loop oscillations -observed by Jain et al. ( 2015 ) for the flaring loops. Furthermore, the temperature oscillations in +observed by Jain et al. (2015) for the flaring loops. Furthermore, the temperature oscillations in these flaring loops happen before the transverse oscillations start and continue even in the time -duration after the transverse oscillations decay. As obser ved, the temperature oscillations do not +duration after the transverse oscillations decay. As observed, the temperature oscillations do not decay as rapidly as the transverse oscillations do. Conversely, the strips’ temperatures increase at the end of the oscillating mode and a rather sensible rise is observed in the final temperatures of the f-loops’ segments. The ranges of the obtained periods are from 7 min. to 28.4 min. for the flaring loops, and from 8.5 min. to 30 min. for the non-flaring loops. With the onset of X-flare in the F-loopA, which has a distinct transverse oscillation in the flaring time with period of roughly 2 minutes and decay time of 5 minutes, a temperature oscillation is observed with periods of -roughly 10 to 28.5 minutes in different segments of this loop . 
And as the transverse oscillation -decays in this interval, no special definite decay is observe d in its temperature oscillations. +roughly 10 to 28.5 minutes in different segments of this loop. And as the transverse oscillation +decays in this interval, no special definite decay is observed in its temperature oscillations. The temperature periods of the flaring loops are rather shorter than the temperature periods of the non-flaring loops. The loops of the flaring region show some short temperature oscillations -periods in which some are less than 10 minutes (Table 1 ). These kind of short periods are more +periods in which some are less than 10 minutes (Table1). These kind of short periods are more frequently observed for the loops of the flaring active region and in the case of the non-flaring -ones, are very scarce. We observed that the periods of the flar ing loops have more diversity -than those of the non-flaring ones. Based on our confined obser vations, the non-flaring loops’ +ones, are very scarce. We observed that the periods of the flaring loops have more diversity +than those of the non-flaring ones. Based on our confined observations, the non-flaring loops’ periods are longer and their temperatures’ values are totally lower. So our research showed that thermal structures of the flaring loops differ from the non-flaring ones in the ways described above. As temperature maps show, the temperature fluctuations are increasing at the flaring time and around 20 min. after, in the flaring loops. This happens with some oscillations in strips’ temperature. Conversely, it seems that different strips of the non-flaring loops have relatively -more similar temperature fluctuations. The temperatures ar e either higher in average in the flaring - loops’ segments as expected. The significances of the per iods, obtained by the Lomb-Scargle +more similar temperature fluctuations. 
The temperatures are either higher in average in the flaring + loops’ segments as expected. The significances of the periods, obtained by the Lomb-Scargle method, are calculated for each strip of each loop and the results show that these significances for the loops’ strips of the flaring region are high and close to one, while for the loops’ strips of -the non-flaring region are less than 0.5. Hence the detected p eriods in the flaring loops’ strips +the non-flaring region are less than 0.5. Hence the detected periods in the flaring loops’ strips have high significances (near to one) and are oscillations. Whereas the detected periods in the non-flaring loops’ strips have less significances in comparison with the flaring ones, and maybe they are just fluctuations. Using this method for the coronal loops showed that the oscillation modes obtained for the temperatures of the flaring loops are very close to those of the spatial slow-mode oscillations of the coronal loops. So the origin of temperature oscillation is probably slow-mode waves. These -kind of oscillations often occur in hot coronal loops (log ( T ) > 6) of active regions especially the -ones associated with small (or micro-) flares ( Wang et al. ( 2021 )). The loops of our flaring active +kind of oscillations often occur in hot coronal loops (log(T ) > 6) of active regions especially the +ones associated with small (or micro-) flares (Wang et al. (2021)). The loops of our flaring active region are also hot loops with the mean temperature above this range. They also show intensity oscillations. Hence we think the above evidence confirms the slow-mode oscillations for flaring -loops. The temperature of the non-flaring loops are lower (log ( T ) < 6) and as discussed above, +loops. The temperature of the non-flaring loops are lower (log(T ) < 6) and as discussed above, we believe that the observed oscillation-like periods in non-flaring loops should be more probably related to the high amplitude fluctuations. 
Comparing the loops of the flaring and non-flaring regions, we observed that the amplitudes -of the fluctuations show a discrepancy. Mean of the parameter (Max(log T )-Min(log T )) in the +of the fluctuations show a discrepancy. Mean of the parameter (Max(log T)-Min(log T)) in the FloopA, , FloopB1, FLoopB2, FloopC1, and FloopC2, are 1.21, 1.10, 0.81, 1.48, and 0.88, respectively. - And for non-flaring region, mean of (Max(log T )-Min(log T )), are 0.81, 0.62, and 0.56, for -nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T )Min(log - T )) for these non-flaring loops show a difference from the flaring ones and are lower. + And for non-flaring region, mean of (Max(log T)-Min(log T)), are 0.81, 0.62, and 0.56, for +nonfloopA, B, and C respectively. Therefore the values of the quantity mean of (Max(log T)Min(log + T)) for these non-flaring loops show a difference from the flaring ones and are lower. Loops of the non-flaring active region 12194 have a relatively uniform temperature at the -beginning of the time interval, which rises slightly at its e nd. As the Solar Monitor reports in the +beginning of the time interval, which rises slightly at its end. As the Solar Monitor reports in the neighborhood of this region, the flaring active region 12192 exists of which between its multiple -flares, there is a c 4.6 class flare occurring at 9:44UT. Therefore, it could be a p ossible suggestion -that the abovementioned slight temperature rise in the loop s of AR 12194 (in the time interval +flares, there is a c4.6 class flare occurring at 9:44UT. Therefore, it could be a possible suggestion +that the abovementioned slight temperature rise in the loops of AR 12194 (in the time interval 8:00 to 9:00) originated from the influence of an increase in the energy at the pre-flare conditions exist in the AR 12192. 
Hence as our study shows, the temperature of coronal loops of flaring AR changes in an @@ -522,48 +521,48 @@ Loop C1b Figure 1: (a) AIA image of the AR 11283 on 2011 September 6, 22:10 UT as seen in the 171 filter. (b) Zoom-in view of the area marked by a box in the left. The selected loops are distinguished in red. The loops A and B are the same loops studied by - Jain et al. ( 2015) (see Fig.3a in Jain et al. ( 2015)). + Jain et al. (2015) (see Fig.3a in Jain et al. (2015)). arcsecarcsec −154 0 154 308−572−418−264−11044 a arcsecarcsec -−202 −134 −66 2 70−396−338−280−221−162 + −202 −134 −66 2 70−396−338−280−221−162 nonf−LoopAnonf−LoopB nonf−LoopCb Figure 2: (a) The NOAA AR12194 on 2014 October 26, at 08:00:00UT in 171 recorded by AIA/SDO. (b) Zoom-in -view of the area, marked by a box in the left, the loops are dist inguished in red. +view of the area, marked by a box in the left, the loops are distinguished in red. 5.866.26.46.66.8 -LogT F−LoopA +LogT F−LoopA 5.866.26.46.66.8 LogT -22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 + 22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 timeLogT 5.866.26.46.66.8 LogT F−LoopB1 22:10 22:20 22:30 22:40 22:50 23:005.866.26.46.66.8 timeLogT -Figure 3: From up to down: The time-series of the temperature oscillat ions for the first 3 strips of Loop A (strip 1 to +Figure 3: From up to down: The time-series of the temperature oscillations for the first 3 strips of Loop A (strip 1 to 3 from top to down), and the first 2 strips of LoopB1. Horizontal axis is the time and the vertical axis is the logarithm of the temperature. The red lines mark the initial and final time of the flare x2.1. 
-22:10 22:20 22:30 22:40 22:50 23:000 11213242 F−loopA +22:10 22:20 22:30 22:40 22:50 23:000 11213242 F−loopA Time Loop Length(Mm) 5.866.26.46.66.8 -22:10 22:20 22:30 22:40 22:50 23:000 5 101520 F−loopB1 +22:10 22:20 22:30 22:40 22:50 23:000 5 101520 F−loopB1 Time Loop Length(Mm) 66.056.16.156.26.256.36.356.46.456.5 -22:10 22:20 22:30 22:40 22:50 23:000 4 8 1216 F−loopB2 +22:10 22:20 22:30 22:40 22:50 23:000 4 8 1216 F−loopB2 Time Loop Length(Mm) 5.866.26.46.66.8 -22:10 22:20 22:30 22:40 22:50 23:000 6 111722 F−loopC1 +22:10 22:20 22:30 22:40 22:50 23:000 6 111722 F−loopC1 Time Loop Length(Mm) 5.65.866.26.46.66.8 -22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11 F−loopC2 +22:10 22:20 22:30 22:40 22:50 23:000 3 6 8 11 F−loopC2 Time Loop Length(Mm) 5.866.26.46.66.8 Figure 4: Temperature map of the flaring loops A, B1, B2, C1, and C2 (from top to down) as a time series. The vertical axis is the distance along the loop in Mm, and the horizontal axis is the time. The colorbar in the left shows the colors considered for the temperature range. -Table 1: The properties observed for the loop segments of the flaring A R. +Table 1: The properties observed for the loop segments of the flaring AR. FLoopA (Strip Number) The highest Temp.’s period @@ -610,7 +609,7 @@ FLoopB1 - - 4 16.57 0.93 9 11.04 1.6 10 18.07 1.6 11 18.07 1.6 -Table 2: The properties observed for the loop segments of the non flari ng AR. +Table 2: The properties observed for the loop segments of the non flaring AR. 
Nonf-LoopA (Strip Number) The highest Temp.’s period @@ -648,13 +647,13 @@ observed Max(log(T))Min(log(T)) 3 26.66 0.26 4 30 0.27 5 30 0.8 -22:10 22:20 22:30 22:40 22:50 23:000 11223243 Int−Fe−LoopA +22:10 22:20 22:30 22:40 22:50 23:000 11223243 Int−Fe−LoopA Time Loop Length(Mm) 00.020.040.060.080.10.120.140.160.180.2 22:10 22:20 22:30 22:40 22:50 23:0000.10.20.30.40.50.60.70.80.91 Int−Fe−LoopA TimeNormalized Intensity Fe XVIII -Figure 5: Normalized intensity map of the flaring loop A for the wavelen gth Fe XV I I I, and mean intensity of Fe -XV I I I (from top to down). The vertical axis is the distance al ong the loop in Mm for the first plot, and +Figure 5: Normalized intensity map of the flaring loop A for the wavelength Fe XV I I I, and mean intensity of Fe +XV I I I (from top to down). The vertical axis is the distance along the loop in Mm for the first plot, and normalized intensity for the second. The horizontal axis is the time. The colorbar in the left shows the colors considered for the Intensity range. VI. acknowledgements @@ -665,23 +664,23 @@ work. LogT NonF−LoopA 8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 timeLogT - 5.866.26.46.66.8 + 5.866.26.46.66.8 LogT NonF−LoopB 8:00 8:10 8:20 8:30 8:40 8:50 9:005.866.26.46.66.8 timeLogT Figure 6: from top to down: The time-series of the temperature for the first 2 strips (from top to down) of the nonflaring - Loops A and B. Horizontal axis is the time and the verti cal axis is the logarithm of the temperature. -8:10 8:20 8:30 8:40 8:50 9:000 5 101520 NonF−loopA + Loops A and B. Horizontal axis is the time and the vertical axis is the logarithm of the temperature. 
+8:10 8:20 8:30 8:40 8:50 9:000 5 101520 NonF−loopA Time Loop Length(Mm) 5.866.26.46.66.8 -8:10 8:20 8:30 8:40 8:50 9:000 5 9 1418 NonF−loopB +8:10 8:20 8:30 8:40 8:50 9:000 5 9 1418 NonF−loopB Time Loop Length(Mm) 5.866.26.46.66.8 -8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10 NonF−loopC +8:10 8:20 8:30 8:40 8:50 9:000 3 5 8 10 NonF−loopC Time Loop Length(Mm) 5.866.26.46.66.8 Figure 7: from top to down: Temperature map of the non-flaring loops A, B and C as a time-series. The vertical axis -is the distance along the loop in Mm, and the horizontal axis i s the time. The color-bar in the left shows the +is the distance along the loop in Mm, and the horizontal axis is the time. The color-bar in the left shows the colors considered for the temperature range. 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 3000.050.10.150.20.250.30.350.4 Temp. Period (min)Percentage of Temp. Periods @@ -691,8 +690,8 @@ Figure 8: Hisogram of the temperature periods percentages for the loops’ strip max(log(T))−min(log(T))Number Figure 9: Hisogram of the parameter of (max(log(T))-min(log(T))) for each strip of the loops of the flaring (blue bars) and non-flaring (red bars) ARs. -R eferences -Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 28 0 +References +Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 280 Anfinogentov, S., Nakariakov, V. M., Mathioudakis, M., Van Doorsselaere, T., & Kowalski, A. F. 2013, ApJ, 773, 156 Aschwanden, M., B. P. S. C. M. A. 2013, Solar Physics, 283, 5 @@ -700,13 +699,13 @@ Aschwanden, M. J. 2006, Philosophical Transactions of the Royal Society of Londo 417 Aschwanden, M. J., & Boerner, P. 2011, The Astrophysical Journal, 732, 81 Aschwanden, M. J., Boerner, P., Ryan, D., et al. 2015, The Astrophysical Journal, 802, 53 -Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexand er, D. 1999, ApJ, 520, 880 +Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexander, D. 1999, ApJ, 520, 880 Ballai, I., Jess, D. 
B., & Douglas, M. 2011, A&A, 534, A13 Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3 Berghmans, D., & Clette, F. 1999, Solar Physics, 186, 207 Boerner, P., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41 Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F., & Velli, M. 2018, ApJ, 868, 116 -De Moortel, I. 2005, Philosophical Transactions of the Roya l Society of London Series A, 363, 2743 +De Moortel, I. 2005, Philosophical Transactions of the Royal Society of London Series A, 363, 2743 De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210 De Moortel, I., Ireland, J., & Walsh, R. W. 2000, A&A, 355, L23 De Moortel, I., & Nakariakov, V. M. 2012, Philosophical Transactions of the Royal Society of @@ -715,52 +714,52 @@ Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369 Fathalian, N., & Safari, H. 2010, ApJ, 724, 411 Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy, 15, 403 Goossens, M., Hollweg, J. V., & Sakurai, T. 1992, Solar Physics, 138, 233 -Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A , 460, 887 -Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203 , 25 -Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203 , 26 +Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A, 460, 887 +Guennou, C., Auchère, F., Soubrié, E., et al. 2012a, ApJ, 203, 25 +Guennou, C., Auchère, F., Soubrié, E., et al. 2012b, ApJ, 203, 26 Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113 Hindman, B. W., & Jain, R. 2014, ApJ, 784, 103 Jain, R., Maurya, R. A., & Hindman, B. W. 2015, ApJ, 804, L19 Jess, D. B., Reznikova, V. E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179 -Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 20 19, A&A, 628, A133 +Kolotkov, D. Y., Nakariakov, V. M., & Zavershinskii, D. I. 2019, A&A, 628, A133 Krishna Prasad, S., Jess, D. B., & Van Doorsselaere, T. 2019, Frontiers in Astronomy and Space Sciences, 6, 57 Li, L. P., Peter, H., Chen, F., & Zhang, J. 
2015, A&A, 583, A109 Liu, W., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 -Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, A pJ, 716, 1371 +Luna, M., Terradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371 McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834 -McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelíne k, P., & Takasao, S. 2018, Space +McLaughlin, J. A., Nakariakov, V. M., Dominique, M., Jelínek, P., & Takasao, S. 2018, Space Science Reviews volume, 214, 45 -Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2 017, ApJ, 849, 62 -Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010 , Plasma Physics and Controlled Fusion, +Nakariakov, V. M., Afanasyev, A. N., Kumar, S., & Moon, Y. J. 2017, ApJ, 849, 62 +Nakariakov, V. M., Inglis, A. R., Zimovets, I. V., et al. 2010, Plasma Physics and Controlled Fusion, 52, 124009 Nakariakov, V. M., Ofman, L., Deluca, E. E., Roberts, B., & Davila, J. M. 1999, Science, 285, 862 Nakariakov, V. M., & Verwichte, E. 2005, Living Reviews in Solar Physics, 2, 3 -Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 55 2, A57 -Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 20 17, A&A, 600, A37 +Nisticò, G., Nakariakov, V. M., & Verwichte, E. 2013, A&A, 552, A57 +Nisticò, G., Polito, V., Nakariakov, V. M., & Del Zanna, G. 2017, A&A, 600, A37 Ofman, L., & Wang, T. 2002, ApJ, 580, L85 Pant, V., Tiwari, A., Yuan, D., & Banerjee, D. 2017, ApJ, 847, L5 Pascoe, D. J., Nakariakov, V. M., & Arber, T. D. 2007, Solar Physics, 246, 165 -Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, Ap J, 884, 131 +Reale, F., Testa, P., Petralia, A., & Kolotkov, D. Y. 2019, ApJ, 884, 131 Roberts, B., Edwin, P. M., & Benz, A. O. 1984, ApJ, 279, 857 Romano, P., Zuccarello, F., Guglielmino, S. L., et al. 2015, A&A, 582, A55 -Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 2015, A&A, 5 81, A8 +Russell, A. J. B., Simões, P. J. A., & Fletcher, L. 
2015, A&A, 581, A8 Scargle, J. D. 1982, ApJ, 263, 835 -Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ , 731, 49 -Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34 +Schmelz, J. T., Jenkins, B. S., Worley, B. T., et al. 2011, ApJ, 731, 49 +Schmelz, J. T., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ, 725, L34 Schmelz, J. T., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 -Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 201 3, ApJ, 764, 53 +Schmelz, J. T., Pathak, S., Jenkins, B. S., & Worley, B. T. 2013, ApJ, 764, 53 Ugarte-Urra, I., & Warren, H. P. 2014, ApJ, 783, 12 Van Doorsselaere, T., Kupriyanova, E. G., & Yuan, D. 2016, Solar Physics, 291, 3143 Van Doorsselaere, T., Wardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32 VanderPlas, J. T. 2018, ApJ, 236, 16 -Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2 004, Solar Physics, 223, 77 +Verwichte, E., Nakariakov, V. M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77 Wang, T. 2011, Space Science Reviews, 158, 397–419 Wang, T., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 Wang, T. J., & Solanki, S. K. 2004, A&A, 421, L33 Wang, T. J., Solanki, S. K., Innes, D. E., Curdt, W., & Marsch, E. 2003, A&A, 402, L17 Wang, T., & Ofman, L. 2019, ApJ, 886, 2 -Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13 +Wang, T., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13 Wang, T., Ofman, L., Yuan, D., et al. 2021, Space Science Reviews, 217 -Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141 -Wills-Davey, M. J., & Thompson, B. J. 1999, Solar Physics, 19 0, 467 \ No newline at end of file +Warren, H. P., Winebarger, A. R., & Brooks, D. H. 2012, ApJ, 759, 141 +Wills-Davey, M. J., & Thompson, B. J. 
1999, Solar Physics, 190, 467 \ No newline at end of file diff --git a/read/results/playa/GeoTopo-book.txt b/read/results/playa/GeoTopo-book.txt index 2e39a2a..8a01b45 100644 --- a/read/results/playa/GeoTopo-book.txt +++ b/read/results/playa/GeoTopo-book.txt @@ -5,7 +5,7 @@ Vorwort Dieses Skript wurde im Wintersemester 2013/2014 von Martin Thoma geschrieben. Es beinhaltet die Mitschriften aus der Vorlesung von Prof. Dr. Herrlich sowie die Mitschriften einiger Übungen und Tutorien. -Das Skript ist kostenlos übermartin-thoma.com/geotopoverfügbar. Wer es gerne in A5 (SchwarzWeiß, +Das Skript ist kostenlos über martin-thoma.com/geotopo verfügbar. Wer es gerne in A5 (SchwarzWeiß, Ringbindung) für 10 Euro hätte, kann mir eine E-Mail schicken (info@martin-thoma.de). Danksagungen An dieser Stelle möchte ich Herrn Prof. Dr. Herrlich für einige Korrekturvorschläge und einen @@ -23,145 +23,145 @@ E-Mail und nach dem Tutorium beantwortet. Danke! Was ist Topologie? Die Kugeloberfläche S 2 lässt sich durch strecken, stauchen und umformen zur Würfeloberfläche -oder der Oberfläche einer Pyramide verformen, aber nicht zumR 2 +oder der Oberfläche einer Pyramide verformen, aber nicht zum R2 oder zu einem Torus T 2 - . Für -den R 2 +. Für +den R2 müsste man die Oberfläche unendlich ausdehnen und für einen Torus müsste man ein Loch machen. Erforderliche Vorkenntnisse -Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \ , ∅ , R , P (M )) +Es wird ein sicherer Umgang mit den Quantoren (∀, ∃), Mengenschreibweisen (∪, ∩, \, ∅, R, P (M )) und ganz allgemein formaler Schreibweise vorausgesetzt. Auch die Beweisführung mittels Widerspruchsbeweisen - sollte bekannt sein und der Umgang mit komplexen Zahlen C , deren Betrag, + sollte bekannt sein und der Umgang mit komplexen Zahlen C, deren Betrag, Folgen und Häufungspunkten nicht weiter schwer fallen. Diese Vorkenntnisse werden vor allem in „Analysis I“ vermittelt. 
Außerdem wird vorausgesetzt, dass (affine) Vektorräume, Faktorräume, lineare Unabhängigkeit, -der Spektralsatz und der pro jektive RaumP (R) aus „Lineare Algebra I“ bekannt sind. In „Lineare +der Spektralsatz und der pro jektive Raum P (R) aus „Lineare Algebra I“ bekannt sind. In „Lineare Algebra II“ wird der Begriff der Orthonormalbasis eingeführt. (a) S 2 - (b)Würfel (c)Pyramide + (b) Würfel (c) Pyramide y x -(d) R 2 +(d) R2 (e) T 2 -Abbildung 0.1:Beispiele für verschiedene Formen +Abbildung 0.1: Beispiele für verschiedene Formen Obwohl es nicht vorausgesetzt wird, könnte es von Vorteil sein „Einführung in die Algebra und Zahlentheorie“ gehört zu haben. Inhaltsverzeichnis -1 Topologische Grundbegriffe2 -1.1 Topologische Räume. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .2 -1.2 Metrische Räume. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6 -1.3 Stetigkeit. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .9 -1.4 Zusammenhang. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .11 -1.5 Kompaktheit. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .14 -1.6 Wege und Knoten. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .17 -Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .22 -2 Mannigfaltigkeiten und Simplizialkomplexe24 -2.1 Topologische Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . . . .24 -2.2 Differenzierbare Mannigfaltigkeiten. . . . . . . . . . . . . . . . . . . . . . . . . .29 -2.3 Simplizialkomplex. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .34 -Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .43 -3 Fundamentalgruppe und Überlagerungen44 -3.1 Homotopie von Wegen. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .44 -3.2 Fundamentalgruppe. . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . .47 -3.3 Überlagerungen. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .51 -3.4 Gruppenoperationen. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .61 -4 Euklidische und nichteuklidische Geometrie64 -4.1 Axiome für die euklidische Ebene. . . . . . . . . . . . . . . . . . . . . . . . . . .64 -4.2 Weitere Eigenschaften einer euklidischen Ebene. . . . . . . . . . . . . . . . . . .74 -4.2.1 Flächeninhalt. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .74 -4.3 Hyperbolische Geometrie. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .77 -Übungsaufgaben. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .86 -5 Krümmung 87 -5.1 Krümmung von Kurven. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .87 -5.2 Tangentialebene. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .89 -5.3 Gauß-Krümmung. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .91 -5.4 Erste und zweite Fundamentalform. . . . . . . . . . . . . . . . . . . . . . . . . .94 -Lösungen der Übungsaufgaben99 -Bildquellen 105 -Abkürzungsverzeichnis106 -Ergänzende Definitionen und Sätze107 -Symbolverzeichnis108 - Inhaltsverzeichnis -Stichwortverzeichnis111 +1 Topologische Grundbegriffe 2 +1.1 Topologische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2 +1.2 Metrische Räume . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 +1.3 Stetigkeit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 +1.4 Zusammenhang . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 +1.5 Kompaktheit . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 +1.6 Wege und Knoten . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17 +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
22 +2 Mannigfaltigkeiten und Simplizialkomplexe 24 +2.1 Topologische Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . . . 24 +2.2 Differenzierbare Mannigfaltigkeiten . . . . . . . . . . . . . . . . . . . . . . . . . . 29 +2.3 Simplizialkomplex . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +3 Fundamentalgruppe und Überlagerungen 44 +3.1 Homotopie von Wegen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 44 +3.2 Fundamentalgruppe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47 +3.3 Überlagerungen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 +3.4 Gruppenoperationen . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 61 +4 Euklidische und nichteuklidische Geometrie 64 +4.1 Axiome für die euklidische Ebene . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 +4.2 Weitere Eigenschaften einer euklidischen Ebene . . . . . . . . . . . . . . . . . . . 74 +4.2.1 Flächeninhalt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74 +4.3 Hyperbolische Geometrie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77 +Übungsaufgaben . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86 +5 Krümmung 87 +5.1 Krümmung von Kurven . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 87 +5.2 Tangentialebene . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 89 +5.3 Gauß-Krümmung . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +5.4 Erste und zweite Fundamentalform . . . . . . . . . . . . . . . . . . . . . . . . . . 
94 +Lösungen der Übungsaufgaben 99 +Bildquellen 105 +Abkürzungsverzeichnis 106 +Ergänzende Definitionen und Sätze 107 +Symbolverzeichnis 108 + Inhaltsverzeichnis +Stichwortverzeichnis 111 1 Top ologische Grundb egriffe 1.1 Topologische Räume Definition 1 Ein topologischer Raum ist ein Paar (X, T) bestehend aus einer Menge X und T ⊆ P (X ) mit folgenden Eigenschaften (i) ∅, X ∈ T -(ii)Sind U -1 , U +(ii) Sind U +1, U 2 ∈ T, so ist U - 1 ∩ U - 2 ∈ T -(iii)Ist I eine Menge und U - i ∈ T für jedes i ∈ I , so ist -i ∈ I U +1 ∩ U +2 ∈ T +(iii) Ist I eine Menge und U +i ∈ T für jedes i ∈ I , so ist +i∈I U i ∈ T Die Elemente von T heißen offene Teilmengen von X . A ⊆ X heißt abgeschlossen, wenn X \ A offen ist. -Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0 , 1) . Auch gibt es +Es gibt auch Mengen, die weder abgeschlossen, noch offen sind wie z. B. [0, 1). Auch gibt es Mengen, die sowohl abgeschlossen als auch offen sind. Bemerkung 1 (Mengen, die offen & abgeschlossen sind, ex.) Betrachte ∅ und X mit der trivialen Topologie T -triv = { ∅ , X } . +triv = { ∅, X }. Es gilt: X ∈ T und ∅ ∈ T, d. h. X und ∅ sind offen. Außerdem X C = X \ X = ∅ ∈ T und X \ ∅ = X ∈ T, d. h. X und ∅ sind als Komplement offener Mengen abgeschlossen. Beispiel 1 (Topologien) -1) X = R n +1) X = Rn mit der von der euklidischen Metrik erzeugten Topologie T -Euklid : -U ⊆ R n - offen ⇔ für jedes x ∈ U gibt es r > 0 , +Euklid: +U ⊆ Rn + offen ⇔ für jedes x ∈ U gibt es r > 0, sodass B -r (x ) = { y ∈ Rn - | d(x, y ) < r } ⊆ U -Diese Topologie wird auch „Standardtopologie des R n - “ genannt. Sie beinhaltet unter +r (x) = { y ∈ Rn + | d(x, y) < r } ⊆ U +Diese Topologie wird auch „Standardtopologie des Rn +“ genannt. Sie beinhaltet unter anderem alle offenen Kugeln, aber z. B. auch Schnitte zweier Kugeln mit unterschiedlichem - Mittelpunkt (vgl.Definition 1.ii). -2)Jeder metrische Raum ( X, d) ist auch ein topologischer Raum. 
-3)Für eine Menge X heißt T -Diskret = P (X ) diskrete Topologie . + Mittelpunkt (vgl. Definition 1.ii). +2) Jeder metrische Raum (X, d) ist auch ein topologischer Raum. +3) Für eine Menge X heißt T +Diskret = P (X ) diskrete Topologie. 4) X := R, T Z := { U ⊆ R | R \ U endlich } ∪ { ∅ } heißt Zariski-Topologie Beobachtungen: • U ∈ T -Z ⇔ ∃f ∈ R [X ] , sodass R \ U = V (f ) = { x ∈ R | f (x ) = 0 } +Z ⇔ ∃f ∈ R[X ], sodass R \ U = V (f ) = { x ∈ R | f (x) = 0 } • Es gibt keine disjunkten offenen Mengen in T Z . - 1.1. TOPOLOGISCHE RÄUME + 1.1. TOPOLOGISCHE RÄUME 5) X := Rn - , T -Z = { U ⊆ R n - |Es gibt Polynome f -1 , . . . , f - r ∈ R[ X -1 , . . . , X - n ] sodass -R n - \ U = V ( f -1 , . . . , f - r )} -6) X := { 0 , 1 } , T = { ∅ , { 0 , 1 } , { 0 } } heißt Sierpińskiraum. -∅ , { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. +, T +Z = {U ⊆ Rn +|Es gibt Polynome f +1, . . . , f +r ∈ R[X +1, . . . , X +n] sodass +Rn + \ U = V (f +1, . . . , f +r )} +6) X := { 0, 1 } , T = { ∅, { 0, 1 } , { 0 } } heißt Sierpińskiraum. +∅, { 0, 1 } , { 1 } sind dort alle abgeschlossenen Mengen. Definition 2 -Sei ( X, T) ein topologischer Raum und x ∈ X . -Eine Teilmenge U ⊆ X heißt Umgebung von x , wenn es ein U +Sei (X, T) ein topologischer Raum und x ∈ X . +Eine Teilmenge U ⊆ X heißt Umgebung von x, wenn es ein U 0 ∈ T gibt mit x ∈ U - 0 und +0 und U - 0 ⊆ U . +0 ⊆ U . Gilt eine Eigenschaft in einer Umgebung, so sagt man, dass die Eigenschaft lokal gilt. Definition 3 Sei (X, T) ein topologischer Raum und M ⊆ X eine Teilmenge. a) M ◦ := { x ∈ M | M ist Umgebung von x } = U ⊆M -U ∈ T U heißt Inneres oder offener +U ∈T U heißt Inneres oder offener Kern von M . b) M := M ⊆A @@ -171,59 +171,59 @@ c) ∂ M := heißt Rand von M . d) M heißt dicht in X , wenn M = X ist. Beispiel 2 -1)Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M ◦ +1) Sei X = R mit euklidischer Topologie und M = Q. Dann gilt: M = R und M ◦ = ∅ -2)Sei X = R und M = (a, b). 
Dann gilt: M = [a, b] -3)Sei X = R , T = T -Z und M = (a, b) . Dann gilt: M = R +2) Sei X = R und M = (a, b). Dann gilt: M = [a, b] +3) Sei X = R, T = T +Z und M = (a, b). Dann gilt: M = R Definition 4 -Sei ( X, T) ein topologischer Raum. -a) B ⊆ T heißt Basis der Topologie T , wenn jedes U ∈ T Vereinigung von Elementen +Sei (X, T) ein topologischer Raum. +a) B ⊆ T heißt Basis der Topologie T, wenn jedes U ∈ T Vereinigung von Elementen aus B ist. b) S ⊆ T heißt Subbasis der Topologie T, wenn jedes U ∈ T Vereinigung von endlichen Durchschnitten von Elementen aus S ist. Beispiel 3 (Basis und Subbasis) -1)Jede Basis ist auch eine Subbasis, z.B. -S = { ( a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als +1) Jede Basis ist auch eine Subbasis, z.B. +S = { (a, b) | a, b ∈ R, a < b } ist für R mit der Standardtopologie sowohl Basis als auch Subbasis. -2)Gegeben sei X = R n +2) Gegeben sei X = Rn mit euklidischer Topologie T. Dann ist B = { B - r ( x ) | r ∈ Q ->0 , x ∈ Qn +r (x) | r ∈ Q +>0, x ∈ Qn } ist eine abzählbare Basis von T. -3) Sei (X, T) ein topologischer Raum mitX = { 0, 1, 2 } und T = { ∅ , { 0 } , { 0 , 1 } , { 0, 2 } , X }. -Dann ist S = { ∅ , { 0 , 1 } , { 0, 2 } } eine Subbasis von T , da gilt: - 1.1. TOPOLOGISCHE RÄUME -•S ⊆ T -•∅ , { 0, 1 } und { 0 , 2 } ∈ S -•{ 0 } = { 0 , 1 } ∩ { 0 , 2 } -• X = { 0 , 1 } ∪ { 0 , 2 } -Allerings ist S keine Basis von ( X, T) , da { 0 } nicht als Vereinigung von Elementen +3) Sei (X, T) ein topologischer Raum mit X = { 0, 1, 2 } und T = { ∅, { 0 } , { 0, 1 } , { 0, 2 } , X }. +Dann ist S = { ∅, { 0, 1 } , { 0, 2 } } eine Subbasis von T, da gilt: + 1.1. TOPOLOGISCHE RÄUME +• S ⊆ T +• ∅, { 0, 1 } und { 0, 2 } ∈ S +• { 0 } = { 0, 1 } ∩ { 0, 2 } +• X = { 0, 1 } ∪ { 0, 2 } +Allerings ist S keine Basis von (X, T), da { 0 } nicht als Vereinigung von Elementen aus S erzeugt werden kann. Bemerkung 2 -Sei X eine Menge und S ⊆ P ( X ) . 
Dann gibt es genau eine Topologie T auf X , für die S +Sei X eine Menge und S ⊆ P (X ). Dann gibt es genau eine Topologie T auf X , für die S Subbasis ist. Definition 5 -Sei ( X, T) ein topologischer Raum und Y ⊆ X . +Sei (X, T) ein topologischer Raum und Y ⊆ X . T Y := { U ∩ Y | U ∈ T } ist eine Topologie auf Y . T Y heißt Teilraumtopologie und (Y , T -Y ) heißt ein Teilraum von ( X, T) . +Y ) heißt ein Teilraum von (X, T). Die Teilraumtopologie wird auch Spurtopologie oder Unterraumtopologie genannt. Definition 6 Seien X -1 , X +1, X 2 topologische Räume. U ⊆ X 1 × X -2 sei offen, wenn es zu jedem x = ( x - 1 , x -2 ) ∈ U Umgebungen U +2 sei offen, wenn es zu jedem x = (x +1, x +2) ∈ U Umgebungen U i um x - i mit +i mit i = 1, 2 gibt, sodass U 1 × U 2 ⊆ U gilt. @@ -231,27 +231,27 @@ T = { U ⊆ X 1 × X 2 | U offen } ist eine Topologie auf X 1 × X -2 . Sie heißt Produkttopologie. +2. Sie heißt Produkttopologie. B = { U 1 × U 2 | U - i offen in X -i , i = 1, 2 } ist eine Basis von T. +i offen in X +i, i = 1, 2 } ist eine Basis von T. U x x - 2 +2 x 1U - 2 +2 U 1 X 1X 2 -Abbildung 1.1:Zu x = (x - 1 , x -2 ) gibt es Umgebungen U -1 , U +Abbildung 1.1: Zu x = (x +1, x +2) gibt es Umgebungen U +1, U 2 mit U 1 × U 2 ⊆ U @@ -261,357 +261,360 @@ Beispiel 4 (Produkttopologien) 2 = R mit euklidischer Topologie. ⇒ Die Produkttopologie auf R × R = R2 stimmt mit der euklidischen Topologie auf -R 2 +R2 überein. 2) X 1 = X 2 = R mit Zariski-Topologie. T Produkttopologie auf R2 - : U - 1 × U - 2 -(SieheAbbildung 1.2) - 1.1. TOPOLOGISCHE RÄUME +: U +1 × U +2 +(Siehe Abbildung 1.2) + 1.1. TOPOLOGISCHE RÄUME U 1 = R \ NU - 2 = R \ N -Abbildung 1.2:Zariski-Topologie auf R 2 +2 + = +R +\ +N +Abbildung 1.2: Zariski-Topologie auf R2 Definition 7 Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/ - ∼ sei die Menge -der Äquivalenzklassen, π : X → X , x → [ x] - ∼ . +∼ sei die Menge +der Äquivalenzklassen, π : X → X , x → [x] +∼. 
T X := U ⊆ X - π − 1 - (U ) ∈ T + π−1 +(U ) ∈ T X (X , T - X ) heißt Quotiententopologie . +X ) heißt Quotiententopologie. Beispiel 5 -X = R , a ∼ b : ⇔ a − b ∈ Z +X = R, a ∼ b :⇔ a − b ∈ Z R -1 0 1 2 3 4 5 0a - U aπ − 1 - (u) -0 ∼ 1 , d. h. [0] = [1] + U aπ−1 +(u) +0 ∼ 1, d. h. [0] = [1] Beispiel 6 Sei X = R2 und (x - 1 , y -1 ) ∼ (x - 2 , y -2 ) ⇔ x - 1 − x - 2 ∈ Z und y - 1 − y - 2 ∈ Z. Dann ist X/ - ∼ ein Torus. +1, y +1) ∼ (x +2, y +2) ⇔ x +1 − x +2 ∈ Z und y +1 − y +2 ∈ Z. Dann ist X/ +∼ ein Torus. Beispiel 7 (Pro jektiver Raum) -X = R n+1 - \ { 0 } , x ∼ y ⇔ ∃λ ∈ R × +X = Rn+1 + \ { 0 } , x ∼ y ⇔ ∃λ ∈ R× mit y = λx ⇔ x und y liegen auf der gleichen Ursprungsgerade X = P n - ( R ) - 1.2. METRISCHE RÄUME +(R) + 1.2. METRISCHE RÄUME Also für n = 1: −4 −2 2 4 6 8 −4−224 1.2 Metrische Räume Definition 8 -Sei X eine Menge. Eine Abbildung d : X × X → R + -0 heißt Metrik , wenn gilt: -(i)Definitheit: d(x, y ) = 0 ⇔ x = y ∀x, y ∈ X -(ii)Symmetrie: d(x, y ) = d( y, x ) ∀x, y ∈ X -(iii)Dreiecksungleichung: d(x, z ) ≤ d( x, y ) + d(y, z ) ∀x, y, z ∈ X -Das Paar (X, d) heißt ein metrischer Raum . +Sei X eine Menge. Eine Abbildung d : X × X → R+ +0 heißt Metrik, wenn gilt: +(i) Definitheit: d(x, y) = 0 ⇔ x = y ∀x, y ∈ X +(ii) Symmetrie: d(x, y) = d(y, x) ∀x, y ∈ X +(iii) Dreiecksungleichung: d(x, z) ≤ d(x, y) + d(y, z) ∀x, y, z ∈ X +Das Paar (X, d) heißt ein metrischer Raum. Bemerkung 3 Sei (X, d) ein metrischer Raum und B -r (x ) := { y ∈ X | d(x, y ) < r } für x ∈ X, r ∈ R + +r (x) := { y ∈ X | d(x, y) < r } für x ∈ X, r ∈ R+ B = { B - r ( x ) ⊆ P ( X ) | x ∈ X, r ∈ R + +r (x) ⊆ P (X ) | x ∈ X, r ∈ R+ } ist Basis einer Topologie auf X . 
Definition 9 -Seien ( X, d - X ) und (Y , d - Y ) metrische Räume und ϕ : X → Y eine Abbildung mit +Seien (X, d +X ) und (Y , d +Y ) metrische Räume und ϕ : X → Y eine Abbildung mit ∀x - 1 , x +1, x 2 ∈ X : d -X ( x - 1 , x -2 ) = d -Y ( ϕ( x -1 ), ϕ(x - 2 )) +X (x +1, x +2) = d +Y (ϕ(x +1), ϕ(x +2)) Dann heißt ϕ eine Isometrie von X nach Y . Beispiel 8 (Skalarprodukt erzeugt Metrik) -Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt·, ·. Dann wird V -durch d(x, y ) := -x − y, x − y zum metrischen Raum. +Sei V ein euklidischer oder hermitescher Vektorraum mit Skalarprodukt ·, ·. Dann wird V +durch d(x, y) := +x − y, x − y zum metrischen Raum. Beispiel 9 (diskrete Metrik) Sei X eine Menge. Dann heißt - d(x, y ) = + d(x, y) = 0 falls x = y 1 falls x = y -die diskrete Metrik. Die Metrik d induziert die diskrete Topologie . - 1.2. METRISCHE RÄUME +die diskrete Metrik. Die Metrik d induziert die diskrete Topologie. + 1.2. METRISCHE RÄUME Beispiel 10 -X = R 2 +X = R2 und d ((x - 1 , y -1 ) , ( x - 2 , y -2 )) := max( x - 1 − x - 2 , y +1, y +1), (x +2, y +2)) := max(x +1 − x +2, y 1 − y - 2 ) ist Metrik. +2) ist Metrik. Beobachtung: d erzeugt die euklidische Topologie. B r (0) = r r r r (a) B -r (0) (b)Euklidische Topologie -Abbildung 1.3:Veranschaulichungen zur Metrik d ausBeispiel 10 - 1.2. METRISCHE RÄUME +r (0) (b) Euklidische Topologie +Abbildung 1.3: Veranschaulichungen zur Metrik d aus Beispiel 10 + 1.2. METRISCHE RÄUME Beispiel 11 (SNCF-Metrik1 - ) -X = R 2 +) +X = R2 −4 −2 2 4 6 8 −4−224 Definition 10 -Ein topologischer Raum X heißt hausdorffsch , wenn es für je zwei Punkte x = y in X +Ein topologischer Raum X heißt hausdorffsch, wenn es für je zwei Punkte x = y in X Umgebungen U x um x und U y um y gibt, sodass U x ∩ U -y = ∅ . +y = ∅. 
Bemerkung 4 (Trennungseigenschaft) Metrische Räume sind hausdorffsch, wegen -d( x, y ) > 0 ⇒ ∃ε > 0 : B - ε ( x) ∩ B -ε (y ) = ∅ +d(x, y) > 0 ⇒ ∃ε > 0 : B +ε(x) ∩ B +ε(y) = ∅ Beispiel 12 (Topologische Räume und Hausdorff-Räume) -1) (R , T +1) (R, T Z ) ist ein topologischer Raum, der nicht hausdorffsch ist. -2) (R , T -Euklid ) ist ein topologischer Hausdorff-Raum. +2) (R, T +Euklid) ist ein topologischer Hausdorff-Raum. Bemerkung 5 (Eigenschaften von Hausdorff-Räumen) Seien X, X -1 , X +1, X 2 Hausdorff-Räume. -a)Jeder Teilraum von X ist hausdorffsch. +a) Jeder Teilraum von X ist hausdorffsch. b) X 1 × X -2 ist hausdorffsch (vgl.Abbildung 1.4). +2 ist hausdorffsch (vgl. Abbildung 1.4). Definition 11 -Sei X ein topologischer Raum und ( x ) - n∈N eine Folge in X . x ∈ X heißt Grenzwert oder -Limes von ( x -n ) , wenn es für jede Umgebung U von x ein n - 0 gibt, sodass x - n ∈ U für alle +Sei X ein topologischer Raum und (x) +n∈N eine Folge in X . x ∈ X heißt Grenzwert oder +Limes von (x +n), wenn es für jede Umgebung U von x ein n +0 gibt, sodass x +n ∈ U für alle n ≥ n - 0 . +0. Bemerkung 6 Ist X hausdorffsch, so hat jede Folge in X höchstens einen Grenzwert. -Beweis: Sei ( x - n ) eine konvergierende Folge und x und y Grenzwerte der Folge. +Beweis: Sei (x +n) eine konvergierende Folge und x und y Grenzwerte der Folge. Da X hausdorffsch ist, gibt es Umgebungen U x von x und U - y von y mit U - x ∩ U - y = ∅ falls -x = y . Da (x - n ) gegen x und y konvergiert, existiert ein n - 0 mit x - n ∈ U +y von y mit U +x ∩ U +y = ∅ falls +x = y. Da (x +n) gegen x und y konvergiert, existiert ein n +0 mit x +n ∈ U x ∩ U y für alle n ≥ n - 0 +0 ⇒ x = y 1 - Diese Metrik wird auch „französische Eisenbahnmetrik“ genannt. - 1.3. STETIGKEIT +Diese Metrik wird auch „ französische Eisenbahnmetrik“ genannt. + 1.3. 
STETIGKEIT (x - 1 , y -1 ) (x - 2 , y -2 ) +1, y +1) (x +2, y +2) x 1 x 2 U - 1 × X +1 × X 2 U - 2 × X +2 × X 2 X 1X 2 -Abbildung 1.4:Wenn X -1 , X +Abbildung 1.4: Wenn X +1, X 2 hausdorffsch sind, dann auch X 1 × X 2 1.3 Stetigkeit Definition 12 -Seien ( X, T -X ) , (Y , T +Seien (X, T +X ), (Y , T Y ) topologische Räume und f : X → Y eine Abbildung. a) f heißt stetig :⇔ ∀U ∈ T -Y : f − 1 - (U ) ∈ T +Y : f −1 +(U ) ∈ T X . b) f - heißt Homöomorphismus , wenn f stetig ist und es eine stetige Abbildung g : + heißt Homöomorphismus, wenn f stetig ist und es eine stetige Abbildung g : Y → X gibt, sodass g ◦ f = id X und f ◦ g = id Y . Bemerkung 72 Seien X, Y metrische Räume und f : X → Y eine Abbildung. -Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ ( x, ε) > 0 , sodass für -alle y ∈ X mit d(x, y ) < δ gilt d -Y (f ( x ), f (y )) < ε. +Dann gilt: f ist stetig ⇔ zu jedem x ∈ X und jedem ε > 0 gibt es δ(x, ε) > 0, sodass für +alle y ∈ X mit d(x, y) < δ gilt d +Y (f (x), f (y)) < ε. Beweis: „ ⇒“: Sei x ∈ X, ε > 0 gegeben und U := B -ε (f ( x )). +ε(f (x)). Dann ist U offen in Y . Def. 12.a =====⇒ f −1 - ( U ) ist offen in X . Dann ist x ∈ f − 1 - ( U ). +(U ) ist offen in X . Dann ist x ∈ f −1 +(U ). ⇒ ∃δ > 0, sodass B - δ (x ) ⊆ f − 1 - (U ) +δ (x) ⊆ f −1 +(U ) ⇒ f (B -δ ( x )) ⊆ U +δ (x)) ⊆ U ⇒ { y ∈ X | d -X ( x, y ) < δ } ⇒ Beh. +X (x, y) < δ } ⇒ Beh. „ ⇐“: Sei U ⊆ Y offen, X ∈ f −1 - (U ). -Dann gibt es ε > 0 , sodass B -ε (f ( x )) ⊆ U +(U ). +Dann gibt es ε > 0, sodass B +ε(f (x)) ⊆ U Vor. -==⇒ Es gibt δ > 0 , sodass f ( B -δ (x )) ⊆ B - ε ( f (x ))) +==⇒ Es gibt δ > 0, sodass f (B +δ (x)) ⊆ B +ε(f (x))) ⇒ B - δ (x ) ⊆ f − 1 - (B - ε ( f (x ))) ⊆ f −1 - (U ) +δ (x) ⊆ f −1 +(B +ε(f (x))) ⊆ f −1 +(U ) Bemerkung 8 Seien X, Y topologische Räume und f : X → Y eine Abbildung. Dann gilt: f ist stetig ⇔ für jede abgeschlossene Teilmenge A ⊆ Y gilt : f −1 - ( A ) ⊆ X ist abgeschlossen. +(A) ⊆ X ist abgeschlossen. 
Beispiel 13 (Stetige Abbildungen und Homöomorphismen) -1)Für jeden topologischen Raum X gilt: id - X : X → X ist Homöomorphismus. +1) Für jeden topologischen Raum X gilt: id +X : X → X ist Homöomorphismus. 2 - Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. - 1.3. STETIGKEIT -2) Ist ( Y , T +Es wird die Äquivalenz von Stetigkeit im Sinne der Analysis und Topologie auf metrischen Räumen gezeigt. + 1.3. STETIGKEIT +2) Ist (Y , T Y ) trivialer topologischer Raum, d. h. T Y = T -triv , so ist jede Abbildung +triv, so ist jede Abbildung f : X → Y stetig. 3) Ist X diskreter topologischer Raum, so ist f : X → Y stetig für jeden topologischen Raum Y und jede Abbildung f . -4)Sei X = [0, 1), Y = S 1 - = { z ∈ C | z = 1 } und f (t ) = e 2πit - . +4) Sei X = [0, 1), Y = S 1 + = { z ∈ C | z = 1 } und f (t) = e2πit +. R 0 1 0f g -Abbildung 1.5:Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist. -Die Umkehrabbildung g ist nicht stetig, da g − 1 - (U ) nicht offen ist (vgl.Abbildung 1.5). +Abbildung 1.5: Beispiel einer stetigen Funktion f , deren Umkehrabbildung g nicht stetig ist. +Die Umkehrabbildung g ist nicht stetig, da g−1 +(U ) nicht offen ist (vgl. Abbildung 1.5). Bemerkung 9 (Verkettungen stetiger Abbildungen sind stetig) Seien X, Y , Z topologische Räume, f : X → Y und g : Y → Z stetige Abbildungen. Dann ist g ◦ f : X → Z stetig. X f -g ◦f +g◦f Y g Z -Beweis: Sei U ⊆ Z offen ⇒ ( g ◦ f ) − 1 - ( U ) = f −1 - ( g −1 - ( U )) . g − 1 - ( U ) ist offen in Y weil g stetig -ist, f − 1 - ( g − 1 - ( U )) ist offen in X , weil f stetig ist. +Beweis: Sei U ⊆ Z offen ⇒ (g ◦ f )−1 +(U ) = f −1 +(g−1 +(U )). g−1 +(U ) ist offen in Y weil g stetig +ist, f −1 +(g−1 +(U )) ist offen in X , weil f stetig ist. Bemerkung 10 -a)Für jeden topologischen Raum X ist +a) Für jeden topologischen Raum X ist Homöo(X ) := { f : X → X | f ist Homöomorphismus } eine Gruppe. 
-b)Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. -c) Iso - ( X ) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo( X ) für +b) Jede Isometrie f : X → Y zwischen metrischen Räumen ist ein Homöomorphismus. +c) Iso(X ) := { f : X → X | f ist Isometrie } ist eine Untergruppe von Homöo(X ) für jeden metrischen Raum X . Bemerkung 11 (Pro jektionen sind stetig) Seien X, Y topologische Räume. π X : X × Y → X und π Y : X × Y → Y die Pro jektionen π - X : (x, y ) → x und π - Y : (x, y ) → y +X : (x, y) → x und π +Y : (x, y) → y Wird X × Y mit der Produkttopologie versehen, so sind π X und π - Y stetig. +Y stetig. Beweis: Sei U ⊆ X offen -⇒ π − 1 -X ( U ) = U × Y ist offen in X × Y . +⇒ π−1 +X (U ) = U × Y ist offen in X × Y . Bemerkung 12 Sei X ein topologischer Raum, ∼ eine Äquivalenzrelation auf X , X = X/ - ∼ der Bahnenraum -versehen mit der Quotiententopologie, π : X → X , x → [x ] - ∼ . +∼ der Bahnenraum +versehen mit der Quotiententopologie, π : X → X , x → [x] +∼. Dann ist π stetig. - 1.4. ZUSAMMENHANG -Beweis: Nach Definition ist U ⊆ X offen ⇔ π − 1 - ( U ) ⊆ X offen. + 1.4. ZUSAMMENHANG +Beweis: Nach Definition ist U ⊆ X offen ⇔ π−1 +(U ) ⊆ X offen. Beobachtung: Die Quotiententopologie ist die feinste Topologie, sodass π stetig wird. Beispiel 14 (Stereographische Pro jektion) -R n +Rn und S n \ { N } sind homöomorph für beliebiges N ∈ S n - . Es gilt: +. Es gilt: S n = - x ∈ R n+1 + x ∈ Rn+1 - x = 1 + x = 1 = - x ∈ R n+1 + x ∈ Rn+1 n+1 -i =1 x 2 +i=1 x2 i = 1 O. B. d. A. sei N =     - 0 +0 . . . 0 -1  +1    @@ -619,14 +622,14 @@ O. B. d. A. sei N =  einem Punkt ˆ P . P wird auf ˆ P abgebildet. -f : S n - \ { N } → R n +f :S n + \ { N } → Rn P → genau ein Punkt L P ∩ H -wobei R n +wobei Rn = H =    @@ -634,26 +637,26 @@ wobei R n     x - 1 +1 . . . x - n+1  +n+1  - ∈ R n+1 + ∈ Rn+1 x -n +1 = 0  +n+1 = 0      und L -P die Gerade in R n+1 +P die Gerade in Rn+1 durch N und P ist. 
Sei P =  @@ -664,12 +667,12 @@ Sei P =  . . x - n+1  +n+1  - , so ist x -n +1 < 1 , also ist L - P nicht parallel zu H . Also schneiden sich L - P +, so ist x +n+1 < 1, also ist L +P nicht parallel zu H . Also schneiden sich L +P und H in genau einem Punkt ˆ P . Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. @@ -677,7 +680,7 @@ Es gilt: f ist bijektiv und die Umkehrabbildung ist ebenfalls stetig. Definition 13 a) Ein Raum X heißt zusammenhängend, wenn es keine offenen, nichtleeren Teilmengen U -1 , U +1, U 2 von X gibt mit U 1 ∩ U 2 = ∅ und U @@ -685,80 +688,80 @@ U 2 = X . b) Eine Teilmenge Y ⊆ X heißt zusammenhängend, wenn Y als topologischer Raum mit der Teilraumtopologie zusammenhängend ist. - 1.4. ZUSAMMENHANG + 1.4. ZUSAMMENHANG x yz N ˆ P0 P -Abbildung 1.6:Visualisierung der stereographischen Pro jektion +Abbildung 1.6: Visualisierung der stereographischen Pro jektion Bemerkung 13 X ist zusammenhängend ⇔ Es gibt keine abgeschlossenen, nichtleeren Teilmengen A - 1 , A +1, A 2 mit A - 1 ∩ A - 2 = ∅ und A +1 ∩ A +2 = ∅ und A 1 ∪ A - 2 = X . +2 = X . Beispiel 15 (Zusammenhang von Räumen) -1) (R n - , T -Euklid ) ist zusammenhängend, denn: -Annahme: R n +1) (Rn +, T +Euklid) ist zusammenhängend, denn: +Annahme: Rn = U 1 ˙ ∪ U 2 mit ∅ = U - 1 , U +1, U 2 ∈ T Euklid existieren. Sei x ∈ U - 1 , y ∈ U - 2 und [ x, y ] die Strecke zwischen x und y . Sei V = [ x, y ] . Nun +1, y ∈ U +2 und [x, y] die Strecke zwischen x und y. Sei V = [x, y]. Nun betrachten wir V Rn als (metrischen) Teilraum mit der Teilraumtopologie T V . Somit gilt U - 1 ∩ [ x, y ] ∈ T - V wegen der Definition der Teilraumtopologie. -Dann gibt es z ∈ [ x, y ] mit z ∈ ∂ ( U -1 ∩ [ x, y ]) , aber z /∈ U +1 ∩ [x, y] ∈ T +V wegen der Definition der Teilraumtopologie. +Dann gibt es z ∈ [x, y] mit z ∈ ∂ (U +1 ∩ [x, y]), aber z /∈ U 1 ⇒ z ∈ U - 2 . In jeder +2. In jeder Umgebung von z liegt ein Punkt von U 1 ⇒ Widerspruch zu U 2 offen. 
2) R \ { 0 } ist nicht zusammenhängend, denn R \ { 0 } = R - < 0 ∪ R - >0 -3) R 2 +<0 ∪ R +>0 +3) R2 \ { 0 } ist zusammenhängend. 4) Q R ist nicht zusammenhängend, da (Q ∩ R - < √ - 2 ) ∪ (Q ∩ R - >√ - 2 ) = Q +<√ +2) ∪ (Q ∩ R +>√ +2) = Q 5) { x } ist zusammenhängend für jedes x ∈ X , wobei X ein topologischer Raum ist. 6) R mit Zariski-Topologie ist zusammenhängend. Bemerkung 14 Sei X ein topologischer Raum und A ⊆ X zusammenhängend. Dann ist auch A zusammenhängend. - 1.4. ZUSAMMENHANG + 1.4. ZUSAMMENHANG Beweis: durch Widerspruch Annahme: A = A - 1 ∪ A - 2 , A - i abgeschlossen, A - i = ∅, A - 1 ∩ A - 2 = ∅ +1 ∪ A +2, A +i abgeschlossen, A +i = ∅, A +1 ∩ A +2 = ∅ ⇒ A = (A ∩ A - 1 ) +1) abgeschlossen ˙ -∪ ( A ∩ A -2 ) +∪ (A ∩ A +2) abgeschlossen @@ -767,171 +770,171 @@ disjunkt Wäre A ∩ A 1 = ∅ ⇒ A ⊆ A = A - 1 ˙ +1 ˙ ∪ A - 2 +2 ⇒ A ⊆ A - 2 ⇒ A ⊆ A - 2 +2 ⇒ A ⊆ A +2 ⇒ A - 1 = ∅ +1 = ∅ ⇒ Widerspruch zu A - 1 = ∅ +1 = ∅ ⇒ A ∩ A - 1 = ∅ und analog A ∩ A - 2 = ∅ +1 = ∅ und analog A ∩ A +2 = ∅ ⇒ Widerspruch zu A ist zusammenhängend. Bemerkung 15 Sei X ein topologischer Raum und A, B ⊆ X zusammenhängend. -Ist A ∩ B = ∅ , dann ist A ∪ B zusammenhängend. +Ist A ∩ B = ∅, dann ist A ∪ B zusammenhängend. Beweis: Sei A ∪ B = U 1 ˙ ∪ U -2 , U +2, U i = ∅ offen o. B. d. A. ======⇒ A = (A ∩ U -1 ) ˙ +1) ˙ ∪ (A ∩ U - 2 ) offen +2) offen A zhgd. ====⇒ A ∩ U 1 = ∅ -A ∩B =∅ +A∩B=∅ ====⇒ U 1 ⊆ B B = (B ∩ U - 1 ) +1) =U - 1 ∪ ( B ∩ U -2 ) +1 ∪ (B ∩ U +2) -= ∅ ist unerlaubte Zerlegung. +=∅ ist unerlaubte Zerlegung. Definition 14 Sei X ein topologischer Raum. -Für x ∈ X sei Z (x ) ⊆ X definiert durch -Z ( x) := -A ⊆X zhgd. -x ∈ AA -Z ( x ) heißt Zusammenhangskomponente . +Für x ∈ X sei Z (x) ⊆ X definiert durch +Z (x) := +A⊆X zhgd. +x∈AA +Z (x) heißt Zusammenhangskomponente. Bemerkung 16 (Eigenschaften von Zusammenhangskomponenten) Sei X ein topologischer Raum. Dann gilt: -a) Z ( x) ist die größte zusammenhängende Teilmenge von X , die x enthält. -b) Z ( x) ist abgeschlossen. 
+a) Z (x) ist die größte zusammenhängende Teilmenge von X , die x enthält. +b) Z (x) ist abgeschlossen. c) X ist disjunkte Vereinigung von Zusammenhangskomponenten. Beweis: - 1.5. KOMPAKTHEIT -a)Sei Z ( x) = A - 1 ˙ + 1.5. KOMPAKTHEIT +a) Sei Z (x) = A +1 ˙ ∪ A 2 mit A - i = ∅ abgeschlossen. +i = ∅ abgeschlossen. O. B. d. A. sei x ∈ A - 1 und y ∈ A - 2 . y liegt in einer zusammehängenden Teilmenge A , +1 und y ∈ A +2. y liegt in einer zusammehängenden Teilmenge A, die auch x enthält. ⇒ A = (A ∩ A -1 ) +1) -x ∪ ( A ∩ A -2 ) +x ∪ (A ∩ A +2) - y ist unerlaubte Zerlegung. -b)NachBemerkung 14ist Z (x ) zusammenhängend ⇒ Z (x ) ⊆ Z (x ) ⇒ Z ( x ) = Z (x ) -c)Ist Z (y ) ∩ Z ( x) = ∅ Bem. 15 -=====⇒ Z (y ) ∪ Z (x ) ist zusammenhängend. -⇒ Z (x ) ∪ Z ( y ) ⊆ Z (x ) ⇒ Z ( y ) ⊆ Z (x ) -⊆ Z (y ) ⇒ Z ( x ) ⊆ Z (y ) +y ist unerlaubte Zerlegung. +b) Nach Bemerkung 14 ist Z (x) zusammenhängend ⇒ Z (x) ⊆ Z (x) ⇒ Z (x) = Z (x) +c) Ist Z (y) ∩ Z (x) = ∅ Bem. 15 +=====⇒ Z (y) ∪ Z (x) ist zusammenhängend. +⇒ Z (x) ∪ Z (y) ⊆ Z (x) ⇒ Z (y) ⊆ Z (x) +⊆ Z (y) ⇒ Z (x) ⊆ Z (y) Bemerkung 17 -Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f ( A ) ⊆ Y zusammenhängend. -Beweis: Sei f (A ) = U +Sei f : X → Y stetig. Ist A ⊆ X zusammenhängend, so ist f (A) ⊆ Y zusammenhängend. +Beweis: Sei f (A) = U 1 ∪ U -2 , U -i = ∅ , offen, disjunkt. +2, U +i = ∅, offen, disjunkt. ⇒ f −1 - (f ( A)) = f −1 - (U - 1 ) ∪ f −1 - ( U -2 ) -⇒ A = (A ∩ f − 1 - ( U -1 )) - -= ∅ ∪ ( A ∩ f −1 - (U - 2 )) +(f (A)) = f −1 +(U +1) ∪ f −1 +(U +2) +⇒ A = (A ∩ f −1 +(U +1)) + +=∅ ∪ (A ∩ f −1 +(U +2)) =∅ 1.5 Kompaktheit Definition 15 -Sei X eine Menge und U ⊆ P (X ) . +Sei X eine Menge und U ⊆ P (X ). 
U heißt eine Überdeckung von X , wenn gilt: ∀x ∈ X : ∃M ∈ U : x ∈ M Definition 16 -Ein topologischer Raum X heißt kompakt , wenn jede offene Überdeckung von X +Ein topologischer Raum X heißt kompakt, wenn jede offene Überdeckung von X U = { U i } -i ∈I mit U +i∈I mit U i offen in X eine endliche Teilüberdeckung -i ∈J ⊆IU - i = X mit |J | ∈ N +i∈J ⊆IU +i = X mit |J | ∈ N besitzt. Bemerkung 18 Das Einheitsintervall I := [0, 1] ist kompakt bezüglich der euklidischen Topologie. -Beweis: Sei ( U -i ) -i ∈ J eine offene Überdeckung von I . +Beweis: Sei (U +i) +i∈J eine offene Überdeckung von I . Es genügt zu zeigen, dass es ein δ > 0 gibt, sodass jedes Teilintervall der Länge δ von I in einem der U - i enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle - 1.5. KOMPAKTHEIT +i enthalten ist. Wenn es ein solches δ gibt, kann man I in endlich viele Intervalle + 1.5. KOMPAKTHEIT der Länge δ unterteilen und alle U i in die endliche Überdeckung aufnehmen, die Teilintervalle enthalten. -Angenommen, es gibt kein solches δ . Dann gibt es für jedes n ∈ N ein Intervall I -n ⊆ [0 , 1] +Angenommen, es gibt kein solches δ. Dann gibt es für jedes n ∈ N ein Intervall I +n ⊆ [0, 1] der Länge 1 /n sodass I n U - i für alle i ∈ J . +i für alle i ∈ J . Sei x - n der Mittelpunkt von I - n . Die Folge ( x - n ) hat einen Häufungspunkt x ∈ [0 , 1] . Dann +n der Mittelpunkt von I +n. Die Folge (x +n) hat einen Häufungspunkt x ∈ [0, 1]. Dann gibt es i ∈ J mit x ∈ U - i . Da U - i offen ist, gibt es ein ε > 0 , sodass ( x − ε, x + ε ) ⊆ U - i . +i. Da U +i offen ist, gibt es ein ε > 0, sodass (x − ε, x + ε) ⊆ U +i. Dann gibt es n - 0 , sodass gilt: 1 +0, sodass gilt: 1 /n 0 < ε /2 und für unendlich viele3 n ≥ n - 0 : |x − x -n | < ε +0 : |x − x +n| < ε /2, also I - n ⊆ ( x − ε, x + ε ) ⊆ U - i für mindestens ein n ∈ N .4 +n ⊆ (x − ε, x + ε) ⊆ U +i für mindestens ein n ∈ N.4 ⇒ Widerspruch -Dann überdecke [0 , 1] mit endlich vielen Intervallen I - 1 , . . . 
, I - d der Länge δ . Jedes I - j ist in +Dann überdecke [0, 1] mit endlich vielen Intervallen I +1, . . . , I +d der Länge δ. Jedes I +j ist in U - ij enthalten. +ij enthalten. ⇒ U - j - 1 , . . . , U - j +j +1 , . . . , U +j d ist endliche Teilüberdeckung von U . Beispiel 16 (Kompakte Räume) 1) R ist nicht kompakt. @@ -939,259 +942,259 @@ Beispiel 16 (Kompakte Räume) U n = (1 /n, 1 − 1 -/n ) ⇒ -n ∈N U +/n) ⇒ +n∈N U n = (0, 1) 3) R mit der Zariski-Topologie ist kompakt und jede Teilmenge von R ist es auch. Bemerkung 19 Sei X kompakter Raum, A ⊆ X abgeschlossen. Dann ist A kompakt. Beweis: Sei (V - i ) -i ∈I offene Überdeckung von A. +i) +i∈I offene Überdeckung von A. Dann gibt es für jedes i ∈ I eine offene Teilmenge U - i ⊆ X mit V - i = U - i ∩ A . +i ⊆ X mit V +i = U +i ∩ A. ⇒ A ⊆ -i ∈ I U - i +i∈I U +i ⇒ U = { U i | i ∈ I } ∪ { X \ A } ist offene Überdeckung von X X kompakt =======⇒ es gibt i -1 , . . . , i - n ∈ I , sodass n +1, . . . , i +n ∈ I , sodass n -j =1 U - i - j ∪ (X \ A ) = X +j=1 U +i +j ∪ (X \ A) = X ⇒   n -j =1 U - i - j ∪ (X \ A ) +j=1 U +i +j ∪ (X \ A)  ∩ A = A ⇒ n -j =1 (U - i - j ∩ A ) +j=1 (U +i +j ∩ A) -= V - i -j ∪ ((X \ A ) ∩ A ) +=V +i +j ∪ ((X \ A) ∩ A) -= ∅ = A +=∅ = A ⇒ V i - 1 , . . . , V - i - n überdecken A . +1 , . . . , V +i +n überdecken A. Bemerkung 20 Seien X, Y kompakte topologische Räume. Dann ist X × Y mit der Produkttopologie kompakt. -Beweis: Sei ( W - i ) - i ∈I eine offene Überdeckung von X × Y . Für jedes ( x, y ) ∈ X × Y gibt es +Beweis: Sei (W +i) +i∈I eine offene Überdeckung von X × Y . Für jedes (x, y) ∈ X × Y gibt es offene Teilmengen U x,y von X und V x,y von Y sowie ein i ∈ I , sodass U - x,y × V - x,y ⊆ W - i . +x,y × V +x,y ⊆ W +i. 3 - Dies gilt nicht für alle n ≥ n -0 , da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. +Dies gilt nicht für alle n ≥ n +0, da ein Häufungspunkt nur eine konvergente Teilfolge impliziert. 4 - Sogar für unendlich viele. - 1.5. 
KOMPAKTHEIT +Sogar für unendlich viele. + 1.5. KOMPAKTHEIT W - i +i x y xV - x,y +x,y U x,y YX -Abbildung 1.7:Die blaue Umgebung ist Schnitt vieler Umgebungen +Abbildung 1.7: Die blaue Umgebung ist Schnitt vieler Umgebungen Die offenen Mengen U - x -0 ,y × V - x - 0 ,y für festes x - 0 und alle y ∈ Y überdecken { x - 0 } × y . Da Y +x +0,y × V +x +0,y für festes x +0 und alle y ∈ Y überdecken { x +0 } × y. Da Y kompakt ist, ist auch { x - 0 } × Y kompakt. Also gibt es y -1 , . . . , y - m (x - 0 ) mit - m (x - 0 ) -i =1 U - x - 0 ,y - i × +0 } × Y kompakt. Also gibt es y +1, . . . , y +m(x +0) mit +m(x +0) +i=1 U +x +0,y +i × V - x - 0 ,y - i ⊇ { x - 0 } × Y . +x +0,y +i ⊇ { x +0 } × Y . Sei U - x - 0 := - m (x ) -i =1 U x - 0 ,y - i . Da X kompakt ist, gibt es x - 1 , . . . , x - n ∈ X mit - n -j =1 U +0 := +m(x) +i=1 U +x +0,y +i . Da X kompakt ist, gibt es x +1, . . . , x +n ∈ X mit +n +j=1 U x - j = X +j = X ⇒ k -j =1 - m (x - j ) -i =1 - U - x - j ,y - i × V +j=1 +m(x +j ) +i=1 +U +x +j ,y +i × V x - j ,y - i +j ,y +i Ein grün-oranges Kästchen⊇ X × Y ⇒ j - i W - i ( x - j , y -i ) = X × Y +i W +i(x +j , y +i) = X × Y Bemerkung 21 Sei X ein Hausdorffraum und K ⊆ X kompakt. Dann ist K abgeschlossen. Beweis: z. Z.: Komplement ist offen Ist X = K , so ist K abgeschlossen in X . Andernfalls sei y ∈ X \ K . Für jedes x ∈ K seien U - x bzw. V - y Umgebungen von x bzw. von y , sodass U +x bzw. V +y Umgebungen von x bzw. von y, sodass U x ∩ V -y = ∅ . +y = ∅. X i Kx y Da K kompakt ist, gibt es endlich viele x - 1 , . . . , x - n ∈ K , sodass - m -i =1 U - x - i ⊇ K . +1, . . . , x +n ∈ K , sodass +m +i=1 U +x +i ⊇ K . Sei V := n -i =1 V - x - i - 1.6. WEGE UND KNOTEN +i=1 V +x +i + 1.6. WEGE UND KNOTEN ⇒ V ∩ n -i =1 U +i=1 U x - i +i = ∅ ⇒ V ∩ K = ∅ -⇒ V ist Überdeckung von y , die ganz in X \ K enthalten ist. +⇒ V ist Überdeckung von y, die ganz in X \ K enthalten ist. ⇒ X \ K ist offen Damit ist K abgeschlossen. 
Bemerkung 22 Seien X, Y topologische Räume, f : X → Y stetig. -Ist K ⊆ X kompakt, so ist f ( K ) ⊆ Y kompakt. +Ist K ⊆ X kompakt, so ist f (K ) ⊆ Y kompakt. Beweis: Sei (V - i ) -i ∈ I offene Überdeckung von f ( K ) +i) +i∈I offene Überdeckung von f (K ) f stetig -====⇒ (f − 1 - ( V -i )) -i ∈ I ist offene Überdeckung von K +====⇒ (f −1 +(V +i)) +i∈I ist offene Überdeckung von K Kompakt =====⇒ es gibt i -1 , . . . , i - n , sodass f − 1 - ( V +1, . . . , i +n, sodass f −1 +(V +i +1 ), . . . , f −1 +(V i - 1 ), . . . , f − 1 - ( V +n ) Überdeckung von K ist. +⇒ f (f −1 +(V i - n ) Überdeckung von K ist. -⇒ f (f − 1 - (V - i - 1 )), . . . , f (f − 1 - ( V +1 )), . . . , f (f −1 +(V i - n )) überdecken f (K ). +n )) überdecken f (K ). Es gilt: f (f −1 - ( V )) = V ∩ f ( X ) +(V )) = V ∩ f (X ) Satz 1.1 (Heine-Borel) -Eine Teilmenge von R n +Eine Teilmenge von Rn oder Cn ist genau dann kompakt, wenn sie beschränkt und abgeschlossen ist. -Beweis: „ ⇒“: Sei K ⊆ R n - (oder C n - ) kompakt. -Da R n - und C n - hausdorffsch sind, ist K nachBemerkung 21abgeschlossen. Nach Voraussetzung +Beweis: „ ⇒“: Sei K ⊆ Rn + (oder Cn +) kompakt. +Da Rn + und Cn + hausdorffsch sind, ist K nach Bemerkung 21 abgeschlossen. Nach Voraussetzung kann K mit endlich vielen offenen Kugeln von Radien 1 überdeckt werden ⇒ K ist beschränkt. -„ ⇐“ Sei A ⊆ R n +„ ⇐“ Sei A ⊆ Rn (oder Cn - ) beschränkt und abgeschlossen. +) beschränkt und abgeschlossen. Dann gibt es einen Würfel W = [−N , N ] × · · · × [−N , N ] n mal mit A ⊆ W bzw. „Polyzylinder“ Z = { (z - 1 , . . . , z - n ) ∈ C n +1, . . . , z +n) ∈ Cn | z - i ≤ N für i = 1, . . . , n } -NachBemerkung 20undBemerkung 18istW kompakt, also ist A nachBemerkung 19auch +i ≤ N für i = 1, . . . , n } +Nach Bemerkung 20 und Bemerkung 18 ist W kompakt, also ist A nach Bemerkung 19 auch kompakt. Genauso ist Z kompakt, weil -{ z ∈ C z | ≤ 1 } +{ z ∈ C z| ≤ 1 } homöomorph zu - (x, y ) ∈ R 2 + (x, y) ∈ R2 - ( x, y ) ≤ 1 + (x, y) ≤ 1 ist. 
1.6 Wege und Knoten Definition 17 Sei X ein topologischer Raum. - 1.6. WEGE UND KNOTEN -a)Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X . -b) γ heißt geschlossen , wenn γ (1) = γ (0) gilt. -c) γ heißt einfach , wenn γ | -[0, 1) injektiv ist. + 1.6. WEGE UND KNOTEN +a) Ein Weg in X ist eine stetige Abbildung γ : [0, 1] → X . +b) γ heißt geschlossen, wenn γ (1) = γ (0) gilt. +c) γ heißt einfach, wenn γ | +[0,1) injektiv ist. Beispiel 17 Ist X diskret, so ist jeder Weg konstant, d. h. von der Form -∀x ∈ [0, 1] : γ ( x) = c, c ∈ X +∀x ∈ [0, 1] : γ (x) = c, c ∈ X Denn γ ([0, 1]) ist zusammenhängend für jeden Weg γ . Definition 18 Ein topologischer Raum X heißt wegzusammenhängend, wenn es zu je zwei Punkten -x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ (0) = x und γ (1) = y . +x, y ∈ X einen Weg γ : [0, 1] → X gibt mit γ (0) = x und γ (1) = y. Bemerkung 23 Sei X ein topologischer Raum. a) X ist wegzusammenhängend ⇒ X ist zusammenhängend @@ -1199,75 +1202,75 @@ b) X ist wegzusammenhängend ⇐ X ist zusammenhängend Beweis: a) Sei X ein wegzusammenhängender topologischer Raum, A -1 , A +1, A 2 nichtleere, disjunkte, abgeschlossene Teilmengen von X mit A - 1 ∪ A - 2 = X . Sei x ∈ A - 1 , y ∈ A -2 , γ : [0, 1] → X -ein Weg von x nach y . +1 ∪ A +2 = X . Sei x ∈ A +1, y ∈ A +2, γ : [0, 1] → X +ein Weg von x nach y. Dann ist C := γ ([0, 1]) ⊆ X zusammenhängend, weil γ stetig ist. C = (C ∩ A - 1 ) +1) - x ∪ ( C ∩ A - 2 ) +x ∪ (C ∩ A +2) - y +y ist Zerlegung in nichtleere, disjunkte, abgeschlossene Teilmengen ⇒ Widerspruch -b)Sei X = - ( x, y ) ∈ R 2 +b) Sei X = + (x, y) ∈ R2 - x 2 - + y 2 - = 1 ∨ y = 1 + 2 · e − 1 + x2 + + y2 + = 1 ∨ y = 1 + 2 · e− 1 10 x . -Abbildung 1.8averanschaulicht diesen Raum. +Abbildung 1.8a veranschaulicht diesen Raum. Sei U - 1 ∪ U +1 ∪ U 2 = X, U 1 = U 2 = ∅, U i offen. X = C ∪ S . Dann ist C ⊆ U 1 oder C ⊆ U -2 , +2, weil C und S zusammenhängend sind. Also ist C = U 1 und S = U 2 (oder umgekehrt). 
Sei y ∈ C = U -1 , ε > 0 und B - ε ( y ) ⊆ U - 1 eine Umgebung von y , die in U +1, ε > 0 und B +ε(y) ⊆ U +1 eine Umgebung von y, die in U 1 enthalten ist. Aber: B -ε ( y ) ∩ S = ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht +ε(y) ∩ S = ∅ ⇒ Widerspruch ⇒ X ∪ S ist zusammenhängend, aber nicht wegzusammenhängend. Beispiel 18 (Hilbert-Kurve) -Es gibt stetige, surjektive Abbildungen [0 , 1] → [0 , 1] × [0 , 1] . Ein Beispiel ist die inAbbildung - 1.9dargestellte Hilbert-Kurve. +Es gibt stetige, surjektive Abbildungen [0, 1] → [0, 1] × [0, 1]. Ein Beispiel ist die in Abbildung + 1.9 dargestellte Hilbert-Kurve. Definition 19 Sei X ein topologischer Raum. Eine Jordankurve in X ist ein Homöomorphismus γ : [0, 1] → C ⊆ X bzw. γ : S 1 → C ⊆ X , wobei C := Bild γ . - 1.6. WEGE UND KNOTEN -(a)Spirale S mit Kreis C 0. 1 1 + 1.6. WEGE UND KNOTEN +(a) Spirale S mit Kreis C 0.1 1 −101 X -Y {( x, sin( 1 +Y {(x, sin( 1 x )) ∈ X × Y } -(−1 , 1) ⊆ Y -(b)Sinus +(−1, 1) ⊆ Y +(b) Sinus Abbildung 1.8: Beispiele für Räume, die zusammenhängend, aber nicht wegzusammenhängend sind. (a) n = 1 (b) n = 2 (c) n = 3 (d) n = 4 (e) n = 5 -Abbildung 1.9:Hilbert-Kurve +Abbildung 1.9: Hilbert-Kurve Jede Jordankurve ist also ein einfacher Weg. Satz 1.2 (Jordanscher Kurvensatz) -Ist C = γ ([0 , 1]) eine geschlossene Jordankurve in R 2 - , so hat R 2 +Ist C = γ ([0, 1]) eine geschlossene Jordankurve in R2 +, so hat R2 \ C genau zwei Zusammenhangskomponenten, von denen eine beschränkt ist und eine unbeschränkt. außen @@ -1279,262 +1282,262 @@ Beweis: ist technisch mühsam und wird hier nicht geführt. Er kann in „Algebr Eine Einführung“ von R. Stöcker und H. Zieschang auf S. 301f (ISBN 978-3519122265) nachgelesen werden. Idee: Ersetze Weg C durch Polygonzug. - 1.6. WEGE UND KNOTEN + 1.6. WEGE UND KNOTEN Definition 20 -Eine geschlossene Jordankurve in R 3 +Eine geschlossene Jordankurve in R3 heißt Knoten. 
Beispiel 19 (Knoten) -(a)Trivialer Knoten (b)Kleeblattknoten (c)Achterknoten (d) 6 - 2 -Knoten -Abbildung 1.11:Beispiele für verschiedene Knoten +(a) Trivialer Knoten (b) Kleeblattknoten (c) Achterknoten (d) 6 +2-Knoten +Abbildung 1.11: Beispiele für verschiedene Knoten Definition 21 Zwei Knoten γ - 1 , γ +1, γ 2 : S 1 - → R 3 + → R3 heißen äquivalent, wenn es eine stetige Abbildung H : S 1 - × [0, 1] → R 3 + × [0, 1] → R3 gibt mit H (z, 0) = γ - 1 ( z ) ∀z ∈ S 1 +1(z) ∀z ∈ S 1 H (z, 1) = γ - 2 ( z ) ∀z ∈ S 1 +2(z) ∀z ∈ S 1 und für jedes feste t ∈ [0, 1] ist H z : S 1 - → R 3 - , z → H (z, t) + → R3 +, z → H (z, t) ein Knoten. Die Abbildung H heißt Isotopie zwischen γ - 1 und γ - 2 . +1 und γ +2. Definition 22 Sei γ : [0, 1] → R3 - ein Knoten, E eine Ebene und π : R 3 + ein Knoten, E eine Ebene und π : R3 → E eine Pro jektion auf E . π heißt Knotendiagramm von γ , wenn gilt: - π − 1 - ( x ) +π−1 +(x) - ≤ 2 ∀x ∈ π (γ ) -Ist (π | -γ ([0, 1]) ) −1 - (x ) = { y -1 , y + ≤ 2 ∀x ∈ π(γ ) +Ist (π| +γ([0,1]))−1 +(x) = { y +1, y 2 }, so liegt y - 1 über y - 2 , wenn gilt: -∃λ > 1 : ( y - 1 − x) = λ (y - 2 − x ) +1 über y +2, wenn gilt: +∃λ > 1 : (y +1 − x) = λ(y +2 − x) Satz 1.3 (Satz von Reidemeister) Zwei endliche Knotendiagramme gehören genau dann zu äquivalenten Knoten, wenn sie durch endlich viele „Reidemeister-Züge“ ineinander überführt werden können. - 1.6. WEGE UND KNOTEN + 1.6. WEGE UND KNOTEN (a) Ω - 1 (b) Ω +1 (b) Ω 2 (c) Ω - 3 -Abbildung 1.12:Reidemeister-Züge -Beweis: Durch sorgfältige Fallunterscheidung. 5 +3 +Abbildung 1.12: Reidemeister-Züge +Beweis: Durch sorgfältige Fallunterscheidung.5 Definition 23 Ein Knotendiagramm heißt 3-färbbar, wenn jeder Bogen von D so mit einer Farbe gefärbt werden kann, dass an jeder Kreuzung eine oder 3 Farben auftreten und alle 3 Farben auftreten. - Abbildung 1.13:Ein 3-gefärber Kleeblattknoten + Abbildung 1.13: Ein 3-gefärber Kleeblattknoten 5 - Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177. 
- 1.6. WEGE UND KNOTEN +Siehe „Knot Theory and Its Applications“ von Kunio Murasugi. ISBN 978-0817638177. + 1.6. WEGE UND KNOTEN Übungsaufgaben Aufgabe 1 (Sierpińskiraum) Es sei X := { 0, 1 } und T -X := { ∅ , { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. -(a)Beweisen Sie, dass (X, T +X := { ∅, { 0 } , X }. Dies ist der sogenannte Sierpińskiraum. +(a) Beweisen Sie, dass (X, T X ) ein topologischer Raum ist. -(b)Ist (X, T +(b) Ist (X, T X ) hausdorffsch? -(c)Ist T +(c) Ist T X von einer Metrik erzeugt? Aufgabe 2 Es sei Z mit der von den Mengen U - a,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie +a,b := a + bZ(a ∈ Z, b ∈ Z \ { 0 }) erzeugten Topologie versehen. Zeigen Sie: -(a)Jedes U +(a) Jedes U a,b und jede einelementige Teilmenge von Z ist abgeschlossen. -(b) { − 1, 1 } ist nicht offen. -(c)Es gibt unendlich viele Primzahlen. +(b) { −1, 1 } ist nicht offen. +(c) Es gibt unendlich viele Primzahlen. Aufgabe 3 (Cantorsches Diskontinuum) Für jedes i ∈ N sei P - i := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := - i ∈N P - i . -(a)Wie sehen die offenen Mengen von P aus? -(b)Was können Sie über den Zusammenhang von P sagen? +i := { 0, 1 } mit der diskreten Topologie. Weiter Sei P := +i∈N P +i. +(a) Wie sehen die offenen Mengen von P aus? +(b) Was können Sie über den Zusammenhang von P sagen? Aufgabe 4 (Kompaktheit) -(a)Ist GL - n ( R) = { A ∈ R n×n +(a) Ist GL +n(R) = { A ∈ Rn×n | det(A) = 0 } kompakt? -(b)Ist SL -n ( R) = { A ∈ R n× n +(b) Ist SL +n(R) = { A ∈ Rn×n | det(A) = 1 } kompakt? -(c)Ist P (R ) kompakt? +(c) Ist P (R) kompakt? Aufgabe 5 (Begriffe) Definieren Sie die Begriffe „Homomorphismus“ und „Homöomorphismus“. Geben Sie, falls möglich, ein Beispiel für folgende Fälle an. Falls es nicht möglich ist, begründen Sie warum. -1)Ein Homomorphismus, der zugleich ein Homöomorphismus ist, -2)ein Homomorphismus, der kein Homöomorphismus ist, - 1.6. 
WEGE UND KNOTEN -3)ein Homöomorphismus, der kein Homomorphismus ist +1) Ein Homomorphismus, der zugleich ein Homöomorphismus ist, +2) ein Homomorphismus, der kein Homöomorphismus ist, + 1.6. WEGE UND KNOTEN +3) ein Homöomorphismus, der kein Homomorphismus ist Aufgabe 6 (Begriffe) Definieren Sie die Begriffe „Isomorphismus“, „Isotopie“ und „Isometrie“. 2 Mannigfaltigkeiten und Simplizialkomplexe 2.1 Topologische Mannigfaltigkeiten Definition 24 -Sei ( X, T) ein topologischer Raum und n ∈ N. -a) Eine n -dimensionale Karte auf X ist ein Paar ( U, ϕ) , wobei U ∈ T und ϕ : U → V -Homöomorphismus von U auf eine offene Teilmenge V ⊆ R n - . -b) Ein n -dimensionaler Atlas A auf X ist eine Familie ( U - i , ϕ -i ) - i ∈ I von Karten auf X , +Sei (X, T) ein topologischer Raum und n ∈ N. +a) Eine n-dimensionale Karte auf X ist ein Paar (U, ϕ), wobei U ∈ T und ϕ : U → V +Homöomorphismus von U auf eine offene Teilmenge V ⊆ Rn +. +b) Ein n-dimensionaler Atlas A auf X ist eine Familie (U +i, ϕ +i) +i∈I von Karten auf X , sodass - i ∈ I U +i∈I U i = X . c) X - heißt (topologische) n -dimensionale Mannigfaltigkeit , wenn X hausdorffsch ist, -eine abzählbare Basis der Topologie hat und einen n -dimensionalen Atlas besitzt. -Anschaulich ist also ein n -dimensionale Mannigfaltigkeit lokal dem Rn + heißt (topologische) n-dimensionale Mannigfaltigkeit, wenn X hausdorffsch ist, +eine abzählbare Basis der Topologie hat und einen n-dimensionalen Atlas besitzt. +Anschaulich ist also ein n-dimensionale Mannigfaltigkeit lokal dem Rn ähnlich. Bemerkung 24 (Mächtigkeit von Mannigfaltigkeiten) -Jede n -dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R . -Beweis: Sei (X, T ) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ R n - , wobei +Jede n-dimensionale Mannigfaltigkeit mit n ≥ 1 ist mindestens so mächtig wie R. +Beweis: Sei (X, T) ein topologischer Raum und (U, ϕ) mit U ∈ T und ϕ : U → V ⊆ Rn +, wobei V offen und ϕ ein Homöomorphismus ist, eine Karte auf X . 
-Da jede offene Teilmenge des R n - genauso mächtig ist wie der R n - , ϕ als Homöomorphismus +Da jede offene Teilmenge des Rn + genauso mächtig ist wie der Rn +, ϕ als Homöomorphismus insbesondere bijektiv ist und Mengen, zwischen denen eine Bijektion existiert, gleich mächtig -sind, ist U genauso mächtig wie der R n - . Da jede Mannigfaltigkeit mindestens eine Karte +sind, ist U genauso mächtig wie der Rn +. Da jede Mannigfaltigkeit mindestens eine Karte hat, muss jede Mannigfaltigkeit X mindestens so mächtig sein wie der Rn - . +. Hinweis: Es gibt auch noch 0-dimensionale Mannigfaltigkeiten. Diese Mannigfaltigkeiten können beliebig viele Elemente haben. Bemerkung 25 -a)Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1] +a) Es gibt surjektive, stetige Abbildungen [0, 1] → [0, 1] × [0, 1] b) Für n = m sind Rn - und R m + und Rm nicht homöomorph. Zum Beweis benutzt man den „Satz von der Gebietstreue“ (Brouwer): -Ist U ⊆ R n - offen und f : U → R n +Ist U ⊆ Rn + offen und f : U → Rn stetig und injektiv, so ist f (U ) offen. -Ist n < m und R m - homöomorph zu R n - , so wäre -f : R n - → R m - → R n - , ( x - 1 , . . . , x - n ) → (x - 1 , x -2 , . . . , x - n , 0, . . . , 0) -eine stetige injektive Abbildung. Also müsste f (R n - ) offen sein ⇒ Widerspruch +Ist n < m und Rm + homöomorph zu Rn +, so wäre +f : Rn + → Rm + → Rn +, (x +1, . . . , x +n) → (x +1, x +2, . . . , x +n, 0, . . . , 0) +eine stetige injektive Abbildung. Also müsste f (Rn +) offen sein ⇒ Widerspruch 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN Beispiel 20 (Mannigfaltigkeiten) -1) Jede offene Teilmenge U ⊆ R n - ist eine n -dimensionale Mannigfaltigkeit mit einem +1) Jede offene Teilmenge U ⊆ Rn + ist eine n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte. -2) C n - ist eine 2n -dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: +2) Cn + ist eine 2n-dimensionale Mannigfaltigkeit mit einem Atlas aus einer Karte: (z - 1 , . . . , z - n ) → ( (z - 1 ), (z - 1 ), . . . 
, ( z - n ) , (z - n )) +1, . . . , z +n) → ((z +1), (z +1), . . . , (z +n), (z +n)) 3) P n - (R ) = (R n+1 - \ { 0 } )/ +(R) = (Rn+1 + \ { 0 })/ ∼ = S n - / - ∼ und P n - (C ) sind Mannigfaltigkeiten der Dimension -n bzw. 2 n , da gilt: +/ +∼ und P n +(C) sind Mannigfaltigkeiten der Dimension +n bzw. 2n, da gilt: Sei U -i := { ( x +i := { (x 0 : · · · : x -n ) ∈ P n - (R ) | x - i = 0 } ∀i ∈ 0, . . . , n . Dann ist P n - (R ) = - n -i =0 U +n) ∈ P n +(R) | x +i = 0 } ∀i ∈ 0, . . . , n. Dann ist P n +(R) = +n +i=0 U i und die Abbildung U -i → R n -( x - 0 : · · · : x - n ) → +i → Rn +(x +0 : · · · : x +n) → x - 0 +0 x - i , . . . , +i , . . . , x - i +i x - i , . . . , x - n +i , . . . , x +n x - i +i (y - 1 : · · · : y -i −1 : 1 : y - i : · · · : y - n ) →(y - 1 , . . . , y - n ) +1 : · · · : y +i−1 : 1 : y +i : · · · : y +n) → (y +1, . . . , y +n) ist bijektiv. Die U -i mit i = 0, . . . , n bilden einen n -dimensionalen Atlas: +i mit i = 0, . . . , n bilden einen n-dimensionalen Atlas: x = (1 : 0 : 0) ∈ U - 0 → R 2 +0 → R2 x → (0, 0) y = (0 : 1 : 1) ∈ U - 2 → R 2 +2 → R2 y → (0, 1) Umgebung: B -1 (0, 1) → { (1 : u : v ) | ( u, v ) < 1 } = V - 1 +1(0, 1) → { (1 : u : v) | (u, v) < 1 } = V +1 Umgebung: B -1 (0, 1) → +1(0, 1) → (w : z : 1) - w 2 - + z 2 + w2 + + z2 < 1 = V 2 V - 1 ∩ V - 2 = ∅ ? +1 ∩ V +2 = ∅? (a : b : c) ∈ V - 1 ∩ V +1 ∩ V 2 ⇒ a = 0 und ( b -a ) 2 +a )2 + ( c a )2 < 1 ⇒ c @@ -1542,186 +1545,185 @@ a < 1 ⇒ c = 0 und ( a c )2 + ( b -c ) 2 +c )2 < 1 ⇒ a c < 1 ⇒ Widerspruch 4) S n = - x ∈ R n+1 + x ∈ Rn+1 - x = 1 - ist n -dimensionale Mannigfaltigkeit. + x = 1 + ist n-dimensionale Mannigfaltigkeit. Karten: D - i := {( x - 1 , . . . , x - n+1 ) ∈ S n - | x -i > 0 } → B -1 (0, . . . , 0 - - -∈ Rn ) +i := {(x +1, . . . , x +n+1) ∈ S n +|x +i > 0} → B +1(0, . . . , 0 + +∈Rn ) C - i := {( x - 1 , . . . , x - n+1 ) ∈ S n - | x -i < 0 } → B -1 (0, . . . , 0) +i := {(x +1, . . . , x +n+1) ∈ S n +|x +i < 0} → B +1(0, . . . , 0) (x - 1 , . . . 
, x - n+1 ) → ( x - 1 , . . . , +1, . . . , x +n+1) → (x +1, . . . , x - i , . . . , x - n +1 )1 +i, . . . , x +n+1)1 (x - 1 , . . . , x - n ) → (x -1 , . . . , x - i − 1 , +1, . . . , x +n) → (x +1, . . . , x +i−1, 1 − n -k =1 x 2 +k=1 x2 k , x -i , . . . , x - n ), oder − +i, . . . , x +n), oder − 1 − n -k =1 x2 +k=1 x2 k für C - i +i S n = - n+1 -i =1 (C - i ∪ D -i ) +n+1 +i=1 (C +i ∪ D +i) Als kompakte Mannigfaltigkeit wird S n auch „ geschlossene Mannigfaltigkeit“ genannt. 5) [0, 1] ist keine Mannigfaltigkeit, denn: -Es gibt keine Umgebung von 0 in [0 , 1] , die homöomorph zu einem offenem Intervall +Es gibt keine Umgebung von 0 in [0, 1], die homöomorph zu einem offenem Intervall ist. 1 - x +x i wird rausgenommen 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN 6) V - 1 = - ( x, y ) ∈ R 2 +1 = + (x, y) ∈ R2 x · y = 0 ist keine Mannigfaltigkeit. -Das Problem ist (0 , 0) . Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 -Zusammenhangskomponenten. Jeder R n +Das Problem ist (0, 0). Wenn man diesen Punkt entfernt, zerfällt der Raum in 4 +Zusammenhangskomponenten. Jeder Rn zerfällt jedoch in höchstens zwei Zusammenhangskomponenten, wenn man einen Punkt entfernt. 7) V - 2 = - ( x, y ) ∈ R 2 +2 = + (x, y) ∈ R2 - x 3 - = y 2 + x3 + = y2 ist eine Mannigfaltigkeit. 8) X = (R \ { 0 }) ∪ (0 -1 , 0 -2 ) +1, 0 +2) U ⊆ X offen ⇔ U offen in R \ { 0 } , falls 0 1 /∈ U, 0 - 2 ∈ U +2 ∈ U ∃ε > 0 : (−ε, ε) ⊆ U falls 0 1 ∈ U, 0 - 2 ∈ U -Insbesondere sind ( R \ { 0 }) ∪ { 0 - 1 } und ( R \ { 0 } ) ∪ { 0 +2 ∈ U +Insbesondere sind (R \ { 0 }) ∪ { 0 +1 } und (R \ { 0 }) ∪ { 0 2 } offen und homöomorph -zu R . +zu R. Aber: X ist nicht hausdorffsch! Denn es gibt keine disjunkten Umgebungen von 0 - 1 +1 und 0 - 2 . +2. 9) GL - n ( R ) ist eine Mannigfaltigkeit der Dimension n 2 - , weil offene Teilmengen von R n2 +n(R) ist eine Mannigfaltigkeit der Dimension n2 +, weil offene Teilmengen von Rn2 eine Mannigfaltigkeit bilden. 
Definition 25 -Seien X, Y n -dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Homöomorphismus +Seien X, Y n-dimensionale Mannigfaltigkeiten, U ⊆ X und V ⊆ Y offen, Φ : U → V ein Homöomorphismus Z = (X ˙ ∪ Y )/ - ∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation +∼ mit der von u ∼ Φ(u) ∀u ∈ U erzeugten Äquivalenzrelation und der von ∼ induzierten Quotiententopologie. -Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n -dimensionalen -Karten. Falls Z hausdorffsch ist, ist Z eine n -dimensionale Mannigfaltigkeit. +Z heißt Verklebung von X und Y längs U und V . Z besitzt einen Atlas aus n-dimensionalen +Karten. Falls Z hausdorffsch ist, ist Z eine n-dimensionale Mannigfaltigkeit. Bemerkung 26 Sind X, Y Mannigfaltigkeiten der Dimension n bzw. m, so ist X × Y eine Mannigfaltigkeit der Dimension n + m. Beweis: Produkte von Karten sind Karten. Beispiel 21 Mannigfaltigkeiten mit Dimension 1: -1)Offene Intervalle, R , (0, 1) sind alle homöomorph +1) Offene Intervalle, R, (0, 1) sind alle homöomorph 2) S 1 Mannigfaltigkeiten mit Dimension 2: -1) R 2 +1) R2 2) S 2 (0 Henkel) 3) T 2 (1 Henkel) -4)oder mehr Henkel, wie z.B. der Zweifachtorus inAbbildung 2.1 +4) oder mehr Henkel, wie z.B. der Zweifachtorus in Abbildung 2.1 Bemerkung 27 -Sei n ∈ N , F : R n - → R stetig differenzierbar und X = V ( F ) := { x ∈ R n - | F ( x) = 0 } das +Sei n ∈ N, F : Rn + → R stetig differenzierbar und X = V (F ) := { x ∈ Rn + | F (x) = 0 } das „vanishing set“ . Dann gilt: 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN -Abbildung 2.1:Durch Verklebung zweier Tori entsteht ein Zweifachtorus. -a) X ist abgeschlossen in R n -b)Ist grad(F )(X ) = 0 ∀x ∈ X , so ist X eine Mannigfaltigkeit der Dimension n − 1. +Abbildung 2.1: Durch Verklebung zweier Tori entsteht ein Zweifachtorus. +a) X ist abgeschlossen in Rn +b) Ist grad(F )(X ) = 0 ∀x ∈ X , so ist X eine Mannigfaltigkeit der Dimension n − 1. Beweis: a) Sei y ∈ Rn \ V (F ). 
Weil F stetig ist, gibt es δ > 0, sodass F (B -δ (y )) ⊆ B -ε (F (y )) mit +δ (y)) ⊆ B +ε(F (y)) mit ε = 1 -2 F ( y ) . Folgt B -δ ( y ) ∩ V (F ) = ∅ ⇒ Rn +2 F (y). Folgt B +δ (y) ∩ V (F ) = ∅ ⇒ Rn \ V (F ) ist offen. -b) Sei x ∈ X mit grad( F )( x ) = 0 , also o. B. d. A. ∂F +b) Sei x ∈ X mit grad(F )(x) = 0, also o. B. d. A. ∂F ∂X - 1 ( x ) = 0 , x = ( x - 1 , . . . , x - n ) , +1 (x) = 0, x = (x +1, . . . , x +n), x - := ( x - 2 , . . . , x - n ) ∈ R n− 1 - . Der Satz von der impliziten Funktion liefert nun: Es + := (x +2, . . . , x +n) ∈ Rn−1 +. Der Satz von der impliziten Funktion liefert nun: Es gibt Umgebungen U von x - und differenzierbare Funktionen g : U → R , sodass -G : U → R n - , u → (g (u), u) eine stetige Abbildung auf eine offene UmgebungV von x + und differenzierbare Funktionen g : U → R, sodass +G : U → Rn +, u → (g(u), u) eine stetige Abbildung auf eine offene Umgebung V von x in X ist. Beispiel 22 1) F - : R 3 - → R , (x, y, z ) → x 2 - + y 2 - + z 2 + : R3 + → R, (x, y, z) → x2 + + y2 + + z2 − 1, V (F ) = S 2 - , grad(F ) = (2x, 2y, 2z ) Bem. 27.b +, grad(F ) = (2x, 2y, 2z) Bem. 27.b ======⇒ S n - ist n -dimensionale Mannigfaltigkeit in R n+1 -2) F : R 2 - → R , (x, y ) → y 2 - − x 3 - Es gilt: grad(F ) = (−3x 2 - , 2y ). Also: grad(0, 0) = (0, 0). + ist n-dimensionale Mannigfaltigkeit in Rn+1 +2) F : R2 + → R, (x, y) → y2 + − x3 + Es gilt: grad(F ) = (−3x2 +, 2y). Also: grad(0, 0) = (0, 0). −5 −4 −3 @@ -1738,98 +1740,98 @@ S n 2 4−1000100 xyz - −1000100f (x, y ) - (a) F ( x, y ) = y 2 - − x 3 2 4 6 8 10 12 + −1000100f (x, y) + (a) F (x, y) = y2 + − x3 2 4 6 8 10 12 −10−5510 xy a = 1 3 a = 1 a = 2 -(b) y 2 +(b) y2 − ax3 = 0 -Abbildung 2.2:Rechts ist die Neilsche Parabel für verschiedene Parameter a. -Daher istBemerkung 27.bnicht anwendbar, aberV (F ) ist trotzdem eine 1-dimensionale +Abbildung 2.2: Rechts ist die Neilsche Parabel für verschiedene Parameter a. 
+Daher ist Bemerkung 27.b nicht anwendbar, aber V (F ) ist trotzdem eine 1-dimensionale topologische Mannigfaltigkeit. 2.1. TOPOLOGISCHE MANNIGFALTIGKEITEN Definition 26 -Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n -dimensionale -Mannigfaltigkeit mit Rand , wenn es einen Atlas (U - i , ϕ -i ) gibt, wobei U - i ⊆ X +Sei X ein Hausdorffraum mit abzählbarer Basis der Topologie. X heißt n-dimensionale +Mannigfaltigkeit mit Rand, wenn es einen Atlas (U +i, ϕ +i) gibt, wobei U +i ⊆ X i offen und ϕ i ein Homöomorphismus auf eine offene Teilmenge von -R n -+ , 0 := { (x - 1 , . . . , x - n ) ∈ Rn +Rn ++,0 := { (x +1, . . . , x +n) ∈ Rn | x n ≥ 0 } ist. -R n -+, 0 ist ein „Halbraum“ . +Rn ++,0 ist ein „Halbraum“ . Hinweis: Mannigfaltigkeiten mit Rand sind keine Mannigfaltigkeiten. ∼ = -(a)Halbraum +(a) Halbraum ∼ = -(b)Pair of pants ∼ +(b) Pair of pants ∼ = -(c)Sphäre mit einem Loch -Abbildung 2.3:Beispiele für Mannigfaltigkeiten mit Rand +(c) Sphäre mit einem Loch +Abbildung 2.3: Beispiele für Mannigfaltigkeiten mit Rand Definition 27 -Sei X eine n -dimensionale Mannigfaltigkeit mit Rand und Atlas A . Dann heißt +Sei X eine n-dimensionale Mannigfaltigkeit mit Rand und Atlas A. Dann heißt ∂ X := -( U,ϕ )∈A { x ∈ U | ϕ( x ) = 0 } +(U,ϕ)∈A { x ∈ U | ϕ(x) = 0 } Rand von X . -∂ X ist eine Mannigfaltigkeit der Dimension n − 1 . +∂ X ist eine Mannigfaltigkeit der Dimension n − 1. Definition 28 -Sei X eine n -dimensionale Mannigfaltigkeit mit Atlas (U - i , ϕ -i ) - i ∈I +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U +i, ϕ +i) +i∈I Für i, j ∈ I mit U - i ∩ U - j = ∅ heißt +i ∩ U +j = ∅ heißt ϕ ij := ϕ - j ◦ ϕ− 1 +j ◦ ϕ−1 i ϕ - i ( U +i(U i ∩ U - j ) → ϕ +j ) → ϕ j (U - i ∩ U +i ∩ U j ) Kartenwechsel oder Übergangsfunktion. 2.2. 
DIFFERENZIERBARE MANNIGFALTIGKEITEN -R n - R nU +Rn + RnU i U - j +j V i V - jX +jX ϕ - i ϕ - j -Abbildung 2.4:Kartenwechsel +i ϕ +j +Abbildung 2.4: Kartenwechsel 2.2 Differenzierbare Mannigfaltigkeiten Definition 29 -Sei X eine n -dimensionale Mannigfaltigkeit mit Atlas (U - i , ϕ -i ) - i ∈I . +Sei X eine n-dimensionale Mannigfaltigkeit mit Atlas (U +i, ϕ +i) +i∈I . a) X heißt differenzierbare Mannigfaltigkeit der Klasse C k , wenn jede Kartenwechselabbildung ϕ - ij , i, j ∈ I k -mal stetig differenzierbar ist. +ij , i, j ∈ I k-mal stetig differenzierbar ist. b) X heißt differenzierbare Mannigfaltigkeit, wenn X eine differenzierbare Mannigfaltigkeit der Klasse C ∞ ist. @@ -1837,39 +1839,39 @@ Differenzierbare Mannigfaltigkeiten der Klasse C ∞ werden auch glatt genannt. Definition 30 Sei X eine differenzierbare Mannigfaltigkeit der Klasse C k - ( k ∈ N ∪ { ∞ }) mit Atlas + (k ∈ N ∪ { ∞ }) mit Atlas A = (U - i , ϕ -i ) - i ∈I . -a) Eine Karte ( U, ϕ) auf X heißt verträglich mit A , wenn alle Kartenwechsel ϕ ◦ ϕ −1 +i, ϕ +i) +i∈I . +a) Eine Karte (U, ϕ) auf X heißt verträglich mit A, wenn alle Kartenwechsel ϕ ◦ ϕ−1 i und ϕ - i ◦ ϕ− 1 +i ◦ ϕ−1 (i ∈ I mit U -i ∩ U = ∅ ) differenzierbar von Klasse C k +i ∩ U = ∅) differenzierbar von Klasse C k sind. b) Die Menge aller mit A verträglichen Karten auf X bildet einen maximalen Atlas der Klasse C k . Er heißt C k -Struktur auf X . Eine C ∞ - -Struktur heißt auch differenzierbare Struktur auf X . +-Struktur heißt auch differenzierbare Struktur auf X . Bemerkung 28 Für n ≥ 4 gibt es auf S n mehrere verschiedene differenzierbare Strukturen, die sogenannten „exotische Sphären“ . Definition 31 -Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m , x ∈ X . +Seien X, Y differenzierbare Mannigfaltigkeiten der Dimension n bzw. m, x ∈ X . 
a) Eine stetige Abbildung f : X → Y heißt differenzierbar in x (von Klasse C k ), wenn -es Karten ( U, ϕ) von X mit x ∈ U und ( V , ψ ) von Y mit f ( U ) ⊆ V gibt, sodass -ψ ◦ f ◦ ϕ −1 +es Karten (U, ϕ) von X mit x ∈ U und (V , ψ) von Y mit f (U ) ⊆ V gibt, sodass +ψ ◦ f ◦ ϕ−1 stetig differenzierbar von Klasse C k - in ϕ (x ) ist. + in ϕ(x) ist. b) f heißt differenzierbar (von Klasse C k ), wenn f in jedem x ∈ X differenzierbar ist. -c) f heißt Diffeomorphismus , wenn f differenzierbar von Klasse C ∞ +c) f heißt Diffeomorphismus, wenn f differenzierbar von Klasse C ∞ ist und es eine differenzierbare Abbildung g : Y → X von Klasse C ∞ gibt mit g ◦ f = id @@ -1878,186 +1880,188 @@ f ◦ g = id Y . 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN Bemerkung 29 -Die Bedingung inDefinition 31.ahängt nicht von den gewählten Karten ab. +Die Bedingung in Definition 31.a hängt nicht von den gewählten Karten ab. Beweis: Seien (U - , ϕ - ) und (V - , ψ - ) Karten von X bzw. Y um x bzw. f (x ) mit f (U - ) ⊆ V - . +, ϕ +) und (V +, ψ +) Karten von X bzw. Y um x bzw. f (x) mit f (U +) ⊆ V +. ⇒ ψ ◦ f ◦ (ϕ - )− 1 +)−1 = ψ - ◦ (ψ −1 - ◦ ψ ) ◦ f ◦ ( ϕ− 1 + ◦ (ψ−1 + ◦ ψ) ◦ f ◦ (ϕ−1 ◦ ϕ) ◦ (ϕ - )−1 +)−1 ist genau dann differenzierbar, wenn ψ ◦ f ◦ ϕ−1 differenzierbar ist. Beispiel 23 f - : R → R , x → x 3 - ist kein Diffeomorphismus, aber Homöomorphismus, da mitg (x ) := 3√ - x + : R → R, x → x3 + ist kein Diffeomorphismus, aber Homöomorphismus, da mit g(x) := 3√ +x gilt: f ◦ g = id -R , g ◦ f = id +R, g ◦ f = id R Bemerkung 30 Sei X eine glatte Mannigfaltigkeit. Dann ist Diffeo(X ) := { f : X → X | f ist Diffeomorphismus } -eine Untergruppe von Homöo( X ). +eine Untergruppe von Homöo(X ). Definition 32 -S ⊆ R 3 - heißt reguläre Fläche : ⇔ ∀ s ∈ S ∃ Umgebung V ( s ) ⊆ R 3 - ∃U ⊆ R 2 +S ⊆ R3 + heißt reguläre Fläche :⇔ ∀s ∈ S ∃ Umgebung V (s) ⊆ R3 + ∃U ⊆ R2 offen: ∃ differenzierbare Abbildung F : U → V ∩ S : Rg(J F (u)) = 2 ∀u ∈ U . F heißt (lokale) reguläre Parametrisierung von S . 
-F (u, v ) = ( x (u, v ), y (u, v ), z (u, v )) +F (u, v) = (x(u, v), y(u, v), z(u, v)) J -F (u, v ) =  +F (u, v) =   ∂x -∂u (p ) ∂x -∂v ( p) +∂u (p) ∂x +∂v (p) ∂y -∂u (p ) ∂y -∂v (p ) +∂u (p) ∂y +∂v (p) ∂z -∂u (p ) ∂z -∂v ( p )  +∂u (p) ∂z +∂v (p)  Beispiel 24 -1)Rotationsflächen: Sei r : R → R - >0 eine differenzierbare Funktion. -F : R 2 - → R 3 - (u, v ) → (r ( u) cos(u), r ( v ) sin(u), v ) +1) Rotationsflächen: Sei r : R → R +>0 eine differenzierbare Funktion. +F : R2 + → R3 + (u, v) → (r(u) cos(u), r(v) sin(u), v) J -F ( u, v ) =  - −r ( v ) sin u r - ( v ) cos u -r (v ) cos u r - ( v ) sin u +F (u, v) =  +−r(v) sin u r +(v) cos u +r(v) cos u r +(v) sin u 0 1   -hat Rang 2 für alle (u, v ) ∈ R 2 - . -2)Kugelkoordinaten: F : R2 - → R 3 - , -(u, v ) → (R cos v cos u, R cos v sin u, R sin v ) -Es gilt: F ( u, v ) ∈ S 2 +hat Rang 2 für alle (u, v) ∈ R2 +. +2) Kugelkoordinaten: F : R2 + → R3 +, +(u, v) → (R cos v cos u, R cos v sin u, R sin v) +Es gilt: F (u, v) ∈ S 2 R , denn -R 2 - cos 2 - (v ) cos2 - (u) + R 2 - cos 2 - (v ) sin2 - (u) + R 2 +R2 + cos2 +(v) cos2 +(u) + R2 + cos2 +(v) sin2 +(u) + R2 sin2 - (v ) -=R 2 - (cos 2 - ( v ) cos2 - (u) + cos 2 - ( v ) sin2 - (u) + sin 2 - ( v )) -=R 2 - cos 2 - (v )(cos 2 - (u) + sin 2 - ( u)) + sin 2 - (v ) -=R 2 - cos 2 - (v ) + sin 2 - ( v ) -=R 2 +(v) +=R2 +(cos2 +(v) cos2 +(u) + cos2 +(v) sin2 +(u) + sin2 +(v)) +=R2 +cos2 +(v)(cos2 +(u) + sin2 +(u)) + sin2 +(v) +=R2 +cos2 +(v) + sin2 +(v) +=R2 2.2. DIFFERENZIERBARE MANNIGFALTIGKEITEN N S v -u (a)Kugelkoordinaten −1 +u +(a) Kugelkoordinaten −1 0 1 2 - −2 −1 0 1 20. 60. 81 - (b)Rotationskörper + −2 −1 0 1 20.60.81 + (b) Rotationskörper π 2 π - 3 π -2 2 π -− 1− 0. 50. 51 + 3π +2 2π +−1−0.50.51 xy sin x -cos x (c)Sinus und Kosinus haben keine gemeinsame Nullstelle +cos x +(c) Sinus und Kosinus haben keine gemeinsame Nullstelle 2.2. 
DIFFERENZIERBARE MANNIGFALTIGKEITEN Die Jacobi-Matrix J - F ( u, v ) =  - −R cos v sin u −R sin v cos u +F (u, v) =  +−R cos v sin u −R sin v cos u R cos v cos u −R sin v sin u 0 R cos v   hat Rang 2 für cos v = 0. In N und S ist cos v = 0. Bemerkung 31 -Jede reguläre Fläche S ⊆ R 3 +Jede reguläre Fläche S ⊆ R3 ist eine 2-dimensionale, differenzierbare Mannigfaltigkeit. Beweis: S ⊆ R3 ist als reguläre Fläche eine 2-dimensionale Mannigfaltigkeit. Aus der Definition von -regulären Flächen folgt direkt, dass Karten(U -i , F -i ) und (U - j ⊆ R 2 - , F -j : R 2 - → R 3 - ) von S mit +regulären Flächen folgt direkt, dass Karten (U +i, F +i) und (U +j ⊆ R2 +, F +j : R2 + → R3 +) von S mit U - i ∩ U +i ∩ U j = ∅ existieren, wobei F i und F j nach Definition differenzierbare Abbildungen sind. -z.Z.: F − 1 +z.Z.: F −1 j ◦ F i ist ein Diffeomorphismus. U i U - jS +jS s F i F j F −1 -j ◦ F +j ◦F i -Abbildung 2.5:Reguläre Fläche S zum Beweis vonBemerkung 31 +Abbildung 2.5: Reguläre Fläche S zum Beweis von Bemerkung 31 Idee: Finde differenzierbare Funktion F −1 j in Umgebung W von s, sodass -F − 1 +F −1 j | -S ∩ W = F − 1 +S∩W = F −1 j . Ausführung: Sei u 0 ∈ U - i , v +i, v 0 ∈ U j mit F - i (u -0 ) = s = F +i(u +0) = s = F j (v - 0 ) . +0). Da Rg(J F -j ( v - 0 )) = 2 ist, ist o. B. d. A. +j (v +0)) = 2 ist, ist o. B. d. A. det ∂x ∂u ∂x @@ -2066,27 +2070,27 @@ det ∂u ∂y ∂v (v - 0 ) = 0 +0) = 0 und F -j (u, v ) = (x ( u, v ) , y ( u, v ) , z ( u, v )). +j (u, v) = (x(u, v), y(u, v), z(u, v)). 
Definiere F j : U - j × R → R3 +j × R → R3 durch F -j ( u, v, t ) := (x (u, v ), y (u, v ), z (u, v ) + t) +j (u, v, t) := (x(u, v), y(u, v), z(u, v) + t) Offensichtlich: F j | - U - j ×{ 0 } = F +U +j ×{ 0 } = F j J F - j =  +j =   ∂x ∂u ∂x ∂v 0 @@ -2100,16 +2104,16 @@ F ⇒ det J F - j ( v - 0 , 0) = 0 +j (v +0, 0) = 0 Analysis II ======⇒ Es gibt Umgebungen W von F j von F j (v - 0 , 0) = F +0, 0) = F j (v - 0 ) = s , sodass +0) = s, sodass F j auf W eine differenzierbar Inverse F −1 @@ -2119,51 +2123,51 @@ Weiter gilt: F j −1 - | - W ∩ S = F −1 +| +W ∩S = F −1 j | - W ∩ S -⇒ F − 1 +W ∩S +⇒ F −1 j ◦ F -i | - F −1 -i (W ∩ S ) = F − 1 +i| +F −1 +i (W ∩S) = F −1 j ◦ F -i | +i| F −1 -i ( W ∩ S ) +i (W ∩S) ist differenzierbar. Definition 33 -Sei G eine Mannigfaltigkeit und ( G, ◦) eine Gruppe. -a) G heißt topologische Gruppe , wenn die Abbildungen ◦ : G × G → G und ι : G → G +Sei G eine Mannigfaltigkeit und (G, ◦) eine Gruppe. +a) G heißt topologische Gruppe, wenn die Abbildungen ◦ : G × G → G und ι : G → G definiert durch - g ◦ h := g · h und ι (g ) := g −1 + g ◦ h := g · h und ι(g) := g−1 stetig sind. -b) Ist G eine differenzierbare Mannigfaltigkeit, so heißtG Lie-Gruppe, wenn (G, ◦ ) und +b) Ist G eine differenzierbare Mannigfaltigkeit, so heißt G Lie-Gruppe, wenn (G, ◦) und (G, ι) differenzierbar sind. Beispiel 25 (Lie-Gruppen) -1)Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. +1) Alle endlichen Gruppen sind 0-dimensionale Lie-Gruppen. 2) GL - n ( R) -3) (R × - , · ) +n(R) +3) (R× +, ·) 4) (R - > 0 , · ) -5) (R n - , +), denn A · B ( i, j ) = +>0, ·) +5) (Rn +, +), denn A · B (i, j ) = n -k =1 a +k=1 a ik b kj ist nach allen Variablen differenzierbar -(A −1 - )(i, j ) = det(A - ij ) +(A−1 +)(i, j ) = det(A +ij ) det A A ij =    a - i 1 . . . a +i1 . . . a in . . @@ -2174,9 +2178,9 @@ in . a n1 . . . a -nn  +nn  - ∈ R (n− 1)×( n−1) + ∈ R(n−1)×(n−1) ist differenzierbar. 
det A ij kann 0 werden, da: @@ -2184,131 +2188,131 @@ ij kann 0 werden, da: 1 1 −1 0 6) SL -n ( R) = { A ∈ GL - n ( R) | det(A) = 1 } +n(R) = { A ∈ GL +n(R) | det(A) = 1 } Bemerkung 32 -Ist G eine Lie-Gruppe und g ∈ G , so ist die Abbildung +Ist G eine Lie-Gruppe und g ∈ G, so ist die Abbildung l g : G → G h → g · h ein Diffeomorphismus. - 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX 2.3 Simplizialkomplex Definition 34 Seien v - 0 , . . . , v - k ∈ Rn +0, . . . , v +k ∈ Rn Punkte. a) v - 0 , . . . , v - k sind in allgemeiner Lage +0, . . . , v +k sind in allgemeiner Lage ⇔ - es gibt keinen (k − 1)-dimensionalen affinen Untervektorraum, derv - 0 , . . . , v - k enthält + es gibt keinen (k − 1)-dimensionalen affinen Untervektorraum, der v +0, . . . , v +k enthält ⇔ v - 1 − v - 0 , . . . , v - k − v - 0 sind linear unabhängig. -b) conv (v - 0 , . . . , v - k ) := +1 − v +0, . . . , v +k − v +0 sind linear unabhängig. +b) conv(v +0, . . . , v +k ) := k -i =0 λ -i v - i +i=0 λ +iv +i λ - i ≥ 0, +i ≥ 0, k -i =0 λ - i = 1 +i=0 λ +i = 1 heißt die konvexe Hülle von v - 0 , . . . , v - k . +0, . . . , v +k . Definition 35 a) - Sei ∆ n - = conv ( e - 0 , . . . , e - n ) ⊆ R n+1 + Sei ∆n + = conv(e +0, . . . , e +n) ⊆ Rn+1 die konvexe Hülle der Standard-Basisvektoren e -0 , . . . , e - n . -Dann heißt ∆ n +0, . . . , e +n. +Dann heißt ∆n Standard-Simplex und n die Dimension des Simplex. b) Für Punkte v - 0 , . . . , v - k im R n +0, . . . , v +k im Rn in allgemeiner Lage heißt ∆(v +0, . . . , v +k ) = conv(v +0, . . . , v +k ) +ein k-Simplex in Rn +. +c) Ist ∆(v +0, . . . , v +k ) ein k-Simplex und I = { i +0, . . . , i +r } ⊆ { 0, . . . , k }, so ist s +i +0,...,i +r := +conv(v +i 0 , . . . , v - k ) = conv (v - 0 , . . . , v - k ) -ein k -Simplex in R n - . -c) Ist ∆( v - 0 , . . . , v - k ) ein k -Simplex und I = { i -0 , . . . , i - r } ⊆ { 0, . . . , k } , so ist s i - 0 ,...,i - r := -conv( v - i - 0 , . . . 
, v - i - r ) ein r -Simplex und heißt Teilsimplex oder Seite von ∆ . -(a)0-Simplex ∆ 0 +r ) ein r-Simplex und heißt Teilsimplex oder Seite von ∆. +(a) 0-Simplex ∆0 1 2 3123 e 0e - 1 -(b)1-Simplex ∆ 1 1 2 3123 +1 +(b) 1-Simplex ∆1 1 2 3123 e 0e 1 e - 2 -(c)2-Simplex ∆ 2 e - 0 e - 1e +2 +(c) 2-Simplex ∆2 e +0 e +1e 2 e 3 -(d)3-Simplex ∆ 3 -Abbildung 2.6:Beispiele für k -Simplexe +(d) 3-Simplex ∆3 +Abbildung 2.6: Beispiele für k-Simplexe Definition 36 a) Eine endliche Menge K von Simplizes im Rn - heißt (endlicher) Simplizialkomplex , + heißt (endlicher) Simplizialkomplex, wenn gilt: -(i)Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K . -(ii)Für ∆ - 1 , ∆ - 2 ∈ K ist ∆ - 1 ∩ ∆ - 2 leer oder ein Teilsimplex von ∆ - 1 und von ∆ - 2 . +(i) Für ∆ ∈ K und S ⊆ ∆ Teilsimplex ist S ∈ K . +(ii) Für ∆ +1, ∆ +2 ∈ K ist ∆ +1 ∩ ∆ +2 leer oder ein Teilsimplex von ∆ +1 und von ∆ +2. b) |K | := - ∆ ∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K . -c)Ist d = max { k ∈ N - 0 | K enthält k -Simplex }, so heißt d die Dimension von K . - 2.3. SIMPLIZIALKOMPLEX -(a)1D Simplizialkomplex (b) 2D Simplizialkomplex -(ohne untere Fläche!) (c)2D Simplizialkomplex -(d)1D Simplizialkomplex (e)2D Simplizialkomplex +∆∈K ∆ (mit Teilraumtopologie) heißt geometrische Realisierung von K . +c) Ist d = max { k ∈ N +0 | K enthält k-Simplex }, so heißt d die Dimension von K . + 2.3. SIMPLIZIALKOMPLEX +(a) 1D Simplizialkomplex (b) 2D Simplizialkomplex +(ohne untere Fläche!) (c) 2D Simplizialkomplex +(d) 1D Simplizialkomplex (e) 2D Simplizialkomplex P -(f ) P ist kein Teilsimplex, da EigenschaftPunkt - b.iiverletzt ist P -(g)Simplizialkomplex -Abbildung 2.7:Beispiele für Simplizialkomplexe +(f ) P ist kein Teilsimplex, da Eigenschaft + Punkt b.ii verletzt ist P +(g) Simplizialkomplex +Abbildung 2.7: Beispiele für Simplizialkomplexe Definition 37 Seien K, L Simplizialkomplexe. 
Eine stetige Abbildung f : |K | → |L| @@ -2317,206 +2321,206 @@ a) f (∆) ∈ L b) f | ∆ : ∆ → f (∆) ist eine affine Abbildung. Beispiel 26 (Simpliziale Abbildungen) -1) ϕ( e -1 ) := b -1 , ϕ(e - 2 ) := b +1) ϕ(e +1) := b +1, ϕ(e +2) := b 2 ϕ ist eine eindeutig bestimmte lineare Abbildung - 2.3. SIMPLIZIALKOMPLEX + 2.3. SIMPLIZIALKOMPLEX 0 e - 2e - 1 +2e +1 0 b 1b 2 ϕ -2)Folgende Abbildung ϕ : ∆n - → ∆ n−1 +2) Folgende Abbildung ϕ : ∆n + → ∆n−1 ist simplizial: ϕ -3)Tori können simplizial auf Sphären abgebildet werden (vgl.Abbildung 2.8) +3) Tori können simplizial auf Sphären abgebildet werden (vgl. Abbildung 2.8) M Ma -a ab - b bc -c c +a ab + b bc +c c dd dM a b c d -bb bb b bbb b - b - b bb -bb -bbb - b bb bb b -b -b b -bAbbildung 2.8:Abbildung eines Torus auf eine Sphäre + + + + + + + + + +Abbildung 2.8: Abbildung eines Torus auf eine Sphäre Definition 38 Sei K ein endlicher Simplizialkomplex. Für n ≥ 0 sei a - n ( K ) die Anzahl der n -Simplizes in +n(K ) die Anzahl der n-Simplizes in K . Dann heißt - χ (K ) := dim K + χ(K ) := dim K -n =0 (−1)n - a - n ( K ) +n=0 (−1)n +a +n(K ) Eulerzahl (oder Euler-Charakteristik) von K . Beispiel 27 -1) χ (∆1 - ) = 2 − 1 = 1 -χ (∆2 - ) = 3 − 3 + 1 = 1 -χ (∆3 - ) = 4 − 6 + 4 − 1 = 1 -2) χ (Oktaeder-Oberfläche ) = 6 − 12 + 8 = 2 -χ (Rand des Tetraeders) = 2 -χ (Ikosaeder ) = 12 − 30 + 20 = 2 -3) χ (Würfel) = 8 − 12 + 6 = 2 -χ (Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 +1) χ(∆1 +) = 2 − 1 = 1 +χ(∆2 +) = 3 − 3 + 1 = 1 +χ(∆3 +) = 4 − 6 + 4 − 1 = 1 +2) χ(Oktaeder-Oberfläche) = 6 − 12 + 8 = 2 +χ(Rand des Tetraeders) = 2 +χ(Ikosaeder) = 12 − 30 + 20 = 2 +3) χ(Würfel) = 8 − 12 + 6 = 2 +χ(Würfel, unterteilt in Dreiecksflächen) = 8 − (12 + 6) + (6 · 2) = 2 Bemerkung 33 -χ (∆n - ) = 1 für jedes n ∈ N - 0 - 2.3. SIMPLIZIALKOMPLEX +χ(∆n +) = 1 für jedes n ∈ N +0 + 2.3. SIMPLIZIALKOMPLEX Beweis: ∆n ist die konvexe Hülle von (e - 0 , . . . , e - n ) in Rn +1 - . Jede (k + 1)-elementige Teilmenge +0, . . . 
, e +n) in Rn+1 +. Jede (k + 1)-elementige Teilmenge von { e -0 , . . . , e - n } definiert ein k -Simplex. +0, . . . , e +n } definiert ein k-Simplex. ⇒ a - k (∆n - ) = - n+1 -k +1 - , k = 0, . . . , n -⇒ χ (∆n - ) = +k (∆n +) = +n+1 +k+1 +, k = 0, . . . , n +⇒ χ(∆n +) = n -k =0 ( −1)k - n +1 -k +1 -f ( x) = (x + 1) n+1 Binomischer +k=0(−1)k +n+1 +k+1 +f (x) = (x + 1)n+1 Binomischer Lehrsatz = -n +1 -k =0 - n+1 +n+1 +k=0 +n+1 k - xk +xk ⇒ 0 = -n +1 -k =0 - n+1 +n+1 +k=0 +n+1 k - (−1)k - = χ (∆n - ) − 1 -⇒ χ (∆n - ) = 1 +(−1)k + = χ(∆n +) − 1 +⇒ χ(∆n +) = 1 Definition 39 -a)Ein 1D-Simplizialkomplex heißt Graph. -b)Ein Graph, der homöomorph zu S 1 +a) Ein 1D-Simplizialkomplex heißt Graph. +b) Ein Graph, der homöomorph zu S 1 ist, heißt Kreis. -c)Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. +c) Ein zusammenhängender Graph heißt Baum, wenn er keinen Kreis enthält. (a) Dies wird häufig auch als Multigraph bezeichnet. (b) Planare Einbettung des Tetraeders (c) K 5 (d) K -3 ,3 -Abbildung 2.9:Beispiele für Graphen +3,3 +Abbildung 2.9: Beispiele für Graphen Bemerkung 34 -Für jeden Baum T gilt χ( T ) = 1. +Für jeden Baum T gilt χ(T ) = 1. Beweis: Induktion über die Anzahl der Ecken. Bemerkung 35 -a) - Jeder zusammenhängende Graph Γ enthält einen Teilbaum T , der alle Ecken von Γ -enthält. 2 -b)Ist n = a -1 (Γ) − a -1 (T ) , so ist χ (Γ) = 1 − n . +a) Jeder zusammenhängende Graph Γ enthält einen Teilbaum T , der alle Ecken von Γ +enthält.2 +b) Ist n = a +1(Γ) − a +1(T ), so ist χ(Γ) = 1 − n. Beweis: -a)Siehe „Algorithmus von Kruskal“. +a) Siehe „Algorithmus von Kruskal“. 2 - T wird „Spannbaum“ genannt. - 2.3. SIMPLIZIALKOMPLEX -b) χ (Γ) = a - 0 (Γ) − a - 1 (Γ) +T wird „Spannbaum“ genannt. + 2.3. SIMPLIZIALKOMPLEX +b) χ(Γ) = a +0(Γ) − a +1(Γ) = a - 0 (Γ) − ( n + a -1 (T )) +0(Γ) − (n + a +1(T )) = a - 0 (T ) − a - 1 (T ) − n -= χ (T ) − n +0(T ) − a +1(T ) − n += χ(T ) − n = 1 − n Bemerkung 36 -Sei ∆ ein n -Simplex und x ∈ ∆ ◦ - ⊆ R n - . 
Sei K der Simplizialkomplex, der aus ∆ durch -„Unterteilung“ in x entsteht. Dann ist χ ( K ) = χ (∆) = 1. +Sei ∆ ein n-Simplex und x ∈ ∆◦ + ⊆ Rn +. Sei K der Simplizialkomplex, der aus ∆ durch +„Unterteilung“ in x entsteht. Dann ist χ(K ) = χ(∆) = 1. (a) K (b) ∆, das aus K durch Unterteilung entsteht -Abbildung 2.10:Beispiel fürBemerkung 36. -Beweis: χ( K ) = χ (∆) − (−1)n +Abbildung 2.10: Beispiel für Bemerkung 36. +Beweis: χ(K ) = χ(∆) − (−1)n n-Simplex + n -k =0 ( −1)k - n + 1 +k=0(−1)k +n + 1 k -(1+(−1))n +1 = χ (∆) +(1+(−1))n+1 = χ(∆) Definition 40 Sei X ein topologischer Raum, K ein Simplizialkomplex und -h : | K | → X +h : |K | → X ein Homöomorphismus von der geometrischen Realisierung |K | auf X . Dann heißt h eine Triangulierung von X . Beispiel 28 (Triangulierung des Torus) Für eine Triangulierung des Torus werden mindestens 14 Dreiecke benötigt. Beispiele für -fehlerhafte „Triangulierungen“ sind inBeispiel 28zu sehen. Korrekte Triangulierungen sind -inBeispiel 28. +fehlerhafte „Triangulierungen“ sind in Beispiel 28 zu sehen. Korrekte Triangulierungen sind +in Beispiel 28. Satz 2.1 (Eulersche Polyederformel) -Sei P ein konvexes Polyeder in R 3 - , d. h. ∂ P ist ein 2-dimensionaler Simplizialkomplex, +Sei P ein konvexes Polyeder in R3 +, d. h. ∂ P ist ein 2-dimensionaler Simplizialkomplex, sodass gilt: - ∀x, y ∈ ∂ P : [x, y ] ⊆ P -Dann ist χ (∂ P ) = 2. + ∀x, y ∈ ∂ P : [x, y] ⊆ P +Dann ist χ(∂ P ) = 2. Beweis: -1)Die Aussage ist richtig für den Tetraeder. +1) Die Aussage ist richtig für den Tetraeder. 2) O. B. d. A. sei 0 ∈ P und P ⊆ B -1 (0) . Pro jeziere ∂ P von 0 aus auf ∂ B -1 (0) = S 2 - . +1(0). Pro jeziere ∂ P von 0 aus auf ∂ B +1(0) = S 2 +. Erhalte Triangulierung von S 2 - . - 2.3. SIMPLIZIALKOMPLEX +. + 2.3. SIMPLIZIALKOMPLEX (a) Die beiden markierten Dreiecke schneiden sich im Mittelpunkt und in einer Seite. (b) Die beiden markierten Dreiecke schneiden sich im Mittelpunkt und außen. 
-Abbildung 2.11:Fehlerhafte Triangulierungen -(a)Einfache Triangulierung (b)Minimale Triangulierung -Abbildung 2.12:Triangulierungen des Torus - 2.3. SIMPLIZIALKOMPLEX +Abbildung 2.11: Fehlerhafte Triangulierungen +(a) Einfache Triangulierung (b) Minimale Triangulierung +Abbildung 2.12: Triangulierungen des Torus + 2.3. SIMPLIZIALKOMPLEX 3) Sind P 1 und P - 2 konvexe Polygone und T -1 , T +2 konvexe Polygone und T +1, T 2 die zugehörigen Triangulierungen von S 2 - , so gibt es eine Triangulierung T , die sowohl um T +, so gibt es eine Triangulierung T , die sowohl um T 1 als auch um T 2 Verfeinerung -ist (vgl.Abbildung 2.13). +ist (vgl. Abbildung 2.13). T 1 T @@ -2525,28 +2529,28 @@ T Abbildung 2.13: T ist eine Triangulierung, die für T 1 und T 2 eine Verfeinerung ist. -NachBemerkung 36ist χ (∂ P -1 ) = χ (T -1 ) = χ(T ) = χ (T -2 ) = χ (∂ P -2 ) = 2, weil o. B. d. A. +Nach Bemerkung 36 ist χ(∂ P +1) = χ(T +1) = χ(T ) = χ(T +2) = χ(∂ P +2) = 2, weil o. B. d. A. P - 2 ein Tetraeder ist. +2 ein Tetraeder ist. Bemerkung 37 (Der Rand vom Rand ist 0) -Sei K ein endlicher Simplizialkomplex mit KnotenmengeV und < eine Totalordnung auf V . +Sei K ein endlicher Simplizialkomplex mit Knotenmenge V und < eine Totalordnung auf V . Sei A - n die Menge der n -Simplizes in K , d. h. +n die Menge der n-Simplizes in K , d. h. A - n ( K ) := { σ ∈ K | dim( σ ) = n } für n = 0, . . . , d = dim(K ) +n(K ) := { σ ∈ K | dim(σ) = n } für n = 0, . . . , d = dim(K ) und C -n (K ) der R -Vektorraum mit Basis A - n ( K ) , d. h. +n(K ) der R-Vektorraum mit Basis A +n(K ), d. h. C - n (K ) =  +n(K ) =    -σ ∈ A - n (K ) c +σ∈A +n(K ) c σ · σ @@ -2557,428 +2561,428 @@ C   Sei σ = ∆(x - 0 , . . . , x - n ) ∈ A - n ( K ) , sodass x - 0 < x +0, . . . , x +n) ∈ A +n(K ), sodass x +0 < x 1 < · · · < x -n . -Für i = 0 , . . . , n sei ∂ - i σ := ∆( x - 0 , . . . , ˆx - i , . . . , x - n ) die i-te Seite von σ und d +n. +Für i = 0, . . . , n sei ∂ +iσ := ∆(x +0, . . . , ˆx +i, . . . 
, x +n) die i-te Seite von σ und d σ = d -n σ := +nσ := -i =0 ( −1) i - ∂ -i σ ∈ C - n− 1 ( K ) und d +i=0(−1)i +∂ +iσ ∈ C +n−1(K ) und d n : C - n ( K ) → C - n− 1 ( K ) die dadurch definierte lineare +n(K ) → C +n−1(K ) die dadurch definierte lineare Abbildung. Dann gilt: d -n− 1 ◦ d +n−1 ◦ d n = 0 a bc σ e 3 e - 1e - 2 -Abbildung 2.14:Simplizialkomplex mit Totalordnung +1e +2 +Abbildung 2.14: Simplizialkomplex mit Totalordnung Beispiel 29 Sei a < b < c. Dann gilt: d -2 σ = e +2σ = e 1 − e 2 + e 3 d -1 ( e +1(e 1 − e 2 + e - 3 ) = (c − b) − (c − a) + ( b − a ) - 2.3. SIMPLIZIALKOMPLEX +3) = (c − b) − (c − a) + (b − a) + 2.3. SIMPLIZIALKOMPLEX = 0 -Sei a < b < c < d . Dann gilt für Tetraeder: +Sei a < b < c < d. Dann gilt für Tetraeder: d -3 (∆( a, b, c, d)) = ∆(b, c, d ) − ∆(a, c, d ) + ∆( a, b, d ) − ∆( a, b, c ), wobei: +3(∆(a, b, c, d)) = ∆(b, c, d) − ∆(a, c, d) + ∆(a, b, d) − ∆(a, b, c), wobei: d -2 ( ∆(b, c, d )) =∆( c, d)−∆(b, d)+∆( b, c) +2( ∆(b, c, d)) = ∆(c, d)−∆(b, d) + ∆(b, c) d -2 (−∆(a, c, d )) = −∆(c, d)+∆( a, d)−∆(a, c) +2(−∆(a, c, d)) = −∆(c, d) + ∆(a, d)−∆(a, c) d -2 ( ∆(a, b, d )) =∆( b, d) −∆(a, d)+∆( a, b) +2( ∆(a, b, d)) = ∆(b, d)−∆(a, d) + ∆(a, b) d -2 (−∆(a, b, c )) = −∆(b, c)+∆( a, c) −∆( a, b) +2(−∆(a, b, c)) = −∆(b, c) + ∆(a, c)−∆(a, b) ⇒ d -2 (d -3 (∆(a, b, c, d))) = 0 +2(d +3(∆(a, b, c, d))) = 0 Beweis: Sei σ ∈ A -n . Dann gilt: +n. Dann gilt: d -n−1 (d -n σ ) = d -n −1 ( n +n−1(d +nσ) = d +n−1( n -i =0 (−1)i - ∂ - i σ ) +i=0 (−1)i +∂ +iσ) = n -i =0 (−1)i - d -n−1 (∂ - i σ ) +i=0 (−1)i +d +n−1(∂ +iσ) = n -i =0 (−1)i n−1 +i=0 (−1)i n−1 -j =0 ∂ -i (∂ - j σ )(−1)j +j=0 ∂ +i(∂ +j σ)(−1)j = -0 ≤i ≤ j ≤n− 1(−1)i + j +0≤i≤j≤n−1(−1)i+j ∂ - j (∂ - i ( σ )) + -0≤ j d ( P, C ) = d( P, B ) + d( B , C ) = d( P, A ) + d( B , C ) ⇒ -d(A, C ) > d(B , C ) ⇒ Widerspruch zuPunkt (i) + (a) 1. Fall P QAB + (b) 2. Fall +Abbildung 4.4: Fallunterscheidung aus Bemerkung 62 +(ii) a) B liegt zwischen P und C . 
+d(P, A) + d(A, C ) > d(P, C ) = d(P, B ) + d(B , C ) = d(P, A) + d(B , C ) ⇒ +d(A, C ) > d(B , C ) ⇒ Widerspruch zu Punkt (i) b) C liegt zwischen P und B -d(P, C ) + d(C, A) > d( P, A ) = d(P, B ) = d( P, C ) + d(C, B ) -⇒ d( C, A) > d(C, B ) -⇒ Widerspruch zuPunkt (i) +d(P, C ) + d(C, A) > d(P, A) = d(P, B ) = d(P, C ) + d(C, B ) +⇒ d(C, A) > d(C, B ) +⇒ Widerspruch zu Punkt (i) 2. Fall : Q und B liegen auf verschieden Halbebenen bzgl. P A. Dann liegen A und Q in derselben Halbebene bzgl. P B . Tausche A und B ⇒ Fall 1 Bemerkung 63 -Sei (X, d, G ) eine Geometrie, die§1-§3erfüllt, P, Q ∈ X mit P = Q und ϕ eine Isometrie -mit ϕ( P ) = P und ϕ(Q ) = Q . -Dann gilt ϕ( S ) = S ∀S ∈ P Q. +Sei (X, d, G) eine Geometrie, die §1 - §3 erfüllt, P, Q ∈ X mit P = Q und ϕ eine Isometrie +mit ϕ(P ) = P und ϕ(Q) = Q. +Dann gilt ϕ(S ) = S ∀S ∈ P Q. Beweis: O. B. d. A. sei S ∈ P Q 2 -⇔ d(P, Q ) = d(P, S ) + d( S, Q) -ϕ∈ Iso(X ) -⇒ d( ϕ(P ) , ϕ(Q)) = d(ϕ( P ), ϕ(S )) + d(ϕ (S ) , ϕ( Q)) -P,Q∈ Fix( ϕ) -⇒ d( P, Q ) = d(P, ϕ (S )) + d(ϕ (S ) , Q) +⇔ d(P, Q) = d(P, S ) + d(S, Q) +ϕ∈Iso(X ) +⇒ d(ϕ(P ), ϕ(Q)) = d(ϕ(P ), ϕ(S )) + d(ϕ(S ), ϕ(Q)) +P,Q∈Fix(ϕ) +⇒ d(P, Q) = d(P, ϕ(S )) + d(ϕ(S ), Q) ⇒ ϕ(S ) liegt zwischen P und Q -⇒ d(P, S ) = d( ϕ( P ), ϕ(S )) = d(P, ϕ (S )) -3(i ) +⇒ d(P, S ) = d(ϕ(P ), ϕ(S )) = d(P, ϕ(S )) +3(i) ⇒ ϕ(S ) = S Proposition 4.2 -In einer Geometrie, die§1-§3erfüllt, gibt es zu P, P - , Q, Q - mit d( P, Q ) = d( P - , Q - ) -höchstens zwei Isometrien mit ϕ( P ) = P - und ϕ( Q) = Q +In einer Geometrie, die §1 - §3 erfüllt, gibt es zu P, P +, Q, Q + mit d(P, Q) = d(P +, Q +) +höchstens zwei Isometrien mit ϕ(P ) = P + und ϕ(Q) = Q 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE -Aus den Axiomen folgt, dass es in der Situation von§4höchstens zwei Isometrien mit +Aus den Axiomen folgt, dass es in der Situation von §4 höchstens zwei Isometrien mit ϕ -i (P ) = P +i(P ) = P und ϕ - i (Q) = Q +i(Q) = Q gibt. 
Beweis: Seien ϕ -1 , ϕ -2 , ϕ +1, ϕ +2, ϕ 3 Isometrien mit ϕ -i (P ) = P - , ϕ -i ( Q) = Q +i(P ) = P +, ϕ +i(Q) = Q mit i = 1, 2, 3. -Der Beweis vonProposition 4.2erfolgt über zwei Teilaussagen: +Der Beweis von Proposition 4.2 erfolgt über zwei Teilaussagen: (Teil i) ∃R ∈ X \ P Q mit ϕ -1 (R ) = ϕ - 2 (R ). -(Teil ii)Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id +1(R) = ϕ +2(R). +(Teil ii) Hat ϕ 3 Fixpunkte, die nicht kollinear sind, so ist ϕ = id X . -Aus(Teil i)und(Teil ii)folgt, dass ϕ− 1 +Aus (Teil i) und (Teil ii) folgt, dass ϕ−1 2 ◦ ϕ 1 = id X , also ϕ 2 = ϕ -1 , da P , Q und R in diesem +1, da P , Q und R in diesem Fall Fixpunkte sind. Nun zu den Beweisen der Teilaussagen: (Teil i) Sei R ∈ X \ P Q. Von den drei Punkten ϕ -1 (R ), ϕ -2 (R ), ϕ -3 (R ) liegen zwei in der selben +1(R), ϕ +2(R), ϕ +3(R) liegen zwei in der selben Halbebene bzgl. P - Q +Q = ϕ -i ( P Q). +i(P Q). O. B. d. A. seien ϕ -1 (R ) und ϕ - 2 (R ) in der selben Halbebene. +1(R) und ϕ +2(R) in der selben Halbebene. Es gilt: d(P - , ϕ -1 ( R )) = d(ϕ -1 (P ) , ϕ -1 (R )) -= d(P, R ) +, ϕ +1(R)) = d(ϕ +1(P ), ϕ +1(R)) += d(P, R) = d(ϕ -2 (P ) , ϕ -2 (R )) +2(P ), ϕ +2(R)) = d(P - , ϕ -2 ( R )) -und analog d( Q - , ϕ -1 ( R )) = d( Q - , ϕ -2 ( R )) +, ϕ +2(R)) +und analog d(Q +, ϕ +1(R)) = d(Q +, ϕ +2(R)) (Teil ii) - Seien P , Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR . Sei B ∈ -P Q \ { P, Q } . Dann ist ϕ( B ) = B wegenBemerkung 63. + Seien P , Q und R Fixpunkte von ϕ, R /∈ P Q und A /∈ P Q ∪ P R ∪ QR. Sei B ∈ +P Q \ { P, Q }. Dann ist ϕ(B ) = B wegen Bemerkung 63. Ist R ∈ AB , so enthält AB 2 Fixpunkte von ϕ Bem. 63 -=====⇒ ϕ( A ) = A . +=====⇒ ϕ(A) = A. P B QC RA Abbildung 4.5: P, Q, R sind Fixpunkte, B ∈ P Q \ { P, Q }, A /∈ P Q ∪ P R ∪ QR -Ist R /∈ AB , so ist AB ∩ P R = ∅ oder AB ∈ RQ = ∅ nachSatz 4.1. Der Schnittpunkt +Ist R /∈ AB , so ist AB ∩ P R = ∅ oder AB ∈ RQ = ∅ nach Satz 4.1. 
Der Schnittpunkt C ist dann Fixpunkt von ϕ - nachBemerkung 63 ⇒ ϕ( A ) = A. + nach Bemerkung 63 ⇒ ϕ(A) = A. Bemerkung 64 (SWS-Kongruenzsatz) -Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A - B - C +Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem AB C und A +B +C Dreiecke, für die gilt: -(i) d(A, B ) = d( A - , B - ) +(i) d(A, B ) = d(A +, B +) (ii) ∠C AB ∼ -= ∠ C - A - B += ∠C +A +B 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE (iii) d(A, C ) = d(A - , C - ) +, C +) Dann ist AB C kongruent zu A - B - C +B +C . -Beweis: Sei ϕ die Isometrie mit ϕ( A - ) = A , ϕ( A - C + - ) = AC + - und ϕ( A - B + - ) = AB + - . Diese -Isometrie existiert wegenPunkt §4. -⇒ C ∈ ϕ( A - C + - ) und B ∈ ϕ (A - B + - ). -d( A - , C - ) = d(ϕ (A - ) , ϕ( C - )) = d( A, ϕ( C - )) 3(i ) -==⇒ ϕ( C - ) = C +Beweis: Sei ϕ die Isometrie mit ϕ(A +) = A, ϕ(A +C + +) = AC + + und ϕ(A +B + +) = AB + +. Diese +Isometrie existiert wegen Punkt §4. +⇒ C ∈ ϕ(A +C + +) und B ∈ ϕ(A +B + +). d(A - , B - ) = d(ϕ( A - ), ϕ(B - )) = d(A, ϕ(B - )) 3(i ) -==⇒ ϕ( B - ) = B -Also gilt insbesondere ϕ( A - B - C - ) = AB C . +, C +) = d(ϕ(A +), ϕ(C +)) = d(A, ϕ(C +)) 3(i) +==⇒ ϕ(C +) = C +d(A +, B +) = d(ϕ(A +), ϕ(B +)) = d(A, ϕ(B +)) 3(i) +==⇒ ϕ(B +) = B +Also gilt insbesondere ϕ(A +B +C +) = AB C . Bemerkung 65 (WSW-Kongruenzsatz) -Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A - B - C +Sei (X, d, G) eine Geometrie, die §1 - §4 erfüllt. Seien außerdem AB C und A +B +C Dreiecke, für die gilt: -(i) d(A, B ) = d( A - , B - ) -(ii) ∠ C AB ∼ -= ∠ C - A - B -(iii) ∠ AB C ∼ -= ∠ A - B - C +(i) d(A, B ) = d(A +, B +) +(ii) ∠C AB ∼ += ∠C +A +B +(iii) ∠AB C ∼ += ∠A +B +C Dann ist AB C kongruent zu A - B - C +B +C . Beweis: Sei ϕ die Isometrie mit ϕ(A - ) = A , ϕ(B - ) = B und ϕ(C - ) liegt in der selben Halbebene -bzgl. AB wie C . Diese Isometrie existiert wegen§4. 
-Aus ∠ C AB = ∠C - A - B - = ∠ ϕ( C - )ϕ (A - ) ϕ(B - ) = ∠ ϕ( C - )AB folgt, dass ϕ (C - ) ∈ AC + - . -Analog folgt aus ∠ AB C = ∠ A - B - C - = ∠ ϕ ( A - ) ϕ( B - ) ϕ( C - ) = ∠ AB ϕ ( C - ) , dass ϕ ( C - ) ∈ +) = A, ϕ(B +) = B und ϕ(C +) liegt in der selben Halbebene +bzgl. AB wie C . Diese Isometrie existiert wegen §4. +Aus ∠C AB = ∠C +A +B + = ∠ϕ(C +)ϕ(A +)ϕ(B +) = ∠ϕ(C +)AB folgt, dass ϕ(C +) ∈ AC + +. +Analog folgt aus ∠AB C = ∠A +B +C + = ∠ϕ(A +)ϕ(B +)ϕ(C +) = ∠AB ϕ(C +), dass ϕ(C +) ∈ B C + - . -Dann gilt ϕ (C - ) ∈ AC ∩ B C = { C } ⇒ ϕ( C - ) = C . -Es gilt also ϕ (A - B - C - ) = AB C . +. +Dann gilt ϕ(C +) ∈ AC ∩ B C = { C } ⇒ ϕ(C +) = C . +Es gilt also ϕ(A +B +C +) = AB C . Definition 61 a) Ein Winkel ist ein Punkt P ∈ X zusammen mit 2 Halbgeraden mit Anfangspunkt P . -Man schreibt: ∠ R -1 P R -2 bzw. ∠ R -2 P R -1 2 +Man schreibt: ∠R +1P R +2 bzw. ∠R +2P R +12 b) - Zwei Winkel sind gleich , wenn es eine Isometrie gibt, die den einen Winkel auf den + Zwei Winkel sind gleich, wenn es eine Isometrie gibt, die den einen Winkel auf den anderen abbildet. -c) ∠ R -1 P - R -2 heißt kleiner als ∠ R -1 P R -2 , wenn es eine Isometrie ϕ gibt, mit ϕ ( P - ) = P , +c) ∠R +1P +R +2 heißt kleiner als ∠R +1P R +2, wenn es eine Isometrie ϕ gibt, mit ϕ(P +) = P , ϕ(P - R + -1 ) = P R + -1 und ϕ (R -2 ) liegt in der gleichen Halbebene bzgl. P R +R+ +1 ) = P R+ +1 und ϕ(R +2) liegt in der gleichen Halbebene bzgl. P R 1 wie R 2 und in der gleichen Halbebene bzgl. P R 2 wie R 1 -d)Im Dreieck P QR gibt es Innenwinkel und Außenwinkel. +d) Im Dreieck P QR gibt es Innenwinkel und Außenwinkel. Bemerkung 66 In einem Dreieck ist jeder Innenwinkel kleiner als jeder nicht anliegende Außenwinkel. -Beweis: Zeige ∠ P RQ < ∠ RQP - . +Beweis: Zeige ∠P RQ < ∠RQP +. Sei M der Mittelpunkt der Strecke QR und P ∈ P Q+ \ P Q. Sei A ∈ M P − mit d(P, M ) = -d( M , A ). +d(M , A). 2 - Für dieses Skript gilt: ∠ R - 1 P R -2 = ∠ R -2 P R -1 . Also sind insbesondere alle Winkel ≤ 180◦ - . 
+Für dieses Skript gilt: ∠R +1P R +2 = ∠R +2P R +1. Also sind insbesondere alle Winkel ≤ 180◦ +. 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE P R 1 R 1R 2R - 2 +2 (a) ∠R -1 P - R -2 ist kleiner als ∠ R -1 P R -2 , -vgl.Definition 61.c P +1P +R +2 ist kleiner als ∠R +1P R +2, +vgl. Definition 61.c P Q R -(b) InnenwinkelundAußenwinkelin - P QR , vgl.Definition +(b) Innenwinkel und Außenwinkel + in P QR, vgl. Definition 61.d -Abbildung 4.6:Situation ausDefinition 61 +Abbildung 4.6: Situation aus Definition 61 Q M A P R @@ -4858,117 +4861,117 @@ A P Q P (b) Innen- und Außenwinkel von P QR -Abbildung 4.7:Situation ausBemerkung 66 -Es gilt: d( Q, M ) = d( M , R ) und d( P, M ) = d( M , A ) sowie ∠P M R = ∠ AM Q ⇒ M RQ -ist kongruent zu AM Q , denn eine der beiden Isometrien, die∠ P M R auf ∠ AM Q abbildet, +Abbildung 4.7: Situation aus Bemerkung 66 +Es gilt: d(Q, M ) = d(M , R) und d(P, M ) = d(M , A) sowie ∠P M R = ∠AM Q ⇒ M RQ +ist kongruent zu AM Q, denn eine der beiden Isometrien, die ∠P M R auf ∠AM Q abbildet, bildet R auf Q und P auf A ab. -⇒ ∠M QA = ∠ M RP = ∠ QRP = ∠ P RQ. -Noch zu zeigen: ∠ M QA < ∠ RQP - , denn A liegt in der selben Halbebene bzgl. P Q wie M . +⇒ ∠M QA = ∠M RP = ∠QRP = ∠P RQ. +Noch zu zeigen: ∠M QA < ∠RQP +, denn A liegt in der selben Halbebene bzgl. P Q wie M . Proposition 4.3 (Existenz der Parallelen) -Sei (X, d, G ) eine Geometrie mit den Axiomen§1-§4. +Sei (X, d, G) eine Geometrie mit den Axiomen §1 - §4. Dann gibt es zu jeder Geraden g ∈ G und jedem Punkt P ∈ X \ g mindestens eine Parallele h ∈ G mit P ∈ h und g ∩ h = ∅. Beweis: Seien P, Q ∈ f ∈ G und ϕ die Isometrie, die Q auf P und P auf P ∈ f mit -d( P, P - ) = d(P, Q ) abbildet und die Halbebenen bzgl. f erhält. +d(P, P +) = d(P, Q) abbildet und die Halbebenen bzgl. f erhält. 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Q hf gP -Abbildung 4.8:Situation ausProposition 4.3 -Annahme: ϕ(g ) ∩ g = ∅ -⇒ Es gibt einen Schnittpunkt { R } = ϕ( g ) ∩ g . 
-Dann ist ∠RQP = ∠ RQP - < ∠ RP P - nachBemerkung 66und ∠RQP = ∠ RP P - , weil -ϕ( ∠ RQP ) = ∠ RP P - . +Abbildung 4.8: Situation aus Proposition 4.3 +Annahme: ϕ(g) ∩ g = ∅ +⇒ Es gibt einen Schnittpunkt { R } = ϕ(g) ∩ g. +Dann ist ∠RQP = ∠RQP + < ∠RP P + nach Bemerkung 66 und ∠RQP = ∠RP P +, weil +ϕ(∠RQP ) = ∠RP P +. ⇒ Widerspruch -⇒ ϕ (g ) ∩ g = ∅ +⇒ ϕ(g) ∩ g = ∅ Folgerung 4.4 -Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π . -D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ (QP + - ) = P R + - , sodass ϕ(R ) in der gleichen -Halbebene bzgl. P Q liegt wie R . -Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π , d. h. die +Die Summe zweier Innenwinkel in einem Dreieck ist kleiner als π. +D. h. es gibt eine Isometrie ϕ mit ϕ(Q) = P und ϕ(QP + +) = P R+ +, sodass ϕ(R) in der gleichen +Halbebene bzgl. P Q liegt wie R. +Beweis: Die Summe eines Innenwinkels mit den anliegenden Außenwinkeln ist π, d. h. die beiden Halbgeraden bilden eine Gerade. Abbildung 4.9: In der sphärischen Geometrie gibt es, im Gegensatz zur euklidischen Geometrie, Dreiecke mit drei 90◦ - -Winkeln. +-Winkeln. Proposition 4.5 -In einer Geometrie mit den Axiomen§1-§4ist in jedem Dreieck die Summe der -Innenwinkel ≤ π . +In einer Geometrie mit den Axiomen §1 - §4 ist in jedem Dreieck die Summe der +Innenwinkel ≤ π. 4.1. AXIOME FÜR DIE EUKLIDISCHE EBENE Sei im Folgenden „ IWS“ die „Innenwinkelsumme“. Beweis: Sei ein Dreieck mit IWS() = π + ε αβ γ P -(a)Summe der Winkel α , β und γ α - 1 +(a) Summe der Winkel α, β und γ α +1 α - 2 βγ +2 βγ M A BC A α -(b)Situation ausProposition 4.5 -Abbildung 4.10:Situation ausProposition 4.5 +(b) Situation aus Proposition 4.5 +Abbildung 4.10: Situation aus Proposition 4.5 Sei α ein Innenwinkel von . Beh.: Es gibt ein Dreieck mit IWS( - ) = IWS( ) und einem Innenwinkel α +) = IWS() und einem Innenwinkel α ≤ α 2 . 
Dann gibt es für jedes n ein -n mit IWS ( -n ) = IWS() und Innenwinkel α +n mit IWS( +n) = IWS() und Innenwinkel α ≤ α -2 n . Für +2n . Für α 2n < ε ist dann die Summe der beiden Innenwinkel um n größer als π ⇒ Widerspruch -zuFolgerung 4.4. +zu Folgerung 4.4. Beweis: Es seien A, B , C ∈ X und das Dreieck mit den Eckpunkten A, B , C und α sei der Innenwinkel bei A, β der Innenwinkel bei B und γ der Innenwinkel bei C . Sei M der Mittelpunkt der Strecke B C . Sei außerdem α - 1 = ∠ C AM und α - 2 = ∠ B AM . +1 = ∠C AM und α +2 = ∠B AM . Sei weiter A - ∈ M A − + ∈ M A− mit d(A - , M ) = d( A, M ). -Die Situation ist inAbbildung 4.10bskizziert. -⇒ ( M A - C ) und ( M AB ) sind kongruent. ⇒ ∠ AB M = ∠ A - C M und ∠ M A - C = +, M ) = d(A, M ). +Die Situation ist in Abbildung 4.10b skizziert. +⇒ (M A +C ) und (M AB ) sind kongruent. ⇒ ∠AB M = ∠A +C M und ∠M A +C = ∠M AB . ⇒ α + β + γ = IWS(AB C ) = IWS(AA - C ) und α - 1 + α -2 = α , also o. B. d. A. +C ) und α +1 + α +2 = α, also o. B. d. A. α 1 ≤ α 2 Bemerkung 67 -In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π . +In einer euklidischen Ebene ist in jedem Dreieck die Innenwinkelsumme gleich π. α α α ββ - γ +γ A BC g -Abbildung 4.11:Situation ausBemerkung 67 +Abbildung 4.11: Situation aus Bemerkung 67 Beweis: Sei g eine Parallele von AB durch C . • Es gilt α - = α wegenProposition 4.3. + = α wegen Proposition 4.3. • Es gilt β - = β wegenProposition 4.3. + = β wegen Proposition 4.3. • Es gilt α = α - wegenAufgabe 8. + wegen Aufgabe 8. 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE ⇒ IWS(AB C ) = γ + α + β @@ -4981,11 +4984,11 @@ In ähnlichen Dreiecken sind Verhältnisse entsprechender Seiten gleich. xy −1 0 1 2 3 40123 z - x λ 2 - z -λ 2 - x -Abbildung 4.12:Strahlensatz + x λ2 +z +λ2 +x +Abbildung 4.12: Strahlensatz Der Beweis wird hier nicht geführt. Für Beweisvorschläge wäre ich dankbar. 
A B C @@ -4993,17 +4996,17 @@ B C cb a c b a -Abbildung 4.13:Die Dreiecke AB C und AB - C +Abbildung 4.13: Die Dreiecke AB C und AB +C sind ähnlich. 4.2.1 Flächeninhalt Definition 62 -„Simplizialkomplexe“ in euklidischer Ebene ( X, d) heißen flächengleich , wenn sie sich in +„Simplizialkomplexe“ in euklidischer Ebene (X, d) heißen flächengleich, wenn sie sich in kongruente Dreiecke zerlegen lassen. 4.2. WEITERE EIGENSCHAFTEN EINER EUKLIDISCHEN EBENE -(a)Zwei kongruente Dreiecke (b) Zwei weitere kongruente Dreiecke +(a) Zwei kongruente Dreiecke (b) Zwei weitere kongruente Dreiecke -Abbildung 4.14:Flächengleichheit +Abbildung 4.14: Flächengleichheit Der Flächeninhalt eines Dreiecks ist 1 /2 · Grundseite · Höhe. A BC @@ -5012,27 +5015,27 @@ C h c c (a) 1 -/2 · | AB | · |h - c | · +/2 · |AB| · |h +c| · A BC L A h -a c +ac (b) 1 -/2 · | BC | · |h - a | -Abbildung 4.15:Flächenberechnung im Dreieck +/2 · |BC | · |h +a| +Abbildung 4.15: Flächenberechnung im Dreieck Zu zeigen: Unabhängigkeit von der gewählten Grundseite. α α γ γ A BC L - A +A L C Abbildung 4.16: AB L - a und C L +a und C L C B sind ähnlich, weil IWS = π Strahlensatz =======⇒ a @@ -5046,10 +5049,10 @@ Satz 4.7 (Satz des Pythagoras) Im rechtwinkligen Dreieck gilt a2 + b2 = c2 - , wobei c die Hypotenuse und a, b die beiden +, wobei c die Hypotenuse und a, b die beiden Katheten sind. -Beweis: (a + b) · (a + b) = a 2 - + 2 ab + b2 +Beweis: (a + b) · (a + b) = a2 + + 2ab + b2 = c2 + 4 · ( 1 2 · a · b) @@ -5063,37 +5066,37 @@ b a · ··· γ - (b)Beweisskizze -Abbildung 4.17:Satz des Pythagoras + (b) Beweisskizze +Abbildung 4.17: Satz des Pythagoras Satz 4.8 -Bis auf Isometrie gibt es genau eine euklidische Ebene ( X, d, G ) , nämlich X = R 2 - , +Bis auf Isometrie gibt es genau eine euklidische Ebene (X, d, G), nämlich X = R2 +, d = euklidischer Abstand, G = Menge der üblichen Geraden. Beweis: -(i) (R 2 - , d -Euklid ) ist offensichtlich eine euklidische Ebene. 
+(i) (R2 +, d +Euklid) ist offensichtlich eine euklidische Ebene. (ii) Sei (X, d) eine euklidische Ebene und g - 1 , g +1, g 2 Geraden in X , die sich in einem Punkt 0 im rechten Winkel schneiden. -Sei P ∈ X \ ( g +Sei P ∈ X \ (g 1 ∪ g -2 ) ein Punkt und P - X der Fußpunkt des Lots von P auf g +2) ein Punkt und P +X der Fußpunkt des Lots von P auf g 1 (vgl. Aufgabe 9 (c)) und P - Y der Fußpunkt des Lots von P auf g -2 . +Y der Fußpunkt des Lots von P auf g +2. Sei x - P := d(P - X , 0) und y P := d(P - Y , 0). -InAbbildung 4.19wurde die Situation skizziert. +X , 0) und y +P := d(P +Y , 0). +In Abbildung 4.19 wurde die Situation skizziert. Sei h : X → R2 - eine Abbildung mit h ( P ) := ( x - P , y + eine Abbildung mit h(P ) := (x +P , y P ) Dadurch wird h auf dem Quadranten definiert, in dem P liegt, d. h. ∀Q ∈ X mit P Q ∩ g @@ -5103,274 +5106,275 @@ Fortsetzung auf ganz X durch konsistente Vorzeichenwahl. Im Folgenden werden zwei Aussagen gezeigt: (i) h ist surjektiv (ii) h ist eine Isometrie -Da jede Isometrie injektiv ist, folgt aus(i)und(ii), dass h bijektiv ist. +Da jede Isometrie injektiv ist, folgt aus (i) und (ii), dass h bijektiv ist. Nun zu den Beweisen der Teilaussagen: 4.3. HYPERBOLISCHE GEOMETRIE · g 1g - 2 +2 PX - (a)Schritt 1 · + (a) Schritt 1 · g - 1g - 2 +1g +2 x Py P P 0 P - XP - YX - (b)Schritt 2 -Abbildung 4.18:Beweis zuSatz 4.8 -(i) Sei ( x, y ) ∈ R 2 - , z. B. x ≥ 0 , y ≥ 0 . Sei P +XP +YX + (b) Schritt 2 +Abbildung 4.18: Beweis zu Satz 4.8 +(i) Sei (x, y) ∈ R2 +, z. B. x ≥ 0, y ≥ 0. Sei P ∈ g -1 mit d(0 , P - ) = x und P +1 mit d(0, P +) = x und P auf der gleichen Seite von g - 2 wie P . +2 wie P . g 1g - 2 +2 x Py - P P Q +P P Q 0 R X -Abbildung 4.19:Beweis zuSatz 4.8 -(ii)Zu Zeigen: d(P, Q ) = d( h( P ), h(Q )) -d( P, Q )2 Pythagoras -= d(P, R ) 2 - + d( R, Q )2 +Abbildung 4.19: Beweis zu Satz 4.8 +(ii) Zu Zeigen: d(P, Q) = d(h(P ), h(Q)) +d(P, Q)2 Pythagoras += d(P, R)2 + + d(R, Q)2 = (y - Q − y +Q − y P )2 - + ( x - Q − x - P )2 - . 
-h( Q) = (x - Q , y -Q ) + + (x +Q − x +P )2 +. +h(Q) = (x +Q, y +Q) 4.3 Hyperbolische Geometrie Definition 63 Sei - H := { z ∈ C | (z ) > 0 } = - ( x, y ) ∈ R 2 + H := { z ∈ C | (z) > 0 } = + (x, y) ∈ R2 y > 0 4.3. HYPERBOLISCHE GEOMETRIE die obere Halbebene bzw. Poincaré-Halbebene und G = G - 1 ∪ G - 2 mit +1 ∪ G +2 mit G - 1 = { g -1 ⊆ H | ∃m ∈ R , r ∈ R - >0 : g +1 = { g +1 ⊆ H | ∃m ∈ R, r ∈ R +>0 : g 1 = { z ∈ H : | z − m| = r } } G - 2 = { g +2 = { g 2 ⊆ H | ∃x ∈ R : g -2 = { z ∈ H : ( z ) = x } } +2 = { z ∈ H : (z) = x } } Die Elemente aus G heißen hyperbolische Geraden. Bemerkung 68 (Eigenschaften der hyperbolischen Geraden) Die hyperbolischen Geraden erfüllen. . . -a). . . die Inzidenzaxiome§1 -b). . . das Anordnungsaxiom§3 (ii) -c). . . nicht das Parallelenaxiom§5 +a) . . . die Inzidenzaxiome §1 +b) . . . das Anordnungsaxiom §3 (ii) +c) . . . nicht das Parallelenaxiom §5 Beweis: -a)Offensichtlich sind§1 (iii)und§1 (ii)erfüllt. Für§1 (i)gilt: +a) Offensichtlich sind §1 (iii) und §1 (ii) erfüllt. Für §1 (i) gilt: Gegeben z - 1 , z +1, z 2 ∈ H Existenz: -Fall 1 (z - 1 ) = ( z - 2 ) +Fall 1 (z +1) = (z +2) ⇒ z - 1 und z - 2 liegen auf - g = { z ∈ C | ( z ) = (z - 1 ) ∧ H } -SieheAbbildung 4.20a. -Fall 2 (z - 1 ) = ( z - 2 ) +1 und z +2 liegen auf + g = { z ∈ C | (z) = (z +1) ∧ H } +Siehe Abbildung 4.20a. +Fall 2 (z +1) = (z +2) Betrachte nun z - 1 und z - 2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte - zu diesen Punkten schneidet diex -Achse. Alle Punkte auf der Mittelsenkrechten +1 und z +2 als Punkte in der euklidischen Ebene. Die Mittelsenkrechte + zu diesen Punkten schneidet die x-Achse. Alle Punkte auf der Mittelsenkrechten zu z - 1 und z - 2 sind gleich weit von z - 1 und z - 2 entfernt. Daher ist der Schnittpunkt mit -der x-Achse der Mittelpunkt eines Kreises durchz - 1 und z - 2 (vgl.Abbildung 4.20b) +1 und z +2 sind gleich weit von z +1 und z +2 entfernt. 
Daher ist der Schnittpunkt mit +der x-Achse der Mittelpunkt eines Kreises durch z +1 und z +2 (vgl. Abbildung 4.20b) xy −1 0 1 2 3 4 501234 Z - 1Z - 2 - (Z - 1 ) -(a)Fall 1 xy -−1 0 1 2 3 4 501234 +1Z +2 +(Z +1 ) +(a) Fall 1 xy +−1 0 1 + 2 3 4 501234 Z - 1 Z - 2 -(b)Fall 2 +1 Z +2 +(b) Fall 2 Abbildung 4.20: Zwei Punkte liegen in der hyperbolischen Geometrie immer auf genau einer Geraden -b)Sei g ∈ G - 1 ˙ +b) Sei g ∈ G +1 ˙ ∪ G - 2 eine hyperbolische Gerade. +2 eine hyperbolische Gerade. 4.3. HYPERBOLISCHE GEOMETRIE -Es existieren disjunkte Zerlegungen von H \ g : +Es existieren disjunkte Zerlegungen von H \ g: Fall 1: g = { z ∈ H z − m| = r } ∈ G - 1 +1 Dann gilt: - H = { z ∈ H z − m | < r } + H = { z ∈ H z − m| < r } =:H - 1 (Kreisinneres) ˙ -∪ { z ∈ H z − m | > r } +1 (Kreisinneres) ˙ +∪ { z ∈ H z − m| > r } -=: H +=:H 2 (Kreisäußeres) Da r > 0 ist H 1 nicht leer, da r ∈ R ist H 2 nicht leer. Fall 2: g = { z ∈ H | z = x } ∈ G - 2 +2 Die disjunkte Zerlegung ist: -H = { z ∈ H | (z ) < x } +H = { z ∈ H | (z) < x } -=: H +=:H 1 (Links) ˙ -∪ { z ∈ H | ( z ) > x } +∪ { z ∈ H | (z) > x } -=: H - 2 (Rechts) +=:H +2 (Rechts) Zu zeigen: ∀A ∈ H -i , B ∈ H +i, B ∈ H j mit i, j ∈ { 1, 2 } gilt: AB ∩ g = ∅ ⇔ i = j -„ ⇐ “: A ∈ H - 1 , B ∈ H +„ ⇐“: A ∈ H +1, B ∈ H 2 : AB ∩ g = ∅ Da d H stetig ist, folgt diese Richtung direkt. Alle Punkte in H 1 haben einen Abstand von m der kleiner ist als r und alle Punkte in H 2 haben einen Abstand von m der -größer ist als r . Da man jede Strecke von A nach B insbesondere auch als stetige +größer ist als r. Da man jede Strecke von A nach B insbesondere auch als stetige Abbildung f : R → R -> 0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g = ∅ -„ ⇒ “: A ∈ H - i , B ∈ H - j mit i, j ∈ { 1 , 2 } : AB ∩ g = ∅ ⇒ i = j +>0 auffassen kann, greift der Zwischenwertsatz ⇒ AB ∩ g = ∅ +„ ⇒“: A ∈ H +i, B ∈ H +j mit i, j ∈ { 1, 2 } : AB ∩ g = ∅ ⇒ i = j Sei h die Gerade, die durch A und B geht. 
-Da A, B /∈ g , aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen -unterschiedlichen Punkt. Aus§1 (i)folgt, dass sich g und h in höchstens einen Punkt +Da A, B /∈ g, aber A, B ∈ h gilt, haben g und h insbesondere mindestens einen +unterschiedlichen Punkt. Aus §1 (i) folgt, dass sich g und h in höchstens einen Punkt schneiden. Sei C dieser Punkt. Aus A, B /∈ g folgt: C = A und C = B . Also liegt C zwischen A und B . Daraus folgt, dass A und B bzgl. g in verschiedenen Halbebenen liegen. -c)SieheAbbildung 4.21. +c) Siehe Abbildung 4.21. xy −5 −4 −3 −2 −1 0 1 2 3 4 5 6012345 -Abbildung 4.21:Hyperbolische Geraden erfüllen§5nicht. +Abbildung 4.21: Hyperbolische Geraden erfüllen §5 nicht. 4.3. HYPERBOLISCHE GEOMETRIE Definition 64 Es seien a, b, c, d ∈ R mit ad − bc = 0 und σ : C → C eine Abbildung definiert durch -σ (z ) := az + b +σ(z) := az + b cz + d σ heißt Möbiustransformation. Proposition 4.9 -a)Die Gruppe SL -2 (R ) operiert auf H durch die Möbiustransformation -σ (z ) := +a) Die Gruppe SL +2(R) operiert auf H durch die Möbiustransformation +σ(z) := a b c d ◦ z := az + b cz + d -b)Die Gruppe PSL -2 ( R) = SL -2 ( R )/ - (±I ) operiert durch σ auf H. +b) Die Gruppe PSL +2(R) = SL +2(R)/ +(±I ) operiert durch σ auf H. c) PSL -2 (R ) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h. +2(R) operiert auf R ∪ { ∞ }. Diese Gruppenoperation ist 3-fach transitiv, d. h. zu x - 0 < x - 1 < x +0 < x +1 < x ∞ ∈ R gibt es genau ein σ ∈ PSL -2 ( R ) mit σ ( x - 0 ) = 0 , σ ( x -1 ) = 1 , -σ (x - ∞ ) = ∞. +2(R) mit σ(x +0) = 0, σ(x +1) = 1, +σ(x +∞) = ∞. d) SL -2 (R ) wird von den Matrizen +2(R) wird von den Matrizen λ 0 -0 λ− 1 +0 λ−1 -=: A - λ , +=:A +λ , 1 t 0 1 -=: B +=:B t und 0 1 −1 0 -=: C mit t, λ ∈ R × +=:C mit t, λ ∈ R× erzeugt. e) PSL -2 ( R) operiert auf G . +2(R) operiert auf G. Beweis: -a)Sei z = x + i y ∈ H, d. h. y > 0 und σ = - a b +a) Sei z = x + i y ∈ H, d. h. 
y > 0 und σ = +a b c d ∈ SL -2 ( R ) -⇒ σ (z ) = a(x + i y ) + b -c( x + i y ) + d +2(R) +⇒ σ(z) = a(x + i y) + b +c(x + i y) + d = (ax + b) + i ay -(cx + d) + i cy · ( cx + d) − i cy +(cx + d) + i cy · (cx + d) − i cy (cx + d) − i cy = (ax + b)(cx + d) + aycy -(cx + d) 2 - + ( cy ) 2 + i ay ( cx + d) − (ax + b) cy (cx + d)2 - + ( cy )2 + + (cy)2 + i ay(cx + d) − (ax + b)cy +(cx + d)2 + + (cy)2 = axcx + axd + bcx + bd + aycy -(cx + d) 2 - + ( cy )2 + i (ad − bc)y -( cx + d)2 - + ( cy ) 2 +(cx + d)2 + + (cy)2 + i (ad − bc)y +(cx + d)2 + + (cy)2 SL -2 (R ) -= ac( x2 - + y 2 - ) + adx + bcx + bd -(cx + d) 2 - + ( cy )2 + i y +2(R) += ac(x2 + + y2 +) + adx + bcx + bd (cx + d)2 - + ( cy )2 -⇒ ( σ (z )) = y -(cx +d )2 - +( cy )2 > 0 + + (cy)2 + i y +(cx + d)2 + + (cy)2 +⇒ (σ(z)) = y +(cx+d)2 ++(cy)2 > 0 Die Abbildung bildet also nach H ab. Außerdem gilt: 1 0 @@ -5392,48 +5396,48 @@ c a b c d ◦ a - z + b +z + b c - z + d +z + d = a a - z + b +z+b c - z +d + b +z+d + b c a - z + b +z+b c - z +d + d +z+d + d = a(a - z + b - )+b (c - z +d - ) +z+b +)+b(c +z+d +) c - z + d -c (a - z +b - )+ d(c - z + d - ) +z+d +c(a +z+b +)+d(c +z+d +) c - z + d -= a (a - z + b - ) + b( c - z + d - ) +z+d += a(a +z + b +) + b(c +z + d +) c(a - z + b - ) + d(c - z + d - ) -= ( aa +z + b +) + d(c +z + d +) += (aa + bc - )z + ab +)z + ab + bd (ca + db - )z + cb +)z + cb + dd = aa @@ -5449,101 +5453,101 @@ ca a b c d · - a +a b c d ◦ z -b)Es gilt σ (z ) = (−σ )(z ) für alle σ ∈ SL -2 ( R ) und z ∈ H. -c)Ansatz: σ = +b) Es gilt σ(z) = (−σ)(z) für alle σ ∈ SL +2(R) und z ∈ H. +c) Ansatz: σ = a b c d - σ (x - 0 ) = ax -0 + b + σ(x +0) = ax +0+b cx - 0 + d ! +0+d ! 
= 0 ⇒ ax - 0 + b = 0 ⇒ b = −ax +0 + b = 0 ⇒ b = −ax 0 -σ (x - ∞ ) = ∞ ⇒ cx +σ(x +∞) = ∞ ⇒ cx ∞ + d = 0 ⇒ d = −cx ∞ -σ (x - 1 ) = 1 ⇒ ax - 1 + b = cx +σ(x +1) = 1 ⇒ ax +1 + b = cx 1 + d -a( x - 1 − x - 0 ) = c(x - 1 − x - ∞ ) ⇒ c = a x - 1 − x +a(x +1 − x +0) = c(x +1 − x +∞) ⇒ c = a x +1−x 0 x - 1 −x - ∞ -⇒ −a 2 +1−x +∞ +⇒ −a2 · x ∞ x - 1 −x - 0 +1−x +0 x - 1 − x - ∞ + a2 - x - 0 x -1 − x - 0 +1−x +∞ + a2 x - 1 −x - ∞ = 1 -⇒ a 2 x - 1 − x +0 x +1−x 0 x -0 −x - ∞ ( x - 0 − x - ∞ ) = 1 ⇒ a 2 +1−x +∞ = 1 +⇒ a2 x +1−x +0 +x +0−x +∞ (x +0 − x +∞) = 1 ⇒ a2 = x - 1 −x - ∞ +1−x +∞ (x - 1 − x - ∞ )(x - 1 − x -0 ) -d)Es gilt: - A −1 +1−x +∞)(x +1−x +0) +d) Es gilt: + A−1 λ = A 1 λ B −1 t = B -− t +−t C −1 = C 3 Daher genügt es zu zeigen, dass man mit A - λ , B +λ, B t und C alle Matrizen aus SL -2 ( R ) +2(R) erzeugen kann, genügt es also von einer beliebigen Matrix durch Multiplikation mit Matrizen der Form A - λ , B +λ, B t und C die Einheitsmatrix zu generieren. Sei also M = - a b +a b c d ∈ SL -2 ( R ) +2(R) beliebig. Fall 1: a = 0 Da M ∈ SL -2 (R ) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c = 0. Es +2(R) ist, gilt det M = 1 = ad − bc = −bc. Daher ist insbesondere c = 0. Es folgt: 0 1 @@ -5583,505 +5587,503 @@ c d 1 0 c d − bc Da wir det M = 1 = ad − bc = d − bc wissen, gilt sogar M - 2, 2 = 1. +2,2 = 1. Gehe zu Fall 4. Fall 4: a = 1, b = 0, d = 1 A - − 1 C B -c C +−1C B +cC 1 0 c 1 = 1 0 0 1 Daher erzeugen Matrizen der Form A - λ , B - t und C die Gruppe SL -2 R . -e)Es genügt die Aussage für Matrizen ausProposition 4.9 (d)zu zeigen. +λ, B +t und C die Gruppe SL +2R. +e) Es genügt die Aussage für Matrizen aus Proposition 4.9 (d) zu zeigen. • σ = - λ 0 -0 λ −1 -, also σ ( z ) = λ 2 - z . Daraus ergeben sich die Situationen, die in -Abbildung 4.22aundAbbildung 4.22bdargestellt sind. +λ 0 +0 λ−1 +, also σ(z) = λ2 +z. Daraus ergeben sich die Situationen, die in +Abbildung 4.22a und Abbildung 4.22b dargestellt sind. 
xy −1 0 1 2 3 4 5 6 70123 m λ2 - mm + i rλ2 - m + i λ2 - r +mm + irλ2 +m + iλ2 +r m + 1 -(a)Fall 1 xy +(a) Fall 1 xy −1 0 1 2 3 40123 z - x λ 2 + x λ2 z -λ 2 +λ2 x -(b)Fall 2 (Strahlensatz) -Abbildung 4.22:Beweis vonProposition 4.9 (e)für eine Diagonalmatrix +(b) Fall 2 (Strahlensatz) +Abbildung 4.22: Beweis von Proposition 4.9 (e) für eine Diagonalmatrix • Offensichtlich gilt die Aussage für σ = 1 a 0 1 • Sei nun σ = 0 1 −1 0 -, also σ (z ) = − 1 +, also σ(z) = − 1 z Bemerkung 69 Zu hyperbolischen Geraden g -1 , g +1, g 2 gibt es σ ∈ PSL -2 ( R) mit σ (g -1 ) = g -2 . +2(R) mit σ(g +1) = g +2. 4.3. HYPERBOLISCHE GEOMETRIE · xy −1 0 101 z = r · eiϕ 1 z = 1 -r · e iϕ -Abbildung 4.23:Inversion am Kreis -Beweis: NachProposition 4.9 (c)gibt es σ mit σ ( a -1 ) = b -1 und σ ( a -2 ) = b -2 . Dann existiert -σ (g - 1 ) := g -2 wegen dem Inzidenzaxiom§1und ist eindeutig bestimmt. +r · eiϕ +Abbildung 4.23: Inversion am Kreis +Beweis: Nach Proposition 4.9 (c) gibt es σ mit σ(a +1) = b +1 und σ(a +2) = b +2. Dann existiert +σ(g +1) := g +2 wegen dem Inzidenzaxiom §1 und ist eindeutig bestimmt. Definition 65 Seien z - 1 , z -2 , z -3 , z +1, z +2, z +3, z 4 ∈ C paarweise verschieden. Dann heißt - DV( z - 1 , z -2 , z -3 , z -4 ) := z -1 −z - 4 + DV(z +1, z +2, z +3, z +4) := z +1−z +4 z - 1 −z - 2 +1−z +2 z - 3 −z - 4 +3−z +4 z - 3 −z - 2 = (z - 1 − z - 4 ) · (z - 3 − z - 2 ) +3−z +2 = (z +1 − z +4) · (z +3 − z +2) (z - 1 − z - 2 ) · (z - 3 − z - 4 ) +1 − z +2) · (z +3 − z +4) Doppelverhältnis von z - 1 , . . . , z - 4 . +1, . . . , z +4. Bemerkung 70 (Eigenschaften des Doppelverhältnisses) -a) DV( z - 1 , . . . , z - 4 ) ∈ C \ { 0 , 1 } -b) DV( z - 1 , z -4 , z -3 , z -2 ) = 1 -DV( z - 1 ,z - 2 ,z - 3 ,z - 4 ) -c) DV( z - 3 , z -2 , z -1 , z -4 ) = 1 -DV( z - 1 ,z - 2 ,z - 3 ,z - 4 ) +a) DV(z +1, . . . 
, z +4) ∈ C \ { 0, 1 } +b) DV(z +1, z +4, z +3, z +2) = 1 +DV(z +1,z +2,z +3,z +4) +c) DV(z +3, z +2, z +1, z +4) = 1 +DV(z +1,z +2,z +3,z +4) d) DV ist auch wohldefiniert, wenn eines der z - i = ∞ oder wenn zwei der z - i gleich sind. -e) DV(0 , 1, ∞, z -4 ) = z - 4 (Der Fall z - 4 ∈ { 0 , 1 , ∞ } ist zugelassen). -f )Für σ ∈ PSL -2 (C ) und z - 1 , . . . , z - 4 ∈ C ∪ { ∞ } ist -DV( σ (z - 1 ), σ (z - 2 ), σ ( z - 3 ) , σ ( z - 4 )) = DV(z - 1 , z -2 , z -3 , z -4 ) -und für σ (z ) = 1 +i = ∞ oder wenn zwei der z +i gleich sind. +e) DV(0, 1, ∞, z +4) = z +4 (Der Fall z +4 ∈ { 0, 1, ∞ } ist zugelassen). +f ) Für σ ∈ PSL +2(C) und z +1, . . . , z +4 ∈ C ∪ { ∞ } ist +DV(σ(z +1), σ(z +2), σ(z +3), σ(z +4)) = DV(z +1, z +2, z +3, z +4) +und für σ(z) = 1 z gilt -DV( σ (z - 1 ), σ (z - 2 ), σ ( z - 3 ) , σ ( z - 4 )) = - DV( z - 1 , z -2 , z -3 , z -4 ) -g) DV( z - 1 , z -2 , z -3 , z -4 ) ∈ R ∪ { ∞ } ⇔ z - 1 , . . . , z - 4 liegen auf einer hyperbolischen Geraden. +DV(σ(z +1), σ(z +2), σ(z +3), σ(z +4)) = DV(z +1, z +2, z +3, z +4) +g) DV(z +1, z +2, z +3, z +4) ∈ R ∪ { ∞ } ⇔ z +1, . . . , z +4 liegen auf einer hyperbolischen Geraden. Beweis: -a) DV( z - 1 , . . . , z - 4 ) = 0, da z - i paarweise verschieden -DV( z - 1 , . . . , z - 4 ) = 1, da: -Annahme: DV( z - 1 , . . . , z - 4 ) = 1 +a) DV(z +1, . . . , z +4) = 0, da z +i paarweise verschieden +DV(z +1, . . . , z +4) = 1, da: +Annahme: DV(z +1, . . . , z +4) = 1 ⇔ (z - 1 − z - 2 )(z - 3 − z - 4 ) = ( z - 1 − z - 4 )(z - 3 − z - 2 ) +1 − z +2)(z +3 − z +4) = (z +1 − z +4)(z +3 − z +2) 4.3. 
HYPERBOLISCHE GEOMETRIE ⇔ z - 1 z - 3 − z - 2 z - 3 − z - 1 z - 4 + z - 2 z - 4 = z - 1 z - 3 − z - 3 z - 4 − z - 1 z - 2 + z - 2 z - 4 +1z +3 − z +2z +3 − z +1z +4 + z +2z +4 = z +1z +3 − z +3z +4 − z +1z +2 + z +2z +4 ⇔ z - 2 z - 3 + z - 1 z - 4 = z - 3 z - 4 + z - 1 z - 2 +2z +3 + z +1z +4 = z +3z +4 + z +1z +2 ⇔ z - 2 z - 3 − z - 3 z - 4 = z - 1 z - 2 − z - 1 z - 4 +2z +3 − z +3z +4 = z +1z +2 − z +1z +4 ⇔ z - 3 (z - 2 − z - 4 ) = z - 1 ( z - 2 − z - 4 ) +3(z +2 − z +4) = z +1(z +2 − z +4) ⇔ z - 3 = z - 1 oder z - 2 = z - 4 +3 = z +1 oder z +2 = z +4 Alle z - i sind paarweise verschieden ⇒ Widerspruch -b) DV( z - 1 , z -4 , z -3 , z -2 ) = (z - 1 − z - 2 )· ( z - 3 −z - 4 ) +i sind paarweise verschieden ⇒ Widerspruch +b) DV(z +1, z +4, z +3, z +2) = (z +1−z +2)·(z +3−z +4) (z - 1 − z - 4 )· ( z - 3 −z - 2 ) = 1 -DV( z - 1 ,z - 2 ,z - 3 ,z - 4 ) -c) DV( z - 3 , z -2 , z -1 , z -4 ) = (z - 3 − z - 4 )· ( z - 1 −z - 2 ) +1−z +4)·(z +3−z +2) = 1 +DV(z +1,z +2,z +3,z +4) +c) DV(z +3, z +2, z +1, z +4) = (z +3−z +4)·(z +1−z +2) (z - 3 − z - 2 )· ( z - 1 −z - 4 ) = 1 -DV( z - 1 ,z - 2 ,z - 3 ,z - 4 ) -d)Zwei der z - i dürfen gleich sein, da: +3−z +2)·(z +1−z +4) = 1 +DV(z +1,z +2,z +3,z +4) +d) Zwei der z +i dürfen gleich sein, da: Fall 1 z - 1 = z - 4 oder z - 3 = z - 2 -In diesem Fall ist DV( z - 1 , . . . , z - 4 ) = 0 +1 = z +4 oder z +3 = z +2 +In diesem Fall ist DV(z +1, . . . , z +4) = 0 Fall 2 z - 1 = z - 2 oder z - 3 = z - 4 -Mit der Regel von L’Hospital folgt, dass in diesem Fall DV (z - 1 , . . . , z - 4 ) = ∞ gilt. +1 = z +2 oder z +3 = z +4 +Mit der Regel von L’Hospital folgt, dass in diesem Fall DV(z +1, . . . , z +4) = ∞ gilt. Fall 3 z - 1 = z - 3 oder z - 2 = z - 4 -Durch Einsetzen ergibt sich DV( z - 1 , . . . , z - 4 ) = 1 . +1 = z +3 oder z +2 = z +4 +Durch Einsetzen ergibt sich DV(z +1, . . . , z +4) = 1. 
Im Fall, dass ein z - i = ∞ ist, ist entweder DV (0, 1, ∞, z -4 ) = 0 oder DV (0, 1, ∞ , z -4 ) ± ∞ -e) DV(0 , 1 , ∞, z -4 ) = (0 −z - 4 ) · (∞− 1) -(0 −1) · (∞− z - 4 ) = z - 4 · ( ∞−1) -∞− z - 4 = z - 4 -f )Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +i = ∞ ist, ist entweder DV(0, 1, ∞, z +4) = 0 oder DV(0, 1, ∞, z +4) ± ∞ +e) DV(0, 1, ∞, z +4) = (0−z +4)·(∞−1) +(0−1)·(∞−z +4) = z +4·(∞−1) +∞−z +4 = z +4 +f ) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. g) Sei σ ∈ PSL -2 (C ) mit σ (z - 1 ) = 0, σ (z - 2 ) = 1, σ (z - 3 ) = ∞. Ein solches σ existiert, da man +2(C) mit σ(z +1) = 0, σ(z +2) = 1, σ(z +3) = ∞. Ein solches σ existiert, da man drei Parameter von σ wählen darf. Bem. 70.f -⇒ DV( z - 1 , . . . , z - 4 ) = DV(0, 1 , ∞, σ (z - 4 )) -⇒ DV( z - 1 , . . . , z - 4 ) ∈ R ∪ { ∞ } -⇔ σ ( z - 4 ) ∈ R ∪ { ∞ } -Behauptung folgt, weil σ − 1 - (R ∪ ∞ ) ein Kreis oder eine Gerade in C ist. +⇒ DV(z +1, . . . , z +4) = DV(0, 1, ∞, σ(z +4)) +⇒ DV(z +1, . . . , z +4) ∈ R ∪ { ∞ } +⇔ σ(z +4) ∈ R ∪ { ∞ } +Behauptung folgt, weil σ−1 +(R ∪ ∞) ein Kreis oder eine Gerade in C ist. Definition 66 Für z - 1 , z +1, z 2 ∈ H sei g z - 1 ,z - 2 die eindeutige hyperbolische Gerade durch z - 1 und z - 2 und a - 1 , a +1,z +2 die eindeutige hyperbolische Gerade durch z +1 und z +2 und a +1, a 2 die „Schnittpunkte“ von g z - 1 ,z - 2 mit R ∪ { ∞ }. +1,z +2 mit R ∪ { ∞ }. Dann sei d -H (z - 1 , z -2 ) := 1 +H(z +1, z +2) := 1 2 | ln DV(a -1 , z -1 , a -2 , z -2 )| und heiße hyperbolische Metrik. -Beh.: - Für z - 1 , z +1, z +1, a +2, z +2)| und heiße hyperbolische Metrik. +Beh.: Für z +1, z 2 ∈ H sei g z - 1 ,z - 2 die eindeutige hyperbolische Gerade durch z - 1 und z - 2 und a - 1 , a +1,z +2 die eindeutige hyperbolische Gerade durch z +1 und z +2 und a +1, a 2 die „Schnittpunkte“ von g z - 1 ,z - 2 mit R ∪ { ∞ }. +1,z +2 mit R ∪ { ∞ }. 
Dann gilt: 1 2 | ln DV(a - 1 , z -1 , a -2 , z -2 ) | = 1 +1, z +1, a +2, z +2)| = 1 2 | ln DV(a -2 , z -1 , a -1 , z -2 )| -Beweis: WegenBemerkung 70.cgilt: -DV( a - 1 , z -1 , a -2 , z -2 ) = 1 -DV( a -2 , z -1 , a -1 , z -2 ) +2, z +1, a +1, z +2)| +Beweis: Wegen Bemerkung 70.c gilt: +DV(a +1, z +1, a +2, z +2) = 1 +DV(a +2, z +1, a +1, z +2) Außerdem gilt: ln 1 -x = ln x − 1 +x = ln x−1 = (−1) · ln x = − ln x 4.3. HYPERBOLISCHE GEOMETRIE Da der ln im Betrag steht, folgt direkt: 1 2 | ln DV(a - 1 , z -1 , a -2 , z -2 ) | = 1 +1, z +1, a +2, z +2)| = 1 2 | ln DV(a -2 , z -1 , a -1 , z -2 )| -Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x -Achse im Doppelverhältnis +2, z +1, a +1, z +2)| +Es ist also egal in welcher Reihenfolge die „Schnittpunkte“ mit der x-Achse im Doppelverhältnis genutzt werden. Beh.: Die hyperbolische Metrik ist eine Metrik auf H. -Beweis: WegenBemerkung 70.fist -d( z - 1 , z -2 ) := d( σ (z - 1 ), σ (z - 2 )) mit σ ( a -1 ) = 0, σ (a -2 ) = ∞ -d. h. σ ( g +Beweis: Wegen Bemerkung 70.f ist +d(z +1, z +2) := d(σ(z +1), σ(z +2)) mit σ(a +1) = 0, σ(a +2) = ∞ +d. h. σ(g z - 1 ,z - 2 ) = i R (imaginäre Achse). +1,z +2 ) = i R (imaginäre Achse). also gilt o. B. d. A. z - 1 = i a und z - 2 = i b mit a, b ∈ R und a < b. -2d( i a, i b) =| ln DV(0, i a, ∞, i b ) | +1 = i a und z +2 = i b mit a, b ∈ R und a < b. +2d(i a, i b) =| ln DV(0, i a, ∞, i b) | =| ln (0 − i b)(∞ − i a) (0 − i a)(∞ − i b) | =| ln b a | = ln b − ln a Also: d(z - 1 , z -2 ) ≥ 0, d( z - 1 , z -2 ) = 0 ⇔ z - 1 = z - 2 -2d( z - 2 , z -1 ) =| ln DV(a -2 , z -2 , a -1 , z -1 ) | -=| ln DV(∞, i b, 0, i a ) | +1, z +2) ≥ 0, d(z +1, z +2) = 0 ⇔ z +1 = z +2 +2d(z +2, z +1) =| ln DV(a +2, z +2, a +1, z +1) | +=| ln DV(∞, i b, 0, i a) | Bem. 
70.b -= | ln DV(0, i b, ∞, i a ) | += | ln DV(0, i b, ∞, i a) | = 2d(z - 1 , z -2 ) +1, z +2) Liegen drei Punkte z - 1 , z -2 , z -3 ∈ C auf einer hyperbolischen Geraden, so gilt d( z - 1 , z -3 ) = -d( z - 1 , z -2 ) + d(z - 2 , z -3 ) (wenn z - 2 zwischen z - 1 und z - 3 liegt). +1, z +2, z +3 ∈ C auf einer hyperbolischen Geraden, so gilt d(z +1, z +3) = +d(z +1, z +2) + d(z +2, z +3) (wenn z +2 zwischen z +1 und z +3 liegt). Dreiecksungleichung: Beweis ist umständlich und wird hier nicht geführt. Es sei auf die Vorlesung „Hyperbolische Geometrie“ verwiesen. Satz 4.10 Die hyperbolische Ebene H mit der hyperbolischen Metrik d und den hyperbolischen -Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome§1-§4sind erfüllt, -aber Axiom§5ist verletzt. +Geraden bildet eine „nichteuklidische Geometrie“, d. h. die Axiome §1 - §4 sind erfüllt, +aber Axiom §5 ist verletzt. 4.3. HYPERBOLISCHE GEOMETRIE Übungsaufgaben Aufgabe 8 Seien (X, d) eine absolute Ebene und P, Q, R ∈ X Punkte. Der Scheitelwinkel des Winkels -∠ P QR ist der Winkel, der aus den Halbgeraden QP − - und QR − +∠P QR ist der Winkel, der aus den Halbgeraden QP − + und QR− gebildet wird. Die -Nebenwinkel von ∠ P QR sind die von QP + - und QR − +Nebenwinkel von ∠P QR sind die von QP + + und QR− bzw. QP − - und QR + + und QR+ gebildeten Winkel. Zeigen Sie: -(a)Die beiden Nebenwinkel von ∠P QR sind gleich. -(b)Der Winkel ∠ P QR ist gleich seinem Scheitelwinkel. +(a) Die beiden Nebenwinkel von ∠P QR sind gleich. +(b) Der Winkel ∠P QR ist gleich seinem Scheitelwinkel. Aufgabe 9 Sei (X, d) eine absolute Ebene. Der Abstand eines Punktes P zu einer Menge Y ⊆ X von -Punkten ist definiert durch d( P, Y ) := inf d(P, y ) |y ∈ Y . +Punkten ist definiert durch d(P, Y ) := inf d(P, y)|y ∈ Y . Zeigen Sie: (a) Ist AB C ein Dreieck, in dem die Seiten AB und AC kongruent sind, so sind die -Winkel ∠ AB C und ∠ B C A gleich. +Winkel ∠AB C und ∠B C A gleich. 
(b) Ist AB C ein beliebiges Dreieck, so liegt der längeren Seite der größere Winkel gegenüber und umgekehrt. (c) Sind g eine Gerade und P /∈ g ein Punkt, so gibt es eine eindeutige Gerade h mit @@ -6094,96 +6096,96 @@ Aufgabe 11 Beweise den Kongruenzsatz S S S . 5 Krümmung Definition 67 -Sei f : [a, b] → R n +Sei f : [a, b] → Rn eine eine Funktion aus C ∞ - . Dann heißt f Kurve . +. Dann heißt f Kurve. 5.1 Krümmung von Kurven Definition 68 -Sei γ : I = [a, b] → R n +Sei γ : I = [a, b] → Rn eine Kurve. -a)Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: - γ - (t ) - 2 = 1 ∀t ∈ I +a) Die Kurve γ heißt durch Bogenlänge parametrisiert, wenn gilt: +γ +(t) +2 = 1 ∀t ∈ I Dabei ist γ - (t) = (γ -1 (t ), γ -2 (t ), . . . , γ -n (t )). -b) l (γ ) = +(t) = (γ +1(t), γ +2(t), . . . , γ +n(t)). +b) l(γ ) = b -a γ - ( t) dt heißt Länge von γ . +a γ +(t)dt heißt Länge von γ . Bemerkung 71 (Eigenschaften von Kurven I) -Sei γ : I = [a, b] → R n +Sei γ : I = [a, b] → Rn eine C ∞ - -Funktion. -a)Ist γ durch Bogenlänge parametrisiert, so ist l (γ ) = b − a. -b)Ist γ durch Bogenlänge parametrisiert, so ist γ - (t ) orthogonal zu γ - ( t) für alle t ∈ I . +-Funktion. +a) Ist γ durch Bogenlänge parametrisiert, so ist l(γ ) = b − a. +b) Ist γ durch Bogenlänge parametrisiert, so ist γ +(t) orthogonal zu γ +(t) für alle t ∈ I . Beweis: -a) l (γ ) = +a) l(γ ) = b -a γ - (t ) dt = +a γ +(t)dt = b a 1dt = b − a. -b) Im Folgenden wird die Aussage nur fürγ : [a, b] → R 2 +b) Im Folgenden wird die Aussage nur für γ : [a, b] → R2 bewiesen. Allerdings funktioniert der Beweis im Rn analog. Es muss nur die Ableitung angepasst werden. 
1 = γ - (t) = γ - ( t) 2 +(t) = γ +(t)2 = γ - ( t) , γ - ( t) +(t), γ +(t) ⇒ 0 = d dt γ - ( t) , γ - ( t) +(t), γ +(t) = d dt (γ -1 (t) γ -1 ( t) + γ -2 (t )γ -2 (t)) +1(t)γ +1(t) + γ +2(t)γ +2(t)) = 2 · (γ -1 ( t) · γ -1 (t ) + γ -2 ( t) · γ -2 ( t)) -= 2 · γ - ( t) , γ - ( t) +1 (t) · γ +1(t) + γ +2 (t) · γ +2(t)) += 2 · γ +(t), γ +(t) Definition 69 -Sei γ : I → R 2 +Sei γ : I → R2 eine durch Bogenlänge parametrisierte Kurve. -a)Für t ∈ I sei n ( t) Normalenvektor an γ in t wenn gilt: - n (t) , γ - ( t) = 0, n ( t) = 1 und det((γ - (t ), n(t ))) = +1 - 5.1. KRÜMMUNG VON KURVEN -b)Seit κ : I → R so, dass gilt: +a) Für t ∈ I sei n(t) Normalenvektor an γ in t wenn gilt: +n(t), γ +(t) = 0, n(t) = 1 und det((γ +(t), n(t))) = +1 + 5.1. KRÜMMUNG VON KURVEN +b) Seit κ : I → R so, dass gilt: γ - ( t) = κ( t) · n ( t) -Dann heißt κ (t ) Krümmung von γ in t . -Da n (t ) und γ - ( t) nachBemerkung 71.blinear abhängig sind, existiert κ (t) . +(t) = κ(t) · n(t) +Dann heißt κ(t) Krümmung von γ in t. +Da n(t) und γ +(t) nach Bemerkung 71.b linear abhängig sind, existiert κ(t). Beispiel 45 -Gegeben sei ein Kreis mit Radius r , d. h. mit Umfang 2πr . Es gilt: -γ (t ) = +Gegeben sei ein Kreis mit Radius r, d. h. mit Umfang 2πr. 
Es gilt: +γ (t) = r · cos t r , r · sin t r - für t ∈ [0, 2 πr ] + für t ∈ [0, 2πr] ist parametrisiert durch Bogenlänge, da gilt: γ - (t ) = +(t) = (r · 1 r )(− sin t -r ) , r 1 +r ), r 1 r cos t r = @@ -6191,13 +6193,13 @@ r r , cos t r Der Normalenvektor von γ in t ist -n (t) = +n(t) = − cos t r , − sin t r da gilt: - n (t ), γ - (t ) = + n(t), γ +(t) = − cos t r − sin t @@ -6209,11 +6211,11 @@ cos t r = (− cos t r ) · (− sin t -r ) + ( − sin t +r ) + (− sin t r ) · (cos t r ) = 0 - n (t ) = +n(t) = (− cos t @@ -6223,12 +6225,12 @@ r ) = (− cos t -r ) 2 - + ( − sin t +r )2 + + (− sin t r )2 = 1 det(γ -1 ( t), n(t )) = +1(t), n(t)) = @@ -6242,16 +6244,16 @@ r = (− sin t -r ) 2 - − ( − cos t +r )2 + − (− cos t r ) · cos t r = 1 Die Krümmung ist für jedes t konstant 1 r , da gilt: γ - (t ) = - − 1 +(t) = +− 1 r cos t r , − 1 r sin t @@ -6261,189 +6263,189 @@ r · − cos t r , − sin t r -⇒ κ (t ) = 1 +⇒ κ(t) = 1 r - 5.2. TANGENTIALEBENE + 5.2. TANGENTIALEBENE Definition 70 -Sei γ : I → R 3 +Sei γ : I → R3 eine durch Bogenlänge parametrisierte Kurve. -a)Für t ∈ I heißt κ( t) := γ - (t ) die Krümmung von γ in t . -b)Ist für t ∈ I die Ableitung γ - ( t) = 0, so heißt γ - ( t) - γ - ( t) Normalenvektor an γ in t. -c) b(t ) sei ein Vektor, der γ - (t), n(t) zu einer orientierten Orthonormalbasis vonR 3 +a) Für t ∈ I heißt κ(t) := γ +(t) die Krümmung von γ in t. +b) Ist für t ∈ I die Ableitung γ +(t) = 0, so heißt γ +(t) +γ +(t) Normalenvektor an γ in t. +c) b(t) sei ein Vektor, der γ +(t), n(t) zu einer orientierten Orthonormalbasis von R3 ergänzt. Also gilt: det(γ - (t ), n(t) , b( t)) = 1 -b(t ) heißt Binormalenvektor, die Orthonormalbasis +(t), n(t), b(t)) = 1 +b(t) heißt Binormalenvektor, die Orthonormalbasis γ - (t ), n(t ), b( t) +(t), n(t), b(t) heißt begleitendes Dreibein. Bemerkung 72 (Eigenschaften von Kurven II) -Sei γ : I → R 3 +Sei γ : I → R3 durch Bogenlänge parametrisierte Kurve. -a) n (t ) ist orthogonal zu γ - ( t) . 
-b) b(t ) ausDefinition 70.cist eindeutig. +a) n(t) ist orthogonal zu γ +(t). +b) b(t) aus Definition 70.c ist eindeutig. 5.2 Tangentialebene -Erinnerung Sie sich anDefinition 32„reguläre Fläche“. +Erinnerung Sie sich an Definition 32 „reguläre Fläche“. Äquivalent dazu ist: S ist lokal von der Form V (f ) = - x ∈ R 3 + x ∈ R3 - f ( x) = 0 + f (x) = 0 für eine C ∞ - -Funktion f : R 3 - → R . +-Funktion f : R3 + → R. Definition 71 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S , F : U → V ∩ S eine lokale Parametrisierung um s ∈ V : - ( u, v ) → ( x (u, v ), y (u, v ), z (u, v )) + (u, v) → (x(u, v), y(u, v), z(u, v)) Für p = F −1 - ( s) ∈ U sei +(s) ∈ U sei J -F (p ) =  +F (p) =   ∂x -∂u (p ) ∂x -∂v (p ) +∂u (p) ∂x +∂v (p) ∂y -∂u (p ) ∂y -∂v (p ) +∂u (p) ∂y +∂v (p) ∂z -∂u (p ) ∂z -∂v (p )  +∂u (p) ∂z +∂v (p)  und D - p F : R 2 - → R 3 +pF : R2 + → R3 die durch J -F (p ) definierte lineare Abbildung. +F (p) definierte lineare Abbildung. Dann heißt T -s S := Bild(D - p F ) die Tangentialebene an s ∈ S . +sS := Bild(D +pF ) die Tangentialebene an s ∈ S . Bemerkung 73 (Eigenschaften der Tangentialebene) a) T -s S ist 2 -dimensionaler Untervektorraum von R 3 - . +sS ist 2-dimensionaler Untervektorraum von R3 +. b) T -s S = ˜u, ˜v , wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix J - F (p ) sind. +sS = ˜u, ˜v, wobei ˜u, ˜v die Spaltenvektoren der Jacobi-Matrix J +F (p) sind. c) T -s S hängt nicht von der gewählten Parametrisierung ab. - 5.2. TANGENTIALEBENE -d) Sei S = V ( f ) eine reguläre Fläche in R 3 - , also f : V → R eine C ∞ - -Funktion, V ⊆ R 3 -offen, grad(f )(x ) = 0 für alle x ∈ S . +sS hängt nicht von der gewählten Parametrisierung ab. + 5.2. TANGENTIALEBENE +d) Sei S = V (f ) eine reguläre Fläche in R3 +, also f : V → R eine C ∞ +-Funktion, V ⊆ R3 +offen, grad(f )(x) = 0 für alle x ∈ S . Dann ist T -s S = (grad(f )(s ))⊥ +sS = (grad(f )(s))⊥ für jedes s ∈ S . Beweis: a) J -F ist eine 3 × 2 -Matrix, die mit einem 2 × 1 -Vektor multipliziert wird. 
Das ist +F ist eine 3 × 2-Matrix, die mit einem 2 × 1-Vektor multipliziert wird. Das ist eine lineare Abbildung und aus der linearen Algebra ist bekannt, das das Bild ein Vektorraum ist. Da Rg(J - F ) = 2, ist auch dim(T -s S ) = 2. -b)Hier kann man wie inPunkt a)argumentieren +F ) = 2, ist auch dim(T +sS ) = 2. +b) Hier kann man wie in Punkt a) argumentieren c) T -s S - = { x ∈ R 3 - |∃parametrisierte Kurve γ : [ −ε, + ε ] → S für ein ε > 0 mit γ (0) = +sS + = {x ∈ R3 +|∃parametrisierte Kurve γ : [−ε, +ε] → S für ein ε > 0 mit γ (0) = s und γ - (0) = x } +(0) = x} Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. d) Sei x ∈ T -s S, γ : [ −ε, + ε ] → S eine parametrisierte Kurve mit ε > 0 und γ - (0) = s, +sS, γ : [−ε, +ε] → S eine parametrisierte Kurve mit ε > 0 und γ +(0) = s, sodass γ - (0) = x gilt. Da γ ( t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 +(0) = x gilt. Da γ (t) ∈ S für alle t ∈ [−ε, ε], ist f ◦ γ = 0 ⇒ 0 = (f ◦ γ ) - (0) = grad(f )(γ (0)), γ - (0) +(0) = grad(f )(γ (0)), γ +(0) ⇒ T -s S ⊆ grad(f )(s) ⊥ +sS ⊆ grad(f )(s)⊥ dim=2 ====⇒ T -s S = (grad(f )(s ))⊥ +sS = (grad(f )(s))⊥ Definition 72 a) Ein Normalenfeld auf der regulären Fläche S ⊆ R3 ist eine Abbildung n : S → S 2 ⊆ -R 3 - mit n (s ) ∈ T -s S ⊥ +R3 + mit n(s) ∈ T +sS ⊥ für jedes s ∈ S . -b) S heißt orientierbar , wenn es ein stetiges Normalenfeld auf S gibt. +b) S heißt orientierbar, wenn es ein stetiges Normalenfeld auf S gibt. Manchmal wird zwischen einem Normalenfeld und einem Einheitsnormalenfeld unterschieden. Im Folgenden werden diese Begriffe jedoch synonym benutzt. Bemerkung 74 (Eigenschaften von Normalenfeldern) -a)Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C ∞ - ). +a) Ein Normalenfeld auf S ist genau dann stetig, wenn es glatt ist (also C ∞ +). b) Zu jedem s ∈ S gibt es eine Umgebung V ⊆ R3 von s und eine lokale Parametrisierung F : U → V von S um s, sodass auf F (U ) = V ∩ S ein stetiges Normalenfeld existiert. 
-c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas vonS aus lokalen +c) S ist genau dann orientierbar, wenn es einen differenzierbaren Atlas von S aus lokalen Parametrisierungen F i : U - i → V - i , i ∈ I gibt, sodass für alle i, j ∈ F und alle +i → V +i, i ∈ I gibt, sodass für alle i, j ∈ F und alle s ∈ V i ∩ V j ∩ S gilt: det(D - s V - i → V +s V +i→V j F - j ◦ F − 1 +j ◦ F −1 i -∈ R3 ×3 ) > 0 +∈R3×3 ) > 0 Beweis: Wird hier nicht geführt. Beispiel 46 (Normalenfelder) 1) S = S 2 - , n - 1 = id -S 2 ist ein stetiges Normalenfeld. +, n +1 = id +S2 ist ein stetiges Normalenfeld. Auch n - 2 = −id -S 2 ist ein stetiges Normalenfeld. -2) S = Möbiusband (vgl.Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, +2 = −id +S2 ist ein stetiges Normalenfeld. +2) S = Möbiusband (vgl. Abbildung 5.1) ist nicht orientierbar. Es existiert ein Normalenfeld, aber kein stetiges Normalenfeld. - 5.3. GAUSS-KRÜMMUNG -Abbildung 5.1:Möbiusband + 5.3. GAUSS-KRÜMMUNG +Abbildung 5.1: Möbiusband 5.3 Gauß-Krümmung Bemerkung 75 -Sei S eine reguläre Fläche, s ∈ S , n ( s) ist ein Normalenvektor in s, x ∈ T -s S , x = 1. -Sei E der von x und n ( s) aufgespannte 2-dimensionale Untervektorraum von R 3 - . -Dann gibt es eine Umgebung V ⊆ R 3 - von s , sodass +Sei S eine reguläre Fläche, s ∈ S , n(s) ist ein Normalenvektor in s, x ∈ T +sS , x = 1. +Sei E der von x und n(s) aufgespannte 2-dimensionale Untervektorraum von R3 +. +Dann gibt es eine Umgebung V ⊆ R3 + von s, sodass C := (s + E ) ∩ S ∩ V -das Bild einer durch Bogenlänge parametrisierten Kurveγ : [−ε, ε] → S enthält mit γ (0) = s +das Bild einer durch Bogenlänge parametrisierten Kurve γ : [−ε, ε] → S enthält mit γ (0) = s und γ - (0) = x . +(0) = x. 
Beweis: „Satz über implizite Funktionen“ 1 Definition 73 -In der Situation ausBemerkung 75heißt die Krümmung κ - γ (0) der Kurve γ in der Ebene +In der Situation aus Bemerkung 75 heißt die Krümmung κ +γ (0) der Kurve γ in der Ebene (s + E ) im Punkt s die Normalkrümmung von S in s in Richtung x = γ - (0). +(0). Man schreibt: κ - Nor ( s, x) := κ +Nor(s, x) := κ γ (0) Hinweis: Die Krümmung ist nur bis auf das Vorzeichen bestimmt. Beispiel 47 (Gauß-Krümmung) @@ -6451,513 +6453,514 @@ Beispiel 47 (Gauß-Krümmung) = V (X 2 + Y 2 + Z 2 - − 1) ist die Kugel um den Ursprung mit Radius 1,n = id, -s = (0, 0 , 1), x = (1, 0, 0) -⇒ E = R · x + R · n (s ) (x, z -Ebene) + − 1) ist die Kugel um den Ursprung mit Radius 1, n = id, +s = (0, 0, 1), x = (1, 0, 0) +⇒ E = R · x + R · n(s) (x, z-Ebene) C = E ∩ S ist Kreislinie κ -Nor ( s, x) = 1 +Nor(s, x) = 1 r = 1 -2) S = V ( X 2 +2) S = V (X 2 + Z 2 - − 1) ⊆ R 3 - ist ein Zylinder (sieheAbbildung 5.2a). s = (1, 0, 0) + − 1) ⊆ R3 + ist ein Zylinder (siehe Abbildung 5.2a). s = (1, 0, 0) x - 1 = (0, 1, 0) ⇒ E +1 = (0, 1, 0) ⇒ E 1 = R · e 1 + R · e - 2 (x, y -Ebene) +2 (x, y-Ebene) S ∩ E 1 = V (X 2 + Y 2 − 1) ∩ E , Kreislinie in E ⇒ κ - Nor ( s, x -1 ) = ±1 +Nor(s, x +1) = ±1 x - 2 = (0, 0, 1), E +2 = (0, 0, 1), E 2 = R · e 1 + R · e - 3 (x, z -Ebene) +3 (x, z-Ebene) 1 - Siehe z. B. https://github.com/MartinThoma/LaTeX- examples/tree/master/documents/Analysis%20II - 5.3. GAUSS-KRÜMMUNG +Siehe z. B. https://github.com/MartinThoma/LaTeX- examples/tree/master/documents/Analysis%20II + 5.3. 
GAUSS-KRÜMMUNG V ∩ E 2 ∩ S = - (1, 0 , z ) ∈ R 3 + (1, 0, z) ∈ R3 z ∈ R ist eine Gerade ⇒ κ - Nor (s, x -2 ) = 0 +Nor(s, x +2) = 0 3) S = V (X 2 − Y 2 - − Z ), s = (0, 0, 0) (Hyperbolisches Paraboloid, sieheAbbildung 5.2b) + − Z ), s = (0, 0, 0) (Hyperbolisches Paraboloid, siehe Abbildung 5.2b) x - 1 = (1, 0, 0), n (s ) = (0 , 0 , 1) +1 = (1, 0, 0), n(s) = (0, 0, 1) x - 2 = (0, 1 , 0) +2 = (0, 1, 0) κ -Nor ( s, x -1 ) = 2 +Nor(s, x +1) = 2 κ -Nor ( s, x -2 ) = −2 - −1 .5 −1 −0. 5 0 0 .5 1 1.5 +Nor(s, x +2) = −2 +−1.5 −1 −0.5 0 0.5 1 1.5 −101012345 xyz - (a) S = V ( X 2 + (a) S = V (X 2 + Z 2 - − 1) −2 −1. 5 −1 −0 .5 0 0 .5 1 1 .5 2 + − 1) −2 −1.5 −1 −0.5 0 0.5 1 1.5 2 −2−1012−202 xyz - −4−2024f (x, y ) -(b) S = V ( X 2 + −4−2024f(x, y) + (b) S = V (X 2 − Y 2 − Z ) -Abbildung 5.2:Beispiele für reguläre Flächen +Abbildung 5.2: Beispiele für reguläre Flächen Definition 74 Sei S ⊆ R3 eine reguläre Fläche, s ∈ S und n ein stetiges Normalenfeld auf S . -γ : [ −ε, ε] → S eine nach Bogenlänge parametrisierte Kurve ( ε > 0 ) mit γ (0) = s und +γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve (ε > 0) mit γ (0) = s und γ - (0) = 0. -Sei n (0) := γ - (0) +(0) = 0. +Sei n(0) := γ +(0) γ - (0) . Zerlege -n (0) = n (0)t - + n (0)⊥ - mit n (0)t +(0) . Zerlege +n(0) = n(0)t + + n(0)⊥ + mit n(0)t ∈ T -s S und n (0)⊥ - ∈ ( T -s S )⊥ -Dann ist n (0) ⊥ - = n (0) , n( s) · n (s ) +sS und n(0)⊥ + ∈ (T +sS )⊥ +Dann ist n(0)⊥ + = n(0), n(s) · n(s) κ - Nor (s, γ ) := γ - (0), n(s ) die Normalkrümmung . +Nor(s, γ ) := γ +(0), n(s) die Normalkrümmung. Bemerkung 76 -Sei γ ( t) = γ (−t) , t ∈ [ −ε, ε]. Dann ist κ - Nor (s, γ ) = κ -Nor ( s, γ ). +Sei γ (t) = γ (−t), t ∈ [−ε, ε]. Dann ist κ +Nor(s, γ ) = κ +Nor(s, γ ). Beweis: γ - (0) = γ - (0) , da γ - (0) = −γ - (0). +(0) = γ +(0), da γ +(0) = −γ +(0). Es gilt: κ -Nor ( s, γ ) hängt nur von |γ - (0)| ab und ist gleich κ - Nor (s, γ - (0)). +Nor(s, γ ) hängt nur von |γ +(0)| ab und ist gleich κ +Nor(s, γ +(0)). 
Bemerkung 77 -Sei S eine reguläre Fläche und n = n ( s) ein Normalenvektor an S in s . +Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. Sei T 1 s S = { x ∈ T -s S | x = 1 } ∼ +sS | x = 1 } ∼ = S 1 - . Dann ist +. Dann ist κn -Nor (s ) : T 1 -s S → R , x → κ -Nor ( s, x) +Nor(s) : T 1 +s S → R, x → κ +Nor(s, x) eine glatte Funktion und Bild κn -Nor (s ) ist ein abgeschlossenes Intervall. +Nor(s) ist ein abgeschlossenes Intervall. Definition 75 -Sei S eine reguläre Fläche und n = n ( s) ein Normalenvektor an S in s . - 5.3. GAUSS-KRÜMMUNG +Sei S eine reguläre Fläche und n = n(s) ein Normalenvektor an S in s. + 5.3. GAUSS-KRÜMMUNG a) κn -1 (s ) : = min +1 (s) : = min κn -Nor ( s, x) +Nor(s, x) x ∈ T 1 s S und κn -2 (s ) : = max - κ n -Nor ( s, x) +2 (s) : = max + κn +Nor(s, x) x ∈ T 1 s S heißen Hauptkrümmungen von S in s. -b) K ( s) := κn -1 ( s) · κ n -2 ( s) heißt Gauß-Krümmung von S in s . +b) K (s) := κn +1 (s) · κn +2 (s) heißt Gauß-Krümmung von S in s. Bemerkung 78 -Ersetzt man n durch −n , so gilt: -κ −n -Nor (s, x) = −κn -Nor (x ) ∀x ∈ T 1 +Ersetzt man n durch −n, so gilt: +κ−n +Nor(s, x) = −κn +Nor(x) ∀x ∈ T 1 s S -⇒ κ− n -1 (s ) = −κn -2 ( s) -κ− n -2 (s ) = −κn -1 ( s) -und K − n - (s ) = K n - (s ) =: K ( s) +⇒ κ−n +1 (s) = −κn +2 (s) +κ−n +2 (s) = −κn +1 (s) +und K −n +(s) = K n +(s) =: K (s) Beispiel 48 1) S = S 2 - . Dann ist κ -1 ( s) = κ - 2 (s ) = ±1 ∀s ∈ S 2 -⇒ K (s ) = 1 -2)Zylinder: +. Dann ist κ +1(s) = κ +2(s) = ±1 ∀s ∈ S 2 +⇒ K (s) = 1 +2) Zylinder: κ -1 (s ) = 0 , κ -2 ( s) = 1 ⇒ K (s ) = 0 -3)Sattelpunkt auf hyperbolischem Paraboloid: +1(s) = 0, κ +2(s) = 1 ⇒ K (s) = 0 +3) Sattelpunkt auf hyperbolischem Paraboloid: κ -1 (s ) < 0 , κ -2 ( s) = 0 → K (s ) < 0 -4) S = Torus. SieheAbbildung 5.3 +1(s) < 0, κ +2(s) = 0 → K (s) < 0 +4) S = Torus. 
Siehe Abbildung 5.3 s - 1s - 2 +1s +2 s - 3Abbildung 5.3: K ( s -1 ) > 0 , K (s - 2 ) = 0, K (s - 3 ) < 0 +3 +Abbildung 5.3: K (s +1) > 0, K (s +2) = 0, K (s +3) < 0 Bemerkung 79 Sei S eine reguläre Fläche, s ∈ S ein Punkt. 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM -a)Ist K ( s) > 0 , so liegt S in einer Umgebung von s ganz auf einer Seite von T -s S + s . -b)Ist K ( s) < 0 , so schneidet jede Umgebung von s in S beide Seiten von T -s S + s . +a) Ist K (s) > 0, so liegt S in einer Umgebung von s ganz auf einer Seite von T +sS + s. +b) Ist K (s) < 0, so schneidet jede Umgebung von s in S beide Seiten von T +sS + s. 5.4 Erste und zweite Fundamentalform -Sei S ⊆ R 3 +Sei S ⊆ R3 eine reguläre Fläche, s ∈ S , T -s S die Tangentialebene an S in s und F : U → V eine -lokale Parametrisierung von S um s . Weiter sei p := F −1 - (s ). +sS die Tangentialebene an S in s und F : U → V eine +lokale Parametrisierung von S um s. Weiter sei p := F −1 +(s). Definition 76 Sei I - S ∈ R2 ×2 +S ∈ R2×2 definiert als I - S : = - g -1, 1 (s ) g -1, 2 (s ) +S : = g -1, 2 (s ) g -2, 2 (s ) +1,1(s) g +1,2(s) +g +1,2(s) g +2,2(s) = -E ( s) F ( s) -F ( s ) G (s ) +E (s) F (s) +F (s) G(s) mit g i,j = g - s (D - p F ( e -i ) , D -p F ( e +s(D +pF (e +i), D +pF (e j )) = ∂ F ∂ u -i (p ), ∂ F +i (p), ∂ F ∂ u -j (p ) i, j ∈ { 1, 2 } +j (p) i, j ∈ { 1, 2 } Die Matrix I - S heißt erste Fundamentalform von S bzgl. der Parametrisierung F . +S heißt erste Fundamentalform von S bzgl. der Parametrisierung F . Bemerkung 80 a) - Die Einschränkung des Standardskalarproduktes des R 3 + Die Einschränkung des Standardskalarproduktes des R3 auf T -s S macht T -s S zu einem +sS macht T +sS zu einem euklidischen Vektorraum. b) { D - p F (e - 1 ), D -p F (e - 2 ) } ist eine Basis von T -s S . +pF (e +1), D +pF (e +2) } ist eine Basis von T +sS . c) Bzgl. 
der Basis { D - p F ( e -1 ) , D -p F ( e -2 ) } hat das Standardskalarprodukt ausBemerkung - 80.adie Darstellungsmatrix I +pF (e +1), D +pF (e +2) } hat das Standardskalarprodukt aus Bemerkung + 80.a die Darstellungsmatrix I S . d) g -i,j (s ) ist eine differenzierbare Funktion von s . +i,j (s) ist eine differenzierbare Funktion von s. Bemerkung 81 det(I - S ) = +S ) = ∂ F ∂ u -1 ( p) × ∂ F +1 (p) × ∂ F ∂ u -2 (p ) +2 (p) 2 Beweis: Sei ∂F ∂u - 1 ( p) =  +1 (p) =  x - 1 +1 x - 2 +2 x - 3  +3  , ∂F ∂u - 2 (p ) =  +2 (p) =   y - 1 +1 y - 2 -y 3  +2 +y3  Dann ist ∂F ∂u -1 (p ) × ∂F +1 (p) × ∂F ∂u - 2 ( p) =  - z - 1 +2 (p) =  +z +1 z - 2 +2 z - 3  +3  mit z - 1 = x - 2 y +1 = x +2y 3 − x - 3 y - 2 +3y +2 z - 2 = x - 3 y +2 = x +3y 1 − x - 1 y - 3 +1y +3 z - 3 = x - 1 y - 2 − x - 2 y - 1 +3 = x +1y +2 − x +2y +1 ⇒ ∂ F ∂ u -1 ( p ) × ∂ F +1 (p) × ∂ F ∂ u -2 ( p) = z 2 -1 + z 2 -2 + z 2 +2 (p) = z2 +1 + z2 +2 + z2 3 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM det(I S ) = g -1 , 1 g -2 , 2 − g 2 -1, 2 +1,1g +2,2 − g2 +1,2 =  x - 1 +1 x - 2 +2 x - 3  +3  ,  x - 1 +1 x - 2 +2 x - 3  -  +3 +  y - 1 +1 y - 2 +2 y - 3  +3  ,  - y +y 1 y 2 y -3  +3  −  - x +x 1 x 2 x -3  +3  ,  y - 1 +1 y - 2 +2 y - 3  - 2 -= (x 2 -1 + x 2 -2 + x 2 -3 )(y 2 -1 + y 2 -2 + y 2 -3 ) − ( x -1 y +3 +2 += (x2 +1 + x2 +2 + x2 +3)(y2 +1 + y2 +2 + y2 +3 ) − (x +1y 1 + x - 2 y - 2 + x - 3 y - 3 ) 2 +2y +2 + x +3y +3)2 Definition 77 a) - Das Differential d A = - det(I )d u -1 d u + Das Differential dA = +det(I )du +1du 2 heißt Flächenelement von S bzgl. der Parametrisierung F . -b)Für eine Funktion f : V → R heißt +b) Für eine Funktion f : V → R heißt V f dA := -U f ( F (u -1 , u -2 ) +U f (F (u +1, u +2) -=: s ) -det I (s )du -1 du +=:s ) +det I (s)du +1du 2 der Wert des Integrals von f über V , falls das Integral rechts existiert. Bemerkung 82 a) V f dA ist unabhängig von der gewählten Parametrisierung. 
-b)Sei f : S → R eine Funktion, die im Sinne vonDefinition 77.blokal integrierbar ist. +b) Sei f : S → R eine Funktion, die im Sinne von Definition 77.b lokal integrierbar ist. Dann ist - S f dA wohldefiniert, falls (z. B.) S kompakt ist. +S f dA wohldefiniert, falls (z. B.) S kompakt ist. Etwa: S f dA = n -i =1 +i=1 V - if dA +if dA − -i = j +i=j V - i ∩V - jf dA +i∩V +jf dA + i,j,k V - i ∩V - j ∩V - kf dA +i∩V +j ∩V +kf dA − . . . Beweis: -a)Mit Transformationsformel. -b)Ist dem Leser überlassen. +a) Mit Transformationsformel. +b) Ist dem Leser überlassen. Proposition 5.1 Sei S ⊆ R3 eine reguläre, orientierbare Fläche mit glatten Normalenfeld n : S → S 2 - . +. Dann gilt: a) n induziert für jedes s ∈ S eine lineare Abbildung d -s n : T -s S → T -n (s ) S 2 +sn : T +sS → T +n(s)S 2 durch d -s n ( x ) = d -dt n (s „+“ tx +sn(x) = d +dt n(s„+“ tx Soll auf Fläche S bleiben) -t =0 +t=0 Die Abbildung d -s n heißt Weingarten-Abbildung +sn heißt Weingarten-Abbildung 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM b) T -n(s ) S 2 +n(s)S 2 = T -s S . +sS . c) d -s n ist ein Endomorphismus von T -s S . +sn ist ein Endomorphismus von T +sS . d) d -s n ist selbstadjungiert bzgl. des Skalarproduktes I +sn ist selbstadjungiert bzgl. des Skalarproduktes I S . Hinweis: Die Weingarten-Abbildung wird auch Formoperator genannt. 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM Beweis: -a)Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. +a) Wenn jemand diesen Beweis führt, bitte an info@martin-thoma.de schicken. b) T -n(S ) S 2 - = n (s ) ⊥ +n(S)S 2 + = n(s)⊥ = T -s S -c)WegenProposition 5.1 (a)ist d -s n ein Homomorphismus. -d)Zu zeigen: ∀x, y ∈ I -s S : x, d -s n ( y ) = d -s n ( x ), y +sS +c) Wegen Proposition 5.1 (a) ist d +sn ein Homomorphismus. +d) Zu zeigen: ∀x, y ∈ I +sS : x, d +sn(y) = d +sn(x), y Aufgrund der Bilinearität des Skalarproduktes genügt es diese Eigenschaft für die Basisvektoren zu zeigen. 
Sei x - i = D - p F ( e -i ) = ∂F +i = D +pF (e +i) = ∂F ∂u - i (p ) i = 1, 2 +i (p) i = 1, 2 Beh.: x - i , d -s n ( x -j ) = ∂ 2 - F +i, d +sn(x +j ) = ∂2 +F ∂u -i ∂u - j (p ), d -s n (x - i ) -⇒ ∂ 2 - F +i∂u +j (p), d +sn(x +i) +⇒ ∂2 +F ∂u - i ∂u -j ( p) , d -s n ( x - i ) = x +i∂u +j (p), d +sn(x +i) = x j , d -s n ( x -i ) +sn(x +i) Bew.: 0 = ∂ F ∂ u (p + te - j ) , n( p + te - j ) +j ), n(p + te +j ) ⇒ 0 = d dt - ∂ F -∂ u ( p + te - j ), n( p + te - j ) + ∂ F +∂ u (p + te +j ), n(p + te +j ) @@ -6966,244 +6969,244 @@ t=0 dt ∂ F ∂ u i (p + te - j ) +j ) -∂ 2 - F +∂2 +F ∂u j ∂u -i ( p) +i (p) - t=0 , n( s) + x - i , d -s n D -p F (e - j ) +t=0, n(s) + x +i, d +sn D +pF (e +j ) x - j +j Definition 78 Die durch −d -s n definierte symmetrische Bilinearform aufT -s S heißt zweite Fundamentalform +sn definierte symmetrische Bilinearform auf T +sS heißt zweite Fundamentalform von S in s bzgl. F . Man schreibt: I I - s ( x, y ) = −d -s n ( x) , y = I - s (−d -s n (x ) , y ) +s(x, y) = −d +sn(x), y = I +s(−d +sn(x), y) Bemerkung 83 Bezüglich der Basis { x - 1 , x +1, x 2 } von T -s S hat I I - s die Darstellungsmatrix -(h( s ) +sS hat I I +s die Darstellungsmatrix +(h(s) i,j ) -i,j =1, 2 mit h -i,j (s ) = ∂ 2 - F +i,j=1,2 mit h +i,j (s) = ∂ 2 +F ∂ u -i ∂ u -j ( p) , n( s) +i∂ u +j (p), n(s) Proposition 5.2 -Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mitγ (0) = s. Dann gilt: +Sei γ : [−ε, ε] → S eine nach Bogenlänge parametrisierte Kurve mit γ (0) = s. Dann gilt: κ - Nor (s, γ ) = I I - s (γ - (0), γ - (0)) -Beweis: NachDefinition 74ist κ -Nor (s, γ ) = γ - (0), n(s ) . Nach Voraussetzung gilt -n (γ (t )) ⊥ γ - ( t) ⇔ γ - (0), n( s) = 0 +Nor(s, γ ) = I I +s(γ +(0), γ +(0)) +Beweis: Nach Definition 74 ist κ +Nor(s, γ ) = γ +(0), n(s). 
Nach Voraussetzung gilt +n(γ (t)) ⊥ γ +(t) ⇔ γ +(0), n(s) = 0 Die Ableitung nach t ergibt 0 = d -dt (n ( γ (t )), γ - ( t)) +dt (n(γ (t)), γ +(t)) = d -dt n ( γ (t )) +dt n(γ (t)) -t=0 , γ - (0) - + n ( s) , γ - (0) +t=0, γ +(0) + + n(s), γ +(0) 5.4. ERSTE UND ZWEITE FUNDAMENTALFORM = d -s n ( γ - (0)), γ - (0) + κ -Nor ( s, γ ) +sn(γ +(0)), γ +(0) + κ +Nor(s, γ ) = −I I - s ( γ - (0), γ - (0)) + κ -Nor ( s, γ ) +s(γ +(0), γ +(0)) + κ +Nor(s, γ ) Folgerung 5.3 -Die beiden Definitionen von Normalkrümmung inAbschnitt 5.1stimmen überein: +Die beiden Definitionen von Normalkrümmung in Abschnitt 5.1 stimmen überein: κ -Nor (s, γ ) = κ - Nor ( s, γ - (0)) +Nor(s, γ ) = κ +Nor(s, γ +(0)) Satz 5.4 -Sei S ⊆ R 3 +Sei S ⊆ R3 eine reguläre, orientierbare Fläche und s ∈ S . -a)Die Hauptkrümmungen κ -1 (s ), κ -2 ( s) sind die Eigenwerte von I I - s . -b)Für die Gauß-Krümmung gilt: K (s ) = det( I I - s ) +a) Die Hauptkrümmungen κ +1(s), κ +2(s) sind die Eigenwerte von I I +s. +b) Für die Gauß-Krümmung gilt: K (s) = det(I I +s) Beweis: a) I I - s ist symmetrisch, I -s S hat also eine Orthonormalbasis aus Eigenvektoren y -1 , y +s ist symmetrisch, I +sS hat also eine Orthonormalbasis aus Eigenvektoren y +1, y 2 von I I - s . Ist x ∈ T -s S , x = 1, so gibt es ϕ ∈ [0, 2 π ) mit x = cos ϕ · y - 1 + sin ϕ · y -2 . +s. Ist x ∈ T +sS , x = 1, so gibt es ϕ ∈ [0, 2π) mit x = cos ϕ · y +1 + sin ϕ · y +2. Seien λ - 1 , λ +1, λ 2 die Eigenwerte von I I - s , also I I - s (y - i , y -i ) = λ - i . Dann gilt: +s, also I I +s(y +i, y +i) = λ +i. Dann gilt: I I - s (x, x) = cos2 +s(x, x) = cos2 ϕλ -1 + sin 2 +1 + sin2 ϕλ 2 = (1 − sin2 ϕ)λ - 1 + sin 2 +1 + sin2 ϕλ 2 = λ - 1 + sin 2 +1 + sin2 ϕ(λ - 2 − λ -1 ) ≥ λ - 1 +2 − λ +1) ≥ λ +1 = cos2 - ϕ + (1 − cos 2 - ϕ) λ + ϕ + (1 − cos2 + ϕ)λ 2 = λ - 2 − cos 2 +2 − cos2 ϕ(λ - 2 − λ -1 ) ≤ λ - 2 -Prop. 5. 2 +2 − λ +1) ≤ λ +2 +Prop. 
5.2 =====⇒ λ - 1 = min +1 = min κ -Nor (s, x) +Nor(s, x) x ∈ T 1 s S λ 2 = max κ -Nor ( s, x) +Nor(s, x) x ∈ T 1 s S Satz 5.5 (Satz von Gauß-Bonnet) -Sei S ⊆ R 3 +Sei S ⊆ R3 eine kompakte orientierbare reguläre Fläche. Dann gilt: -S K (s )dA = 2πχ( S ) -Dabei ist χ ( S ) die Euler-Charakteristik von S . +S K (s)dA = 2πχ(S ) +Dabei ist χ(S ) die Euler-Charakteristik von S . Beweis: Der Beweis wird hier nicht geführt. Er kann in „Elementare Differentialgeometrie“ von Christian Bär (2. Auflage), ISBN 978-3-11-022458-0, ab Seite 281 nachgelesen werden. Lösungen der Übungsaufgab en -Lösung zu Aufgabe1 +Lösung zu Aufgabe 1 Teilaufgabe a) Es gilt: -(i) ∅ , X ∈ T +(i) ∅, X ∈ T X . (ii) T -X ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alleU -1 , U +X ist offensichtlich unter Durchschnitten abgeschlossen, d. h. es gilt für alle U +1, U 2 ∈ T X : U - 1 ∩ U +1 ∩ U 2 ∈ T X . (iii) Auch unter beliebigen Vereinigungen ist T X abgeschlossen, d. h. es gilt für eine beliebige Indexmenge I und alle U - i ∈ T +i ∈ T X für alle i ∈ I : -i ∈I U - i ∈ T +i∈I U +i ∈ T X -Also ist ( X, T +Also ist (X, T X ) ein topologischer Raum. -Teilaufgabe b) Wähle x = 1 , y = 0 . Dann gilt x = y und die einzige Umgebung von x +Teilaufgabe b) Wähle x = 1, y = 0. Dann gilt x = y und die einzige Umgebung von x ist X . Da y = 0 ∈ X können also x und y nicht durch offene Mengen getrennt werden. (X, T X ) ist also nicht hausdorffsch. -Teilaufgabe c) Nach Bemerkung4sind metrische Räume hausdorffsch. Da(X, T - X ) nach -(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass(X, T +Teilaufgabe c) Nach Bemerkung 4 sind metrische Räume hausdorffsch. Da (X, T +X ) nach +(b) nicht hausdorffsch ist, liefert die Kontraposition der Trennungseigenschaft, dass (X, T X ) kein metrischer Raum sein kann. -Lösung zu Aufgabe2 +Lösung zu Aufgabe 2 Teilaufgabe a) Beh.: ∀a ∈ Z : { a } ist abgeschlossen. Sei a ∈ Z beliebig. 
Dann gilt: Wenn jemand diese Aufgabe gemacht hat, bitte die Lösung an info@martin-thoma.de schicken. Teilaufgabe b) -Beh.: { − 1, 1 } ist nicht offen +Beh.: { −1, 1 } ist nicht offen Bew.: durch Widerspruch -Annahme: { − 1, 1 } ist offen. +Annahme: { −1, 1 } ist offen. Dann gibt es T ⊆ B, sodass - M ∈ T M = { − 1, 1 } . Aber alle U ∈ B haben unendlich viele -Elemente. Auch endlich viele Schnitte von Elementen inB haben unendlich viele Elemente -⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { − 1 , 1 } ist +M ∈T M = { −1, 1 }. Aber alle U ∈ B haben unendlich viele +Elemente. Auch endlich viele Schnitte von Elementen in B haben unendlich viele Elemente +⇒ keine endliche nicht-leere Menge kann in dieser Topologie offen sein ⇒ { −1, 1 } ist nicht offen. Teilaufgabe c) Beh.: Es gibt unendlich viele Primzahlen. - Lösungen der Übungsaufgaben + Lösungen der Übungsaufgaben Bew.: durch Widerspruch Annahme: Es gibt nur endlich viele Primzahlen p ∈ P Dann ist - Z \ { − 1 , +1 } FS d. Arithmetik + Z \ { −1, +1 } FS d. Arithmetik = -p∈ P U +p∈P U 0,p -endlich. Das ist ein Widerspruch zu | Z| ist unendlich und | { −1, 1 } | ist endlich. -Lösung zu Aufgabe3 +endlich. Das ist ein Widerspruch zu |Z| ist unendlich und | { −1, 1 } | ist endlich. +Lösung zu Aufgabe 3 (a) Beh.: Die offenen Mengen von P sind Vereinigungen von Mengen der Form -j ∈ J U +j∈J U j × -i ∈ N,i = j P - i +i∈N,i=j P +i wobei J ⊆ N endlich und U - j ⊆ P - j offen ist. +j ⊆ P +j offen ist. Beweis: Nach Definition der Produkttopologie bilden Mengen der Form -i ∈ J U +i∈J U j × -i ∈N \J P - i +i∈N\J P +i wobei J ⊆ N endlich und U j ⊆ P j offen ∀j ∈ J eine Basis der Topologie. @@ -7212,108 +7215,108 @@ Form. (b) Beh.: Die Zusammenhangskomponenten von P sind alle einpunktig. Beweis: Es seinen x, y ∈ P und x sowie y liegen in der gleichen Zusammenhangskomponente Z ⊆ P . 
Da Z zusammenhängend ist und ∀i ∈ I : p - i : P → P - i ist +i : P → P +i ist stetig, ist p -i (Z ) ⊆ P -i zusammenhängend für alle i ∈ N . Die zusammenhängenden +i(Z ) ⊆ P +i zusammenhängend für alle i ∈ N. Die zusammenhängenden Mengen von P - i sind genau { 0 } und { 1 } , d. h. für alle i ∈ N gilt entweder +i sind genau { 0 } und { 1 }, d. h. für alle i ∈ N gilt entweder p -i ( Z ) ⊆ { 0 } oder p -i ( Z ) ⊆ { 1 }. Es sei z - i ∈ { 0 , 1 } so, dass p -i ( Z ) ⊆ { z - i } für -alle i ∈ N . Dann gilt also: +i(Z ) ⊆ { 0 } oder p +i(Z ) ⊆ { 1 }. Es sei z +i ∈ { 0, 1 } so, dass p +i(Z ) ⊆ { z +i } für +alle i ∈ N. Dann gilt also: p -i (x ) +i(x) -= x +=x i = z - i = p -i (y ) +i = p +i(y) -= y +=y i ∀i ∈ N Somit folgt: x = y -Lösung zu Aufgabe4 +Lösung zu Aufgabe 4 (a) Beh.: GL - n ( R ) ist nicht kompakt. +n(R) ist nicht kompakt. Bew.: det : GL - n ( R ) → R \ { 0 } ist stetig. Außerdem ist det ( GL - n ( R)) = R \ { 0 } +n(R) → R \ { 0 } ist stetig. Außerdem ist det(GL +n(R)) = R \ { 0 } nicht kompakt. 22 ⇒ GL - n ( R) ist nicht kompakt. +n(R) ist nicht kompakt. (b) Beh.: SL -1 ( R ) ist nicht kompakt, für n > 1 ist SL -n ( R ) kompakt. +1(R) ist nicht kompakt, für n > 1 ist SL +n(R) kompakt. Bew.: Für SL -1 (R ) gilt: SL -1 (R) = - A ∈ R1 ×1 +1(R) gilt: SL +1(R) = + A ∈ R1×1 det A = 1 = - 1 +1 ∼ = { 1 }. 22 ⇒ SL -1 (R) +1(R) ist kompakt. - Lösungen der Übungsaufgaben + Lösungen der Übungsaufgaben SL -n (R ) ⊆ GL - n (R ) lässt sich mit einer Teilmenge des R n2 - identifizieren. NachSatz 1.1 +n(R) ⊆ GL +n(R) lässt sich mit einer Teilmenge des Rn2 + identifizieren. Nach Satz 1.1 sind diese genau dann kompakt, wenn sie beschränkt und abgeschlossen sind. Definiere nun für für n ∈ N - ≥ 2 , m ∈ N : +≥2, m ∈ N: A - m = diag -n (m, 1 +m = diag +n(m, 1 m , . . . , 1) Dann gilt: det A - m = 1, d. h. A - m ∈ SL -n (R ), und A - m ist unbeschränkt, da A - m +m = 1, d. h. A +m ∈ SL +n(R), und A +m ist unbeschränkt, da A +m ∞ = m −−−−→ -m →∞ ∞. -(c) Beh.: P ( R) ist kompakt. 
-Bew.: P (R ) ∼ +m→∞ ∞. +(c) Beh.: P (R) ist kompakt. +Bew.: P (R) ∼ = S n - / - x ∼− x . Per Definition der Quotiententopologie ist die Klassenabbildung +/ +x∼−x. Per Definition der Quotiententopologie ist die Klassenabbildung stetig. Da S n - als abgeschlossene und beschränkte Teilmenge desR n+1 + als abgeschlossene und beschränkte Teilmenge des Rn+1 kompakt ist 22 -⇒ P (R ) ist kompakt. -Lösung zu Aufgabe5 -Die Definition von Homöomorphismus kann aufSeite 9nachgelesen werden. +⇒ P (R) ist kompakt. +Lösung zu Aufgabe 5 +Die Definition von Homöomorphismus kann auf Seite 9 nachgelesen werden. Definition 79 -Seien (G, ∗) und ( H, ◦) Gruppen und ϕ : G → H eine Abbildung. -ϕ heißt Homomorphismus , wenn +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. +ϕ heißt Homomorphismus, wenn ∀g - 1 , g -2 ∈ G : ϕ (g +1, g +2 ∈ G : ϕ(g 1 ∗ g -2 ) = ϕ( g -1 ) ◦ ϕ( g -2 ) +2) = ϕ(g +1) ◦ ϕ(g +2) gilt. Es folgt direkt: 1) Sei X = R mit der Standarttopologie und ϕ - 1 : id -R und R = ( R , +) . Dann ist ϕ +1 : id +R und R = (R, +). Dann ist ϕ 1 ein Gruppenhomomorphismus und ein Homöomorphismus. -2) Sei G = ( Z, +) und H = ( Z/3 Z, +) . Dann ist ϕ +2) Sei G = (Z, +) und H = (Z/3Z, +). Dann ist ϕ 2 : G → H, x → x mod 3 ein Gruppenhomomorphismus. Jedoch ist ϕ 2 nicht injektiv, also sicher kein Homöomorphismus. @@ -7324,208 +7327,208 @@ Verknüpfung auf X definiert wurde, ist X keine Gruppe und daher auch kein Grupp Also: Obwohl die Begriffe ähnlich klingen, werden sie in ganz unterschiedlichen Kontexten verwendet. -Lösung zu Aufgabe6 -Die Definition einer Isotopie kann aufSeite 20nachgelesen werden, die einer Isometrie auf +Lösung zu Aufgabe 6 +Die Definition einer Isotopie kann auf Seite 20 nachgelesen werden, die einer Isometrie auf Seite 6. Definition 80 -Seien (G, ∗) und ( H, ◦) Gruppen und ϕ : G → H eine Abbildung. -ϕ heißt Isomorphismus , wenn ϕ ein bijektiver Homomorphismus ist. +Seien (G, ∗) und (H, ◦) Gruppen und ϕ : G → H eine Abbildung. 
+ϕ heißt Isomorphismus, wenn ϕ ein bijektiver Homomorphismus ist. Eine Isotopie ist also für Knoten definiert, Isometrien machen nur in metrischen Räumen Sinn und ein Isomorphismus benötigt eine Gruppenstruktur. - Lösungen der Übungsaufgaben -Lösung zu Aufgabe7 + Lösungen der Übungsaufgaben +Lösung zu Aufgabe 7 (a) Vor.: Sei M eine topologische Mannigfaltigkeit. Beh.: M ist wegzusammehängend ⇔ M ist zusammenhängend Beweis: „ ⇒“: Da M insbesondere ein topologischer Raum ist folgt diese Richtung -direkt ausBemerkung 23. +direkt aus Bemerkung 23. „ ⇐“: Seien x, y ∈ M und Z := { z ∈ M | ∃Weg von x nach z } Es gilt: -(i) Z = ∅ , da M lokal wegzusammenhängend ist +(i) Z = ∅, da M lokal wegzusammenhängend ist (ii) Z ist offen, da M lokal wegzusammenhängend ist (iii) Z C := { ˜z ∈ M | Weg von x nach ˜z } ist offen Da M eine Mannigfaltigkeit ist, existiert zu jedem ˜z ∈ Z C eine offene und wegzusammenhängende Umgebung U - ˜z ⊆ M . +˜z ⊆ M . Es gilt sogar U - ˜z ⊆ Z C +˜z ⊆ Z C , denn gäbe es ein U - ˜z z ∈ Z , so gäbe es Wege γ - 2 : -[0 , 1] → M , γ - 2 (0) = z, γ -2 (1) = x und γ - 1 : [0 , 1] → M , γ - 1 (0) = ˜z, γ -1 (1) = z . +˜z z ∈ Z , so gäbe es Wege γ +2 : +[0, 1] → M , γ +2(0) = z, γ +2(1) = x und γ +1 : [0, 1] → M , γ +1(0) = ˜z, γ +1(1) = z. Dann wäre aber γ : [0, 1] → M , -γ ( x ) = +γ (x) = γ - 1 (2x) falls 0 ≤ x ≤ 1 +1(2x) falls 0 ≤ x ≤ 1 2 γ - 2 (2x − 1) falls 1 +2(2x − 1) falls 1 2 < x ≤ 1 ein stetiger Weg von ˜z nach x ⇒ Widerspruch. Da M zusammenhängend ist und M = Z offen ∪ Z C -offen , sowie Z = ∅ folgt Z C +offen , sowie Z = ∅ folgt Z C = ∅. Also ist M = Z wegzusammenhängend. (b) Beh.: X ist wegzusammenhängend. Beweis: X := (R \ { 0 }) ∪ { 0 -1 , 0 - 2 } und (R \ { 0 }) ∪ { 0 - 2 } sind homöomorph zu R. +1, 0 +2 } und (R \ { 0 }) ∪ { 0 +2 } sind homöomorph zu R. Also sind die einzigen kritischen Punkte, die man nicht verbinden können könnte 0 1 und 0 -2 . -Da (R \ { 0 } ) ∪ { 0 - 1 } homöomorph zu R ist, exisitert ein Weg γ - 1 von 0 +2. 
+Da (R \ { 0 }) ∪ { 0 +1 } homöomorph zu R ist, exisitert ein Weg γ +1 von 0 1 zu einem beliebigen Punkt a ∈ R \ { 0 }. -Da ( R \ { 0 }) ∪ { 0 +Da (R \ { 0 }) ∪ { 0 2 } ebenfalls homöomorph zu R ist, existiert außerdem ein Weg γ - 2 von a nach 0 - 2 . Damit existiert ein (nicht einfacher) Weg γ von 0 - 1 nach +2 von a nach 0 +2. Damit existiert ein (nicht einfacher) Weg γ von 0 +1 nach 0 -2 . -Lösung zu Aufgabe9 -Vor.: Sei ( X, d) eine absolute Ebene, A, B , C ∈ X und AB C ein Dreieck. - Lösungen der Übungsaufgaben +2. +Lösung zu Aufgabe 9 +Vor.: Sei (X, d) eine absolute Ebene, A, B , C ∈ X und AB C ein Dreieck. + Lösungen der Übungsaufgaben (a) Beh.: AB ∼ -= AC ⇒ ∠ AB C ∼ -= ∠ AC B += AC ⇒ ∠AB C ∼ += ∠AC B Bew.: Sei AB ∼ = AC . -⇒ ∃ Isometrie ϕ mit ϕ( B ) = C und ϕ (C ) = B und ϕ (A ) = A . -⇒ ϕ (∠ AB C ) = ∠ AC B -⇒ ∠ AB C ∼ +⇒ ∃ Isometrie ϕ mit ϕ(B ) = C und ϕ(C ) = B und ϕ(A) = A. +⇒ ϕ(∠AB C ) = ∠AC B +⇒ ∠AB C ∼ = ∠AC B (b) Beh.: Der längeren Seite von AB C liegt der größere Winkel gegenüber und umgekehrt. -Bew.: Sei d(A, C ) > d(A, B ). Nach§3 (i)gibt es C +Bew.: Sei d(A, C ) > d(A, B ). Nach §3 (i) gibt es C ∈ AC + mit d(A, C - ) = d(A, B ) +) = d(A, B ) ⇒ C liegt zwischen A und C . -Es gilt AB C - < AB C und ausAufgabe 9 (a)folgt: AB C +Es gilt AB C + < AB C und aus Aufgabe 9 (a) folgt: AB C = AC - B . -∠ B C - A ist ein nicht anliegender Außenwinkel zu ∠ B C A Bem. 66 -=====⇒ B C - A > B C A -⇒ B C A < B C - A = AB C - < AB C Sei umgekehrt AB C > B C A, kann -wegen 1. Teil vonAufgabe 9 (b)nicht d(A, B ) > d( A, C ) gelten. -WegenAufgabe 9 (a)kann nicht d(A, B ) = d(A, C ) gelten. -⇒ d(A, B ) < d( A, C ) +B . +∠B C +A ist ein nicht anliegender Außenwinkel zu ∠B C A Bem. 66 +=====⇒ B C +A > B C A +⇒ B C A < B C +A = AB C + < AB C Sei umgekehrt AB C > B C A, kann +wegen 1. Teil von Aufgabe 9 (b) nicht d(A, B ) > d(A, C ) gelten. +Wegen Aufgabe 9 (a) kann nicht d(A, B ) = d(A, C ) gelten. 
+⇒ d(A, B ) < d(A, C ) (c) Vor.: Sei g eine Gerade, P ∈ X und P /∈ g Beh.: ∃! Lot Bew.: - ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g . ϕ vertauscht die beiden -Halbebenen bzgl. g . -⇒ ϕ(P ) P schneidet g in F . -Es gibt eine Geradenspiegelung ϕ an g . ϕ vertauscht die beiden Halbebenen bzgl. g -⇒ ϕ(P ) P schneidet g in F . -Sei A ∈ g \ { F } . Dann gilt ϕ (∠ AF P ) = ∠ AF ϕ(P ) = π ⇒ ∠ AF P ist rechter Winkel. + ÜB10 A4(a): Es gibt Geradenspiegelung ϕ an g. ϕ vertauscht die beiden +Halbebenen bzgl. g. +⇒ ϕ(P )P schneidet g in F . +Es gibt eine Geradenspiegelung ϕ an g. ϕ vertauscht die beiden Halbebenen bzgl. g +⇒ ϕ(P )P schneidet g in F . +Sei A ∈ g \ { F }. Dann gilt ϕ(∠AF P ) = ∠AF ϕ(P ) = π ⇒ ∠AF P ist rechter Winkel. Gäbe es nun G ∈ g \ { F }, so dass P G weiteres Lot von P auf g ist, wäre P F G -ein Dreieck mit zwei rechten Innenwinkeln (vgl.Abbildung 5.4). +ein Dreieck mit zwei rechten Innenwinkeln (vgl. Abbildung 5.4). · ·A GP F g -Abbildung 5.4:Zwei Lote zu einer Geraden g durch einen Punkt P -NachFolgerung 4.4ist die Summe von zwei Innenwinkeln immer < π +Abbildung 5.4: Zwei Lote zu einer Geraden g durch einen Punkt P +Nach Folgerung 4.4 ist die Summe von zwei Innenwinkeln immer < π ⇒ G gibt es nicht. -Lösung zu Aufgabe10 -Sei f h und o. B. d. A. f g . -f ∦ h ⇒ f ∩ h = ∅ , sei also x ∈ f ∩ h. Mit Axiom§5folgt: Es gibt höchstens eine Parallele -zu g durch x , da x /∈ g . Diese ist f , da x ∈ f und f g . Da aber x ∈ h, kann h nicht - Lösungen der Übungsaufgaben -parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zug durch x (f = h). ⇒ g ∦ h -Lösung zu Aufgabe11 -Sei ( X, d, G ) eine Geometrie, die§1-§4erfüllt. Seien außerdem AB C und A - B - C +Lösung zu Aufgabe 10 +Sei f h und o. B. d. A. f g. +f ∦ h ⇒ f ∩ h = ∅, sei also x ∈ f ∩ h. Mit Axiom §5 folgt: Es gibt höchstens eine Parallele +zu g durch x, da x /∈ g. Diese ist f , da x ∈ f und f g. 
Da aber x ∈ h, kann h nicht + Lösungen der Übungsaufgaben +parallel zu g sein, denn ansonsten gäbe es zwei Parallelen zu g durch x (f = h). ⇒ g ∦ h +Lösung zu Aufgabe 11 +Sei (X, d, G) eine Geometrie, die §1-§4 erfüllt. Seien außerdem AB C und A +B +C Dreiecke, für die gilt: - d(A, B ) = d (A - , B - ) -d(A, C ) = d (A - , C - ) + d(A, B ) = d(A +, B +) +d(A, C ) = d(A +, C +) d(B , C ) = d(B - , C - ) -Sei ϕ die Isometrie mit ϕ( A) = A - , ϕ( B ) = B - und ϕ ( C - ) liegt in der selben Halbebene -bzgl. AB wie C . Diese Isometrie existiert wegen§4. -Es gilt d( A, C ) = d ( A - , C - ) = d( ϕ( A - ) , ϕ( C - )) = d( A, ϕ( C - )) und d( B , C ) = d( B - , C - ) = -d( ϕ(B - ) , ϕ(C - )) = d(B , ϕ(C - )). +, C +) +Sei ϕ die Isometrie mit ϕ(A) = A +, ϕ(B ) = B + und ϕ(C +) liegt in der selben Halbebene +bzgl. AB wie C . Diese Isometrie existiert wegen §4. +Es gilt d(A, C ) = d(A +, C +) = d(ϕ(A +), ϕ(C +)) = d(A, ϕ(C +)) und d(B , C ) = d(B +, C +) = +d(ϕ(B +), ϕ(C +)) = d(B , ϕ(C +)). Bem. 62 -=====⇒ C = ϕ (C ) . -Es gilt also ϕ (A - B - C - ) = AB C . +=====⇒ C = ϕ(C ). +Es gilt also ϕ(A +B +C +) = AB C . Bildquellen Alle Bilder, die hier nicht aufgeführt sind, wurden von Martin Thoma erstellt. Teilweise wurden die im folgenden aufgelisteten Bilder noch leicht modifiziert. -Abb.0.1a S 2 - : Tom Bombadil,tex.stackexchange.com/a/42865 -Abb.0.1bWürfel: Jan Hlavacek,tex.stackexchange.com/a/12069 -Abb.0.1e T 2 - : Jake,tex.stackexchange.com/a/70979/5645 -Abb.1.6Stereographische Pro jektion:texample.net/tikz/examples/map-pro jections -Abb.1.11Knoten von Jim.belk aus der „Blue knots“-Serie: +Abb. 0.1a S 2 +: Tom Bombadil, tex.stackexchange.com/a/42865 +Abb. 0.1b Würfel: Jan Hlavacek, tex.stackexchange.com/a/12069 +Abb. 0.1e T 2 +: Jake, tex.stackexchange.com/a/70979/5645 +Abb. 1.6 Stereographische Pro jektion: texample.net/tikz/examples/map-pro jections +Abb. 
1.11 Knoten von Jim.belk aus der „ Blue knots“-Serie: – Trivialer Knoten: commons.wikimedia.org/wiki/File:Blue_Unknot.png – Kleeblattknoten: commons.wikimedia.org/wiki/File:Blue_Trefoil_Knot.png – Achterknoten: commons.wikimedia.org/wiki/File:Blue_Figure- Eight_Knot.png – 6 -2 -Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png -Abb.1.12Reidemeister-Züge: YAMASHITA Makoto (1,2,3) -Abb.1.13 - Kleeblattknoten, 3-Färbung: Jim.belk,commons.wikimedia.org/wiki/File:Tricoloring. +2-Knoten: commons.wikimedia.org/wiki/File:Blue_6_2_Knot.png +Abb. 1.12 Reidemeister-Züge: YAMASHITA Makoto (1, 2, 3) +Abb. 1.13 + Kleeblattknoten, 3-Färbung: Jim.belk, commons.wikimedia.org/wiki/File:Tricoloring. png -Abb.2.1 - Doppeltorus: Oleg Alexandrov,commons.wikimedia.org/wiki/File:Double\_torus\_illustration. +Abb. 2.1 + Doppeltorus: Oleg Alexandrov, commons.wikimedia.org/wiki/File:Double\_torus\_illustration. png -Abb.2.8Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014. -Abb.3.3b3 Pfade auf Torus:Charles Staats, tex.stackexchange.com/a/149991/5645 -Abb.3.10Überlagerung von S 1 - mit R :Alex, tex.stackexchange.com/a/149706/5645 -Abb.4.7aSphärisches Dreieck:Dominique Toussaint, +Abb. 2.8 Faltungsdiagramm: Jérôme Urhausen, Email vom 11.02.2014. +Abb. 3.3b 3 Pfade auf Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 +Abb. 3.10 Überlagerung von S 1 + mit R: Alex, tex.stackexchange.com/a/149706/5645 +Abb. 4.7a Sphärisches Dreieck: Dominique Toussaint, commons.wikimedia.org/wiki/File:Spherical_triangle_3d_opti.png -Abb.5.1Möbiusband:Jake, tex.stackexchange.com/a/118573/5645 -Abb.5.3Krümmung des Torus:Charles Staats, tex.stackexchange.com/a/149991/5645 +Abb. 5.1 Möbiusband: Jake, tex.stackexchange.com/a/118573/5645 +Abb. 5.3 Krümmung des Torus: Charles Staats, tex.stackexchange.com/a/149991/5645 Abkürzungsverzeichnis Beh. Behauptung Bew. 
Beweis @@ -7552,59 +7555,59 @@ aber nicht erklärt, da sie Bestandteil der Vorlesungen „Analysis I und II“ und analytische Geometrie I und II“ sind. Jedoch will ich zumindest die Definitionen bereitstellen. Definition 81 Sei D ⊆ R und x - 0 ∈ R. x -0 heißt ein Häufungspunkt von D : ⇔ ∃ Folge x - n in D \ { x - 0 } +0 ∈ R. x +0 heißt ein Häufungspunkt von D :⇔ ∃ Folge x +n in D \ { x +0 } mit x - n → x - 0 . +n → x +0. Folgende Definition wurde dem Skript von Herrn Prof. Dr. Leuzinger für Lineare Algebra entnommen: Definition 82 -Es seien V und W K-Vektorräume und A( V ) und A ( W ) die zugehörigen affinen Räume. +Es seien V und W K-Vektorräume und A(V ) und A(W ) die zugehörigen affinen Räume. Eine Abbildung f : V → W heißt affin, falls für alle a, b ∈ V und alle λ, µ ∈ K mit λ + µ = 1 gilt: - f (λa + µb ) = λf ( a) + µf ( b) + f (λa + µb) = λf (a) + µf (b) Definition 83 Sei V ein Vektorraum und S ⊆ V eine Teilmenge. S heißt eine Orthonormalbasis von V , wenn gilt: (i) S ist eine Basis von V -(ii) ∀v ∈ S : v = 1 +(ii) ∀v ∈ S : v = 1 (iii) ∀v -1 , v +1, v 2 ∈ S : v - 1 = v - 2 ⇒ v - 1 , v -2 = 0 +1 = v +2 ⇒ v +1, v +2 = 0 Satz (Zwischenwertsatz) -Sei a < b und f ∈ C [ a, b] := C ([ a, b]) , weiter sei y -0 ∈ R und f ( a ) < y -0 < f ( b) oder -f ( b) < y -0 < f (a ). Dann existiert ein x -0 ∈ [a, b] mit f ( x -0 ) = y -0 . +Sei a < b und f ∈ C [a, b] := C ([a, b]), weiter sei y +0 ∈ R und f (a) < y +0 < f (b) oder +f (b) < y +0 < f (a). Dann existiert ein x +0 ∈ [a, b] mit f (x +0) = y +0. Definition 84 Sei V ein Vektorraum über einem Körper K und f : V → V eine lineare Abbildung. -v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f (v ) = λv . +v ∈ V \ { 0 } heißt Eigenvektor :⇔ ∃λ ∈ K : f (v) = λv. Wenn ein solches λ ∈ K existiert, heißt es Eigenwert von f . Satz (Binomischer Lehrsatz) Sei x, y ∈ R. Dann gilt: - ( x + y )n + (x + y)n = n -k =0 - n +k=0 +n k -x n− k - y k +xn−k + yk ∀n ∈ N - 0 +0 Definition 85 -Seien a, b ∈ R 3 +Seien a, b ∈ R3 Vektoren. 
a × b :=  a @@ -7612,37 +7615,37 @@ a × b :=  b 3 a -3  +3  ×  - a +a 1 b 3 a -3  +3  =  - a -2 b +a +2b 3 − a - 3 b +3b 2 a -3 b +3b 1 − a - 1 b +1b 3 a -1 b +1b 2 − a - 2 b -1  +2b +1  Symb olverzeichnis Mengenoperationen Seien A, B und M Mengen. -A C +AC Komplement von A P (M ) Potenzmenge von M M Abschluss von M @@ -7671,44 +7674,44 @@ Simplizialkomplexes K Gruppen Sei X ein topologischer Raum und K ein Körper. -Homöo (X ) Homöomorphismengruppe -Iso( X ) Isometriengruppe +Homöo(X ) Homöomorphismengruppe +Iso(X ) Isometriengruppe GL - n (K ) Allgemeine lineare Gruppe (von -G eneral L inear Group ) +n(K ) Allgemeine lineare Gruppe (von +General Linear Group ) SL -n (K ) Spezielle lineare Gruppe +n(K ) Spezielle lineare Gruppe PSL -n ( K ) Pro jektive lineare Gruppe Perm( X ) Permutationsgruppe +n(K ) Pro jektive lineare Gruppe Perm(X ) Permutationsgruppe Sym(X ) Symmetrische Gruppe Wege Sei γ : I → X ein Weg. [γ ] Homotopieklasse von γ γ - 1 ∗ γ - 2 Zusammenhängen von Wegen +1 ∗ γ +2 Zusammenhängen von Wegen γ - 1 ∼ γ - 2 Homotopie von Wegen -γ (x ) Inverser Weg, also γ (x ) := γ (1 − x) +1 ∼ γ +2 Homotopie von Wegen +γ (x) Inverser Weg, also γ (x) := γ (1 − x) C Bild eines Weges γ , also C := γ ([0, 1]) Weiteres B Basis einer Topologie B -δ ( x ) δ -Kugel um x +δ (x) δ-Kugel um x S Subbasis einer Topologie T Topologie A Atlas P Pro jektiver Raum ·, · Skalarprodukt X/ - ∼ X modulo ∼ -[x ] +∼ X modulo ∼ +[x] ∼ Äquivalenzklassen von x bzgl. 
∼ - x Norm von x -|x | Betrag von x -a Erzeugnis von a +x Norm von x +|x| Betrag von x +a Erzeugnis von a S n Sphäre T n @@ -7718,26 +7721,26 @@ f ◦ g Verkettung von f und g X Pro jektion auf X f | U f eingeschränkt auf U -f − 1 - (M ) Urbild von M +f −1 +(M ) Urbild von M Rg(M ) Rang von M -χ (K ) Euler-Charakteristik von K - Symbolverzeichnis -∆ k +χ(K ) Euler-Charakteristik von K + Symbolverzeichnis +∆k Standard-Simplex -X # Y Verklebung von X und Y +X #Y Verklebung von X und Y d -n Lineare Abbildung ausBemerkung +n Lineare Abbildung aus Bemerkung 37 A ∼ = B A ist isometrisch zu B f ∗ Abbildung zwischen Fundamentalgruppen - (vgl.Seite 49) - Symbolverzeichnis + (vgl. Seite 49) + Symbolverzeichnis Zahlenmengen N = { 1, 2, 3, . . . } Natürliche Zahlen -Z = N ∪ { 0, −1 , −2 , . . . } Ganze Zahlen +Z = N ∪ { 0, −1, −2, . . . } Ganze Zahlen Q = Z ∪ 1 2 , 1 @@ -7748,275 +7751,275 @@ Q = Z ∪ n mit z ∈ Z und n ∈ Z \ { 0 } Rationale Zahlen R = Q ∪ √ - 2, − 3√ - 3 , . . . +2, − 3√ +3, . . . Reele Zahlen R - + Echt positive reele Zahlen -R n -+, 0 := { ( x -1 , . . . , x - n ) ∈ R n ++ Echt positive reele Zahlen +Rn ++,0 := { (x +1, . . . , x +n) ∈ Rn | x - n ≥ 0 } Halbraum -R × +n ≥ 0 } Halbraum +R× = R \ { 0 } Einheitengruppe von R C = { a + ib | a, b ∈ R } Komplexe Zahlen P = { 2, 3, 5, 7, . . . 
} Primzahlen H = { z ∈ C | z > 0 } obere Halbebene I = [0, 1] R Einheitsintervall f : S 1 - → R 2 + → R2 Einbettung der Kreislinie in die Ebene π -1 (X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X +1(X, x) Fundamentalgruppe im topologischen Raum X um x ∈ X Fix(f ) Menge der Fixpunkte der Abbildung f · - 2 2-Norm; Euklidische Norm +2 2-Norm; Euklidische Norm κ Krümmung κ Nor Normalenkrümmung V (f ) Nullstellenmenge von f 2 Krümmung D - p F : R 2 +pF : R2 → R3 - Lineare Abbildung mit Jacobi-Matrix in p (sieheSeite 89) + Lineare Abbildung mit Jacobi-Matrix in p (siehe Seite 89) T -s S Tangentialebene an S ⊆ R 3 +sS Tangentialebene an S ⊆ R3 durch s ∈ S d -s n ( x) Weingarten-Abbildung +sn(x) Weingarten-Abbildung 2 - von Vanishing Set +von Vanishing Set Stichwortverzeichnis Abbildung -affine,107 -differenzierbare,29 -homotope,50 -offene,53 -simpliziale,35 -stetige,9 -Abschluss,3 -Abstand,86 -Abstandsaxiom,65 -Achterknoten,20 +affine, 107 +differenzierbare, 29 +homotope, 50 +offene, 53 +simpliziale, 35 +stetige, 9 +Abschluss, 3 +Abstand, 86 +Abstandsaxiom, 65 +Achterknoten, 20 Aktion, siehe Gruppenoperation -Anordnungsaxiome,66 -Atlas,24 -Außenwinkel,70 -Axiom,64 -Axiomensystem,64 -Basis,3 -Baum,37 -Betti-Zahl,41 -Bewegungsaxiom,66 -Binormalenvektor,89 -Cantorsches Diskontinuum,22 +Anordnungsaxiome, 66 +Atlas, 24 +Außenwinkel, 70 +Axiom, 64 +Axiomensystem, 64 +Basis, 3 +Baum, 37 +Betti-Zahl, 41 +Bewegungsaxiom, 66 +Binormalenvektor, 89 +Cantorsches Diskontinuum, 22 C k - -Struktur,29 -Decktransformation,59 -Decktransformationsgruppe,59 -Deformationsretrakt,47 -dicht,3 -Diffeomorphismus,29 -Dimension,34 -diskret,53 -Doppelverhältnis,83 + -Struktur, 29 +Decktransformation, 59 +Decktransformationsgruppe, 59 +Deformationsretrakt, 47 +dicht, 3 +Diffeomorphismus, 29 +Dimension, 34 +diskret, 53 +Doppelverhältnis, 83 Dreibein -begleitendes,89 +begleitendes, 89 Ebene -euklidische,64 -Eigenvektor,107 -Eigenwert,107 einfach zusammenhängend,49 -Einheitsnormalenfeld,90 
+euklidische, 64 +Eigenvektor, 107 +Eigenwert, 107 einfach zusammenhängend, 49 +Einheitsnormalenfeld, 90 Euler-Charakteristik, siehe Eulerzahl -Eulersche Polyederformel,38 -Eulerzahl,36 -Färbbarkeit,21 +Eulersche Polyederformel, 38 +Eulerzahl, 36 +Färbbarkeit, 21 Faser, siehe Urbild Fläche -orientierbare,90 -reguläre,30 -Flächenelement,95 +orientierbare, 90 +reguläre, 30 +Flächenelement, 95 Formoperator, siehe Weingarten-Abbildung Fundamentalform -erste,94 -zweite,97 -Fundamentalgruppe,47 -Gauß-Krümmung,92, 91–94 -Geometrie,64 -Gerade,64 -hyperbolische,77 -Graph,37 -Grenzwert,8 +erste, 94 +zweite, 97 +Fundamentalgruppe, 47 +Gauß-Krümmung, 92, 91–94 +Geometrie, 64 +Gerade, 64 +hyperbolische, 77 +Graph, 37 +Grenzwert, 8 Gruppe -allgemeine lineare,22,26 -spezielle lineare,22 -topologische,33 +allgemeine lineare, 22, 26 +spezielle lineare, 22 +topologische, 33 Gruppe operiert durch Homöomorphismen, 61 Gruppenaktion, siehe Gruppenoperation -Gruppenoperation,60, 60–63 -stetige,61 -Häufungspunkt,107 +Gruppenoperation, 60, 60–63 +stetige, 61 +Häufungspunkt, 107 Hülle -konvexe,34 -Halbebene,66 -Halbgerade,65 -Halbraum,28 -Hauptkrümmung,92 -Hilbert-Kurve,19,19 - Stichwortverzeichnis -Homöomorphismengruppe,10 -Homöomorphismus,9 -Homologiegruppe,41 -Homomorphismus,101 -Homotopie,44 -Homotopieklasse,47 -Inklusionsabbildung,47 -Innenwinkel,70 -Inneres,3 -Inzidenzaxiome,64 -Isometrie,6,10 -Isometriegruppe,10 -Isomorphismus,101 -Isotopie,20 -Jordankurve,19 -geschlossene,19 -Karte,24 -Kartenwechsel,28 +konvexe, 34 +Halbebene, 66 +Halbgerade, 65 +Halbraum, 28 +Hauptkrümmung, 92 +Hilbert-Kurve, 19, 19 + Stichwortverzeichnis +Homöomorphismengruppe, 10 +Homöomorphismus, 9 +Homologiegruppe, 41 +Homomorphismus, 101 +Homotopie, 44 +Homotopieklasse, 47 +Inklusionsabbildung, 47 +Innenwinkel, 70 +Inneres, 3 +Inzidenzaxiome, 64 +Isometrie, 6, 10 +Isometriegruppe, 10 +Isomorphismus, 101 +Isotopie, 20 +Jordankurve, 19 +geschlossene, 19 +Karte, 24 +Kartenwechsel, 28 Kern -offener,3 
-Kleeblattknoten,20 +offener, 3 +Kleeblattknoten, 20 Klumpentopologie, siehe triviale Topologie -Knoten,20, 17–21 -äquivalente,20 -trivialer,20 -Knotendiagramm,20 -kollinear,65 +Knoten, 20, 17–21 +äquivalente, 20 +trivialer, 20 +Knotendiagramm, 20 +kollinear, 65 kongruent, siehe isometrisch Kongruenz, siehe Isometrie Kongruenzsatz -SSS,104 -SWS,69 -SWW,74 -WSW,70 -Krümmung,88,89 -Kreis,37 -Kreuzprodukt,107 -Kurve,87 -Länge einer,87 +SSS, 104 +SWS, 69 +SWW, 74 +WSW, 70 +Krümmung, 88, 89 +Kreis, 37 +Kreuzprodukt, 107 +Kurve, 87 +Länge einer, 87 Lage -allgemeine,34 +allgemeine, 34 Lehrsatz -Binomischer,107 -Lie-Gruppe,33 -liegt zwischen,65 -Liftung,54 -Limes,8 lokal,3 -Lot,86 -Lotfußpunkt,86 -Möbiusband,91 -Möbiustransformation,80 -Mannigfaltigkeit,24 -differenzierbare,29 -geschlossene,25 -glatte,29 -mit Rand,28 +Binomischer, 107 +Lie-Gruppe, 33 +liegt zwischen, 65 +Liftung, 54 +Limes, 8 lokal, 3 +Lot, 86 +Lotfußpunkt, 86 +Möbiusband, 91 +Möbiustransformation, 80 +Mannigfaltigkeit, 24 +differenzierbare, 29 +geschlossene, 25 +glatte, 29 +mit Rand, 28 Menge -abgeschlossene,2 -offene,2 -zusammenhängende,11 -Metrik,6 -diskrete,6 -hyperbolische,84 -SNCF,8 -Nebenwinkel,86 -Neilsche Parabel,27 -Normalenfeld,90 -Normalenvektor,87,89 -Normalkrümmung,91,92,98 -Oktaeder,34 -Orthonormalbasis,107 +abgeschlossene, 2 +offene, 2 +zusammenhängende, 11 +Metrik, 6 +diskrete, 6 +hyperbolische, 84 +SNCF, 8 +Nebenwinkel, 86 +Neilsche Parabel, 27 +Normalenfeld, 90 +Normalenvektor, 87, 89 +Normalkrümmung, 91, 92, 98 +Oktaeder, 34 +Orthonormalbasis, 107 Paraboloid -hyperbolisches,92 -Parallele,66 -Parallelenaxiom,64 +hyperbolisches, 92 +Parallele, 66 +Parallelenaxiom, 64 parametrisiert -durch Bogenlänge,87 +durch Bogenlänge, 87 Parametrisierung -reguläre,30 -Polyzylinder,17 -Produkttopologie,4 +reguläre, 30 +Polyzylinder, 17 +Produkttopologie, 4 Pro jektion -stereographische,11 -Punkt,34 -Quotiententopologie,5,10,11 -Rand,3,28 +stereographische, 11 +Punkt, 34 +Quotiententopologie, 5, 10, 11 
+Rand, 3, 28 Raum -hausdorffscher,8 -kompakter,14 -metrischer,6 -pro jektiver,5,22,25,52 - Stichwortverzeichnis -topologischer,2 -zusammenhängender,11 +hausdorffscher, 8 +kompakter, 14 +metrischer, 6 +pro jektiver, 5, 22, 25, 52 + Stichwortverzeichnis +topologischer, 2 +zusammenhängender, 11 Realisierung -geometrische,34 -Retraktion,47 +geometrische, 34 +Retraktion, 47 Satz von -Gauß-Bonnet,98 -Scheitelwinkel,86 -Seite,34 -Sierpińskiraum,3,22 -Simplex,34 -Simplizialkomplex,34 +Gauß-Bonnet, 98 +Scheitelwinkel, 86 +Seite, 34 +Sierpińskiraum, 3, 22 +Simplex, 34 +Simplizialkomplex, 34 Simplizialkomplexe -flächengleiche,74 +flächengleiche, 74 Sphäre -exotische,29 -Standard-Simplex,34 -Standardtopologie,2 -sternförmig,48 +exotische, 29 +Standard-Simplex, 34 +Standardtopologie, 2 +sternförmig, 48 Stetigkeit, 9–11 -Strecke,65 +Strecke, 65 Struktur -differenzierbare,29 -Subbasis,3 -Tangentialebene,89, 89–90 -Teilraum,4 -Teilraumtopologie,4 -Teilsimplex,34 +differenzierbare, 29 +Subbasis, 3 +Tangentialebene, 89, 89–90 +Teilraum, 4 +Teilraumtopologie, 4 +Teilsimplex, 34 Topologie -diskrete,2,6 -euklidische,2 -feinste,11 -triviale,2 -Zariski,2,12,15 -Torus,iii,5,38,51,93 -Total Unzusammenhängend,100 -Triangulierung,38 -Überdeckung,14 +diskrete, 2, 6 +euklidische, 2 +feinste, 11 +triviale, 2 +Zariski, 2, 12, 15 +Torus, iii, 5, 38, 51, 93 +Total Unzusammenhängend, 100 +Triangulierung, 38 +Überdeckung, 14 Übergangsfunktion, siehe Kartenwechsel -Überlagerung,51, 51–60 -reguläre,59 -universelle,57 -Umgebung,3 -Umgebungsbasis,58 -vanishing set,26 +Überlagerung, 51, 51–60 +reguläre, 59 +universelle, 57 +Umgebung, 3 +Umgebungsbasis, 58 +vanishing set, 26 Vektorprodukt, siehe Kreuzprodukt -Verklebung,26 verträglich,29 -Würfel,34 -Weg,17 -einfacher,17 -geschlossener,17 -homotope,44 -inverser,48 -zusammengesetzter,46 -Wegzusammenhang,18 -Weingarten-Abbildung,95 -Winkel,70 +Verklebung, 26 verträglich, 29 +Würfel, 34 +Weg, 17 +einfacher, 17 +geschlossener, 17 +homotope, 44 +inverser, 48 
+zusammengesetzter, 46 +Wegzusammenhang, 18 +Weingarten-Abbildung, 95 +Winkel, 70 Zusammenhang, 11–14 -Zusammenhangskomponente,13 -Zwischenwertsatz,107 \ No newline at end of file +Zusammenhangskomponente, 13 +Zwischenwertsatz, 107 \ No newline at end of file diff --git a/read/results/pypdf/2201.00151.txt b/read/results/pypdf/2201.00151.txt index 6f747ec..f264cdd 100644 --- a/read/results/pypdf/2201.00151.txt +++ b/read/results/pypdf/2201.00151.txt @@ -23,7 +23,7 @@ Key words. galaxies: kinematics and dynamics – galaxies: structure – g alaxi star clusters: individual: Fornax 1. Introduction Dwarf spheroidal (dSph) galaxies of the Local Group (Mateo -1998; T olstoy et al. 2009) are considered to be a perfect toolto +1998; T olstoy et al. 2009) are considered to be a perfect tool to test our current theories of structure formation involving dark matter in the context of near-field cosmology. The objects ar e believed to be strongly dark matter dominated with mass-to- light @@ -39,7 +39,7 @@ As the samples of the stars with kinematic measurements grew , it became possible to estimate the profile of the velocity dis persion and model it using the Jeans equation (Binney & Tremaine 2008). Since the stars in the galaxy can move on a variety -of orbits, from circular to radial, the degeneracy between the +of orbits, from circular to radial, the degeneracy between t he anisotropy of the orbits and the mass distribution is inhere nt in this type of modeling. The reason for this lies in the fact tha t different combinations of these quantities can reproduce the ve locity @@ -57,13 +57,13 @@ The Schwarzschild modeling technique (Schwarzschild dSph galaxies without prior assumptions on the type of orbit s. It relies on building a galaxy model out of a set of best-fittin g orbits probed in the range of energy and angular momenta. 
In -this method, the anisotropy of the stellar orbits comes out as a +this method, the anisotropy of the stellar orbits comes out a s a result of the modeling in the same way as the density profile. A lthough it has been originally developed for large elliptica l galaxies (van der Marel et al. 1998; V alluri et al. 2004; Gebhardt e t al. 2015), it has recently been adopted for use on discrete data characteristic of dSph galaxies and applied to a number of -dwarfs, including Carina, Draco, Fornax, Sculptor, and Sextans +dwarfs, including Carina, Draco, Fornax, Sculptor, and Sex tans (Jardel & Gebhardt 2008; Jardel et al. 2013; Breddels & Helmi 2013; Breddels et al. 2013; Kowalczyk et al. 2019). Many dSph galaxies show signs of the presence of multiple @@ -72,21 +72,21 @@ stellar populations resulting from a few star formation epi sodes Pace et al. 2020). This observation o ffers a way to improve the modeling methods since, assuming dynamical equilibrium, a ll populations are supposed to be influenced by the same underlying - gravitational potential of the galaxy, but they have different + gravitational potential of the galaxy, but they have d ifferent distributions so more constraints can be imposed dur ing the modeling. This approach was first used by Battaglia et al. (2008) to model the mass distribution in the Sculptor dSph galaxy. A few attempts have also been made to constrain the inner slope of the dark matter profile in dSph galaxies using this technique (W alker & Peñarrubia 2011; Amorisco & Evans -2012; Hayashi et al. 2018) in order to resolve the so-called cuspcore +2012; Hayashi et al. 2018) in order to resolve the so-called c uspcore problem. It has been shown to be di fficult, however, due Article number, page 1 of 12 A&A proofs: manuscript no. Populations4 T able 1.Properties of the Illustris galaxy used to create mock data. 
Property V alue Subhalo ID 16960 -Number of stellar particles (N⋆) 70446 +Number of stellar particles ( N⋆) 70446 Number of dark matter particles ( NDM ) 78448 Stellar mass ( M⋆) 5 .74 ×1010 M⊙ Dark matter mass ( MDM ) 4 .91 ×1011 M⊙ @@ -117,7 +117,7 @@ dSph galaxies are available in this simulation because of th e resolution, we use a more massive galaxy but with properties oth erwise similar to dSphs. The reliability of the modeling doe s not depend on the particular value of the mass so we believe these -tests to be viable. W e do not attempt to constrain the inner dark +tests to be viable. W e do not attempt to constrain the inner da rk matter density profile (which is poorly resolved anyway) but try to put tighter limits on the estimates of the mass and anisotr opy profiles. Finally, we apply the improved method to the availa ble @@ -136,7 +136,7 @@ summarize the paper in Section 5. 2.1. Selection of the simulated galaxy In order to test our modeling method on realistic simulated data, we decided to use a galaxy from the Illustris project -(V ogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al. +(V ogelsberger et al. 2014a,b; Genel et al. 2014; Nelson et al . 2015), namely the Illustris-1 cosmological simulation. Th is simulation follows the formation and evolution of galaxies fro m the early Universe to the present by solving gravity and hydrody namics, @@ -171,17 +171,17 @@ Fig. 2. Number of stars as a function of their metallicity and time of formation (the age of the Universe) in the simulated galaxy . The vertical line indicates the applied split into stellar populations. magnetic fields, and the feedback from black holes. Although -dwarf galaxies that are of our interest here are not resolvedin the +dwarf galaxies that are of our interest here are not resolved in the suite, this can be easily overcome with the appropriate choi ce of the object and the treatment of data. 
As the key properties of dSph galaxy equivalents we identified: the lack of gas, the lack of a black hole, a low spin, the stellar mass much smaller than the dark matter mass and a -nearly spherical shape. The last condition was adopted in anattempt +nearly spherical shape. The last condition was adopted in an attempt to avoid any strong bias introduced by the spherical mo deling of a nonspherical object. Moreover, we required the ga laxy to possess a significant number of both stellar and dark matter - particles (over 105 ), and a well resolved center. Due to the + particles (over 10 5 ), and a well resolved center. Due to the large softening scale for dark matter particles in the simul ation (ǫDM = 1.42 kpc), we looked for an object in which even the more concentrated stellar population (see Section 2.2) ext ended @@ -353,7 +353,7 @@ Fig. 4. Profiles of the velocity anisotropy parameter, radial veloc ity dispersi logarithmic distance scale and reaching the outskirts of th e galaxy whereas the bottom row presents in the linear scale o nly the radial range used in the modeling. ments. W e decided to use a galaxy labeled as subhalo 16960. -All the relevant properties of the galaxy are given in T able 1, +All the relevant properties of the galaxy are given in T able 1 , including numbers of particles and total masses for both com ponents, and details on the shape of the stellar component: the axis ratios minor to major (shortest to longest) c/a, intermediate to @@ -391,14 +391,14 @@ the right). Di fferent lines show profiles for all available stars (in red), th blue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. 2.2. Splitting the stars into populations Our chosen galaxy shows a complex formation history undergoing - multiple mergers which result in extended star formation + multiple mergers which result in extended star format ion with a few star formation bursts. 
The last wet merger, that is a merger with an object containing gas, happens at 6.9 Gyr from -the beginning of the simulation, whereas the last dry merger(no +the beginning of the simulation, whereas the last dry merger (no gas transfer) at 12.1 Gyr, giving the galaxy enough time to re gain dynamical equilibrium. W e present the star formation rate ( SFR) as a function of time (the age of the Universe) in Fig. 1, where -these last mergers are indicated with black and gray vertical arrows. +these last mergers are indicated with black and gray vertica l arrows. In Fig. 2 we show the distribution of stars as a function of their metallicity (in solar units) and the time of formation . In order to divide the stellar sample into two populations we cut i t in @@ -425,14 +425,14 @@ The velocity anisotropy parameter β(r) = 1 − (σ2 θ + σ2 φ)/(2σ2 -r), where σi are velocity dispersions in spherical coordinates +r ), where σi are velocity dispersions in spherical coordinates (Binney & Tremaine 2008), describes the orbital struc ture of galaxies. It is one of the most important dynamical proper ties of bound systems which cannot be inferred directly from observations - and has to be recovered by dynamical modeling. The + and has to be recovered by dynamical modeling. Th e profiles of the anisotropy parameter β as well as the radial σr and tangential σt = [(σ2 -θ+σ2 +θ +σ2 φ)/2]1/2 velocity dispersions for our simulated galaxy are presented in the consecutive columns o f Fig. 4. Throughout the paper we use red, orange, and blue colo rs @@ -440,7 +440,7 @@ to indicate values calculated or recovered for all stars, po pulation I, and population II, respectively. The two rows of the fi gure show the behavior of the parameters at di fferent scales. The top row plots the profiles with the distance from the center of the -galaxy in the logarithmic scale and shows the drop of anisotropy +galaxy in the logarithmic scale and shows the drop of anisotr opy at the outer edges of the object. 
The bottom row uses the linea r distance scale and focuses on the main body of the galaxy. Figure 5 shows the surface number density profiles of the @@ -450,24 +450,24 @@ difference between the lines of sight is small because the galaxy is close to spherical. 2.3. Observables W e generated nine sets of mock data by observing all stars and -each population separately along the principal axes determined +each population separately along the principal axes determ ined from all stars. For the observables to be used in the modeling we divided the stars into 20 bins spaced linearly in distance fr om the center of the galaxy up to 50 kpc, measuring the fraction of the total number of stars and the 2nd, 3rd, and 4th proper moments of the line-of-sight velocity defined in Eq. 8 and 9 -of Kowalczyk et al. (2018). The profiles of these quantities are +of Kowalczyk et al. (2018). The profiles of these quantities a re shown in consecutive rows in Fig. 6. Columns correspond to di fferent lines of sight, from the left to the right: along the ma jor, intermediate, and minor axis of the galaxy. For clarity of th e figure, in each panel we indicate only the error bars for one of th e data sets. However, as the number of stars in a sample remains -roughly constant between the lines of sight, the error bars are +roughly constant between the lines of sight, the error bars a re very similar among the panels in a given row . Although in our previous studies of the reliability of the Schwarzschild modeling and its applications to real dat a (Kowalczyk et al. 2017, 2018, 2019) we approximated the density - profile of the tracer with the Sérsic formula, we found that it + profile of the tracer with the Sérsic formula, we found th at it does not provide a good approximation of the data for the simu lated galaxy considered here. W e therefore fit the projected density profile with the King formula (King 1962) @@ -551,7 +551,7 @@ Fig. 6. 
Observables used in our Schwarzschild modeling scheme of th e simulated 3rd, and 4th velocity moment. In columns: mock data from the simulated galaxy along the major, interme diate, and minor axis. In red we present the values obtained for all stars whereas in orange and blue t hose for populations I and II, respectively . For clarity of t he figure, in each panel we indicate only the error bars for one of the data sets. -whereI0 , Rc , and Rt are the model parameters. The profile can +where I0 , Rc , and Rt are the model parameters. The profile can be analytically deprojected to obtain the 3D density ρ(r) = ρ0 z2 @@ -565,24 +565,24 @@ where ρ0 = I0 πRc [1 +(Rt /Rc)2 ]3/2 (3) and -z= +z = √ r2 +R2 c R2 -c+R2 +c +R2 t . (4) 3. Schwarzschild modeling In this section we briefly present our modeling method and its -application to the data sets derived for all stars and the twopopulations +application to the data sets derived for all stars and the two populations of the simulated galaxy separately. In both cases o ur aim was to recover the profiles of the total mass and the veloci ty anisotropy. 3.1. Overview of the method W e follow the approach introduced in Kowalczyk et al. (2018) , namely we model the total mass profile with the mass-to-light -ratioΥvarying with radius: +ratio Υvarying with radius: log Υ(r) = { log(Υ0 ) r ≤ r0 @@ -670,10 +670,10 @@ W e fit the kinematics weighted with the fraction of mass with the constrained least squares algorithm where di fferent values of Υ0 were obtained with a simple transformation of velocities given by Eq. 12, 13, and 15 in Kowalczyk et al. (2018). 
In order - to smooth out the numerical artifacts, the three-dimensional + to smooth out the numerical artifacts, the three-dimens ional χ2 spaces were then interpolated with 12-order polynomials (∼ a4 c4 Υ4 -0) that were further used to determine the global minimums +0 ) that were further used to determine the global minimums (identified as the best-fitting models) and 1, 2, 3 σ confidence levels which for three parameters correspond to ∆χ2 = 3.53, 8.02, 14.2 (Press et al. 1992). @@ -848,7 +848,7 @@ location of global minimum and confidence levels from two pop ulations (as in the top right panel of Fig. 7), in Fig. 9 we pres ent another method of calculating the anisotropy. In the second and third row we show the derived profiles for population I and II -separately and combine them as stellar mass weighted average +separately and combine them as stellar mass weighted averag e in the top row . As in previous figures, three columns refer to t he different lines of sight whereas the narrow fourth one shows the behavior of the true profiles outside the modeled range which , as @@ -858,7 +858,7 @@ larger distances from the center are still included in the li ne-ofsight measurements. 3.3. Comparison of fitting results The main strength of the two populations method comes from -tracing the underlying gravitational potential at different scales. +tracing the underlying gravitational potential at di fferent scales. As can be seen in the bottom panels of Fig. 7, population I, whi ch is more concentrated, is also more sensitive to Υ0 , but gives weaker constraints on a or c. On the other hand, population II @@ -867,7 +867,7 @@ as well, therefore showing stronger coupling between the pa rameters. 
The global minimums of the χ2 distributions for both approaches, that is modeling one and two populations, which we -identify as the best-fitting models, closely coincide showing that +identify as the best-fitting models, closely coincide showi ng that there is no internal bias in the improved method. However, si gnificant di fferences can be observed when comparing the confidence levels, mainly at 1 and 3 σ. Namely, we find that using @@ -882,7 +882,7 @@ populations are more or less biased depending on the axis. Th ey are well reproduced for the observation along the intermedi ate axis, for which the e ffects of nonsphericity seem to cancel out, and more biased for the remaining lines of sight. W e notice a -trend from under- to overestimation of the anisotropy when going +trend from under- to overestimation of the anisotropy when g oing from the major to the minor axis. Article number, page 7 of 12 A&A proofs: manuscript no. Populations4 @@ -937,12 +937,12 @@ previous figures. In columns: observations along the major, intermediate, and mi outside the modeled radial range. Color lines indicate valu es for the best-fit models whereas the colored areas of decrea sing intensity show the 1, 2, and 3 σ confidence regions. 4. Modeling Fornax dSph -In this section we present the application of our Schwarzschild +In this section we present the application of our Schwarzsch ild modeling scheme to the observational data for the Fornax dSp h galaxy obtained by del Pino et al. (2015) and del Pino et al. -(2017). This study is a follow-up of the work of Kowalczyk et al. +(2017). This study is a follow-up of the work of Kowalczyk et a l. (2019) and can be directly compared to the results presented -there. Moreover, we refer the reader to these previous publications +there. Moreover, we refer the reader to these previous publi cations for details on the origin of data and our procedures use d for cleaning the spectroscopic sample. 
Similarly to the approach introduced in Section 2.2, we divided @@ -951,13 +951,13 @@ their metallicity and then cross-correlated the samples wi th the data used in Kowalczyk et al. (2019). The metallicity histog ram of the final spectroscopic sample is shown in Fig. 10. Additio nally, we color-coded each bin with the population it has been -assigned to, namely orange or blue for population I or II. Interestingly, +assigned to, namely orange or blue for population I or II. Int erestingly, the case of Fornax is similar to our simulated gala xy as the split at [Fe /H]= −1 also captures an important feature of the object’s star formation history, separating stars in to subsamples older and younger than 6 Gyr, as shown in Fig. 12 of del Pino et al. (2015) and Fig. 8 of del Pino et al. (2017). The -numbers of stars contained in the samples of all stars, population +numbers of stars contained in the samples of all stars, popul ation I, and population II are given in T able 2, where the indic es "phot" and "spec" refer to the photometric and kinematic sam ples. The sum of stars in the populations is lower than in the @@ -1005,7 +1005,7 @@ Fig. 11. Surface number density profiles of the photometric data samples I (in orange), and the metal-poor population II (in b lue). Thin vertical lines indicate r0 (see text) and the outer boundary of the spectroscopic data. -Sérsic formula (Sérsic 1968). The profiles of number densityfor +Sérsic formula (Sérsic 1968). The profiles of number density for all stars and both populations together with the best-fittin g Sérsic profiles are presented in Fig. 11. The colors follow the conve ntion introduced in previous sections. Thin vertical lines i ndicate @@ -1077,7 +1077,7 @@ modeling scheme. In rows: the fraction of the total number of stars, the 2nd, 3rd, and 4th velocity moment. In red we present the value s obtained for all stars whereas in orange and blue those for population s I and II, respectively . 
-the confidence levels onΥfrom the fit of two populations. Green +the confidence levels on Υfrom the fit of two populations. Green lines indicate the values for the best-fitting models wherea s the colored areas of decreasing intensity show the 1, 2, and 3 σconfidence regions. Additionally, with black dashed lines we in clude @@ -1129,7 +1129,7 @@ c min Fig. 13. V alues of χ2 relative to the fitted minimum within the range of 3 σ confidence level for all stars (left panel) and for the popula tions (right panel) for the Fornax dSph. -(Kowalczyk et al. 2019), we obtained higher estimates of theenclosed +(Kowalczyk et al. 2019), we obtained higher estimates of the enclosed total mass at larger radii. In particular, for the mas s enclosed within 1.8 kpc we get Mall (< 1.8 kpc) = 3.87+1.48 −1.56 × 108 @@ -1147,9 +1147,9 @@ populations. Nevertheless, even in the latter case the prev ious result agrees with the new finding within 1 σ. The detailed analysis of the anisotropy is shown in Fig. 15 where the middle and bottom panels present the profiles obtained - for each population separately. W e notice that the profile + for each population separately. W e notice that the pr ofile for population I is decreasing or has a local minimum whereas -for population II is increasing (from−0.25 to 0.5 for the bestfitting +for population II is increasing (from −0.25 to 0.5 for the bestfitting model). Since population I is more concentrated, the last bins contain very few stars, which limits their credibility . The top panel of Fig. 15 presents the anisotropy of all stars calc ulated @@ -1173,11 +1173,11 @@ Both constant (like for our population I) and growing (population II) anisotropy profiles can arise from biased modeli ng of the real growing profile by observing an object along the minor and major axis, respectively. 
However, for the bias to -occur in two populations presented here, their inner orientations +occur in two populations presented here, their inner orient ations would need to be opposite. Since such morphological fe atures are not supported by the photometric studies of Fornax (del Pino et al. 2015; W ang et al. 2019) which rather find a good -spatial alignment between the stellar populations, we conclude +spatial alignment between the stellar populations, we conc lude that the anisotropy profiles of the two populations modeled i n this work are indeed significantly distinct. Finally, it is worth noticing that the so-called mass-follo wslight @@ -1191,7 +1191,7 @@ method are much larger, as demonstrated by the right panel of Fig. 13. 5. Summary and discussion Building on the previously created implementation of the -Schwarzschild orbit superposition method focused on modeling +Schwarzschild orbit superposition method focused on model ing dSph galaxies of the Local Group (Kowalczyk et al. 2017, 2018 , 2019), we improved our tool by introducing multiple stellar populations. Such an improvement is desirable and justified sin ce @@ -1259,13 +1259,13 @@ confidence regions. The best-fitting values obtained by Kowa lczyk et al. (2019) are shown with black dashed lines. that made it a good test bed for modeling techniques applicable to dSph galaxies. W e applied our approach to all data and -to two stellar populations separately, comparing the accuracy of +to two stellar populations separately, comparing the accur acy of the obtained results. Although the addition of the second tr acer seemingly increases the number of constraints twice, the in crement is somewhat compromised by the sampling errors since th e number of stars in each sample is then reduced. Still, we foun d strong improvements in the accuracy of the method when using - two populations. The results of the modeling show that the + two populations. 
The results of the modeling show that th e density and velocity anisotropy profiles are more strongly c onstrained, most importantly at the 3 σ level, that is the range of allowed values is much narrower. @@ -1314,21 +1314,21 @@ In rows: results for all stars (calculated as the superposition of tw o populati for the best-fit models whereas the colored areas of decreasi ng intensity show the 1, 2, and 3 σconfidence regions. The dashed black line shows the result from Kowalczyk et al. (2019) for comparison. -in larger final uncertainties, usually containing the true values +in larger final uncertainties, usually containing the true v alues within 1 σ confidence region. On the other hand, the improved methods exhibit substantially reduced uncertainties, hig hlighting the underlying bias. Our method parametrizes the total mass content with the mass-to-light ratio varying with radius as a power-law in th e loglog scale. W e made two main changes with respect to our previous - work: we added a third parameterc controlling the steepness + work: we added a third parameter c controlling the steepness of the mass-to-light ratio profile (previously fixed at the va lue of 3) and allowed for di fferent stellar density profiles (previously only Sérsic, now also King). These changes are of course coupled - since different density profiles require di fferent exponents to + since di fferent density profiles require di fferent exponents to reproduce the same mass profile. It is visible also in our resu lts since the King profile applied in the simulated galaxy gave us -values ofc lower than 3. Nevertheless, we decided to use di fferent +values of c lower than 3. Nevertheless, we decided to use di fferent density profiles to make our method more general and appli cable to objects, such as our Illustris galaxy, for which the Sérsic formula does not provide a good approximation of the density @@ -1340,7 +1340,7 @@ Fornax dSph galaxy. 
Due to the addition of another free param eter Article number, page 11 of 12 A&A proofs: manuscript no. Populations4 obtained in Kowalczyk et al. (2019). However, in terms of the -total density and mass distribution the estimates obtainedhere +total density and mass distribution the estimates obtained here agree very well with those earlier results in the range cover ed by the data. Therefore, the detailed comparison with other e stimates from the literature presented in Kowalczyk et al. (201 9) is @@ -1350,7 +1350,7 @@ estimates is seen in the results of modeling two populations in Fornax. In this case we find the anisotropy to be slightly incr easing rather than decreasing with radius and, most importantl y, the confidence regions for this parameter, as well as for the density, - are much narrower. W e were thus able to obtain tighter constraints + are much narrower. W e were thus able to obtain tighter c onstraints on the properties of Fornax, which means that the im proved method is successful. For the first time, we were also a ble to deduce the velocity anisotropy profiles for each of the pop ulations @@ -1360,7 +1360,7 @@ extended, metal-poor population II has the anisotropy incr easing with radius. This finding may partially explain the large spr ead of the anisotropy values obtained in the literature and summ arized in T able 2 and 3 of Kowalczyk et al. (2019), which were -often based on modeling subsamples of our spectroscopic data +often based on modeling subsamples of our spectroscopic dat a set. For both studied objects we split the stars into two populations by dividing them in half based on their metallicity, Z (in @@ -1368,12 +1368,12 @@ solar units), for the Illustris galaxy and [Fe /H] for Fornax. Such a method is approximate but justified. Both galaxies have com plex star formation history with multiple star formation bu rsts, as demonstrated by Fig. 1 in this work and Fig. 7 in del Pino et al. 
-(2013), producing multiple stellar populations which cannot be +(2013), producing multiple stellar populations which cann ot be easily tracked as the metallicity is a good but not perfect pr oxy for the stellar age. Moreover, the metallicity histograms f or both objects are approximately unimodal not allowing for a conve nient separation. More refined methods of division have been -suggested in the literature, for example in the form of the likelihood +suggested in the literature, for example in the form of the li kelihood function based on the position, velocity, and metallic ity index (W alker & Peñarrubia 2011). However, the likelihood fun ction requires many assumptions which introduce additional uncertainties @@ -1383,41 +1383,41 @@ approach ensures the maximization of each sample (and there fore features of the star formation history. Further improvements to the Schwarzschild modeling method are certainly possible. One way to proceed would be to -include the modeling of the proper motions of the stars. For now , +include the modeling of the proper motions of the stars. For n ow , measurements of transverse velocities are available only f or the brightest stars in dSph galaxies, but even small samples of t his type could provide further constraints on the models, as dem onstrated by Strigari et al. (2007) and Massari et al. (2020). -Acknowledgements.W e are grateful to Andrés del Pino for providing the data for +Acknowledgements. W e are grateful to Andrés del Pino for providing the data for the Fornax dSph and to the Illustris team for making their sim ulations publicly available. Useful comments from the anonymous referee are k indly appreciated. This research was supported by the Polish National Science C enter under grant 2018/28/C/ST9/00529. References Amorisco, N. C., & Evans, N. W . 2012, MNRAS, 419, 184 -Battaglia, G., Helmi, A., T olstoy , E., et al. 2008, ApJ, 681,L13 +Battaglia, G., Helmi, A., T olstoy , E., et al. 
2008, ApJ, 681, L13 Bellazzini, M., Ferraro, F . R., & Pancino, E. 2001, MNRAS, 32 7, L15 Binney , J., & Tremaine, S. 2008, Galactic Dynamics, 2nd edn. (Princeton University Press, Princeton) Breddels, M. A., & Helmi, A. 2013, A&A, 558, A35 -Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de V en,G., & Battaglia, +Breddels, M. A., Helmi, A., van den Bosch, R. C. E., van de V en, G., & Battaglia, G. 2013, MNRAS, 433, 3173 del Pino, A., Hidalgo, S. L., Aparicio, A., et al. 2013, MNRAS , 433, 1505 del Pino, A., Aparicio, A., & Hidalgo, S. L. 2015, MNRAS, 454, 3996 del Pino, A., Aparicio, A., Hidalgo, S. L., & Łokas, E. L. 2017 , MNRAS, 465, 3708 Fabrizio, M., Bono, G., Nonino, M., et al. 2016, ApJ, 830, 126 -Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ,583, 92 +Gebhardt, K., Richstone, D., Tremaine, S., et al. 2003, ApJ, 583, 92 Genel, S., Fall, S. M., Hernquist, L., et al. 2015, ApJ, 804, L 40 Genel, S., V ogelsberger, M., Springel, V ., et al. 2014, MNRA S, 445, 175 Genina, A., Benitez-Llambay , A., Frenk, C. S., et al. 2018, M NRAS, 474, 1398 Hayashi, K., Fabrizio, M., Łokas, E. L., et al. 2018, MNRAS, 4 81, 250 Irwin, M., & Hatzidimitriou, D. 1995, MNRAS, 277, 1354 Jardel, J. R., & Gebhardt, K. 2012, ApJ, 746, 89 -Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory , N., & Williams, M. J. 2013, +Jardel, J. R., Gebhardt, K., Fabricius, M. H., Drory , N., & Wi lliams, M. J. 2013, ApJ, 763, 91 King, I. 1962, AJ, 67, 471 -Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 2013, MNRAS, 431, +Kowalczyk, K., Łokas, E. L., Kazantzidis, S., & Mayer, L. 201 3, MNRAS, 431, 2796 Kowalczyk, K., Łokas, E. L., & V alluri, M. 2017, MNRAS, 470, 3 959 Kowalczyk, K., Łokas, E. L., & V alluri, M. 2018, MNRAS, 476, 2 918 @@ -1425,20 +1425,20 @@ Kowalczyk, K., del Pino, A., Łokas, E. L., & V alluri, M. 2019, MNRAS, 482, 5241 Łokas, E. L., 2002, MNRAS, 333, 697 Łokas, E. L., Mamon, G. A., & Prada, F . 
2005, MNRAS, 363, 918 -Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633, A36 +Massari, D., Helmi, A., Mucciarelli, A. et al. 2020, A&A, 633 , A36 Mateo, M. 1998, ARA&A, 36, 435 -Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomyand Computing, 13, +Nelson, D., Pillepich, A., Genel, S., et al. 2015, Astronomy and Computing, 13, 12 Pace, A. B., Kaplinghat, M., Kirby , E., et al. 2020, MNRAS, 49 5, 3022 Press, W . H., T eukolsky , S. A., V etterling, W . T ., & Flannery , B. P . 1992, Numerical Recipes in C, 2nd edn. (Cambridge University Press, Cam bridge) Schwarzschild, M. 1979, ApJ, 232, 236 -Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observatorio Astronomico, Cordoba, +Sérsic, J. L. 1968, Atlas de Galaxias Australes (Observator io Astronomico, Cordoba, Argentina) Strigari, L. E., Bullock, J. S., & Kaplinghat, M. 2007, ApJ, 6 57, L1 T olstoy , E., Hill, V ., & T osi, M. 2009, ARA&A, 47, 371 V alluri, M., Merritt, D., & Emsellem, E. 2004, ApJ, 602, 66 -van der Marel, R. P ., Cretton, N., de Zeeuw , P . T ., & Rix, H.-W .1998, ApJ, 493, +van der Marel, R. P ., Cretton, N., de Zeeuw , P . T ., & Rix, H.-W . 1998, ApJ, 493, 613 V ogelsberger, M., Genel, S., Springel, V ., et al. 2014a, Nat ure, 509, 177 V ogelsberger, M., Genel, S., Springel, V ., et al. 2014b, MNR AS, 444, 1518 diff --git a/read/results/pypdf/2201.00214.txt b/read/results/pypdf/2201.00214.txt index e1570cb..9e65d22 100644 --- a/read/results/pypdf/2201.00214.txt +++ b/read/results/pypdf/2201.00214.txt @@ -906,24 +906,24 @@ Abedini, A., Safari, H., & Nasiri, S. 2012, Solar Physics, 28 0 Anfinogentov, S., Nakariakov, V . M., Mathioudakis, M., V an D oorsselaere, T ., & Kowalski, A. F . 2013, ApJ, 773, 156 Aschwanden, M., B. P . S. C. M. A. 2013, Solar Physics, 283, 5 -Aschwanden, M. J. 2006, Philosophical T ransactions of the Royal Society of London Series A, 364, +Aschwanden, M. J. 
2006, Philosophical T ransactions of the R oyal Society of London Series A, 364, 417 Aschwanden, M. J., & Boerner , P . 2011, The Astrophysical Jou rnal, 732, 81 Aschwanden, M. J., Boerner , P ., Ryan, D., et al. 2015, The Ast rophysical Journal, 802, 53 Aschwanden, M. J., Fletcher, L., Schrijver, C. J., & Alexand er, D. 1999, ApJ, 520, 880 Ballai, I., Jess, D. B., & Douglas, M. 2011, A&A, 534, A13 -Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Solar Physics, 246, 3 +Banerjee, D., Erdélyi, R., Oliver, R., & O’Shea, E. 2007, Sol ar Physics, 246, 3 Berghmans, D., & Clette, F . 1999, Solar Physics, 186, 207 -Boerner, P ., Edwards, C., Lemen, J., et al. 2012, Solar Physics, 275, 41 +Boerner, P ., Edwards, C., Lemen, J., et al. 2012, Solar Physi cs, 275, 41 Dahlburg, R. B., Einaudi, G., Ugarte-Urra, I., Rappazzo, A. F ., & V elli, M. 2018, ApJ, 868, 116 De Moortel, I. 2005, Philosophical T ransactions of the Roya l Society of London Series A, 363, 2743 De Moortel, I., & Brady, C. S. 2007, ApJ, 664, 1210 De Moortel, I., Ireland, J., & W alsh, R. W . 2000, A&A, 355, L23 -De Moortel, I., & Nakariakov, V . M. 2012, Philosophical T ransactions of the Royal Society of +De Moortel, I., & Nakariakov, V . M. 2012, Philosophical T ran sactions of the Royal Society of London Series A, 370, 3193 Fathalian, N. 2019, arXiv e-prints, arXiv:1908.11369 Fathalian, N., & Safari, H. 2010, ApJ, 724, 411 -Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy ,15, 403 +Fathalian, N., Safari, H., & Nasiri, S. 2010, New Astronomy , 15, 403 Goossens, M., Hollweg, J. V ., & Sakurai, T . 1992, Solar Physi cs, 138, 233 Gruszecki, M., Murawski, K., Selwa, M., & Ofman, L. 2006, A&A , 460, 887 Guennou, C., Auchère, F ., Soubrié, E., et al. 2012a, ApJ, 203 , 25 @@ -931,15 +931,15 @@ Guennou, C., Auchère, F ., Soubrié, E., et al. 2012b, ApJ, 203 , 26 Habbal, S. R., & Rosner, R. 1979, ApJ, 234, 1113 Hindman, B. W ., & Jain, R. 2014, ApJ, 784, 103 Jain, R., Maurya, R. A., & Hindman, B. 
W . 2015, ApJ, 804, L19 -Jess, D. B., Reznikova, V . E., Ryans, R. S. I., et al. 2016, Nature Physics, 12, 179 +Jess, D. B., Reznikova, V . E., Ryans, R. S. I., et al. 2016, Nat ure Physics, 12, 179 Kolotkov, D. Y ., Nakariakov, V . M., & Zavershinskii, D. I. 20 19, A&A, 628, A133 Krishna Prasad, S., Jess, D. B., & V an Doorsselaere, T . 2019, Frontiers in Astronomy and Space Sciences, 6, 57 Li, L. P ., Peter, H., Chen, F ., & Zhang, J. 2015, A&A, 583, A109 Liu, W ., & Ofman, L. 2014, Solar Physics, 289, 3233–3277 -Luna, M., T erradas, J., Oliver, R., & Ballester, J. L. 2010, ApJ, 716, 1371 +Luna, M., T erradas, J., Oliver, R., & Ballester, J. L. 2010, A pJ, 716, 1371 McClymont, A. N., & Craig, I. J. D. 1985, ApJ, 289, 834 -McLaughlin, J. A., Nakariakov, V . M., Dominique, M., Jelínek, P ., & T akasao, S. 2018, Space +McLaughlin, J. A., Nakariakov, V . M., Dominique, M., Jelíne k, P ., & T akasao, S. 2018, Space Science Reviews volume, 214, 45 Nakariakov, V . M., Afanasyev, A. N., Kumar, S., & Moon, Y . J. 2 017, ApJ, 849, 62 Nakariakov, V . M., Inglis, A. R., Zimovets, I. V ., et al. 2010 , Plasma Physics and Controlled Fusion, @@ -949,28 +949,28 @@ Nakariakov, V . M., & V erwichte, E. 2005, Living Reviews in So lar Physics, 2, Nisticò, G., Nakariakov, V . M., & V erwichte, E. 2013, A&A, 55 2, A57 Nisticò, G., Polito, V ., Nakariakov, V . M., & Del Zanna, G. 20 17, A&A, 600, A37 Ofman, L., & W ang, T . 2002, ApJ, 580, L85 -Pant, V ., Tiwari, A., Y uan, D., & Banerjee, D. 2017, ApJ, 847,L5 +Pant, V ., Tiwari, A., Y uan, D., & Banerjee, D. 2017, ApJ, 847, L5 Pascoe, D. J., Nakariakov, V . M., & Arber, T . D. 2007, Solar Ph ysics, 246, 165 Reale, F ., T esta, P ., Petralia, A., & Kolotkov, D. Y . 2019, Ap J, 884, 131 Roberts, B., Edwin, P . M., & Benz, A. O. 1984, ApJ, 279, 857 -Romano, P ., Zuccarello, F ., Guglielmino, S. L., et al. 2015,A&A, 582, A55 +Romano, P ., Zuccarello, F ., Guglielmino, S. L., et al. 2015, A&A, 582, A55 Russell, A. J. B., Simões, P . J. 
A., & Fletcher, L. 2015, A&A, 5 81, A8 Scargle, J. D. 1982, ApJ, 263, 835 -Schmelz, J. T ., Jenkins, B. S., W orley, B. T ., et al. 2011, ApJ, 731, 49 +Schmelz, J. T ., Jenkins, B. S., W orley, B. T ., et al. 2011, ApJ , 731, 49 Schmelz, J. T ., Kimble, J. A., Jenkins, B. S., et al. 2010, ApJ , 725, L34 Schmelz, J. T ., Pathak, S., Brooks, D. H., Christian, G. M., & Dhaliwal, R. S. 2014, ApJ, 795, 171 Schmelz, J. T ., Pathak, S., Jenkins, B. S., & W orley, B. T . 201 3, ApJ, 764, 53 Ugarte-Urra, I., & W arren, H. P . 2014, ApJ, 783, 12 -V an Doorsselaere, T ., Kupriyanova, E. G., & Y uan, D. 2016, Solar Physics, 291, 3143 +V an Doorsselaere, T ., Kupriyanova, E. G., & Y uan, D. 2016, So lar Physics, 291, 3143 V an Doorsselaere, T ., W ardle, N., Del Zanna, G., et al. 2011, ApJ, 727, L32 V anderPlas, J. T . 2018, ApJ, 236, 16 -V erwichte, E., Nakariakov, V . M., Ofman, L., & Deluca, E. E. 2004, Solar Physics, 223, 77 +V erwichte, E., Nakariakov, V . M., Ofman, L., & Deluca, E. E. 2 004, Solar Physics, 223, 77 W ang, T . 2011, Space Science Reviews, 158, 397–419 W ang, T ., Innes, D. E., & Qiu, J. 2007, ApJ, 656, 598 W ang, T . J., & Solanki, S. K. 2004, A&A, 421, L33 -W ang, T . J., Solanki, S. K., Innes, D. E., Curdt, W ., & Marsch,E. 2003, A&A, 402, L17 +W ang, T . J., Solanki, S. K., Innes, D. E., Curdt, W ., & Marsch, E. 2003, A&A, 402, L17 W ang, T ., & Ofman, L. 2019, ApJ, 886, 2 -W ang, T ., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M. 2015, ApJ, 811, L13 +W ang, T ., Ofman, L., Sun, X., Provornikova, E., & Davila, J. M . 2015, ApJ, 811, L13 W ang, T ., Ofman, L., Y uan, D., et al. 2021, Space Science Revi ews, 217 W arren, H. P ., W inebarger, A. R., & Brooks, D. H. 2012, ApJ, 75 9, 141 W ills-Davey, M. J., & Thompson, B. J. 
1999, Solar Physics, 19 0, 467 \ No newline at end of file diff --git a/read/results/tika/2201.00022.txt b/read/results/tika/2201.00022.txt index 99832bf..21f3bbf 100644 --- a/read/results/tika/2201.00022.txt +++ b/read/results/tika/2201.00022.txt @@ -642,7 +642,7 @@ o(ae)* where G is the gravitational constant and r, is the sum of the radii of the interacting objects, a black hole with mass mpy and a star with mass m,. Detailed in Rose -et al. (2020), fi(e.) and fo(e.) account for the effect of +et al. (2020), fi(es) and fo(e.) account for the effect of the eccentricity of the BH’s orbit about the SMBH on the collision rate, while n and o are simply evaluated at the semimajor axis of the orbit (see below). Note @@ -954,19 +954,19 @@ collisions and increase the BH mass accordingly. -% * Initial -4x10 e e@ AM=1M. -¢ Bondi-Hoyle-Lyttleton +% Initial +4x10) e AM=1M, +« Bondi-Hoyle-Lyttleton -3x10 ¥ +ax 101 ¥ -3 ate ° -es -= es -1 ° -n 2x10 on oe, -= os -103 4 +"3 at . +s +=, Xie. ‘ +uw 2x10 ‘cs * +m1 "A509, += +10? @@ -976,7 +976,10 @@ n 2x10 on oe, -10-3 10-7 107? 10° + + +103 10°? 107? 10° + Distance from SMBH [pc] Figure 2. We consider an example that highlights the mass @@ -1254,7 +1257,7 @@ following equation: -Ménal(tcon — Const.) = Minitial + Am — , (7) +Ménal (toll md const.) = Minitial + Am — ’ (7) co in which T represents the simulation time and Am and teoll Temain constant, approximated as their initial val- diff --git a/read/results/tika/2201.00069.txt b/read/results/tika/2201.00069.txt index 1a05855..59c7d4c 100644 --- a/read/results/tika/2201.00069.txt +++ b/read/results/tika/2201.00069.txt @@ -127,9 +127,9 @@ with the High Energy Stereoscopic System (H.E.S.S.) in very high energy gamma ra searched for signals in the ultraviolet, optical, and X-ray bands. 
For this FRB, we obtain a UV flux upper limit of 1.39×10−16 erg cm−2 s−1Å−1, X-ray limit of ∼ 6.6×10−14 erg cm−2 s−1 and a limit on the very-high-energy gamma-ray fluxΦ(𝐸 > 120GeV) < 1.7× 10−12 erg cm−2 s−1. -We obtain a radio upper limit of∼15`Jy beam−1 for persistent emission at the locations of both +We obtain a radio upper limit of∼15𝜇Jy beam−1 for persistent emission at the locations of both FRBs 20190711A and 20171019A, but detect diffuse radio emission with a peak brightness -of ∼53`Jy beam−1 associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first +of ∼53𝜇Jy beam−1 associated with FRB 20190714A at 𝑧 = 0.2365. This represents the first detection of the radio continuum emission potentially associated with the host (galaxy) of FRB 20190714A, and is only the third known FRB to have such an association. Given the possible association of a faint persistent source, FRB 20190714A may potentially be a repeating FRB @@ -196,14 +196,14 @@ ies2, only the sub-arcsecond localisation of the repeating FRB 20121102A to a host galaxy at a redshift of 𝑧 = 0.19273 ± 0.0008 (Tendulkar et al. 2017; Bassa et al. 2017) showed that it is physi- cally associated with a compact (≤ 0.7 pc), persistent radio source -of luminosity a𝐿a ∼ 1039 erg s−1 at a few GHz (Marcote et al. +of luminosity 𝜈𝐿𝜈 ∼ 1039 erg s−1 at a few GHz (Marcote et al. 2017). This source is detectable from 300MHz – 26GHz (Resmi et al. 2020; Chatterjee et al. 2017) and is seen to exhibit ∼ 10% vari- ability on day timescales. In contrast, a similar sub-milliarcsecond localisation of another repeating FRB20180916B to a nearby mas- sive spiral galaxy at 𝑧 = 0.0337 ± 0.0002 (Marcote et al. 2020) showed no associated persistent radio emission. This places a strong -upper limit on the persistent source luminosity of a𝐿a . 7.6×1035 +upper limit on the persistent source luminosity of 𝜈𝐿𝜈 . 7.6×1035 erg s−1 at 1.6GHz, which is three orders of magnitude lower than that of FRB 20121102A. 
Recently, the CHIME/FRB collaboration announced heightened activity in the repeating FRB 20201124A @@ -450,19 +450,19 @@ The theoretical thermal noise of the MeerKAT can be calculated as 𝑆rms = 1 -[𝑐 +𝜂𝑐 SEFD√︃ -𝑛pol × 𝑁 (𝑁 − 1) × Δa × 𝑡int +𝑛pol × 𝑁 (𝑁 − 1) × Δ𝜈 × 𝑡int . (1) The system equivalent flux density (SEFD) of MeerKAT at the -1.28GHz is 443 Jy and [𝑐 is the correlator efficiency. We used 𝑛pol -= 2 polarisation products (XX and YY), N = 64 telescopes, Δa = +1.28GHz is 443 Jy and 𝜂𝑐 is the correlator efficiency. We used 𝑛pol += 2 polarisation products (XX and YY), N = 64 telescopes, Δ𝜈 = 856MHz bandwidth and 𝑡int = 21600 sec observing time for one -epoch. This gives the theoretical rms of∼ 2 `Jy beam−1. The typical -image rms obtained from our residual images is ∼ 5 `Jy beam−1, +epoch. This gives the theoretical rms of∼ 2 𝜇Jy beam−1. The typical +image rms obtained from our residual images is ∼ 5 𝜇Jy beam−1, which is 2.5 times the expected theoretical rms. The widebandMFS image does not allow primary beam correction procedure as this can only be done on the sub-band images with limited rms for detection @@ -649,14 +649,14 @@ FRB 20190714 Compact persistent emission was detected in the 1.51GHz e- MERLIN image at R.A. = 12ℎ15𝑚55𝑠 .116, Dec. = −13◦01′14.′′48 -at 86 `Jy beam−1 by e-MERLIN. The stochastic position uncer- +at 86 𝜇Jy beam−1 by e-MERLIN. The stochastic position uncer- tainty is (0.04, 0.15) arcsec and the uncertainty (due to the sepa- ration between phase-calibrator and target, and antenna position uncertainty) is (0.013, 0.056) arcsec, giving a total astrometric uncertainty of (0.04, 0.16) arcsec in R.A. and Dec., respectively. The offset from the FRB position is negligible in R.A. and 1.2 arcsec in Dec. The rms in this region (of full primary beam sen- -sitivity) is 20 `Jy beam−1, making this a 4.3𝜎rms detection. It is +sitivity) is 20 𝜇Jy beam−1, making this a 4.3𝜎rms detection. It is ∼1.5𝜎rms higher than that of the MeerKAT detection. 
Although the e-MERLIN flux scale nominal uncertainty is ∼5%, in these data it is possibly higher due to the low declination of the phase-reference @@ -672,11 +672,11 @@ ground persistent radio source and the host galaxy, following the procedure of Eftekhari et al. (2018). Instead of using the FRB lo- calisation region, we use the area of the galaxy, which is taken as 2′′ × 2′′, twice the half light radius from Heintz et al. (2020). Given -the source has a flux density of ∼ 90`Jy we estimate the chance +the source has a flux density of ∼ 90𝜇Jy we estimate the chance alignment probability of 0.0008, which corresponds to 3.4𝜎. The flux density threshold, assuming 3𝜎, for an unresolved radio source -is ∼ 15 `Jy. If instead we consider the probability of detecting any -radio source above our flux density threshold of 15`Jy, the probabil- +is ∼ 15 𝜇Jy. If instead we consider the probability of detecting any +radio source above our flux density threshold of 15𝜇Jy, the probabil- ity of a chance alignment is, therefore, approximately 0.8%, making the statistical significance of our detection 2.6𝜎. This represents the first detection of radio continuum emission associated with the host @@ -686,8 +686,8 @@ first detection of radio continuum emission associated with the host No continuum emission was detected near FRBs 20171019A and 20190711A. As each of the images of these sources has an rms -of ∼ 5 `Jy beam−1, the 3𝜎 intensity upper limit of any emission -associated with FRBs 20171019A and 20190711A will be ∼ 15 `Jy +of ∼ 5 𝜇Jy beam−1, the 3𝜎 intensity upper limit of any emission +associated with FRBs 20171019A and 20190711A will be ∼ 15 𝜇Jy beam−1 (see Table 1). Candidate pulses above a signal-to-noise (S/N) of 10 from the @@ -920,9 +920,9 @@ the full region accessible within the H.E.S.S. field of view above Of the targeted FRB fields reported here, only FRB 20190714A is observed to be spatially coincident with a persistent radio con- -tinuum source. 
We obtain an upper limit of ∼ 15 `Jy beam−1 for +tinuum source. We obtain an upper limit of ∼ 15 𝜇Jy beam−1 for FRBs 20190711A and 20171019A, respectively, and a peak inten- -sity of ∼ 53 `Jy beam−1 for the emission coincident with FRB +sity of ∼ 53 𝜇Jy beam−1 for the emission coincident with FRB 20190714A. This source is detected at both epochs with similar intensities within the measured rms of the images (see Tables 1 and 2 for details). The values in the Table 2 are derived by carrying @@ -953,101 +953,6 @@ MeerKAT flux and is unresolved on the e-MERLIN baselines. The MNRAS 000, 1–15 (2021) -6 = Chibueze et al. - - - -10.0 T T T T T T -7.54 5 4 -9 -O -5.0+ : -O -° -2.5 ° -= . ho oO _ -% " eo -UO DOD @& Oo -Oo Oo -© oF 0 0% | -O -@ -2 os ° " -” o 00 -O -O --5.0+ O 4 - -—7.5 - -T - - - -—10 | | - - - - - -1 --10.0 —-7.5 —5.0 -2.5 - -| -2.5 5.0 7.5 10.0 - -AR.A. (arcsec) - -Figure 1. Astrometric comparison between MeerKAT and NVSS discrete compact sources.The open circles represent the difference in position between the - -MeerKAT and NVSS sources. - -gamma-ray flux above that threshold and assuming an energy depen- -dence following E~? is ®(E > 120GeV) < 2.10x 107!2 cm=? s7! -or ®(E > 120GeV) < 1.7 x 107! ergem~?s7!. A variation of -+ 0.5 of the assumed spectral index leads to a variation in the upper -limit of less than + 19%. A map of energy flux upper limits covering -the full region accessible within the H.E.S.S. field of view above -120 GeV is given in Figure 6. - -4 DISCUSSION - -Of the targeted FRB fields reported here, only FRB 20190714A -is observed to be spatially coincident with a persistent radio con- -tinuum source. We obtain an upper limit of ~ 15 uJy beam™! for -FRBs 20190711A and 20171019A, respectively, and a peak inten- -sity of ~ S53 wy beam7! for the emission coincident with FRB -20190714A. This source is detected at both epochs with similar -intensities within the measured rms of the images (see Tables 1 and -2 for details). 
The values in the Table 2 are derived by carrying -out 2D Gaussian fit using similar ellipses enclosing the detected -persistent emission. The average flux density is ~ 3 times less than - -that of the persistent source associated with FRBs 20121102A, one -of the most prolific repeaters, located at z = 0.19273(8). Persistent -radio emission from FRB 20201124A was detected by the UGMRT -(Wharton et al. 2021) and the JVLA (Ricci et al. 2021) on angular -scales of a few arcseconds. However, it is resolved out at scales of -~ 0.1 arcseconds with the European VLBI Network (Marcote et al. -2021) suggesting that it is not a compact source directly associated -with the FRB. In contrast, the other localised, prolific repeating -FRB 20180916A has no persistent radio counterpart. - -In the image in Figure 3 one can see that the persistent radio -source lies at the edge of the optical extent of the host galaxy -as seen in PanSTARRS observations (Heintz et al. 2020). Our -derived 1283 MHz peak position with MeerKAT places it just -1’’68 away from the position of FRB 20190714A (@;2000, 6.72000 -= 12/15558 12, -13°01'15’’70; Heintz et al. 2020). The posi- -tional uncertainty on the FRB position is 0’’283. Similarly, the peak -1.51 GHz e-MERLIN position of the persistent radio source is sepa- -rated from the position of FRB 20190714A by 0°’53. The persistent -source near FRB 20190714 5000, computation starts to quickly become expensive for SOLA. @@ -759,7 +758,7 @@ composed of computed B-coefficients: {B,,q}. The least-squares problem is solved toroidal flow. We use B-spline basis functions as our f;(z), comprising 11 knots spaced uniformly in acoustic radius, for both poloidal and toroidal coefficients. Hence, for M modes (total number of k& for a given q is M) and 11 basis functions for each poloidal and toroidal, the dimensions of K, U and B are thus M x 22, 22x 1, and M x 1 respectively. 
-Normalizing both sides of eq 10 by the noise covariance A (a diagonal matrix with the entries G,,_; see eq 9; dimension +Normalizing both sides of eq 10 by the noise covariance A (a diagonal matrix with the entries Gz,_; see eq 9; dimension M x M) and pre-multiplying by KT, (KTA~'K)U =(KTA“!)B, (11) @@ -1400,7 +1399,7 @@ _ twnk /2 , (A9) a = k w?, — Ww -where wpx is the resonant frequency of the mode, and 7ynz is the mode linewidth. Eq AY can be derived by introducing +where wpx is the resonant frequency of the mode, and 7ynzx is the mode linewidth. Eq AY can be derived by introducing mode damping —iwyp as an operator in the differential equation that governs undamped, driven oscillations (see eq 5 of Hanasoge et al. 2017), and then deriving the effects of first-order perturbations to the wavefield cross-correlation. @@ -1418,7 +1417,7 @@ where the o >> on the right-hand-side implies average over all [k,z,k,] (Q terms This forces N to be isotropic, i.e., to only depend on k, and not k. The sum over w is within five linewidths of wy x. Note that Eq. A8 through A10 are modified from H21 to ensure parity and that flow maps are real. -The three equations A8 through A10, along with the symmetry relation for kernels, and summation over tw, serve +The three equations A8 through A10, along with the symmetry relation for kernels, and summation over +w, serve to establish the parity Bg = Bry ¢ This allows for obtaining Py = Pry, and subsequently, purely real flow in the real domain. Setting o = 0 gives us the linear, invertible equation eq 6. Substituting eqns A8 through A10 into the noise model obtained in H21 and summing over tw establishes the symmetry GZ 4 = Gh ¢ @@ -1704,7 +1703,7 @@ http://doi.org/10.1103/RevModPhys.74.1073 http://doi.org/10.1007/s41116-020-00028-3 14 MANI ET AL. -x1071? x10-’ +x10-1 x10-’ 6.0 4 — Avg. kernel sees Target @@ -1716,17 +1715,17 @@ sees Target -aR =[-112, — 45] -kRo =[-853,-157] +qk. 
=[-112, -45] +o =[-853,-157] -4.55 2=—-0.48 Mm + - - +457 z= —-0.48 Mm -——o—or +—— oe - + + @@ -1742,13 +1741,13 @@ kRo =[-853,-157] -y OS 5 +yO hh 3 Z N 3.07 -x < -1.55 -0.0 -fasmeererrstrccrssssesee et -—-4 —2 0 -5 -4 3 —2 -1 0 -z, Mm z,Mm +- o +1.5 4 +0.0 fasmererrritivrstsereree? +—4 —2 0 —5 -4 3 2 -1 0 +2, Mm z, Mm Figure 8. Left: Kernel Kx,q(z) (eq B14) shown vs depth z for the three radial order couplings ff, pi-p1, and p2-p2e. qRo = [—112, —45] and kRo = [-—853,—157] is chosen for all the radial order couplings for comparison. Right: Averaging kernel @@ -1757,8 +1756,8 @@ Integral of the averaging kernel over z is 0.89. Setting ox — 0 gives us the matrix problem to be solved A{a} =v, - -{a} = |4 + y| “'y, (B18) +-1 +{a} = |4 + y| v, (B18) where the square matrix A = [dz Keq(z) Kp g(z) and vu = [ dz Ke.q(z)T(z, 20). Here, k’ is just a dummy index for denoting elements in the matrix A, (k’ 4 k+q). In the last line of eq B18, we introduce regularization using an Identity @@ -1784,18 +1783,16 @@ Bahcall, J. N., & Pinsonneault, M. H. 1992, Reviews of doi: 10.1051/0004-6361/20 Modern Physics, 64, 885, doi: 10.1103/RevModPhys.64.885 -Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, -Science Advances, 2, e1600557, -doi: 10.1126/sciadv.1600557 +doi: 10.1086 /324323 +Birch, A. C., Schunker, H., Braun, D. C., et al. 2016, hei . ' +Science Advances, 2, €1600557, Christensen-Dalsgaard, J. 2002, Reviews of Modern -Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, —. 2021, Living Reviews in Solar Physics, 18, 2, -A&A, 628, A37, doi: 10.1051/0004-6361/201935591 doi: 10.1007/s41116-020-00028-3 +doi: 10.1126/sciadv.1600557 Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 Braun, D. C., & Lindsey, C. 2001, ApJL, 560, L189, -doi: 10.1086 /324323 -Christensen-Dalsgaard, J. 2002, Reviews of Modern -Physics, 74, 1073, doi: 10.1103/RevModPhys.74.1073 +Birch, A. C., Schunker, H., Braun, D. C., & Gizon, L. 2019, —. 
2021, Living Reviews in Solar Physics, 18, 2, +A&A, 628, A37, doi: 10.1051/0004-6361/201935591 doi: 10.1007/s41116-020-00028-3 diff --git a/requirements/dev.txt b/requirements/dev.txt index cd48c0c..108978e 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -2,11 +2,11 @@ # uv pip compile requirements/dev.in build==1.2.2.post1 # via pip-tools -click==8.1.8 +click==8.2.0 # via pip-tools -packaging==24.2 +packaging==25.0 # via build -pip==25.0.1 +pip==25.1.1 # via pip-tools pip-tools==7.4.1 # via -r requirements/dev.in @@ -14,7 +14,11 @@ pyproject-hooks==1.2.0 # via # build # pip-tools -setuptools==75.8.0 +setuptools==80.6.0 # via pip-tools +tomli==2.2.1 + # via + # build + # pip-tools wheel==0.45.1 # via pip-tools diff --git a/requirements/main.txt b/requirements/main.txt index 67dc7a1..0d939e2 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -4,25 +4,25 @@ annotated-types==0.7.0 # via pydantic borb==2.1.25 # via -r requirements/main.in -certifi==2025.1.31 +certifi==2025.4.26 # via requests cffi==1.17.1 # via cryptography -charset-normalizer==3.4.1 +charset-normalizer==3.4.2 # via # pdfminer-six # requests -cryptography==44.0.1 +cryptography==44.0.3 # via # borb # pdfminer-six -fonttools==4.56.0 +fonttools==4.58.0 # via borb idna==3.10 # via requests -levenshtein==0.26.1 +levenshtein==0.27.1 # via python-levenshtein -lxml==5.3.1 +lxml==5.4.0 # via # -r requirements/main.in # borb @@ -30,36 +30,36 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -numpy==2.2.3 +numpy==2.2.5 # via -r requirements/main.in -pdfminer-six==20231228 +pdfminer-six==20250327 # via # -r requirements/main.in # pdfplumber -pdfplumber==0.11.5 +pdfplumber==0.11.6 # via -r requirements/main.in pdfrw==0.4 # via -r requirements/main.in pdftotext==3.0.0 # via -r requirements/main.in -pillow==11.1.0 +pillow==11.2.1 # via # borb # pdfplumber # qrcode -playa-pdf==0.2.10 +playa-pdf==0.5.0 # via -r requirements/main.in pycparser==2.22 # via cffi 
-pydantic==2.10.6 +pydantic==2.11.4 # via -r requirements/main.in -pydantic-core==2.27.2 +pydantic-core==2.33.2 # via pydantic pygments==2.19.1 # via rich -pymupdf==1.25.3 +pymupdf==1.25.5 # via -r requirements/main.in -pypdf==5.3.0 +pypdf==5.5.0 # via -r requirements/main.in pypdfium2==4.30.1 # via @@ -67,28 +67,33 @@ pypdfium2==4.30.1 # pdfplumber python-barcode==0.15.1 # via borb -python-levenshtein==0.26.1 +python-levenshtein==0.27.1 # via -r requirements/main.in -qrcode==8.0 +qrcode==8.2 # via borb -rapidfuzz==3.12.1 +rapidfuzz==3.13.0 # via levenshtein requests==2.32.3 # via # -r requirements/main.in # borb # tika -rich==13.9.4 +rich==14.0.0 # via -r requirements/main.in -setuptools==75.8.0 +setuptools==80.6.0 # via # borb # tika -tika==2.6.0 +tika==3.1.0 # via -r requirements/main.in -typing-extensions==4.12.2 +typing-extensions==4.13.2 # via # pydantic # pydantic-core -urllib3==2.3.0 + # pypdf + # rich + # typing-inspection +typing-inspection==0.4.0 + # via pydantic +urllib3==2.4.0 # via requests